mirror of
https://gitlab.ub.uni-bielefeld.de/sfb1288inf/ocr.git
synced 2024-12-26 15:04:18 +00:00
Update for unprivileged usage.
This commit is contained in:
parent
f731634ba1
commit
95adc4d804
@ -56,4 +56,7 @@ RUN echo "deb https://notesalexp.org/tesseract-ocr/stretch/ stretch main" >> /et
|
|||||||
COPY hocrtotei /usr/local/bin
|
COPY hocrtotei /usr/local/bin
|
||||||
COPY ocr /usr/local/bin
|
COPY ocr /usr/local/bin
|
||||||
|
|
||||||
|
mkdir /input /output
|
||||||
|
chmod a+rw /input /output
|
||||||
|
|
||||||
ENTRYPOINT ["ocr"]
|
ENTRYPOINT ["ocr"]
|
||||||
|
26
README.md
26
README.md
@ -42,12 +42,12 @@ mkdir -p /<mydatalocation>/files_for_ocr /<mydatalocation>/files_from_ocr
|
|||||||
docker run \
|
docker run \
|
||||||
--rm \
|
--rm \
|
||||||
-it \
|
-it \
|
||||||
-v /<mydatalocation>/files_for_ocr:/files_for_ocr \
|
-v /<mydatalocation>/files_for_ocr:/input \
|
||||||
-v /<mydatalocation>/files_from_ocr:/files_from_ocr \
|
-v /<mydatalocation>/files_from_ocr:/output \
|
||||||
sfb1288inf/ocr:latest \
|
sfb1288inf/ocr:latest \
|
||||||
-i /files_for_ocr \
|
-i /input \
|
||||||
-o /files_from_ocr \
|
-l <languagecode> \
|
||||||
-l <languagecode>
|
-o /output
|
||||||
```
|
```
|
||||||
The arguments below `sfb1288inf/ocr:latest` are described in the [OCR arguments](#ocr-arguments) part.
|
The arguments below `sfb1288inf/ocr:latest` are described in the [OCR arguments](#ocr-arguments) part.
|
||||||
|
|
||||||
@ -57,14 +57,6 @@ If you want to use the prebuilt image, replace `sfb1288inf/ocr:latest` with `git
|
|||||||
|
|
||||||
### OCR arguments
|
### OCR arguments
|
||||||
|
|
||||||
`-i path`
|
|
||||||
* Sets the input directory using the specified path.
|
|
||||||
* required = True
|
|
||||||
|
|
||||||
`-o path`
|
|
||||||
* Sets the output directory using the specified path.
|
|
||||||
* required = True
|
|
||||||
|
|
||||||
`-l languagecode`
|
`-l languagecode`
|
||||||
* Tells tesseract which language will be used.
|
* Tells tesseract which language will be used.
|
||||||
* options = deu (German), deu_frak (German Fraktur), eng (English), enm (Middle English), fra (French), frm (Middle French), ita (Italian), por (Portuguese), spa (Spanish)
|
* options = deu (German), deu_frak (German Fraktur), eng (English), enm (Middle English), fra (French), frm (Middle French), ita (Italian), por (Portuguese), spa (Spanish)
|
||||||
@ -90,12 +82,12 @@ Example with all arguments used:
|
|||||||
docker run \
|
docker run \
|
||||||
--rm \
|
--rm \
|
||||||
-it \
|
-it \
|
||||||
-v $HOME/ocr/files_for_ocr:/files_for_ocr \
|
-v "$HOME"/ocr/files_for_ocr:/input \
|
||||||
-v $HOME/ocr/files_from_ocr:/files_from_ocr \
|
-v "$HOME"/ocr/files_from_ocr:/output \
|
||||||
sfb1288inf/ocr:latest \
|
sfb1288inf/ocr:latest \
|
||||||
-i /files_for_ocr \
|
-i /input \
|
||||||
-o /files_from_ocr \
|
|
||||||
-l eng \
|
-l eng \
|
||||||
|
-o /output \
|
||||||
--keep_intermediates \
|
--keep_intermediates \
|
||||||
--nCores 8 \
|
--nCores 8 \
|
||||||
--skip-binarisation
|
--skip-binarisation
|
||||||
|
23
wrapper/ocr
23
wrapper/ocr
@ -17,15 +17,16 @@ parser.add_argument(
|
|||||||
dest='output_dir',
|
dest='output_dir',
|
||||||
required=False
|
required=False
|
||||||
)
|
)
|
||||||
args, ocr_args = parser.parse_known_args()
|
dirs, args = parser.parse_known_args()
|
||||||
|
|
||||||
if args.input_dir == None or args.output_dir == None:
|
cmd = ['docker', 'run', '--rm', '-it']
|
||||||
if args.input_dir != None:
|
if dirs.input_dir is not None:
|
||||||
ocr_args.append('-i')
|
cmd += ['-v', dirs.input_dir + ':/input']
|
||||||
ocr_args.append('/files_for_ocr')
|
args += ['-i', '/input']
|
||||||
if args.output_dir != None:
|
if dirs.output_dir is not None:
|
||||||
ocr_args.append('-o')
|
cmd += ['-v', dirs.output_dir + ':/output']
|
||||||
ocr_args.append('/files_from_ocr')
|
args += ['-o', '/output']
|
||||||
subprocess.run(['docker', 'run', '--rm', '-it', container_image] + ocr_args)
|
cmd.append(container_image)
|
||||||
else:
|
cmd += args
|
||||||
subprocess.run(['docker', 'run', '--rm', '-it', '-v', args.input_dir + ':/files_for_ocr', '-v', args.output_dir + ':/files_from_ocr', container_image, '-i', '/files_for_ocr', '-o', '/files_from_ocr'] + ocr_args)
|
|
||||||
|
subprocess.run(cmd)
|
||||||
|
Loading…
Reference in New Issue
Block a user