mirror of
https://gitlab.ub.uni-bielefeld.de/sfb1288inf/ocr.git
synced 2024-12-26 03:14:21 +00:00
Update for unprivileged usage.
This commit is contained in:
parent
f731634ba1
commit
95adc4d804
@ -56,4 +56,7 @@ RUN echo "deb https://notesalexp.org/tesseract-ocr/stretch/ stretch main" >> /et
|
||||
COPY hocrtotei /usr/local/bin
|
||||
COPY ocr /usr/local/bin
|
||||
|
||||
mkdir /input /output
|
||||
chmod a+rw /input /output
|
||||
|
||||
ENTRYPOINT ["ocr"]
|
||||
|
26
README.md
26
README.md
@ -42,12 +42,12 @@ mkdir -p /<mydatalocation>/files_for_ocr /<mydatalocation>/files_from_ocr
|
||||
docker run \
|
||||
--rm \
|
||||
-it \
|
||||
-v /<mydatalocation>/files_for_ocr:/files_for_ocr \
|
||||
-v /<mydatalocation>/files_from_ocr:/files_from_ocr \
|
||||
-v /<mydatalocation>/files_for_ocr:/input \
|
||||
-v /<mydatalocation>/files_from_ocr:/output \
|
||||
sfb1288inf/ocr:latest \
|
||||
-i /files_for_ocr \
|
||||
-o /files_from_ocr \
|
||||
-l <languagecode>
|
||||
-i /input \
|
||||
-l <languagecode> \
|
||||
-o /output
|
||||
```
|
||||
The arguments after `sfb1288inf/ocr:latest` are described in the [OCR arguments](#ocr-arguments) section.
|
||||
|
||||
@ -57,14 +57,6 @@ If you want to use the prebuilt image, replace `sfb1288inf/ocr:latest` with `git
|
||||
|
||||
### OCR arguments
|
||||
|
||||
`-i path`
|
||||
* Sets the input directory using the specified path.
|
||||
* required = True
|
||||
|
||||
`-o path`
|
||||
* Sets the output directory using the specified path.
|
||||
* required = True
|
||||
|
||||
`-l languagecode`
|
||||
* Tells tesseract which language will be used.
|
||||
* options = deu (German), deu_frak (German Fraktur), eng (English), enm (Middle English), fra (French), frm (Middle French), ita (Italian), por (Portuguese), spa (Spanish)
|
||||
@ -90,12 +82,12 @@ Example with all arguments used:
|
||||
docker run \
|
||||
--rm \
|
||||
-it \
|
||||
-v $HOME/ocr/files_for_ocr:/files_for_ocr \
|
||||
-v $HOME/ocr/files_from_ocr:/files_from_ocr \
|
||||
-v "$HOME"/ocr/files_for_ocr:/input \
|
||||
-v "$HOME"/ocr/files_from_ocr:/output \
|
||||
sfb1288inf/ocr:latest \
|
||||
-i /files_for_ocr \
|
||||
-o /files_from_ocr \
|
||||
-i /input \
|
||||
-l eng \
|
||||
-o /output \
|
||||
--keep_intermediates \
|
||||
--nCores 8 \
|
||||
--skip-binarisation
|
||||
|
23
wrapper/ocr
23
wrapper/ocr
@ -17,15 +17,16 @@ parser.add_argument(
|
||||
dest='output_dir',
|
||||
required=False
|
||||
)
|
||||
args, ocr_args = parser.parse_known_args()
|
||||
dirs, args = parser.parse_known_args()
|
||||
|
||||
if args.input_dir == None or args.output_dir == None:
|
||||
if args.input_dir != None:
|
||||
ocr_args.append('-i')
|
||||
ocr_args.append('/files_for_ocr')
|
||||
if args.output_dir != None:
|
||||
ocr_args.append('-o')
|
||||
ocr_args.append('/files_from_ocr')
|
||||
subprocess.run(['docker', 'run', '--rm', '-it', container_image] + ocr_args)
|
||||
else:
|
||||
subprocess.run(['docker', 'run', '--rm', '-it', '-v', args.input_dir + ':/files_for_ocr', '-v', args.output_dir + ':/files_from_ocr', container_image, '-i', '/files_for_ocr', '-o', '/files_from_ocr'] + ocr_args)
|
||||
cmd = ['docker', 'run', '--rm', '-it']
|
||||
if dirs.input_dir is not None:
|
||||
cmd += ['-v', dirs.input_dir + ':/input']
|
||||
args += ['-i', '/input']
|
||||
if dirs.output_dir is not None:
|
||||
cmd += ['-v', dirs.output_dir + ':/output']
|
||||
args += ['-o', '/output']
|
||||
cmd.append(container_image)
|
||||
cmd += args
|
||||
|
||||
subprocess.run(cmd)
|
||||
|
Loading…
Reference in New Issue
Block a user