Update for unprivileged usage.

This commit is contained in:
Patrick Jentsch 2019-06-02 21:38:30 +02:00
parent f731634ba1
commit 95adc4d804
3 changed files with 24 additions and 28 deletions

View File

@ -56,4 +56,7 @@ RUN echo "deb https://notesalexp.org/tesseract-ocr/stretch/ stretch main" >> /et
COPY hocrtotei /usr/local/bin
COPY ocr /usr/local/bin
# Create the /input and /output mount points and make them readable and
# writable by every user, so the image can also be used unprivileged.
# Shell commands must be executed through a RUN instruction in a Dockerfile.
RUN mkdir /input /output \
    && chmod a+rw /input /output
ENTRYPOINT ["ocr"]

View File

@ -42,12 +42,12 @@ mkdir -p /<mydatalocation>/files_for_ocr /<mydatalocation>/files_from_ocr
docker run \
--rm \
-it \
-v /<mydatalocation>/files_for_ocr:/files_for_ocr \
-v /<mydatalocation>/files_from_ocr:/files_from_ocr \
-v /<mydatalocation>/files_for_ocr:/input \
-v /<mydatalocation>/files_from_ocr:/output \
sfb1288inf/ocr:latest \
-i /files_for_ocr \
-o /files_from_ocr \
-l <languagecode>
-i /input \
-l <languagecode> \
-o /output
```
The arguments below `sfb1288inf/ocr:latest` are described in the [OCR arguments](#ocr-arguments) part.
@ -57,14 +57,6 @@ If you want to use the prebuilt image, replace `sfb1288inf/ocr:latest` with `git
### OCR arguments
`-i path`
* Sets the input directory using the specified path.
* required = True
`-o path`
* Sets the output directory using the specified path.
* required = True
`-l languagecode`
* Tells tesseract which language will be used.
* options = deu (German), deu_frak (German Fraktur), eng (English), enm (Middle English), fra (French), frm (Middle French), ita (Italian), por (Portuguese), spa (Spanish)
@ -90,12 +82,12 @@ Example with all arguments used:
docker run \
--rm \
-it \
-v $HOME/ocr/files_for_ocr:/files_for_ocr \
-v $HOME/ocr/files_from_ocr:/files_from_ocr \
-v "$HOME"/ocr/files_for_ocr:/input \
-v "$HOME"/ocr/files_from_ocr:/output \
sfb1288inf/ocr:latest \
-i /files_for_ocr \
-o /files_from_ocr \
-i /input \
-l eng \
-o /output \
--keep_intermediates \
--nCores 8 \
--skip-binarisation

View File

@ -17,15 +17,16 @@ parser.add_argument(
dest='output_dir',
required=False
)
args, ocr_args = parser.parse_known_args()
dirs, args = parser.parse_known_args()
if args.input_dir == None or args.output_dir == None:
if args.input_dir != None:
ocr_args.append('-i')
ocr_args.append('/files_for_ocr')
if args.output_dir != None:
ocr_args.append('-o')
ocr_args.append('/files_from_ocr')
subprocess.run(['docker', 'run', '--rm', '-it', container_image] + ocr_args)
else:
subprocess.run(['docker', 'run', '--rm', '-it', '-v', args.input_dir + ':/files_for_ocr', '-v', args.output_dir + ':/files_from_ocr', container_image, '-i', '/files_for_ocr', '-o', '/files_from_ocr'] + ocr_args)
cmd = ['docker', 'run', '--rm', '-it']
if dirs.input_dir is not None:
cmd += ['-v', dirs.input_dir + ':/input']
args += ['-i', '/input']
if dirs.output_dir is not None:
cmd += ['-v', dirs.output_dir + ':/output']
args += ['-o', '/output']
cmd.append(container_image)
cmd += args
subprocess.run(cmd)