Update for unprivileged usage.

This commit is contained in:
Patrick Jentsch 2019-06-02 21:38:30 +02:00
parent f731634ba1
commit 95adc4d804
3 changed files with 24 additions and 28 deletions

View File

@ -56,4 +56,7 @@ RUN echo "deb https://notesalexp.org/tesseract-ocr/stretch/ stretch main" >> /et
COPY hocrtotei /usr/local/bin COPY hocrtotei /usr/local/bin
COPY ocr /usr/local/bin COPY ocr /usr/local/bin
mkdir /input /output
chmod a+rw /input /output
ENTRYPOINT ["ocr"] ENTRYPOINT ["ocr"]

View File

@ -42,12 +42,12 @@ mkdir -p /<mydatalocation>/files_for_ocr /<mydatalocation>/files_from_ocr
docker run \ docker run \
--rm \ --rm \
-it \ -it \
-v /<mydatalocation>/files_for_ocr:/files_for_ocr \ -v /<mydatalocation>/files_for_ocr:/input \
-v /<mydatalocation>/files_from_ocr:/files_from_ocr \ -v /<mydatalocation>/files_from_ocr:/output \
sfb1288inf/ocr:latest \ sfb1288inf/ocr:latest \
-i /files_for_ocr \ -i /input \
-o /files_from_ocr \ -l <languagecode> \
-l <languagecode> -o /output
``` ```
The arguments below `sfb1288inf/ocr:latest` are described in the [OCR arguments](#ocr-arguments) section. The arguments below `sfb1288inf/ocr:latest` are described in the [OCR arguments](#ocr-arguments) section.
@ -57,14 +57,6 @@ If you want to use the prebuilt image, replace `sfb1288inf/ocr:latest` with `git
### OCR arguments ### OCR arguments
`-i path`
* Sets the input directory using the specified path.
* required = True
`-o path`
* Sets the output directory using the specified path.
* required = True
`-l languagecode` `-l languagecode`
* Tells tesseract which language will be used. * Tells tesseract which language will be used.
* options = deu (German), deu_frak (German Fraktur), eng (English), enm (Middle English), fra (French), frm (Middle French), ita (Italian), por (Portuguese), spa (Spanish) * options = deu (German), deu_frak (German Fraktur), eng (English), enm (Middle English), fra (French), frm (Middle French), ita (Italian), por (Portuguese), spa (Spanish)
@ -90,12 +82,12 @@ Example with all arguments used:
docker run \ docker run \
--rm \ --rm \
-it \ -it \
-v $HOME/ocr/files_for_ocr:/files_for_ocr \ -v "$HOME"/ocr/files_for_ocr:/input \
-v $HOME/ocr/files_from_ocr:/files_from_ocr \ -v "$HOME"/ocr/files_from_ocr:/output \
sfb1288inf/ocr:latest \ sfb1288inf/ocr:latest \
-i /files_for_ocr \ -i /input \
-o /files_from_ocr \
-l eng \ -l eng \
-o /output \
--keep_intermediates \ --keep_intermediates \
--nCores 8 \ --nCores 8 \
--skip-binarisation --skip-binarisation

View File

@ -17,15 +17,16 @@ parser.add_argument(
dest='output_dir', dest='output_dir',
required=False required=False
) )
args, ocr_args = parser.parse_known_args() dirs, args = parser.parse_known_args()
if args.input_dir == None or args.output_dir == None: cmd = ['docker', 'run', '--rm', '-it']
if args.input_dir != None: if dirs.input_dir is not None:
ocr_args.append('-i') cmd += ['-v', dirs.input_dir + ':/input']
ocr_args.append('/files_for_ocr') args += ['-i', '/input']
if args.output_dir != None: if dirs.output_dir is not None:
ocr_args.append('-o') cmd += ['-v', dirs.output_dir + ':/output']
ocr_args.append('/files_from_ocr') args += ['-o', '/output']
subprocess.run(['docker', 'run', '--rm', '-it', container_image] + ocr_args) cmd.append(container_image)
else: cmd += args
subprocess.run(['docker', 'run', '--rm', '-it', '-v', args.input_dir + ':/files_for_ocr', '-v', args.output_dir + ':/files_from_ocr', container_image, '-i', '/files_for_ocr', '-o', '/files_from_ocr'] + ocr_args)
subprocess.run(cmd)