From 95adc4d80459f8c3cc50a11cd0d84082bb4d4db9 Mon Sep 17 00:00:00 2001
From: Patrick Jentsch
Date: Sun, 2 Jun 2019 21:38:30 +0200
Subject: [PATCH] Update for unprivileged usage.

---
Reviewer notes (commentary only; not part of the commit message):

* Dockerfile: creates /input and /output inside the image and makes them
  readable and writable for all users. The two shell commands are wrapped
  in a single RUN instruction because every Dockerfile line must start
  with an instruction keyword; a bare `mkdir` line aborts the build.
* README.md: the examples now mount the host directories at /input and
  /output, and the `-i` / `-o` entries are dropped from the
  "OCR arguments" section.
* wrapper/ocr: the `docker run` command is assembled step by step; a
  volume mount and the matching `-i` / `-o` argument are added only for
  a directory that was actually supplied.
* NOTE(review): in this copy the README example lines look as if
  placeholders were stripped (e.g. the host data location before
  `/files_for_ocr`, and the value after `-l`), and leading indentation
  was reconstructed. Confirm against the repository before applying.

 Dockerfile  |  3 +++
 README.md   | 26 +++++++++-----------------
 wrapper/ocr | 23 ++++++++++++-----------
 3 files changed, 24 insertions(+), 28 deletions(-)

diff --git a/Dockerfile b/Dockerfile
index 578c4d1..58308dd 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -56,4 +56,7 @@ RUN echo "deb https://notesalexp.org/tesseract-ocr/stretch/ stretch main" >> /et
 COPY hocrtotei /usr/local/bin
 COPY ocr /usr/local/bin
 
+RUN mkdir /input /output \
+    && chmod a+rw /input /output
+
 ENTRYPOINT ["ocr"]
diff --git a/README.md b/README.md
index d427e11..dc40133 100644
--- a/README.md
+++ b/README.md
@@ -42,12 +42,12 @@ mkdir -p //files_for_ocr //files_from_ocr
 docker run \
     --rm \
     -it \
-    -v //files_for_ocr:/files_for_ocr \
-    -v //files_from_ocr:/files_from_ocr \
+    -v //files_for_ocr:/input \
+    -v //files_from_ocr:/output \
     sfb1288inf/ocr:latest \
-    -i /files_for_ocr \
-    -o /files_from_ocr \
-    -l
+    -i /input \
+    -l \
+    -o /output
 ```
 
 The arguments below `sfb1288inf/ocr:latest` are described in the [OCR arguments](#ocr-arguments) part.
@@ -57,14 +57,6 @@ If you want to use the prebuilt image, replace `sfb1288inf/ocr:latest` with `git
 
 ### OCR arguments
 
-`-i path`
-* Sets the input directory using the specified path.
-* required = True
-
-`-o path`
-* Sets the output directory using the specified path.
-* required = True
-
 `-l languagecode`
 * Tells tesseract which language will be used.
 * options = deu (German), deu_frak (German Fraktur), eng (English), enm (Middle englisch), fra (French), frm (Middle french), ita (Italian), por (Portuguese), spa (Spanish)
@@ -90,12 +82,12 @@ Example with all arguments used:
 docker run \
     --rm \
     -it \
-    -v $HOME/ocr/files_for_ocr:/files_for_ocr \
-    -v $HOME/ocr/files_from_ocr:/files_from_ocr \
+    -v "$HOME"/ocr/files_for_ocr:/input \
+    -v "$HOME"/ocr/files_from_ocr:/output \
     sfb1288inf/ocr:latest \
-    -i /files_for_ocr \
-    -o /files_from_ocr \
+    -i /input \
     -l eng \
+    -o /output \
     --keep_intermediates \
     --nCores 8 \
     --skip-binarisation
diff --git a/wrapper/ocr b/wrapper/ocr
index 7f74cd7..398f06a 100755
--- a/wrapper/ocr
+++ b/wrapper/ocr
@@ -17,15 +17,16 @@ parser.add_argument(
     dest='output_dir',
     required=False
 )
-args, ocr_args = parser.parse_known_args()
+dirs, args = parser.parse_known_args()
 
-if args.input_dir == None or args.output_dir == None:
-    if args.input_dir != None:
-        ocr_args.append('-i')
-        ocr_args.append('/files_for_ocr')
-    if args.output_dir != None:
-        ocr_args.append('-o')
-        ocr_args.append('/files_from_ocr')
-    subprocess.run(['docker', 'run', '--rm', '-it', container_image] + ocr_args)
-else:
-    subprocess.run(['docker', 'run', '--rm', '-it', '-v', args.input_dir + ':/files_for_ocr', '-v', args.output_dir + ':/files_from_ocr', container_image, '-i', '/files_for_ocr', '-o', '/files_from_ocr'] + ocr_args)
+cmd = ['docker', 'run', '--rm', '-it']
+if dirs.input_dir is not None:
+    cmd += ['-v', dirs.input_dir + ':/input']
+    args += ['-i', '/input']
+if dirs.output_dir is not None:
+    cmd += ['-v', dirs.output_dir + ':/output']
+    args += ['-o', '/output']
+cmd.append(container_image)
+cmd += args
+
+subprocess.run(cmd)