diff --git a/README.md b/README.md
index 412e9c5..85624d9 100644
--- a/README.md
+++ b/README.md
@@ -14,7 +14,7 @@ This software implements a heavily parallelized pipeline to recognize text in PD
 
 1. Install Docker and Python 3.
 2. Clone this repository: `git clone https://gitlab.ub.uni-bielefeld.de/sfb1288inf/ocr.git`
-2. Build the Docker image: `docker build -t gitlab.ub.uni-bielefeld.de:4567/sfb1288inf/ocr:0.1.0 ocr`
+2. Build the Docker image: `docker build -t gitlab.ub.uni-bielefeld.de:4567/sfb1288inf/ocr:v0.1.0 ocr`
 2. Add the wrapper script (`wrapper/ocr` relative to this README file) to your `${PATH}`.
 3. Create working directories for the pipeline: `mkdir -p //{input,models,output}`.
 4. Place your Tesseract OCR model(s) inside `//models`.
diff --git a/wrapper/ocr b/wrapper/ocr
index 37dad98..58a0bca 100755
--- a/wrapper/ocr
+++ b/wrapper/ocr
@@ -6,7 +6,7 @@ import os
 import subprocess
 import sys
 
-CONTAINER_IMAGE = 'gitlab.ub.uni-bielefeld.de:4567/sfb1288inf/ocr:0.1.0'
+CONTAINER_IMAGE = 'gitlab.ub.uni-bielefeld.de:4567/sfb1288inf/ocr:v0.1.0'
 CONTAINER_INPUT_DIR = '/input'
 CONTAINER_OUTPUT_DIR = '/output'
 CONTAINER_MODELS_DIR = '/usr/local/share/tessdata'