diff --git a/README.md b/README.md index e59b8b8..265cbd8 100644 --- a/README.md +++ b/README.md @@ -20,7 +20,7 @@ mkdir -p //input //output 2. Place your PDF files inside `//input`. Files should all contain text of the same language. -3. Start the pipeline process. Check the [Pipeline arguments](#pipeline-arguments) section for more details. +3. Start the pipeline process. Check the pipeline help (`ocr --help`) for more details. ``` # Option one: Use the wrapper script ## Install the wrapper script (only on first run). Get it from https://gitlab.ub.uni-bielefeld.de/sfb1288inf/ocr/-/raw/development/wrapper/ocr, make it executable and add it to your ${PATH} @@ -42,51 +42,3 @@ docker run \ ``` 4. Check your results in the `//output` directory. - -### Pipeline arguments - -#### Mandatory arguments - -`-i, --input-dir INPUT_DIR` -* Input directory - -`-o, --output-dir OUTPUT_DIR` -* Output directory - -`-l, --language {spa,fra,dan,deu,eng,frm,chi_tra,ara,enm,ita,ell,frk,rus,por}` -* Language of the input (3-character ISO 639-2 language codes) - -#### Optional arguments - -`--binarize` -* Add binarization as a preprocessing step - -`--log-dir` -* Logging directory - -`--mem-mb` -* Amount of system memory to be used (Default: min(--n-cores * 2048, available system memory)) - -`--n-cores` -* Number of CPU threads to be used (Default: min(4, available CPU cores)) - -`-v, --version` -* Returns the current version of the OCR pipeline - -``` bash -# Example with all arguments used -docker run \ - --rm \ - -it \ - -u $(id -u $USER):$(id -g $USER) \ - -v //input:/ocr_pipeline/input \ - -v //output:/ocr_pipeline/output \ - -v //logs:/ocr_pipeline/logs \ - gitlab.ub.uni-bielefeld.de:4567/sfb1288inf/ocr:development \ - -i /ocr_pipeline/input \ - -l eng \ - -o /ocr_pipeline/output \ - --binarize \ - --log-dir /ocr_pipeline/logs \ - --n-cores 8 \ -``` diff --git a/ocr b/ocr index 262a758..e15967a 100755 --- a/ocr +++ b/ocr @@ -322,7 +322,7 @@ def parse_args(): type=int) 
parser.add_argument('--n-cores', default=min(4, multiprocessing.cpu_count()), - help='Number of CPU threads to be used', + help='Number of CPU threads to be used (Default: min(4, number of CPUs))', # noqa type=int) parser.add_argument('--zip', help='Create one zip file per filetype')