Add new models

This commit is contained in:
Patrick Jentsch 2021-02-23 11:11:50 +01:00
parent ca7df6d0ed
commit 613bceb4ff
2 changed files with 7 additions and 2 deletions

View File

@ -65,7 +65,11 @@ RUN tar -xzf "${TESSERACT_RELEASE}.tar.gz" \
ENV TESSDATA_BEST_RELEASE=4.1.0
ADD "https://github.com/tesseract-ocr/tessdata_best/archive/${TESSDATA_BEST_RELEASE}.tar.gz" .
RUN tar -xzf "${TESSDATA_BEST_RELEASE}.tar.gz" \
&& mv "tessdata_best-${TESSDATA_BEST_RELEASE}/ara.traineddata" "/usr/local/share/tessdata/" \
&& mv "tessdata_best-${TESSDATA_BEST_RELEASE}/chi_tra.traineddata" "/usr/local/share/tessdata/" \
&& mv "tessdata_best-${TESSDATA_BEST_RELEASE}/dan.traineddata" "/usr/local/share/tessdata/" \
&& mv "tessdata_best-${TESSDATA_BEST_RELEASE}/deu.traineddata" "/usr/local/share/tessdata/" \
&& mv "tessdata_best-${TESSDATA_BEST_RELEASE}/ell.traineddata" "/usr/local/share/tessdata/" \
&& mv "tessdata_best-${TESSDATA_BEST_RELEASE}/eng.traineddata" "/usr/local/share/tessdata/" \
&& mv "tessdata_best-${TESSDATA_BEST_RELEASE}/enm.traineddata" "/usr/local/share/tessdata/" \
&& mv "tessdata_best-${TESSDATA_BEST_RELEASE}/fra.traineddata" "/usr/local/share/tessdata/" \
@ -73,6 +77,7 @@ RUN tar -xzf "${TESSDATA_BEST_RELEASE}.tar.gz" \
&& mv "tessdata_best-${TESSDATA_BEST_RELEASE}/frm.traineddata" "/usr/local/share/tessdata/" \
&& mv "tessdata_best-${TESSDATA_BEST_RELEASE}/ita.traineddata" "/usr/local/share/tessdata/" \
&& mv "tessdata_best-${TESSDATA_BEST_RELEASE}/por.traineddata" "/usr/local/share/tessdata/" \
&& mv "tessdata_best-${TESSDATA_BEST_RELEASE}/rus.traineddata" "/usr/local/share/tessdata/" \
&& mv "tessdata_best-${TESSDATA_BEST_RELEASE}/spa.traineddata" "/usr/local/share/tessdata/" \
&& rm -r "tessdata_best-${TESSDATA_BEST_RELEASE}" "${TESSDATA_BEST_RELEASE}.tar.gz"

View File

@ -24,7 +24,7 @@ mkdir -p /<my_data_location>/input /<my_data_location>/output
# Option one: Use the wrapper script
## Install the wrapper script (only on first run). Get it from https://gitlab.ub.uni-bielefeld.de/sfb1288inf/ocr/-/raw/1.0.0/wrapper/ocr, make it executeable and add it to your ${PATH}
cd /<my_data_location>
ocr -i input -l <language_code> -o output <pipeline_arguments>
ocr -i input -l <language_code> -o output <optional_pipeline_arguments>
# Option two: Classic Docker style
docker run \
@ -47,7 +47,7 @@ docker run \
`-l languagecode`
* Tells tesseract which language will be used.
* options = deu (German), eng (English), enm (Middle englisch), fra (French), frk (German Fraktur), frm (Middle french), ita (Italian), por (Portuguese), spa (Spanish)
* options = ara (Arabic), chi_tra (Chinese - Traditional), dan (Danish), deu (German), ell (Greek, Modern (1453-)), eng (English), enm (Middle englisch), fra (French), frk (German Fraktur), frm (Middle french), ita (Italian), por (Portuguese), rus (Russian), spa (Spanish)
* required = True
`--keep-intermediates`