mirror of
https://gitlab.ub.uni-bielefeld.de/sfb1288inf/ocr.git
synced 2024-12-25 17:24:18 +00:00
Add new models
This commit is contained in:
parent
ca7df6d0ed
commit
613bceb4ff
@ -65,7 +65,11 @@ RUN tar -xzf "${TESSERACT_RELEASE}.tar.gz" \
|
||||
ENV TESSDATA_BEST_RELEASE=4.1.0
|
||||
ADD "https://github.com/tesseract-ocr/tessdata_best/archive/${TESSDATA_BEST_RELEASE}.tar.gz" .
|
||||
RUN tar -xzf "${TESSDATA_BEST_RELEASE}.tar.gz" \
|
||||
&& mv "tessdata_best-${TESSDATA_BEST_RELEASE}/ara.traineddata" "/usr/local/share/tessdata/" \
|
||||
&& mv "tessdata_best-${TESSDATA_BEST_RELEASE}/chi_tra.traineddata" "/usr/local/share/tessdata/" \
|
||||
&& mv "tessdata_best-${TESSDATA_BEST_RELEASE}/dan.traineddata" "/usr/local/share/tessdata/" \
|
||||
&& mv "tessdata_best-${TESSDATA_BEST_RELEASE}/deu.traineddata" "/usr/local/share/tessdata/" \
|
||||
&& mv "tessdata_best-${TESSDATA_BEST_RELEASE}/ell.traineddata" "/usr/local/share/tessdata/" \
|
||||
&& mv "tessdata_best-${TESSDATA_BEST_RELEASE}/eng.traineddata" "/usr/local/share/tessdata/" \
|
||||
&& mv "tessdata_best-${TESSDATA_BEST_RELEASE}/enm.traineddata" "/usr/local/share/tessdata/" \
|
||||
&& mv "tessdata_best-${TESSDATA_BEST_RELEASE}/fra.traineddata" "/usr/local/share/tessdata/" \
|
||||
@ -73,6 +77,7 @@ RUN tar -xzf "${TESSDATA_BEST_RELEASE}.tar.gz" \
|
||||
&& mv "tessdata_best-${TESSDATA_BEST_RELEASE}/frm.traineddata" "/usr/local/share/tessdata/" \
|
||||
&& mv "tessdata_best-${TESSDATA_BEST_RELEASE}/ita.traineddata" "/usr/local/share/tessdata/" \
|
||||
&& mv "tessdata_best-${TESSDATA_BEST_RELEASE}/por.traineddata" "/usr/local/share/tessdata/" \
|
||||
&& mv "tessdata_best-${TESSDATA_BEST_RELEASE}/rus.traineddata" "/usr/local/share/tessdata/" \
|
||||
&& mv "tessdata_best-${TESSDATA_BEST_RELEASE}/spa.traineddata" "/usr/local/share/tessdata/" \
|
||||
&& rm -r "tessdata_best-${TESSDATA_BEST_RELEASE}" "${TESSDATA_BEST_RELEASE}.tar.gz"
|
||||
|
||||
|
@ -24,7 +24,7 @@ mkdir -p /<my_data_location>/input /<my_data_location>/output
|
||||
# Option one: Use the wrapper script
|
||||
## Install the wrapper script (only on first run). Get it from https://gitlab.ub.uni-bielefeld.de/sfb1288inf/ocr/-/raw/1.0.0/wrapper/ocr, make it executable and add it to your ${PATH}
|
||||
cd /<my_data_location>
|
||||
ocr -i input -l <language_code> -o output <pipeline_arguments>
|
||||
ocr -i input -l <language_code> -o output <optional_pipeline_arguments>
|
||||
|
||||
# Option two: Classic Docker style
|
||||
docker run \
|
||||
@ -47,7 +47,7 @@ docker run \
|
||||
|
||||
`-l languagecode`
|
||||
* Tells tesseract which language will be used.
|
||||
* options = deu (German), eng (English), enm (Middle English), fra (French), frk (German Fraktur), frm (Middle French), ita (Italian), por (Portuguese), spa (Spanish)
|
||||
* options = ara (Arabic), chi_tra (Chinese - Traditional), dan (Danish), deu (German), ell (Greek, Modern (1453-)), eng (English), enm (Middle English), fra (French), frk (German Fraktur), frm (Middle French), ita (Italian), por (Portuguese), rus (Russian), spa (Spanish)
|
||||
* required = True
|
||||
|
||||
`--keep-intermediates`
|
||||
|
Loading…
Reference in New Issue
Block a user