mirror of
https://gitlab.ub.uni-bielefeld.de/sfb1288inf/ocr.git
synced 2024-12-27 06:04:18 +00:00
Add new models
This commit is contained in:
parent
ca7df6d0ed
commit
613bceb4ff
@ -65,7 +65,11 @@ RUN tar -xzf "${TESSERACT_RELEASE}.tar.gz" \
|
|||||||
ENV TESSDATA_BEST_RELEASE=4.1.0
|
ENV TESSDATA_BEST_RELEASE=4.1.0
|
||||||
ADD "https://github.com/tesseract-ocr/tessdata_best/archive/${TESSDATA_BEST_RELEASE}.tar.gz" .
|
ADD "https://github.com/tesseract-ocr/tessdata_best/archive/${TESSDATA_BEST_RELEASE}.tar.gz" .
|
||||||
RUN tar -xzf "${TESSDATA_BEST_RELEASE}.tar.gz" \
|
RUN tar -xzf "${TESSDATA_BEST_RELEASE}.tar.gz" \
|
||||||
|
&& mv "tessdata_best-${TESSDATA_BEST_RELEASE}/ara.traineddata" "/usr/local/share/tessdata/" \
|
||||||
|
&& mv "tessdata_best-${TESSDATA_BEST_RELEASE}/chi_tra.traineddata" "/usr/local/share/tessdata/" \
|
||||||
|
&& mv "tessdata_best-${TESSDATA_BEST_RELEASE}/dan.traineddata" "/usr/local/share/tessdata/" \
|
||||||
&& mv "tessdata_best-${TESSDATA_BEST_RELEASE}/deu.traineddata" "/usr/local/share/tessdata/" \
|
&& mv "tessdata_best-${TESSDATA_BEST_RELEASE}/deu.traineddata" "/usr/local/share/tessdata/" \
|
||||||
|
&& mv "tessdata_best-${TESSDATA_BEST_RELEASE}/ell.traineddata" "/usr/local/share/tessdata/" \
|
||||||
&& mv "tessdata_best-${TESSDATA_BEST_RELEASE}/eng.traineddata" "/usr/local/share/tessdata/" \
|
&& mv "tessdata_best-${TESSDATA_BEST_RELEASE}/eng.traineddata" "/usr/local/share/tessdata/" \
|
||||||
&& mv "tessdata_best-${TESSDATA_BEST_RELEASE}/enm.traineddata" "/usr/local/share/tessdata/" \
|
&& mv "tessdata_best-${TESSDATA_BEST_RELEASE}/enm.traineddata" "/usr/local/share/tessdata/" \
|
||||||
&& mv "tessdata_best-${TESSDATA_BEST_RELEASE}/fra.traineddata" "/usr/local/share/tessdata/" \
|
&& mv "tessdata_best-${TESSDATA_BEST_RELEASE}/fra.traineddata" "/usr/local/share/tessdata/" \
|
||||||
@ -73,6 +77,7 @@ RUN tar -xzf "${TESSDATA_BEST_RELEASE}.tar.gz" \
|
|||||||
&& mv "tessdata_best-${TESSDATA_BEST_RELEASE}/frm.traineddata" "/usr/local/share/tessdata/" \
|
&& mv "tessdata_best-${TESSDATA_BEST_RELEASE}/frm.traineddata" "/usr/local/share/tessdata/" \
|
||||||
&& mv "tessdata_best-${TESSDATA_BEST_RELEASE}/ita.traineddata" "/usr/local/share/tessdata/" \
|
&& mv "tessdata_best-${TESSDATA_BEST_RELEASE}/ita.traineddata" "/usr/local/share/tessdata/" \
|
||||||
&& mv "tessdata_best-${TESSDATA_BEST_RELEASE}/por.traineddata" "/usr/local/share/tessdata/" \
|
&& mv "tessdata_best-${TESSDATA_BEST_RELEASE}/por.traineddata" "/usr/local/share/tessdata/" \
|
||||||
|
&& mv "tessdata_best-${TESSDATA_BEST_RELEASE}/rus.traineddata" "/usr/local/share/tessdata/" \
|
||||||
&& mv "tessdata_best-${TESSDATA_BEST_RELEASE}/spa.traineddata" "/usr/local/share/tessdata/" \
|
&& mv "tessdata_best-${TESSDATA_BEST_RELEASE}/spa.traineddata" "/usr/local/share/tessdata/" \
|
||||||
&& rm -r "tessdata_best-${TESSDATA_BEST_RELEASE}" "${TESSDATA_BEST_RELEASE}.tar.gz"
|
&& rm -r "tessdata_best-${TESSDATA_BEST_RELEASE}" "${TESSDATA_BEST_RELEASE}.tar.gz"
|
||||||
|
|
||||||
|
@ -24,7 +24,7 @@ mkdir -p /<my_data_location>/input /<my_data_location>/output
|
|||||||
# Option one: Use the wrapper script
|
# Option one: Use the wrapper script
|
||||||
## Install the wrapper script (only on first run). Get it from https://gitlab.ub.uni-bielefeld.de/sfb1288inf/ocr/-/raw/1.0.0/wrapper/ocr, make it executable and add it to your ${PATH}
|
## Install the wrapper script (only on first run). Get it from https://gitlab.ub.uni-bielefeld.de/sfb1288inf/ocr/-/raw/1.0.0/wrapper/ocr, make it executable and add it to your ${PATH}
|
||||||
cd /<my_data_location>
|
cd /<my_data_location>
|
||||||
ocr -i input -l <language_code> -o output <pipeline_arguments>
|
ocr -i input -l <language_code> -o output <optional_pipeline_arguments>
|
||||||
|
|
||||||
# Option two: Classic Docker style
|
# Option two: Classic Docker style
|
||||||
docker run \
|
docker run \
|
||||||
@ -47,7 +47,7 @@ docker run \
|
|||||||
|
|
||||||
`-l languagecode`
|
`-l languagecode`
|
||||||
* Tells tesseract which language will be used.
|
* Tells tesseract which language will be used.
|
||||||
* options = deu (German), eng (English), enm (Middle English), fra (French), frk (German Fraktur), frm (Middle French), ita (Italian), por (Portuguese), spa (Spanish)
|
* options = ara (Arabic), chi_tra (Chinese - Traditional), dan (Danish), deu (German), ell (Greek, Modern (1453-)), eng (English), enm (Middle English), fra (French), frk (German Fraktur), frm (Middle French), ita (Italian), por (Portuguese), rus (Russian), spa (Spanish)
|
||||||
* required = True
|
* required = True
|
||||||
|
|
||||||
`--keep-intermediates`
|
`--keep-intermediates`
|
||||||
|
Loading…
Reference in New Issue
Block a user