From 613bceb4ff32040be7829e6601ccbd4dd340f26c Mon Sep 17 00:00:00 2001
From: Patrick Jentsch
Date: Tue, 23 Feb 2021 11:11:50 +0100
Subject: [PATCH] Add new Tesseract language models (ara, chi_tra, dan, ell, rus)
---
Dockerfile | 5 +++++
README.md | 4 ++--
2 files changed, 7 insertions(+), 2 deletions(-)
diff --git a/Dockerfile b/Dockerfile
index 2e07b90..f42246f 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -65,7 +65,11 @@ RUN tar -xzf "${TESSERACT_RELEASE}.tar.gz" \
ENV TESSDATA_BEST_RELEASE=4.1.0
ADD "https://github.com/tesseract-ocr/tessdata_best/archive/${TESSDATA_BEST_RELEASE}.tar.gz" .
RUN tar -xzf "${TESSDATA_BEST_RELEASE}.tar.gz" \
+ && mv "tessdata_best-${TESSDATA_BEST_RELEASE}/ara.traineddata" "/usr/local/share/tessdata/" \
+ && mv "tessdata_best-${TESSDATA_BEST_RELEASE}/chi_tra.traineddata" "/usr/local/share/tessdata/" \
+ && mv "tessdata_best-${TESSDATA_BEST_RELEASE}/dan.traineddata" "/usr/local/share/tessdata/" \
&& mv "tessdata_best-${TESSDATA_BEST_RELEASE}/deu.traineddata" "/usr/local/share/tessdata/" \
+ && mv "tessdata_best-${TESSDATA_BEST_RELEASE}/ell.traineddata" "/usr/local/share/tessdata/" \
&& mv "tessdata_best-${TESSDATA_BEST_RELEASE}/eng.traineddata" "/usr/local/share/tessdata/" \
&& mv "tessdata_best-${TESSDATA_BEST_RELEASE}/enm.traineddata" "/usr/local/share/tessdata/" \
&& mv "tessdata_best-${TESSDATA_BEST_RELEASE}/fra.traineddata" "/usr/local/share/tessdata/" \
@@ -73,6 +77,7 @@ RUN tar -xzf "${TESSDATA_BEST_RELEASE}.tar.gz" \
&& mv "tessdata_best-${TESSDATA_BEST_RELEASE}/frm.traineddata" "/usr/local/share/tessdata/" \
&& mv "tessdata_best-${TESSDATA_BEST_RELEASE}/ita.traineddata" "/usr/local/share/tessdata/" \
&& mv "tessdata_best-${TESSDATA_BEST_RELEASE}/por.traineddata" "/usr/local/share/tessdata/" \
+ && mv "tessdata_best-${TESSDATA_BEST_RELEASE}/rus.traineddata" "/usr/local/share/tessdata/" \
&& mv "tessdata_best-${TESSDATA_BEST_RELEASE}/spa.traineddata" "/usr/local/share/tessdata/" \
&& rm -r "tessdata_best-${TESSDATA_BEST_RELEASE}" "${TESSDATA_BEST_RELEASE}.tar.gz"
diff --git a/README.md b/README.md
index c371cff..d4ba53b 100644
--- a/README.md
+++ b/README.md
@@ -24,7 +24,7 @@ mkdir -p //input //output
# Option one: Use the wrapper script
## Install the wrapper script (only on first run). Get it from https://gitlab.ub.uni-bielefeld.de/sfb1288inf/ocr/-/raw/1.0.0/wrapper/ocr, make it executeable and add it to your ${PATH}
cd /
-ocr -i input -l -o output
+ocr -i input -l -o output
# Option two: Classic Docker style
docker run \
@@ -47,7 +47,7 @@ docker run \
`-l languagecode`
* Tells tesseract which language will be used.
-* options = deu (German), eng (English), enm (Middle englisch), fra (French), frk (German Fraktur), frm (Middle french), ita (Italian), por (Portuguese), spa (Spanish)
+* options = ara (Arabic), chi_tra (Chinese - Traditional), dan (Danish), deu (German), ell (Greek, Modern (1453-)), eng (English), enm (Middle English), fra (French), frk (German Fraktur), frm (Middle French), ita (Italian), por (Portuguese), rus (Russian), spa (Spanish)
* required = True
`--keep-intermediates`