From ce864e205af6787b93e2888605424b4c54ddfbcc Mon Sep 17 00:00:00 2001
From: Patrick Jentsch
Date: Wed, 10 Oct 2018 15:20:34 +0200
Subject: [PATCH] Added missing dependencies for ocropus.
---
Dockerfile | 3 +++
parse_hocr | 2 +-
2 files changed, 4 insertions(+), 1 deletion(-)
diff --git a/Dockerfile b/Dockerfile
index 5e048f1..8a7b0b5 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -26,6 +26,8 @@ RUN apt-get update && \
poppler-utils \
python2.7 \
python3.6 \
+ python-pip \
+ python-tk \
tesseract-ocr \
wget
@@ -54,6 +56,7 @@ RUN wget -nv http://github.com/tesseract-ocr/tessdata_best/raw/master/deu.traine
RUN git clone http://github.com/tmbdev/ocropy && \
cd ocropy && \
apt-get install -y --no-install-recommends $(cat PACKAGES) && \
+ pip install -r requirements.txt && \
wget -nv http://www.tmbdev.net/en-default.pyrnn.gz && \
mv en-default.pyrnn.gz models/ && \
python2.7 setup.py install && \
diff --git a/parse_hocr b/parse_hocr
index 0e1f258..5a0ad2a 100755
--- a/parse_hocr
+++ b/parse_hocr
@@ -39,5 +39,5 @@ for input_file in input_files:
output_file.write(' \n')
output_file.write('