Added missing dependencies for ocropus.

This commit is contained in:
Patrick Jentsch 2018-10-10 15:20:34 +02:00
parent aa48ea6ed2
commit ce864e205a
2 changed files with 4 additions and 1 deletions

View File

@ -26,6 +26,8 @@ RUN apt-get update && \
poppler-utils \ poppler-utils \
python2.7 \ python2.7 \
python3.6 \ python3.6 \
python-pip \
python-tk \
tesseract-ocr \ tesseract-ocr \
wget wget
@ -54,6 +56,7 @@ RUN wget -nv http://github.com/tesseract-ocr/tessdata_best/raw/master/deu.traine
RUN git clone http://github.com/tmbdev/ocropy && \ RUN git clone http://github.com/tmbdev/ocropy && \
cd ocropy && \ cd ocropy && \
apt-get install -y --no-install-recommends $(cat PACKAGES) && \ apt-get install -y --no-install-recommends $(cat PACKAGES) && \
pip install -r requirements.txt && \
wget -nv http://www.tmbdev.net/en-default.pyrnn.gz && \ wget -nv http://www.tmbdev.net/en-default.pyrnn.gz && \
mv en-default.pyrnn.gz models/ && \ mv en-default.pyrnn.gz models/ && \
python2.7 setup.py install && \ python2.7 setup.py install && \

View File

@ -39,5 +39,5 @@ for input_file in input_files:
output_file.write(' </p>\n') output_file.write(' </p>\n')
output_file.write(' </body>\n' + output_file.write(' </body>\n' +
' </text>\n' + ' </text>\n' +
'</TEI>\n') '</TEI>')
output_file.close() output_file.close()