diff --git a/Dockerfile b/Dockerfile index 5e048f1..8a7b0b5 100644 --- a/Dockerfile +++ b/Dockerfile @@ -26,6 +26,8 @@ RUN apt-get update && \ poppler-utils \ python2.7 \ python3.6 \ + python-pip \ + python-tk \ tesseract-ocr \ wget @@ -54,6 +56,7 @@ RUN wget -nv http://github.com/tesseract-ocr/tessdata_best/raw/master/deu.traine RUN git clone http://github.com/tmbdev/ocropy && \ cd ocropy && \ apt-get install -y --no-install-recommends $(cat PACKAGES) && \ + pip install -r requirements.txt && \ wget -nv http://www.tmbdev.net/en-default.pyrnn.gz && \ mv en-default.pyrnn.gz models/ && \ python2.7 setup.py install && \ diff --git a/parse_hocr b/parse_hocr index 0e1f258..5a0ad2a 100755 --- a/parse_hocr +++ b/parse_hocr @@ -39,5 +39,5 @@ for input_file in input_files: output_file.write('

\n') output_file.write(' \n' + ' \n' + - '\n') + '') output_file.close() \ No newline at end of file