diff --git a/Dockerfile b/Dockerfile index c848bad..ca91973 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,26 +1,25 @@ -FROM debian:9-slim +FROM debian:10-slim -# Define image metadata LABEL maintainer="inf_sfb1288@lists.uni-bielefeld.de" ENV LANG=C.UTF-8 -# Install prerequisites RUN apt-get update \ && apt-get install -y --no-install-recommends \ apt-transport-https \ + build-essential \ ca-certificates \ gnupg2 \ imagemagick \ poppler-utils \ python2.7 \ - python3.5 \ + python3.7 \ wget \ - zip \ - && rm -rf /var/lib/apt/lists/* + zip + ENV OCROPY_VERSION 1.3.3 ADD "https://github.com/tmbdev/ocropy/archive/v${OCROPY_VERSION}.tar.gz" . @@ -31,13 +30,13 @@ RUN tar -xzf "v${OCROPY_VERSION}.tar.gz" \ python-pil \ python-tk \ $(cat PACKAGES) \ - && rm -rf /var/lib/apt/lists/* \ && python2.7 setup.py install \ && cd .. \ && rm -rf \ "ocropy-${OCROPY_VERSION}" \ "v${OCROPY_VERSION}.tar.gz" + ENV PYFLOW_VERSION=1.1.20 ADD "https://github.com/Illumina/pyflow/releases/download/v${PYFLOW_VERSION}/pyflow-${PYFLOW_VERSION}.tar.gz" . RUN tar -xzf "pyflow-${PYFLOW_VERSION}.tar.gz" \ @@ -48,7 +47,8 @@ RUN tar -xzf "pyflow-${PYFLOW_VERSION}.tar.gz" \ "pyflow-${PYFLOW_VERSION}" \ "pyflow-${PYFLOW_VERSION}.tar.gz" -RUN echo "deb https://notesalexp.org/tesseract-ocr/stretch/ stretch main" >> /etc/apt/sources.list \ + +RUN echo "deb https://notesalexp.org/tesseract-ocr/buster/ buster main" >> /etc/apt/sources.list \ && wget -O - https://notesalexp.org/debian/alexp_key.asc | apt-key add - \ && apt-get update \ && apt-get install -y --no-install-recommends \ @@ -61,11 +61,12 @@ RUN echo "deb https://notesalexp.org/tesseract-ocr/stretch/ stretch main" >> /et tesseract-ocr-frm \ tesseract-ocr-ita \ tesseract-ocr-por \ - tesseract-ocr-spa \ - && rm -rf /var/lib/apt/lists/* + tesseract-ocr-spa + + +RUN rm -rf /var/lib/apt/lists/* -# Install OCR pipeline COPY hocrtotei /usr/local/bin COPY ocr /usr/local/bin diff --git a/hocrtotei b/hocrtotei index a762d99..5f33a93 100755 --- a/hocrtotei +++ b/hocrtotei @@ -1,4 +1,4 @@ -#!/usr/bin/env python3.5 +#!/usr/bin/env python3.7 # coding=utf-8 from xml.sax.saxutils import escape