# Image for the OCR pipeline: ocropy, pyflow and Tesseract installed on top of
# Debian 9, with the `ocr` script as the container entry point.
#
# NOTE(review): Debian 9 "stretch" is end-of-life; confirm that its apt
# repositories (and the third-party Tesseract repository used below) are still
# reachable from the build environment.
FROM debian:9-slim

# Define image metadata
LABEL maintainer="inf_sfb1288@lists.uni-bielefeld.de"

ENV LANG=C.UTF-8

# Install prerequisites.
# apt-transport-https, ca-certificates and gnupg2 are needed further down to
# add the HTTPS Tesseract repository and import its signing key; wget is used
# for every download in this file.
RUN apt-get update \
 && apt-get install -y --no-install-recommends \
      apt-transport-https \
      ca-certificates \
      gnupg2 \
      imagemagick \
      poppler-utils \
      python2.7 \
      python3.5 \
      wget \
 && rm -rf /var/lib/apt/lists/*

# Install ocropy.
# The tarball is downloaded, unpacked, installed and deleted inside a single
# RUN so that neither the archive nor the source tree persists in any layer.
# NOTE(review): the download is not checksum-verified; consider pinning a
# sha256 for the release archive.
ENV OCROPY_VERSION=1.3.3
RUN wget -q -O "ocropy-${OCROPY_VERSION}.tar.gz" \
      "https://github.com/tmbdev/ocropy/archive/v${OCROPY_VERSION}.tar.gz" \
 && tar -xzf "ocropy-${OCROPY_VERSION}.tar.gz" \
 && cd "ocropy-${OCROPY_VERSION}" \
 && apt-get update \
 # The contents of the PACKAGES file shipped in the ocropy source tree are
 # passed to apt-get as package names; the command substitution is left
 # unquoted on purpose so that each entry becomes a separate argument.
 && apt-get install -y --no-install-recommends \
      python-pil \
      python-tk \
      $(cat PACKAGES) \
 && rm -rf /var/lib/apt/lists/* \
 && python2.7 setup.py install \
 && cd .. \
 && rm -rf \
      "ocropy-${OCROPY_VERSION}" \
      "ocropy-${OCROPY_VERSION}.tar.gz"

# Install pyflow.
# Same single-layer download/build/cleanup pattern as for ocropy above.
# NOTE(review): the download is not checksum-verified; consider pinning a
# sha256 for the release archive.
ENV PYFLOW_VERSION=1.1.20
RUN wget -q -O "pyflow-${PYFLOW_VERSION}.tar.gz" \
      "https://github.com/Illumina/pyflow/releases/download/v${PYFLOW_VERSION}/pyflow-${PYFLOW_VERSION}.tar.gz" \
 && tar -xzf "pyflow-${PYFLOW_VERSION}.tar.gz" \
 && cd "pyflow-${PYFLOW_VERSION}" \
 && python2.7 setup.py build install \
 && cd .. \
 && rm -rf \
      "pyflow-${PYFLOW_VERSION}" \
      "pyflow-${PYFLOW_VERSION}.tar.gz"

# Install Tesseract and its language data from the notesalexp.org repository.
# The signing key is written to a file before being imported (instead of being
# piped into apt-key) so that a failed download aborts the build: the default
# /bin/sh has no pipefail, and a pipeline would only report apt-key's status.
RUN echo "deb https://notesalexp.org/tesseract-ocr/stretch/ stretch main" >> /etc/apt/sources.list \
 && wget -q -O /tmp/alexp_key.asc https://notesalexp.org/debian/alexp_key.asc \
 && apt-key add /tmp/alexp_key.asc \
 && rm -f /tmp/alexp_key.asc \
 && apt-get update \
 && apt-get install -y --no-install-recommends \
      tesseract-ocr \
      tesseract-ocr-deu \
      tesseract-ocr-eng \
      tesseract-ocr-enm \
      tesseract-ocr-fra \
      tesseract-ocr-frk \
      tesseract-ocr-frm \
      tesseract-ocr-ita \
      tesseract-ocr-por \
      tesseract-ocr-spa \
 && rm -rf /var/lib/apt/lists/*

# Install OCR pipeline
COPY hocrtotei ocr /usr/local/bin/

# `ocr` is the fixed executable; with no arguments the container runs
# `ocr --help`. Arguments given to `docker run` replace the default CMD.
ENTRYPOINT ["ocr"]
CMD ["--help"]