# Base image: the slim variant of Debian 10.
FROM debian:10-slim


# Contact address of the image maintainers.
LABEL maintainer="inf_sfb1288@lists.uni-bielefeld.de"


# Select the C.UTF-8 locale for the build steps below and for the running
# container (ENV values persist into the final image).
ENV LANG=C.UTF-8


# System packages used by the later build steps (wget, gnupg2 and python2.7
# are invoked further down) and presumably by the ocr entrypoint at runtime.
# The apt package lists are deleted in the same layer that downloads them:
# removing them in a later RUN would not make the image any smaller.
RUN apt-get update \
 && apt-get install -y --no-install-recommends \
      apt-transport-https \
      build-essential \
      ca-certificates \
      ghostscript \
      gnupg2 \
      poppler-utils \
      python2.7 \
      python3.7 \
      wget \
      zip \
 && rm -rf /var/lib/apt/lists/*


# ocropy release to install (tag name on GitHub without the leading "v").
ENV OCROPY_VERSION=1.3.3
# Download, build and install ocropy for Python 2.7 in a single layer.
# The source archive is fetched with wget inside this RUN (instead of a
# separate ADD <url>) so that the tarball is removed in the layer that created
# it and does not stay behind in the image history.
# The distribution packages listed in ocropy's PACKAGES file are installed
# before running setup.py; the apt lists are removed again at the end.
RUN wget -nv -O "v${OCROPY_VERSION}.tar.gz" \
      "https://github.com/tmbdev/ocropy/archive/v${OCROPY_VERSION}.tar.gz" \
 && tar -xzf "v${OCROPY_VERSION}.tar.gz" \
 && cd "ocropy-${OCROPY_VERSION}" \
 && apt-get update \
 && apt-get install -y --no-install-recommends \
      python-pil \
      python-tk \
      $(cat PACKAGES) \
 && python2.7 setup.py install \
 && cd .. \
 && rm -rf \
      "ocropy-${OCROPY_VERSION}" \
      "v${OCROPY_VERSION}.tar.gz" \
      /var/lib/apt/lists/*


# pyflow release to install (version number without the leading "v").
ENV PYFLOW_VERSION=1.1.20
# Download, build and install pyflow for Python 2.7 in a single layer.
# The release archive is fetched with wget inside this RUN (instead of a
# separate ADD <url>) so that the tarball is removed in the layer that created
# it and does not stay behind in the image history.
RUN wget -nv -O "pyflow-${PYFLOW_VERSION}.tar.gz" \
      "https://github.com/Illumina/pyflow/releases/download/v${PYFLOW_VERSION}/pyflow-${PYFLOW_VERSION}.tar.gz" \
 && tar -xzf "pyflow-${PYFLOW_VERSION}.tar.gz" \
 && cd "pyflow-${PYFLOW_VERSION}" \
 && python2.7 setup.py build install \
 && cd .. \
 && rm -rf \
      "pyflow-${PYFLOW_VERSION}" \
      "pyflow-${PYFLOW_VERSION}.tar.gz"


# Install Tesseract and its language data from the notesalexp.org repository
# for buster.
# The repository signing key is downloaded to a file first rather than piped
# into apt-key: the exit status of a shell pipeline is that of its last
# command, so a failed wget would not by itself stop the build.
# The apt package lists are removed at the end of this same RUN; a separate
# cleanup RUN would add a layer without reclaiming any space.
RUN echo "deb https://notesalexp.org/tesseract-ocr/buster/ buster main" >> /etc/apt/sources.list \
 && wget -nv -O /tmp/alexp_key.asc https://notesalexp.org/debian/alexp_key.asc \
 && apt-key add /tmp/alexp_key.asc \
 && rm /tmp/alexp_key.asc \
 && apt-get update \
 && apt-get install -y --no-install-recommends \
      tesseract-ocr \
      tesseract-ocr-deu \
      tesseract-ocr-eng \
      tesseract-ocr-enm \
      tesseract-ocr-fra \
      tesseract-ocr-frk \
      tesseract-ocr-frm \
      tesseract-ocr-ita \
      tesseract-ocr-por \
      tesseract-ocr-spa \
 && rm -rf /var/lib/apt/lists/*


# Copy hocrtotei and ocr from the build context into /usr/local/bin.
# The trailing slash marks the destination as a directory, which COPY requires
# when more than one source is given.
COPY hocrtotei ocr /usr/local/bin/


# The container always runs the ocr executable; arguments passed to
# `docker run` replace the default argument list, which is just --help.
ENTRYPOINT ["ocr"]
CMD ["--help"]