2020-04-06 07:21:52 +00:00
|
|
|
# Base image: the slim variant of Debian 10. The extra apt source added
# further down in this file targets the matching "buster" suite.
FROM debian:10-slim
|
2018-10-09 12:43:23 +00:00
|
|
|
|
2019-09-11 13:15:00 +00:00
|
|
|
|
2019-05-13 13:03:43 +00:00
|
|
|
# Contact address of the image maintainers, stored as image metadata.
LABEL maintainer="inf_sfb1288@lists.uni-bielefeld.de"
|
2018-10-09 12:43:23 +00:00
|
|
|
|
2019-09-11 13:15:00 +00:00
|
|
|
|
2019-09-12 09:30:52 +00:00
|
|
|
# Use the C.UTF-8 locale for the remaining build steps and in the running
# container (ENV values persist into the final image).
ENV LANG=C.UTF-8
|
|
|
|
|
|
|
|
|
2019-09-11 13:15:00 +00:00
|
|
|
# Install the base OS packages, one per line in alphabetical order.
# python2.7 and wget are invoked by later RUN steps in this file.
# The apt package lists are deleted in this same RUN: removing them in a
# later instruction would not shrink this layer.
RUN apt-get update \
 && apt-get install -y --no-install-recommends \
      apt-transport-https \
      build-essential \
      ca-certificates \
      gnupg2 \
      imagemagick \
      poppler-utils \
      python2.7 \
      python3.7 \
      wget \
      zip \
 && rm -rf /var/lib/apt/lists/*
|
|
|
|
|
2019-09-11 13:15:00 +00:00
|
|
|
|
2018-10-29 09:49:19 +00:00
|
|
|
# ocropy release to install. The value is interpolated into the download URL
# and into the archive/directory names used by the install step that follows.
# Written in key=value form (the whitespace-separated ENV form is legacy),
# matching the other ENV instructions in this file.
ENV OCROPY_VERSION=1.3.3
|
2019-09-11 13:15:00 +00:00
|
|
|
# Download, build and install ocropy with Python 2.7.
#
# The release tarball is fetched with wget inside this RUN instead of a
# separate remote ADD, so the archive, the unpacked source tree and the apt
# package lists are all deleted in the layer that created them. A file added
# by ADD stays in its own layer even when a later RUN removes it.
#
# PACKAGES is a file in the unpacked ocropy source tree; its contents are
# handed to apt-get as additional package names, which is why the command
# substitution is intentionally left unquoted (word splitting is wanted).
RUN wget -nv -O "v${OCROPY_VERSION}.tar.gz" \
      "https://github.com/tmbdev/ocropy/archive/v${OCROPY_VERSION}.tar.gz" \
 && tar -xzf "v${OCROPY_VERSION}.tar.gz" \
 && cd "ocropy-${OCROPY_VERSION}" \
 && apt-get update \
 && apt-get install -y --no-install-recommends \
      python-pil \
      python-tk \
      $(cat PACKAGES) \
 && python2.7 setup.py install \
 && cd .. \
 && rm -rf \
      "ocropy-${OCROPY_VERSION}" \
      "v${OCROPY_VERSION}.tar.gz" \
      /var/lib/apt/lists/*
|
|
|
|
|
2020-04-06 07:21:52 +00:00
|
|
|
|
2019-09-11 13:15:00 +00:00
|
|
|
# pyflow release to install. The value is interpolated into the download URL
# and into the archive/directory names used by the install step that follows.
ENV PYFLOW_VERSION=1.1.20
|
|
|
|
# Download, build and install pyflow with Python 2.7.
#
# The release tarball is fetched with wget inside this RUN instead of a
# separate remote ADD, so the archive and the unpacked source tree are
# deleted in the layer that created them. A file added by ADD stays in its
# own layer even when a later RUN removes it.
RUN wget -nv -O "pyflow-${PYFLOW_VERSION}.tar.gz" \
      "https://github.com/Illumina/pyflow/releases/download/v${PYFLOW_VERSION}/pyflow-${PYFLOW_VERSION}.tar.gz" \
 && tar -xzf "pyflow-${PYFLOW_VERSION}.tar.gz" \
 && cd "pyflow-${PYFLOW_VERSION}" \
 && python2.7 setup.py build install \
 && cd .. \
 && rm -rf \
      "pyflow-${PYFLOW_VERSION}" \
      "pyflow-${PYFLOW_VERSION}.tar.gz"
|
2018-10-29 09:38:50 +00:00
|
|
|
|
2020-04-06 07:21:52 +00:00
|
|
|
|
|
|
|
# Register the notesalexp.org tesseract-ocr apt repository for buster, trust
# its signing key, and install tesseract-ocr plus the listed tesseract-ocr-*
# language packages.
#
# The key is downloaded to a temporary file and then passed to apt-key,
# rather than piped (`wget ... | apt-key add -`): without pipefail the exit
# status of a pipeline is that of its last command, so a failed download
# could go unnoticed. With separate commands in the && chain a wget failure
# aborts the build.
#
# The apt package lists are deleted in this same RUN so they are not stored
# in this layer.
RUN echo "deb https://notesalexp.org/tesseract-ocr/buster/ buster main" >> /etc/apt/sources.list \
 && wget -nv -O /tmp/alexp_key.asc https://notesalexp.org/debian/alexp_key.asc \
 && apt-key add /tmp/alexp_key.asc \
 && rm -f /tmp/alexp_key.asc \
 && apt-get update \
 && apt-get install -y --no-install-recommends \
      tesseract-ocr \
      tesseract-ocr-deu \
      tesseract-ocr-eng \
      tesseract-ocr-enm \
      tesseract-ocr-fra \
      tesseract-ocr-frk \
      tesseract-ocr-frm \
      tesseract-ocr-ita \
      tesseract-ocr-por \
      tesseract-ocr-spa \
 && rm -rf /var/lib/apt/lists/*
|
|
|
|
|
|
|
|
|
|
|
|
# Delete the apt package index files.
# NOTE(review): as a standalone RUN this only affects its own layer; index
# files written by an earlier RUN remain stored in that earlier layer, so the
# image does not get smaller unless the removal happens in the same RUN as
# the corresponding `apt-get update`.
RUN rm -rf /var/lib/apt/lists/*
|
2019-09-11 13:15:00 +00:00
|
|
|
|
|
|
|
|
2019-03-10 20:04:14 +00:00
|
|
|
# Place the hocrtotei and ocr files from the build context in /usr/local/bin.
# The trailing slash marks the destination as a directory, which COPY
# requires when more than one source is given.
COPY hocrtotei ocr /usr/local/bin/
|
2018-10-09 12:43:23 +00:00
|
|
|
|
2019-09-11 13:15:00 +00:00
|
|
|
|
2019-05-13 13:03:43 +00:00
|
|
|
# Run the ocr executable (copied into /usr/local/bin earlier in this file) as
# the container's main process. Exec (JSON array) form: no shell wraps it.
ENTRYPOINT ["ocr"]
|
2019-09-11 13:15:00 +00:00
|
|
|
# Default argument appended to the entrypoint when `docker run` supplies
# none; any arguments given at run time replace it.
CMD ["--help"]
|