ocr/Dockerfile

75 lines
1.9 KiB
Docker
Raw Normal View History

2019-06-02 21:45:11 +02:00
# Base image: the slim variant of Debian 9.
# NOTE(review): the python3.5 package and the "stretch" apt repository used
# further down appear tied to this Debian release; confirm both before
# changing the tag.
FROM debian:9-slim
2018-10-09 14:43:23 +02:00
2019-09-11 15:15:00 +02:00
# Define image metadata
2019-05-13 15:03:43 +02:00
# Contact address for this image, stored under the "maintainer" label key.
LABEL maintainer="inf_sfb1288@lists.uni-bielefeld.de"
2018-10-09 14:43:23 +02:00
2019-09-11 15:15:00 +02:00
2019-09-12 11:30:52 +02:00
# Set LANG for every following build step and for the running container.
# NOTE(review): presumably chosen so the Python tools default to UTF-8 text
# handling; confirm against the pipeline scripts.
ENV LANG=C.UTF-8
2019-09-11 15:15:00 +02:00
# Install prerequisites.
# The package index is refreshed, used and deleted within this single layer,
# so the install never runs against a stale index and the index files do not
# end up in the image.
# wget, ca-certificates and apt-transport-https are used further down to
# download sources and to reach an HTTPS apt repository; gnupg2 is presumably
# what apt-key needs there (NOTE(review): confirm). python2.7 is the
# interpreter that runs the setup.py installs below.
# Keep the package list sorted alphabetically, one package per line.
RUN apt-get update \
    && apt-get install -y --no-install-recommends \
        apt-transport-https \
        ca-certificates \
        gnupg2 \
        imagemagick \
        poppler-utils \
        python2.7 \
        python3.5 \
        wget \
        zip \
    && rm -rf /var/lib/apt/lists/*
2018-10-29 10:49:19 +01:00
# Install ocropy from the tagged GitHub source archive.
#
# The archive is downloaded, unpacked, installed and deleted inside ONE RUN
# instruction. Fetching it with a separate ADD instruction would store the
# tarball in its own layer, where a later `rm` can no longer reclaim the space.
#
# `$(cat PACKAGES)` is deliberately unquoted: the PACKAGES file shipped in the
# ocropy source tree is expanded into individual package names for apt-get.
#
# OCROPY_VERSION stays an ENV (not ARG) so it remains visible at runtime as
# before; it now uses the key=value form like the other ENV instructions.
ENV OCROPY_VERSION=1.3.3
RUN wget -nv -O "/tmp/ocropy-${OCROPY_VERSION}.tar.gz" \
        "https://github.com/tmbdev/ocropy/archive/v${OCROPY_VERSION}.tar.gz" \
    && tar -xzf "/tmp/ocropy-${OCROPY_VERSION}.tar.gz" -C /tmp \
    && cd "/tmp/ocropy-${OCROPY_VERSION}" \
    && apt-get update \
    && apt-get install -y --no-install-recommends \
        python-pil \
        python-tk \
        $(cat PACKAGES) \
    && rm -rf /var/lib/apt/lists/* \
    && python2.7 setup.py install \
    && cd / \
    && rm -rf \
        "/tmp/ocropy-${OCROPY_VERSION}" \
        "/tmp/ocropy-${OCROPY_VERSION}.tar.gz"
# Install pyflow from the GitHub release tarball.
#
# Download, build, install and clean-up all happen in ONE RUN instruction so
# that neither the tarball nor the unpacked source tree is stored in any image
# layer (a separate ADD layer would keep the tarball despite a later `rm`).
ENV PYFLOW_VERSION=1.1.20
RUN wget -nv -O "/tmp/pyflow-${PYFLOW_VERSION}.tar.gz" \
        "https://github.com/Illumina/pyflow/releases/download/v${PYFLOW_VERSION}/pyflow-${PYFLOW_VERSION}.tar.gz" \
    && tar -xzf "/tmp/pyflow-${PYFLOW_VERSION}.tar.gz" -C /tmp \
    && cd "/tmp/pyflow-${PYFLOW_VERSION}" \
    && python2.7 setup.py build install \
    && cd / \
    && rm -rf \
        "/tmp/pyflow-${PYFLOW_VERSION}" \
        "/tmp/pyflow-${PYFLOW_VERSION}.tar.gz"
2018-10-29 10:38:50 +01:00
2019-09-11 15:15:00 +02:00
# Install Tesseract OCR and its language data packages from the
# notesalexp.org apt repository.
#
# The repository signing key is first saved to a file and then handed to
# apt-key, instead of piping wget straight into `apt-key add -`: under the
# default /bin/sh a pipeline reports only the exit status of its last command,
# so a failed download would not reliably abort the build.
#
# The package index is removed in the same layer to keep it out of the image.
RUN echo "deb https://notesalexp.org/tesseract-ocr/stretch/ stretch main" >> /etc/apt/sources.list \
    && wget -nv -O /tmp/alexp_key.asc https://notesalexp.org/debian/alexp_key.asc \
    && apt-key add /tmp/alexp_key.asc \
    && rm -f /tmp/alexp_key.asc \
    && apt-get update \
    && apt-get install -y --no-install-recommends \
        tesseract-ocr \
        tesseract-ocr-deu \
        tesseract-ocr-eng \
        tesseract-ocr-enm \
        tesseract-ocr-fra \
        tesseract-ocr-frk \
        tesseract-ocr-frm \
        tesseract-ocr-ita \
        tesseract-ocr-por \
        tesseract-ocr-spa \
    && rm -rf /var/lib/apt/lists/*
# Install OCR pipeline: place the hocrtotei and ocr files from the build
# context into /usr/local/bin. The trailing slash marks the destination as a
# directory, which a multi-source COPY requires.
COPY hocrtotei ocr /usr/local/bin/
2018-10-09 14:43:23 +02:00
2019-09-11 15:15:00 +02:00
2019-05-13 15:03:43 +02:00
# Start the `ocr` program (copied into /usr/local/bin above) as the
# container's main process. Exec (JSON-array) form: no shell wraps the
# process, and arguments given to `docker run` are appended to it.
ENTRYPOINT ["ocr"]
2019-09-11 15:15:00 +02:00
# Default argument handed to the entrypoint when `docker run` supplies none,
# so a bare run executes `ocr --help`.
CMD ["--help"]