Codestyle

This commit is contained in:
Patrick Jentsch 2019-09-11 15:15:00 +02:00
parent 1fd85d1b44
commit cebc53da03

View File

@@ -1,12 +1,13 @@
FROM debian:9-slim FROM debian:9-slim
# Define image metadata
LABEL maintainer="inf_sfb1288@lists.uni-bielefeld.de" LABEL maintainer="inf_sfb1288@lists.uni-bielefeld.de"
ENV DEBIAN_FRONTEND=noninteractive
ENV LANG=C.UTF-8
RUN apt-get update && \ # Install prerequisites
apt-get install -y --no-install-recommends \ RUN apt-get update \
&& apt-get install -y --no-install-recommends \
apt-transport-https \ apt-transport-https \
ca-certificates \ ca-certificates \
gnupg2 \ gnupg2 \
@@ -14,34 +15,39 @@ RUN apt-get update && \
poppler-utils \ poppler-utils \
python2.7 \ python2.7 \
python3.5 \ python3.5 \
python-numpy \ wget \
wget && rm -rf /var/lib/apt/lists/*
# Install ocropy
ENV OCROPY_VERSION 1.3.3 ENV OCROPY_VERSION 1.3.3
RUN wget -nv https://github.com/tmbdev/ocropy/archive/v"$OCROPY_VERSION".tar.gz && \ ADD "https://github.com/tmbdev/ocropy/archive/v${OCROPY_VERSION}.tar.gz" .
tar -xzf v"$OCROPY_VERSION".tar.gz && \ RUN tar -xzf "v${OCROPY_VERSION}.tar.gz" \
cd ocropy-"$OCROPY_VERSION" && \ && cd "ocropy-${OCROPY_VERSION}" \
apt-get install -y --no-install-recommends $(cat PACKAGES) python-pil python-tk && \ && apt-get update \
wget -nv http://www.tmbdev.net/en-default.pyrnn.gz -P models/ && \ && apt-get install -y --no-install-recommends \
python2.7 setup.py install && \ python-pil \
cd .. && \ python-tk \
rm -r ocropy-"$OCROPY_VERSION" v"$OCROPY_VERSION".tar.gz $(cat PACKAGES) \
&& rm -rf /var/lib/apt/lists/* \
&& python2.7 setup.py install \
&& cd .. \
&& rm -rf \
"ocropy-${OCROPY_VERSION}" \
"v${OCROPY_VERSION}.tar.gz"
# Install pyFlow ENV PYFLOW_VERSION=1.1.20
ENV PYFLOW_VERSION 1.1.20 ADD "https://github.com/Illumina/pyflow/releases/download/v${PYFLOW_VERSION}/pyflow-${PYFLOW_VERSION}.tar.gz" .
RUN wget -nv https://github.com/Illumina/pyflow/releases/download/v"$PYFLOW_VERSION"/pyflow-"$PYFLOW_VERSION".tar.gz && \ RUN tar -xzf "pyflow-${PYFLOW_VERSION}.tar.gz" \
tar -xzf pyflow-"$PYFLOW_VERSION".tar.gz && \ && cd "pyflow-${PYFLOW_VERSION}" \
cd pyflow-"$PYFLOW_VERSION" && \ && python2.7 setup.py build install \
python2.7 setup.py build install && \ && cd .. \
cd .. && \ && rm -rf \
rm -r pyflow-"$PYFLOW_VERSION" pyflow-"$PYFLOW_VERSION".tar.gz "pyflow-${PYFLOW_VERSION}" \
"pyflow-${PYFLOW_VERSION}.tar.gz"
# Install Tesseract OCR and Data Files RUN echo "deb https://notesalexp.org/tesseract-ocr/stretch/ stretch main" >> /etc/apt/sources.list \
RUN echo "deb https://notesalexp.org/tesseract-ocr/stretch/ stretch main" >> /etc/apt/sources.list && \ && wget -O - https://notesalexp.org/debian/alexp_key.asc | apt-key add - \
wget -O - https://notesalexp.org/debian/alexp_key.asc | apt-key add - && \ && apt-get update \
apt-get update && \ && apt-get install -y --no-install-recommends \
apt-get install -y --no-install-recommends \
tesseract-ocr \ tesseract-ocr \
tesseract-ocr-deu \ tesseract-ocr-deu \
tesseract-ocr-eng \ tesseract-ocr-eng \
@@ -51,9 +57,14 @@ RUN echo "deb https://notesalexp.org/tesseract-ocr/stretch/ stretch main" >> /et
tesseract-ocr-frm \ tesseract-ocr-frm \
tesseract-ocr-ita \ tesseract-ocr-ita \
tesseract-ocr-por \ tesseract-ocr-por \
tesseract-ocr-spa tesseract-ocr-spa \
&& rm -rf /var/lib/apt/lists/*
# Install OCR pipeline
COPY hocrtotei /usr/local/bin COPY hocrtotei /usr/local/bin
COPY ocr /usr/local/bin COPY ocr /usr/local/bin
ENTRYPOINT ["ocr"] ENTRYPOINT ["ocr"]
CMD ["--help"]