From cebc53da034277f94a7c61cc2c05f408b3f0d16a Mon Sep 17 00:00:00 2001
From: Patrick Jentsch
Date: Wed, 11 Sep 2019 15:15:00 +0200
Subject: [PATCH] Codestyle
---
Dockerfile | 101 +++++++++++++++++++++++++++++------------------------
1 file changed, 56 insertions(+), 45 deletions(-)
diff --git a/Dockerfile b/Dockerfile
index 704d5a2..e48b895 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -1,59 +1,70 @@
FROM debian:9-slim
+
+# Define image metadata
LABEL maintainer="inf_sfb1288@lists.uni-bielefeld.de"
-ENV DEBIAN_FRONTEND=noninteractive
-ENV LANG=C.UTF-8
-RUN apt-get update && \
- apt-get install -y --no-install-recommends \
- apt-transport-https \
- ca-certificates \
- gnupg2 \
- imagemagick \
- poppler-utils \
- python2.7 \
- python3.5 \
- python-numpy \
- wget
+# Install prerequisites: HTTPS/apt-key support for the extra apt repository added further down, Python 2.7/3.5, wget, and image/PDF tools
+RUN apt-get update \
+ && apt-get install -y --no-install-recommends \
+ apt-transport-https \
+ ca-certificates \
+ gnupg2 \
+ imagemagick \
+ poppler-utils \
+ python2.7 \
+ python3.5 \
+ wget \
+ && rm -rf /var/lib/apt/lists/*
-# Install ocropy
ENV OCROPY_VERSION 1.3.3
-RUN wget -nv https://github.com/tmbdev/ocropy/archive/v"$OCROPY_VERSION".tar.gz && \
- tar -xzf v"$OCROPY_VERSION".tar.gz && \
- cd ocropy-"$OCROPY_VERSION" && \
- apt-get install -y --no-install-recommends $(cat PACKAGES) python-pil python-tk && \
- wget -nv http://www.tmbdev.net/en-default.pyrnn.gz -P models/ && \
- python2.7 setup.py install && \
- cd .. && \
- rm -r ocropy-"$OCROPY_VERSION" v"$OCROPY_VERSION".tar.gz
+# Install ocropy: fetch the tarball with wget inside this RUN (not ADD) so the
+# final rm really drops it from the image; PACKAGES comes from the source tree.
+RUN wget -nv "https://github.com/tmbdev/ocropy/archive/v${OCROPY_VERSION}.tar.gz" \
+    && tar -xzf "v${OCROPY_VERSION}.tar.gz" \
+    && cd "ocropy-${OCROPY_VERSION}" \
+    && apt-get update \
+    && apt-get install -y --no-install-recommends \
+        python-pil \
+        python-tk \
+        $(cat PACKAGES) \
+    && rm -rf /var/lib/apt/lists/* \
+    && python2.7 setup.py install \
+    && cd .. \
+    && rm -rf "ocropy-${OCROPY_VERSION}" "v${OCROPY_VERSION}.tar.gz"
-# Install pyFlow
-ENV PYFLOW_VERSION 1.1.20
-RUN wget -nv https://github.com/Illumina/pyflow/releases/download/v"$PYFLOW_VERSION"/pyflow-"$PYFLOW_VERSION".tar.gz && \
- tar -xzf pyflow-"$PYFLOW_VERSION".tar.gz && \
- cd pyflow-"$PYFLOW_VERSION" && \
- python2.7 setup.py build install && \
- cd .. && \
- rm -r pyflow-"$PYFLOW_VERSION" pyflow-"$PYFLOW_VERSION".tar.gz
+# Install pyFlow: download, build and clean up in one RUN; fetching with wget
+# (not ADD) means the removed tarball does not persist in an earlier layer.
+ENV PYFLOW_VERSION=1.1.20
+RUN wget -nv "https://github.com/Illumina/pyflow/releases/download/v${PYFLOW_VERSION}/pyflow-${PYFLOW_VERSION}.tar.gz" \
+    && tar -xzf "pyflow-${PYFLOW_VERSION}.tar.gz" \
+    && cd "pyflow-${PYFLOW_VERSION}" \
+    && python2.7 setup.py build install \
+    && cd .. \
+    && rm -rf "pyflow-${PYFLOW_VERSION}" "pyflow-${PYFLOW_VERSION}.tar.gz"
-# Install Tesseract OCR and Data Files
-RUN echo "deb https://notesalexp.org/tesseract-ocr/stretch/ stretch main" >> /etc/apt/sources.list && \
- wget -O - https://notesalexp.org/debian/alexp_key.asc | apt-key add - && \
- apt-get update && \
- apt-get install -y --no-install-recommends \
- tesseract-ocr \
- tesseract-ocr-deu \
- tesseract-ocr-eng \
- tesseract-ocr-enm \
- tesseract-ocr-fra \
- tesseract-ocr-frk \
- tesseract-ocr-frm \
- tesseract-ocr-ita \
- tesseract-ocr-por \
- tesseract-ocr-spa
+# Install Tesseract OCR and data files; the repo key is saved to a file first so a failed download aborts the build
+RUN echo "deb https://notesalexp.org/tesseract-ocr/stretch/ stretch main" >> /etc/apt/sources.list \
+    && wget -nv -O /tmp/alexp_key.asc https://notesalexp.org/debian/alexp_key.asc && apt-key add /tmp/alexp_key.asc && rm /tmp/alexp_key.asc \
+    && apt-get update && apt-get install -y --no-install-recommends \
+        tesseract-ocr \
+        tesseract-ocr-deu \
+        tesseract-ocr-eng \
+        tesseract-ocr-enm \
+        tesseract-ocr-fra \
+        tesseract-ocr-frk \
+        tesseract-ocr-frm \
+        tesseract-ocr-ita \
+        tesseract-ocr-por \
+        tesseract-ocr-spa \
+    && rm -rf /var/lib/apt/lists/*
+
+# Install OCR pipeline
COPY hocrtotei /usr/local/bin
COPY ocr /usr/local/bin
+
ENTRYPOINT ["ocr"]
+CMD ["--help"]