Use new Dockerfile structure

This commit is contained in:
Patrick Jentsch 2020-10-08 23:09:10 +02:00
parent e941f64ee4
commit c0069d5453

View File

@@ -10,13 +10,16 @@ ENV LANG=C.UTF-8
RUN apt-get update RUN apt-get update
# Install pipeline dependencies #
## Install pyFlow ## ## Install pyFlow ##
ENV PYFLOW_RELEASE=1.1.20 ENV PYFLOW_RELEASE=1.1.20
ADD "https://github.com/Illumina/pyflow/releases/download/v${PYFLOW_RELEASE}/pyflow-${PYFLOW_RELEASE}.tar.gz" . ADD "https://github.com/Illumina/pyflow/releases/download/v${PYFLOW_RELEASE}/pyflow-${PYFLOW_RELEASE}.tar.gz" .
RUN tar -xzf "pyflow-${PYFLOW_RELEASE}.tar.gz" \ RUN tar -xzf "pyflow-${PYFLOW_RELEASE}.tar.gz" \
&& cd "pyflow-${PYFLOW_RELEASE}" \ && cd "pyflow-${PYFLOW_RELEASE}" \
&& apt-get install -y --no-install-recommends \ && apt-get update \
&& apt-get install --no-install-recommends --yes \
python2.7 \ python2.7 \
&& rm -r /var/lib/apt/lists/* \
&& python2.7 setup.py build install \ && python2.7 setup.py build install \
&& cd .. \ && cd .. \
&& rm -r "pyflow-${PYFLOW_RELEASE}" "pyflow-${PYFLOW_RELEASE}.tar.gz" && rm -r "pyflow-${PYFLOW_RELEASE}" "pyflow-${PYFLOW_RELEASE}.tar.gz"
@@ -27,10 +30,12 @@ ENV OCROPY_RELEASE 1.3.3
ADD "https://github.com/tmbdev/ocropy/archive/v${OCROPY_RELEASE}.tar.gz" . ADD "https://github.com/tmbdev/ocropy/archive/v${OCROPY_RELEASE}.tar.gz" .
RUN tar -xzf "v${OCROPY_RELEASE}.tar.gz" \ RUN tar -xzf "v${OCROPY_RELEASE}.tar.gz" \
&& cd "ocropy-${OCROPY_RELEASE}" \ && cd "ocropy-${OCROPY_RELEASE}" \
&& apt-get install -y --no-install-recommends \ && apt-get update \
&& apt-get install --no-install-recommends --yes \
python-pil \ python-pil \
python-tk \ python-tk \
$(cat PACKAGES) \ $(cat PACKAGES) \
&& rm -r /var/lib/apt/lists/* \
&& python2.7 setup.py install \ && python2.7 setup.py install \
&& cd .. \ && cd .. \
&& rm -r "ocropy-${OCROPY_RELEASE}" "v${OCROPY_RELEASE}.tar.gz" && rm -r "ocropy-${OCROPY_RELEASE}" "v${OCROPY_RELEASE}.tar.gz"
@@ -41,7 +46,8 @@ ENV TESSERACT_RELEASE=4.1.1
ADD "https://github.com/tesseract-ocr/tesseract/archive/${TESSERACT_RELEASE}.tar.gz" . ADD "https://github.com/tesseract-ocr/tesseract/archive/${TESSERACT_RELEASE}.tar.gz" .
RUN tar -xzf "${TESSERACT_RELEASE}.tar.gz" \ RUN tar -xzf "${TESSERACT_RELEASE}.tar.gz" \
&& cd "tesseract-${TESSERACT_RELEASE}" \ && cd "tesseract-${TESSERACT_RELEASE}" \
&& apt-get install -y --no-install-recommends \ && apt-get update \
&& apt-get install --no-install-recommends --yes \
autoconf \ autoconf \
automake \ automake \
g++ \ g++ \
@@ -53,6 +59,7 @@ RUN tar -xzf "${TESSERACT_RELEASE}.tar.gz" \
make \ make \
pkg-config \ pkg-config \
zlib1g-dev \ zlib1g-dev \
&& rm -r /var/lib/apt/lists/* \
&& ./autogen.sh \ && ./autogen.sh \
&& ./configure \ && ./configure \
&& make \ && make \
@@ -73,18 +80,19 @@ ADD "https://github.com/tesseract-ocr/tessdata_best/raw/master/deu.traineddata"
RUN chmod 644 /usr/local/share/tessdata/*.traineddata RUN chmod 644 /usr/local/share/tessdata/*.traineddata
## Install Pipeline ## ## Further dependencies ##
RUN apt-get install -y --no-install-recommends \ RUN apt-get update \
&& apt-get install --no-install-recommends --yes \
ghostscript \ ghostscript \
python-pip \ python-pip \
python3.7 \ python3.7 \
zip \ zip \
&& rm -r /var/lib/apt/lists/* \
&& pip install natsort && pip install natsort
COPY "hocrtotei" "ocr" "/usr/local/bin/"
## Cleanup ## ## Install Pipeline ##
RUN rm -r /var/lib/apt/lists/* COPY hocrtotei ocr /usr/local/bin/
ENTRYPOINT ["ocr"] ENTRYPOINT ["ocr"]