Dockerfile: make every package-installing RUN step self-contained.

Each RUN step that calls `apt-get install` now runs `apt-get update` first and
removes /var/lib/apt/lists/* within the same step, so the separate trailing
"Cleanup" step is dropped. The install flags are spelled
`--no-install-recommends --yes`, the final package step is retitled
"Further dependencies", and the COPY of `hocrtotei` and `ocr` moves under the
"Install Pipeline" heading with unquoted arguments.

NOTE(review): this patch was restored from a copy whose line breaks and
indentation had been collapsed. Blank context lines were placed to satisfy the
hunk line counts and leading whitespace is inferred; confirm against the target
Dockerfile, or apply with `git apply --ignore-whitespace`.

diff --git a/Dockerfile b/Dockerfile
index 2a299fc..534a5c1 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -10,13 +10,16 @@ ENV LANG=C.UTF-8
 RUN apt-get update
 
 
+# Install pipeline dependencies #
 ## Install pyFlow ##
 ENV PYFLOW_RELEASE=1.1.20
 ADD "https://github.com/Illumina/pyflow/releases/download/v${PYFLOW_RELEASE}/pyflow-${PYFLOW_RELEASE}.tar.gz" .
 RUN tar -xzf "pyflow-${PYFLOW_RELEASE}.tar.gz" \
  && cd "pyflow-${PYFLOW_RELEASE}" \
- && apt-get install -y --no-install-recommends \
+ && apt-get update \
+ && apt-get install --no-install-recommends --yes \
      python2.7 \
+ && rm -r /var/lib/apt/lists/* \
  && python2.7 setup.py build install \
  && cd .. \
  && rm -r "pyflow-${PYFLOW_RELEASE}" "pyflow-${PYFLOW_RELEASE}.tar.gz"
@@ -27,10 +30,12 @@ ENV OCROPY_RELEASE 1.3.3
 ADD "https://github.com/tmbdev/ocropy/archive/v${OCROPY_RELEASE}.tar.gz" .
 RUN tar -xzf "v${OCROPY_RELEASE}.tar.gz" \
  && cd "ocropy-${OCROPY_RELEASE}" \
- && apt-get install -y --no-install-recommends \
+ && apt-get update \
+ && apt-get install --no-install-recommends --yes \
      python-pil \
      python-tk \
      $(cat PACKAGES) \
+ && rm -r /var/lib/apt/lists/* \
  && python2.7 setup.py install \
  && cd .. \
  && rm -r "ocropy-${OCROPY_RELEASE}" "v${OCROPY_RELEASE}.tar.gz"
@@ -41,7 +46,8 @@ ENV TESSERACT_RELEASE=4.1.1
 ADD "https://github.com/tesseract-ocr/tesseract/archive/${TESSERACT_RELEASE}.tar.gz" .
 RUN tar -xzf "${TESSERACT_RELEASE}.tar.gz" \
  && cd "tesseract-${TESSERACT_RELEASE}" \
- && apt-get install -y --no-install-recommends \
+ && apt-get update \
+ && apt-get install --no-install-recommends --yes \
      autoconf \
      automake \
      g++ \
@@ -53,6 +59,7 @@ RUN tar -xzf "${TESSERACT_RELEASE}.tar.gz" \
      make \
      pkg-config \
      zlib1g-dev \
+ && rm -r /var/lib/apt/lists/* \
  && ./autogen.sh \
  && ./configure \
  && make \
@@ -73,18 +80,19 @@ ADD "https://github.com/tesseract-ocr/tessdata_best/raw/master/deu.traineddata"
 RUN chmod 644 /usr/local/share/tessdata/*.traineddata
 
 
-## Install Pipeline ##
-RUN apt-get install -y --no-install-recommends \
+## Further dependencies ##
+RUN apt-get update \
+ && apt-get install --no-install-recommends --yes \
      ghostscript \
      python-pip \
      python3.7 \
      zip \
+ && rm -r /var/lib/apt/lists/* \
  && pip install natsort
-COPY "hocrtotei" "ocr" "/usr/local/bin/"
 
 
-## Cleanup ##
-RUN rm -r /var/lib/apt/lists/*
+## Install Pipeline ##
+COPY hocrtotei ocr /usr/local/bin/
 
 
 ENTRYPOINT ["ocr"]