First work on version 1.0.0

This commit is contained in:
Patrick Jentsch
2021-02-19 13:04:03 +01:00
parent 07635dcdfa
commit ca7df6d0ed
5 changed files with 73 additions and 80 deletions

View File

@ -1,7 +1,7 @@
## Base image and image metadata ##
FROM debian:buster-slim
# NOTE(review): the "authors" label is set twice below with different values.
# Docker keeps the most recently applied value for a repeated label key, so
# only the second line would take effect - confirm which author list is
# intended.
LABEL authors="Patrick Jentsch <p.jentsch@uni-bielefeld.de>, Stephan Porada <sporada@uni-bielefeld.de>"
LABEL authors="Patrick Jentsch <p.jentsch@uni-bielefeld.de>"
# Set LANG to C.UTF-8 for the following build steps and for containers
# started from this image.
ENV LANG=C.UTF-8
@ -16,26 +16,22 @@ ENV PYFLOW_RELEASE=1.1.20
# Download the pyflow release tarball selected by PYFLOW_RELEASE into the
# current working directory.
ADD "https://github.com/Illumina/pyflow/releases/download/v${PYFLOW_RELEASE}/pyflow-${PYFLOW_RELEASE}.tar.gz" .
# Unpack the tarball, install python2.7 from the Debian repositories, build
# and install pyflow with that interpreter, then delete the unpacked sources
# and the tarball. The apt package lists are refreshed before the install and
# removed again right after it, all within this one RUN.
RUN tar -xzf "pyflow-${PYFLOW_RELEASE}.tar.gz" \
&& cd "pyflow-${PYFLOW_RELEASE}" \
&& apt-get update \
&& apt-get install --no-install-recommends --yes \
python2.7 \
&& rm -r /var/lib/apt/lists/* \
&& python2.7 setup.py build install \
&& cd .. \
&& rm -r "pyflow-${PYFLOW_RELEASE}" "pyflow-${PYFLOW_RELEASE}.tar.gz"
## Install ocropy ##
# OCROPY_RELEASE selects the ocropy tag that is downloaded and installed
# below. It is declared once, in key=value form (the space-separated
# "ENV key value" form is deprecated).
ENV OCROPY_RELEASE=1.3.3
# Download the source archive of the selected ocropy tag into the current
# working directory.
ADD "https://github.com/tmbdev/ocropy/archive/v${OCROPY_RELEASE}.tar.gz" .
# Unpack the archive and, from inside the source directory, install
# python-pil, python-tk and every package named in ocropy's own PACKAGES file.
# Then install ocropy with python2.7 and delete the sources and the tarball.
# The apt package lists are refreshed before the install and removed again
# right after it, all within this one RUN.
RUN tar -xzf "v${OCROPY_RELEASE}.tar.gz" \
&& cd "ocropy-${OCROPY_RELEASE}" \
&& apt-get update \
&& apt-get install --no-install-recommends --yes \
python-pil \
python-tk \
$(cat PACKAGES) \
&& rm -r /var/lib/apt/lists/* \
&& python2.7 setup.py install \
&& cd .. \
&& rm -r "ocropy-${OCROPY_RELEASE}" "v${OCROPY_RELEASE}.tar.gz"
@ -46,7 +42,6 @@ ENV TESSERACT_RELEASE=4.1.1
ADD "https://github.com/tesseract-ocr/tesseract/archive/${TESSERACT_RELEASE}.tar.gz" .
RUN tar -xzf "${TESSERACT_RELEASE}.tar.gz" \
&& cd "tesseract-${TESSERACT_RELEASE}" \
&& apt-get update \
&& apt-get install --no-install-recommends --yes \
autoconf \
automake \
@ -59,7 +54,6 @@ RUN tar -xzf "${TESSERACT_RELEASE}.tar.gz" \
make \
pkg-config \
zlib1g-dev \
&& rm -r /var/lib/apt/lists/* \
&& ./autogen.sh \
&& ./configure \
&& make \
@ -67,30 +61,34 @@ RUN tar -xzf "${TESSERACT_RELEASE}.tar.gz" \
&& ldconfig \
&& cd - > /dev/null \
&& rm -r "tesseract-${TESSERACT_RELEASE}" "${TESSERACT_RELEASE}.tar.gz"
# Download the deu, eng, enm, fra, frk, frm, ita, por and spa trained models
# from the master branch of tessdata_best into /usr/local/share/tessdata/.
# NOTE(review): "master" is a moving reference, so the downloaded files can
# differ from one build to the next.
ADD "https://github.com/tesseract-ocr/tessdata_best/raw/master/deu.traineddata" \
"https://github.com/tesseract-ocr/tessdata_best/raw/master/eng.traineddata" \
"https://github.com/tesseract-ocr/tessdata_best/raw/master/enm.traineddata" \
"https://github.com/tesseract-ocr/tessdata_best/raw/master/fra.traineddata" \
"https://github.com/tesseract-ocr/tessdata_best/raw/master/frk.traineddata" \
"https://github.com/tesseract-ocr/tessdata_best/raw/master/frm.traineddata" \
"https://github.com/tesseract-ocr/tessdata_best/raw/master/ita.traineddata" \
"https://github.com/tesseract-ocr/tessdata_best/raw/master/por.traineddata" \
"https://github.com/tesseract-ocr/tessdata_best/raw/master/spa.traineddata" \
"/usr/local/share/tessdata/"
# Make the downloaded models readable by every user. Presumably required
# because files fetched by ADD from a URL are created owner-only - TODO
# confirm against the Dockerfile reference.
RUN chmod 644 /usr/local/share/tessdata/*.traineddata
# TESSDATA_BEST_RELEASE selects the tessdata_best tag whose source archive is
# downloaded below.
ENV TESSDATA_BEST_RELEASE=4.1.0
# Download the archive of that tag into the current working directory. It is
# unpacked explicitly with tar in the next step.
ADD "https://github.com/tesseract-ocr/tessdata_best/archive/${TESSDATA_BEST_RELEASE}.tar.gz" .
# Unpack the archive and move the deu, eng, enm, fra, frk, frm, ita, por and
# spa models into /usr/local/share/tessdata/. Then delete the rest of the
# unpacked archive and the tarball in the same RUN.
RUN tar -xzf "${TESSDATA_BEST_RELEASE}.tar.gz" \
&& mv "tessdata_best-${TESSDATA_BEST_RELEASE}/deu.traineddata" "/usr/local/share/tessdata/" \
&& mv "tessdata_best-${TESSDATA_BEST_RELEASE}/eng.traineddata" "/usr/local/share/tessdata/" \
&& mv "tessdata_best-${TESSDATA_BEST_RELEASE}/enm.traineddata" "/usr/local/share/tessdata/" \
&& mv "tessdata_best-${TESSDATA_BEST_RELEASE}/fra.traineddata" "/usr/local/share/tessdata/" \
&& mv "tessdata_best-${TESSDATA_BEST_RELEASE}/frk.traineddata" "/usr/local/share/tessdata/" \
&& mv "tessdata_best-${TESSDATA_BEST_RELEASE}/frm.traineddata" "/usr/local/share/tessdata/" \
&& mv "tessdata_best-${TESSDATA_BEST_RELEASE}/ita.traineddata" "/usr/local/share/tessdata/" \
&& mv "tessdata_best-${TESSDATA_BEST_RELEASE}/por.traineddata" "/usr/local/share/tessdata/" \
&& mv "tessdata_best-${TESSDATA_BEST_RELEASE}/spa.traineddata" "/usr/local/share/tessdata/" \
&& rm -r "tessdata_best-${TESSDATA_BEST_RELEASE}" "${TESSDATA_BEST_RELEASE}.tar.gz"
## Further dependencies ##
# Install the remaining tools (ghostscript, python-pip, python3.7, zip) from
# the Debian repositories and natsort via pip.
# `apt-get update`, the install and the removal of the apt package lists share
# a single RUN: the install then never depends on package lists left behind by
# an earlier layer, and the lists are not stored in this layer (deleting them
# in a separate, later RUN would not shrink the image).
RUN apt-get update \
&& apt-get install --no-install-recommends --yes \
ghostscript \
python-pip \
python3.7 \
zip \
&& rm -r /var/lib/apt/lists/* \
&& pip install natsort
## Install Pipeline ##
COPY hocrtotei ocr /usr/local/bin/