mirror of
https://gitlab.ub.uni-bielefeld.de/sfb1288inf/ocr.git
synced 2025-07-01 10:30:36 +00:00
Correct order for output files.
This commit is contained in:
20
Dockerfile
20
Dockerfile
@@ -1,6 +1,6 @@
|
||||
FROM debian:stretch-slim
|
||||
|
||||
MAINTAINER Patrick Jentsch <p.jentsch@uni-bielefeld.de>
|
||||
LABEL maintainer="inf_sfb1288@lists.uni-bielefeld.de"
|
||||
|
||||
ENV DEBIAN_FRONTEND=noninteractive
|
||||
ENV LANG=C.UTF-8
|
||||
@@ -11,34 +11,31 @@ RUN apt-get update && \
|
||||
ca-certificates \
|
||||
gnupg2 \
|
||||
imagemagick \
|
||||
pdftk \
|
||||
poppler-utils \
|
||||
python2.7 \
|
||||
python3.5 \
|
||||
python-numpy \
|
||||
wget
|
||||
|
||||
WORKDIR /root
|
||||
|
||||
# Install ocropy
|
||||
ENV OCROPY_VERSION 1.3.3
|
||||
RUN wget -nv https://github.com/tmbdev/ocropy/archive/v"$OCROPY_VERSION".tar.gz && \
|
||||
tar -xzf v"$OCROPY_VERSION".tar.gz && \
|
||||
rm v"$OCROPY_VERSION".tar.gz && \
|
||||
cd ocropy-"$OCROPY_VERSION" && \
|
||||
apt-get install -y --no-install-recommends $(cat PACKAGES) python-pil python-tk && \
|
||||
wget -nv http://www.tmbdev.net/en-default.pyrnn.gz -P models/ && \
|
||||
python2.7 setup.py install && \
|
||||
cd ..
|
||||
cd .. && \
|
||||
rm -r v"$OCROPY_VERSION".tar.gz ocropy-"$OCROPY_VERSION"
|
||||
|
||||
# Install pyFlow
|
||||
ENV PYFLOW_VERSION 1.1.20
|
||||
RUN wget -nv https://github.com/Illumina/pyflow/releases/download/v"$PYFLOW_VERSION"/pyflow-"$PYFLOW_VERSION".tar.gz && \
|
||||
tar -xzf pyflow-"$PYFLOW_VERSION".tar.gz && \
|
||||
rm pyflow-"$PYFLOW_VERSION".tar.gz && \
|
||||
cd pyflow-"$PYFLOW_VERSION" && \
|
||||
python2.7 setup.py build install && \
|
||||
cd ..
|
||||
cd .. && \
|
||||
rm -r pyflow-"$PYFLOW_VERSION".tar.gz pyflow-"$PYFLOW_VERSION"
|
||||
|
||||
# Install Tesseract OCR and Data Files
|
||||
RUN echo "deb https://notesalexp.org/tesseract-ocr/stretch/ stretch main" >> /etc/apt/sources.list && \
|
||||
@@ -52,11 +49,12 @@ RUN echo "deb https://notesalexp.org/tesseract-ocr/stretch/ stretch main" >> /et
|
||||
wget -nv https://github.com/tesseract-ocr/tessdata_best/raw/master/enm.traineddata -P /usr/share/tesseract-ocr/4.00/tessdata && \
|
||||
wget -nv https://github.com/tesseract-ocr/tessdata_best/raw/master/fra.traineddata -P /usr/share/tesseract-ocr/4.00/tessdata && \
|
||||
wget -nv https://github.com/tesseract-ocr/tessdata_best/raw/master/frm.traineddata -P /usr/share/tesseract-ocr/4.00/tessdata && \
|
||||
wget -nv https://github.com/tesseract-ocr/tessdata_best/raw/master/por.traineddata -P /usr/share/tesseract-ocr/4.00/tessdata && \
|
||||
wget -nv https://github.com/tesseract-ocr/tessdata_best/raw/master/ita.traineddata -P /usr/share/tesseract-ocr/4.00/tessdata && \
|
||||
wget -nv https://github.com/tesseract-ocr/tessdata_best/raw/master/por.traineddata -P /usr/share/tesseract-ocr/4.00/tessdata && \
|
||||
wget -nv https://github.com/tesseract-ocr/tessdata_best/raw/master/spa.traineddata -P /usr/share/tesseract-ocr/4.00/tessdata
|
||||
|
||||
COPY ocr /usr/local/bin
|
||||
COPY hocrtotei /usr/local/bin
|
||||
COPY ocr /usr/local/bin
|
||||
|
||||
CMD ["/bin/bash"]
|
||||
ENTRYPOINT ["ocr"]
|
||||
CMD ["--help"]
|
||||
|
Reference in New Issue
Block a user