mirror of
https://gitlab.ub.uni-bielefeld.de/sfb1288inf/ocr.git
synced 2025-07-01 10:10:34 +00:00
Change tif split handling, sort files before merging
This commit is contained in:
22
Dockerfile
22
Dockerfile
@ -11,7 +11,6 @@ RUN apt-get update && \
|
||||
ca-certificates \
|
||||
gnupg2 \
|
||||
imagemagick \
|
||||
libtiff-tools \
|
||||
pdftk \
|
||||
poppler-utils \
|
||||
python2.7 \
|
||||
@ -45,17 +44,18 @@ RUN wget -nv https://github.com/Illumina/pyflow/releases/download/v"$PYFLOW_VERS
|
||||
RUN echo "deb https://notesalexp.org/tesseract-ocr/stretch/ stretch main" >> /etc/apt/sources.list && \
|
||||
wget -O - https://notesalexp.org/debian/alexp_key.asc | apt-key add - && \
|
||||
apt-get update && \
|
||||
apt-get install -y --no-install-recommends tesseract-ocr && \
|
||||
wget -nv https://github.com/tesseract-ocr/tessdata_best/raw/master/deu.traineddata -P /usr/share/tesseract-ocr/4.00/tessdata && \
|
||||
wget -nv https://github.com/tesseract-ocr/tessdata/raw/master/deu_frak.traineddata -P /usr/share/tesseract-ocr/4.00/tessdata && \
|
||||
wget -nv https://github.com/tesseract-ocr/tessdata_best/raw/master/eng.traineddata -P /usr/share/tesseract-ocr/4.00/tessdata && \
|
||||
wget -nv https://github.com/tesseract-ocr/tessdata_best/raw/master/enm.traineddata -P /usr/share/tesseract-ocr/4.00/tessdata && \
|
||||
wget -nv https://github.com/tesseract-ocr/tessdata_best/raw/master/fra.traineddata -P /usr/share/tesseract-ocr/4.00/tessdata && \
|
||||
wget -nv https://github.com/tesseract-ocr/tessdata_best/raw/master/frm.traineddata -P /usr/share/tesseract-ocr/4.00/tessdata && \
|
||||
wget -nv https://github.com/tesseract-ocr/tessdata_best/raw/master/por.traineddata -P /usr/share/tesseract-ocr/4.00/tessdata && \
|
||||
wget -nv https://github.com/tesseract-ocr/tessdata_best/raw/master/spa.traineddata -P /usr/share/tesseract-ocr/4.00/tessdata
|
||||
apt-get install -y --no-install-recommends \
|
||||
tesseract-ocr \
|
||||
tesseract-ocr-deu \
|
||||
tesseract-ocr-frk \
|
||||
tesseract-ocr-eng \
|
||||
tesseract-ocr-enm \
|
||||
tesseract-ocr-fra \
|
||||
tesseract-ocr-frm \
|
||||
tesseract-ocr-por \
|
||||
tesseract-ocr-spa
|
||||
|
||||
COPY ocr /usr/local/bin
|
||||
COPY hocrtotei /usr/local/bin
|
||||
|
||||
CMD ["/bin/bash"]
|
||||
CMD ["/bin/bash"]
|
||||
|
Reference in New Issue
Block a user