# Mirror of https://gitlab.ub.uni-bielefeld.de/sfb1288inf/ocr.git (synced 2025-11-04 02:32:44 +00:00)
			
		
		
		
	
		
			
				
	
	
		
			60 lines
		
	
	
		
			1.8 KiB
		
	
	
	
		
			Docker
		
	
	
	
	
	
			
		
		
	
	
			60 lines
		
	
	
		
			1.8 KiB
		
	
	
	
		
			Docker
		
	
	
	
	
	
# Base image for the OCR pipeline.
# NOTE(review): Debian 9 (stretch) is end-of-life; confirm that its apt
# repositories still resolve before rebuilding this image.
FROM debian:9-slim

LABEL maintainer="inf_sfb1288@lists.uni-bielefeld.de"

# Build-time only: keeps apt/debconf from prompting during the RUN steps
# below. Declared as ARG so it is not baked into the runtime environment.
ARG DEBIAN_FRONTEND=noninteractive

# Runtime locale setting; persists into the running container.
ENV LANG=C.UTF-8
 | 
						|
 | 
						|
# Install the base toolchain: HTTPS transport, CA certificates and GnuPG for
# apt, ImageMagick and poppler-utils, the Python 2.7 and 3.5 interpreters with
# python-numpy, and wget for the downloads performed in later steps.
# The apt package lists are left in place in this layer.
RUN apt-get update \
 && apt-get install --yes --no-install-recommends \
      apt-transport-https \
      ca-certificates \
      gnupg2 \
      imagemagick \
      poppler-utils \
      python2.7 \
      python3.5 \
      python-numpy \
      wget
 | 
						|
 | 
						|
# Install ocropy
# Downloads the tagged ocropy release from GitHub, installs the distribution
# packages listed in its PACKAGES file (plus python-pil and python-tk),
# downloads en-default.pyrnn.gz into models/, and installs ocropy with
# Python 2.7. The unpacked source tree and the tarball are removed afterwards.
#
# The package index is refreshed inside this layer so the install does not
# depend on lists cached by an earlier layer, and the lists are removed again
# in the same layer to keep it small.
#
# NOTE(review): the model file is fetched over plain HTTP without checksum
# verification - consider pinning a checksum.
ENV OCROPY_VERSION=1.3.3
RUN wget -nv https://github.com/tmbdev/ocropy/archive/v"$OCROPY_VERSION".tar.gz && \
    tar -xzf v"$OCROPY_VERSION".tar.gz && \
    cd ocropy-"$OCROPY_VERSION" && \
    apt-get update && \
    apt-get install -y --no-install-recommends $(cat PACKAGES) python-pil python-tk && \
    rm -rf /var/lib/apt/lists/* && \
    wget -nv http://www.tmbdev.net/en-default.pyrnn.gz -P models/ && \
    python2.7 setup.py install && \
    cd .. && \
    rm -r ocropy-"$OCROPY_VERSION" v"$OCROPY_VERSION".tar.gz
 | 
						|
 | 
						|
# Install pyFlow
# Downloads the pyflow release tarball from GitHub, builds and installs it
# with Python 2.7, then removes the unpacked source tree and the tarball in
# the same layer.
ENV PYFLOW_VERSION=1.1.20
RUN wget -nv https://github.com/Illumina/pyflow/releases/download/v"$PYFLOW_VERSION"/pyflow-"$PYFLOW_VERSION".tar.gz && \
    tar -xzf pyflow-"$PYFLOW_VERSION".tar.gz && \
    cd pyflow-"$PYFLOW_VERSION" && \
    python2.7 setup.py build install && \
    cd .. && \
    rm -r pyflow-"$PYFLOW_VERSION" pyflow-"$PYFLOW_VERSION".tar.gz
 | 
						|
 | 
						|
# Install Tesseract OCR and Data Files
# Registers the notesalexp.org stretch repository and its signing key, then
# installs Tesseract together with the listed language data packages.
#
# The key is downloaded to a file before being handed to apt-key rather than
# piped into it: under the default /bin/sh (no pipefail) a failed wget on the
# left-hand side of a pipe would not fail the build.
# The apt package lists are removed in the same layer to keep it small.
RUN echo "deb https://notesalexp.org/tesseract-ocr/stretch/ stretch main" >> /etc/apt/sources.list && \
    wget -nv -O /tmp/alexp_key.asc https://notesalexp.org/debian/alexp_key.asc && \
    apt-key add /tmp/alexp_key.asc && \
    rm /tmp/alexp_key.asc && \
    apt-get update && \
    apt-get install -y --no-install-recommends \
    tesseract-ocr \
    tesseract-ocr-deu \
    tesseract-ocr-eng \
    tesseract-ocr-enm \
    tesseract-ocr-fra \
    tesseract-ocr-frk \
    tesseract-ocr-frm \
    tesseract-ocr-ita \
    tesseract-ocr-por \
    tesseract-ocr-spa && \
    rm -rf /var/lib/apt/lists/*
 | 
						|
 | 
						|
# Add the two pipeline executables from the build context to /usr/local/bin
# and make `ocr` the container's entry point (exec form, so arguments given
# to `docker run` are passed straight to it).
COPY hocrtotei ocr /usr/local/bin/

ENTRYPOINT ["ocr"]
 |