mirror of
				https://gitlab.ub.uni-bielefeld.de/sfb1288inf/ocr.git
				synced 2025-10-31 21:23:14 +00:00 
			
		
		
		
	Codestyle
This commit is contained in:
		
							
								
								
									
										101
									
								
								Dockerfile
									
									
									
									
									
								
							
							
						
						
									
										101
									
								
								Dockerfile
									
									
									
									
									
								
							| @@ -1,59 +1,70 @@ | ||||
| FROM debian:9-slim | ||||
|  | ||||
|  | ||||
| # Define image metadata | ||||
| LABEL maintainer="inf_sfb1288@lists.uni-bielefeld.de" | ||||
|  | ||||
| ENV DEBIAN_FRONTEND=noninteractive | ||||
| ENV LANG=C.UTF-8 | ||||
|  | ||||
| RUN apt-get update && \ | ||||
|     apt-get install -y --no-install-recommends \ | ||||
|     apt-transport-https \ | ||||
|     ca-certificates \ | ||||
|     gnupg2 \ | ||||
|     imagemagick \ | ||||
|     poppler-utils \ | ||||
|     python2.7 \ | ||||
|     python3.5 \ | ||||
|     python-numpy \ | ||||
|     wget | ||||
| # Install prerequisites | ||||
| RUN apt-get update \ | ||||
|  && apt-get install -y --no-install-recommends \ | ||||
|       apt-transport-https \ | ||||
|       ca-certificates \ | ||||
|       gnupg2 \ | ||||
|       imagemagick \ | ||||
|       poppler-utils \ | ||||
|       python2.7 \ | ||||
|       python3.5 \ | ||||
|       wget \ | ||||
|  && rm -rf /var/lib/apt/lists/* | ||||
|  | ||||
| # Install ocropy | ||||
| ENV OCROPY_VERSION 1.3.3 | ||||
| RUN wget -nv https://github.com/tmbdev/ocropy/archive/v"$OCROPY_VERSION".tar.gz && \ | ||||
|     tar -xzf v"$OCROPY_VERSION".tar.gz && \ | ||||
|     cd ocropy-"$OCROPY_VERSION" && \ | ||||
|     apt-get install -y --no-install-recommends $(cat PACKAGES) python-pil python-tk && \ | ||||
|     wget -nv http://www.tmbdev.net/en-default.pyrnn.gz -P models/ && \ | ||||
|     python2.7 setup.py install && \ | ||||
|     cd .. && \ | ||||
|     rm -r ocropy-"$OCROPY_VERSION" v"$OCROPY_VERSION".tar.gz | ||||
| ADD "https://github.com/tmbdev/ocropy/archive/v${OCROPY_VERSION}.tar.gz" . | ||||
| RUN tar -xzf "v${OCROPY_VERSION}.tar.gz" \ | ||||
|  && cd "ocropy-${OCROPY_VERSION}" \ | ||||
|  && apt-get update \ | ||||
|  && apt-get install -y --no-install-recommends \ | ||||
|       python-pil \ | ||||
|       python-tk \ | ||||
|       $(cat PACKAGES) \ | ||||
|  && rm -rf /var/lib/apt/lists/* \ | ||||
|  && python2.7 setup.py install \ | ||||
|  && cd .. \ | ||||
|  && rm -rf \ | ||||
|       "ocropy-${OCROPY_VERSION}" \ | ||||
|       "v${OCROPY_VERSION}.tar.gz" | ||||
|  | ||||
| # Install pyFlow | ||||
| ENV PYFLOW_VERSION 1.1.20 | ||||
| RUN wget -nv https://github.com/Illumina/pyflow/releases/download/v"$PYFLOW_VERSION"/pyflow-"$PYFLOW_VERSION".tar.gz && \ | ||||
|     tar -xzf pyflow-"$PYFLOW_VERSION".tar.gz && \ | ||||
|     cd pyflow-"$PYFLOW_VERSION" && \ | ||||
|     python2.7 setup.py build install && \ | ||||
|     cd .. && \ | ||||
|     rm -r pyflow-"$PYFLOW_VERSION" pyflow-"$PYFLOW_VERSION".tar.gz | ||||
| ENV PYFLOW_VERSION=1.1.20 | ||||
| ADD "https://github.com/Illumina/pyflow/releases/download/v${PYFLOW_VERSION}/pyflow-${PYFLOW_VERSION}.tar.gz" . | ||||
| RUN tar -xzf "pyflow-${PYFLOW_VERSION}.tar.gz" \ | ||||
|  && cd "pyflow-${PYFLOW_VERSION}" \ | ||||
|  && python2.7 setup.py build install \ | ||||
|  && cd .. \ | ||||
|  && rm -rf \ | ||||
|       "pyflow-${PYFLOW_VERSION}" \ | ||||
|       "pyflow-${PYFLOW_VERSION}.tar.gz" | ||||
|  | ||||
| # Install Tesseract OCR and Data Files | ||||
| RUN echo "deb https://notesalexp.org/tesseract-ocr/stretch/ stretch main" >> /etc/apt/sources.list && \ | ||||
|     wget -O - https://notesalexp.org/debian/alexp_key.asc | apt-key add - && \ | ||||
|     apt-get update && \ | ||||
|     apt-get install -y --no-install-recommends \ | ||||
|     tesseract-ocr  \ | ||||
|     tesseract-ocr-deu \ | ||||
|     tesseract-ocr-eng \ | ||||
|     tesseract-ocr-enm \ | ||||
|     tesseract-ocr-fra \ | ||||
|     tesseract-ocr-frk \ | ||||
|     tesseract-ocr-frm \ | ||||
|     tesseract-ocr-ita \ | ||||
|     tesseract-ocr-por \ | ||||
|     tesseract-ocr-spa | ||||
| RUN echo "deb https://notesalexp.org/tesseract-ocr/stretch/ stretch main" >> /etc/apt/sources.list \ | ||||
|  && wget -O - https://notesalexp.org/debian/alexp_key.asc | apt-key add - \ | ||||
|  && apt-get update \ | ||||
|  && apt-get install -y --no-install-recommends \ | ||||
|       tesseract-ocr  \ | ||||
|       tesseract-ocr-deu \ | ||||
|       tesseract-ocr-eng \ | ||||
|       tesseract-ocr-enm \ | ||||
|       tesseract-ocr-fra \ | ||||
|       tesseract-ocr-frk \ | ||||
|       tesseract-ocr-frm \ | ||||
|       tesseract-ocr-ita \ | ||||
|       tesseract-ocr-por \ | ||||
|       tesseract-ocr-spa \ | ||||
|  && rm -rf /var/lib/apt/lists/* | ||||
|  | ||||
|  | ||||
| # Install OCR pipeline | ||||
| COPY hocrtotei /usr/local/bin | ||||
| COPY ocr /usr/local/bin | ||||
|  | ||||
|  | ||||
| ENTRYPOINT ["ocr"] | ||||
| CMD ["--help"] | ||||
|   | ||||
		Reference in New Issue
	
	Block a user