diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
index ea08fd4..00d3567 100644
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -9,36 +9,62 @@ variables:
stages:
- build
- push
+ - clean
before_script:
- docker login -u gitlab-ci-token -p $CI_JOB_TOKEN $CI_REGISTRY
-Build:
+build_image:
script:
- - docker build --pull -t $CI_REGISTRY_IMAGE:tmp .
- - docker push $CI_REGISTRY_IMAGE:tmp
+ - docker build -t $INTERMEDIATE_IMAGE_TAG .
+ - docker push $INTERMEDIATE_IMAGE_TAG
stage: build
tags:
- - docker
+ - docker
+ variables:
+ INTERMEDIATE_IMAGE_TAG: $CI_REGISTRY_IMAGE:$CI_COMMIT_REF_SLUG
-Push latest:
+push_master:
only:
- master
script:
- - docker pull $CI_REGISTRY_IMAGE:tmp
- - docker tag $CI_REGISTRY_IMAGE:tmp $CI_REGISTRY_IMAGE:latest
- - docker push $CI_REGISTRY_IMAGE:latest
+ - docker pull $INTERMEDIATE_IMAGE_TAG
+ - docker tag $INTERMEDIATE_IMAGE_TAG $IMAGE_TAG
+ - docker push $IMAGE_TAG
stage: push
tags:
- - docker
+ - docker
+ variables:
+ IMAGE_TAG: $CI_REGISTRY_IMAGE:latest
+ INTERMEDIATE_IMAGE_TAG: $CI_REGISTRY_IMAGE:$CI_COMMIT_REF_SLUG
-Push tag:
+push_other:
+ except:
+ - master
only:
+ - branches
- tags
script:
- - docker pull $CI_REGISTRY_IMAGE:tmp
- - docker tag $CI_REGISTRY_IMAGE:tmp $CI_REGISTRY_IMAGE:$CI_COMMIT_REF_NAME
- - docker push $CI_REGISTRY_IMAGE:$CI_COMMIT_REF_NAME
+ - docker pull $INTERMEDIATE_IMAGE_TAG
+ - docker tag $INTERMEDIATE_IMAGE_TAG $IMAGE_TAG
+ - docker push $IMAGE_TAG
stage: push
tags:
- - docker
+ - docker
+ variables:
+ IMAGE_TAG: $CI_REGISTRY_IMAGE:$CI_COMMIT_REF_NAME
+ INTERMEDIATE_IMAGE_TAG: $CI_REGISTRY_IMAGE:$CI_COMMIT_REF_SLUG
+
+delete_image:
+ before_script:
+ - apk add --no-cache curl
+ - curl --fail --show-error --location "https://github.com/genuinetools/reg/releases/download/v$REG_VERSION/reg-linux-amd64" --output /usr/local/bin/reg
+ - echo "$REG_SHA256  /usr/local/bin/reg" | sha256sum -c -
+ - chmod a+x /usr/local/bin/reg
+ script:
+ - /usr/local/bin/reg rm -d --auth-url $CI_REGISTRY -u $CI_REGISTRY_USER -p $CI_REGISTRY_PASSWORD $INTERMEDIATE_IMAGE_TAG
+ stage: clean
+ variables:
+ INTERMEDIATE_IMAGE_TAG: $CI_REGISTRY_IMAGE:$CI_COMMIT_REF_SLUG
+ REG_SHA256: ade837fc5224acd8c34732bf54a94f579b47851cc6a7fd5899a98386b782e228
+ REG_VERSION: 0.16.1
diff --git a/Dockerfile b/Dockerfile
index bc4ae93..3ed80ef 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -1,73 +1,88 @@
FROM debian:10-slim
-LABEL maintainer="inf_sfb1288@lists.uni-bielefeld.de"
+LABEL authors="Patrick Jentsch, Stephan Porada"
ENV LANG=C.UTF-8
-RUN apt-get update \
+RUN apt-get update
+
+
+## Install pyFlow ##
+ENV PYFLOW_RELEASE=1.1.20
+ADD "https://github.com/Illumina/pyflow/releases/download/v${PYFLOW_RELEASE}/pyflow-${PYFLOW_RELEASE}.tar.gz" .
+RUN tar -xzf "pyflow-${PYFLOW_RELEASE}.tar.gz" \
+ && cd "pyflow-${PYFLOW_RELEASE}" \
&& apt-get install -y --no-install-recommends \
- apt-transport-https \
- build-essential \
- ca-certificates \
- gnupg2 \
- ghostscript \
python2.7 \
- python3.7 \
- wget \
- zip
+ && python2.7 setup.py build install \
+ && cd .. \
+ && rm -r "pyflow-${PYFLOW_RELEASE}" "pyflow-${PYFLOW_RELEASE}.tar.gz"
-ENV OCROPY_VERSION 1.3.3
-ADD "https://github.com/tmbdev/ocropy/archive/v${OCROPY_VERSION}.tar.gz" .
-RUN tar -xzf "v${OCROPY_VERSION}.tar.gz" \
- && cd "ocropy-${OCROPY_VERSION}" \
- && apt-get update \
+## Install ocropy ##
+ENV OCROPY_RELEASE=1.3.3
+ADD "https://github.com/tmbdev/ocropy/archive/v${OCROPY_RELEASE}.tar.gz" .
+RUN tar -xzf "v${OCROPY_RELEASE}.tar.gz" \
+ && cd "ocropy-${OCROPY_RELEASE}" \
&& apt-get install -y --no-install-recommends \
python-pil \
python-tk \
$(cat PACKAGES) \
&& python2.7 setup.py install \
&& cd .. \
- && rm -rf \
- "ocropy-${OCROPY_VERSION}" \
- "v${OCROPY_VERSION}.tar.gz"
+ && rm -r "ocropy-${OCROPY_RELEASE}" "v${OCROPY_RELEASE}.tar.gz"
-ENV PYFLOW_VERSION=1.1.20
-ADD "https://github.com/Illumina/pyflow/releases/download/v${PYFLOW_VERSION}/pyflow-${PYFLOW_VERSION}.tar.gz" .
-RUN tar -xzf "pyflow-${PYFLOW_VERSION}.tar.gz" \
- && cd "pyflow-${PYFLOW_VERSION}" \
- && python2.7 setup.py build install \
- && cd .. \
- && rm -rf \
- "pyflow-${PYFLOW_VERSION}" \
- "pyflow-${PYFLOW_VERSION}.tar.gz"
-
-
-RUN echo "deb https://notesalexp.org/tesseract-ocr/buster/ buster main" >> /etc/apt/sources.list \
- && wget -O - https://notesalexp.org/debian/alexp_key.asc | apt-key add - \
- && apt-get update \
+## Install Tesseract OCR ##
+ENV TESSERACT_RELEASE=4.1.1
+ADD "https://github.com/tesseract-ocr/tesseract/archive/${TESSERACT_RELEASE}.tar.gz" .
+RUN tar -xzf "${TESSERACT_RELEASE}.tar.gz" \
+ && cd "tesseract-${TESSERACT_RELEASE}" \
&& apt-get install -y --no-install-recommends \
- tesseract-ocr \
- tesseract-ocr-deu \
- tesseract-ocr-eng \
- tesseract-ocr-enm \
- tesseract-ocr-fra \
- tesseract-ocr-frk \
- tesseract-ocr-frm \
- tesseract-ocr-ita \
- tesseract-ocr-por \
- tesseract-ocr-spa
+ autoconf \
+ automake \
+ g++ \
+ libjpeg62-turbo-dev \
+ libleptonica-dev \
+ libtiff5-dev \
+ libtool \
+ libpng-dev \
+ make \
+ pkg-config \
+ zlib1g-dev \
+ && ./autogen.sh \
+ && ./configure \
+ && make \
+ && make install \
+ && ldconfig \
+ && cd - > /dev/null \
+ && rm -r "tesseract-${TESSERACT_RELEASE}" "${TESSERACT_RELEASE}.tar.gz"
+ADD "https://github.com/tesseract-ocr/tessdata_best/raw/master/deu.traineddata" \
+ "https://github.com/tesseract-ocr/tessdata_best/raw/master/eng.traineddata" \
+ "https://github.com/tesseract-ocr/tessdata_best/raw/master/enm.traineddata" \
+ "https://github.com/tesseract-ocr/tessdata_best/raw/master/fra.traineddata" \
+ "https://github.com/tesseract-ocr/tessdata_best/raw/master/frk.traineddata" \
+ "https://github.com/tesseract-ocr/tessdata_best/raw/master/frm.traineddata" \
+ "https://github.com/tesseract-ocr/tessdata_best/raw/master/ita.traineddata" \
+ "https://github.com/tesseract-ocr/tessdata_best/raw/master/por.traineddata" \
+ "https://github.com/tesseract-ocr/tessdata_best/raw/master/spa.traineddata" \
+ "/usr/local/share/tessdata/"
+RUN chmod 644 /usr/local/share/tessdata/*.traineddata
-RUN rm -rf /var/lib/apt/lists/*
+## Install Pipeline ##
+RUN apt-get install -y --no-install-recommends \
+ ghostscript \
+ python3.7 \
+ zip
+COPY "hocrtotei" "ocr" "/usr/local/bin/"
-COPY hocrtotei /usr/local/bin
-COPY ocr /usr/local/bin
+## Cleanup ##
+RUN rm -r /var/lib/apt/lists/*
ENTRYPOINT ["ocr"]
diff --git a/ocr b/ocr
index a857931..10ee021 100755
--- a/ocr
+++ b/ocr
@@ -6,7 +6,8 @@
ocr
Usage: For usage instructions run with option --help
-Author: Patrick Jentsch
+Authors: Patrick Jentsch
"""