This commit is contained in:
Patrick Jentsch 2020-09-23 15:52:24 +02:00
parent ac4b5c2fd8
commit 7d52ad9f68
2 changed files with 25 additions and 28 deletions

View File

@ -3,19 +3,26 @@ image: docker:stable
services:
- docker:stable-dind
variables:
DOCKER_DRIVER: overlay2
stages:
- build
- push
- clean
before_script:
- docker login -u gitlab-ci-token -p $CI_JOB_TOKEN $CI_REGISTRY
variables:
DOCKER_DRIVER: overlay2
.reg_setup:
before_script:
- apk add --no-cache curl
- curl --fail --show-error --location "https://github.com/genuinetools/reg/releases/download/v$REG_VERSION/reg-linux-amd64" --output /usr/local/bin/reg
- echo "$REG_SHA256 /usr/local/bin/reg" | sha256sum -c -
- chmod a+x /usr/local/bin/reg
variables:
REG_SHA256: ade837fc5224acd8c34732bf54a94f579b47851cc6a7fd5899a98386b782e228
REG_VERSION: 0.16.1
build_image:
script:
- docker login -u gitlab-ci-token -p $CI_JOB_TOKEN $CI_REGISTRY
- docker build -t $INTERMEDIATE_IMAGE_TAG .
- docker push $INTERMEDIATE_IMAGE_TAG
stage: build
@ -25,10 +32,14 @@ build_image:
INTERMEDIATE_IMAGE_TAG: $CI_REGISTRY_IMAGE:$CI_COMMIT_SHA
push_master:
extends:
- .reg_setup
only:
- master
script:
- docker login -u gitlab-ci-token -p $CI_JOB_TOKEN $CI_REGISTRY
- docker pull $INTERMEDIATE_IMAGE_TAG
- /usr/local/bin/reg rm -d --auth-url $CI_REGISTRY -u $CI_REGISTRY_USER -p $CI_REGISTRY_PASSWORD $INTERMEDIATE_IMAGE_TAG
- docker tag $INTERMEDIATE_IMAGE_TAG $IMAGE_TAG
- docker push $IMAGE_TAG
stage: push
@ -39,13 +50,17 @@ push_master:
INTERMEDIATE_IMAGE_TAG: $CI_REGISTRY_IMAGE:$CI_COMMIT_SHA
push_other:
extends:
- .reg_setup
except:
- master
only:
- branches
- tags
script:
- docker login -u gitlab-ci-token -p $CI_JOB_TOKEN $CI_REGISTRY
- docker pull $INTERMEDIATE_IMAGE_TAG
- /usr/local/bin/reg rm -d --auth-url $CI_REGISTRY -u $CI_REGISTRY_USER -p $CI_REGISTRY_PASSWORD $INTERMEDIATE_IMAGE_TAG
- docker tag $INTERMEDIATE_IMAGE_TAG $IMAGE_TAG
- docker push $IMAGE_TAG
stage: push
@ -54,19 +69,3 @@ push_other:
variables:
IMAGE_TAG: $CI_REGISTRY_IMAGE:CI_COMMIT_REF_NAME
INTERMEDIATE_IMAGE_TAG: $CI_REGISTRY_IMAGE:$CI_COMMIT_SHA
delete_image:
before_script:
- apk add --no-cache curl
- curl --fail --show-error --location "https://github.com/genuinetools/reg/releases/download/v$REG_VERSION/reg-linux-amd64" --output /usr/local/bin/reg
- echo "$REG_SHA256 /usr/local/bin/reg" | sha256sum -c -
- chmod a+x /usr/local/bin/reg
script:
- /usr/local/bin/reg rm -d --auth-url $CI_REGISTRY -u $CI_REGISTRY_USER -p $CI_REGISTRY_PASSWORD $INTERMEDIATE_IMAGE_TAG
stage: clean
tags:
- docker
variables:
INTERMEDIATE_IMAGE_TAG: $CI_PROJECT_PATH:$CI_COMMIT_SHA
REG_SHA256: ade837fc5224acd8c34732bf54a94f579b47851cc6a7fd5899a98386b782e228
REG_VERSION: 0.16.1

10
ocr
View File

@ -1,16 +1,14 @@
#!/usr/bin/env python2.7
# coding=utf-8
"""
ocr
Usage: For usage instructions run with option --help
Usage: For usage instructions run with option --help
Authors: Patrick Jentsch <p.jentsch@uni-bielefeld.de
Stephan Porada <sporada@uni-bielefeld.de>
"""
from argparse import ArgumentParser
from natsort import natsorted
from pyflow import WorkflowRunner
@ -20,8 +18,7 @@ import sys
import tempfile
TESSERACT_MODELS = ['deu', 'eng', 'enm', 'fra', 'frk', 'frm', 'ita', 'por',
'spa']
TESSERACT_MODELS = ['deu', 'eng', 'enm', 'fra', 'frk', 'frm', 'ita', 'por', 'spa'] # noqa
def parse_args():
@ -448,7 +445,8 @@ def collect_jobs(input_dir, output_dir, intermediate_dir):
for file in os.listdir(input_dir):
if os.path.isdir(os.path.join(input_dir, file)):
jobs += collect_jobs(os.path.join(input_dir, file),
os.path.join(output_dir, file))
os.path.join(output_dir, file),
os.path.join(intermediate_dir, file))
elif file.lower().endswith('.pdf'):
job = OCRPipelineJob(os.path.join(input_dir, file),
os.path.join(output_dir, file),