From 7d52ad9f68ca80948c4248a6e83a92c4330111ba Mon Sep 17 00:00:00 2001
From: Patrick Jentsch
Date: Wed, 23 Sep 2020 15:52:24 +0200
Subject: [PATCH] Fold image cleanup into CI push jobs; fix collect_jobs recursion
---
.gitlab-ci.yml | 43 +++++++++++++++++++++----------------------
ocr | 10 ++++------
2 files changed, 25 insertions(+), 28 deletions(-)
diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
index 4ace872..cac14f5 100644
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -3,19 +3,26 @@ image: docker:stable
services:
- docker:stable-dind
-variables:
- DOCKER_DRIVER: overlay2
-
stages:
- build
- push
- - clean
-before_script:
- - docker login -u gitlab-ci-token -p $CI_JOB_TOKEN $CI_REGISTRY
+variables:
+ DOCKER_DRIVER: overlay2
+
+.reg_setup:
+ before_script:
+ - apk add --no-cache curl
+ - curl --fail --show-error --location "https://github.com/genuinetools/reg/releases/download/v$REG_VERSION/reg-linux-amd64" --output /usr/local/bin/reg
+ - echo "$REG_SHA256 /usr/local/bin/reg" | sha256sum -c -
+ - chmod a+x /usr/local/bin/reg
+ variables:
+ REG_SHA256: ade837fc5224acd8c34732bf54a94f579b47851cc6a7fd5899a98386b782e228
+ REG_VERSION: 0.16.1
build_image:
script:
+ - docker login -u gitlab-ci-token -p $CI_JOB_TOKEN $CI_REGISTRY
- docker build -t $INTERMEDIATE_IMAGE_TAG .
- docker push $INTERMEDIATE_IMAGE_TAG
stage: build
@@ -25,10 +32,14 @@ build_image:
INTERMEDIATE_IMAGE_TAG: $CI_REGISTRY_IMAGE:$CI_COMMIT_SHA
push_master:
+ extends:
+ - .reg_setup
only:
- master
script:
+ - docker login -u gitlab-ci-token -p $CI_JOB_TOKEN $CI_REGISTRY
- docker pull $INTERMEDIATE_IMAGE_TAG
+ - /usr/local/bin/reg rm -d --auth-url $CI_REGISTRY -u $CI_REGISTRY_USER -p $CI_REGISTRY_PASSWORD $INTERMEDIATE_IMAGE_TAG
- docker tag $INTERMEDIATE_IMAGE_TAG $IMAGE_TAG
- docker push $IMAGE_TAG
stage: push
@@ -39,13 +50,17 @@ push_master:
INTERMEDIATE_IMAGE_TAG: $CI_REGISTRY_IMAGE:$CI_COMMIT_SHA
push_other:
+ extends:
+ - .reg_setup
except:
- master
only:
- branches
- tags
script:
+ - docker login -u gitlab-ci-token -p $CI_JOB_TOKEN $CI_REGISTRY
- docker pull $INTERMEDIATE_IMAGE_TAG
+ - /usr/local/bin/reg rm -d --auth-url $CI_REGISTRY -u $CI_REGISTRY_USER -p $CI_REGISTRY_PASSWORD $INTERMEDIATE_IMAGE_TAG
- docker tag $INTERMEDIATE_IMAGE_TAG $IMAGE_TAG
- docker push $IMAGE_TAG
stage: push
@@ -54,19 +69,3 @@ push_other:
variables:
IMAGE_TAG: $CI_REGISTRY_IMAGE:CI_COMMIT_REF_NAME
INTERMEDIATE_IMAGE_TAG: $CI_REGISTRY_IMAGE:$CI_COMMIT_SHA
-
-delete_image:
- before_script:
- - apk add --no-cache curl
- - curl --fail --show-error --location "https://github.com/genuinetools/reg/releases/download/v$REG_VERSION/reg-linux-amd64" --output /usr/local/bin/reg
- - echo "$REG_SHA256 /usr/local/bin/reg" | sha256sum -c -
- - chmod a+x /usr/local/bin/reg
- script:
- - /usr/local/bin/reg rm -d --auth-url $CI_REGISTRY -u $CI_REGISTRY_USER -p $CI_REGISTRY_PASSWORD $INTERMEDIATE_IMAGE_TAG
- stage: clean
- tags:
- - docker
- variables:
- INTERMEDIATE_IMAGE_TAG: $CI_PROJECT_PATH:$CI_COMMIT_SHA
- REG_SHA256: ade837fc5224acd8c34732bf54a94f579b47851cc6a7fd5899a98386b782e228
- REG_VERSION: 0.16.1
diff --git a/ocr b/ocr
index ab50ebf..0ae9f52 100755
--- a/ocr
+++ b/ocr
@@ -1,16 +1,14 @@
#!/usr/bin/env python2.7
# coding=utf-8
-
"""
ocr
-Usage: For usage instructions run with option --help
+Usage: For usage instructions run with option --help
Authors: Patrick Jentsch
"""
-
from argparse import ArgumentParser
from natsort import natsorted
from pyflow import WorkflowRunner
@@ -20,8 +18,7 @@ import sys
import tempfile
-TESSERACT_MODELS = ['deu', 'eng', 'enm', 'fra', 'frk', 'frm', 'ita', 'por',
- 'spa']
+TESSERACT_MODELS = ['deu', 'eng', 'enm', 'fra', 'frk', 'frm', 'ita', 'por', 'spa'] # noqa
def parse_args():
@@ -448,7 +445,8 @@ def collect_jobs(input_dir, output_dir, intermediate_dir):
for file in os.listdir(input_dir):
if os.path.isdir(os.path.join(input_dir, file)):
jobs += collect_jobs(os.path.join(input_dir, file),
- os.path.join(output_dir, file))
+ os.path.join(output_dir, file),
+ os.path.join(intermediate_dir, file))
elif file.lower().endswith('.pdf'):
job = OCRPipelineJob(os.path.join(input_dir, file),
os.path.join(output_dir, file),