mirror of
https://gitlab.ub.uni-bielefeld.de/sfb1288inf/ocr.git
synced 2025-01-12 21:10:35 +00:00
Update
This commit is contained in:
parent
ac4b5c2fd8
commit
7d52ad9f68
@ -3,19 +3,26 @@ image: docker:stable
|
||||
services:
|
||||
- docker:stable-dind
|
||||
|
||||
variables:
|
||||
DOCKER_DRIVER: overlay2
|
||||
|
||||
stages:
|
||||
- build
|
||||
- push
|
||||
- clean
|
||||
|
||||
before_script:
|
||||
- docker login -u gitlab-ci-token -p $CI_JOB_TOKEN $CI_REGISTRY
|
||||
variables:
|
||||
DOCKER_DRIVER: overlay2
|
||||
|
||||
.reg_setup:
|
||||
before_script:
|
||||
- apk add --no-cache curl
|
||||
- curl --fail --show-error --location "https://github.com/genuinetools/reg/releases/download/v$REG_VERSION/reg-linux-amd64" --output /usr/local/bin/reg
|
||||
- echo "$REG_SHA256 /usr/local/bin/reg" | sha256sum -c -
|
||||
- chmod a+x /usr/local/bin/reg
|
||||
variables:
|
||||
REG_SHA256: ade837fc5224acd8c34732bf54a94f579b47851cc6a7fd5899a98386b782e228
|
||||
REG_VERSION: 0.16.1
|
||||
|
||||
build_image:
|
||||
script:
|
||||
- docker login -u gitlab-ci-token -p $CI_JOB_TOKEN $CI_REGISTRY
|
||||
- docker build -t $INTERMEDIATE_IMAGE_TAG .
|
||||
- docker push $INTERMEDIATE_IMAGE_TAG
|
||||
stage: build
|
||||
@ -25,10 +32,14 @@ build_image:
|
||||
INTERMEDIATE_IMAGE_TAG: $CI_REGISTRY_IMAGE:$CI_COMMIT_SHA
|
||||
|
||||
push_master:
|
||||
extends:
|
||||
- .reg_setup
|
||||
only:
|
||||
- master
|
||||
script:
|
||||
- docker login -u gitlab-ci-token -p $CI_JOB_TOKEN $CI_REGISTRY
|
||||
- docker pull $INTERMEDIATE_IMAGE_TAG
|
||||
- /usr/local/bin/reg rm -d --auth-url $CI_REGISTRY -u $CI_REGISTRY_USER -p $CI_REGISTRY_PASSWORD $INTERMEDIATE_IMAGE_TAG
|
||||
- docker tag $INTERMEDIATE_IMAGE_TAG $IMAGE_TAG
|
||||
- docker push $IMAGE_TAG
|
||||
stage: push
|
||||
@ -39,13 +50,17 @@ push_master:
|
||||
INTERMEDIATE_IMAGE_TAG: $CI_REGISTRY_IMAGE:$CI_COMMIT_SHA
|
||||
|
||||
push_other:
|
||||
extends:
|
||||
- .reg_setup
|
||||
except:
|
||||
- master
|
||||
only:
|
||||
- branches
|
||||
- tags
|
||||
script:
|
||||
- docker login -u gitlab-ci-token -p $CI_JOB_TOKEN $CI_REGISTRY
|
||||
- docker pull $INTERMEDIATE_IMAGE_TAG
|
||||
- /usr/local/bin/reg rm -d --auth-url $CI_REGISTRY -u $CI_REGISTRY_USER -p $CI_REGISTRY_PASSWORD $INTERMEDIATE_IMAGE_TAG
|
||||
- docker tag $INTERMEDIATE_IMAGE_TAG $IMAGE_TAG
|
||||
- docker push $IMAGE_TAG
|
||||
stage: push
|
||||
@ -54,19 +69,3 @@ push_other:
|
||||
variables:
|
||||
IMAGE_TAG: $CI_REGISTRY_IMAGE:$CI_COMMIT_REF_NAME
|
||||
INTERMEDIATE_IMAGE_TAG: $CI_REGISTRY_IMAGE:$CI_COMMIT_SHA
|
||||
|
||||
delete_image:
|
||||
before_script:
|
||||
- apk add --no-cache curl
|
||||
- curl --fail --show-error --location "https://github.com/genuinetools/reg/releases/download/v$REG_VERSION/reg-linux-amd64" --output /usr/local/bin/reg
|
||||
- echo "$REG_SHA256 /usr/local/bin/reg" | sha256sum -c -
|
||||
- chmod a+x /usr/local/bin/reg
|
||||
script:
|
||||
- /usr/local/bin/reg rm -d --auth-url $CI_REGISTRY -u $CI_REGISTRY_USER -p $CI_REGISTRY_PASSWORD $INTERMEDIATE_IMAGE_TAG
|
||||
stage: clean
|
||||
tags:
|
||||
- docker
|
||||
variables:
|
||||
INTERMEDIATE_IMAGE_TAG: $CI_PROJECT_PATH:$CI_COMMIT_SHA
|
||||
REG_SHA256: ade837fc5224acd8c34732bf54a94f579b47851cc6a7fd5899a98386b782e228
|
||||
REG_VERSION: 0.16.1
|
||||
|
10
ocr
10
ocr
@ -1,16 +1,14 @@
|
||||
#!/usr/bin/env python2.7
|
||||
# coding=utf-8
|
||||
|
||||
|
||||
"""
|
||||
ocr
|
||||
|
||||
Usage: For usage instructions run with option --help
|
||||
Usage: For usage instructions run with option --help
|
||||
Authors: Patrick Jentsch <p.jentsch@uni-bielefeld.de>
|
||||
Stephan Porada <sporada@uni-bielefeld.de>
|
||||
"""
|
||||
|
||||
|
||||
from argparse import ArgumentParser
|
||||
from natsort import natsorted
|
||||
from pyflow import WorkflowRunner
|
||||
@ -20,8 +18,7 @@ import sys
|
||||
import tempfile
|
||||
|
||||
|
||||
TESSERACT_MODELS = ['deu', 'eng', 'enm', 'fra', 'frk', 'frm', 'ita', 'por',
|
||||
'spa']
|
||||
TESSERACT_MODELS = ['deu', 'eng', 'enm', 'fra', 'frk', 'frm', 'ita', 'por', 'spa'] # noqa
|
||||
|
||||
|
||||
def parse_args():
|
||||
@ -448,7 +445,8 @@ def collect_jobs(input_dir, output_dir, intermediate_dir):
|
||||
for file in os.listdir(input_dir):
|
||||
if os.path.isdir(os.path.join(input_dir, file)):
|
||||
jobs += collect_jobs(os.path.join(input_dir, file),
|
||||
os.path.join(output_dir, file))
|
||||
os.path.join(output_dir, file),
|
||||
os.path.join(intermediate_dir, file))
|
||||
elif file.lower().endswith('.pdf'):
|
||||
job = OCRPipelineJob(os.path.join(input_dir, file),
|
||||
os.path.join(output_dir, file),
|
||||
|
Loading…
x
Reference in New Issue
Block a user