Merge branch 'development' into binarization-threshold

This commit is contained in:
Inga Kirschnick
2022-11-11 15:39:40 +01:00
23 changed files with 1248 additions and 88 deletions

View File

@@ -3,7 +3,8 @@ from app.models import (
Job,
JobResult,
JobStatus,
TesseractOCRPipelineModel
TesseractOCRPipelineModel,
SpaCyNLPPipelineModel
)
from datetime import datetime
from flask import current_app
@@ -52,7 +53,12 @@ def _create_job_service(job):
command += f' --mem-mb {mem_mb}'
command += f' --n-cores {n_cores}'
if job.service == 'spacy-nlp-pipeline':
command += f' -m {job.service_args["model"]}'
model_id = hashids.decode(job.service_args['model'])
model = SpaCyNLPPipelineModel.query.get(model_id)
if model is None:
job.status = JobStatus.FAILED
return
command += f' -m {model.pipeline_name}'
if 'encoding_detection' in job.service_args and job.service_args['encoding_detection']:
command += ' --check-encoding'
elif job.service == 'tesseract-ocr-pipeline':
@@ -105,6 +111,16 @@ def _create_job_service(job):
models_mount_target = f'/usr/local/share/tessdata/{model.filename}'
models_mount = f'{models_mount_source}:{models_mount_target}:ro'
mounts.append(models_mount)
elif job.service == 'spacy-nlp-pipeline':
model_id = hashids.decode(job.service_args['model'])
model = SpaCyNLPPipelineModel.query.get(model_id)
if model is None:
job.status = JobStatus.FAILED
return
models_mount_source = model.path
models_mount_target = f'/usr/local/share/spacy/models/{model.filename}'
models_mount = f'{models_mount_source}:{models_mount_target}:ro'
mounts.append(models_mount)
''' ### Output mount ### '''
output_mount_source = os.path.join(job.path, 'results')
output_mount_target = '/output'
@@ -130,6 +146,8 @@ def _create_job_service(job):
)
''' ## Restart policy ## '''
restart_policy = docker.types.RestartPolicy()
print(command)
print(mounts)
try:
docker_client.services.create(
image,