diff --git a/app/daemon/job_utils.py b/app/daemon/job_utils.py index ab17e760..cfb362db 100644 --- a/app/daemon/job_utils.py +++ b/app/daemon/job_utils.py @@ -65,6 +65,9 @@ def _create_job_service(job): command += f' -m {job.service_args["model"]}' if 'binarization' in job.service_args and job.service_args['binarization']: command += ' --binarize' + if 'ocropus_nlbin_threshold' in job.service_args and job.service_args['ocropus_nlbin_threshold']: + value = job.service_args['ocropus_nlbin_threshold'] + command += f' --ocropus-nlbin-threshold {value}' elif job.service == 'transkribus-htr-pipeline': transkribus_htr_pipeline_model_id = job.service_args['model'] command += f' -m {transkribus_htr_pipeline_model_id}' @@ -144,8 +147,6 @@ def _create_job_service(job): ) ''' ## Restart policy ## ''' restart_policy = docker.types.RestartPolicy() - print(command) - print(mounts) try: docker_client.services.create( image, diff --git a/app/services/forms.py b/app/services/forms.py index 58bab164..96caecc4 100644 --- a/app/services/forms.py +++ b/app/services/forms.py @@ -1,16 +1,12 @@ from flask_login import current_user from flask_wtf import FlaskForm from flask_wtf.file import FileField, FileRequired -from wtforms import ( - BooleanField, - MultipleFileField, - SelectField, - StringField, - SubmitField, - ValidationError -) +from wtforms import (BooleanField, DecimalRangeField, MultipleFileField, + SelectField, StringField, SubmitField, ValidationError) from wtforms.validators import InputRequired, Length -from app.models import TesseractOCRPipelineModel, SpaCyNLPPipelineModel + +from app.models import SpaCyNLPPipelineModel, TesseractOCRPipelineModel + from . import SERVICES @@ -49,13 +45,16 @@ class CreateTesseractOCRPipelineJobForm(CreateJobBaseForm): binarization = BooleanField('Binarization') pdf = FileField('File', validators=[FileRequired()]) model = SelectField('Model', validators=[InputRequired()]) + ocropus_nlbin_threshold = DecimalRangeField( + render_kw={'min': 0, 'max': 1, 'step': 0.1, 'start': [0.5], 'disabled': True} + ) def validate_binarization(self, field): service_info = SERVICES['tesseract-ocr-pipeline']['versions'][self.version.data] if field.data: if not('methods' in service_info and 'binarization' in service_info['methods']): raise ValidationError('Binarization is not available') - + def validate_pdf(self, field): if field.data.mimetype != 'application/pdf': raise ValidationError('PDF files only!') @@ -68,10 +67,16 @@ class CreateTesseractOCRPipelineJobForm(CreateJobBaseForm): if self.binarization.render_kw is None: self.binarization.render_kw = {} self.binarization.render_kw['disabled'] = True + if self.ocropus_nlbin_threshold.render_kw is None: + self.ocropus_nlbin_threshold.render_kw = {} + self.ocropus_nlbin_threshold.render_kw['disabled'] = True if 'methods' in service_info: if 'binarization' in service_info['methods']: if 'disabled' in self.binarization.render_kw: del self.binarization.render_kw['disabled'] + if 'ocropus_nlbin_threshold' in service_info['methods']: + if 'disabled' in self.ocropus_nlbin_threshold.render_kw: + del self.ocropus_nlbin_threshold.render_kw['disabled'] models = [ x for x in TesseractOCRPipelineModel.query.order_by(TesseractOCRPipelineModel.title).all() if version in x.compatible_service_versions and (x.shared == True or x.user == current_user) diff --git a/app/services/routes.py b/app/services/routes.py index 4bfca9bb..7748240c 100644 --- a/app/services/routes.py +++ b/app/services/routes.py @@ -79,7 +79,8 @@ def tesseract_ocr_pipeline(): service=service_name, service_args={ 'binarization': form.binarization.data, - 'model': hashids.decode(form.model.data) + 'model': hashids.decode(form.model.data), + 'ocropus_nlbin_threshold': float(form.ocropus_nlbin_threshold.data) }, service_version=form.version.data, user=current_user diff --git a/app/services/services.yml b/app/services/services.yml index c9d61e08..8a8377d5 100644 --- a/app/services/services.yml +++ b/app/services/services.yml @@ -20,6 +20,7 @@ tesseract-ocr-pipeline: 0.1.1: methods: - 'binarization' + - 'ocropus_nlbin_threshold' publishing_year: 2022 url: 'https://gitlab.ub.uni-bielefeld.de/sfb1288inf/tesseract-ocr-pipeline/-/releases/v0.1.1' transkribus-htr-pipeline: diff --git a/app/templates/services/tesseract_ocr_pipeline.html.j2 b/app/templates/services/tesseract_ocr_pipeline.html.j2 index b66e968b..29ff60e8 100644 --- a/app/templates/services/tesseract_ocr_pipeline.html.j2 +++ b/app/templates/services/tesseract_ocr_pipeline.html.j2 @@ -84,6 +84,11 @@ +
Intensity (between 0 and 1)
+{{ form.ocropus_nlbin_threshold() }}
+