Merge branch 'development' of gitlab.ub.uni-bielefeld.de:sfb1288inf/nopaque into development

This commit is contained in:
Patrick Jentsch 2022-11-15 15:11:19 +01:00
commit 9d34a5b71b
5 changed files with 26 additions and 13 deletions

View File

@ -65,6 +65,9 @@ def _create_job_service(job):
command += f' -m {job.service_args["model"]}' command += f' -m {job.service_args["model"]}'
if 'binarization' in job.service_args and job.service_args['binarization']: if 'binarization' in job.service_args and job.service_args['binarization']:
command += ' --binarize' command += ' --binarize'
if 'ocropus_nlbin_threshold' in job.service_args and job.service_args['ocropus_nlbin_threshold']:
value = job.service_args['ocropus_nlbin_threshold']
command += f' --ocropus-nlbin-threshold {value}'
elif job.service == 'transkribus-htr-pipeline': elif job.service == 'transkribus-htr-pipeline':
transkribus_htr_pipeline_model_id = job.service_args['model'] transkribus_htr_pipeline_model_id = job.service_args['model']
command += f' -m {transkribus_htr_pipeline_model_id}' command += f' -m {transkribus_htr_pipeline_model_id}'
@ -144,8 +147,6 @@ def _create_job_service(job):
) )
''' ## Restart policy ## ''' ''' ## Restart policy ## '''
restart_policy = docker.types.RestartPolicy() restart_policy = docker.types.RestartPolicy()
print(command)
print(mounts)
try: try:
docker_client.services.create( docker_client.services.create(
image, image,

View File

@ -1,16 +1,12 @@
from flask_login import current_user from flask_login import current_user
from flask_wtf import FlaskForm from flask_wtf import FlaskForm
from flask_wtf.file import FileField, FileRequired from flask_wtf.file import FileField, FileRequired
from wtforms import ( from wtforms import (BooleanField, DecimalRangeField, MultipleFileField,
BooleanField, SelectField, StringField, SubmitField, ValidationError)
MultipleFileField,
SelectField,
StringField,
SubmitField,
ValidationError
)
from wtforms.validators import InputRequired, Length from wtforms.validators import InputRequired, Length
from app.models import TesseractOCRPipelineModel, SpaCyNLPPipelineModel
from app.models import SpaCyNLPPipelineModel, TesseractOCRPipelineModel
from . import SERVICES from . import SERVICES
@ -49,13 +45,16 @@ class CreateTesseractOCRPipelineJobForm(CreateJobBaseForm):
binarization = BooleanField('Binarization') binarization = BooleanField('Binarization')
pdf = FileField('File', validators=[FileRequired()]) pdf = FileField('File', validators=[FileRequired()])
model = SelectField('Model', validators=[InputRequired()]) model = SelectField('Model', validators=[InputRequired()])
ocropus_nlbin_threshold = DecimalRangeField(
render_kw={'min': 0, 'max': 1, 'step': 0.1, 'start': [0.5], 'disabled': True}
)
def validate_binarization(self, field): def validate_binarization(self, field):
service_info = SERVICES['tesseract-ocr-pipeline']['versions'][self.version.data] service_info = SERVICES['tesseract-ocr-pipeline']['versions'][self.version.data]
if field.data: if field.data:
if not('methods' in service_info and 'binarization' in service_info['methods']): if not('methods' in service_info and 'binarization' in service_info['methods']):
raise ValidationError('Binarization is not available') raise ValidationError('Binarization is not available')
def validate_pdf(self, field): def validate_pdf(self, field):
if field.data.mimetype != 'application/pdf': if field.data.mimetype != 'application/pdf':
raise ValidationError('PDF files only!') raise ValidationError('PDF files only!')
@ -68,10 +67,16 @@ class CreateTesseractOCRPipelineJobForm(CreateJobBaseForm):
if self.binarization.render_kw is None: if self.binarization.render_kw is None:
self.binarization.render_kw = {} self.binarization.render_kw = {}
self.binarization.render_kw['disabled'] = True self.binarization.render_kw['disabled'] = True
if self.ocropus_nlbin_threshold.render_kw is None:
self.ocropus_nlbin_threshold.render_kw = {}
self.ocropus_nlbin_threshold.render_kw['disabled'] = True
if 'methods' in service_info: if 'methods' in service_info:
if 'binarization' in service_info['methods']: if 'binarization' in service_info['methods']:
if 'disabled' in self.binarization.render_kw: if 'disabled' in self.binarization.render_kw:
del self.binarization.render_kw['disabled'] del self.binarization.render_kw['disabled']
if 'ocropus_nlbin_threshold' in service_info['methods']:
if 'disabled' in self.ocropus_nlbin_threshold.render_kw:
del self.ocropus_nlbin_threshold.render_kw['disabled']
models = [ models = [
x for x in TesseractOCRPipelineModel.query.order_by(TesseractOCRPipelineModel.title).all() x for x in TesseractOCRPipelineModel.query.order_by(TesseractOCRPipelineModel.title).all()
if version in x.compatible_service_versions and (x.shared == True or x.user == current_user) if version in x.compatible_service_versions and (x.shared == True or x.user == current_user)

View File

@ -79,7 +79,8 @@ def tesseract_ocr_pipeline():
service=service_name, service=service_name,
service_args={ service_args={
'binarization': form.binarization.data, 'binarization': form.binarization.data,
'model': hashids.decode(form.model.data) 'model': hashids.decode(form.model.data),
'ocropus_nlbin_threshold': float(form.ocropus_nlbin_threshold.data)
}, },
service_version=form.version.data, service_version=form.version.data,
user=current_user user=current_user

View File

@ -20,6 +20,7 @@ tesseract-ocr-pipeline:
0.1.1: 0.1.1:
methods: methods:
- 'binarization' - 'binarization'
- 'ocropus_nlbin_threshold'
publishing_year: 2022 publishing_year: 2022
url: 'https://gitlab.ub.uni-bielefeld.de/sfb1288inf/tesseract-ocr-pipeline/-/releases/v0.1.1' url: 'https://gitlab.ub.uni-bielefeld.de/sfb1288inf/tesseract-ocr-pipeline/-/releases/v0.1.1'
transkribus-htr-pipeline: transkribus-htr-pipeline:

View File

@ -84,6 +84,11 @@
</label> </label>
</div> </div>
</div> </div>
<div class="col s12"><p>&nbsp;</p></div>
<div class="col s9">
<p>Intensity (between 0 and 1)</p>
<p class="range-field">{{ form.ocropus_nlbin_threshold() }}</p>
</div>
<div class="col s12"><p>&nbsp;</p></div> <div class="col s12"><p>&nbsp;</p></div>
<div class="col s12 divider"></div> <div class="col s12 divider"></div>
<div class="col s12"><p>&nbsp;</p></div> <div class="col s12"><p>&nbsp;</p></div>