mirror of
https://gitlab.ub.uni-bielefeld.de/sfb1288inf/nopaque.git
synced 2024-11-15 01:05:42 +00:00
Merge branch 'binarization-threshold' into development
This commit is contained in:
commit
7bfbe4fd32
@ -65,6 +65,9 @@ def _create_job_service(job):
|
|||||||
command += f' -m {job.service_args["model"]}'
|
command += f' -m {job.service_args["model"]}'
|
||||||
if 'binarization' in job.service_args and job.service_args['binarization']:
|
if 'binarization' in job.service_args and job.service_args['binarization']:
|
||||||
command += ' --binarize'
|
command += ' --binarize'
|
||||||
|
if 'ocropus_nlbin_threshold' in job.service_args and job.service_args['ocropus_nlbin_threshold']:
|
||||||
|
value = job.service_args['ocropus_nlbin_threshold']
|
||||||
|
command += f' --ocropus-nlbin-threshold {value}'
|
||||||
elif job.service == 'transkribus-htr-pipeline':
|
elif job.service == 'transkribus-htr-pipeline':
|
||||||
transkribus_htr_pipeline_model_id = job.service_args['model']
|
transkribus_htr_pipeline_model_id = job.service_args['model']
|
||||||
command += f' -m {transkribus_htr_pipeline_model_id}'
|
command += f' -m {transkribus_htr_pipeline_model_id}'
|
||||||
@ -144,8 +147,6 @@ def _create_job_service(job):
|
|||||||
)
|
)
|
||||||
''' ## Restart policy ## '''
|
''' ## Restart policy ## '''
|
||||||
restart_policy = docker.types.RestartPolicy()
|
restart_policy = docker.types.RestartPolicy()
|
||||||
print(command)
|
|
||||||
print(mounts)
|
|
||||||
try:
|
try:
|
||||||
docker_client.services.create(
|
docker_client.services.create(
|
||||||
image,
|
image,
|
||||||
|
@ -1,16 +1,12 @@
|
|||||||
from flask_login import current_user
|
from flask_login import current_user
|
||||||
from flask_wtf import FlaskForm
|
from flask_wtf import FlaskForm
|
||||||
from flask_wtf.file import FileField, FileRequired
|
from flask_wtf.file import FileField, FileRequired
|
||||||
from wtforms import (
|
from wtforms import (BooleanField, DecimalRangeField, MultipleFileField,
|
||||||
BooleanField,
|
SelectField, StringField, SubmitField, ValidationError)
|
||||||
MultipleFileField,
|
|
||||||
SelectField,
|
|
||||||
StringField,
|
|
||||||
SubmitField,
|
|
||||||
ValidationError
|
|
||||||
)
|
|
||||||
from wtforms.validators import InputRequired, Length
|
from wtforms.validators import InputRequired, Length
|
||||||
from app.models import TesseractOCRPipelineModel, SpaCyNLPPipelineModel
|
|
||||||
|
from app.models import SpaCyNLPPipelineModel, TesseractOCRPipelineModel
|
||||||
|
|
||||||
from . import SERVICES
|
from . import SERVICES
|
||||||
|
|
||||||
|
|
||||||
@ -49,13 +45,16 @@ class CreateTesseractOCRPipelineJobForm(CreateJobBaseForm):
|
|||||||
binarization = BooleanField('Binarization')
|
binarization = BooleanField('Binarization')
|
||||||
pdf = FileField('File', validators=[FileRequired()])
|
pdf = FileField('File', validators=[FileRequired()])
|
||||||
model = SelectField('Model', validators=[InputRequired()])
|
model = SelectField('Model', validators=[InputRequired()])
|
||||||
|
ocropus_nlbin_threshold = DecimalRangeField(
|
||||||
|
render_kw={'min': 0, 'max': 1, 'step': 0.1, 'start': [0.5], 'disabled': True}
|
||||||
|
)
|
||||||
|
|
||||||
def validate_binarization(self, field):
|
def validate_binarization(self, field):
|
||||||
service_info = SERVICES['tesseract-ocr-pipeline']['versions'][self.version.data]
|
service_info = SERVICES['tesseract-ocr-pipeline']['versions'][self.version.data]
|
||||||
if field.data:
|
if field.data:
|
||||||
if not('methods' in service_info and 'binarization' in service_info['methods']):
|
if not('methods' in service_info and 'binarization' in service_info['methods']):
|
||||||
raise ValidationError('Binarization is not available')
|
raise ValidationError('Binarization is not available')
|
||||||
|
|
||||||
def validate_pdf(self, field):
|
def validate_pdf(self, field):
|
||||||
if field.data.mimetype != 'application/pdf':
|
if field.data.mimetype != 'application/pdf':
|
||||||
raise ValidationError('PDF files only!')
|
raise ValidationError('PDF files only!')
|
||||||
@ -68,10 +67,16 @@ class CreateTesseractOCRPipelineJobForm(CreateJobBaseForm):
|
|||||||
if self.binarization.render_kw is None:
|
if self.binarization.render_kw is None:
|
||||||
self.binarization.render_kw = {}
|
self.binarization.render_kw = {}
|
||||||
self.binarization.render_kw['disabled'] = True
|
self.binarization.render_kw['disabled'] = True
|
||||||
|
if self.ocropus_nlbin_threshold.render_kw is None:
|
||||||
|
self.ocropus_nlbin_threshold.render_kw = {}
|
||||||
|
self.ocropus_nlbin_threshold.render_kw['disabled'] = True
|
||||||
if 'methods' in service_info:
|
if 'methods' in service_info:
|
||||||
if 'binarization' in service_info['methods']:
|
if 'binarization' in service_info['methods']:
|
||||||
if 'disabled' in self.binarization.render_kw:
|
if 'disabled' in self.binarization.render_kw:
|
||||||
del self.binarization.render_kw['disabled']
|
del self.binarization.render_kw['disabled']
|
||||||
|
if 'ocropus_nlbin_threshold' in service_info['methods']:
|
||||||
|
if 'disabled' in self.ocropus_nlbin_threshold.render_kw:
|
||||||
|
del self.ocropus_nlbin_threshold.render_kw['disabled']
|
||||||
models = [
|
models = [
|
||||||
x for x in TesseractOCRPipelineModel.query.order_by(TesseractOCRPipelineModel.title).all()
|
x for x in TesseractOCRPipelineModel.query.order_by(TesseractOCRPipelineModel.title).all()
|
||||||
if version in x.compatible_service_versions and (x.shared == True or x.user == current_user)
|
if version in x.compatible_service_versions and (x.shared == True or x.user == current_user)
|
||||||
|
@ -79,7 +79,8 @@ def tesseract_ocr_pipeline():
|
|||||||
service=service_name,
|
service=service_name,
|
||||||
service_args={
|
service_args={
|
||||||
'binarization': form.binarization.data,
|
'binarization': form.binarization.data,
|
||||||
'model': hashids.decode(form.model.data)
|
'model': hashids.decode(form.model.data),
|
||||||
|
'ocropus_nlbin_threshold': float(form.ocropus_nlbin_threshold.data)
|
||||||
},
|
},
|
||||||
service_version=form.version.data,
|
service_version=form.version.data,
|
||||||
user=current_user
|
user=current_user
|
||||||
|
@ -20,6 +20,7 @@ tesseract-ocr-pipeline:
|
|||||||
0.1.1:
|
0.1.1:
|
||||||
methods:
|
methods:
|
||||||
- 'binarization'
|
- 'binarization'
|
||||||
|
- 'ocropus_nlbin_threshold'
|
||||||
publishing_year: 2022
|
publishing_year: 2022
|
||||||
url: 'https://gitlab.ub.uni-bielefeld.de/sfb1288inf/tesseract-ocr-pipeline/-/releases/v0.1.1'
|
url: 'https://gitlab.ub.uni-bielefeld.de/sfb1288inf/tesseract-ocr-pipeline/-/releases/v0.1.1'
|
||||||
transkribus-htr-pipeline:
|
transkribus-htr-pipeline:
|
||||||
|
@ -84,6 +84,11 @@
|
|||||||
</label>
|
</label>
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
|
<div class="col s12"><p> </p></div>
|
||||||
|
<div class="col s9">
|
||||||
|
<p>Intensity (between 0 and 1)</p>
|
||||||
|
<p class="range-field">{{ form.ocropus_nlbin_threshold() }}</p>
|
||||||
|
</div>
|
||||||
<div class="col s12"><p> </p></div>
|
<div class="col s12"><p> </p></div>
|
||||||
<div class="col s12 divider"></div>
|
<div class="col s12 divider"></div>
|
||||||
<div class="col s12"><p> </p></div>
|
<div class="col s12"><p> </p></div>
|
||||||
|
Loading…
Reference in New Issue
Block a user