Update service pages and how version data is gathered.

This commit is contained in:
Patrick Jentsch
2021-03-26 13:10:42 +01:00
parent 55d94ea329
commit 2f9ecf8048
9 changed files with 133 additions and 43 deletions

View File

@ -25,7 +25,14 @@ SERVICES = {
'latest': '1.0.0',
'1.0.0': {
'check_encoding': True,
'models': {},
'models': {
'de': 'German',
'en': 'English',
'it': 'Italian',
'nl': 'Dutch',
'pl': 'Polish',
'zh': 'Chinese'
},
'publishing_data': {
'date': None,
'title': 'nopaque NLP service',
@ -42,14 +49,19 @@ SERVICES = {
'1.0.0': {
'binarization': True,
'models': {
'ara': 'Arabic',
'chi_tra': 'Chinese - Traditional',
'dan': 'Danish',
'eng': 'English',
'enm': 'English, Middle 1100-1500',
'fra': 'French',
'frm': 'French, Middle ca. 1400-1600',
'deu': 'German',
'frk': 'German Fraktur',
'ell': 'Greek, Modern (1453-)',
'ita': 'Italian',
'por': 'Portuguese',
'rus': 'Russian',
'spa': 'Spanish; Castilian',
},
'publishing_data': {

View File

@ -10,16 +10,18 @@ class AddJobForm(FlaskForm):
validators=[DataRequired(), Length(1, 255)])
submit = SubmitField()
title = StringField('Title', validators=[DataRequired(), Length(1, 32)])
version = SelectField('Version', validators=[DataRequired()])
class AddNLPJobForm(AddJobForm):
files = MultipleFileField('Files', validators=[DataRequired()])
model = SelectField('Model', validators=[DataRequired()])
version = SelectField('Version',
choices=[(x, x) for x in SERVICES['nlp']['versions'] if x != 'latest'], # noqa
default=SERVICES['nlp']['versions']['latest'],
validators=[DataRequired()])
check_encoding = BooleanField('Check encoding')
files = MultipleFileField('Files', validators=[DataRequired()])
language = SelectField('Language', choices=[('', 'Choose your option')],
default='', validators=[DataRequired()])
def validate_check_encoding(self, field):
    """Reject the encoding check when the chosen version does not offer it.

    Args:
        field: The ``check_encoding`` field being validated.

    Raises:
        ValidationError: If the option is enabled but the submitted NLP
            service version has no ``check_encoding`` entry, or is not a
            configured version at all.
    """
    if not field.data:
        return
    # ``self.version.data`` is client-supplied; look it up defensively so an
    # unknown value produces a validation error instead of a KeyError.
    version_config = SERVICES['nlp']['versions'].get(self.version.data)
    # The 'latest' key maps to a version string rather than a settings dict,
    # so anything that is not a dict cannot offer the option.
    if not isinstance(version_config, dict) or 'check_encoding' not in version_config:  # noqa
        raise ValidationError('Check encoding is not available in this version')  # noqa
def validate_files(form, field):
for file in field.data:
@ -27,37 +29,44 @@ class AddNLPJobForm(AddJobForm):
raise ValidationError('File does not have an approved '
'extension: .txt')
def __init__(self, *args, **kwargs):
    """Build the form for one NLP service version.

    Args:
        *args: Forwarded to the parent form constructor.
        **kwargs: Forwarded to the parent form constructor, except for the
            optional ``version`` keyword, which selects the service version
            whose options are offered. The alias ``'latest'`` and unknown
            values resolve to the latest configured version.
    """
    versions = SERVICES['nlp']['versions']
    version = kwargs.pop('version', versions['latest'])
    # 'latest' is an alias key holding a version string, not a settings
    # dict; indexing it for 'models' would raise TypeError. Resolve it (and
    # any unknown value) to the real latest version first.
    if version == 'latest' or version not in versions:
        version = versions['latest']
    super().__init__(*args, **kwargs)
    version_config = versions[version]
    if 'check_encoding' not in version_config:
        self.check_encoding.render_kw = {'disabled': True}
    # Assign a fresh list instead of extending in place, so the placeholder
    # list passed at field declaration can never be mutated through this
    # instance, whatever the installed WTForms version does with it.
    self.language.choices = list(self.language.choices) + list(version_config['models'].items())  # noqa
    self.version.choices = [(x, x) for x in versions if x != 'latest']
    self.version.default = version
class AddOCRJobForm(AddJobForm):
# Checkbox label shown to the user (spelling corrected from 'Binarazation').
binarization = BooleanField('Binarization')
files = MultipleFileField('Files', validators=[DataRequired()])
model = SelectField('Model', validators=[DataRequired()])
version = SelectField('Version',
choices=[(x, x) for x in SERVICES['ocr']['versions'] if x != 'latest'], # noqa
default=SERVICES['ocr']['versions']['latest'],
validators=[DataRequired()])
language = SelectField('Language', choices=[('', 'Choose your option')],
default='', validators=[DataRequired()])
def validate_binarization(form, field):
if field.data and 'binarization' not in SERVICES['ocr'][form.version.data]: # noqa
def validate_binarization(self, field):
    """Reject binarization when the chosen version does not offer it.

    Args:
        field: The ``binarization`` field being validated.

    Raises:
        ValidationError: If the option is enabled but the submitted OCR
            service version has no ``binarization`` entry, or is not a
            configured version at all.
    """
    if not field.data:
        return
    # ``self.version.data`` is client-supplied; look it up defensively so an
    # unknown value produces a validation error instead of a KeyError.
    version_config = SERVICES['ocr']['versions'].get(self.version.data)
    # The 'latest' key maps to a version string rather than a settings dict,
    # so anything that is not a dict cannot offer the option.
    if not isinstance(version_config, dict) or 'binarization' not in version_config:  # noqa
        raise ValidationError('Binarization is not available in this version')  # noqa
def validate_files(form, field):
def validate_files(self, field):
    """Check that every uploaded file name ends in ``.pdf``.

    The comparison is done on the lower-cased file name.

    Args:
        field: The ``files`` field; ``field.data`` is iterated and each item
            is expected to expose a ``filename`` attribute.

    Raises:
        ValidationError: On the first file whose name lacks the extension.
    """
    for upload in field.data:
        lowered_name = upload.filename.lower()
        if lowered_name.endswith('.pdf'):
            continue
        raise ValidationError('File does not have an approved '
                              'extension: .pdf')
def validate_model(form, field):
if field.data not in SERVICES['ocr'][form.versiondata]['models']:
raise ValidationError('Model is not available in this version')
def __init__(self, *args, **kwargs):
    """Build the form for one OCR service version.

    Args:
        *args: Forwarded to the parent form constructor.
        **kwargs: Forwarded to the parent form constructor, except for the
            optional ``version`` keyword, which selects the service version
            whose options are offered. The alias ``'latest'`` and unknown
            values resolve to the latest configured version.
    """
    versions = SERVICES['ocr']['versions']
    version = kwargs.pop('version', versions['latest'])
    # 'latest' is an alias key holding a version string, not a settings
    # dict; indexing it for 'models' would raise TypeError. Resolve it (and
    # any unknown value) to the real latest version first.
    if version == 'latest' or version not in versions:
        version = versions['latest']
    super().__init__(*args, **kwargs)
    version_config = versions[version]
    if 'binarization' not in version_config:
        self.binarization.render_kw = {'disabled': True}
    # Assign a fresh list instead of extending in place, so the placeholder
    # list passed at field declaration can never be mutated through this
    # instance, whatever the installed WTForms version does with it.
    self.language.choices = list(self.language.choices) + list(version_config['models'].items())  # noqa
    self.version.choices = [(x, x) for x in versions if x != 'latest']
    self.version.default = version
class AddFileSetupJobForm(AddJobForm):
files = MultipleFileField('Files', validators=[DataRequired()])
version = SelectField('Version',
choices=[(x, x) for x in SERVICES['file-setup']['versions'] if x != 'latest'], # noqa
default=SERVICES['file-setup']['versions']['latest'],
validators=[DataRequired()])
def validate_files(form, field):
for file in field.data:
@ -66,3 +75,9 @@ class AddFileSetupJobForm(AddJobForm):
raise ValidationError('File does not have an approved '
'extension: .jpeg | .jpg | .png | .tiff '
'| .tif')
def __init__(self, *args, **kwargs):
    """Build the form for one file-setup service version.

    Args:
        *args: Forwarded to the parent form constructor.
        **kwargs: Forwarded to the parent form constructor, except for the
            optional ``version`` keyword, which becomes the default of the
            version selector. The alias ``'latest'`` and unknown values
            resolve to the latest configured version.
    """
    versions = SERVICES['file-setup']['versions']
    version = kwargs.pop('version', versions['latest'])
    # The selector's choices exclude the 'latest' alias, so a default of
    # 'latest' (or of an unknown version) would match no choice.
    if version == 'latest' or version not in versions:
        version = versions['latest']
    super().__init__(*args, **kwargs)
    self.version.choices = [(x, x) for x in versions if x != 'latest']
    self.version.default = version

View File

@ -23,19 +23,30 @@ def service(service):
elif service == 'file-setup':
form = AddFileSetupJobForm(prefix='add-file-setup-job-form')
elif service == 'nlp':
form = AddNLPJobForm(prefix='add-nlp-job-form')
version = request.args.get('version')
if version is None or version not in SERVICES['nlp']['versions']:
form = AddNLPJobForm(prefix='add-nlp-job-form')
else:
form = AddNLPJobForm(prefix='add-nlp-job-form', version=version)
form.version.data = version
elif service == 'ocr':
form = AddOCRJobForm(prefix='add-ocr-job-form')
version = request.args.get('version')
if version is None or version not in SERVICES['ocr']['versions']:
form = AddOCRJobForm(prefix='add-ocr-job-form')
else:
form = AddOCRJobForm(prefix='add-ocr-job-form', version=version)
form.version.data = version
if form.is_submitted():
if not form.validate():
logging.error(form.errors)
return make_response(form.errors, 400)
service_args = []
if service == 'nlp':
service_args.append('-l {}'.format(form.model.data))
service_args.append('-l {}'.format(form.language.data))
if form.check_encoding.data:
service_args.append('--check-encoding')
if service == 'ocr':
service_args.append('-l {}'.format(form.model.data))
service_args.append('-l {}'.format(form.language.data))
if form.binarization.data:
service_args.append('--binarize')
job = Job(creator=current_user,
@ -70,4 +81,5 @@ def service(service):
return make_response(
{'redirect_url': url_for('jobs.job', job_id=job.id)}, 201)
return render_template('services/{}.html.j2'.format(service),
form=form, title=SERVICES[service]['name'])
form=form, title=SERVICES[service]['name'],
versions=SERVICES[service]['versions'])