mirror of
https://gitlab.ub.uni-bielefeld.de/sfb1288inf/nopaque.git
synced 2025-06-27 00:10:35 +00:00
Update service pages and how version data is gathered.
This commit is contained in:
@ -25,7 +25,14 @@ SERVICES = {
|
||||
'latest': '1.0.0',
|
||||
'1.0.0': {
|
||||
'check_encoding': True,
|
||||
'models': {},
|
||||
'models': {
|
||||
'de': 'German',
|
||||
'en': 'English',
|
||||
'it': 'Italian',
|
||||
'nl': 'Dutch',
|
||||
'pl': 'Polish',
|
||||
'zh': 'Chinese'
|
||||
},
|
||||
'publishing_data': {
|
||||
'date': None,
|
||||
'title': 'nopaque NLP service',
|
||||
@ -42,14 +49,19 @@ SERVICES = {
|
||||
'1.0.0': {
|
||||
'binarization': True,
|
||||
'models': {
|
||||
'ara': 'Arabic',
|
||||
'chi_tra': 'Chinese - Traditional',
|
||||
'dan': 'Danish',
|
||||
'eng': 'English',
|
||||
'enm': 'English, Middle 1100-1500',
|
||||
'fra': 'French',
|
||||
'frm': 'French, Middle ca. 1400-1600',
|
||||
'deu': 'German',
|
||||
'frk': 'German Fraktur',
|
||||
'ell': 'Greek, Modern (1453-)',
|
||||
'ita': 'Italian',
|
||||
'por': 'Portuguese',
|
||||
'rus': 'Russian',
|
||||
'spa': 'Spanish; Castilian',
|
||||
},
|
||||
'publishing_data': {
|
||||
|
@ -10,16 +10,18 @@ class AddJobForm(FlaskForm):
|
||||
validators=[DataRequired(), Length(1, 255)])
|
||||
submit = SubmitField()
|
||||
title = StringField('Title', validators=[DataRequired(), Length(1, 32)])
|
||||
version = SelectField('Version', validators=[DataRequired()])
|
||||
|
||||
|
||||
class AddNLPJobForm(AddJobForm):
|
||||
files = MultipleFileField('Files', validators=[DataRequired()])
|
||||
model = SelectField('Model', validators=[DataRequired()])
|
||||
version = SelectField('Version',
|
||||
choices=[(x, x) for x in SERVICES['nlp']['versions'] if x != 'latest'], # noqa
|
||||
default=SERVICES['nlp']['versions']['latest'],
|
||||
validators=[DataRequired()])
|
||||
check_encoding = BooleanField('Check encoding')
|
||||
files = MultipleFileField('Files', validators=[DataRequired()])
|
||||
language = SelectField('Language', choices=[('', 'Choose your option')],
|
||||
default='', validators=[DataRequired()])
|
||||
|
||||
def validate_check_encoding(self, field):
    """Reject the check-encoding option for versions that do not offer it.

    Inline validator for the ``check_encoding`` field.

    Args:
        field: the ``check_encoding`` field being validated.

    Raises:
        ValidationError: if the option is ticked and the submitted version
            is unknown or has no 'check_encoding' entry in SERVICES.
    """
    if not field.data:
        return
    # .get() instead of [] so that a missing or unrecognised version value
    # is reported as a validation error rather than escaping as a KeyError.
    version_config = SERVICES['nlp']['versions'].get(self.version.data)
    # The 'latest' key maps to a version name (a string), not to a settings
    # dict, so only dict entries can actually provide the feature.
    if not isinstance(version_config, dict) or 'check_encoding' not in version_config:  # noqa
        raise ValidationError('Check encoding is not available in this version')  # noqa
|
||||
|
||||
def validate_files(form, field):
|
||||
for file in field.data:
|
||||
@ -27,37 +29,44 @@ class AddNLPJobForm(AddJobForm):
|
||||
raise ValidationError('File does not have an approved '
|
||||
'extension: .txt')
|
||||
|
||||
def __init__(self, *args, **kwargs):
    """Build the NLP job form for one service version.

    Args:
        *args: forwarded unchanged to the parent form constructor.
        **kwargs: forwarded to the parent constructor, except for the
            optional ``version`` keyword. It names the service version
            whose settings populate the form and defaults to the version
            stored under the 'latest' key.
    """
    versions = SERVICES['nlp']['versions']
    version = kwargs.pop('version', versions['latest'])
    # 'latest' is itself a key of the versions mapping, but its value is a
    # version name rather than a settings dict. Resolve the alias (and fall
    # back for unknown names) so the lookups below always receive a dict.
    if version == 'latest' or version not in versions:
        version = versions['latest']
    super().__init__(*args, **kwargs)
    version_config = versions[version]
    if 'check_encoding' not in version_config:
        self.check_encoding.render_kw = {'disabled': True}
    # Assign a new list instead of extending in place, so the list given in
    # the class-level field declaration is never mutated.
    self.language.choices = list(self.language.choices) + list(version_config['models'].items())  # noqa
    self.version.choices = [(x, x) for x in versions if x != 'latest']  # noqa
    # NOTE(review): this is assigned after the parent constructor has run;
    # presumably the field data has already been processed by then, so
    # callers may still need to set ``version.data`` themselves - confirm.
    self.version.default = version
|
||||
|
||||
|
||||
class AddOCRJobForm(AddJobForm):
|
||||
# Checkbox label shown to the user (spelling corrected from 'Binarazation').
binarization = BooleanField('Binarization')
|
||||
files = MultipleFileField('Files', validators=[DataRequired()])
|
||||
model = SelectField('Model', validators=[DataRequired()])
|
||||
version = SelectField('Version',
|
||||
choices=[(x, x) for x in SERVICES['ocr']['versions'] if x != 'latest'], # noqa
|
||||
default=SERVICES['ocr']['versions']['latest'],
|
||||
validators=[DataRequired()])
|
||||
language = SelectField('Language', choices=[('', 'Choose your option')],
|
||||
default='', validators=[DataRequired()])
|
||||
|
||||
def validate_binarization(form, field):
|
||||
if field.data and 'binarization' not in SERVICES['ocr'][form.version.data]: # noqa
|
||||
def validate_binarization(self, field):
    """Reject the binarization option for versions that do not offer it.

    Inline validator for the ``binarization`` field.

    Args:
        field: the ``binarization`` field being validated.

    Raises:
        ValidationError: if the option is ticked and the submitted version
            is unknown or has no 'binarization' entry in SERVICES.
    """
    if not field.data:
        return
    # .get() instead of [] so that a missing or unrecognised version value
    # is reported as a validation error rather than escaping as a KeyError.
    version_config = SERVICES['ocr']['versions'].get(self.version.data)
    # The 'latest' key maps to a version name (a string), not to a settings
    # dict, so only dict entries can actually provide the feature.
    if not isinstance(version_config, dict) or 'binarization' not in version_config:  # noqa
        raise ValidationError('Binarization is not available in this version')  # noqa
|
||||
|
||||
def validate_files(form, field):
|
||||
def validate_files(self, field):
    """Ensure every uploaded file has a ``.pdf`` extension.

    The comparison is case-insensitive and stops at the first file whose
    name does not end in ``.pdf``.

    Args:
        field: the ``files`` field; ``field.data`` is iterated and each
            item's ``filename`` attribute is inspected.

    Raises:
        ValidationError: if any file name does not end with ``.pdf``.
    """
    uploads = field.data
    if all(upload.filename.lower().endswith('.pdf') for upload in uploads):
        return
    raise ValidationError('File does not have an approved '
                          'extension: .pdf')
|
||||
|
||||
def validate_model(form, field):
|
||||
if field.data not in SERVICES['ocr'][form.versiondata]['models']:
|
||||
raise ValidationError('Model is not available in this version')
|
||||
def __init__(self, *args, **kwargs):
    """Build the OCR job form for one service version.

    Args:
        *args: forwarded unchanged to the parent form constructor.
        **kwargs: forwarded to the parent constructor, except for the
            optional ``version`` keyword. It names the service version
            whose settings populate the form and defaults to the version
            stored under the 'latest' key.
    """
    versions = SERVICES['ocr']['versions']
    version = kwargs.pop('version', versions['latest'])
    # 'latest' is itself a key of the versions mapping, but its value is a
    # version name rather than a settings dict. Resolve the alias (and fall
    # back for unknown names) so the lookups below always receive a dict.
    if version == 'latest' or version not in versions:
        version = versions['latest']
    super().__init__(*args, **kwargs)
    version_config = versions[version]
    if 'binarization' not in version_config:
        self.binarization.render_kw = {'disabled': True}
    # Assign a new list instead of extending in place, so the list given in
    # the class-level field declaration is never mutated.
    self.language.choices = list(self.language.choices) + list(version_config['models'].items())  # noqa
    self.version.choices = [(x, x) for x in versions if x != 'latest']  # noqa
    # NOTE(review): this is assigned after the parent constructor has run;
    # presumably the field data has already been processed by then, so
    # callers may still need to set ``version.data`` themselves - confirm.
    self.version.default = version
|
||||
|
||||
|
||||
class AddFileSetupJobForm(AddJobForm):
|
||||
files = MultipleFileField('Files', validators=[DataRequired()])
|
||||
version = SelectField('Version',
|
||||
choices=[(x, x) for x in SERVICES['file-setup']['versions'] if x != 'latest'], # noqa
|
||||
default=SERVICES['file-setup']['versions']['latest'],
|
||||
validators=[DataRequired()])
|
||||
|
||||
def validate_files(form, field):
|
||||
for file in field.data:
|
||||
@ -66,3 +75,9 @@ class AddFileSetupJobForm(AddJobForm):
|
||||
raise ValidationError('File does not have an approved '
|
||||
'extension: .jpeg | .jpg | .png | .tiff '
|
||||
'| .tif')
|
||||
|
||||
def __init__(self, *args, **kwargs):
    """Build the file-setup job form and fill in its version selector.

    Args:
        *args: forwarded unchanged to the parent form constructor.
        **kwargs: forwarded to the parent constructor, except for the
            optional ``version`` keyword, which is stored as the default
            of the version field. When omitted, the value under the
            'latest' key is used.
    """
    known_versions = SERVICES['file-setup']['versions']
    selected = kwargs.pop('version', known_versions['latest'])
    super().__init__(*args, **kwargs)
    # Offer every key except the 'latest' alias; value and label are equal.
    version_choices = []
    for name in known_versions:
        if name != 'latest':
            version_choices.append((name, name))
    self.version.choices = version_choices
    self.version.default = selected
|
||||
|
@ -23,19 +23,30 @@ def service(service):
|
||||
elif service == 'file-setup':
|
||||
form = AddFileSetupJobForm(prefix='add-file-setup-job-form')
|
||||
elif service == 'nlp':
|
||||
form = AddNLPJobForm(prefix='add-nlp-job-form')
|
||||
version = request.args.get('version')
|
||||
if version is None or version not in SERVICES['nlp']['versions']:
|
||||
form = AddNLPJobForm(prefix='add-nlp-job-form')
|
||||
else:
|
||||
form = AddNLPJobForm(prefix='add-nlp-job-form', version=version)
|
||||
form.version.data = version
|
||||
elif service == 'ocr':
|
||||
form = AddOCRJobForm(prefix='add-ocr-job-form')
|
||||
version = request.args.get('version')
|
||||
if version is None or version not in SERVICES['ocr']['versions']:
|
||||
form = AddOCRJobForm(prefix='add-ocr-job-form')
|
||||
else:
|
||||
form = AddOCRJobForm(prefix='add-ocr-job-form', version=version)
|
||||
form.version.data = version
|
||||
if form.is_submitted():
|
||||
if not form.validate():
|
||||
logging.error(form.errors)
|
||||
return make_response(form.errors, 400)
|
||||
service_args = []
|
||||
if service == 'nlp':
|
||||
service_args.append('-l {}'.format(form.model.data))
|
||||
service_args.append('-l {}'.format(form.language.data))
|
||||
if form.check_encoding.data:
|
||||
service_args.append('--check-encoding')
|
||||
if service == 'ocr':
|
||||
service_args.append('-l {}'.format(form.model.data))
|
||||
service_args.append('-l {}'.format(form.language.data))
|
||||
if form.binarization.data:
|
||||
service_args.append('--binarize')
|
||||
job = Job(creator=current_user,
|
||||
@ -70,4 +81,5 @@ def service(service):
|
||||
return make_response(
|
||||
{'redirect_url': url_for('jobs.job', job_id=job.id)}, 201)
|
||||
return render_template('services/{}.html.j2'.format(service),
|
||||
form=form, title=SERVICES[service]['name'])
|
||||
form=form, title=SERVICES[service]['name'],
|
||||
versions=SERVICES[service]['versions'])
|
||||
|
Reference in New Issue
Block a user