First work on fixed versioning

This commit is contained in:
Patrick Jentsch 2021-02-19 13:00:52 +01:00
parent 76e3ffb9fa
commit 351004b795
5 changed files with 144 additions and 100 deletions

View File

@ -1,80 +0,0 @@
from flask_wtf import FlaskForm
from wtforms import (BooleanField, MultipleFileField, SelectField, StringField,
SubmitField, ValidationError)
from wtforms.validators import DataRequired, Length
class AddNLPJobForm(FlaskForm):
    """Form for submitting an NLP job on uploaded ``.txt`` files.

    Collects a title, a description, the input files, the language to
    process them with, the service version and an optional encoding check.
    """

    description = StringField(
        'Description',
        validators=[DataRequired(), Length(1, 255)],
    )
    files = MultipleFileField('Files', validators=[DataRequired()])
    language = SelectField(
        'Language',
        choices=[
            # The empty value acts as the placeholder entry; DataRequired
            # rejects it on submission.
            ('', 'Choose your option'),
            ('nl', 'Dutch'),
            ('en', 'English'),
            ('fr', 'French'),
            ('de', 'German'),
            ('el', 'Greek'),
            ('it', 'Italian'),
            ('pt', 'Portuguese'),
            ('es', 'Spanish'),
        ],
        validators=[DataRequired()],
    )
    submit = SubmitField()
    title = StringField('Title', validators=[DataRequired(), Length(1, 32)])
    version = SelectField(
        'Version',
        choices=[('latest', 'Latest')],
        validators=[DataRequired()],
    )
    check_encoding = BooleanField('Check encoding')

    def validate_files(form, field):
        """Raise ``ValidationError`` unless every file name ends in .txt.

        The comparison is case-insensitive.
        """
        only_txt = all(upload.filename.lower().endswith('.txt')
                       for upload in field.data)
        if not only_txt:
            raise ValidationError('File does not have an approved '
                                  'extension: .txt')
class AddOCRJobForm(FlaskForm):
    """Form for submitting an OCR job on uploaded ``.pdf`` files.

    Collects a title, a description, the input files, the recognition
    language, the service version and the binarization/split switches.
    """

    binarization = BooleanField('Binarazation')
    description = StringField(
        'Description',
        validators=[DataRequired(), Length(1, 255)],
    )
    files = MultipleFileField('Files', validators=[DataRequired()])
    language = SelectField(
        'Language',
        choices=[
            # The empty value acts as the placeholder entry; DataRequired
            # rejects it on submission.
            ('', 'Choose your option'),
            ('eng', 'English'),
            ('enm', 'English, Middle (1100-1500)'),
            ('fra', 'French'),
            ('frm', 'French, Middle (ca. 1400-1600)'),
            ('deu', 'German'),
            ('frk', 'German Fraktur'),
            ('ita', 'Italian'),
            ('por', 'Portuguese'),
            ('spa', 'Spanish; Castilian'),
        ],
        validators=[DataRequired()],
    )
    split = BooleanField('Split')
    submit = SubmitField()
    title = StringField('Title', validators=[DataRequired(), Length(1, 32)])
    version = SelectField(
        'Version',
        choices=[('latest', 'Latest')],
        validators=[DataRequired()],
    )

    def validate_files(form, field):
        """Raise ``ValidationError`` unless every file name ends in .pdf.

        The comparison is case-insensitive.
        """
        only_pdf = all(upload.filename.lower().endswith('.pdf')
                       for upload in field.data)
        if not only_pdf:
            raise ValidationError('File does not have an approved '
                                  'extension: .pdf')
class AddFileSetupJobForm(FlaskForm):
    """Form for submitting a file setup job on uploaded image files.

    Collects a title, a description, the input files and the service
    version.
    """

    description = StringField(
        'Description',
        validators=[DataRequired(), Length(1, 255)],
    )
    submit = SubmitField()
    title = StringField('Title', validators=[DataRequired(), Length(1, 32)])
    files = MultipleFileField('Files', validators=[DataRequired()])
    version = SelectField(
        'Version',
        choices=[('latest', 'Latest')],
        validators=[DataRequired()],
    )

    def validate_files(form, field):
        """Raise ``ValidationError`` unless every file is an accepted image.

        A file is accepted when its lower-cased name ends in one of
        .jpeg, .jpg, .png, .tiff or .tif.
        """
        approved_suffixes = ('.jpeg', '.jpg', '.png', '.tiff', '.tif')
        only_images = all(upload.filename.lower().endswith(approved_suffixes)
                          for upload in field.data)
        if not only_images:
            raise ValidationError('File does not have an approved '
                                  'extension: .jpeg | .jpg | .png | .tiff '
                                  '| .tif')

View File

@ -1,5 +1,68 @@
from flask import Blueprint
# Registry of the available services, keyed by service id.
#
# Every entry has a display 'name'. Versioned services additionally carry a
# 'versions' mapping in which:
#   * 'latest' holds the version string of the current release, and
#   * every other key is a version string mapping to that version's settings
#     (feature switches, selectable 'models' and 'publishing_data').
SERVICES = {
    'corpus_analysis': {
        'name': 'Corpus analysis',
    },
    'file-setup': {
        'name': 'File setup',
        'versions': {
            'latest': '1.0.0',
            '1.0.0': {
                'publishing_data': {
                    'date': None,
                    'title': 'nopaque File setup service',
                    'url': 'https://gitlab.ub.uni-bielefeld.de/sfb1288inf/file-setup/-/tree/1.0.0',  # noqa
                    'version': '1.0.0',
                },
            },
        },
    },
    'nlp': {
        'name': 'Natural Language Processing',
        'versions': {
            'latest': '1.0.0',
            '1.0.0': {
                'check_encoding': True,
                # No models are registered for this version yet.
                'models': {},
                'publishing_data': {
                    'date': None,
                    'title': 'nopaque NLP service',
                    'url': 'https://gitlab.ub.uni-bielefeld.de/sfb1288inf/nlp/-/tree/1.0.0',  # noqa
                    'version': '1.0.0',
                },
            },
        },
    },
    'ocr': {
        'name': 'Optical Character Recognition',
        'versions': {
            'latest': '1.0.0',
            '1.0.0': {
                'binarization': True,
                # Model code -> human readable name.
                'models': {
                    'eng': 'English',
                    'enm': 'English, Middle 1100-1500',
                    'fra': 'French',
                    'frm': 'French, Middle ca. 1400-1600',
                    'deu': 'German',
                    'frk': 'German Fraktur',
                    'ita': 'Italian',
                    'por': 'Portuguese',
                    'spa': 'Spanish; Castilian',
                },
                'publishing_data': {
                    'date': None,
                    'title': 'nopaque OCR service',
                    'url': 'https://gitlab.ub.uni-bielefeld.de/sfb1288inf/ocr/-/tree/1.0.0',  # noqa
                    'version': '1.0.0',
                },
            },
        },
    },
}
# Blueprint object of the services package.
services = Blueprint('services', __name__)
# NOTE(review): this import is kept below the definitions above, presumably
# because the views module imports `services` and `SERVICES` from this
# package and would otherwise hit a circular import - confirm before moving.
from . import views

68
web/app/services/forms.py Normal file
View File

@ -0,0 +1,68 @@
from flask_wtf import FlaskForm
from wtforms import (BooleanField, MultipleFileField, SelectField, StringField,
SubmitField, ValidationError)
from wtforms.validators import DataRequired, Length
from . import SERVICES
class AddJobForm(FlaskForm):
    """Base form holding the fields shared by the job submission forms.

    Provides the job description (1-255 characters), the submit button and
    the job title (1-32 characters).
    """

    description = StringField(
        'Description',
        validators=[DataRequired(), Length(1, 255)],
    )
    submit = SubmitField()
    title = StringField(
        'Title',
        validators=[DataRequired(), Length(1, 32)],
    )
class AddNLPJobForm(AddJobForm):
    """Form for submitting an NLP job on uploaded ``.txt`` files.

    The selectable versions are every key of ``SERVICES['nlp']['versions']``
    except the ``'latest'`` alias; the version that alias points to is
    preselected.
    """

    files = MultipleFileField('Files', validators=[DataRequired()])
    # NOTE(review): no choices are declared for this field here; presumably
    # they are supplied elsewhere before rendering/validation - confirm.
    model = SelectField('Model', validators=[DataRequired()])
    version = SelectField(
        'Version',
        choices=[
            (release, release)
            for release in SERVICES['nlp']['versions']
            if release != 'latest'
        ],
        default=SERVICES['nlp']['versions']['latest'],
        validators=[DataRequired()],
    )
    check_encoding = BooleanField('Check encoding')

    def validate_files(form, field):
        """Raise ``ValidationError`` unless every file name ends in .txt.

        The comparison is case-insensitive.
        """
        only_txt = all(upload.filename.lower().endswith('.txt')
                       for upload in field.data)
        if not only_txt:
            raise ValidationError('File does not have an approved '
                                  'extension: .txt')
class AddOCRJobForm(AddJobForm):
    """Form for submitting an OCR job on uploaded ``.pdf`` files.

    The selectable versions are every key of ``SERVICES['ocr']['versions']``
    except the ``'latest'`` alias; the version that alias points to is
    preselected. Binarization and the chosen model are checked against the
    settings of the selected version.
    """

    # NOTE(review): the label below is misspelled ("Binarazation"); it is
    # left untouched here because it is user-facing text.
    binarization = BooleanField('Binarazation')
    files = MultipleFileField('Files', validators=[DataRequired()])
    # NOTE(review): no choices are declared for this field here; presumably
    # they are supplied elsewhere before rendering/validation - confirm.
    model = SelectField('Model', validators=[DataRequired()])
    version = SelectField(
        'Version',
        choices=[(x, x) for x in SERVICES['ocr']['versions'] if x != 'latest'],  # noqa
        default=SERVICES['ocr']['versions']['latest'],
        validators=[DataRequired()],
    )

    def _selected_version_settings(form):
        """Return the settings dict of the submitted version.

        Returns an empty dict when the submitted value is not a known
        version. This also covers the ``'latest'`` alias, whose entry is a
        plain version string rather than a settings dict. The inline
        validators can therefore run safely before the version field's own
        choice validation has rejected a bad value.
        """
        settings = SERVICES['ocr']['versions'].get(form.version.data)
        return settings if isinstance(settings, dict) else {}

    def validate_binarization(form, field):
        """Reject binarization when the selected version lacks it.

        Raises:
            ValidationError: if the box is ticked but the selected version
                has no ``'binarization'`` setting.
        """
        # Version settings live under the 'versions' key of the service.
        if field.data and 'binarization' not in form._selected_version_settings():  # noqa
            raise ValidationError('Binarization is not available in this version')  # noqa

    def validate_files(form, field):
        """Raise ``ValidationError`` unless every file name ends in .pdf.

        The comparison is case-insensitive.
        """
        for file in field.data:
            if not file.filename.lower().endswith('.pdf'):
                raise ValidationError('File does not have an approved '
                                      'extension: .pdf')

    def validate_model(form, field):
        """Reject a model that the selected version does not provide.

        Raises:
            ValidationError: if the submitted model is not a key of the
                selected version's ``'models'`` mapping.
        """
        available_models = form._selected_version_settings().get('models', {})
        if field.data not in available_models:
            raise ValidationError('Model is not available in this version')
class AddFileSetupJobForm(AddJobForm):
    """Form for submitting a file setup job on uploaded image files.

    The selectable versions are every key of
    ``SERVICES['file-setup']['versions']`` except the ``'latest'`` alias;
    the version that alias points to is preselected.
    """

    files = MultipleFileField('Files', validators=[DataRequired()])
    version = SelectField(
        'Version',
        choices=[
            (release, release)
            for release in SERVICES['file-setup']['versions']
            if release != 'latest'
        ],
        default=SERVICES['file-setup']['versions']['latest'],
        validators=[DataRequired()],
    )

    def validate_files(form, field):
        """Raise ``ValidationError`` unless every file is an accepted image.

        A file is accepted when its lower-cased name ends in one of
        .jpeg, .jpg, .png, .tiff or .tif.
        """
        approved_suffixes = ('.jpeg', '.jpg', '.png', '.tiff', '.tif')
        only_images = all(upload.filename.lower().endswith(approved_suffixes)
                          for upload in field.data)
        if not only_images:
            raise ValidationError('File does not have an approved '
                                  'extension: .jpeg | .jpg | .png | .tiff '
                                  '| .tif')

View File

@ -1,27 +1,17 @@
from flask import abort, flash, make_response, render_template, url_for
from flask import (abort, flash, make_response, render_template, request,
url_for)
from flask_login import current_user, login_required
from werkzeug.utils import secure_filename
from . import services
from . import SERVICES
from .forms import AddFileSetupJobForm, AddNLPJobForm, AddOCRJobForm
from .. import db, socketio
from ..jobs.forms import AddFileSetupJobForm, AddNLPJobForm, AddOCRJobForm
from ..models import Job, JobInput
import json
import logging
import os
# Per-service settings keyed by service id: the display 'name' and, for every
# service except 'corpus_analysis', the job 'resources' (memory in MB and
# number of cores) and the 'form' class used to submit a job.
SERVICES = {'corpus_analysis': {'name': 'Corpus analysis'},
'file-setup': {'name': 'File setup',
'resources': {'mem_mb': 4096, 'n_cores': 4},
'form': AddFileSetupJobForm},
'nlp': {'name': 'Natural Language Processing',
'resources': {'mem_mb': 4096, 'n_cores': 2},
'form': AddNLPJobForm},
'ocr': {'name': 'Optical Character Recognition',
'resources': {'mem_mb': 8192, 'n_cores': 4},
'form': AddOCRJobForm}}
@services.route('/<service>', methods=['GET', 'POST'])
@login_required
def service(service):
@ -30,23 +20,26 @@ def service(service):
if service == 'corpus_analysis':
return render_template('services/{}.html.j2'.format(service),
title=SERVICES[service]['name'])
form = SERVICES[service]['form'](prefix='add-job-form')
elif service == 'file-setup':
form = AddFileSetupJobForm(prefix='add-file-setup-job-form')
elif service == 'nlp':
form = AddNLPJobForm(prefix='add-nlp-job-form')
elif service == 'ocr':
form = AddOCRJobForm(prefix='add-ocr-job-form')
if form.is_submitted():
if not form.validate():
return make_response(form.errors, 400)
service_args = []
if service == 'nlp':
service_args.append('-l {}'.format(form.language.data))
service_args.append('-l {}'.format(form.model.data))
if form.check_encoding.data:
service_args.append('--check-encoding')
if service == 'ocr':
service_args.append('-l {}'.format(form.language.data))
service_args.append('-l {}'.format(form.model.data))
if form.binarization.data:
service_args.append('--binarize')
job = Job(creator=current_user,
description=form.description.data,
mem_mb=SERVICES[service]['resources']['mem_mb'],
n_cores=SERVICES[service]['resources']['n_cores'],
service=service, service_args=json.dumps(service_args),
service_version=form.version.data,
status='preparing', title=form.title.data)

View File

@ -60,7 +60,7 @@
{{ wtf.render_field(form.files, accept='application/pdf', color=ocr_color_darken, placeholder='Choose your .pdf files') }}
</div>
<div class="col s12 l4">
{{ wtf.render_field(form.language, material_icon='language') }}
{{ wtf.render_field(form.model, material_icon='language') }}
</div>
<div class="col s12 l3">
{{ wtf.render_field(form.version, material_icon='apps') }}