diff --git a/web/app/jobs/forms.py b/web/app/jobs/forms.py deleted file mode 100644 index 127a981a..00000000 --- a/web/app/jobs/forms.py +++ /dev/null @@ -1,80 +0,0 @@ -from flask_wtf import FlaskForm -from wtforms import (BooleanField, MultipleFileField, SelectField, StringField, - SubmitField, ValidationError) -from wtforms.validators import DataRequired, Length - - -class AddNLPJobForm(FlaskForm): - description = StringField('Description', - validators=[DataRequired(), Length(1, 255)]) - files = MultipleFileField('Files', validators=[DataRequired()]) - language = SelectField('Language', - choices=[('', 'Choose your option'), - ('nl', 'Dutch'), - ('en', 'English'), - ('fr', 'French'), - ('de', 'German'), - ('el', 'Greek'), - ('it', 'Italian'), - ('pt', 'Portuguese'), - ('es', 'Spanish')], - validators=[DataRequired()]) - submit = SubmitField() - title = StringField('Title', validators=[DataRequired(), Length(1, 32)]) - version = SelectField('Version', choices=[('latest', 'Latest')], - validators=[DataRequired()]) - check_encoding = BooleanField('Check encoding') - - def validate_files(form, field): - for file in field.data: - if not file.filename.lower().endswith('.txt'): - raise ValidationError('File does not have an approved ' - 'extension: .txt') - - -class AddOCRJobForm(FlaskForm): - binarization = BooleanField('Binarazation') - description = StringField('Description', - validators=[DataRequired(), Length(1, 255)]) - files = MultipleFileField('Files', validators=[DataRequired()]) - language = SelectField('Language', - choices=[('', 'Choose your option'), - ('eng', 'English'), - ('enm', 'English, Middle (1100-1500)'), - ('fra', 'French'), - ('frm', 'French, Middle (ca. 
1400-1600)'), - ('deu', 'German'), - ('frk', 'German Fraktur'), - ('ita', 'Italian'), - ('por', 'Portuguese'), - ('spa', 'Spanish; Castilian')], - validators=[DataRequired()]) - split = BooleanField('Split') - submit = SubmitField() - title = StringField('Title', validators=[DataRequired(), Length(1, 32)]) - version = SelectField('Version', choices=[('latest', 'Latest')], - validators=[DataRequired()]) - - def validate_files(form, field): - for file in field.data: - if not file.filename.lower().endswith('.pdf'): - raise ValidationError('File does not have an approved ' - 'extension: .pdf') - - -class AddFileSetupJobForm(FlaskForm): - description = StringField('Description', - validators=[DataRequired(), Length(1, 255)]) - submit = SubmitField() - title = StringField('Title', validators=[DataRequired(), Length(1, 32)]) - files = MultipleFileField('Files', validators=[DataRequired()]) - version = SelectField('Version', choices=[('latest', 'Latest')], - validators=[DataRequired()]) - - def validate_files(form, field): - for file in field.data: - if not file.filename.lower().endswith(('.jpeg', '.jpg', '.png', - '.tiff', '.tif')): - raise ValidationError('File does not have an approved ' - 'extension: .jpeg | .jpg | .png | .tiff ' - '| .tif') diff --git a/web/app/services/__init__.py b/web/app/services/__init__.py index ea9a403f..03836993 100644 --- a/web/app/services/__init__.py +++ b/web/app/services/__init__.py @@ -1,5 +1,68 @@ from flask import Blueprint +SERVICES = { + 'corpus_analysis': { + 'name': 'Corpus analysis' + }, + 'file-setup': { + 'name': 'File setup', + 'versions': { + 'latest': '1.0.0', + '1.0.0': { + 'publishing_data': { + 'date': None, + 'title': 'nopaque File setup service', + 'url': 'https://gitlab.ub.uni-bielefeld.de/sfb1288inf/file-setup/-/tree/1.0.0', # noqa + 'version': '1.0.0' + } + } + } + }, + 'nlp': { + 'name': 'Natural Language Processing', + 'versions': { + 'latest': '1.0.0', + '1.0.0': { + 'check_encoding': True, + 'models': {}, + 
'publishing_data': { + 'date': None, + 'title': 'nopaque NLP service', + 'url': 'https://gitlab.ub.uni-bielefeld.de/sfb1288inf/nlp/-/tree/1.0.0', # noqa + 'version': '1.0.0' + } + } + } + }, + 'ocr': { + 'name': 'Optical Character Recognition', + 'versions': { + 'latest': '1.0.0', + '1.0.0': { + 'binarization': True, + 'models': { + 'eng': 'English', + 'enm': 'English, Middle 1100-1500', + 'fra': 'French', + 'frm': 'French, Middle ca. 1400-1600', + 'deu': 'German', + 'frk': 'German Fraktur', + 'ita': 'Italian', + 'por': 'Portuguese', + 'spa': 'Spanish; Castilian', + }, + 'publishing_data': { + 'date': None, + 'title': 'nopaque OCR service', + 'url': 'https://gitlab.ub.uni-bielefeld.de/sfb1288inf/ocr/-/tree/1.0.0', # noqa + 'version': '1.0.0' + } + } + } + } +} + + services = Blueprint('services', __name__) from . import views diff --git a/web/app/services/forms.py b/web/app/services/forms.py new file mode 100644 index 00000000..cea741de --- /dev/null +++ b/web/app/services/forms.py @@ -0,0 +1,68 @@ +from flask_wtf import FlaskForm +from wtforms import (BooleanField, MultipleFileField, SelectField, StringField, + SubmitField, ValidationError) +from wtforms.validators import DataRequired, Length +from . 
import SERVICES + + +class AddJobForm(FlaskForm): + description = StringField('Description', + validators=[DataRequired(), Length(1, 255)]) + submit = SubmitField() + title = StringField('Title', validators=[DataRequired(), Length(1, 32)]) + + +class AddNLPJobForm(AddJobForm): + files = MultipleFileField('Files', validators=[DataRequired()]) + model = SelectField('Model', validators=[DataRequired()]) + version = SelectField('Version', + choices=[(x, x) for x in SERVICES['nlp']['versions'] if x != 'latest'], # noqa + default=SERVICES['nlp']['versions']['latest'], + validators=[DataRequired()]) + check_encoding = BooleanField('Check encoding') + + def validate_files(form, field): + for file in field.data: + if not file.filename.lower().endswith('.txt'): + raise ValidationError('File does not have an approved ' + 'extension: .txt') + + +class AddOCRJobForm(AddJobForm): + binarization = BooleanField('Binarazation') + files = MultipleFileField('Files', validators=[DataRequired()]) + model = SelectField('Model', validators=[DataRequired()]) + version = SelectField('Version', + choices=[(x, x) for x in SERVICES['ocr']['versions'] if x != 'latest'], # noqa + default=SERVICES['ocr']['versions']['latest'], + validators=[DataRequired()]) + + def validate_binarization(form, field): # per-version feature check + if field.data and 'binarization' not in SERVICES['ocr']['versions'][form.version.data]: # noqa + raise ValidationError('Binarization is not available in this version') # noqa + + def validate_files(form, field): + for file in field.data: + if not file.filename.lower().endswith('.pdf'): + raise ValidationError('File does not have an approved ' + 'extension: .pdf') + + def validate_model(form, field): # per-version model check + if field.data not in SERVICES['ocr']['versions'][form.version.data]['models']: # noqa + raise ValidationError('Model is not available in this version') + + +class AddFileSetupJobForm(AddJobForm): + files = MultipleFileField('Files', validators=[DataRequired()]) + version = SelectField('Version', + choices=[(x, x) for x in 
SERVICES['file-setup']['versions'] if x != 'latest'], # noqa + default=SERVICES['file-setup']['versions']['latest'], + validators=[DataRequired()]) + + def validate_files(form, field): + for file in field.data: + if not file.filename.lower().endswith(('.jpeg', '.jpg', '.png', + '.tiff', '.tif')): + raise ValidationError('File does not have an approved ' + 'extension: .jpeg | .jpg | .png | .tiff ' + '| .tif') diff --git a/web/app/services/views.py b/web/app/services/views.py index 1d81e6a8..4436d1ff 100644 --- a/web/app/services/views.py +++ b/web/app/services/views.py @@ -1,27 +1,17 @@ -from flask import abort, flash, make_response, render_template, url_for +from flask import (abort, flash, make_response, render_template, request, + url_for) from flask_login import current_user, login_required from werkzeug.utils import secure_filename from . import services +from . import SERVICES +from .forms import AddFileSetupJobForm, AddNLPJobForm, AddOCRJobForm from .. import db, socketio -from ..jobs.forms import AddFileSetupJobForm, AddNLPJobForm, AddOCRJobForm from ..models import Job, JobInput import json import logging import os -SERVICES = {'corpus_analysis': {'name': 'Corpus analysis'}, - 'file-setup': {'name': 'File setup', - 'resources': {'mem_mb': 4096, 'n_cores': 4}, - 'form': AddFileSetupJobForm}, - 'nlp': {'name': 'Natural Language Processing', - 'resources': {'mem_mb': 4096, 'n_cores': 2}, - 'form': AddNLPJobForm}, - 'ocr': {'name': 'Optical Character Recognition', - 'resources': {'mem_mb': 8192, 'n_cores': 4}, - 'form': AddOCRJobForm}} - - @services.route('/', methods=['GET', 'POST']) @login_required def service(service): @@ -30,23 +20,26 @@ def service(service): if service == 'corpus_analysis': return render_template('services/{}.html.j2'.format(service), title=SERVICES[service]['name']) - form = SERVICES[service]['form'](prefix='add-job-form') + elif service == 'file-setup': + form = AddFileSetupJobForm(prefix='add-file-setup-job-form') + elif service == 
'nlp': + form = AddNLPJobForm(prefix='add-nlp-job-form') + elif service == 'ocr': + form = AddOCRJobForm(prefix='add-ocr-job-form') if form.is_submitted(): if not form.validate(): return make_response(form.errors, 400) service_args = [] if service == 'nlp': - service_args.append('-l {}'.format(form.language.data)) + service_args.append('-l {}'.format(form.model.data)) if form.check_encoding.data: service_args.append('--check-encoding') if service == 'ocr': - service_args.append('-l {}'.format(form.language.data)) + service_args.append('-l {}'.format(form.model.data)) if form.binarization.data: service_args.append('--binarize') job = Job(creator=current_user, description=form.description.data, - mem_mb=SERVICES[service]['resources']['mem_mb'], - n_cores=SERVICES[service]['resources']['n_cores'], service=service, service_args=json.dumps(service_args), service_version=form.version.data, status='preparing', title=form.title.data) diff --git a/web/app/templates/services/ocr.html.j2 b/web/app/templates/services/ocr.html.j2 index 09759e0c..5cb520bf 100644 --- a/web/app/templates/services/ocr.html.j2 +++ b/web/app/templates/services/ocr.html.j2 @@ -60,7 +60,7 @@ {{ wtf.render_field(form.files, accept='application/pdf', color=ocr_color_darken, placeholder='Choose your .pdf files') }}
- {{ wtf.render_field(form.language, material_icon='language') }} + {{ wtf.render_field(form.model, material_icon='language') }}
{{ wtf.render_field(form.version, material_icon='apps') }}