mirror of
https://gitlab.ub.uni-bielefeld.de/sfb1288inf/nopaque.git
synced 2024-11-15 01:05:42 +00:00
First work on fixed versioning
This commit is contained in:
parent
76e3ffb9fa
commit
351004b795
@ -1,80 +0,0 @@
|
||||
from flask_wtf import FlaskForm
|
||||
from wtforms import (BooleanField, MultipleFileField, SelectField, StringField,
|
||||
SubmitField, ValidationError)
|
||||
from wtforms.validators import DataRequired, Length
|
||||
|
||||
|
||||
class AddNLPJobForm(FlaskForm):
|
||||
description = StringField('Description',
|
||||
validators=[DataRequired(), Length(1, 255)])
|
||||
files = MultipleFileField('Files', validators=[DataRequired()])
|
||||
language = SelectField('Language',
|
||||
choices=[('', 'Choose your option'),
|
||||
('nl', 'Dutch'),
|
||||
('en', 'English'),
|
||||
('fr', 'French'),
|
||||
('de', 'German'),
|
||||
('el', 'Greek'),
|
||||
('it', 'Italian'),
|
||||
('pt', 'Portuguese'),
|
||||
('es', 'Spanish')],
|
||||
validators=[DataRequired()])
|
||||
submit = SubmitField()
|
||||
title = StringField('Title', validators=[DataRequired(), Length(1, 32)])
|
||||
version = SelectField('Version', choices=[('latest', 'Latest')],
|
||||
validators=[DataRequired()])
|
||||
check_encoding = BooleanField('Check encoding')
|
||||
|
||||
def validate_files(form, field):
|
||||
for file in field.data:
|
||||
if not file.filename.lower().endswith('.txt'):
|
||||
raise ValidationError('File does not have an approved '
|
||||
'extension: .txt')
|
||||
|
||||
|
||||
class AddOCRJobForm(FlaskForm):
|
||||
binarization = BooleanField('Binarazation')
|
||||
description = StringField('Description',
|
||||
validators=[DataRequired(), Length(1, 255)])
|
||||
files = MultipleFileField('Files', validators=[DataRequired()])
|
||||
language = SelectField('Language',
|
||||
choices=[('', 'Choose your option'),
|
||||
('eng', 'English'),
|
||||
('enm', 'English, Middle (1100-1500)'),
|
||||
('fra', 'French'),
|
||||
('frm', 'French, Middle (ca. 1400-1600)'),
|
||||
('deu', 'German'),
|
||||
('frk', 'German Fraktur'),
|
||||
('ita', 'Italian'),
|
||||
('por', 'Portuguese'),
|
||||
('spa', 'Spanish; Castilian')],
|
||||
validators=[DataRequired()])
|
||||
split = BooleanField('Split')
|
||||
submit = SubmitField()
|
||||
title = StringField('Title', validators=[DataRequired(), Length(1, 32)])
|
||||
version = SelectField('Version', choices=[('latest', 'Latest')],
|
||||
validators=[DataRequired()])
|
||||
|
||||
def validate_files(form, field):
|
||||
for file in field.data:
|
||||
if not file.filename.lower().endswith('.pdf'):
|
||||
raise ValidationError('File does not have an approved '
|
||||
'extension: .pdf')
|
||||
|
||||
|
||||
class AddFileSetupJobForm(FlaskForm):
|
||||
description = StringField('Description',
|
||||
validators=[DataRequired(), Length(1, 255)])
|
||||
submit = SubmitField()
|
||||
title = StringField('Title', validators=[DataRequired(), Length(1, 32)])
|
||||
files = MultipleFileField('Files', validators=[DataRequired()])
|
||||
version = SelectField('Version', choices=[('latest', 'Latest')],
|
||||
validators=[DataRequired()])
|
||||
|
||||
def validate_files(form, field):
|
||||
for file in field.data:
|
||||
if not file.filename.lower().endswith(('.jpeg', '.jpg', '.png',
|
||||
'.tiff', '.tif')):
|
||||
raise ValidationError('File does not have an approved '
|
||||
'extension: .jpeg | .jpg | .png | .tiff '
|
||||
'| .tif')
|
@ -1,5 +1,68 @@
|
||||
from flask import Blueprint
|
||||
|
||||
|
||||
# Per-service version tables. Each table maps the alias 'latest' to a concrete
# version string and every concrete version string to its settings dict.
_FILE_SETUP_VERSIONS = {
    'latest': '1.0.0',
    '1.0.0': {
        'publishing_data': {
            'date': None,
            'title': 'nopaque File setup service',
            'url': 'https://gitlab.ub.uni-bielefeld.de/sfb1288inf/file-setup/-/tree/1.0.0',  # noqa
            'version': '1.0.0'
        }
    }
}

_NLP_VERSIONS = {
    'latest': '1.0.0',
    '1.0.0': {
        'check_encoding': True,
        'models': {},
        'publishing_data': {
            'date': None,
            'title': 'nopaque NLP service',
            'url': 'https://gitlab.ub.uni-bielefeld.de/sfb1288inf/nlp/-/tree/1.0.0',  # noqa
            'version': '1.0.0'
        }
    }
}

_OCR_VERSIONS = {
    'latest': '1.0.0',
    '1.0.0': {
        'binarization': True,
        # Model code -> human readable model name.
        'models': {
            'eng': 'English',
            'enm': 'English, Middle 1100-1500',
            'fra': 'French',
            'frm': 'French, Middle ca. 1400-1600',
            'deu': 'German',
            'frk': 'German Fraktur',
            'ita': 'Italian',
            'por': 'Portuguese',
            'spa': 'Spanish; Castilian',
        },
        'publishing_data': {
            'date': None,
            'title': 'nopaque OCR service',
            'url': 'https://gitlab.ub.uni-bielefeld.de/sfb1288inf/ocr/-/tree/1.0.0',  # noqa
            'version': '1.0.0'
        }
    }
}

# Registry of all services, keyed by the service identifier. Every entry has
# a display name; all but 'corpus_analysis' also carry a version table.
SERVICES = {
    'corpus_analysis': {'name': 'Corpus analysis'},
    'file-setup': {
        'name': 'File setup',
        'versions': _FILE_SETUP_VERSIONS
    },
    'nlp': {
        'name': 'Natural Language Processing',
        'versions': _NLP_VERSIONS
    },
    'ocr': {
        'name': 'Optical Character Recognition',
        'versions': _OCR_VERSIONS
    }
}
|
||||
|
||||
|
||||
services = Blueprint('services', __name__)
|
||||
from . import views
|
||||
|
68
web/app/services/forms.py
Normal file
68
web/app/services/forms.py
Normal file
@ -0,0 +1,68 @@
|
||||
from flask_wtf import FlaskForm
|
||||
from wtforms import (BooleanField, MultipleFileField, SelectField, StringField,
|
||||
SubmitField, ValidationError)
|
||||
from wtforms.validators import DataRequired, Length
|
||||
from . import SERVICES
|
||||
|
||||
|
||||
class AddJobForm(FlaskForm):
    """Base form holding the fields every job submission form declares.

    Provides a required description (1-255 characters), a submit button and
    a required title (1-32 characters).
    """

    description = StringField(
        'Description',
        validators=[DataRequired(), Length(min=1, max=255)]
    )
    submit = SubmitField()
    title = StringField(
        'Title',
        validators=[DataRequired(), Length(min=1, max=32)]
    )
|
||||
|
||||
|
||||
class AddNLPJobForm(AddJobForm):
    """Job form for the 'nlp' service.

    Adds a multi-file upload restricted to .txt files, a model selection, a
    version selection built from SERVICES['nlp']['versions'] and an optional
    encoding check flag.
    """

    files = MultipleFileField('Files', validators=[DataRequired()])
    # No choices are declared here; presumably they are assigned by the
    # caller before validation -- TODO confirm.
    model = SelectField('Model', validators=[DataRequired()])
    version = SelectField(
        'Version',
        # Offer every concrete version; the 'latest' alias is only used to
        # pick the preselected entry.
        choices=[
            (name, name)
            for name in SERVICES['nlp']['versions']
            if name != 'latest'
        ],
        default=SERVICES['nlp']['versions']['latest'],
        validators=[DataRequired()]
    )
    check_encoding = BooleanField('Check encoding')

    def validate_files(form, field):
        """Reject the upload if any file name does not end in .txt.

        Raises:
            ValidationError: on the first file with another extension
                (comparison is case-insensitive).
        """
        has_invalid_file = any(
            not upload.filename.lower().endswith('.txt')
            for upload in field.data
        )
        if has_invalid_file:
            raise ValidationError('File does not have an approved '
                                  'extension: .txt')
|
||||
|
||||
|
||||
def _ocr_version_config(version):
    """Return the OCR settings dict for ``version``, or ``{}`` if unknown.

    ``SERVICES['ocr']['versions']`` also contains the ``'latest'`` alias whose
    value is a version string instead of a settings dict, so any non-dict
    entry is treated the same as a missing version. This keeps the inline
    validators below from crashing on a tampered or empty version value.
    """
    config = SERVICES['ocr']['versions'].get(version)
    return config if isinstance(config, dict) else {}


class AddOCRJobForm(AddJobForm):
    """Job form for the 'ocr' service.

    Adds a binarization flag, a multi-file upload restricted to .pdf files,
    a model selection and a version selection built from
    SERVICES['ocr']['versions']. Binarization and model are validated
    against the settings of the selected version.
    """

    binarization = BooleanField('Binarization')
    files = MultipleFileField('Files', validators=[DataRequired()])
    # No choices are declared here; presumably they are assigned by the
    # caller before validation -- TODO confirm.
    model = SelectField('Model', validators=[DataRequired()])
    version = SelectField(
        'Version',
        # Offer every concrete version; the 'latest' alias is only used to
        # pick the preselected entry.
        choices=[
            (name, name)
            for name in SERVICES['ocr']['versions']
            if name != 'latest'
        ],
        default=SERVICES['ocr']['versions']['latest'],
        validators=[DataRequired()]
    )

    def validate_binarization(form, field):
        """Ensure binarization is only requested where the version has it.

        Raises:
            ValidationError: if the flag is set but the selected version
                does not enable 'binarization' (or the version is unknown).
        """
        version_config = _ocr_version_config(form.version.data)
        if field.data and not version_config.get('binarization'):
            raise ValidationError('Binarization is not available in this version')  # noqa

    def validate_files(form, field):
        """Reject the upload if any file name does not end in .pdf.

        Raises:
            ValidationError: on the first file with another extension
                (comparison is case-insensitive).
        """
        for file in field.data:
            if not file.filename.lower().endswith('.pdf'):
                raise ValidationError('File does not have an approved '
                                      'extension: .pdf')

    def validate_model(form, field):
        """Ensure the chosen model exists in the selected version.

        Raises:
            ValidationError: if the model code is not listed under the
                version's 'models' (or the version is unknown).
        """
        available_models = _ocr_version_config(form.version.data).get('models', {})  # noqa
        if field.data not in available_models:
            raise ValidationError('Model is not available in this version')
|
||||
|
||||
|
||||
class AddFileSetupJobForm(AddJobForm):
    """Job form for the 'file-setup' service.

    Adds a multi-file upload restricted to common image extensions and a
    version selection built from SERVICES['file-setup']['versions'].
    """

    files = MultipleFileField('Files', validators=[DataRequired()])
    version = SelectField(
        'Version',
        # Offer every concrete version; the 'latest' alias is only used to
        # pick the preselected entry.
        choices=[
            (name, name)
            for name in SERVICES['file-setup']['versions']
            if name != 'latest'
        ],
        default=SERVICES['file-setup']['versions']['latest'],
        validators=[DataRequired()]
    )

    def validate_files(form, field):
        """Reject the upload if any file is not a JPEG, PNG or TIFF by name.

        Raises:
            ValidationError: on the first file whose name has none of the
                approved extensions (comparison is case-insensitive).
        """
        approved_extensions = ('.jpeg', '.jpg', '.png', '.tiff', '.tif')
        has_invalid_file = any(
            not upload.filename.lower().endswith(approved_extensions)
            for upload in field.data
        )
        if has_invalid_file:
            raise ValidationError('File does not have an approved '
                                  'extension: .jpeg | .jpg | .png | .tiff '
                                  '| .tif')
|
@ -1,27 +1,17 @@
|
||||
from flask import abort, flash, make_response, render_template, url_for
|
||||
from flask import (abort, flash, make_response, render_template, request,
|
||||
url_for)
|
||||
from flask_login import current_user, login_required
|
||||
from werkzeug.utils import secure_filename
|
||||
from . import services
|
||||
from . import SERVICES
|
||||
from .forms import AddFileSetupJobForm, AddNLPJobForm, AddOCRJobForm
|
||||
from .. import db, socketio
|
||||
from ..jobs.forms import AddFileSetupJobForm, AddNLPJobForm, AddOCRJobForm
|
||||
from ..models import Job, JobInput
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
|
||||
|
||||
SERVICES = {'corpus_analysis': {'name': 'Corpus analysis'},
|
||||
'file-setup': {'name': 'File setup',
|
||||
'resources': {'mem_mb': 4096, 'n_cores': 4},
|
||||
'form': AddFileSetupJobForm},
|
||||
'nlp': {'name': 'Natural Language Processing',
|
||||
'resources': {'mem_mb': 4096, 'n_cores': 2},
|
||||
'form': AddNLPJobForm},
|
||||
'ocr': {'name': 'Optical Character Recognition',
|
||||
'resources': {'mem_mb': 8192, 'n_cores': 4},
|
||||
'form': AddOCRJobForm}}
|
||||
|
||||
|
||||
@services.route('/<service>', methods=['GET', 'POST'])
|
||||
@login_required
|
||||
def service(service):
|
||||
@ -30,23 +20,26 @@ def service(service):
|
||||
if service == 'corpus_analysis':
|
||||
return render_template('services/{}.html.j2'.format(service),
|
||||
title=SERVICES[service]['name'])
|
||||
form = SERVICES[service]['form'](prefix='add-job-form')
|
||||
elif service == 'file-setup':
|
||||
form = AddFileSetupJobForm(prefix='add-file-setup-job-form')
|
||||
elif service == 'nlp':
|
||||
form = AddNLPJobForm(prefix='add-nlp-job-form')
|
||||
elif service == 'ocr':
|
||||
form = AddOCRJobForm(prefix='add-ocr-job-form')
|
||||
if form.is_submitted():
|
||||
if not form.validate():
|
||||
return make_response(form.errors, 400)
|
||||
service_args = []
|
||||
if service == 'nlp':
|
||||
service_args.append('-l {}'.format(form.language.data))
|
||||
service_args.append('-l {}'.format(form.model.data))
|
||||
if form.check_encoding.data:
|
||||
service_args.append('--check-encoding')
|
||||
if service == 'ocr':
|
||||
service_args.append('-l {}'.format(form.language.data))
|
||||
service_args.append('-l {}'.format(form.model.data))
|
||||
if form.binarization.data:
|
||||
service_args.append('--binarize')
|
||||
job = Job(creator=current_user,
|
||||
description=form.description.data,
|
||||
mem_mb=SERVICES[service]['resources']['mem_mb'],
|
||||
n_cores=SERVICES[service]['resources']['n_cores'],
|
||||
service=service, service_args=json.dumps(service_args),
|
||||
service_version=form.version.data,
|
||||
status='preparing', title=form.title.data)
|
||||
|
@ -60,7 +60,7 @@
|
||||
{{ wtf.render_field(form.files, accept='application/pdf', color=ocr_color_darken, placeholder='Choose your .pdf files') }}
|
||||
</div>
|
||||
<div class="col s12 l4">
|
||||
{{ wtf.render_field(form.language, material_icon='language') }}
|
||||
{{ wtf.render_field(form.model, material_icon='language') }}
|
||||
</div>
|
||||
<div class="col s12 l3">
|
||||
{{ wtf.render_field(form.version, material_icon='apps') }}
|
||||
|
Loading…
Reference in New Issue
Block a user