mirror of
https://gitlab.ub.uni-bielefeld.de/sfb1288inf/nopaque.git
synced 2024-11-15 01:05:42 +00:00
First work on fixed versioning
This commit is contained in:
parent
76e3ffb9fa
commit
351004b795
@ -1,80 +0,0 @@
|
|||||||
from flask_wtf import FlaskForm
|
|
||||||
from wtforms import (BooleanField, MultipleFileField, SelectField, StringField,
|
|
||||||
SubmitField, ValidationError)
|
|
||||||
from wtforms.validators import DataRequired, Length
|
|
||||||
|
|
||||||
|
|
||||||
class AddNLPJobForm(FlaskForm):
|
|
||||||
description = StringField('Description',
|
|
||||||
validators=[DataRequired(), Length(1, 255)])
|
|
||||||
files = MultipleFileField('Files', validators=[DataRequired()])
|
|
||||||
language = SelectField('Language',
|
|
||||||
choices=[('', 'Choose your option'),
|
|
||||||
('nl', 'Dutch'),
|
|
||||||
('en', 'English'),
|
|
||||||
('fr', 'French'),
|
|
||||||
('de', 'German'),
|
|
||||||
('el', 'Greek'),
|
|
||||||
('it', 'Italian'),
|
|
||||||
('pt', 'Portuguese'),
|
|
||||||
('es', 'Spanish')],
|
|
||||||
validators=[DataRequired()])
|
|
||||||
submit = SubmitField()
|
|
||||||
title = StringField('Title', validators=[DataRequired(), Length(1, 32)])
|
|
||||||
version = SelectField('Version', choices=[('latest', 'Latest')],
|
|
||||||
validators=[DataRequired()])
|
|
||||||
check_encoding = BooleanField('Check encoding')
|
|
||||||
|
|
||||||
def validate_files(form, field):
|
|
||||||
for file in field.data:
|
|
||||||
if not file.filename.lower().endswith('.txt'):
|
|
||||||
raise ValidationError('File does not have an approved '
|
|
||||||
'extension: .txt')
|
|
||||||
|
|
||||||
|
|
||||||
class AddOCRJobForm(FlaskForm):
|
|
||||||
binarization = BooleanField('Binarazation')
|
|
||||||
description = StringField('Description',
|
|
||||||
validators=[DataRequired(), Length(1, 255)])
|
|
||||||
files = MultipleFileField('Files', validators=[DataRequired()])
|
|
||||||
language = SelectField('Language',
|
|
||||||
choices=[('', 'Choose your option'),
|
|
||||||
('eng', 'English'),
|
|
||||||
('enm', 'English, Middle (1100-1500)'),
|
|
||||||
('fra', 'French'),
|
|
||||||
('frm', 'French, Middle (ca. 1400-1600)'),
|
|
||||||
('deu', 'German'),
|
|
||||||
('frk', 'German Fraktur'),
|
|
||||||
('ita', 'Italian'),
|
|
||||||
('por', 'Portuguese'),
|
|
||||||
('spa', 'Spanish; Castilian')],
|
|
||||||
validators=[DataRequired()])
|
|
||||||
split = BooleanField('Split')
|
|
||||||
submit = SubmitField()
|
|
||||||
title = StringField('Title', validators=[DataRequired(), Length(1, 32)])
|
|
||||||
version = SelectField('Version', choices=[('latest', 'Latest')],
|
|
||||||
validators=[DataRequired()])
|
|
||||||
|
|
||||||
def validate_files(form, field):
|
|
||||||
for file in field.data:
|
|
||||||
if not file.filename.lower().endswith('.pdf'):
|
|
||||||
raise ValidationError('File does not have an approved '
|
|
||||||
'extension: .pdf')
|
|
||||||
|
|
||||||
|
|
||||||
class AddFileSetupJobForm(FlaskForm):
|
|
||||||
description = StringField('Description',
|
|
||||||
validators=[DataRequired(), Length(1, 255)])
|
|
||||||
submit = SubmitField()
|
|
||||||
title = StringField('Title', validators=[DataRequired(), Length(1, 32)])
|
|
||||||
files = MultipleFileField('Files', validators=[DataRequired()])
|
|
||||||
version = SelectField('Version', choices=[('latest', 'Latest')],
|
|
||||||
validators=[DataRequired()])
|
|
||||||
|
|
||||||
def validate_files(form, field):
|
|
||||||
for file in field.data:
|
|
||||||
if not file.filename.lower().endswith(('.jpeg', '.jpg', '.png',
|
|
||||||
'.tiff', '.tif')):
|
|
||||||
raise ValidationError('File does not have an approved '
|
|
||||||
'extension: .jpeg | .jpg | .png | .tiff '
|
|
||||||
'| .tif')
|
|
@ -1,5 +1,68 @@
|
|||||||
from flask import Blueprint
|
from flask import Blueprint
|
||||||
|
|
||||||
|
|
||||||
|
# Static description of the services offered by nopaque, keyed by service
# identifier.  A service that is versioned carries a 'versions' mapping with
# one settings dict per version number plus the special key 'latest', whose
# value is a version number (a string), not a settings dict.
SERVICES = {}

# Corpus analysis has no 'versions' entry.
SERVICES['corpus_analysis'] = {'name': 'Corpus analysis'}

SERVICES['file-setup'] = {
    'name': 'File setup',
    'versions': {
        'latest': '1.0.0',
        '1.0.0': {
            'publishing_data': {
                'date': None,
                'title': 'nopaque File setup service',
                'url': 'https://gitlab.ub.uni-bielefeld.de/sfb1288inf/file-setup/-/tree/1.0.0',  # noqa
                'version': '1.0.0',
            },
        },
    },
}

SERVICES['nlp'] = {
    'name': 'Natural Language Processing',
    'versions': {
        'latest': '1.0.0',
        '1.0.0': {
            'check_encoding': True,
            # No models are registered for this version yet.
            'models': {},
            'publishing_data': {
                'date': None,
                'title': 'nopaque NLP service',
                'url': 'https://gitlab.ub.uni-bielefeld.de/sfb1288inf/nlp/-/tree/1.0.0',  # noqa
                'version': '1.0.0',
            },
        },
    },
}

SERVICES['ocr'] = {
    'name': 'Optical Character Recognition',
    'versions': {
        'latest': '1.0.0',
        '1.0.0': {
            'binarization': True,
            # Model code -> human readable model name.
            'models': {
                'eng': 'English',
                'enm': 'English, Middle 1100-1500',
                'fra': 'French',
                'frm': 'French, Middle ca. 1400-1600',
                'deu': 'German',
                'frk': 'German Fraktur',
                'ita': 'Italian',
                'por': 'Portuguese',
                'spa': 'Spanish; Castilian',
            },
            'publishing_data': {
                'date': None,
                'title': 'nopaque OCR service',
                'url': 'https://gitlab.ub.uni-bielefeld.de/sfb1288inf/ocr/-/tree/1.0.0',  # noqa
                'version': '1.0.0',
            },
        },
    },
}
|
||||||
|
|
||||||
|
|
||||||
services = Blueprint('services', __name__)
|
services = Blueprint('services', __name__)
|
||||||
from . import views
|
from . import views
|
||||||
|
68
web/app/services/forms.py
Normal file
68
web/app/services/forms.py
Normal file
@ -0,0 +1,68 @@
|
|||||||
|
from flask_wtf import FlaskForm
|
||||||
|
from wtforms import (BooleanField, MultipleFileField, SelectField, StringField,
|
||||||
|
SubmitField, ValidationError)
|
||||||
|
from wtforms.validators import DataRequired, Length
|
||||||
|
from . import SERVICES
|
||||||
|
|
||||||
|
|
||||||
|
class AddJobForm(FlaskForm):
    """Fields shared by every "add job" form.

    Both text fields are required; the description may hold 1 to 255
    characters and the title 1 to 32 characters.
    """

    # The declaration order is kept on purpose: WTForms orders fields by the
    # order in which they are created.
    description = StringField(
        label='Description',
        validators=[DataRequired(), Length(min=1, max=255)]
    )
    submit = SubmitField()
    title = StringField(
        label='Title',
        validators=[DataRequired(), Length(min=1, max=32)]
    )
|
||||||
|
|
||||||
|
|
||||||
|
class AddNLPJobForm(AddJobForm):
    """Form for submitting an NLP job on one or more ``.txt`` files.

    The selectable versions are all keys of ``SERVICES['nlp']['versions']``
    except the ``'latest'`` alias; the version that alias names is
    preselected.
    """

    files = MultipleFileField(label='Files', validators=[DataRequired()])
    # NOTE(review): no ``choices`` are declared for this field; presumably
    # they are assigned elsewhere before the form is used -- confirm.
    model = SelectField(label='Model', validators=[DataRequired()])
    version = SelectField(
        label='Version',
        choices=[(number, number)
                 for number in SERVICES['nlp']['versions']
                 if number != 'latest'],
        default=SERVICES['nlp']['versions']['latest'],
        validators=[DataRequired()]
    )
    check_encoding = BooleanField(label='Check encoding')

    def validate_files(form, field):
        """Reject the upload if any file name does not end in ``.txt``.

        The comparison is case-insensitive.

        Raises:
            ValidationError: at least one file has another extension.
        """
        has_foreign_file = any(
            not upload.filename.lower().endswith('.txt')
            for upload in field.data
        )
        if has_foreign_file:
            raise ValidationError('File does not have an approved '
                                  'extension: .txt')
|
||||||
|
|
||||||
|
|
||||||
|
class AddOCRJobForm(AddJobForm):
    """Form for submitting an OCR job on one or more ``.pdf`` files.

    The selectable versions are all keys of ``SERVICES['ocr']['versions']``
    except the ``'latest'`` alias; the version that alias names is
    preselected.  The chosen model and the binarization switch are checked
    against the settings of the selected version.
    """

    binarization = BooleanField('Binarization')
    files = MultipleFileField('Files', validators=[DataRequired()])
    # NOTE(review): no ``choices`` are declared for this field; presumably
    # they are assigned elsewhere before the form is used -- confirm.
    model = SelectField('Model', validators=[DataRequired()])
    version = SelectField('Version',
                          choices=[(x, x) for x in SERVICES['ocr']['versions'] if x != 'latest'],  # noqa
                          default=SERVICES['ocr']['versions']['latest'],
                          validators=[DataRequired()])

    def _version_settings(form):
        """Return the settings dict of the selected version.

        An empty dict is returned when the submitted version is unknown, so
        the inline validators report a validation error instead of failing
        with a ``KeyError``.
        """
        settings = SERVICES['ocr']['versions'].get(form.version.data)
        # The 'latest' key maps to a version number (a string), not to a
        # settings dict, so it must not be treated as one.
        return settings if isinstance(settings, dict) else {}

    def validate_binarization(form, field):
        """Allow binarization only if the selected version offers it.

        A version offers binarization when its settings hold a truthy
        ``'binarization'`` entry.

        Raises:
            ValidationError: binarization was requested but is not offered.
        """
        if field.data and not form._version_settings().get('binarization'):
            raise ValidationError('Binarization is not available in this version')  # noqa

    def validate_files(form, field):
        """Reject the upload if any file name does not end in ``.pdf``.

        The comparison is case-insensitive.

        Raises:
            ValidationError: at least one file has another extension.
        """
        for file in field.data:
            if not file.filename.lower().endswith('.pdf'):
                raise ValidationError('File does not have an approved '
                                      'extension: .pdf')

    def validate_model(form, field):
        """Require the chosen model to be listed for the selected version.

        Raises:
            ValidationError: the model is not among the version's models.
        """
        if field.data not in form._version_settings().get('models', {}):
            raise ValidationError('Model is not available in this version')
|
||||||
|
|
||||||
|
|
||||||
|
class AddFileSetupJobForm(AddJobForm):
    """Form for submitting a file setup job on one or more image files.

    The selectable versions are all keys of
    ``SERVICES['file-setup']['versions']`` except the ``'latest'`` alias; the
    version that alias names is preselected.
    """

    files = MultipleFileField(label='Files', validators=[DataRequired()])
    version = SelectField(
        label='Version',
        choices=[(number, number)
                 for number in SERVICES['file-setup']['versions']
                 if number != 'latest'],
        default=SERVICES['file-setup']['versions']['latest'],
        validators=[DataRequired()]
    )

    def validate_files(form, field):
        """Reject the upload if any file is not a JPEG, PNG or TIFF by name.

        Only the file name suffix is inspected, case-insensitively.

        Raises:
            ValidationError: at least one file has another extension.
        """
        approved_suffixes = ('.jpeg', '.jpg', '.png', '.tiff', '.tif')
        has_foreign_file = any(
            not upload.filename.lower().endswith(approved_suffixes)
            for upload in field.data
        )
        if has_foreign_file:
            raise ValidationError('File does not have an approved '
                                  'extension: .jpeg | .jpg | .png | .tiff '
                                  '| .tif')
|
@ -1,27 +1,17 @@
|
|||||||
from flask import abort, flash, make_response, render_template, url_for
|
from flask import (abort, flash, make_response, render_template, request,
|
||||||
|
url_for)
|
||||||
from flask_login import current_user, login_required
|
from flask_login import current_user, login_required
|
||||||
from werkzeug.utils import secure_filename
|
from werkzeug.utils import secure_filename
|
||||||
from . import services
|
from . import services
|
||||||
|
from . import SERVICES
|
||||||
|
from .forms import AddFileSetupJobForm, AddNLPJobForm, AddOCRJobForm
|
||||||
from .. import db, socketio
|
from .. import db, socketio
|
||||||
from ..jobs.forms import AddFileSetupJobForm, AddNLPJobForm, AddOCRJobForm
|
|
||||||
from ..models import Job, JobInput
|
from ..models import Job, JobInput
|
||||||
import json
|
import json
|
||||||
import logging
|
import logging
|
||||||
import os
|
import os
|
||||||
|
|
||||||
|
|
||||||
SERVICES = {'corpus_analysis': {'name': 'Corpus analysis'},
|
|
||||||
'file-setup': {'name': 'File setup',
|
|
||||||
'resources': {'mem_mb': 4096, 'n_cores': 4},
|
|
||||||
'form': AddFileSetupJobForm},
|
|
||||||
'nlp': {'name': 'Natural Language Processing',
|
|
||||||
'resources': {'mem_mb': 4096, 'n_cores': 2},
|
|
||||||
'form': AddNLPJobForm},
|
|
||||||
'ocr': {'name': 'Optical Character Recognition',
|
|
||||||
'resources': {'mem_mb': 8192, 'n_cores': 4},
|
|
||||||
'form': AddOCRJobForm}}
|
|
||||||
|
|
||||||
|
|
||||||
@services.route('/<service>', methods=['GET', 'POST'])
|
@services.route('/<service>', methods=['GET', 'POST'])
|
||||||
@login_required
|
@login_required
|
||||||
def service(service):
|
def service(service):
|
||||||
@ -30,23 +20,26 @@ def service(service):
|
|||||||
if service == 'corpus_analysis':
|
if service == 'corpus_analysis':
|
||||||
return render_template('services/{}.html.j2'.format(service),
|
return render_template('services/{}.html.j2'.format(service),
|
||||||
title=SERVICES[service]['name'])
|
title=SERVICES[service]['name'])
|
||||||
form = SERVICES[service]['form'](prefix='add-job-form')
|
elif service == 'file-setup':
|
||||||
|
form = AddFileSetupJobForm(prefix='add-file-setup-job-form')
|
||||||
|
elif service == 'nlp':
|
||||||
|
form = AddNLPJobForm(prefix='add-nlp-job-form')
|
||||||
|
elif service == 'ocr':
|
||||||
|
form = AddOCRJobForm(prefix='add-ocr-job-form')
|
||||||
if form.is_submitted():
|
if form.is_submitted():
|
||||||
if not form.validate():
|
if not form.validate():
|
||||||
return make_response(form.errors, 400)
|
return make_response(form.errors, 400)
|
||||||
service_args = []
|
service_args = []
|
||||||
if service == 'nlp':
|
if service == 'nlp':
|
||||||
service_args.append('-l {}'.format(form.language.data))
|
service_args.append('-l {}'.format(form.model.data))
|
||||||
if form.check_encoding.data:
|
if form.check_encoding.data:
|
||||||
service_args.append('--check-encoding')
|
service_args.append('--check-encoding')
|
||||||
if service == 'ocr':
|
if service == 'ocr':
|
||||||
service_args.append('-l {}'.format(form.language.data))
|
service_args.append('-l {}'.format(form.model.data))
|
||||||
if form.binarization.data:
|
if form.binarization.data:
|
||||||
service_args.append('--binarize')
|
service_args.append('--binarize')
|
||||||
job = Job(creator=current_user,
|
job = Job(creator=current_user,
|
||||||
description=form.description.data,
|
description=form.description.data,
|
||||||
mem_mb=SERVICES[service]['resources']['mem_mb'],
|
|
||||||
n_cores=SERVICES[service]['resources']['n_cores'],
|
|
||||||
service=service, service_args=json.dumps(service_args),
|
service=service, service_args=json.dumps(service_args),
|
||||||
service_version=form.version.data,
|
service_version=form.version.data,
|
||||||
status='preparing', title=form.title.data)
|
status='preparing', title=form.title.data)
|
||||||
|
@ -60,7 +60,7 @@
|
|||||||
{{ wtf.render_field(form.files, accept='application/pdf', color=ocr_color_darken, placeholder='Choose your .pdf files') }}
|
{{ wtf.render_field(form.files, accept='application/pdf', color=ocr_color_darken, placeholder='Choose your .pdf files') }}
|
||||||
</div>
|
</div>
|
||||||
<div class="col s12 l4">
|
<div class="col s12 l4">
|
||||||
{{ wtf.render_field(form.language, material_icon='language') }}
|
{{ wtf.render_field(form.model, material_icon='language') }}
|
||||||
</div>
|
</div>
|
||||||
<div class="col s12 l3">
|
<div class="col s12 l3">
|
||||||
{{ wtf.render_field(form.version, material_icon='apps') }}
|
{{ wtf.render_field(form.version, material_icon='apps') }}
|
||||||
|
Loading…
Reference in New Issue
Block a user