Change directory structure (move ./nopaque/* to ./)

This commit is contained in:
Patrick Jentsch
2021-07-20 15:07:42 +02:00
parent ff39d8d650
commit d6ab379418
231 changed files with 26 additions and 23 deletions

80
app/services/__init__.py Normal file
View File

@ -0,0 +1,80 @@
from flask import Blueprint
# Registry of the services offered by the application, keyed by the service
# identifier used in URLs. Each versioned service carries a 'versions' mapping
# whose 'latest' entry is an alias (a version string), while every other entry
# maps a concrete version to its configuration.
SERVICES = {
    # Corpus analysis has no versions and no job form.
    'corpus_analysis': {
        'name': 'Corpus analysis',
    },
    'file_setup': {
        'name': 'File setup',
        'versions': {
            'latest': '1.0.0b',
            '1.0.0b': {
                'publishing_data': {
                    'date': None,
                    'title': 'nopaque File setup service',
                    'url': 'https://gitlab.ub.uni-bielefeld.de/sfb1288inf/file-setup/-/tree/1.0.0b',  # noqa
                    'version': '1.0.0',
                },
            },
        },
    },
    'nlp': {
        'name': 'Natural Language Processing',
        'versions': {
            'latest': '1.0.0b',
            '1.0.0b': {
                # Presence of this key enables the "check encoding" option.
                'check_encoding': True,
                # Model code -> human readable language name.
                'models': {
                    'de': 'German',
                    'en': 'English',
                    'it': 'Italian',
                    'nl': 'Dutch',
                    'pl': 'Polish',
                    'zh': 'Chinese',
                },
                'publishing_data': {
                    'date': None,
                    'title': 'nopaque NLP service',
                    'url': 'https://gitlab.ub.uni-bielefeld.de/sfb1288inf/nlp/-/tree/1.0.0b',  # noqa
                    'version': '1.0.0',
                },
            },
        },
    },
    'ocr': {
        'name': 'Optical Character Recognition',
        'versions': {
            'latest': '1.0.0b',
            '1.0.0b': {
                # Presence of this key enables the "binarization" option.
                'binarization': True,
                # Model code -> human readable language name.
                'models': {
                    'ara': 'Arabic',
                    'chi_tra': 'Chinese - Traditional',
                    'dan': 'Danish',
                    'eng': 'English',
                    'enm': 'English, Middle 1100-1500',
                    'fra': 'French',
                    'frm': 'French, Middle ca. 1400-1600',
                    'deu': 'German',
                    'frk': 'German Fraktur',
                    'ell': 'Greek, Modern (1453-)',
                    'ita': 'Italian',
                    'por': 'Portuguese',
                    'rus': 'Russian',
                    'spa': 'Spanish; Castilian',
                },
                'publishing_data': {
                    'date': None,
                    'title': 'nopaque OCR service',
                    'url': 'https://gitlab.ub.uni-bielefeld.de/sfb1288inf/ocr/-/tree/1.0.0b',  # noqa
                    'version': '1.0.0',
                },
            },
        },
    },
}
# Blueprint that groups the service routes; the view function in the views
# module registers itself on it through the @services.route decorator.
services = Blueprint('services', __name__)
# Imported last on purpose: the views module imports `services` (and SERVICES)
# from this package, so both must exist before views is loaded.
from . import views

83
app/services/forms.py Normal file
View File

@ -0,0 +1,83 @@
from flask_wtf import FlaskForm
from wtforms import (BooleanField, MultipleFileField, SelectField, StringField,
SubmitField, ValidationError)
from wtforms.validators import DataRequired, Length
from . import SERVICES
class AddJobForm(FlaskForm):
    """Base form holding the fields every service job form shares.

    Subclasses are expected to fill in ``version.choices`` themselves, since
    the available versions depend on the service.
    """

    # A required free text description, limited to 255 characters.
    description = StringField(
        label='Description',
        validators=[DataRequired(), Length(min=1, max=255)],
    )
    submit = SubmitField()
    # A required short title, limited to 32 characters.
    title = StringField(
        label='Title',
        validators=[DataRequired(), Length(min=1, max=32)],
    )
    # Choices are assigned per service in the subclass constructors.
    version = SelectField(label='Version', validators=[DataRequired()])
class AddNLPJobForm(AddJobForm):
    """Form for submitting a Natural Language Processing job.

    Accepts an optional ``version`` keyword argument that selects which entry
    of ``SERVICES['nlp']['versions']`` is used to populate the language
    choices and to decide whether the "check encoding" option is offered.
    The alias ``'latest'`` is resolved to the concrete version it points to.
    """

    check_encoding = BooleanField('Check encoding')
    files = MultipleFileField('Files', validators=[DataRequired()])
    language = SelectField('Language', choices=[('', 'Choose your option')],
                           default='', validators=[DataRequired()])

    def validate_check_encoding(self, field):
        """Reject the option when the selected version does not support it.

        Raises:
            ValidationError: if the option is set but the submitted version
                is unknown or has no 'check_encoding' entry.
        """
        if not field.data:
            return
        # Inline validators run even when the version field itself failed
        # validation, so the submitted version may not be a known key. Look
        # it up defensively instead of letting a KeyError become a 500.
        version_config = SERVICES['nlp']['versions'].get(self.version.data)
        if (not isinstance(version_config, dict)
                or 'check_encoding' not in version_config):
            raise ValidationError('Check encoding is not available in this version')  # noqa

    def validate_files(self, field):
        """Ensure every uploaded file has a .txt extension.

        Raises:
            ValidationError: on the first file without a .txt extension.
        """
        for file in field.data:
            # Entries without a usable filename are treated as not approved
            # rather than raising AttributeError.
            filename = getattr(file, 'filename', None) or ''
            if not filename.lower().endswith('.txt'):
                raise ValidationError('File does not have an approved '
                                      'extension: .txt')

    def __init__(self, *args, **kwargs):
        versions = SERVICES['nlp']['versions']
        version = kwargs.pop('version', versions['latest'])
        if version == 'latest':
            # 'latest' maps to a version string, not to a configuration
            # dict; resolve it so the lookups below work.
            version = versions['latest']
        super().__init__(*args, **kwargs)
        version_config = versions[version]
        if 'check_encoding' not in version_config:
            self.check_encoding.render_kw = {'disabled': True}
        # Build a new list instead of extending in place so the placeholder
        # list given at class level can never accumulate entries.
        self.language.choices = (list(self.language.choices)
                                 + list(version_config['models'].items()))
        self.version.choices = [(x, x) for x in versions if x != 'latest']
        self.version.default = version
class AddOCRJobForm(AddJobForm):
    """Form for submitting an Optical Character Recognition job.

    Accepts an optional ``version`` keyword argument that selects which entry
    of ``SERVICES['ocr']['versions']`` is used to populate the language
    choices and to decide whether the "binarization" option is offered.
    The alias ``'latest'`` is resolved to the concrete version it points to.
    """

    # Label typo ('Binarazation') corrected; it is shown to the user.
    binarization = BooleanField('Binarization')
    files = MultipleFileField('Files', validators=[DataRequired()])
    language = SelectField('Language', choices=[('', 'Choose your option')],
                           default='', validators=[DataRequired()])

    def validate_binarization(self, field):
        """Reject the option when the selected version does not support it.

        Raises:
            ValidationError: if the option is set but the submitted version
                is unknown or has no 'binarization' entry.
        """
        if not field.data:
            return
        # Inline validators run even when the version field itself failed
        # validation, so the submitted version may not be a known key. Look
        # it up defensively instead of letting a KeyError become a 500.
        version_config = SERVICES['ocr']['versions'].get(self.version.data)
        if (not isinstance(version_config, dict)
                or 'binarization' not in version_config):
            raise ValidationError('Binarization is not available in this version')  # noqa

    def validate_files(self, field):
        """Ensure every uploaded file has a .pdf extension.

        Raises:
            ValidationError: on the first file without a .pdf extension.
        """
        for file in field.data:
            # Entries without a usable filename are treated as not approved
            # rather than raising AttributeError.
            filename = getattr(file, 'filename', None) or ''
            if not filename.lower().endswith('.pdf'):
                raise ValidationError('File does not have an approved '
                                      'extension: .pdf')

    def __init__(self, *args, **kwargs):
        versions = SERVICES['ocr']['versions']
        version = kwargs.pop('version', versions['latest'])
        if version == 'latest':
            # 'latest' maps to a version string, not to a configuration
            # dict; resolve it so the lookups below work.
            version = versions['latest']
        super().__init__(*args, **kwargs)
        version_config = versions[version]
        if 'binarization' not in version_config:
            self.binarization.render_kw = {'disabled': True}
        # Build a new list instead of extending in place so the placeholder
        # list given at class level can never accumulate entries.
        self.language.choices = (list(self.language.choices)
                                 + list(version_config['models'].items()))
        self.version.choices = [(x, x) for x in versions if x != 'latest']
        self.version.default = version
class AddFileSetupJobForm(AddJobForm):
    """Form for submitting a File setup job from uploaded image files.

    Accepts an optional ``version`` keyword argument; it defaults to the
    value stored under ``SERVICES['file_setup']['versions']['latest']``.
    """

    files = MultipleFileField('Files', validators=[DataRequired()])

    def validate_files(self, field):
        """Ensure every uploaded file carries an approved image extension.

        Raises:
            ValidationError: on the first file with another extension.
        """
        approved = ('.jpeg', '.jpg', '.png', '.tiff', '.tif')
        for upload in field.data:
            if upload.filename.lower().endswith(approved):
                continue
            raise ValidationError('File does not have an approved '
                                  'extension: .jpeg | .jpg | .png | .tiff '
                                  '| .tif')

    def __init__(self, *args, **kwargs):
        available = SERVICES['file_setup']['versions']
        selected = kwargs.pop('version', available['latest'])
        super().__init__(*args, **kwargs)
        # Every key except the 'latest' alias is a selectable version.
        self.version.choices = [
            (key, key) for key in available if key != 'latest'
        ]
        self.version.default = selected

85
app/services/views.py Normal file
View File

@ -0,0 +1,85 @@
from flask import (abort, flash, make_response, render_template, request,
url_for)
from flask_login import current_user, login_required
from werkzeug.utils import secure_filename
from . import services
from . import SERVICES
from .forms import AddFileSetupJobForm, AddNLPJobForm, AddOCRJobForm
from .. import db, socketio
from ..models import Job, JobInput
import json
import logging
import os
def _requested_version(service):
    """Return the concrete version named by the ?version= query argument.

    The alias 'latest' is resolved to the version string it points to.
    Returns None when the argument is missing or names no known version.
    """
    versions = SERVICES[service]['versions']
    version = request.args.get('version')
    if version == 'latest':
        # 'latest' is a key of the versions mapping but holds a version
        # string rather than a configuration; passing it on unresolved
        # would break the form constructors and is not a valid choice.
        version = versions['latest']
    if version is None or version not in versions:
        return None
    return version


@services.route('/<service>', methods=['GET', 'POST'])
@login_required
def service(service):
    """Render a service page and create a job from its submitted form.

    Responses:
        404 if ``service`` is not a key of SERVICES.
        400 with the form errors if a submitted form does not validate.
        500 with a redirect URL if the job directory cannot be created.
        201 with a redirect URL to the job page after the job was stored.
        Otherwise the rendered service template.
    """
    if service not in SERVICES:
        abort(404)
    template = 'services/{}.html.j2'.format(service)
    if service == 'corpus_analysis':
        # This service has no job form; only the page itself is rendered.
        return render_template(template, title=SERVICES[service]['name'])
    elif service == 'file_setup':
        form = AddFileSetupJobForm(prefix='add-file-setup-job-form')
    elif service == 'nlp':
        version = _requested_version(service)
        if version is None:
            form = AddNLPJobForm(prefix='add-nlp-job-form')
        else:
            form = AddNLPJobForm(prefix='add-nlp-job-form', version=version)
            form.version.data = version
    elif service == 'ocr':
        version = _requested_version(service)
        if version is None:
            form = AddOCRJobForm(prefix='add-ocr-job-form')
        else:
            form = AddOCRJobForm(prefix='add-ocr-job-form', version=version)
            form.version.data = version
    if form.is_submitted():
        if not form.validate():
            logging.error(form.errors)
            return make_response(form.errors, 400)
        # Command line arguments handed to the service, stored as JSON.
        service_args = []
        if service == 'nlp':
            service_args.append('-l {}'.format(form.language.data))
            if form.check_encoding.data:
                service_args.append('--check-encoding')
        if service == 'ocr':
            service_args.append('-l {}'.format(form.language.data))
            if form.binarization.data:
                service_args.append('--binarize')
        job = Job(creator=current_user,
                  description=form.description.data,
                  service=service, service_args=json.dumps(service_args),
                  service_version=form.version.data,
                  status='preparing', title=form.title.data)
        db.session.add(job)
        # Flush and refresh before commit so job.path can be used below.
        db.session.flush()
        db.session.refresh(job)
        try:
            os.makedirs(job.path)
        except OSError:
            logging.error('Make dir %s led to an OSError!', job.path)
            db.session.rollback()
            flash('Internal Server Error', 'error')
            return make_response(
                {'redirect_url': url_for('.service', service=service)}, 500)
        for file in form.files.data:
            filename = secure_filename(file.filename)
            job_input = JobInput(filename=filename, job=job)
            file.save(job_input.path)
            db.session.add(job_input)
        job.status = 'submitted'
        db.session.commit()
        flash('Job "{}" added'.format(job.title), 'job')
        # Notify the owner's socket room about the new job as a JSON patch.
        event = 'user_{}_patch'.format(job.user_id)
        jsonpatch = [{'op': 'add', 'path': '/jobs/{}'.format(job.id), 'value': job.to_dict()}]  # noqa
        room = 'user_{}'.format(job.user_id)
        socketio.emit(event, jsonpatch, room=room)
        return make_response(
            {'redirect_url': url_for('jobs.job', job_id=job.id)}, 201)
    return render_template(template,
                           form=form, title=SERVICES[service]['name'],
                           versions=SERVICES[service]['versions'])