mirror of
https://gitlab.ub.uni-bielefeld.de/sfb1288inf/nopaque.git
synced 2025-06-12 00:50:40 +00:00
Change directory structure (move ./nopaque/* to ./)
This commit is contained in:
80
app/services/__init__.py
Normal file
80
app/services/__init__.py
Normal file
@ -0,0 +1,80 @@
|
||||
from flask import Blueprint
|
||||
|
||||
|
||||
# Version tables, one per service. In each table the 'latest' key is an alias
# whose value names the default version; every other key is a concrete version
# mapped to that version's settings.
_FILE_SETUP_VERSIONS = {
    'latest': '1.0.0b',
    '1.0.0b': {
        'publishing_data': {
            'date': None,
            'title': 'nopaque File setup service',
            'url': 'https://gitlab.ub.uni-bielefeld.de/sfb1288inf/file-setup/-/tree/1.0.0b',  # noqa
            'version': '1.0.0',
        },
    },
}

_NLP_VERSIONS = {
    'latest': '1.0.0b',
    '1.0.0b': {
        'check_encoding': True,
        # language code -> display name
        'models': {
            'de': 'German',
            'en': 'English',
            'it': 'Italian',
            'nl': 'Dutch',
            'pl': 'Polish',
            'zh': 'Chinese',
        },
        'publishing_data': {
            'date': None,
            'title': 'nopaque NLP service',
            'url': 'https://gitlab.ub.uni-bielefeld.de/sfb1288inf/nlp/-/tree/1.0.0b',  # noqa
            'version': '1.0.0',
        },
    },
}

_OCR_VERSIONS = {
    'latest': '1.0.0b',
    '1.0.0b': {
        'binarization': True,
        # language code -> display name
        'models': {
            'ara': 'Arabic',
            'chi_tra': 'Chinese - Traditional',
            'dan': 'Danish',
            'eng': 'English',
            'enm': 'English, Middle 1100-1500',
            'fra': 'French',
            'frm': 'French, Middle ca. 1400-1600',
            'deu': 'German',
            'frk': 'German Fraktur',
            'ell': 'Greek, Modern (1453-)',
            'ita': 'Italian',
            'por': 'Portuguese',
            'rus': 'Russian',
            'spa': 'Spanish; Castilian',
        },
        'publishing_data': {
            'date': None,
            'title': 'nopaque OCR service',
            'url': 'https://gitlab.ub.uni-bielefeld.de/sfb1288inf/ocr/-/tree/1.0.0b',  # noqa
            'version': '1.0.0',
        },
    },
}

# Registry of all services, keyed by the identifier used in the service URL.
# 'corpus_analysis' carries a display name only; the other services also
# carry their version table.
SERVICES = {
    'corpus_analysis': {'name': 'Corpus analysis'},
    'file_setup': {
        'name': 'File setup',
        'versions': _FILE_SETUP_VERSIONS,
    },
    'nlp': {
        'name': 'Natural Language Processing',
        'versions': _NLP_VERSIONS,
    },
    'ocr': {
        'name': 'Optical Character Recognition',
        'versions': _OCR_VERSIONS,
    },
}
|
||||
|
||||
|
||||
# Blueprint that groups the service routes; the view functions in ``views``
# attach themselves to it via ``@services.route``.
services = Blueprint('services', __name__)

# Imported last on purpose: ``views`` does ``from . import services``, so the
# blueprint has to exist before that module is loaded.
from . import views
|
83
app/services/forms.py
Normal file
83
app/services/forms.py
Normal file
@ -0,0 +1,83 @@
|
||||
from flask_wtf import FlaskForm
|
||||
from wtforms import (BooleanField, MultipleFileField, SelectField, StringField,
|
||||
SubmitField, ValidationError)
|
||||
from wtforms.validators import DataRequired, Length
|
||||
from . import SERVICES
|
||||
|
||||
|
||||
class AddJobForm(FlaskForm):
    """Base form holding the fields shared by all "add job" forms.

    ``version`` is declared without choices here; the service specific
    subclasses assign them in their constructors.
    """

    description = StringField(
        'Description',
        validators=[DataRequired(), Length(min=1, max=255)]
    )
    submit = SubmitField()
    title = StringField(
        'Title',
        validators=[DataRequired(), Length(min=1, max=32)]
    )
    version = SelectField('Version', validators=[DataRequired()])
|
||||
|
||||
|
||||
class AddNLPJobForm(AddJobForm):
    """Form for submitting a Natural Language Processing job.

    The constructor accepts an optional ``version`` keyword argument naming
    the service version whose language models and options are offered.
    ``None`` and the ``'latest'`` alias both resolve to the version that
    ``SERVICES['nlp']['versions']['latest']`` points to. Any other value that
    is not a key of the version table raises ``KeyError``.
    """

    check_encoding = BooleanField('Check encoding')
    files = MultipleFileField('Files', validators=[DataRequired()])
    language = SelectField('Language', choices=[('', 'Choose your option')],
                           default='', validators=[DataRequired()])

    def validate_check_encoding(self, field):
        """Reject the option if the submitted version does not offer it."""
        if not field.data:
            return
        # The submitted version has not necessarily passed its own validation
        # yet: it may be unknown (-> None) or the 'latest' alias (-> a plain
        # string). Neither is a settings dict, so neither offers the option.
        version_spec = SERVICES['nlp']['versions'].get(self.version.data)
        if not isinstance(version_spec, dict) or 'check_encoding' not in version_spec:  # noqa
            raise ValidationError('Check encoding is not available in this version')  # noqa

    def validate_files(self, field):
        """Accept only uploads whose file name ends with ``.txt``."""
        for file in field.data:
            # Entries without a usable file name are treated as invalid
            # instead of raising AttributeError
            filename = getattr(file, 'filename', None) or ''
            if not filename.lower().endswith('.txt'):
                raise ValidationError('File does not have an approved '
                                      'extension: .txt')

    def __init__(self, *args, **kwargs):
        versions = SERVICES['nlp']['versions']
        version = kwargs.pop('version', None)
        if version is None or version == 'latest':
            # 'latest' maps to a version name, not to a settings dict
            version = versions['latest']
        super().__init__(*args, **kwargs)
        version_spec = versions[version]
        if 'check_encoding' not in version_spec:
            self.check_encoding.render_kw = {'disabled': True}
        # Build a new list rather than extending in place, so a choices list
        # that might be shared with the class level field is never mutated
        self.language.choices = (list(self.language.choices)
                                 + list(version_spec['models'].items()))
        self.version.choices = [(x, x) for x in versions if x != 'latest']
        self.version.default = version
|
||||
|
||||
|
||||
class AddOCRJobForm(AddJobForm):
    """Form for submitting an Optical Character Recognition job.

    The constructor accepts an optional ``version`` keyword argument naming
    the service version whose language models and options are offered.
    ``None`` and the ``'latest'`` alias both resolve to the version that
    ``SERVICES['ocr']['versions']['latest']`` points to. Any other value that
    is not a key of the version table raises ``KeyError``.
    """

    # Label spelling corrected (was 'Binarazation')
    binarization = BooleanField('Binarization')
    files = MultipleFileField('Files', validators=[DataRequired()])
    language = SelectField('Language', choices=[('', 'Choose your option')],
                           default='', validators=[DataRequired()])

    def validate_binarization(self, field):
        """Reject the option if the submitted version does not offer it."""
        if not field.data:
            return
        # The submitted version has not necessarily passed its own validation
        # yet: it may be unknown (-> None) or the 'latest' alias (-> a plain
        # string). Neither is a settings dict, so neither offers the option.
        version_spec = SERVICES['ocr']['versions'].get(self.version.data)
        if not isinstance(version_spec, dict) or 'binarization' not in version_spec:  # noqa
            raise ValidationError('Binarization is not available in this version')  # noqa

    def validate_files(self, field):
        """Accept only uploads whose file name ends with ``.pdf``."""
        for file in field.data:
            # Entries without a usable file name are treated as invalid
            # instead of raising AttributeError
            filename = getattr(file, 'filename', None) or ''
            if not filename.lower().endswith('.pdf'):
                raise ValidationError('File does not have an approved '
                                      'extension: .pdf')

    def __init__(self, *args, **kwargs):
        versions = SERVICES['ocr']['versions']
        version = kwargs.pop('version', None)
        if version is None or version == 'latest':
            # 'latest' maps to a version name, not to a settings dict
            version = versions['latest']
        super().__init__(*args, **kwargs)
        version_spec = versions[version]
        if 'binarization' not in version_spec:
            self.binarization.render_kw = {'disabled': True}
        # Build a new list rather than extending in place, so a choices list
        # that might be shared with the class level field is never mutated
        self.language.choices = (list(self.language.choices)
                                 + list(version_spec['models'].items()))
        self.version.choices = [(x, x) for x in versions if x != 'latest']
        self.version.default = version
|
||||
|
||||
|
||||
class AddFileSetupJobForm(AddJobForm):
    """Form for submitting a File setup job.

    The constructor accepts an optional ``version`` keyword argument that
    becomes the default of the version select. ``None`` and the ``'latest'``
    alias both resolve to the version that
    ``SERVICES['file_setup']['versions']['latest']`` points to.
    """

    files = MultipleFileField('Files', validators=[DataRequired()])

    def validate_files(self, field):
        """Accept only uploads with an approved image file extension."""
        approved_extensions = ('.jpeg', '.jpg', '.png', '.tiff', '.tif')
        for file in field.data:
            # Entries without a usable file name are treated as invalid
            # instead of raising AttributeError
            filename = getattr(file, 'filename', None) or ''
            if not filename.lower().endswith(approved_extensions):
                raise ValidationError('File does not have an approved '
                                      'extension: .jpeg | .jpg | .png | .tiff '
                                      '| .tif')

    def __init__(self, *args, **kwargs):
        versions = SERVICES['file_setup']['versions']
        version = kwargs.pop('version', None)
        if version is None or version == 'latest':
            # 'latest' is filtered out of the choices below, so use the
            # concrete version it points to as the default
            version = versions['latest']
        super().__init__(*args, **kwargs)
        self.version.choices = [(x, x) for x in versions if x != 'latest']
        self.version.default = version
|
85
app/services/views.py
Normal file
85
app/services/views.py
Normal file
@ -0,0 +1,85 @@
|
||||
from flask import (abort, flash, make_response, render_template, request,
|
||||
url_for)
|
||||
from flask_login import current_user, login_required
|
||||
from werkzeug.utils import secure_filename
|
||||
from . import services
|
||||
from . import SERVICES
|
||||
from .forms import AddFileSetupJobForm, AddNLPJobForm, AddOCRJobForm
|
||||
from .. import db, socketio
|
||||
from ..models import Job, JobInput
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
|
||||
|
||||
def _requested_version(service):
    """Return the concrete version named by ``?version=``, or ``None``.

    The ``'latest'`` alias is resolved to the version it points to. A missing
    or unknown value yields ``None``.
    """
    versions = SERVICES[service]['versions']
    version = request.args.get('version')
    if version == 'latest':
        version = versions['latest']
    # Only concrete versions map to a settings dict; the alias itself maps to
    # a string and must never be handed to a form as a version
    if isinstance(versions.get(version), dict):
        return version
    return None


@services.route('/<service>', methods=['GET', 'POST'])
@login_required
def service(service):
    """Render a service page and create a job when its form is submitted.

    Unknown services abort with 404. 'corpus_analysis' only renders its
    template. For the other services a submitted form is validated:

    * validation errors are returned as the response body with status 400,
    * a failure to create the job directory or to store an uploaded file
      rolls the database session back and returns status 500 together with
      a ``redirect_url`` pointing back to the service page,
    * on success the job is committed, a JSON patch is emitted to the
      creator's socket room and status 201 is returned with a
      ``redirect_url`` pointing to the job page.

    A request that does not submit the form renders the service template.
    """
    import shutil

    if service not in SERVICES:
        abort(404)
    if service == 'corpus_analysis':
        return render_template('services/{}.html.j2'.format(service),
                               title=SERVICES[service]['name'])
    elif service == 'file_setup':
        form = AddFileSetupJobForm(prefix='add-file-setup-job-form')
    elif service == 'nlp':
        version = _requested_version(service)
        if version is None:
            form = AddNLPJobForm(prefix='add-nlp-job-form')
        else:
            form = AddNLPJobForm(prefix='add-nlp-job-form', version=version)
            form.version.data = version
    elif service == 'ocr':
        version = _requested_version(service)
        if version is None:
            form = AddOCRJobForm(prefix='add-ocr-job-form')
        else:
            form = AddOCRJobForm(prefix='add-ocr-job-form', version=version)
            form.version.data = version
    else:
        # Registered in SERVICES but without a form: fail cleanly instead of
        # hitting an unbound ``form`` below
        abort(404)
    if form.is_submitted():
        if not form.validate():
            logging.error(form.errors)
            return make_response(form.errors, 400)
        service_args = []
        if service == 'nlp':
            service_args.append('-l {}'.format(form.language.data))
            if form.check_encoding.data:
                service_args.append('--check-encoding')
        if service == 'ocr':
            service_args.append('-l {}'.format(form.language.data))
            if form.binarization.data:
                service_args.append('--binarize')
        job = Job(creator=current_user,
                  description=form.description.data,
                  service=service, service_args=json.dumps(service_args),
                  service_version=form.version.data,
                  status='preparing', title=form.title.data)
        db.session.add(job)
        # Flush and refresh so that database generated values are available
        # on the job before its directory is created
        db.session.flush()
        db.session.refresh(job)
        # Read once, while the job is still attached to the session, so the
        # path remains usable after a rollback
        job_dir = job.path
        try:
            os.makedirs(job_dir)
        except OSError:
            logging.error('Make dir {} led to an OSError!'.format(job_dir))
            db.session.rollback()
            flash('Internal Server Error', 'error')
            return make_response(
                {'redirect_url': url_for('.service', service=service)}, 500)
        try:
            for file in form.files.data:
                filename = secure_filename(file.filename)
                job_input = JobInput(filename=filename, job=job)
                file.save(job_input.path)
                db.session.add(job_input)
        except OSError:
            logging.error(
                'Saving input files to {} led to an OSError!'.format(job_dir))
            db.session.rollback()
            # The directory was created by this request; do not leave it
            # behind without a matching job row
            shutil.rmtree(job_dir, ignore_errors=True)
            flash('Internal Server Error', 'error')
            return make_response(
                {'redirect_url': url_for('.service', service=service)}, 500)
        job.status = 'submitted'
        db.session.commit()
        flash('Job "{}" added'.format(job.title), 'job')
        event = 'user_{}_patch'.format(job.user_id)
        jsonpatch = [{'op': 'add', 'path': '/jobs/{}'.format(job.id), 'value': job.to_dict()}]  # noqa
        room = 'user_{}'.format(job.user_id)
        socketio.emit(event, jsonpatch, room=room)
        return make_response(
            {'redirect_url': url_for('jobs.job', job_id=job.id)}, 201)
    return render_template('services/{}.html.j2'.format(service),
                           form=form, title=SERVICES[service]['name'],
                           versions=SERVICES[service]['versions'])
|
Reference in New Issue
Block a user