mirror of
https://gitlab.ub.uni-bielefeld.de/sfb1288inf/nopaque.git
synced 2025-06-15 18:40:40 +00:00
move blueprints in dedicated folder
This commit is contained in:
24
app/blueprints/services/__init__.py
Normal file
24
app/blueprints/services/__init__.py
Normal file
@ -0,0 +1,24 @@
|
||||
from flask import Blueprint
|
||||
from flask_login import login_required
|
||||
from pathlib import Path
|
||||
import yaml
|
||||
|
||||
|
||||
services_file = Path(__file__).parent / 'services.yml'
|
||||
with services_file.open('r') as f:
|
||||
SERVICES = yaml.safe_load(f)
|
||||
|
||||
bp = Blueprint('services', __name__)
|
||||
|
||||
|
||||
@bp.before_request
|
||||
@login_required
|
||||
def before_request():
|
||||
'''
|
||||
Ensures that the routes in this package can only be visited by users that
|
||||
are logged in.
|
||||
'''
|
||||
pass
|
||||
|
||||
|
||||
from . import routes # noqa
|
192
app/blueprints/services/forms.py
Normal file
192
app/blueprints/services/forms.py
Normal file
@ -0,0 +1,192 @@
|
||||
from flask_wtf import FlaskForm
|
||||
from flask_login import current_user
|
||||
from flask_wtf.file import FileField, FileRequired
|
||||
from wtforms import (
|
||||
BooleanField,
|
||||
DecimalRangeField,
|
||||
MultipleFileField,
|
||||
SelectField,
|
||||
StringField,
|
||||
SubmitField,
|
||||
ValidationError
|
||||
)
|
||||
from wtforms.validators import InputRequired, Length
|
||||
from app.models import SpaCyNLPPipelineModel, TesseractOCRPipelineModel
|
||||
from . import SERVICES
|
||||
|
||||
|
||||
class CreateJobBaseForm(FlaskForm):
|
||||
description = StringField(
|
||||
'Description',
|
||||
validators=[InputRequired(), Length(max=255)]
|
||||
)
|
||||
title = StringField(
|
||||
'Title',
|
||||
validators=[InputRequired(), Length(max=32)]
|
||||
)
|
||||
version = SelectField('Version', validators=[InputRequired()])
|
||||
submit = SubmitField()
|
||||
|
||||
|
||||
class CreateFileSetupPipelineJobForm(CreateJobBaseForm):
|
||||
images = MultipleFileField('File(s)', validators=[InputRequired()])
|
||||
|
||||
def validate_images(form, field):
|
||||
valid_mimetypes = ['image/jpeg', 'image/png', 'image/tiff']
|
||||
for image in field.data:
|
||||
if image.mimetype not in valid_mimetypes:
|
||||
raise ValidationError('JPEG, PNG and TIFF files only!')
|
||||
|
||||
def __init__(self, *args, **kwargs):
|
||||
if 'prefix' not in kwargs:
|
||||
kwargs['prefix'] = 'create-file-setup-pipeline-job-form'
|
||||
service_manifest = SERVICES['file-setup-pipeline']
|
||||
version = kwargs.pop('version', service_manifest['latest_version'])
|
||||
super().__init__(*args, **kwargs)
|
||||
self.version.choices = [(x, x) for x in service_manifest['versions']]
|
||||
self.version.data = version
|
||||
self.version.default = service_manifest['latest_version']
|
||||
|
||||
|
||||
class CreateTesseractOCRPipelineJobForm(CreateJobBaseForm):
|
||||
binarization = BooleanField('Binarization')
|
||||
pdf = FileField('File', validators=[FileRequired()])
|
||||
model = SelectField('Model', validators=[InputRequired()])
|
||||
ocropus_nlbin_threshold = DecimalRangeField(
|
||||
render_kw={'min': 0, 'max': 1, 'step': 0.1, 'start': [0.5], 'disabled': True}
|
||||
)
|
||||
|
||||
def validate_binarization(self, field):
|
||||
service_info = SERVICES['tesseract-ocr-pipeline']['versions'][self.version.data]
|
||||
if field.data:
|
||||
if not('methods' in service_info and 'binarization' in service_info['methods']):
|
||||
raise ValidationError('Binarization is not available')
|
||||
|
||||
def validate_pdf(self, field):
|
||||
if field.data.mimetype != 'application/pdf':
|
||||
raise ValidationError('PDF files only!')
|
||||
|
||||
def __init__(self, *args, **kwargs):
|
||||
if 'prefix' not in kwargs:
|
||||
kwargs['prefix'] = 'create-tesseract-ocr-pipeline-job-form'
|
||||
service_manifest = SERVICES['tesseract-ocr-pipeline']
|
||||
version = kwargs.pop('version', service_manifest['latest_version'])
|
||||
super().__init__(*args, **kwargs)
|
||||
service_info = service_manifest['versions'][version]
|
||||
if self.binarization.render_kw is None:
|
||||
self.binarization.render_kw = {}
|
||||
self.binarization.render_kw['disabled'] = True
|
||||
if self.ocropus_nlbin_threshold.render_kw is None:
|
||||
self.ocropus_nlbin_threshold.render_kw = {}
|
||||
self.ocropus_nlbin_threshold.render_kw['disabled'] = True
|
||||
if 'methods' in service_info:
|
||||
if 'binarization' in service_info['methods']:
|
||||
del self.binarization.render_kw['disabled']
|
||||
if 'ocropus_nlbin_threshold' in service_info['methods']:
|
||||
del self.ocropus_nlbin_threshold.render_kw['disabled']
|
||||
user_models = [
|
||||
x for x in current_user.tesseract_ocr_pipeline_models.order_by(TesseractOCRPipelineModel.title).all()
|
||||
]
|
||||
models = [
|
||||
x for x in TesseractOCRPipelineModel.query.order_by(TesseractOCRPipelineModel.title).all()
|
||||
if version in x.compatible_service_versions and (x.is_public == True or x.user == current_user)
|
||||
]
|
||||
self.model.choices = {
|
||||
'': [('', 'Choose your option')],
|
||||
'Your models': [(x.hashid, f'{x.title} [{x.version}]') for x in user_models] if user_models else [(0, 'Nothing here yet...')],
|
||||
'Public models': [(x.hashid, f'{x.title} [{x.version}]') for x in models]
|
||||
}
|
||||
self.model.default = ''
|
||||
self.version.choices = [(x, x) for x in service_manifest['versions']]
|
||||
self.version.data = version
|
||||
self.version.default = service_manifest['latest_version']
|
||||
|
||||
|
||||
class CreateTranskribusHTRPipelineJobForm(CreateJobBaseForm):
|
||||
binarization = BooleanField('Binarization')
|
||||
pdf = FileField('File', validators=[FileRequired()])
|
||||
model = SelectField('Model', validators=[InputRequired()])
|
||||
|
||||
def validate_binarization(self, field):
|
||||
service_info = SERVICES['transkribus-htr-pipeline']['versions'][self.version.data]
|
||||
if field.data:
|
||||
if(
|
||||
'methods' not in service_info
|
||||
or 'binarization' not in service_info['methods']
|
||||
):
|
||||
raise ValidationError('Binarization is not available')
|
||||
|
||||
def validate_pdf(self, field):
|
||||
if field.data.mimetype != 'application/pdf':
|
||||
raise ValidationError('PDF files only!')
|
||||
|
||||
def __init__(self, *args, **kwargs):
|
||||
if 'prefix' not in kwargs:
|
||||
kwargs['prefix'] = 'create-transkribus-htr-pipeline-job-form'
|
||||
transkribus_htr_pipeline_models = kwargs.pop('transkribus_htr_pipeline_models', [])
|
||||
service_manifest = SERVICES['transkribus-htr-pipeline']
|
||||
version = kwargs.pop('version', service_manifest['latest_version'])
|
||||
super().__init__(*args, **kwargs)
|
||||
service_info = service_manifest['versions'][version]
|
||||
if self.binarization.render_kw is None:
|
||||
self.binarization.render_kw = {}
|
||||
self.binarization.render_kw['disabled'] = True
|
||||
if 'methods' in service_info:
|
||||
if 'binarization' in service_info['methods']:
|
||||
del self.binarization.render_kw['disabled']
|
||||
self.model.choices = [('', 'Choose your option')]
|
||||
self.model.choices += [(x['modelId'], x['name']) for x in transkribus_htr_pipeline_models]
|
||||
self.model.default = ''
|
||||
self.version.choices = [(x, x) for x in service_manifest['versions']]
|
||||
self.version.data = version
|
||||
self.version.default = service_manifest['latest_version']
|
||||
|
||||
|
||||
class CreateSpacyNLPPipelineJobForm(CreateJobBaseForm):
|
||||
encoding_detection = BooleanField('Encoding detection', render_kw={'disabled': True})
|
||||
txt = FileField('File', validators=[FileRequired()])
|
||||
model = SelectField('Model', validators=[InputRequired()])
|
||||
|
||||
def validate_encoding_detection(self, field):
|
||||
service_info = SERVICES['spacy-nlp-pipeline']['versions'][self.version.data]
|
||||
if field.data:
|
||||
if(
|
||||
'methods' not in service_info
|
||||
or 'encoding_detection' not in service_info['methods']
|
||||
):
|
||||
raise ValidationError('Encoding detection is not available')
|
||||
|
||||
def validate_txt(form, field):
|
||||
if field.data.mimetype != 'text/plain':
|
||||
raise ValidationError('Plain text files only!')
|
||||
|
||||
def __init__(self, *args, **kwargs):
|
||||
if 'prefix' not in kwargs:
|
||||
kwargs['prefix'] = 'create-spacy-nlp-pipeline-job-form'
|
||||
service_manifest = SERVICES['spacy-nlp-pipeline']
|
||||
version = kwargs.pop('version', service_manifest['latest_version'])
|
||||
super().__init__(*args, **kwargs)
|
||||
service_info = service_manifest['versions'][version]
|
||||
print(service_info)
|
||||
if self.encoding_detection.render_kw is None:
|
||||
self.encoding_detection.render_kw = {}
|
||||
self.encoding_detection.render_kw['disabled'] = True
|
||||
if 'methods' in service_info:
|
||||
if 'encoding_detection' in service_info['methods']:
|
||||
del self.encoding_detection.render_kw['disabled']
|
||||
user_models = [
|
||||
x for x in current_user.spacy_nlp_pipeline_models.order_by(SpaCyNLPPipelineModel.title).all()
|
||||
]
|
||||
models = [
|
||||
x for x in SpaCyNLPPipelineModel.query.filter(SpaCyNLPPipelineModel.user != current_user, SpaCyNLPPipelineModel.is_public == True).order_by(SpaCyNLPPipelineModel.title).all()
|
||||
if version in x.compatible_service_versions
|
||||
]
|
||||
self.model.choices = {
|
||||
'': [('', 'Choose your option')],
|
||||
'Your models': [(x.hashid, f'{x.title} [{x.version}]') for x in user_models] if user_models else [(0, 'Nothing here yet...')],
|
||||
'Public models': [(x.hashid, f'{x.title} [{x.version}]') for x in models]
|
||||
}
|
||||
self.model.default = ''
|
||||
self.version.choices = [(x, x) for x in service_manifest['versions']]
|
||||
self.version.data = version
|
||||
self.version.default = version
|
224
app/blueprints/services/routes.py
Normal file
224
app/blueprints/services/routes.py
Normal file
@ -0,0 +1,224 @@
|
||||
from flask import abort, current_app, flash, redirect, render_template, request, url_for
|
||||
from flask_login import current_user
|
||||
import requests
|
||||
from app import db, hashids
|
||||
from app.models import (
|
||||
Job,
|
||||
JobInput,
|
||||
JobStatus,
|
||||
TesseractOCRPipelineModel,
|
||||
SpaCyNLPPipelineModel
|
||||
)
|
||||
from . import bp, SERVICES
|
||||
from .forms import (
|
||||
CreateFileSetupPipelineJobForm,
|
||||
CreateTesseractOCRPipelineJobForm,
|
||||
CreateTranskribusHTRPipelineJobForm,
|
||||
CreateSpacyNLPPipelineJobForm
|
||||
)
|
||||
|
||||
|
||||
@bp.route('/services')
|
||||
def services():
|
||||
return redirect(url_for('main.dashboard'))
|
||||
|
||||
|
||||
@bp.route('/file-setup-pipeline', methods=['GET', 'POST'])
|
||||
def file_setup_pipeline():
|
||||
service = 'file-setup-pipeline'
|
||||
service_manifest = SERVICES[service]
|
||||
version = request.args.get('version', service_manifest['latest_version'])
|
||||
if version not in service_manifest['versions']:
|
||||
abort(404)
|
||||
form = CreateFileSetupPipelineJobForm(prefix='create-job-form', version=version)
|
||||
if form.is_submitted():
|
||||
if not form.validate():
|
||||
response = {'errors': form.errors}
|
||||
return response, 400
|
||||
try:
|
||||
job = Job.create(
|
||||
title=form.title.data,
|
||||
description=form.description.data,
|
||||
service=service,
|
||||
service_args={},
|
||||
service_version=form.version.data,
|
||||
user=current_user
|
||||
)
|
||||
except OSError:
|
||||
abort(500)
|
||||
for input_file in form.images.data:
|
||||
try:
|
||||
JobInput.create(input_file, job=job)
|
||||
except (AttributeError, OSError):
|
||||
abort(500)
|
||||
job.status = JobStatus.SUBMITTED
|
||||
db.session.commit()
|
||||
message = f'Job "<a href="{job.url}">{job.title}</a>" created'
|
||||
flash(message, 'job')
|
||||
return {}, 201, {'Location': job.url}
|
||||
return render_template(
|
||||
'services/file_setup_pipeline.html.j2',
|
||||
title=service_manifest['name'],
|
||||
form=form
|
||||
)
|
||||
|
||||
|
||||
@bp.route('/tesseract-ocr-pipeline', methods=['GET', 'POST'])
|
||||
def tesseract_ocr_pipeline():
|
||||
service_name = 'tesseract-ocr-pipeline'
|
||||
service_manifest = SERVICES[service_name]
|
||||
version = request.args.get('version', service_manifest['latest_version'])
|
||||
if version not in service_manifest['versions']:
|
||||
abort(404)
|
||||
form = CreateTesseractOCRPipelineJobForm(prefix='create-job-form', version=version)
|
||||
if form.is_submitted():
|
||||
if not form.validate():
|
||||
response = {'errors': form.errors}
|
||||
return response, 400
|
||||
try:
|
||||
job = Job.create(
|
||||
title=form.title.data,
|
||||
description=form.description.data,
|
||||
service=service_name,
|
||||
service_args={
|
||||
'binarization': form.binarization.data,
|
||||
'model': hashids.decode(form.model.data),
|
||||
'ocropus_nlbin_threshold': float(form.ocropus_nlbin_threshold.data)
|
||||
},
|
||||
service_version=form.version.data,
|
||||
user=current_user
|
||||
)
|
||||
except OSError:
|
||||
abort(500)
|
||||
try:
|
||||
JobInput.create(form.pdf.data, job=job)
|
||||
except (AttributeError, OSError):
|
||||
abort(500)
|
||||
job.status = JobStatus.SUBMITTED
|
||||
db.session.commit()
|
||||
message = f'Job "<a href="{job.url}">{job.title}</a>" created'
|
||||
flash(message, 'job')
|
||||
return {}, 201, {'Location': job.url}
|
||||
tesseract_ocr_pipeline_models = [
|
||||
x for x in TesseractOCRPipelineModel.query.all()
|
||||
if version in x.compatible_service_versions and (x.is_public == True or x.user == current_user)
|
||||
]
|
||||
user_tesseract_ocr_pipeline_models_count = len(current_user.tesseract_ocr_pipeline_models.all())
|
||||
return render_template(
|
||||
'services/tesseract_ocr_pipeline.html.j2',
|
||||
title=service_manifest['name'],
|
||||
form=form,
|
||||
tesseract_ocr_pipeline_models=tesseract_ocr_pipeline_models,
|
||||
user_tesseract_ocr_pipeline_models_count=user_tesseract_ocr_pipeline_models_count
|
||||
)
|
||||
|
||||
|
||||
@bp.route('/transkribus-htr-pipeline', methods=['GET', 'POST'])
|
||||
def transkribus_htr_pipeline():
|
||||
if not current_app.config.get('NOPAQUE_TRANSKRIBUS_ENABLED'):
|
||||
abort(404)
|
||||
service = 'transkribus-htr-pipeline'
|
||||
service_manifest = SERVICES[service]
|
||||
version = request.args.get('version', service_manifest['latest_version'])
|
||||
if version not in service_manifest['versions']:
|
||||
abort(404)
|
||||
r = requests.get(
|
||||
'https://transkribus.eu/TrpServer/rest/models/text',
|
||||
headers={'Accept': 'application/json'}
|
||||
)
|
||||
if r.status_code != 200:
|
||||
abort(500)
|
||||
transkribus_htr_pipeline_models = r.json()['trpModelMetadata']
|
||||
transkribus_htr_pipeline_models.append({'modelId': 48513, 'name': 'Caroline Minuscle', 'language': 'lat', 'isoLanguages': ['lat']})
|
||||
form = CreateTranskribusHTRPipelineJobForm(
|
||||
prefix='create-job-form',
|
||||
transkribus_htr_pipeline_models=transkribus_htr_pipeline_models,
|
||||
version=version
|
||||
)
|
||||
if form.is_submitted():
|
||||
if not form.validate():
|
||||
response = {'errors': form.errors}
|
||||
return response, 400
|
||||
try:
|
||||
job = Job.create(
|
||||
title=form.title.data,
|
||||
description=form.description.data,
|
||||
service=service,
|
||||
service_args={
|
||||
'binarization': form.binarization.data,
|
||||
'model': form.model.data
|
||||
},
|
||||
service_version=form.version.data,
|
||||
user=current_user
|
||||
)
|
||||
except OSError:
|
||||
abort(500)
|
||||
try:
|
||||
JobInput.create(form.pdf.data, job=job)
|
||||
except (AttributeError, OSError):
|
||||
abort(500)
|
||||
job.status = JobStatus.SUBMITTED
|
||||
db.session.commit()
|
||||
message = f'Job "<a href="{job.url}">{job.title}</a>" created'
|
||||
flash(message, 'job')
|
||||
return {}, 201, {'Location': job.url}
|
||||
return render_template(
|
||||
'services/transkribus_htr_pipeline.html.j2',
|
||||
title=service_manifest['name'],
|
||||
form=form,
|
||||
transkribus_htr_pipeline_models=transkribus_htr_pipeline_models
|
||||
)
|
||||
|
||||
|
||||
@bp.route('/spacy-nlp-pipeline', methods=['GET', 'POST'])
|
||||
def spacy_nlp_pipeline():
|
||||
service = 'spacy-nlp-pipeline'
|
||||
service_manifest = SERVICES[service]
|
||||
version = request.args.get('version', SERVICES[service]['latest_version'])
|
||||
if version not in service_manifest['versions']:
|
||||
abort(404)
|
||||
form = CreateSpacyNLPPipelineJobForm(prefix='create-job-form', version=version)
|
||||
spacy_nlp_pipeline_models = SpaCyNLPPipelineModel.query.all()
|
||||
user_spacy_nlp_pipeline_models_count = len(current_user.spacy_nlp_pipeline_models.all())
|
||||
if form.is_submitted():
|
||||
if not form.validate():
|
||||
response = {'errors': form.errors}
|
||||
return response, 400
|
||||
try:
|
||||
job = Job.create(
|
||||
title=form.title.data,
|
||||
description=form.description.data,
|
||||
service=service,
|
||||
service_args={
|
||||
'encoding_detection': form.encoding_detection.data,
|
||||
'model': form.model.data
|
||||
},
|
||||
service_version=form.version.data,
|
||||
user=current_user
|
||||
)
|
||||
except OSError:
|
||||
abort(500)
|
||||
try:
|
||||
JobInput.create(form.txt.data, job=job)
|
||||
except (AttributeError, OSError):
|
||||
abort(500)
|
||||
job.status = JobStatus.SUBMITTED
|
||||
db.session.commit()
|
||||
message = f'Job "<a href="{job.url}">{job.title}</a>" created'
|
||||
flash(message, 'job')
|
||||
return {}, 201, {'Location': job.url}
|
||||
return render_template(
|
||||
'services/spacy_nlp_pipeline.html.j2',
|
||||
title=service_manifest['name'],
|
||||
form=form,
|
||||
spacy_nlp_pipeline_models=spacy_nlp_pipeline_models,
|
||||
user_spacy_nlp_pipeline_models_count=user_spacy_nlp_pipeline_models_count
|
||||
)
|
||||
|
||||
|
||||
@bp.route('/corpus-analysis')
|
||||
def corpus_analysis():
|
||||
return render_template(
|
||||
'services/corpus_analysis.html.j2',
|
||||
title='Corpus Analysis'
|
||||
)
|
66
app/blueprints/services/services.yml
Normal file
66
app/blueprints/services/services.yml
Normal file
@ -0,0 +1,66 @@
|
||||
# TODO: This could also be done via GitLab/GitHub APIs
|
||||
file-setup-pipeline:
|
||||
name: 'File Setup Pipeline'
|
||||
publisher: 'Bielefeld University - CRC 1288 - INF'
|
||||
latest_version: '0.1.0'
|
||||
versions:
|
||||
0.1.0:
|
||||
publishing_year: 2022
|
||||
url: 'https://gitlab.ub.uni-bielefeld.de/sfb1288inf/file-setup-pipeline/-/releases/v0.1.0'
|
||||
tesseract-ocr-pipeline:
|
||||
name: 'Tesseract OCR Pipeline'
|
||||
publisher: 'Bielefeld University - CRC 1288 - INF'
|
||||
latest_version: '0.1.2'
|
||||
versions:
|
||||
0.1.0:
|
||||
methods:
|
||||
- 'binarization'
|
||||
publishing_year: 2022
|
||||
url: 'https://gitlab.ub.uni-bielefeld.de/sfb1288inf/tesseract-ocr-pipeline/-/releases/v0.1.0'
|
||||
0.1.1:
|
||||
methods:
|
||||
- 'binarization'
|
||||
- 'ocropus_nlbin_threshold'
|
||||
publishing_year: 2022
|
||||
url: 'https://gitlab.ub.uni-bielefeld.de/sfb1288inf/tesseract-ocr-pipeline/-/releases/v0.1.1'
|
||||
0.1.2:
|
||||
methods:
|
||||
- 'binarization'
|
||||
- 'ocropus_nlbin_threshold'
|
||||
publishing_year: 2023
|
||||
url: 'https://gitlab.ub.uni-bielefeld.de/sfb1288inf/tesseract-ocr-pipeline/-/releases/v0.1.2'
|
||||
transkribus-htr-pipeline:
|
||||
name: 'Transkribus HTR Pipeline'
|
||||
publisher: 'Bielefeld University - CRC 1288 - INF'
|
||||
latest_version: '0.1.1'
|
||||
versions:
|
||||
0.1.0:
|
||||
methods:
|
||||
- 'binarization'
|
||||
publishing_year: 2022
|
||||
url: 'https://gitlab.ub.uni-bielefeld.de/sfb1288inf/transkribus-htr-pipeline/-/releases/v0.1.0'
|
||||
0.1.1:
|
||||
methods:
|
||||
- 'binarization'
|
||||
publishing_year: 2022
|
||||
url: 'https://gitlab.ub.uni-bielefeld.de/sfb1288inf/transkribus-htr-pipeline/-/releases/v0.1.1'
|
||||
spacy-nlp-pipeline:
|
||||
name: 'SpaCy NLP Pipeline'
|
||||
publisher: 'Bielefeld University - CRC 1288 - INF'
|
||||
latest_version: '0.1.1'
|
||||
versions:
|
||||
0.1.0:
|
||||
methods:
|
||||
- 'encoding_detection'
|
||||
publishing_year: 2022
|
||||
url: 'https://gitlab.ub.uni-bielefeld.de/sfb1288inf/spacy-nlp-pipeline/-/releases/v0.1.0'
|
||||
0.1.1:
|
||||
methods:
|
||||
- 'encoding_detection'
|
||||
publishing_year: 2022
|
||||
url: 'https://gitlab.ub.uni-bielefeld.de/sfb1288inf/spacy-nlp-pipeline/-/releases/v0.1.1'
|
||||
0.1.2:
|
||||
methods:
|
||||
- 'encoding_detection'
|
||||
publishing_year: 2024
|
||||
url: 'https://gitlab.ub.uni-bielefeld.de/sfb1288inf/spacy-nlp-pipeline/-/releases/v0.1.2'
|
Reference in New Issue
Block a user