2022-02-08 11:26:20 +00:00
|
|
|
from app import db, hashids
|
2022-04-22 13:27:52 +00:00
|
|
|
from app.models import (
|
|
|
|
Job,
|
|
|
|
JobInput,
|
|
|
|
JobStatus,
|
|
|
|
TesseractOCRModel,
|
|
|
|
TRANSKRIBUS_HTR_MODELS,
|
|
|
|
TranskribusHTRModel
|
|
|
|
)
|
2022-02-08 11:26:20 +00:00
|
|
|
from flask import (
|
|
|
|
abort,
|
|
|
|
current_app,
|
|
|
|
flash,
|
|
|
|
make_response,
|
|
|
|
render_template,
|
|
|
|
request,
|
|
|
|
url_for
|
|
|
|
)
|
2019-07-19 11:28:17 +00:00
|
|
|
from flask_login import current_user, login_required
|
2019-10-16 14:52:05 +00:00
|
|
|
from werkzeug.utils import secure_filename
|
2021-09-13 09:45:43 +00:00
|
|
|
from . import bp
|
2021-02-19 12:00:52 +00:00
|
|
|
from . import SERVICES
|
2022-04-04 11:31:09 +00:00
|
|
|
from .forms import (
|
|
|
|
AddFileSetupPipelineJobForm,
|
|
|
|
AddTesseractOCRPipelineJobForm,
|
|
|
|
AddTranskribusHTRPipelineJobForm,
|
|
|
|
AddSpacyNLPPipelineJobForm
|
|
|
|
)
|
2019-08-06 12:27:41 +00:00
|
|
|
import json
|
2019-07-19 11:28:17 +00:00
|
|
|
|
|
|
|
|
2022-04-04 11:31:09 +00:00
|
|
|
@bp.route('/file-setup-pipeline', methods=['GET', 'POST'])
@login_required
def file_setup_pipeline():
    """Show the File Setup Pipeline form and create a job from a submission.

    The service version is read from the ``version`` query argument and
    defaults to the manifest's latest version; an unknown version aborts
    with 404.

    Responses:
        * form not submitted: the rendered service page.
        * 400: the form errors, when validation fails.
        * 500: a JSON body with ``redirect_url``, when the job directories
          or one of the uploaded images cannot be written (``OSError``).
        * 201: a JSON body with ``redirect_url`` pointing at the new job.
    """
    service = 'file-setup-pipeline'
    manifest = SERVICES[service]
    version = request.args.get('version', manifest['latest_version'])
    if version not in manifest['versions']:
        abort(404)
    form = AddFileSetupPipelineJobForm(prefix='add-job-form', version=version)

    if not form.is_submitted():
        return render_template(
            'services/file_setup_pipeline.html.j2',
            form=form,
            title=manifest['name']
        )
    if not form.validate():
        return make_response(form.errors, 400)

    def storage_failure(error):
        # Shared handling for filesystem errors: log, drop the pending
        # database rows, flash a message and reply with a redirect URL for
        # the '.service' endpoint.
        current_app.logger.error(error)
        db.session.rollback()
        flash('Internal Server Error', 'error')
        redirect_url = url_for('.service', service=service)
        return make_response({'redirect_url': redirect_url}, 500)

    # This service takes no extra arguments.
    job = Job(
        user=current_user,
        description=form.description.data,
        service=service,
        service_args={},
        service_version=form.version.data,
        title=form.title.data
    )
    db.session.add(job)
    # Flush and refresh before touching the filesystem.
    # NOTE(review): presumably the job's paths depend on database-generated
    # values such as its id - confirm against the Job model.
    db.session.flush(objects=[job])
    db.session.refresh(job)
    try:
        job.makedirs()
    except OSError as error:
        return storage_failure(error)

    # One JobInput row (and one stored file) per uploaded image.
    for upload in form.images.data:
        job_input = JobInput(
            filename=secure_filename(upload.filename),
            job=job,
            mimetype=upload.mimetype
        )
        db.session.add(job_input)
        db.session.flush(objects=[job_input])
        db.session.refresh(job_input)
        try:
            upload.save(job_input.path)
        except OSError as error:
            return storage_failure(error)

    job.status = JobStatus.SUBMITTED
    db.session.commit()
    flash(f'Job "{job.title}" added', 'job')
    job_url = url_for('jobs.job', job_id=job.id)
    return make_response({'redirect_url': job_url}, 201)
|
|
|
|
|
|
|
|
|
|
|
|
@bp.route('/tesseract-ocr-pipeline', methods=['GET', 'POST'])
@login_required
def tesseract_ocr_pipeline():
    """Show the Tesseract OCR Pipeline form and create a job from it.

    The service version is read from the ``version`` query argument and
    defaults to the manifest's latest version; an unknown version aborts
    with 404.

    Responses:
        * form not submitted: the rendered service page, including the
          Tesseract OCR models that list the requested version as
          compatible and are either shared or owned by the current user.
        * 400: the form errors, when validation fails.
        * 500: a JSON body with ``redirect_url``, when the job directories
          or the uploaded PDF cannot be written (``OSError``).
        * 201: a JSON body with ``redirect_url`` pointing at the new job.
    """
    service = 'tesseract-ocr-pipeline'
    service_manifest = SERVICES[service]
    version = request.args.get('version', service_manifest['latest_version'])
    if version not in service_manifest['versions']:
        abort(404)
    form = AddTesseractOCRPipelineJobForm(prefix='add-job-form', version=version)
    if form.is_submitted():
        if not form.validate():
            return make_response(form.errors, 400)
        # The form transports the model as a hashid; the job stores the
        # decoded value.
        service_args = {'model': hashids.decode(form.model.data)}
        # The binarization flag is only recorded when it was requested.
        if form.binarization.data:
            service_args['binarization'] = True
        job = Job(
            user=current_user,
            description=form.description.data,
            service=service,
            service_args=service_args,
            service_version=form.version.data,
            title=form.title.data
        )
        db.session.add(job)
        db.session.flush(objects=[job])
        db.session.refresh(job)
        try:
            job.makedirs()
        except OSError as e:
            current_app.logger.error(e)
            db.session.rollback()
            flash('Internal Server Error', 'error')
            return make_response(
                {'redirect_url': url_for('.service', service=service)},
                500
            )
        job_input = JobInput(
            filename=secure_filename(form.pdf.data.filename),
            job=job,
            mimetype=form.pdf.data.mimetype
        )
        db.session.add(job_input)
        db.session.flush(objects=[job_input])
        db.session.refresh(job_input)
        try:
            form.pdf.data.save(job_input.path)
        except OSError as e:
            current_app.logger.error(e)
            db.session.rollback()
            flash('Internal Server Error', 'error')
            return make_response(
                {'redirect_url': url_for('.service', service=service)},
                500
            )
        job.status = JobStatus.SUBMITTED
        db.session.commit()
        flash(f'Job "{job.title}" added', 'job')
        return make_response(
            {'redirect_url': url_for('jobs.job', job_id=job.id)},
            201
        )
    # NOTE(review): the truthiness test assumes 'shared' holds a boolean
    # (or None) - confirm against the TesseractOCRModel definition.
    tesseract_ocr_models = [
        model for model in TesseractOCRModel.query.all()
        if version in model.compatible_service_versions
        and (model.shared or model.user == current_user)
    ]
    return render_template(
        'services/tesseract_ocr_pipeline.html.j2',
        form=form,
        tesseract_ocr_models=tesseract_ocr_models,
        title=service_manifest['name']
    )
|
|
|
|
|
|
|
|
|
|
|
|
@bp.route('/transkribus-htr-pipeline', methods=['GET', 'POST'])
@login_required
def transkribus_htr_pipeline():
    """Show the Transkribus HTR Pipeline form and create a job from it.

    The view aborts with 404 unless the ``NOPAQUE_TRANSKRIBUS_ENABLED``
    application config value is truthy. The service version is read from
    the ``version`` query argument and defaults to the manifest's latest
    version; an unknown version aborts with 404 as well.

    Responses:
        * form not submitted: the rendered service page, including the
          Transkribus HTR models that are either shared or owned by the
          current user.
        * 400: the form errors, when validation fails.
        * 500: a JSON body with ``redirect_url``, when the job directories
          or the uploaded PDF cannot be written (``OSError``).
        * 201: a JSON body with ``redirect_url`` pointing at the new job.
    """
    if not current_app.config.get('NOPAQUE_TRANSKRIBUS_ENABLED'):
        abort(404)
    service = 'transkribus-htr-pipeline'
    service_manifest = SERVICES[service]
    version = request.args.get('version', service_manifest['latest_version'])
    if version not in service_manifest['versions']:
        abort(404)
    form = AddTranskribusHTRPipelineJobForm(prefix='add-job-form', version=version)
    if form.is_submitted():
        if not form.validate():
            return make_response(form.errors, 400)
        # The form transports the model as a hashid; the job stores the
        # decoded value.
        service_args = {'model': hashids.decode(form.model.data)}
        # The binarization flag is only recorded when it was requested.
        if form.binarization.data:
            service_args['binarization'] = True
        job = Job(
            user=current_user,
            description=form.description.data,
            service=service,
            service_args=service_args,
            service_version=form.version.data,
            title=form.title.data
        )
        db.session.add(job)
        db.session.flush(objects=[job])
        db.session.refresh(job)
        try:
            job.makedirs()
        except OSError as e:
            current_app.logger.error(e)
            db.session.rollback()
            flash('Internal Server Error', 'error')
            return make_response(
                {'redirect_url': url_for('.service', service=service)},
                500
            )
        job_input = JobInput(
            filename=secure_filename(form.pdf.data.filename),
            job=job,
            mimetype=form.pdf.data.mimetype
        )
        db.session.add(job_input)
        db.session.flush(objects=[job_input])
        db.session.refresh(job_input)
        try:
            form.pdf.data.save(job_input.path)
        except OSError as e:
            current_app.logger.error(e)
            db.session.rollback()
            flash('Internal Server Error', 'error')
            return make_response(
                {'redirect_url': url_for('.service', service=service)},
                500
            )
        job.status = JobStatus.SUBMITTED
        db.session.commit()
        flash(f'Job "{job.title}" added', 'job')
        return make_response(
            {'redirect_url': url_for('jobs.job', job_id=job.id)},
            201
        )
    # NOTE(review): the truthiness test assumes 'shared' holds a boolean
    # (or None) - confirm against the TranskribusHTRModel definition.
    transkribus_htr_models = [
        model for model in TranskribusHTRModel.query.all()
        if model.shared or model.user == current_user
    ]
    return render_template(
        'services/transkribus_htr_pipeline.html.j2',
        form=form,
        title=service_manifest['name'],
        TRANSKRIBUS_HTR_MODELS=TRANSKRIBUS_HTR_MODELS,
        transkribus_htr_models=transkribus_htr_models
    )
|
2022-04-04 11:31:09 +00:00
|
|
|
|
|
|
|
|
|
|
|
@bp.route('/spacy-nlp-pipeline', methods=['GET', 'POST'])
@login_required
def spacy_nlp_pipeline():
    """Show the spaCy NLP Pipeline form and create a job from it.

    The service version is read from the ``version`` query argument and
    defaults to the manifest's latest version; an unknown version aborts
    with 404.

    Responses:
        * form not submitted: the rendered service page.
        * 400: the form errors, when validation fails.
        * 500: a JSON body with ``redirect_url``, when the job directories
          or the uploaded text file cannot be written (``OSError``).
        * 201: a JSON body with ``redirect_url`` pointing at the new job.
    """
    service = 'spacy-nlp-pipeline'
    service_manifest = SERVICES[service]
    # Read the default from the already bound manifest, like the other
    # pipeline views do, instead of indexing SERVICES a second time.
    version = request.args.get('version', service_manifest['latest_version'])
    if version not in service_manifest['versions']:
        abort(404)
    form = AddSpacyNLPPipelineJobForm(prefix='add-job-form', version=version)
    if form.is_submitted():
        if not form.validate():
            return make_response(form.errors, 400)
        # The model value is stored exactly as the form delivers it.
        service_args = {'model': form.model.data}
        # The encoding detection flag is only recorded when it was requested.
        if form.encoding_detection.data:
            service_args['encoding_detection'] = True
        job = Job(
            user=current_user,
            description=form.description.data,
            service=service,
            service_args=service_args,
            service_version=form.version.data,
            title=form.title.data
        )
        db.session.add(job)
        db.session.flush(objects=[job])
        db.session.refresh(job)
        try:
            job.makedirs()
        except OSError as e:
            current_app.logger.error(e)
            db.session.rollback()
            flash('Internal Server Error', 'error')
            return make_response(
                {'redirect_url': url_for('.service', service=service)},
                500
            )
        job_input = JobInput(
            filename=secure_filename(form.txt.data.filename),
            job=job,
            mimetype=form.txt.data.mimetype
        )
        db.session.add(job_input)
        db.session.flush(objects=[job_input])
        db.session.refresh(job_input)
        try:
            form.txt.data.save(job_input.path)
        except OSError as e:
            current_app.logger.error(e)
            db.session.rollback()
            flash('Internal Server Error', 'error')
            return make_response(
                {'redirect_url': url_for('.service', service=service)},
                500
            )
        job.status = JobStatus.SUBMITTED
        db.session.commit()
        flash(f'Job "{job.title}" added', 'job')
        return make_response(
            {'redirect_url': url_for('jobs.job', job_id=job.id)},
            201
        )
    return render_template(
        'services/spacy_nlp_pipeline.html.j2',
        form=form,
        title=service_manifest['name']
    )
|
|
|
|
|
|
|
|
|
|
|
|
@bp.route('/corpus-analysis')
@login_required
def corpus_analysis():
    """Render the corpus analysis service page (login required)."""
    template = 'services/corpus_analysis.html.j2'
    return render_template(template, title='Corpus analysis')
|