2024-04-30 08:41:29 +02:00
|
|
|
from flask import abort, current_app, flash, redirect, render_template, request, url_for
|
2023-04-11 11:46:33 +02:00
|
|
|
from flask_login import current_user
|
2022-10-12 15:10:55 +02:00
|
|
|
import requests
|
2022-02-08 12:26:20 +01:00
|
|
|
from app import db, hashids
|
2022-04-22 15:27:52 +02:00
|
|
|
from app.models import (
|
|
|
|
Job,
|
|
|
|
JobInput,
|
|
|
|
JobStatus,
|
2022-11-08 14:11:57 +01:00
|
|
|
TesseractOCRPipelineModel,
|
|
|
|
SpaCyNLPPipelineModel
|
2022-04-22 15:27:52 +02:00
|
|
|
)
|
2022-09-02 13:07:30 +02:00
|
|
|
from . import bp, SERVICES
|
2022-04-04 13:31:09 +02:00
|
|
|
from .forms import (
|
2022-09-02 13:07:30 +02:00
|
|
|
CreateFileSetupPipelineJobForm,
|
|
|
|
CreateTesseractOCRPipelineJobForm,
|
|
|
|
CreateTranskribusHTRPipelineJobForm,
|
|
|
|
CreateSpacyNLPPipelineJobForm
|
2022-04-04 13:31:09 +02:00
|
|
|
)
|
2019-07-19 13:28:17 +02:00
|
|
|
|
|
|
|
|
2023-03-13 16:22:42 +01:00
|
|
|
@bp.route('/services')
def services():
    """Redirect the bare services URL to the main dashboard."""
    dashboard_url = url_for('main.dashboard')
    return redirect(dashboard_url)
|
|
|
|
|
|
|
|
|
2022-04-04 13:31:09 +02:00
|
|
|
@bp.route('/file-setup-pipeline', methods=['GET', 'POST'])
def file_setup_pipeline():
    """Show the File Setup Pipeline form and create a job on submission.

    The service version is taken from the ``version`` query argument and
    falls back to the manifest's ``latest_version``.

    Returns:
        * the rendered ``services/file_setup_pipeline.html.j2`` template
          when the form was not submitted,
        * ``({'errors': ...}, 400)`` when the submitted form is invalid,
        * ``({}, 201, {'Location': job.url})`` once the job was created.

    Aborts with 404 for an unknown service version and with 500 when
    creating the job or one of its inputs fails.
    """
    # Local import: keeps this view self-contained without touching the
    # module's import block.
    from html import escape

    service = 'file-setup-pipeline'
    service_manifest = SERVICES[service]
    version = request.args.get('version', service_manifest['latest_version'])
    if version not in service_manifest['versions']:
        abort(404)
    form = CreateFileSetupPipelineJobForm(prefix='create-job-form', version=version)
    if form.is_submitted():
        if not form.validate():
            response = {'errors': form.errors}
            return response, 400
        try:
            job = Job.create(
                title=form.title.data,
                description=form.description.data,
                service=service,
                service_args={},
                service_version=form.version.data,
                user=current_user
            )
        except OSError:
            abort(500)
        # Every uploaded image becomes its own job input.
        for input_file in form.images.data:
            try:
                JobInput.create(input_file, job=job)
            except (AttributeError, OSError):
                abort(500)
        job.status = JobStatus.SUBMITTED
        db.session.commit()
        # The flash message carries HTML markup, so the user-supplied title
        # must be escaped before it is embedded.
        # NOTE(review): assumes the template renders this message unescaped
        # (it contains an <a> tag) - confirm to rule out double escaping.
        message = f'Job "<a href="{job.url}">{escape(job.title)}</a>" created'
        flash(message, 'job')
        return {}, 201, {'Location': job.url}
    return render_template(
        'services/file_setup_pipeline.html.j2',
        title=service_manifest['name'],
        form=form
    )
|
|
|
|
|
|
|
|
|
|
|
|
@bp.route('/tesseract-ocr-pipeline', methods=['GET', 'POST'])
def tesseract_ocr_pipeline():
    """Show the Tesseract OCR Pipeline form and create a job on submission.

    The service version is taken from the ``version`` query argument and
    falls back to the manifest's ``latest_version``.

    Returns:
        * the rendered ``services/tesseract_ocr_pipeline.html.j2`` template
          (with the selectable models and the current user's model count)
          when the form was not submitted,
        * ``({'errors': ...}, 400)`` when the submitted form is invalid,
        * ``({}, 201, {'Location': job.url})`` once the job was created.

    Aborts with 404 for an unknown service version and with 500 when
    creating the job or its input fails.
    """
    # Local import: keeps this view self-contained without touching the
    # module's import block.
    from html import escape

    service = 'tesseract-ocr-pipeline'
    service_manifest = SERVICES[service]
    version = request.args.get('version', service_manifest['latest_version'])
    if version not in service_manifest['versions']:
        abort(404)
    form = CreateTesseractOCRPipelineJobForm(prefix='create-job-form', version=version)
    if form.is_submitted():
        if not form.validate():
            response = {'errors': form.errors}
            return response, 400
        try:
            job = Job.create(
                title=form.title.data,
                description=form.description.data,
                service=service,
                service_args={
                    'binarization': form.binarization.data,
                    # The form carries the hashid-encoded model id.
                    'model': hashids.decode(form.model.data),
                    'ocropus_nlbin_threshold': float(form.ocropus_nlbin_threshold.data)
                },
                service_version=form.version.data,
                user=current_user
            )
        except OSError:
            abort(500)
        try:
            JobInput.create(form.pdf.data, job=job)
        except (AttributeError, OSError):
            abort(500)
        job.status = JobStatus.SUBMITTED
        db.session.commit()
        # The flash message carries HTML markup, so the user-supplied title
        # must be escaped before it is embedded.
        # NOTE(review): assumes the template renders this message unescaped
        # (it contains an <a> tag) - confirm to rule out double escaping.
        message = f'Job "<a href="{job.url}">{escape(job.title)}</a>" created'
        flash(message, 'job')
        return {}, 201, {'Location': job.url}
    # Only offer models that support the requested service version and are
    # either public or owned by the current user.
    tesseract_ocr_pipeline_models = [
        x for x in TesseractOCRPipelineModel.query.all()
        if version in x.compatible_service_versions
        and (x.is_public or x.user == current_user)
    ]
    # Count in the database instead of loading every row just to call len().
    # NOTE(review): relies on the relationship being query-like (it already
    # exposes .all()) - confirm it also provides .count().
    user_tesseract_ocr_pipeline_models_count = current_user.tesseract_ocr_pipeline_models.count()
    return render_template(
        'services/tesseract_ocr_pipeline.html.j2',
        title=service_manifest['name'],
        form=form,
        tesseract_ocr_pipeline_models=tesseract_ocr_pipeline_models,
        user_tesseract_ocr_pipeline_models_count=user_tesseract_ocr_pipeline_models_count
    )
|
|
|
|
|
|
|
|
|
|
|
|
@bp.route('/transkribus-htr-pipeline', methods=['GET', 'POST'])
def transkribus_htr_pipeline():
    """Show the Transkribus HTR Pipeline form and create a job on submission.

    The view is only available when the ``NOPAQUE_TRANSKRIBUS_ENABLED``
    config value is truthy. The list of selectable models is fetched from
    the Transkribus REST API on every request and passed to both the form
    and the template.

    Returns:
        * the rendered ``services/transkribus_htr_pipeline.html.j2``
          template when the form was not submitted,
        * ``({'errors': ...}, 400)`` when the submitted form is invalid,
        * ``({}, 201, {'Location': job.url})`` once the job was created.

    Aborts with 404 when the service is disabled or the version is unknown,
    and with 500 when the Transkribus request fails or when creating the
    job or its input fails.
    """
    # Local import: keeps this view self-contained without touching the
    # module's import block.
    from html import escape

    if not current_app.config.get('NOPAQUE_TRANSKRIBUS_ENABLED'):
        abort(404)
    service = 'transkribus-htr-pipeline'
    service_manifest = SERVICES[service]
    version = request.args.get('version', service_manifest['latest_version'])
    if version not in service_manifest['versions']:
        abort(404)
    try:
        r = requests.get(
            'https://transkribus.eu/TrpServer/rest/models/text',
            headers={'Accept': 'application/json'},
            # Without a timeout a stalled upstream would block this worker
            # indefinitely.
            timeout=10
        )
    except requests.RequestException:
        # Connection errors and timeouts are reported like any other
        # upstream failure.
        abort(500)
    if r.status_code != 200:
        abort(500)
    transkribus_htr_pipeline_models = r.json()['trpModelMetadata']
    # Manually appended model entry in addition to the ones returned by the
    # API.
    transkribus_htr_pipeline_models.append({'modelId': 48513, 'name': 'Caroline Minuscle', 'language': 'lat', 'isoLanguages': ['lat']})
    form = CreateTranskribusHTRPipelineJobForm(
        prefix='create-job-form',
        transkribus_htr_pipeline_models=transkribus_htr_pipeline_models,
        version=version
    )
    if form.is_submitted():
        if not form.validate():
            response = {'errors': form.errors}
            return response, 400
        try:
            job = Job.create(
                title=form.title.data,
                description=form.description.data,
                service=service,
                service_args={
                    'binarization': form.binarization.data,
                    'model': form.model.data
                },
                service_version=form.version.data,
                user=current_user
            )
        except OSError:
            abort(500)
        try:
            JobInput.create(form.pdf.data, job=job)
        except (AttributeError, OSError):
            abort(500)
        job.status = JobStatus.SUBMITTED
        db.session.commit()
        # The flash message carries HTML markup, so the user-supplied title
        # must be escaped before it is embedded.
        # NOTE(review): assumes the template renders this message unescaped
        # (it contains an <a> tag) - confirm to rule out double escaping.
        message = f'Job "<a href="{job.url}">{escape(job.title)}</a>" created'
        flash(message, 'job')
        return {}, 201, {'Location': job.url}
    return render_template(
        'services/transkribus_htr_pipeline.html.j2',
        title=service_manifest['name'],
        form=form,
        transkribus_htr_pipeline_models=transkribus_htr_pipeline_models
    )
|
2022-04-04 13:31:09 +02:00
|
|
|
|
|
|
|
|
|
|
|
@bp.route('/spacy-nlp-pipeline', methods=['GET', 'POST'])
def spacy_nlp_pipeline():
    """Show the spaCy NLP Pipeline form and create a job on submission.

    The service version is taken from the ``version`` query argument and
    falls back to the manifest's ``latest_version``.

    Returns:
        * the rendered ``services/spacy_nlp_pipeline.html.j2`` template
          (with the available models and the current user's model count)
          when the form was not submitted,
        * ``({'errors': ...}, 400)`` when the submitted form is invalid,
        * ``({}, 201, {'Location': job.url})`` once the job was created.

    Aborts with 404 for an unknown service version and with 500 when
    creating the job or its input fails.
    """
    # Local import: keeps this view self-contained without touching the
    # module's import block.
    from html import escape

    service = 'spacy-nlp-pipeline'
    service_manifest = SERVICES[service]
    version = request.args.get('version', service_manifest['latest_version'])
    if version not in service_manifest['versions']:
        abort(404)
    form = CreateSpacyNLPPipelineJobForm(prefix='create-job-form', version=version)
    if form.is_submitted():
        if not form.validate():
            response = {'errors': form.errors}
            return response, 400
        try:
            job = Job.create(
                title=form.title.data,
                description=form.description.data,
                service=service,
                service_args={
                    'encoding_detection': form.encoding_detection.data,
                    'model': form.model.data
                },
                service_version=form.version.data,
                user=current_user
            )
        except OSError:
            abort(500)
        try:
            JobInput.create(form.txt.data, job=job)
        except (AttributeError, OSError):
            abort(500)
        job.status = JobStatus.SUBMITTED
        db.session.commit()
        # The flash message carries HTML markup, so the user-supplied title
        # must be escaped before it is embedded.
        # NOTE(review): assumes the template renders this message unescaped
        # (it contains an <a> tag) - confirm to rule out double escaping.
        message = f'Job "<a href="{job.url}">{escape(job.title)}</a>" created'
        flash(message, 'job')
        return {}, 201, {'Location': job.url}
    # The model data is only needed for rendering, so it is queried after the
    # submission branch (which always returns or aborts).
    # NOTE(review): unlike the Tesseract view, this list is not filtered by
    # service version, visibility or owner - confirm that is intended.
    spacy_nlp_pipeline_models = SpaCyNLPPipelineModel.query.all()
    # Count in the database instead of loading every row just to call len().
    # NOTE(review): relies on the relationship being query-like (it already
    # exposes .all()) - confirm it also provides .count().
    user_spacy_nlp_pipeline_models_count = current_user.spacy_nlp_pipeline_models.count()
    return render_template(
        'services/spacy_nlp_pipeline.html.j2',
        title=service_manifest['name'],
        form=form,
        spacy_nlp_pipeline_models=spacy_nlp_pipeline_models,
        user_spacy_nlp_pipeline_models_count=user_spacy_nlp_pipeline_models_count
    )
|
|
|
|
|
|
|
|
|
|
|
|
@bp.route('/corpus-analysis')
def corpus_analysis():
    """Render the Corpus Analysis service page."""
    template_name = 'services/corpus_analysis.html.j2'
    return render_template(template_name, title='Corpus Analysis')
|