Restructure project

This commit is contained in:
Patrick Jentsch
2022-09-02 13:07:30 +02:00
parent ec9225b881
commit 8e1d94bb5d
73 changed files with 2105 additions and 2468 deletions

View File

@ -1,4 +1,3 @@
from app.models import TesseractOCRModel, TranskribusHTRModel
from flask_login import current_user
from flask_wtf import FlaskForm
from flask_wtf.file import FileField, FileRequired
@ -10,19 +9,26 @@ from wtforms import (
SubmitField,
ValidationError
)
from wtforms.validators import DataRequired, InputRequired, Length
from wtforms.validators import InputRequired, Length
from app.models import TesseractOCRModel, TranskribusHTRModel
from . import SERVICES
class AddJobForm(FlaskForm):
description = StringField('Description', validators=[InputRequired(), Length(1, 255)])
title = StringField('Title', validators=[InputRequired(), Length(1, 32)])
version = SelectField('Version', validators=[DataRequired()])
class CreateJobBaseForm(FlaskForm):
description = StringField(
'Description',
validators=[InputRequired(), Length(max=255)]
)
title = StringField(
'Title',
validators=[InputRequired(), Length(max=32)]
)
version = SelectField('Version', validators=[InputRequired()])
submit = SubmitField()
class AddFileSetupPipelineJobForm(AddJobForm):
images = MultipleFileField('File(s)', validators=[DataRequired()])
class CreateFileSetupPipelineJobForm(CreateJobBaseForm):
images = MultipleFileField('File(s)', validators=[InputRequired()])
def validate_images(form, field):
valid_mimetypes = ['image/jpeg', 'image/png', 'image/tiff']
@ -39,18 +45,15 @@ class AddFileSetupPipelineJobForm(AddJobForm):
self.version.default = service_manifest['latest_version']
class AddTesseractOCRPipelineJobForm(AddJobForm):
class CreateTesseractOCRPipelineJobForm(CreateJobBaseForm):
binarization = BooleanField('Binarization')
pdf = FileField('File', validators=[FileRequired()])
model = SelectField('Model', validators=[DataRequired()])
model = SelectField('Model', validators=[InputRequired()])
def validate_binarization(self, field):
service_info = SERVICES['tesseract-ocr-pipeline']['versions'][self.version.data]
if field.data:
if(
'methods' not in service_info
or 'binarization' not in service_info['methods']
):
if not('methods' in service_info and 'binarization' in service_info['methods']):
raise ValidationError('Binarization is not available')
def validate_pdf(self, field):
@ -81,10 +84,10 @@ class AddTesseractOCRPipelineJobForm(AddJobForm):
self.version.default = service_manifest['latest_version']
class AddTranskribusHTRPipelineJobForm(AddJobForm):
class CreateTranskribusHTRPipelineJobForm(CreateJobBaseForm):
binarization = BooleanField('Binarization')
pdf = FileField('File', validators=[FileRequired()])
model = SelectField('Model', validators=[DataRequired()])
model = SelectField('Model', validators=[InputRequired()])
def validate_binarization(self, field):
service_info = SERVICES['transkribus-htr-pipeline']['versions'][self.version.data]
@ -123,10 +126,10 @@ class AddTranskribusHTRPipelineJobForm(AddJobForm):
self.version.default = service_manifest['latest_version']
class AddSpacyNLPPipelineJobForm(AddJobForm):
class CreateSpacyNLPPipelineJobForm(CreateJobBaseForm):
encoding_detection = BooleanField('Encoding detection', render_kw={'disabled': True})
txt = FileField('File', validators=[FileRequired()])
model = SelectField('Model', validators=[DataRequired()])
model = SelectField('Model', validators=[InputRequired()])
def validate_encoding_detection(self, field):
service_info = SERVICES['spacy-nlp-pipeline']['versions'][self.version.data]

View File

@ -1,3 +1,5 @@
from flask import abort, current_app, flash, Markup, render_template, request
from flask_login import current_user, login_required
from app import db, hashids
from app.models import (
Job,
@ -7,26 +9,13 @@ from app.models import (
TRANSKRIBUS_HTR_MODELS,
TranskribusHTRModel
)
from flask import (
abort,
current_app,
flash,
make_response,
render_template,
request,
url_for
)
from flask_login import current_user, login_required
from werkzeug.utils import secure_filename
from . import bp
from . import SERVICES
from . import bp, SERVICES
from .forms import (
AddFileSetupPipelineJobForm,
AddTesseractOCRPipelineJobForm,
AddTranskribusHTRPipelineJobForm,
AddSpacyNLPPipelineJobForm
CreateFileSetupPipelineJobForm,
CreateTesseractOCRPipelineJobForm,
CreateTranskribusHTRPipelineJobForm,
CreateSpacyNLPPipelineJobForm
)
import json
@bp.route('/file-setup-pipeline', methods=['GET', 'POST'])
@ -37,49 +26,32 @@ def file_setup_pipeline():
version = request.args.get('version', service_manifest['latest_version'])
if version not in service_manifest['versions']:
abort(404)
form = AddFileSetupPipelineJobForm(prefix='add-job-form', version=version)
form = CreateFileSetupPipelineJobForm(prefix='create-job-form', version=version)
if form.is_submitted():
if not form.validate():
return make_response(form.errors, 400)
service_args = {}
job = Job(
user=current_user,
description=form.description.data,
service=service,
service_args=service_args,
service_version=form.version.data,
title=form.title.data
)
db.session.add(job)
db.session.flush(objects=[job])
db.session.refresh(job)
response = {'errors': form.errors}
return response, 400
try:
job.makedirs()
except OSError as e:
current_app.logger.error(e)
db.session.rollback()
flash('Internal Server Error', 'error')
return make_response({'redirect_url': url_for('.service', service=service)}, 500) # noqa
for image_file in form.images.data:
job_input = JobInput(
filename=secure_filename(image_file.filename),
job=job,
mimetype=image_file.mimetype
job = Job.create(
title=form.title.data,
description=form.description.data,
service=service,
service_args={},
service_version=form.version.data,
user=current_user
)
db.session.add(job_input)
db.session.flush(objects=[job_input])
db.session.refresh(job_input)
except OSError:
abort(500)
for input_file in form.images.data:
try:
image_file.save(job_input.path)
except OSError as e:
current_app.logger.error(e)
db.session.rollback()
flash('Internal Server Error', 'error')
return make_response({'redirect_url': url_for('.service', service=service)}, 500) # noqa
JobInput.create(input_file, job=job)
except OSError:
abort(500)
job.status = JobStatus.SUBMITTED
db.session.commit()
flash(f'Job "{job.title}" added', 'job')
return make_response({'redirect_url': url_for('jobs.job', job_id=job.id)}, 201) # noqa
message = Markup(f'Job "<a href="{job.url}">{job.title}</a>" created')
flash(message, 'job')
return {}, 201, {'Location': job.url}
return render_template(
'services/file_setup_pipeline.html.j2',
form=form,
@ -90,58 +62,41 @@ def file_setup_pipeline():
@bp.route('/tesseract-ocr-pipeline', methods=['GET', 'POST'])
@login_required
def tesseract_ocr_pipeline():
service = 'tesseract-ocr-pipeline'
service_manifest = SERVICES[service]
service_name = 'tesseract-ocr-pipeline'
service_manifest = SERVICES[service_name]
version = request.args.get('version', service_manifest['latest_version'])
if version not in service_manifest['versions']:
abort(404)
form = AddTesseractOCRPipelineJobForm(prefix='add-job-form', version=version)
form = CreateTesseractOCRPipelineJobForm(prefix='create-job-form', version=version)
if form.is_submitted():
if not form.validate():
return make_response(form.errors, 400)
service_args = {}
service_args['model'] = hashids.decode(form.model.data)
if form.binarization.data:
service_args['binarization'] = True
job = Job(
user=current_user,
description=form.description.data,
service=service,
service_args=service_args,
service_version=form.version.data,
title=form.title.data
)
db.session.add(job)
db.session.flush(objects=[job])
db.session.refresh(job)
response = {'errors': form.errors}
return response, 400
try:
job.makedirs()
except OSError as e:
current_app.logger.error(e)
db.session.rollback()
flash('Internal Server Error', 'error')
return make_response({'redirect_url': url_for('.service', service=service)}, 500) # noqa
job_input = JobInput(
filename=secure_filename(form.pdf.data.filename),
job=job,
mimetype=form.pdf.data.mimetype
)
db.session.add(job_input)
db.session.flush(objects=[job_input])
db.session.refresh(job_input)
job = Job.create(
title=form.title.data,
description=form.description.data,
service=service_name,
service_args={
'binarization': form.binarization.data,
'model': hashids.decode(form.model.data)
},
service_version=form.version.data,
user=current_user
)
except OSError:
abort(500)
try:
form.pdf.data.save(job_input.path)
except OSError as e:
current_app.logger.error(e)
db.session.rollback()
flash('Internal Server Error', 'error')
return make_response({'redirect_url': url_for('.service', service=service)}, 500) # noqa
JobInput.create(form.pdf.data, job=job)
except OSError:
abort(500)
job.status = JobStatus.SUBMITTED
db.session.commit()
flash(f'Job "{job.title}" added', 'job')
return make_response({'redirect_url': url_for('jobs.job', job_id=job.id)}, 201) # noqa
message = Markup(f'Job "<a href="{job.url}">{job.title}</a>" created')
flash(message, 'job')
return {}, 201, {'Location': job.url}
tesseract_ocr_models = [
x for x in TesseractOCRModel.query.filter().all()
x for x in TesseractOCRModel.query.all()
if version in x.compatible_service_versions and (x.shared == True or x.user == current_user)
]
return render_template(
@ -162,57 +117,40 @@ def transkribus_htr_pipeline():
version = request.args.get('version', service_manifest['latest_version'])
if version not in service_manifest['versions']:
abort(404)
form = AddTranskribusHTRPipelineJobForm(prefix='add-job-form', version=version)
form = CreateTranskribusHTRPipelineJobForm(prefix='create-job-form', version=version)
if form.is_submitted():
if not form.validate():
return make_response(form.errors, 400)
service_args = {}
service_args['model'] = hashids.decode(form.model.data)
if form.binarization.data:
service_args['binarization'] = True
job = Job(
user=current_user,
description=form.description.data,
service=service,
service_args=service_args,
service_version=form.version.data,
title=form.title.data
)
db.session.add(job)
db.session.flush(objects=[job])
db.session.refresh(job)
response = {'errors': form.errors}
return response, 400
try:
job.makedirs()
except OSError as e:
current_app.logger.error(e)
db.session.rollback()
flash('Internal Server Error', 'error')
return make_response({'redirect_url': url_for('.service', service=service)}, 500) # noqa
job_input = JobInput(
filename=secure_filename(form.pdf.data.filename),
job=job,
mimetype=form.pdf.data.mimetype
)
db.session.add(job_input)
db.session.flush(objects=[job_input])
db.session.refresh(job_input)
job = Job.create(
title=form.title.data,
description=form.description.data,
service=service,
service_args={
'binarization': form.binarization.data,
'model': hashids.decode(form.model.data)
},
service_version=form.version.data,
user=current_user
)
except OSError:
abort(500)
try:
form.pdf.data.save(job_input.path)
except OSError as e:
current_app.logger.error(e)
db.session.rollback()
flash('Internal Server Error', 'error')
return make_response({'redirect_url': url_for('.service', service=service)}, 500) # noqa
JobInput.create(form.pdf.data, job=job)
except OSError:
abort(500)
job.status = JobStatus.SUBMITTED
db.session.commit()
flash(f'Job "{job.title}" added', 'job')
return make_response({'redirect_url': url_for('jobs.job', job_id=job.id)}, 201) # noqa
message = Markup(f'Job "<a href="{job.url}">{job.title}</a>" created')
flash(message, 'job')
return {}, 201, {'Location': job.url}
transkribus_htr_models = [
x for x in TranskribusHTRModel.query.filter().all()
x for x in TranskribusHTRModel.query.all()
if x.shared == True or x.user == current_user
]
return render_template(
f'services/transkribus_htr_pipeline.html.j2',
'services/transkribus_htr_pipeline.html.j2',
form=form,
title=service_manifest['name'],
TRANSKRIBUS_HTR_MODELS=TRANSKRIBUS_HTR_MODELS,
@ -228,51 +166,34 @@ def spacy_nlp_pipeline():
version = request.args.get('version', SERVICES[service]['latest_version'])
if version not in service_manifest['versions']:
abort(404)
form = AddSpacyNLPPipelineJobForm(prefix='add-job-form', version=version)
form = CreateSpacyNLPPipelineJobForm(prefix='create-job-form', version=version)
if form.is_submitted():
if not form.validate():
return make_response(form.errors, 400)
service_args = {}
service_args['model'] = form.model.data
if form.encoding_detection.data:
service_args['encoding_detection'] = True
job = Job(
user=current_user,
description=form.description.data,
service=service,
service_args=service_args,
service_version=form.version.data,
title=form.title.data
)
db.session.add(job)
db.session.flush(objects=[job])
db.session.refresh(job)
response = {'errors': form.errors}
return response, 400
try:
job.makedirs()
except OSError as e:
current_app.logger.error(e)
db.session.rollback()
flash('Internal Server Error', 'error')
return make_response({'redirect_url': url_for('.service', service=service)}, 500) # noqa
job_input = JobInput(
filename=secure_filename(form.txt.data.filename),
job=job,
mimetype=form.txt.data.mimetype
)
db.session.add(job_input)
db.session.flush(objects=[job_input])
db.session.refresh(job_input)
job = Job.create(
title=form.title.data,
description=form.description.data,
service=service,
service_args={
'encoding_detection': form.encoding_detection.data,
'model': form.model.data
},
service_version=form.version.data,
user=current_user
)
except OSError:
abort(500)
try:
form.txt.data.save(job_input.path)
except OSError as e:
current_app.logger.error(e)
db.session.rollback()
flash('Internal Server Error', 'error')
return make_response({'redirect_url': url_for('.service', service=service)}, 500) # noqa
JobInput.create(form.txt.data, job=job)
except OSError:
abort(500)
job.status = JobStatus.SUBMITTED
db.session.commit()
flash(f'Job "{job.title}" added', 'job')
return make_response({'redirect_url': url_for('jobs.job', job_id=job.id)}, 201) # noqa
message = Markup(f'Job "<a href="{job.url}">{job.title}</a>" created')
flash(message, 'job')
return {}, 201, {'Location': job.url}
return render_template(
'services/spacy_nlp_pipeline.html.j2',
form=form,

View File

@ -17,6 +17,11 @@ tesseract-ocr-pipeline:
- 'binarization'
publishing_year: 2022
url: 'https://gitlab.ub.uni-bielefeld.de/sfb1288inf/tesseract-ocr-pipeline/-/releases/v0.1.0'
0.1.1:
methods:
- 'binarization'
publishing_year: 2022
url: 'https://gitlab.ub.uni-bielefeld.de/sfb1288inf/tesseract-ocr-pipeline/-/releases/v0.1.1'
transkribus-htr-pipeline:
name: 'Transkribus HTR Pipeline'
publisher: 'Bielefeld University - CRC 1288 - INF'
@ -47,4 +52,4 @@ spacy-nlp-pipeline:
ru: 'Russian'
zh: 'Chinese'
publishing_year: 2022
url: 'https://gitlab.ub.uni-bielefeld.de/sfb1288inf/spacy-nlp-pipeline/-/releases/v0.1.0'
url: 'https://gitlab.ub.uni-bielefeld.de/sfb1288inf/spacy-nlp-pipeline/-/releases/v0.1.0'