mirror of
https://gitlab.ub.uni-bielefeld.de/sfb1288inf/nopaque.git
synced 2025-06-12 00:50:40 +00:00
Restructure project
This commit is contained in:
@ -1,4 +1,3 @@
|
||||
from app.models import TesseractOCRModel, TranskribusHTRModel
|
||||
from flask_login import current_user
|
||||
from flask_wtf import FlaskForm
|
||||
from flask_wtf.file import FileField, FileRequired
|
||||
@ -10,19 +9,26 @@ from wtforms import (
|
||||
SubmitField,
|
||||
ValidationError
|
||||
)
|
||||
from wtforms.validators import DataRequired, InputRequired, Length
|
||||
from wtforms.validators import InputRequired, Length
|
||||
from app.models import TesseractOCRModel, TranskribusHTRModel
|
||||
from . import SERVICES
|
||||
|
||||
|
||||
class AddJobForm(FlaskForm):
|
||||
description = StringField('Description', validators=[InputRequired(), Length(1, 255)])
|
||||
title = StringField('Title', validators=[InputRequired(), Length(1, 32)])
|
||||
version = SelectField('Version', validators=[DataRequired()])
|
||||
class CreateJobBaseForm(FlaskForm):
|
||||
description = StringField(
|
||||
'Description',
|
||||
validators=[InputRequired(), Length(max=255)]
|
||||
)
|
||||
title = StringField(
|
||||
'Title',
|
||||
validators=[InputRequired(), Length(max=32)]
|
||||
)
|
||||
version = SelectField('Version', validators=[InputRequired()])
|
||||
submit = SubmitField()
|
||||
|
||||
|
||||
class AddFileSetupPipelineJobForm(AddJobForm):
|
||||
images = MultipleFileField('File(s)', validators=[DataRequired()])
|
||||
class CreateFileSetupPipelineJobForm(CreateJobBaseForm):
|
||||
images = MultipleFileField('File(s)', validators=[InputRequired()])
|
||||
|
||||
def validate_images(form, field):
|
||||
valid_mimetypes = ['image/jpeg', 'image/png', 'image/tiff']
|
||||
@ -39,18 +45,15 @@ class AddFileSetupPipelineJobForm(AddJobForm):
|
||||
self.version.default = service_manifest['latest_version']
|
||||
|
||||
|
||||
class AddTesseractOCRPipelineJobForm(AddJobForm):
|
||||
class CreateTesseractOCRPipelineJobForm(CreateJobBaseForm):
|
||||
binarization = BooleanField('Binarization')
|
||||
pdf = FileField('File', validators=[FileRequired()])
|
||||
model = SelectField('Model', validators=[DataRequired()])
|
||||
model = SelectField('Model', validators=[InputRequired()])
|
||||
|
||||
def validate_binarization(self, field):
|
||||
service_info = SERVICES['tesseract-ocr-pipeline']['versions'][self.version.data]
|
||||
if field.data:
|
||||
if(
|
||||
'methods' not in service_info
|
||||
or 'binarization' not in service_info['methods']
|
||||
):
|
||||
if not('methods' in service_info and 'binarization' in service_info['methods']):
|
||||
raise ValidationError('Binarization is not available')
|
||||
|
||||
def validate_pdf(self, field):
|
||||
@ -81,10 +84,10 @@ class AddTesseractOCRPipelineJobForm(AddJobForm):
|
||||
self.version.default = service_manifest['latest_version']
|
||||
|
||||
|
||||
class AddTranskribusHTRPipelineJobForm(AddJobForm):
|
||||
class CreateTranskribusHTRPipelineJobForm(CreateJobBaseForm):
|
||||
binarization = BooleanField('Binarization')
|
||||
pdf = FileField('File', validators=[FileRequired()])
|
||||
model = SelectField('Model', validators=[DataRequired()])
|
||||
model = SelectField('Model', validators=[InputRequired()])
|
||||
|
||||
def validate_binarization(self, field):
|
||||
service_info = SERVICES['transkribus-htr-pipeline']['versions'][self.version.data]
|
||||
@ -123,10 +126,10 @@ class AddTranskribusHTRPipelineJobForm(AddJobForm):
|
||||
self.version.default = service_manifest['latest_version']
|
||||
|
||||
|
||||
class AddSpacyNLPPipelineJobForm(AddJobForm):
|
||||
class CreateSpacyNLPPipelineJobForm(CreateJobBaseForm):
|
||||
encoding_detection = BooleanField('Encoding detection', render_kw={'disabled': True})
|
||||
txt = FileField('File', validators=[FileRequired()])
|
||||
model = SelectField('Model', validators=[DataRequired()])
|
||||
model = SelectField('Model', validators=[InputRequired()])
|
||||
|
||||
def validate_encoding_detection(self, field):
|
||||
service_info = SERVICES['spacy-nlp-pipeline']['versions'][self.version.data]
|
||||
|
@ -1,3 +1,5 @@
|
||||
from flask import abort, current_app, flash, Markup, render_template, request
|
||||
from flask_login import current_user, login_required
|
||||
from app import db, hashids
|
||||
from app.models import (
|
||||
Job,
|
||||
@ -7,26 +9,13 @@ from app.models import (
|
||||
TRANSKRIBUS_HTR_MODELS,
|
||||
TranskribusHTRModel
|
||||
)
|
||||
from flask import (
|
||||
abort,
|
||||
current_app,
|
||||
flash,
|
||||
make_response,
|
||||
render_template,
|
||||
request,
|
||||
url_for
|
||||
)
|
||||
from flask_login import current_user, login_required
|
||||
from werkzeug.utils import secure_filename
|
||||
from . import bp
|
||||
from . import SERVICES
|
||||
from . import bp, SERVICES
|
||||
from .forms import (
|
||||
AddFileSetupPipelineJobForm,
|
||||
AddTesseractOCRPipelineJobForm,
|
||||
AddTranskribusHTRPipelineJobForm,
|
||||
AddSpacyNLPPipelineJobForm
|
||||
CreateFileSetupPipelineJobForm,
|
||||
CreateTesseractOCRPipelineJobForm,
|
||||
CreateTranskribusHTRPipelineJobForm,
|
||||
CreateSpacyNLPPipelineJobForm
|
||||
)
|
||||
import json
|
||||
|
||||
|
||||
@bp.route('/file-setup-pipeline', methods=['GET', 'POST'])
|
||||
@ -37,49 +26,32 @@ def file_setup_pipeline():
|
||||
version = request.args.get('version', service_manifest['latest_version'])
|
||||
if version not in service_manifest['versions']:
|
||||
abort(404)
|
||||
form = AddFileSetupPipelineJobForm(prefix='add-job-form', version=version)
|
||||
form = CreateFileSetupPipelineJobForm(prefix='create-job-form', version=version)
|
||||
if form.is_submitted():
|
||||
if not form.validate():
|
||||
return make_response(form.errors, 400)
|
||||
service_args = {}
|
||||
job = Job(
|
||||
user=current_user,
|
||||
description=form.description.data,
|
||||
service=service,
|
||||
service_args=service_args,
|
||||
service_version=form.version.data,
|
||||
title=form.title.data
|
||||
)
|
||||
db.session.add(job)
|
||||
db.session.flush(objects=[job])
|
||||
db.session.refresh(job)
|
||||
response = {'errors': form.errors}
|
||||
return response, 400
|
||||
try:
|
||||
job.makedirs()
|
||||
except OSError as e:
|
||||
current_app.logger.error(e)
|
||||
db.session.rollback()
|
||||
flash('Internal Server Error', 'error')
|
||||
return make_response({'redirect_url': url_for('.service', service=service)}, 500) # noqa
|
||||
for image_file in form.images.data:
|
||||
job_input = JobInput(
|
||||
filename=secure_filename(image_file.filename),
|
||||
job=job,
|
||||
mimetype=image_file.mimetype
|
||||
job = Job.create(
|
||||
title=form.title.data,
|
||||
description=form.description.data,
|
||||
service=service,
|
||||
service_args={},
|
||||
service_version=form.version.data,
|
||||
user=current_user
|
||||
)
|
||||
db.session.add(job_input)
|
||||
db.session.flush(objects=[job_input])
|
||||
db.session.refresh(job_input)
|
||||
except OSError:
|
||||
abort(500)
|
||||
for input_file in form.images.data:
|
||||
try:
|
||||
image_file.save(job_input.path)
|
||||
except OSError as e:
|
||||
current_app.logger.error(e)
|
||||
db.session.rollback()
|
||||
flash('Internal Server Error', 'error')
|
||||
return make_response({'redirect_url': url_for('.service', service=service)}, 500) # noqa
|
||||
JobInput.create(input_file, job=job)
|
||||
except OSError:
|
||||
abort(500)
|
||||
job.status = JobStatus.SUBMITTED
|
||||
db.session.commit()
|
||||
flash(f'Job "{job.title}" added', 'job')
|
||||
return make_response({'redirect_url': url_for('jobs.job', job_id=job.id)}, 201) # noqa
|
||||
message = Markup(f'Job "<a href="{job.url}">{job.title}</a>" created')
|
||||
flash(message, 'job')
|
||||
return {}, 201, {'Location': job.url}
|
||||
return render_template(
|
||||
'services/file_setup_pipeline.html.j2',
|
||||
form=form,
|
||||
@ -90,58 +62,41 @@ def file_setup_pipeline():
|
||||
@bp.route('/tesseract-ocr-pipeline', methods=['GET', 'POST'])
|
||||
@login_required
|
||||
def tesseract_ocr_pipeline():
|
||||
service = 'tesseract-ocr-pipeline'
|
||||
service_manifest = SERVICES[service]
|
||||
service_name = 'tesseract-ocr-pipeline'
|
||||
service_manifest = SERVICES[service_name]
|
||||
version = request.args.get('version', service_manifest['latest_version'])
|
||||
if version not in service_manifest['versions']:
|
||||
abort(404)
|
||||
form = AddTesseractOCRPipelineJobForm(prefix='add-job-form', version=version)
|
||||
form = CreateTesseractOCRPipelineJobForm(prefix='create-job-form', version=version)
|
||||
if form.is_submitted():
|
||||
if not form.validate():
|
||||
return make_response(form.errors, 400)
|
||||
service_args = {}
|
||||
service_args['model'] = hashids.decode(form.model.data)
|
||||
if form.binarization.data:
|
||||
service_args['binarization'] = True
|
||||
job = Job(
|
||||
user=current_user,
|
||||
description=form.description.data,
|
||||
service=service,
|
||||
service_args=service_args,
|
||||
service_version=form.version.data,
|
||||
title=form.title.data
|
||||
)
|
||||
db.session.add(job)
|
||||
db.session.flush(objects=[job])
|
||||
db.session.refresh(job)
|
||||
response = {'errors': form.errors}
|
||||
return response, 400
|
||||
try:
|
||||
job.makedirs()
|
||||
except OSError as e:
|
||||
current_app.logger.error(e)
|
||||
db.session.rollback()
|
||||
flash('Internal Server Error', 'error')
|
||||
return make_response({'redirect_url': url_for('.service', service=service)}, 500) # noqa
|
||||
job_input = JobInput(
|
||||
filename=secure_filename(form.pdf.data.filename),
|
||||
job=job,
|
||||
mimetype=form.pdf.data.mimetype
|
||||
)
|
||||
db.session.add(job_input)
|
||||
db.session.flush(objects=[job_input])
|
||||
db.session.refresh(job_input)
|
||||
job = Job.create(
|
||||
title=form.title.data,
|
||||
description=form.description.data,
|
||||
service=service_name,
|
||||
service_args={
|
||||
'binarization': form.binarization.data,
|
||||
'model': hashids.decode(form.model.data)
|
||||
},
|
||||
service_version=form.version.data,
|
||||
user=current_user
|
||||
)
|
||||
except OSError:
|
||||
abort(500)
|
||||
try:
|
||||
form.pdf.data.save(job_input.path)
|
||||
except OSError as e:
|
||||
current_app.logger.error(e)
|
||||
db.session.rollback()
|
||||
flash('Internal Server Error', 'error')
|
||||
return make_response({'redirect_url': url_for('.service', service=service)}, 500) # noqa
|
||||
JobInput.create(form.pdf.data, job=job)
|
||||
except OSError:
|
||||
abort(500)
|
||||
job.status = JobStatus.SUBMITTED
|
||||
db.session.commit()
|
||||
flash(f'Job "{job.title}" added', 'job')
|
||||
return make_response({'redirect_url': url_for('jobs.job', job_id=job.id)}, 201) # noqa
|
||||
message = Markup(f'Job "<a href="{job.url}">{job.title}</a>" created')
|
||||
flash(message, 'job')
|
||||
return {}, 201, {'Location': job.url}
|
||||
tesseract_ocr_models = [
|
||||
x for x in TesseractOCRModel.query.filter().all()
|
||||
x for x in TesseractOCRModel.query.all()
|
||||
if version in x.compatible_service_versions and (x.shared == True or x.user == current_user)
|
||||
]
|
||||
return render_template(
|
||||
@ -162,57 +117,40 @@ def transkribus_htr_pipeline():
|
||||
version = request.args.get('version', service_manifest['latest_version'])
|
||||
if version not in service_manifest['versions']:
|
||||
abort(404)
|
||||
form = AddTranskribusHTRPipelineJobForm(prefix='add-job-form', version=version)
|
||||
form = CreateTranskribusHTRPipelineJobForm(prefix='create-job-form', version=version)
|
||||
if form.is_submitted():
|
||||
if not form.validate():
|
||||
return make_response(form.errors, 400)
|
||||
service_args = {}
|
||||
service_args['model'] = hashids.decode(form.model.data)
|
||||
if form.binarization.data:
|
||||
service_args['binarization'] = True
|
||||
job = Job(
|
||||
user=current_user,
|
||||
description=form.description.data,
|
||||
service=service,
|
||||
service_args=service_args,
|
||||
service_version=form.version.data,
|
||||
title=form.title.data
|
||||
)
|
||||
db.session.add(job)
|
||||
db.session.flush(objects=[job])
|
||||
db.session.refresh(job)
|
||||
response = {'errors': form.errors}
|
||||
return response, 400
|
||||
try:
|
||||
job.makedirs()
|
||||
except OSError as e:
|
||||
current_app.logger.error(e)
|
||||
db.session.rollback()
|
||||
flash('Internal Server Error', 'error')
|
||||
return make_response({'redirect_url': url_for('.service', service=service)}, 500) # noqa
|
||||
job_input = JobInput(
|
||||
filename=secure_filename(form.pdf.data.filename),
|
||||
job=job,
|
||||
mimetype=form.pdf.data.mimetype
|
||||
)
|
||||
db.session.add(job_input)
|
||||
db.session.flush(objects=[job_input])
|
||||
db.session.refresh(job_input)
|
||||
job = Job.create(
|
||||
title=form.title.data,
|
||||
description=form.description.data,
|
||||
service=service,
|
||||
service_args={
|
||||
'binarization': form.binarization.data,
|
||||
'model': hashids.decode(form.model.data)
|
||||
},
|
||||
service_version=form.version.data,
|
||||
user=current_user
|
||||
)
|
||||
except OSError:
|
||||
abort(500)
|
||||
try:
|
||||
form.pdf.data.save(job_input.path)
|
||||
except OSError as e:
|
||||
current_app.logger.error(e)
|
||||
db.session.rollback()
|
||||
flash('Internal Server Error', 'error')
|
||||
return make_response({'redirect_url': url_for('.service', service=service)}, 500) # noqa
|
||||
JobInput.create(form.pdf.data, job=job)
|
||||
except OSError:
|
||||
abort(500)
|
||||
job.status = JobStatus.SUBMITTED
|
||||
db.session.commit()
|
||||
flash(f'Job "{job.title}" added', 'job')
|
||||
return make_response({'redirect_url': url_for('jobs.job', job_id=job.id)}, 201) # noqa
|
||||
message = Markup(f'Job "<a href="{job.url}">{job.title}</a>" created')
|
||||
flash(message, 'job')
|
||||
return {}, 201, {'Location': job.url}
|
||||
transkribus_htr_models = [
|
||||
x for x in TranskribusHTRModel.query.filter().all()
|
||||
x for x in TranskribusHTRModel.query.all()
|
||||
if x.shared == True or x.user == current_user
|
||||
]
|
||||
return render_template(
|
||||
f'services/transkribus_htr_pipeline.html.j2',
|
||||
'services/transkribus_htr_pipeline.html.j2',
|
||||
form=form,
|
||||
title=service_manifest['name'],
|
||||
TRANSKRIBUS_HTR_MODELS=TRANSKRIBUS_HTR_MODELS,
|
||||
@ -228,51 +166,34 @@ def spacy_nlp_pipeline():
|
||||
version = request.args.get('version', SERVICES[service]['latest_version'])
|
||||
if version not in service_manifest['versions']:
|
||||
abort(404)
|
||||
form = AddSpacyNLPPipelineJobForm(prefix='add-job-form', version=version)
|
||||
form = CreateSpacyNLPPipelineJobForm(prefix='create-job-form', version=version)
|
||||
if form.is_submitted():
|
||||
if not form.validate():
|
||||
return make_response(form.errors, 400)
|
||||
service_args = {}
|
||||
service_args['model'] = form.model.data
|
||||
if form.encoding_detection.data:
|
||||
service_args['encoding_detection'] = True
|
||||
job = Job(
|
||||
user=current_user,
|
||||
description=form.description.data,
|
||||
service=service,
|
||||
service_args=service_args,
|
||||
service_version=form.version.data,
|
||||
title=form.title.data
|
||||
)
|
||||
db.session.add(job)
|
||||
db.session.flush(objects=[job])
|
||||
db.session.refresh(job)
|
||||
response = {'errors': form.errors}
|
||||
return response, 400
|
||||
try:
|
||||
job.makedirs()
|
||||
except OSError as e:
|
||||
current_app.logger.error(e)
|
||||
db.session.rollback()
|
||||
flash('Internal Server Error', 'error')
|
||||
return make_response({'redirect_url': url_for('.service', service=service)}, 500) # noqa
|
||||
job_input = JobInput(
|
||||
filename=secure_filename(form.txt.data.filename),
|
||||
job=job,
|
||||
mimetype=form.txt.data.mimetype
|
||||
)
|
||||
db.session.add(job_input)
|
||||
db.session.flush(objects=[job_input])
|
||||
db.session.refresh(job_input)
|
||||
job = Job.create(
|
||||
title=form.title.data,
|
||||
description=form.description.data,
|
||||
service=service,
|
||||
service_args={
|
||||
'encoding_detection': form.encoding_detection.data,
|
||||
'model': form.model.data
|
||||
},
|
||||
service_version=form.version.data,
|
||||
user=current_user
|
||||
)
|
||||
except OSError:
|
||||
abort(500)
|
||||
try:
|
||||
form.txt.data.save(job_input.path)
|
||||
except OSError as e:
|
||||
current_app.logger.error(e)
|
||||
db.session.rollback()
|
||||
flash('Internal Server Error', 'error')
|
||||
return make_response({'redirect_url': url_for('.service', service=service)}, 500) # noqa
|
||||
JobInput.create(form.txt.data, job=job)
|
||||
except OSError:
|
||||
abort(500)
|
||||
job.status = JobStatus.SUBMITTED
|
||||
db.session.commit()
|
||||
flash(f'Job "{job.title}" added', 'job')
|
||||
return make_response({'redirect_url': url_for('jobs.job', job_id=job.id)}, 201) # noqa
|
||||
message = Markup(f'Job "<a href="{job.url}">{job.title}</a>" created')
|
||||
flash(message, 'job')
|
||||
return {}, 201, {'Location': job.url}
|
||||
return render_template(
|
||||
'services/spacy_nlp_pipeline.html.j2',
|
||||
form=form,
|
||||
|
@ -17,6 +17,11 @@ tesseract-ocr-pipeline:
|
||||
- 'binarization'
|
||||
publishing_year: 2022
|
||||
url: 'https://gitlab.ub.uni-bielefeld.de/sfb1288inf/tesseract-ocr-pipeline/-/releases/v0.1.0'
|
||||
0.1.1:
|
||||
methods:
|
||||
- 'binarization'
|
||||
publishing_year: 2022
|
||||
url: 'https://gitlab.ub.uni-bielefeld.de/sfb1288inf/tesseract-ocr-pipeline/-/releases/v0.1.1'
|
||||
transkribus-htr-pipeline:
|
||||
name: 'Transkribus HTR Pipeline'
|
||||
publisher: 'Bielefeld University - CRC 1288 - INF'
|
||||
@ -47,4 +52,4 @@ spacy-nlp-pipeline:
|
||||
ru: 'Russian'
|
||||
zh: 'Chinese'
|
||||
publishing_year: 2022
|
||||
url: 'https://gitlab.ub.uni-bielefeld.de/sfb1288inf/spacy-nlp-pipeline/-/releases/v0.1.0'
|
||||
url: 'https://gitlab.ub.uni-bielefeld.de/sfb1288inf/spacy-nlp-pipeline/-/releases/v0.1.0'
|
||||
|
Reference in New Issue
Block a user