2022-04-04 11:31:09 +00:00
|
|
|
from flask_login import current_user
|
2021-02-19 12:00:52 +00:00
|
|
|
from flask_wtf import FlaskForm
|
2022-04-12 14:11:24 +00:00
|
|
|
from flask_wtf.file import FileField, FileRequired
|
2022-02-08 11:26:20 +00:00
|
|
|
from wtforms import (
|
|
|
|
BooleanField,
|
|
|
|
MultipleFileField,
|
|
|
|
SelectField,
|
|
|
|
StringField,
|
|
|
|
SubmitField,
|
|
|
|
ValidationError
|
|
|
|
)
|
2022-09-02 11:07:30 +00:00
|
|
|
from wtforms.validators import InputRequired, Length
|
2022-10-12 13:10:55 +00:00
|
|
|
from app.models import TesseractOCRPipelineModel
|
2021-02-19 12:00:52 +00:00
|
|
|
from . import SERVICES
|
|
|
|
|
|
|
|
|
2022-09-02 11:07:30 +00:00
|
|
|
class CreateJobBaseForm(FlaskForm):
    """Common fields shared by the job creation forms in this module.

    Declares the job description, the job title, a service version
    selector and a submit button. No choices are assigned to ``version``
    here; the subclasses in this module assign them in ``__init__``.
    """

    # Free-text job description: required, at most 255 characters.
    description = StringField(
        label='Description',
        validators=[InputRequired(), Length(max=255)],
    )
    # Short job title: required, at most 32 characters.
    title = StringField(
        label='Title',
        validators=[InputRequired(), Length(max=32)],
    )
    # Service version selector: required, choices are set elsewhere.
    version = SelectField(label='Version', validators=[InputRequired()])
    submit = SubmitField()
|
2022-02-03 11:39:16 +00:00
|
|
|
|
2021-02-19 12:00:52 +00:00
|
|
|
|
2022-09-02 11:07:30 +00:00
|
|
|
class CreateFileSetupPipelineJobForm(CreateJobBaseForm):
    """Job creation form for the ``file-setup-pipeline`` service.

    Takes one or more image uploads. An optional ``version`` keyword
    argument selects the service version; it falls back to the
    manifest's ``latest_version`` when omitted.
    """

    images = MultipleFileField('File(s)', validators=[InputRequired()])

    def validate_images(form, field):
        """Raise ``ValidationError`` unless every upload is JPEG, PNG or TIFF."""
        accepted = ('image/jpeg', 'image/png', 'image/tiff')
        if any(upload.mimetype not in accepted for upload in field.data):
            raise ValidationError('JPEG, PNG and TIFF files only!')

    def __init__(self, *args, **kwargs):
        """Build the form and populate the version selector.

        ``version`` is removed from ``kwargs`` before they are passed on
        to the parent constructor.
        """
        manifest = SERVICES['file-setup-pipeline']
        latest = manifest['latest_version']
        version = kwargs.pop('version', latest)
        super().__init__(*args, **kwargs)
        version_field = self.version
        version_field.choices = [(v, v) for v in manifest['versions']]
        # Assigned after the parent constructor ran, so this replaces
        # whatever value the field held up to this point.
        version_field.data = version
        version_field.default = latest
|
2021-03-26 12:10:42 +00:00
|
|
|
|
2021-02-19 12:00:52 +00:00
|
|
|
|
2022-09-02 11:07:30 +00:00
|
|
|
class CreateTesseractOCRPipelineJobForm(CreateJobBaseForm):
    """Job creation form for the ``tesseract-ocr-pipeline`` service.

    Takes a single PDF upload, an OCR model selection and an optional
    binarization flag. An optional ``version`` keyword argument selects
    the service version; it falls back to the manifest's
    ``latest_version`` when omitted.
    """

    binarization = BooleanField('Binarization')
    pdf = FileField('File', validators=[FileRequired()])
    model = SelectField('Model', validators=[InputRequired()])

    def validate_binarization(self, field):
        """Reject a checked box if the selected version lacks binarization.

        Raises:
            ValidationError: the box is checked but ``'binarization'`` is
                not listed under the version's ``'methods'``.
        """
        service_info = SERVICES['tesseract-ocr-pipeline']['versions'][self.version.data]
        if not field.data:
            return
        if 'methods' in service_info and 'binarization' in service_info['methods']:
            return
        raise ValidationError('Binarization is not available')

    def validate_pdf(self, field):
        """Raise ``ValidationError`` unless the upload's mimetype is PDF."""
        if field.data.mimetype != 'application/pdf':
            raise ValidationError('PDF files only!')

    def __init__(self, *args, **kwargs):
        """Build the form and populate the version and model selectors.

        ``version`` is removed from ``kwargs`` before they are passed on
        to the parent constructor.
        """
        service_manifest = SERVICES['tesseract-ocr-pipeline']
        version = kwargs.pop('version', service_manifest['latest_version'])
        super().__init__(*args, **kwargs)
        service_info = service_manifest['versions'][version]

        # Render the checkbox as disabled unless this version offers
        # binarization. A fresh dict is assigned instead of mutating
        # whatever dict the field currently references.
        render_kw = dict(self.binarization.render_kw or {})
        if 'methods' in service_info and 'binarization' in service_info['methods']:
            render_kw.pop('disabled', None)
        else:
            render_kw['disabled'] = True
        self.binarization.render_kw = render_kw

        # Offer only models compatible with the selected version that are
        # either shared or belong to the current user.
        models = [
            x for x in TesseractOCRPipelineModel.query.all()
            if version in x.compatible_service_versions
            and (x.shared or x.user == current_user)
        ]
        self.model.choices = [('', 'Choose your option')]
        self.model.choices += [(x.hashid, x.title) for x in models]
        self.model.default = ''

        self.version.choices = [(x, x) for x in service_manifest['versions']]
        # Assigned after the parent constructor ran, so this replaces
        # whatever value the field held up to this point.
        self.version.data = version
        self.version.default = service_manifest['latest_version']
|
2021-02-19 12:00:52 +00:00
|
|
|
|
|
|
|
|
2022-09-02 11:07:30 +00:00
|
|
|
class CreateTranskribusHTRPipelineJobForm(CreateJobBaseForm):
    """Job creation form for the ``transkribus-htr-pipeline`` service.

    Takes a single PDF upload, an HTR model selection and an optional
    binarization flag. Two optional keyword arguments are consumed by the
    constructor: ``version`` (defaults to the manifest's
    ``latest_version``) and ``transkribus_htr_pipeline_models`` (defaults
    to an empty list; each entry is read via its ``'modelId'`` and
    ``'name'`` keys).
    """

    binarization = BooleanField('Binarization')
    pdf = FileField('File', validators=[FileRequired()])
    model = SelectField('Model', validators=[InputRequired()])

    def validate_binarization(self, field):
        """Reject a checked box if the selected version lacks binarization."""
        versions = SERVICES['transkribus-htr-pipeline']['versions']
        service_info = versions[self.version.data]
        if not field.data:
            return
        if 'methods' in service_info and 'binarization' in service_info['methods']:
            return
        raise ValidationError('Binarization is not available')

    def validate_pdf(self, field):
        """Raise ``ValidationError`` unless the upload's mimetype is PDF."""
        is_pdf = field.data.mimetype == 'application/pdf'
        if not is_pdf:
            raise ValidationError('PDF files only!')

    def __init__(self, *args, **kwargs):
        """Build the form and populate the version and model selectors.

        Both custom keyword arguments are removed from ``kwargs`` before
        they are passed on to the parent constructor.
        """
        htr_models = kwargs.pop('transkribus_htr_pipeline_models', [])
        manifest = SERVICES['transkribus-htr-pipeline']
        version = kwargs.pop('version', manifest['latest_version'])
        super().__init__(*args, **kwargs)
        service_info = manifest['versions'][version]

        # Start from "disabled" and lift it only when the selected
        # version lists binarization among its methods.
        checkbox = self.binarization
        if checkbox.render_kw is None:
            checkbox.render_kw = {}
        checkbox.render_kw['disabled'] = True
        can_binarize = (
            'methods' in service_info
            and 'binarization' in service_info['methods']
        )
        if can_binarize:
            checkbox.render_kw.pop('disabled', None)

        # Empty placeholder entry first, then one entry per supplied model.
        self.model.choices = [
            ('', 'Choose your option'),
            *((m['modelId'], m['name']) for m in htr_models),
        ]
        self.model.default = ''

        self.version.choices = [(v, v) for v in manifest['versions']]
        # Assigned after the parent constructor ran, so this replaces
        # whatever value the field held up to this point.
        self.version.data = version
        self.version.default = manifest['latest_version']
|
|
|
|
|
|
|
|
|
2022-09-02 11:07:30 +00:00
|
|
|
class CreateSpacyNLPPipelineJobForm(CreateJobBaseForm):
    """Job creation form for the ``spacy-nlp-pipeline`` service.

    Takes a single plain-text upload, a model selection and an optional
    encoding detection flag. An optional ``version`` keyword argument
    selects the service version; it falls back to the manifest's
    ``latest_version`` when omitted.
    """

    encoding_detection = BooleanField('Encoding detection', render_kw={'disabled': True})
    txt = FileField('File', validators=[FileRequired()])
    model = SelectField('Model', validators=[InputRequired()])

    def validate_encoding_detection(self, field):
        """Reject a checked box if the version lacks encoding detection.

        Raises:
            ValidationError: the box is checked but ``'encoding_detection'``
                is not listed under the version's ``'methods'``.
        """
        if not field.data:
            return
        service_info = SERVICES['spacy-nlp-pipeline']['versions'][self.version.data]
        if 'methods' in service_info and 'encoding_detection' in service_info['methods']:
            return
        raise ValidationError('Encoding detection is not available')

    def validate_txt(form, field):
        """Raise ``ValidationError`` unless the upload's mimetype is plain text."""
        if field.data.mimetype != 'text/plain':
            raise ValidationError('Plain text files only!')

    def __init__(self, *args, **kwargs):
        """Build the form and populate the version and model selectors.

        ``version`` is removed from ``kwargs`` before they are passed on
        to the parent constructor.
        """
        service_manifest = SERVICES['spacy-nlp-pipeline']
        version = kwargs.pop('version', service_manifest['latest_version'])
        super().__init__(*args, **kwargs)
        service_info = service_manifest['versions'][version]

        # The ``render_kw`` dict declared on the class attribute above is
        # the same object for every form instance, so it must not be
        # mutated in place: work on a copy and assign that to this
        # instance's field only.
        render_kw = dict(self.encoding_detection.render_kw or {})
        if 'methods' in service_info and 'encoding_detection' in service_info['methods']:
            render_kw.pop('disabled', None)
        else:
            render_kw['disabled'] = True
        self.encoding_detection.render_kw = render_kw

        # Empty placeholder entry first, then the key/value pairs of the
        # selected version's ``'models'`` mapping.
        self.model.choices = [('', 'Choose your option')]
        self.model.choices += list(service_info['models'].items())
        self.model.default = ''

        self.version.choices = [(x, x) for x in service_manifest['versions']]
        # Assigned after the parent constructor ran, so this replaces
        # whatever value the field held up to this point.
        self.version.data = version
        # Same default as the other job creation forms in this module.
        self.version.default = service_manifest['latest_version']
|