nopaque/app/services/forms.py

165 lines
7.0 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

from app.models import TesseractOCRModel
from flask_login import current_user
from flask_wtf import FlaskForm
from flask_wtf.file import FileField, FileRequired
from wtforms import (
BooleanField,
MultipleFileField,
SelectField,
StringField,
SubmitField,
ValidationError
)
from wtforms.validators import DataRequired, InputRequired, Length
from . import SERVICES
class AddJobForm(FlaskForm):
    """Base form holding the fields common to the job forms in this module.

    No choices are declared for ``version`` here; the subclasses in this
    module assign ``version.choices`` in their ``__init__``.
    """

    description = StringField(
        'Description',
        validators=[InputRequired(), Length(min=1, max=255)],
    )
    title = StringField(
        'Title',
        validators=[InputRequired(), Length(min=1, max=32)],
    )
    version = SelectField('Version', validators=[DataRequired()])
    submit = SubmitField()
class AddFileSetupPipelineJobForm(AddJobForm):
    """Job form for the ``file-setup-pipeline`` service.

    Adds a multi-file upload field to the base job fields.
    """

    images = MultipleFileField('File(s)', validators=[DataRequired()])

    def validate_images(form, field):
        """Reject the upload unless every file is a JPEG, PNG or TIFF.

        Raises:
            ValidationError: If any uploaded file has another mimetype.
        """
        accepted = ['image/jpeg', 'image/png', 'image/tiff']
        if any(upload.mimetype not in accepted for upload in field.data):
            raise ValidationError('JPEG, PNG and TIFF files only!')

    def __init__(self, *args, **kwargs):
        """Build the form and fill the version selector from the manifest.

        Args:
            *args: Forwarded to the parent form constructor.
            **kwargs: Forwarded to the parent form constructor, except for
                the optional ``version`` key, which is removed first and
                falls back to the manifest's ``latest_version``.
        """
        manifest = SERVICES['file-setup-pipeline']
        latest = manifest['latest_version']
        selected = kwargs.pop('version', latest)
        super().__init__(*args, **kwargs)

        version_field = self.version
        version_field.choices = [(name, name) for name in manifest['versions']]
        version_field.data = selected
        version_field.default = manifest['latest_version']
class AddTesseractOCRPipelineJobForm(AddJobForm):
    """Job form for the ``tesseract-ocr-pipeline`` service.

    Adds a PDF upload, a model selector and a binarization switch to the
    base job fields.
    """

    binarization = BooleanField('Binarization')
    pdf = FileField('File', validators=[FileRequired()])
    model = SelectField('Model', validators=[DataRequired()])

    def validate_binarization(self, field):
        """Refuse a checked binarization box if the version lacks the method.

        Raises:
            ValidationError: If the box is checked but the manifest entry for
                the selected version does not list ``binarization`` under
                ``methods``.
        """
        # Looked up before the early return so an unknown version fails the
        # same way whether or not the box is checked.
        version_info = (
            SERVICES['tesseract-ocr-pipeline']['versions'][self.version.data]
        )
        if not field.data:
            return
        supported = (
            'methods' in version_info
            and 'binarization' in version_info['methods']
        )
        if not supported:
            raise ValidationError('Binarization is not available')

    def validate_pdf(self, field):
        """Accept only uploads whose mimetype is ``application/pdf``.

        Raises:
            ValidationError: For any other mimetype.
        """
        if field.data.mimetype == 'application/pdf':
            return
        raise ValidationError('PDF files only!')

    def __init__(self, *args, **kwargs):
        """Build the form for one service version.

        Disables the binarization checkbox unless the version's manifest
        entry lists the method, and offers the shared models plus the current
        user's non-shared models that are compatible with the version.

        Args:
            *args: Forwarded to the parent form constructor.
            **kwargs: Forwarded to the parent form constructor, except for
                the optional ``version`` key, which is removed first and
                falls back to the manifest's ``latest_version``.
        """
        manifest = SERVICES['tesseract-ocr-pipeline']
        selected = kwargs.pop('version', manifest['latest_version'])
        super().__init__(*args, **kwargs)
        version_info = manifest['versions'][selected]

        # Render attributes of the checkbox; created on first use.
        attrs = self.binarization.render_kw
        if attrs is None:
            attrs = self.binarization.render_kw = {}
        binarization_supported = (
            'methods' in version_info
            and 'binarization' in version_info['methods']
        )
        if binarization_supported:
            attrs.pop('disabled', None)
        else:
            attrs['disabled'] = True

        # Shared models first, then the current user's non-shared ones.
        model_choices = [('', 'Choose your option')]
        query_filters = (
            {'shared': True},
            {'shared': False, 'user': current_user},
        )
        for criteria in query_filters:
            for candidate in TesseractOCRModel.query.filter_by(**criteria).all():
                if selected in candidate.compatible_service_versions:
                    model_choices.append((candidate.hashid, candidate.title))
        self.model.choices = model_choices
        self.model.default = ''

        self.version.choices = [(name, name) for name in manifest['versions']]
        self.version.data = selected
        self.version.default = manifest['latest_version']
class AddTranskribusHTRPipelineJobForm(AddJobForm):
    """Job form for the ``transkribus-htr-pipeline`` service.

    Adds a PDF upload, a model selector with a fixed list of models and a
    binarization switch to the base job fields.
    """

    binarization = BooleanField('Binarization')
    pdf = FileField('File', validators=[FileRequired()])
    model = SelectField('Model', validators=[DataRequired()])

    def validate_binarization(self, field):
        """Refuse a checked binarization box if the version lacks the method.

        Raises:
            ValidationError: If the box is checked but the manifest entry for
                the selected version does not list ``binarization`` under
                ``methods``.
        """
        # Looked up before the early return so an unknown version fails the
        # same way whether or not the box is checked.
        version_info = (
            SERVICES['transkribus-htr-pipeline']['versions'][self.version.data]
        )
        if not field.data:
            return
        supported = (
            'methods' in version_info
            and 'binarization' in version_info['methods']
        )
        if not supported:
            raise ValidationError('Binarization is not available')

    def validate_pdf(self, field):
        """Accept only uploads whose mimetype is ``application/pdf``.

        Raises:
            ValidationError: For any other mimetype.
        """
        if field.data.mimetype == 'application/pdf':
            return
        raise ValidationError('PDF files only!')

    def __init__(self, *args, **kwargs):
        """Build the form for one service version.

        Disables the binarization checkbox unless the version's manifest
        entry lists the method, and fills the model selector with the
        hard-coded model list.

        Args:
            *args: Forwarded to the parent form constructor.
            **kwargs: Forwarded to the parent form constructor, except for
                the optional ``version`` key, which is removed first and
                falls back to the manifest's ``latest_version``.
        """
        manifest = SERVICES['transkribus-htr-pipeline']
        selected = kwargs.pop('version', manifest['latest_version'])
        super().__init__(*args, **kwargs)
        version_info = manifest['versions'][selected]

        # Render attributes of the checkbox; created on first use.
        attrs = self.binarization.render_kw
        if attrs is None:
            attrs = self.binarization.render_kw = {}
        binarization_supported = (
            'methods' in version_info
            and 'binarization' in version_info['methods']
        )
        if binarization_supported:
            attrs.pop('disabled', None)
        else:
            attrs['disabled'] = True

        self.model.choices = [
            ('', 'Choose your option'),
            ('37569', 'Tim Model'),
            ('29539', 'UCLUniversity of Toronto #7'),
        ]
        self.model.default = ''

        self.version.choices = [(name, name) for name in manifest['versions']]
        self.version.data = selected
        self.version.default = manifest['latest_version']
class AddSpacyNLPPipelineJobForm(AddJobForm):
    """Job form for the ``spacy-nlp-pipeline`` service.

    Adds a plain-text upload, a model selector and an encoding-detection
    switch to the base job fields.
    """

    # NOTE: this render_kw dict is a single object created at class
    # definition time; __init__ copies it before changing it so that
    # instances never write to the shared dict.
    encoding_detection = BooleanField(
        'Encoding detection',
        render_kw={'disabled': True},
    )
    txt = FileField('File', validators=[FileRequired()])
    model = SelectField('Model', validators=[DataRequired()])

    def validate_encoding_detection(self, field):
        """Refuse a checked box if the selected version lacks the method.

        Raises:
            ValidationError: If the box is checked but the manifest entry for
                the selected version does not list ``encoding_detection``
                under ``methods``.
        """
        service_info = (
            SERVICES['spacy-nlp-pipeline']['versions'][self.version.data]
        )
        if field.data:
            if (
                'methods' not in service_info
                or 'encoding_detection' not in service_info['methods']
            ):
                raise ValidationError('Encoding detection is not available')

    def validate_txt(self, field):
        """Accept only uploads whose mimetype is ``text/plain``.

        Raises:
            ValidationError: For any other mimetype.
        """
        if field.data.mimetype != 'text/plain':
            raise ValidationError('Plain text files only!')

    def __init__(self, *args, **kwargs):
        """Build the form for one service version.

        Disables the encoding-detection checkbox unless the version's
        manifest entry lists the method, and fills the model selector from
        the version's ``models`` mapping.

        Args:
            *args: Forwarded to the parent form constructor.
            **kwargs: Forwarded to the parent form constructor, except for
                the optional ``version`` key, which is removed first and
                falls back to the manifest's ``latest_version``.
        """
        service_manifest = SERVICES['spacy-nlp-pipeline']
        version = kwargs.pop('version', service_manifest['latest_version'])
        super().__init__(*args, **kwargs)
        service_info = service_manifest['versions'][version]

        # Work on a per-instance copy: the dict given as render_kw in the
        # field declaration is handed to every bound field by reference, so
        # mutating it in place would leak state between form instances (and
        # between concurrently handled requests).
        render_kw = dict(self.encoding_detection.render_kw or {})
        encoding_detection_supported = (
            'methods' in service_info
            and 'encoding_detection' in service_info['methods']
        )
        if encoding_detection_supported:
            render_kw.pop('disabled', None)
        else:
            render_kw['disabled'] = True
        self.encoding_detection.render_kw = render_kw

        self.model.choices = [('', 'Choose your option')]
        self.model.choices += [
            (model_id, label)
            for model_id, label in service_info['models'].items()
        ]
        self.model.default = ''

        self.version.choices = [
            (x, x) for x in service_manifest['versions']
        ]
        self.version.data = version
        self.version.default = version