nopaque/app/services/forms.py
2022-04-12 16:11:24 +02:00

139 lines
6.0 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

from app.models import TesseractOCRModel
from flask_login import current_user
from flask_wtf import FlaskForm
from flask_wtf.file import FileField, FileRequired
from wtforms import (
BooleanField,
MultipleFileField,
SelectField,
StringField,
SubmitField,
ValidationError
)
from wtforms.validators import DataRequired, InputRequired, Length
from . import SERVICES
class AddJobForm(FlaskForm):
description = StringField('Description', validators=[InputRequired(), Length(min=1, max=255)])
title = StringField('Title', validators=[InputRequired(), Length(min=1, max=32)])
version = SelectField('Version', validators=[DataRequired()])
submit = SubmitField()
class AddFileSetupPipelineJobForm(AddJobForm):
images = MultipleFileField('File(s)', validators=[DataRequired()])
def validate_images(form, field):
valid_mimetypes = ['image/jpeg', 'image/png', 'image/tiff']
for image in field.data:
if image.mimetype not in valid_mimetypes:
raise ValidationError('JPEG, PNG and TIFF files only!')
def __init__(self, *args, **kwargs):
service_manifest = SERVICES['file-setup-pipeline']
version = kwargs.pop('version', service_manifest['latest_version'])
super().__init__(*args, **kwargs)
self.version.choices = [(x, x) for x in service_manifest['versions']]
self.version.data = version
self.version.default = service_manifest['latest_version']
class AddTesseractOCRPipelineJobForm(AddJobForm):
binarization = BooleanField('Binarization')
pdf = FileField('File', validators=[FileRequired()])
model = SelectField('Model', validators=[DataRequired()])
def validate_binarization(self, field):
service_info = SERVICES['tesseract-ocr-pipeline']['versions'][self.version.data]
if field.data and 'binarization' not in service_info['methods']:
raise ValidationError('Binarization is not available')
def validate_pdf(self, field):
if field.data.mimetype != 'application/pdf':
raise ValidationError('PDF files only!')
def __init__(self, *args, **kwargs):
service_manifest = SERVICES['tesseract-ocr-pipeline']
version = kwargs.pop('version', service_manifest['latest_version'])
super().__init__(*args, **kwargs)
service_info = service_manifest['versions'][version]
if 'binarization' not in service_info['methods']:
self.binarization.render_kw = {'disabled': True}
compatible_models = [
x for x in TesseractOCRModel.query.filter_by(shared=True).all()
if version in x.compatible_service_versions
]
compatible_models += [
x for x in TesseractOCRModel.query.filter_by(shared=False, user=current_user).all()
if version in x.compatible_service_versions
]
self.model.choices = [('', 'Choose your option')]
self.model.choices += [(x.hashid, x.title) for x in compatible_models]
self.model.default = ''
self.version.choices = [(x, x) for x in service_manifest['versions']]
self.version.data = version
self.version.default = service_manifest['latest_version']
class AddTranskribusHTRPipelineJobForm(AddJobForm):
binarization = BooleanField('Binarization')
pdf = FileField('File', validators=[FileRequired()])
model = SelectField('Model', validators=[DataRequired()])
def validate_binarization(self, field):
service_info = SERVICES['transkribus-htr-pipeline']['versions'][self.version.data]
if field.data and 'binarization' not in service_info['methods']:
raise ValidationError('Binarization is not available')
def validate_pdf(self, field):
if field.data.mimetype != 'application/pdf':
raise ValidationError('PDF files only!')
def __init__(self, *args, **kwargs):
service_manifest = SERVICES['transkribus-htr-pipeline']
version = kwargs.pop('version', service_manifest['latest_version'])
super().__init__(*args, **kwargs)
service_info = service_manifest['versions'][version]
if 'binarization' not in service_info['methods']:
self.binarization.render_kw = {'disabled': True}
self.model.choices = [('', 'Choose your option')]
self.model.choices += [
('37569', 'Tim Model'),
('29539', 'UCLUniversity of Toronto #7')
]
self.model.default = ''
self.version.choices = [(x, x) for x in service_manifest['versions']]
self.version.data = version
self.version.default = service_manifest['latest_version']
class AddSpacyNLPPipelineJobForm(AddJobForm):
encoding_detection = BooleanField('Encoding detection')
txt = FileField('File', validators=[FileRequired()])
model = SelectField('Model', validators=[DataRequired()])
def validate_encoding_detection(self, field):
service_manifest = SERVICES['spacy-nlp-pipeline']
service_info = service_manifest['versions'][self.version.data]
if field.data and 'encoding_detection' not in service_info['methods']:
raise ValidationError('Encoding detection is not available!')
def validate_txt(form, field):
if field.data.mimetype != 'text/plain':
raise ValidationError('Plain text files only!')
def __init__(self, *args, **kwargs):
service_manifest = SERVICES['spacy-nlp-pipeline']
version = kwargs.pop('version', service_manifest['latest_version'])
super().__init__(*args, **kwargs)
service_info = service_manifest['versions'][version]
if 'encoding_detection' not in service_info['methods']:
self.encoding_detection.render_kw = {'disabled': True}
self.model.choices = [('', 'Choose your option')]
self.model.choices += [(x, y) for x, y in service_info['models'].items()] # noqa
self.model.default = ''
self.version.choices = [(x, x) for x in service_manifest['versions']]
self.version.data = version
self.version.default = version