mirror of
https://gitlab.ub.uni-bielefeld.de/sfb1288inf/nopaque.git
synced 2025-01-13 11:40:35 +00:00
139 lines
6.0 KiB
Python
139 lines
6.0 KiB
Python
from app.models import TesseractOCRModel
|
||
from flask_login import current_user
|
||
from flask_wtf import FlaskForm
|
||
from flask_wtf.file import FileField, FileRequired
|
||
from wtforms import (
|
||
BooleanField,
|
||
MultipleFileField,
|
||
SelectField,
|
||
StringField,
|
||
SubmitField,
|
||
ValidationError
|
||
)
|
||
from wtforms.validators import DataRequired, InputRequired, Length
|
||
from . import SERVICES
|
||
|
||
|
||
class AddJobForm(FlaskForm):
    """Base form with the fields shared by all "add job" forms.

    The ``version`` select field is declared without choices; each subclass
    in this module assigns ``version.choices`` in its own ``__init__``.
    """

    # Field declaration order is intentionally left unchanged.
    description = StringField(
        'Description',
        validators=[InputRequired(), Length(min=1, max=255)]
    )
    title = StringField(
        'Title',
        validators=[InputRequired(), Length(min=1, max=32)]
    )
    version = SelectField('Version', validators=[DataRequired()])
    submit = SubmitField()
|
||
|
||
|
||
class AddFileSetupPipelineJobForm(AddJobForm):
    """Job form for the ``file-setup-pipeline`` service (image uploads)."""

    images = MultipleFileField('File(s)', validators=[DataRequired()])

    def validate_images(form, field):
        """Reject the upload unless every file is a JPEG, PNG or TIFF.

        Raises:
            ValidationError: if any file in ``field.data`` has a mimetype
                other than ``image/jpeg``, ``image/png`` or ``image/tiff``.
        """
        accepted = ('image/jpeg', 'image/png', 'image/tiff')
        if any(upload.mimetype not in accepted for upload in field.data):
            raise ValidationError('JPEG, PNG and TIFF files only!')

    def __init__(self, *args, **kwargs):
        """Build the form and fill in the version select field.

        An optional ``version`` keyword argument is consumed here (it is not
        forwarded to the parent constructor). When it is absent, the service
        manifest's ``latest_version`` is used instead.
        """
        manifest = SERVICES['file-setup-pipeline']
        latest = manifest['latest_version']
        selected = kwargs.pop('version', latest)
        super().__init__(*args, **kwargs)
        self.version.choices = [(v, v) for v in manifest['versions']]
        self.version.data = selected
        self.version.default = latest
|
||
|
||
|
||
class AddTesseractOCRPipelineJobForm(AddJobForm):
    """Job form for the ``tesseract-ocr-pipeline`` service (PDF upload)."""

    binarization = BooleanField('Binarization')
    pdf = FileField('File', validators=[FileRequired()])
    model = SelectField('Model', validators=[DataRequired()])

    def validate_binarization(self, field):
        """Refuse binarization when the chosen service version lacks it.

        Raises:
            ValidationError: if the box is ticked but ``'binarization'`` is
                not listed in the version's ``methods``.
        """
        versions = SERVICES['tesseract-ocr-pipeline']['versions']
        info = versions[self.version.data]
        if not field.data:
            return
        if 'binarization' not in info['methods']:
            raise ValidationError('Binarization is not available')

    def validate_pdf(self, field):
        """Accept only uploads whose mimetype is ``application/pdf``.

        Raises:
            ValidationError: for any other mimetype.
        """
        is_pdf = field.data.mimetype == 'application/pdf'
        if not is_pdf:
            raise ValidationError('PDF files only!')

    def __init__(self, *args, **kwargs):
        """Build the form and populate the model and version selects.

        An optional ``version`` keyword argument is consumed here (it is not
        forwarded to the parent constructor) and defaults to the manifest's
        ``latest_version``. The model choices are the shared models followed
        by the non-shared models belonging to ``current_user``, each kept
        only if the selected version is among its
        ``compatible_service_versions``.
        """
        manifest = SERVICES['tesseract-ocr-pipeline']
        latest = manifest['latest_version']
        selected = kwargs.pop('version', latest)
        super().__init__(*args, **kwargs)
        info = manifest['versions'][selected]
        if 'binarization' not in info['methods']:
            # Render the checkbox as disabled for versions without support.
            self.binarization.render_kw = {'disabled': True}
        # Shared models come first, then the current user's private ones.
        model_queries = (
            TesseractOCRModel.query.filter_by(shared=True),
            TesseractOCRModel.query.filter_by(
                shared=False,
                user=current_user
            ),
        )
        usable_models = []
        for model_query in model_queries:
            for ocr_model in model_query.all():
                if selected in ocr_model.compatible_service_versions:
                    usable_models.append(ocr_model)
        model_choices = [('', 'Choose your option')]
        for ocr_model in usable_models:
            model_choices.append((ocr_model.hashid, ocr_model.title))
        self.model.choices = model_choices
        self.model.default = ''
        self.version.choices = [(v, v) for v in manifest['versions']]
        self.version.data = selected
        self.version.default = latest
|
||
|
||
|
||
class AddTranskribusHTRPipelineJobForm(AddJobForm):
    """Job form for the ``transkribus-htr-pipeline`` service (PDF upload)."""

    binarization = BooleanField('Binarization')
    pdf = FileField('File', validators=[FileRequired()])
    model = SelectField('Model', validators=[DataRequired()])

    def validate_binarization(self, field):
        """Refuse binarization when the chosen service version lacks it.

        Raises:
            ValidationError: if the box is ticked but ``'binarization'`` is
                not listed in the version's ``methods``.
        """
        versions = SERVICES['transkribus-htr-pipeline']['versions']
        info = versions[self.version.data]
        if not field.data:
            return
        if 'binarization' not in info['methods']:
            raise ValidationError('Binarization is not available')

    def validate_pdf(self, field):
        """Accept only uploads whose mimetype is ``application/pdf``.

        Raises:
            ValidationError: for any other mimetype.
        """
        is_pdf = field.data.mimetype == 'application/pdf'
        if not is_pdf:
            raise ValidationError('PDF files only!')

    def __init__(self, *args, **kwargs):
        """Build the form and populate the model and version selects.

        An optional ``version`` keyword argument is consumed here (it is not
        forwarded to the parent constructor) and defaults to the manifest's
        ``latest_version``. The model choices are a fixed, hard-coded list.
        """
        manifest = SERVICES['transkribus-htr-pipeline']
        latest = manifest['latest_version']
        selected = kwargs.pop('version', latest)
        super().__init__(*args, **kwargs)
        info = manifest['versions'][selected]
        if 'binarization' not in info['methods']:
            # Render the checkbox as disabled for versions without support.
            self.binarization.render_kw = {'disabled': True}
        self.model.choices = [
            ('', 'Choose your option'),
            ('37569', 'Tim Model'),
            ('29539', 'UCL–University of Toronto #7'),
        ]
        self.model.default = ''
        self.version.choices = [(v, v) for v in manifest['versions']]
        self.version.data = selected
        self.version.default = latest
|
||
|
||
|
||
class AddSpacyNLPPipelineJobForm(AddJobForm):
    """Job form for the ``spacy-nlp-pipeline`` service (plain text upload)."""

    encoding_detection = BooleanField('Encoding detection')
    txt = FileField('File', validators=[FileRequired()])
    model = SelectField('Model', validators=[DataRequired()])

    def validate_encoding_detection(self, field):
        """Refuse encoding detection when the chosen version lacks it.

        Raises:
            ValidationError: if the box is ticked but
                ``'encoding_detection'`` is not listed in the version's
                ``methods``.
        """
        service_manifest = SERVICES['spacy-nlp-pipeline']
        service_info = service_manifest['versions'][self.version.data]
        if field.data and 'encoding_detection' not in service_info['methods']:
            raise ValidationError('Encoding detection is not available!')

    def validate_txt(self, field):
        """Accept only uploads whose mimetype is ``text/plain``.

        Raises:
            ValidationError: for any other mimetype.
        """
        if field.data.mimetype != 'text/plain':
            raise ValidationError('Plain text files only!')

    def __init__(self, *args, **kwargs):
        """Build the form and populate the model and version selects.

        An optional ``version`` keyword argument is consumed here (it is not
        forwarded to the parent constructor) and defaults to the manifest's
        ``latest_version``. The model choices are taken from the ``models``
        mapping of the selected service version.
        """
        service_manifest = SERVICES['spacy-nlp-pipeline']
        version = kwargs.pop('version', service_manifest['latest_version'])
        super().__init__(*args, **kwargs)
        service_info = service_manifest['versions'][version]
        if 'encoding_detection' not in service_info['methods']:
            # Render the checkbox as disabled for versions without support.
            self.encoding_detection.render_kw = {'disabled': True}
        self.model.choices = [('', 'Choose your option')]
        self.model.choices += list(service_info['models'].items())
        self.model.default = ''
        self.version.choices = [(x, x) for x in service_manifest['versions']]
        self.version.data = version
        # Use the manifest's latest version as the default, matching the
        # other job forms in this module (previously the requested version).
        self.version.default = service_manifest['latest_version']
|