192 lines
8.6 KiB
Python
Raw Permalink Normal View History

2023-03-31 09:14:21 +02:00
from flask_wtf import FlaskForm
from flask_login import current_user
2022-04-12 16:11:24 +02:00
from flask_wtf.file import FileField, FileRequired
from wtforms import (
BooleanField,
DecimalRangeField,
MultipleFileField,
SelectField,
StringField,
SubmitField,
ValidationError
)
2022-09-02 13:07:30 +02:00
from wtforms.validators import InputRequired, Length
2022-11-14 12:25:26 +01:00
from app.models import SpaCyNLPPipelineModel, TesseractOCRPipelineModel
2021-02-19 13:00:52 +01:00
from . import SERVICES
2023-03-31 09:14:21 +02:00
class CreateJobBaseForm(FlaskForm):
    """Fields common to all service job creation forms.

    No choices are assigned to ``version`` here; the subclasses in this
    module set ``version.choices`` in their constructors.
    """

    # Declaration order is preserved: description, title, version, submit.
    description = StringField(
        'Description',
        validators=[InputRequired(), Length(max=255)],
    )
    title = StringField('Title', validators=[InputRequired(), Length(max=32)])
    # Choices are filled in per service by the subclass constructors.
    version = SelectField('Version', validators=[InputRequired()])
    submit = SubmitField()
2021-02-19 13:00:52 +01:00
2022-09-02 13:07:30 +02:00
class CreateFileSetupPipelineJobForm(CreateJobBaseForm):
    """Job creation form for the ``file-setup-pipeline`` service."""

    images = MultipleFileField('File(s)', validators=[InputRequired()])

    def validate_images(form, field):
        """Accept the upload only if every file is a JPEG, PNG or TIFF.

        Raises:
            ValidationError: If any file in ``field.data`` reports a MIME
                type other than the three accepted ones.
        """
        accepted_mimetypes = ('image/jpeg', 'image/png', 'image/tiff')
        if any(
            upload.mimetype not in accepted_mimetypes
            for upload in field.data
        ):
            raise ValidationError('JPEG, PNG and TIFF files only!')

    def __init__(self, *args, **kwargs):
        """Build the form and set up the ``version`` select.

        Keyword Args:
            prefix: Form prefix; defaults to
                ``'create-file-setup-pipeline-job-form'`` when not given.
            version: Service version to select; defaults to the manifest's
                ``latest_version``. Removed from ``kwargs`` before they are
                passed on to the parent constructor.
        """
        kwargs.setdefault('prefix', 'create-file-setup-pipeline-job-form')
        manifest = SERVICES['file-setup-pipeline']
        latest_version = manifest['latest_version']
        selected_version = kwargs.pop('version', latest_version)
        super().__init__(*args, **kwargs)
        version_field = self.version
        version_field.choices = [(v, v) for v in manifest['versions']]
        # Assigned after the parent constructor ran, so this value replaces
        # whatever the field was populated with.
        version_field.data = selected_version
        version_field.default = latest_version
2021-02-19 13:00:52 +01:00
2022-09-02 13:07:30 +02:00
class CreateTesseractOCRPipelineJobForm(CreateJobBaseForm):
    """Job creation form for the ``tesseract-ocr-pipeline`` service.

    The ``binarization`` and ``ocropus_nlbin_threshold`` inputs are rendered
    as disabled unless the selected service version lists the method of the
    same name under ``methods`` in the service manifest.
    """

    binarization = BooleanField('Binarization')
    pdf = FileField('File', validators=[FileRequired()])
    model = SelectField('Model', validators=[InputRequired()])
    ocropus_nlbin_threshold = DecimalRangeField(
        render_kw={'min': 0, 'max': 1, 'step': 0.1, 'start': [0.5], 'disabled': True}
    )

    def validate_binarization(self, field):
        """Reject a checked binarization box if the version lacks the method.

        Raises:
            ValidationError: If the box is checked and ``'binarization'`` is
                not among the selected version's ``methods``.
        """
        service_info = SERVICES['tesseract-ocr-pipeline']['versions'][self.version.data]
        if not field.data:
            return
        if not ('methods' in service_info and 'binarization' in service_info['methods']):
            raise ValidationError('Binarization is not available')

    def validate_pdf(self, field):
        """Accept the upload only if it reports the PDF MIME type.

        Raises:
            ValidationError: If the MIME type is not ``application/pdf``.
        """
        if field.data.mimetype != 'application/pdf':
            raise ValidationError('PDF files only!')

    def __init__(self, *args, **kwargs):
        """Build the form, toggle optional inputs and fill the selects.

        Keyword Args:
            prefix: Form prefix; defaults to
                ``'create-tesseract-ocr-pipeline-job-form'`` when not given.
            version: Service version to select; defaults to the manifest's
                ``latest_version``. Removed from ``kwargs`` before they are
                passed on to the parent constructor.
        """
        kwargs.setdefault('prefix', 'create-tesseract-ocr-pipeline-job-form')
        service_manifest = SERVICES['tesseract-ocr-pipeline']
        version = kwargs.pop('version', service_manifest['latest_version'])
        super().__init__(*args, **kwargs)
        service_info = service_manifest['versions'][version]
        methods = service_info['methods'] if 'methods' in service_info else ()
        optional_inputs = (
            (self.binarization, 'binarization'),
            (self.ocropus_nlbin_threshold, 'ocropus_nlbin_threshold'),
        )
        for optional_field, method in optional_inputs:
            # Work on a per-instance copy: a render_kw dict given in the
            # class-level field declaration is the same object for every
            # form instance, so editing it in place would leak the
            # 'disabled' state between forms (and concurrent requests).
            render_kw = dict(optional_field.render_kw or {})
            if method in methods:
                render_kw.pop('disabled', None)
            else:
                render_kw['disabled'] = True
            optional_field.render_kw = render_kw
        # NOTE(review): the user's own models are not filtered by service
        # version compatibility - confirm that this is intended.
        user_models = current_user.tesseract_ocr_pipeline_models.order_by(
            TesseractOCRPipelineModel.title
        ).all()
        # Only public models of *other* users belong in the public group;
        # the current user's models are already listed under 'Your models'.
        public_models = [
            x for x in TesseractOCRPipelineModel.query.order_by(TesseractOCRPipelineModel.title).all()
            if version in x.compatible_service_versions
            and x.is_public
            and x.user != current_user
        ]
        # A dict of choices maps a group label to that group's options.
        self.model.choices = {
            '': [('', 'Choose your option')],
            # The value-0 entry is a placeholder shown when the user has no
            # models of their own.
            'Your models': [(x.hashid, f'{x.title} [{x.version}]') for x in user_models] if user_models else [(0, 'Nothing here yet...')],
            'Public models': [(x.hashid, f'{x.title} [{x.version}]') for x in public_models]
        }
        self.model.default = ''
        self.version.choices = [(x, x) for x in service_manifest['versions']]
        # Assigned after the parent constructor ran, so this value replaces
        # whatever the field was populated with.
        self.version.data = version
        self.version.default = service_manifest['latest_version']
2021-02-19 13:00:52 +01:00
2022-09-02 13:07:30 +02:00
class CreateTranskribusHTRPipelineJobForm(CreateJobBaseForm):
    """Job creation form for the ``transkribus-htr-pipeline`` service.

    The ``binarization`` checkbox is rendered as disabled unless the
    selected service version lists ``'binarization'`` under ``methods`` in
    the service manifest.
    """

    binarization = BooleanField('Binarization')
    pdf = FileField('File', validators=[FileRequired()])
    model = SelectField('Model', validators=[InputRequired()])

    def validate_binarization(self, field):
        """Reject a checked binarization box if the version lacks the method.

        Raises:
            ValidationError: If the box is checked and ``'binarization'`` is
                not among the selected version's ``methods``.
        """
        service_info = SERVICES['transkribus-htr-pipeline']['versions'][self.version.data]
        if not field.data:
            return
        binarization_supported = (
            'methods' in service_info
            and 'binarization' in service_info['methods']
        )
        if not binarization_supported:
            raise ValidationError('Binarization is not available')

    def validate_pdf(self, field):
        """Accept the upload only if it reports the PDF MIME type.

        Raises:
            ValidationError: If the MIME type is not ``application/pdf``.
        """
        uploaded_mimetype = field.data.mimetype
        if uploaded_mimetype != 'application/pdf':
            raise ValidationError('PDF files only!')

    def __init__(self, *args, **kwargs):
        """Build the form, toggle binarization and fill the selects.

        Keyword Args:
            prefix: Form prefix; defaults to
                ``'create-transkribus-htr-pipeline-job-form'`` when not given.
            transkribus_htr_pipeline_models: Iterable of mappings read via
                their ``'modelId'`` and ``'name'`` keys to build the model
                choices; defaults to an empty list.
            version: Service version to select; defaults to the manifest's
                ``latest_version``.

        The last two are removed from ``kwargs`` before they are passed on
        to the parent constructor.
        """
        kwargs.setdefault('prefix', 'create-transkribus-htr-pipeline-job-form')
        htr_models = kwargs.pop('transkribus_htr_pipeline_models', [])
        manifest = SERVICES['transkribus-htr-pipeline']
        latest_version = manifest['latest_version']
        selected_version = kwargs.pop('version', latest_version)
        super().__init__(*args, **kwargs)
        service_info = manifest['versions'][selected_version]

        # Start out disabled; the flag is dropped again below when the
        # selected version offers binarization.
        binarization_kw = self.binarization.render_kw
        if binarization_kw is None:
            binarization_kw = self.binarization.render_kw = {}
        binarization_kw['disabled'] = True
        if 'methods' in service_info and 'binarization' in service_info['methods']:
            del binarization_kw['disabled']

        # Empty placeholder option first, then one option per given model.
        self.model.choices = [
            ('', 'Choose your option'),
            *((m['modelId'], m['name']) for m in htr_models),
        ]
        self.model.default = ''

        version_field = self.version
        version_field.choices = [(v, v) for v in manifest['versions']]
        # Assigned after the parent constructor ran, so this value replaces
        # whatever the field was populated with.
        version_field.data = selected_version
        version_field.default = latest_version
2022-09-02 13:07:30 +02:00
class CreateSpacyNLPPipelineJobForm(CreateJobBaseForm):
    """Job creation form for the ``spacy-nlp-pipeline`` service.

    The ``encoding_detection`` checkbox is rendered as disabled unless the
    selected service version lists ``'encoding_detection'`` under
    ``methods`` in the service manifest.
    """

    encoding_detection = BooleanField('Encoding detection', render_kw={'disabled': True})
    txt = FileField('File', validators=[FileRequired()])
    model = SelectField('Model', validators=[InputRequired()])

    def validate_encoding_detection(self, field):
        """Reject a checked box if the version lacks encoding detection.

        Raises:
            ValidationError: If the box is checked and
                ``'encoding_detection'`` is not among the selected
                version's ``methods``.
        """
        service_info = SERVICES['spacy-nlp-pipeline']['versions'][self.version.data]
        if not field.data:
            return
        if (
            'methods' not in service_info
            or 'encoding_detection' not in service_info['methods']
        ):
            raise ValidationError('Encoding detection is not available')

    def validate_txt(self, field):
        """Accept the upload only if it reports the plain-text MIME type.

        Raises:
            ValidationError: If the MIME type is not ``text/plain``.
        """
        if field.data.mimetype != 'text/plain':
            raise ValidationError('Plain text files only!')

    def __init__(self, *args, **kwargs):
        """Build the form, toggle encoding detection and fill the selects.

        Keyword Args:
            prefix: Form prefix; defaults to
                ``'create-spacy-nlp-pipeline-job-form'`` when not given.
            version: Service version to select; defaults to the manifest's
                ``latest_version``. Removed from ``kwargs`` before they are
                passed on to the parent constructor.
        """
        kwargs.setdefault('prefix', 'create-spacy-nlp-pipeline-job-form')
        service_manifest = SERVICES['spacy-nlp-pipeline']
        version = kwargs.pop('version', service_manifest['latest_version'])
        super().__init__(*args, **kwargs)
        service_info = service_manifest['versions'][version]
        # Work on a per-instance copy: the render_kw dict given in the
        # class-level field declaration is the same object for every form
        # instance, so editing it in place would leak the 'disabled' state
        # between forms (and concurrent requests).
        render_kw = dict(self.encoding_detection.render_kw or {})
        if 'methods' in service_info and 'encoding_detection' in service_info['methods']:
            render_kw.pop('disabled', None)
        else:
            render_kw['disabled'] = True
        self.encoding_detection.render_kw = render_kw
        # NOTE(review): the user's own models are not filtered by service
        # version compatibility - confirm that this is intended.
        user_models = current_user.spacy_nlp_pipeline_models.order_by(
            SpaCyNLPPipelineModel.title
        ).all()
        # Public models of other users, restricted to those compatible with
        # the selected service version.
        public_models = [
            x for x in SpaCyNLPPipelineModel.query.filter(SpaCyNLPPipelineModel.user != current_user, SpaCyNLPPipelineModel.is_public == True).order_by(SpaCyNLPPipelineModel.title).all()
            if version in x.compatible_service_versions
        ]
        # A dict of choices maps a group label to that group's options.
        self.model.choices = {
            '': [('', 'Choose your option')],
            # The value-0 entry is a placeholder shown when the user has no
            # models of their own.
            'Your models': [(x.hashid, f'{x.title} [{x.version}]') for x in user_models] if user_models else [(0, 'Nothing here yet...')],
            'Public models': [(x.hashid, f'{x.title} [{x.version}]') for x in public_models]
        }
        self.model.default = ''
        self.version.choices = [(x, x) for x in service_manifest['versions']]
        # Assigned after the parent constructor ran, so this value replaces
        # whatever the field was populated with.
        self.version.data = version
        # NOTE(review): the sibling forms use the manifest's latest_version
        # as the default here - confirm which of the two is intended.
        self.version.default = version