from flask_login import current_user
from flask_wtf import FlaskForm
from flask_wtf.file import FileField, FileRequired
from wtforms import (
    BooleanField,
    MultipleFileField,
    SelectField,
    StringField,
    SubmitField,
    ValidationError
)
from wtforms.validators import InputRequired, Length
from app.models import (
    TRANSKRIBUS_HTR_MODELS,
    TesseractOCRModel,
    TranskribusHTRModel
)
from . import SERVICES


def _method_available(service_info, method):
    """Return True if *method* is listed in ``service_info['methods']``.

    A missing ``'methods'`` key counts as "not available".
    """
    return 'methods' in service_info and method in service_info['methods']


def _set_disabled(field, disabled):
    """Set or clear the ``disabled`` render keyword of *field*.

    A fresh dict is assigned instead of mutating ``field.render_kw`` in
    place: a ``render_kw`` dict passed in a field declaration is handed to
    every bound field as the same object, so an in-place change would leak
    from one form instance into all others.
    """
    render_kw = dict(field.render_kw or {})
    if disabled:
        render_kw['disabled'] = True
    else:
        render_kw.pop('disabled', None)
    field.render_kw = render_kw


class CreateJobBaseForm(FlaskForm):
    """Fields shared by all job creation forms.

    Subclasses are expected to populate ``version.choices`` themselves; this
    base class declares the select field without any choices.
    """

    description = StringField(
        'Description',
        validators=[InputRequired(), Length(max=255)]
    )
    title = StringField(
        'Title',
        validators=[InputRequired(), Length(max=32)]
    )
    version = SelectField('Version', validators=[InputRequired()])
    submit = SubmitField()


class CreateFileSetupPipelineJobForm(CreateJobBaseForm):
    """Job form for the ``file-setup-pipeline`` service (image uploads)."""

    images = MultipleFileField('File(s)', validators=[InputRequired()])

    def validate_images(self, field):
        """Reject the upload if any file is not a JPEG, PNG or TIFF.

        Raises:
            ValidationError: on the first file with another mimetype.
        """
        valid_mimetypes = ['image/jpeg', 'image/png', 'image/tiff']
        for image in field.data:
            if image.mimetype not in valid_mimetypes:
                raise ValidationError('JPEG, PNG and TIFF files only!')

    def __init__(self, *args, **kwargs):
        """Build the form for one service version.

        Accepts an optional ``version`` keyword argument; it defaults to the
        manifest's ``latest_version``. All other arguments are passed on to
        the parent form.
        """
        service_manifest = SERVICES['file-setup-pipeline']
        version = kwargs.pop('version', service_manifest['latest_version'])
        super().__init__(*args, **kwargs)
        self.version.choices = [(x, x) for x in service_manifest['versions']]
        # Assigned after the parent has processed its input, so the field
        # always carries the version this form was built for.
        self.version.data = version
        self.version.default = service_manifest['latest_version']


class CreateTesseractOCRPipelineJobForm(CreateJobBaseForm):
    """Job form for the ``tesseract-ocr-pipeline`` service (PDF upload)."""

    binarization = BooleanField('Binarization')
    pdf = FileField('File', validators=[FileRequired()])
    model = SelectField('Model', validators=[InputRequired()])

    def validate_binarization(self, field):
        """Reject a checked box if the chosen version lacks binarization.

        Raises:
            ValidationError: if the box is checked but ``'binarization'`` is
                not among the methods of the selected service version.
        """
        service_info = SERVICES['tesseract-ocr-pipeline']['versions'][self.version.data]  # noqa
        if field.data and not _method_available(service_info, 'binarization'):
            raise ValidationError('Binarization is not available')

    def validate_pdf(self, field):
        """Accept only uploads whose mimetype is ``application/pdf``.

        Raises:
            ValidationError: for any other mimetype.
        """
        if field.data.mimetype != 'application/pdf':
            raise ValidationError('PDF files only!')

    def __init__(self, *args, **kwargs):
        """Build the form for one service version.

        Accepts an optional ``version`` keyword argument; it defaults to the
        manifest's ``latest_version``. All other arguments are passed on to
        the parent form.
        """
        service_manifest = SERVICES['tesseract-ocr-pipeline']
        version = kwargs.pop('version', service_manifest['latest_version'])
        super().__init__(*args, **kwargs)
        service_info = service_manifest['versions'][version]
        # The checkbox is rendered disabled unless this version offers
        # binarization.
        _set_disabled(
            self.binarization,
            not _method_available(service_info, 'binarization')
        )
        # Offer models that are compatible with this version and either
        # shared or owned by the current user.
        models = [
            x for x in TesseractOCRModel.query.all()
            if version in x.compatible_service_versions
            and (x.shared or x.user == current_user)
        ]
        # The empty first choice acts as a placeholder; InputRequired
        # rejects it on submit.
        self.model.choices = [('', 'Choose your option')]
        self.model.choices += [(x.hashid, x.title) for x in models]
        self.model.default = ''
        self.version.choices = [(x, x) for x in service_manifest['versions']]
        # Assigned after the parent has processed its input, so the field
        # always carries the version this form was built for.
        self.version.data = version
        self.version.default = service_manifest['latest_version']


class CreateTranskribusHTRPipelineJobForm(CreateJobBaseForm):
    """Job form for the ``transkribus-htr-pipeline`` service (PDF upload)."""

    binarization = BooleanField('Binarization')
    pdf = FileField('File', validators=[FileRequired()])
    model = SelectField('Model', validators=[InputRequired()])

    def validate_binarization(self, field):
        """Reject a checked box if the chosen version lacks binarization.

        Raises:
            ValidationError: if the box is checked but ``'binarization'`` is
                not among the methods of the selected service version.
        """
        service_info = SERVICES['transkribus-htr-pipeline']['versions'][self.version.data]  # noqa
        if field.data and not _method_available(service_info, 'binarization'):
            raise ValidationError('Binarization is not available')

    def validate_pdf(self, field):
        """Accept only uploads whose mimetype is ``application/pdf``.

        Raises:
            ValidationError: for any other mimetype.
        """
        if field.data.mimetype != 'application/pdf':
            raise ValidationError('PDF files only!')

    def __init__(self, *args, **kwargs):
        """Build the form for one service version.

        Accepts an optional ``version`` keyword argument; it defaults to the
        manifest's ``latest_version``. All other arguments are passed on to
        the parent form.
        """
        service_manifest = SERVICES['transkribus-htr-pipeline']
        version = kwargs.pop('version', service_manifest['latest_version'])
        super().__init__(*args, **kwargs)
        service_info = service_manifest['versions'][version]
        # The checkbox is rendered disabled unless this version offers
        # binarization.
        _set_disabled(
            self.binarization,
            not _method_available(service_info, 'binarization')
        )
        # Offer models that are either shared or owned by the current user.
        models = [
            x for x in TranskribusHTRModel.query.all()
            if x.shared or x.user == current_user
        ]
        # The empty first choice acts as a placeholder; InputRequired
        # rejects it on submit.
        self.model.choices = [('', 'Choose your option')]
        # Each label is the name of the first TRANSKRIBUS_HTR_MODELS entry
        # whose modelId matches the stored model.
        # NOTE(review): raises IndexError when no entry matches - confirm
        # that every stored model is guaranteed to have one.
        self.model.choices += [
            (
                x.hashid,
                [
                    y['name'] for y in TRANSKRIBUS_HTR_MODELS
                    if y['modelId'] == x.transkribus_model_id
                ][0]
            )
            for x in models
        ]
        self.model.default = ''
        self.version.choices = [(x, x) for x in service_manifest['versions']]
        # Assigned after the parent has processed its input, so the field
        # always carries the version this form was built for.
        self.version.data = version
        self.version.default = service_manifest['latest_version']


class CreateSpacyNLPPipelineJobForm(CreateJobBaseForm):
    """Job form for the ``spacy-nlp-pipeline`` service (plain text upload)."""

    encoding_detection = BooleanField(
        'Encoding detection',
        render_kw={'disabled': True}
    )
    txt = FileField('File', validators=[FileRequired()])
    model = SelectField('Model', validators=[InputRequired()])

    def validate_encoding_detection(self, field):
        """Reject a checked box if the version lacks encoding detection.

        Raises:
            ValidationError: if the box is checked but
                ``'encoding_detection'`` is not among the methods of the
                selected service version.
        """
        service_info = SERVICES['spacy-nlp-pipeline']['versions'][self.version.data]  # noqa
        if (
            field.data
            and not _method_available(service_info, 'encoding_detection')
        ):
            raise ValidationError('Encoding detection is not available')

    def validate_txt(self, field):
        """Accept only uploads whose mimetype is ``text/plain``.

        Raises:
            ValidationError: for any other mimetype.
        """
        if field.data.mimetype != 'text/plain':
            raise ValidationError('Plain text files only!')

    def __init__(self, *args, **kwargs):
        """Build the form for one service version.

        Accepts an optional ``version`` keyword argument; it defaults to the
        manifest's ``latest_version``. All other arguments are passed on to
        the parent form.
        """
        service_manifest = SERVICES['spacy-nlp-pipeline']
        version = kwargs.pop('version', service_manifest['latest_version'])
        super().__init__(*args, **kwargs)
        service_info = service_manifest['versions'][version]
        # The checkbox is rendered disabled unless this version offers
        # encoding detection.
        _set_disabled(
            self.encoding_detection,
            not _method_available(service_info, 'encoding_detection')
        )
        # The empty first choice acts as a placeholder; InputRequired
        # rejects it on submit.
        self.model.choices = [('', 'Choose your option')]
        self.model.choices += [(x, y) for x, y in service_info['models'].items()]  # noqa
        self.model.default = ''
        self.version.choices = [(x, x) for x in service_manifest['versions']]
        # Assigned after the parent has processed its input, so the field
        # always carries the version this form was built for.
        self.version.data = version
        # NOTE(review): the sibling forms use the manifest's latest_version
        # as default; here it is the selected version - confirm this is
        # intended.
        self.version.default = version