from app.models import TesseractOCRModel
from flask_login import current_user
from flask_wtf import FlaskForm
from flask_wtf.file import FileField, FileRequired
from wtforms import (
    BooleanField,
    MultipleFileField,
    SelectField,
    StringField,
    SubmitField,
    ValidationError
)
from wtforms.validators import DataRequired, InputRequired, Length
from . import SERVICES


def _method_available(service_info, method):
    """Return whether *method* is listed in ``service_info['methods']``.

    A version entry without a ``'methods'`` key offers no optional methods.
    """
    return 'methods' in service_info and method in service_info['methods']


def _toggle_method_checkbox(field, service_info, method):
    """Enable *field* only if *method* is offered by the service version.

    The field receives its own ``render_kw`` dict instead of being mutated
    in place: a ``render_kw`` given in the class-level field declaration is
    the same dict object for every form instance, so in-place changes would
    leak between instances (and between concurrent requests).
    """
    render_kw = dict(field.render_kw or {})
    if _method_available(service_info, method):
        render_kw.pop('disabled', None)
    else:
        render_kw['disabled'] = True
    field.render_kw = render_kw


class AddJobForm(FlaskForm):
    """Fields shared by all job creation forms in this module.

    Subclasses are expected to populate ``version.choices`` in their
    ``__init__``.
    """

    description = StringField(
        'Description',
        validators=[InputRequired(), Length(1, 255)]
    )
    title = StringField('Title', validators=[InputRequired(), Length(1, 32)])
    version = SelectField('Version', validators=[DataRequired()])
    submit = SubmitField()


class AddFileSetupPipelineJobForm(AddJobForm):
    """Job form for the ``file-setup-pipeline`` service (image uploads)."""

    images = MultipleFileField('File(s)', validators=[DataRequired()])

    def validate_images(form, field):
        """Reject the upload if any file is not a JPEG, PNG or TIFF image.

        Raises:
            ValidationError: if at least one file has another mimetype.
        """
        valid_mimetypes = ['image/jpeg', 'image/png', 'image/tiff']
        if any(image.mimetype not in valid_mimetypes for image in field.data):
            raise ValidationError('JPEG, PNG and TIFF files only!')

    def __init__(self, *args, **kwargs):
        """Build the form for a given service version.

        Args:
            version: optional keyword argument selecting the service
                version; defaults to the manifest's ``latest_version``.
                It is removed from ``kwargs`` before they reach FlaskForm.
        """
        service_manifest = SERVICES['file-setup-pipeline']
        version = kwargs.pop('version', service_manifest['latest_version'])
        super().__init__(*args, **kwargs)
        self.version.choices = [(x, x) for x in service_manifest['versions']]
        self.version.data = version
        self.version.default = service_manifest['latest_version']


class AddTesseractOCRPipelineJobForm(AddJobForm):
    """Job form for the ``tesseract-ocr-pipeline`` service (PDF upload)."""

    binarization = BooleanField('Binarization')
    pdf = FileField('File', validators=[FileRequired()])
    model = SelectField('Model', validators=[DataRequired()])

    def validate_binarization(self, field):
        """Ensure binarization is only requested where it is offered.

        Raises:
            ValidationError: if the box is checked but the selected
                service version does not list the ``binarization`` method.
        """
        service_manifest = SERVICES['tesseract-ocr-pipeline']
        service_info = service_manifest['versions'][self.version.data]
        if field.data and not _method_available(service_info, 'binarization'):
            raise ValidationError('Binarization is not available')

    def validate_pdf(self, field):
        """Accept PDF uploads only.

        Raises:
            ValidationError: if the mimetype is not ``application/pdf``.
        """
        if field.data.mimetype != 'application/pdf':
            raise ValidationError('PDF files only!')

    def __init__(self, *args, **kwargs):
        """Build the form for a given service version.

        Besides the version choices this sets up the binarization checkbox
        (disabled unless the version offers it) and the model choices
        (shared models plus the current user's own non-shared models that
        declare compatibility with the version).

        Args:
            version: optional keyword argument selecting the service
                version; defaults to the manifest's ``latest_version``.
                It is removed from ``kwargs`` before they reach FlaskForm.
        """
        service_manifest = SERVICES['tesseract-ocr-pipeline']
        version = kwargs.pop('version', service_manifest['latest_version'])
        super().__init__(*args, **kwargs)
        service_info = service_manifest['versions'][version]
        _toggle_method_checkbox(
            self.binarization,
            service_info,
            'binarization'
        )
        shared_models = TesseractOCRModel.query.filter_by(shared=True).all()
        own_models = TesseractOCRModel.query.filter_by(
            shared=False,
            user=current_user
        ).all()
        # Shared models are listed first, followed by the user's own ones.
        compatible_models = [
            x for x in [*shared_models, *own_models]
            if version in x.compatible_service_versions
        ]
        self.model.choices = [('', 'Choose your option')]
        self.model.choices += [(x.hashid, x.title) for x in compatible_models]
        self.model.default = ''
        self.version.choices = [(x, x) for x in service_manifest['versions']]
        self.version.data = version
        self.version.default = service_manifest['latest_version']


class AddTranskribusHTRPipelineJobForm(AddJobForm):
    """Job form for the ``transkribus-htr-pipeline`` service (PDF upload)."""

    binarization = BooleanField('Binarization')
    pdf = FileField('File', validators=[FileRequired()])
    model = SelectField('Model', validators=[DataRequired()])

    def validate_binarization(self, field):
        """Ensure binarization is only requested where it is offered.

        Raises:
            ValidationError: if the box is checked but the selected
                service version does not list the ``binarization`` method.
        """
        service_manifest = SERVICES['transkribus-htr-pipeline']
        service_info = service_manifest['versions'][self.version.data]
        if field.data and not _method_available(service_info, 'binarization'):
            raise ValidationError('Binarization is not available')

    def validate_pdf(self, field):
        """Accept PDF uploads only.

        Raises:
            ValidationError: if the mimetype is not ``application/pdf``.
        """
        if field.data.mimetype != 'application/pdf':
            raise ValidationError('PDF files only!')

    def __init__(self, *args, **kwargs):
        """Build the form for a given service version.

        Sets up the binarization checkbox (disabled unless the version
        offers it), the model choices and the version choices.

        Args:
            version: optional keyword argument selecting the service
                version; defaults to the manifest's ``latest_version``.
                It is removed from ``kwargs`` before they reach FlaskForm.
        """
        service_manifest = SERVICES['transkribus-htr-pipeline']
        version = kwargs.pop('version', service_manifest['latest_version'])
        super().__init__(*args, **kwargs)
        service_info = service_manifest['versions'][version]
        _toggle_method_checkbox(
            self.binarization,
            service_info,
            'binarization'
        )
        # NOTE(review): the model list is hard-coded here; presumably these
        # are Transkribus model ids - confirm where they should come from.
        self.model.choices = [('', 'Choose your option')]
        self.model.choices += [
            ('37569', 'Tim Model'),
            ('29539', 'UCL–University of Toronto #7')
        ]
        self.model.default = ''
        self.version.choices = [(x, x) for x in service_manifest['versions']]
        self.version.data = version
        self.version.default = service_manifest['latest_version']


class AddSpacyNLPPipelineJobForm(AddJobForm):
    """Job form for the ``spacy-nlp-pipeline`` service (text upload)."""

    encoding_detection = BooleanField(
        'Encoding detection',
        render_kw={'disabled': True}
    )
    txt = FileField('File', validators=[FileRequired()])
    model = SelectField('Model', validators=[DataRequired()])

    def validate_encoding_detection(self, field):
        """Ensure encoding detection is only requested where it is offered.

        Raises:
            ValidationError: if the box is checked but the selected service
                version does not list the ``encoding_detection`` method.
        """
        service_manifest = SERVICES['spacy-nlp-pipeline']
        service_info = service_manifest['versions'][self.version.data]
        if (
            field.data
            and not _method_available(service_info, 'encoding_detection')
        ):
            raise ValidationError('Encoding detection is not available')

    def validate_txt(form, field):
        """Accept plain text uploads only.

        Raises:
            ValidationError: if the mimetype is not ``text/plain``.
        """
        if field.data.mimetype != 'text/plain':
            raise ValidationError('Plain text files only!')

    def __init__(self, *args, **kwargs):
        """Build the form for a given service version.

        Sets up the encoding detection checkbox (disabled unless the
        version offers it), the model choices taken from the version's
        ``'models'`` mapping, and the version choices.

        Args:
            version: optional keyword argument selecting the service
                version; defaults to the manifest's ``latest_version``.
                It is removed from ``kwargs`` before they reach FlaskForm.
        """
        service_manifest = SERVICES['spacy-nlp-pipeline']
        version = kwargs.pop('version', service_manifest['latest_version'])
        super().__init__(*args, **kwargs)
        service_info = service_manifest['versions'][version]
        # The helper swaps in a per-instance render_kw, so the dict from the
        # class-level declaration above is never modified.
        _toggle_method_checkbox(
            self.encoding_detection,
            service_info,
            'encoding_detection'
        )
        self.model.choices = [('', 'Choose your option')]
        self.model.choices.extend(service_info['models'].items())
        self.model.default = ''
        self.version.choices = [(x, x) for x in service_manifest['versions']]
        self.version.data = version
        # NOTE(review): the other forms in this module use
        # service_manifest['latest_version'] as the default - confirm
        # whether the difference is intentional.
        self.version.default = version