from app.models import TesseractOCRModel
from flask_wtf import FlaskForm
from wtforms import (
    BooleanField,
    MultipleFileField,
    SelectField,
    StringField,
    SubmitField,
    ValidationError
)
from wtforms.validators import DataRequired, Length
from . import SERVICES


def _validate_file_extensions(field, valid_extensions):
    """Check that every uploaded file ends with one of the given extensions.

    The comparison is done on the lower-cased filename.

    Args:
        field: File field whose ``data`` is an iterable of uploaded files,
            each exposing a ``filename`` attribute.
        valid_extensions: Sequence of allowed extensions, including the
            leading dot (e.g. ``['.txt']``).

    Raises:
        ValidationError: On the first file whose name does not end with one
            of ``valid_extensions``. The message lists the allowed
            extensions separated by ``/``.
    """
    allowed_suffixes = tuple(valid_extensions)
    for file in field.data:
        if not file.filename.lower().endswith(allowed_suffixes):
            # Explicit ``+``: two adjacent string literals would be merged
            # *before* ``.join`` is applied, turning the whole sentence into
            # the separator instead of a prefix.
            raise ValidationError(
                'File does not have an approved extension: '
                + '/'.join(valid_extensions)
            )


class AddJobForm(FlaskForm):
    """Base form with the fields shared by all job creation forms.

    The ``version`` select field is declared without choices; the subclasses
    in this module fill them in their ``__init__``.
    """

    description = StringField(
        'Description',
        validators=[DataRequired(), Length(1, 255)]
    )
    submit = SubmitField()
    title = StringField('Title', validators=[DataRequired(), Length(1, 32)])
    version = SelectField('Version', validators=[DataRequired()])


class AddSpacyNLPJobForm(AddJobForm):
    """Job creation form for the ``spacy-nlp`` service (``.txt`` uploads)."""

    encoding_detection = BooleanField('Encoding detection')
    files = MultipleFileField('Files', validators=[DataRequired()])
    model = SelectField(
        'Model',
        choices=[('', 'Choose your option')],
        default='',
        validators=[DataRequired()]
    )

    def validate_encoding_detection(self, field):
        """Reject encoding detection if the chosen version lacks the method.

        Raises:
            ValidationError: If the box is ticked but ``encoding_detection``
                is not listed in the selected version's ``methods``.
        """
        versions = SERVICES['spacy-nlp']['versions']
        service_info = versions.get(self.version.data)
        if service_info is None:
            # Submitted version is not a known one. Skip this check rather
            # than crash with a KeyError; the ``version`` select field
            # validates the submitted value against its own choices.
            return
        if field.data and 'encoding_detection' not in service_info['methods']:
            raise ValidationError('Encoding detection is not available')

    def validate_files(self, field):
        """Only accept files with a ``.txt`` extension."""
        _validate_file_extensions(field, ['.txt'])

    def __init__(self, *args, **kwargs):
        """Build the form for a given service version.

        Args:
            *args: Forwarded to ``FlaskForm``.
            **kwargs: Forwarded to ``FlaskForm``, except for the optional
                ``version`` key, which selects the service version used to
                populate the form. Defaults to the service's
                ``latest_version``.
        """
        service_manifest = SERVICES['spacy-nlp']
        version = kwargs.pop('version', service_manifest['latest_version'])
        super().__init__(*args, **kwargs)
        service_info = service_manifest['versions'][version]
        if 'encoding_detection' not in service_info['methods']:
            self.encoding_detection.render_kw = {'disabled': True}
        # Rebind instead of extending in place so that a list shared with
        # the class-level field declaration is never mutated.
        self.model.choices = (
            self.model.choices + list(service_info['models'].items())
        )
        self.version.choices = [(x, x) for x in service_manifest['versions']]
        self.version.default = version


class AddTesseractOCRJobForm(AddJobForm):
    """Job creation form for the ``tesseract-ocr`` service (``.pdf`` uploads).
    """

    binarization = BooleanField('Binarization')
    files = MultipleFileField('Files', validators=[DataRequired()])
    model = SelectField(
        'Model',
        choices=[('', 'Choose your option')],
        default='',
        validators=[DataRequired()]
    )

    def validate_binarization(self, field):
        """Reject binarization if the chosen version lacks the method.

        Raises:
            ValidationError: If the box is ticked but ``binarization`` is
                not listed in the selected version's ``methods``.
        """
        versions = SERVICES['tesseract-ocr']['versions']
        service_info = versions.get(self.version.data)
        if service_info is None:
            # Submitted version is not a known one. Skip this check rather
            # than crash with a KeyError; the ``version`` select field
            # validates the submitted value against its own choices.
            return
        if field.data and 'binarization' not in service_info['methods']:
            raise ValidationError('Binarization is not available')

    def validate_files(self, field):
        """Only accept files with a ``.pdf`` extension."""
        _validate_file_extensions(field, ['.pdf'])

    def __init__(self, *args, **kwargs):
        """Build the form for a given service version.

        Model choices are loaded from all ``TesseractOCRModel`` rows.

        Args:
            *args: Forwarded to ``FlaskForm``.
            **kwargs: Forwarded to ``FlaskForm``, except for the optional
                ``version`` key, which selects the service version used to
                populate the form. Defaults to the service's
                ``latest_version``.
        """
        service_manifest = SERVICES['tesseract-ocr']
        version = kwargs.pop('version', service_manifest['latest_version'])
        super().__init__(*args, **kwargs)
        service_info = service_manifest['versions'][version]
        if 'binarization' not in service_info['methods']:
            self.binarization.render_kw = {'disabled': True}
        # Rebind instead of extending in place so that a list shared with
        # the class-level field declaration is never mutated.
        self.model.choices = self.model.choices + [
            (x.hashid, x.title) for x in TesseractOCRModel.query.all()
        ]
        self.version.choices = [(x, x) for x in service_manifest['versions']]
        self.version.data = version
        self.version.default = service_manifest['latest_version']


class AddFileSetupJobForm(AddJobForm):
    """Job creation form for the ``file-setup`` service (image uploads)."""

    files = MultipleFileField('Files', validators=[DataRequired()])

    def validate_files(self, field):
        """Only accept JPEG, PNG and TIFF file extensions."""
        _validate_file_extensions(
            field,
            ['.jpeg', '.jpg', '.png', '.tiff', '.tif']
        )

    def __init__(self, *args, **kwargs):
        """Build the form for a given service version.

        Args:
            *args: Forwarded to ``FlaskForm``.
            **kwargs: Forwarded to ``FlaskForm``, except for the optional
                ``version`` key, which is assigned to the ``version``
                field's data. Defaults to the service's ``latest_version``.
        """
        service_manifest = SERVICES['file-setup']
        version = kwargs.pop('version', service_manifest['latest_version'])
        super().__init__(*args, **kwargs)
        self.version.choices = [(x, x) for x in service_manifest['versions']]
        self.version.data = version
        self.version.default = service_manifest['latest_version']


# Maps a service name (the same keys used in SERVICES above) to its form.
AddJobForms = {
    'file-setup': AddFileSetupJobForm,
    'tesseract-ocr': AddTesseractOCRJobForm,
    'spacy-nlp': AddSpacyNLPJobForm
}