mirror of
				https://gitlab.ub.uni-bielefeld.de/sfb1288inf/nopaque.git
				synced 2025-11-04 04:12:45 +00:00 
			
		
		
		
	
		
			
				
	
	
		
			112 lines
		
	
	
		
			4.6 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
			
		
		
	
	
			112 lines
		
	
	
		
			4.6 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
from app.models import TesseractOCRModel
 | 
						|
from flask_wtf import FlaskForm
 | 
						|
from wtforms import (BooleanField, MultipleFileField, SelectField, StringField,
 | 
						|
                     SubmitField, ValidationError)
 | 
						|
from wtforms.validators import DataRequired, Length
 | 
						|
from . import SERVICES
 | 
						|
 | 
						|
 | 
						|
class AddJobForm(FlaskForm):
    """Base form with the fields shared by all job submission forms.

    ``version`` is declared without choices; the subclasses in this module
    fill ``version.choices`` in their ``__init__``.
    """

    # NOTE: the declaration order below is kept unchanged, because WTForms
    # orders a form's fields by the order in which they are declared.
    description = StringField(
        label='Description',
        validators=[DataRequired(), Length(min=1, max=255)],
    )
    submit = SubmitField()
    title = StringField(
        label='Title',
        validators=[DataRequired(), Length(min=1, max=32)],
    )
    version = SelectField(label='Version', validators=[DataRequired()])
 | 
						|
 | 
						|
 | 
						|
class AddSpacyNLPJobForm(AddJobForm):
    """Form for submitting a job to the 'spacy-nlp' service.

    The constructor accepts an optional ``version`` keyword argument
    (default: ``SERVICES['spacy-nlp']['latest_version']``). The model
    choices and the availability of encoding detection are taken from that
    version's entry in ``SERVICES``.
    """

    encoding_detection = BooleanField('Encoding detection')
    files = MultipleFileField('Files', validators=[DataRequired()])
    model = SelectField(
        'Model',
        choices=[('', 'Choose your option')],
        default='',
        validators=[DataRequired()]
    )

    def validate_encoding_detection(self, field):
        """Reject a checked box if the selected version does not offer it.

        Raises:
            ValidationError: If the box is checked and the selected version
                is unknown or does not list the method.
        """
        if not field.data:
            return
        # ``.get`` so that an unknown submitted version yields a validation
        # error instead of an unhandled KeyError.
        versions = SERVICES['spacy-nlp']['versions']
        service_info = versions.get(self.version.data)
        # Same lookup as in __init__ (which disables the checkbox), so that
        # rendering and validation agree.
        # NOTE(review): the original validator looked for
        # 'encoding_detection' directly in service_info - confirm that
        # 'check_encoding' is the name used in the service manifest.
        if (
            service_info is None
            or 'check_encoding' not in service_info['methods']
        ):
            raise ValidationError('Encoding detection is not available')

    def validate_files(self, field):
        """Ensure every uploaded file name ends with an approved extension.

        Raises:
            ValidationError: On the first file with another extension.
        """
        valid_extensions = ('.txt',)
        for file in field.data:
            if not file.filename.lower().endswith(valid_extensions):
                # Explicit ``+``: adjacent string literals are merged before
                # ``.join`` is applied, which used to turn the message text
                # into the join separator.
                raise ValidationError(
                    'File does not have an approved extension: '
                    + '/'.join(valid_extensions)
                )

    def __init__(self, *args, **kwargs):
        """Set up the version dependent parts of the form.

        Args:
            version: Optional keyword argument naming the service version
                to build the form for; removed from ``kwargs`` before they
                are passed on to the parent constructor.
        """
        service = SERVICES['spacy-nlp']
        version = kwargs.pop('version', service['latest_version'])
        super().__init__(*args, **kwargs)
        service_info = service['versions'][version]
        if 'check_encoding' not in service_info['methods']:
            self.encoding_detection.render_kw = {'disabled': True}
        # Build a new list instead of ``+=`` so a choices list that may be
        # shared with the field declaration is never mutated in place.
        self.model.choices = [
            *self.model.choices,
            *service_info['models'].items()
        ]
        self.version.choices = [(x, x) for x in service['versions']]
        # Same handling as in the other job forms of this module: the
        # requested version becomes the field value, the latest version the
        # field default.
        self.version.data = version
        self.version.default = service['latest_version']
 | 
						|
 | 
						|
 | 
						|
class AddTesseractOCRJobForm(AddJobForm):
    """Form for submitting a job to the 'tesseract-ocr' service.

    The constructor accepts an optional ``version`` keyword argument
    (default: ``SERVICES['tesseract-ocr']['latest_version']``). The
    availability of binarization is taken from that version's entry in
    ``SERVICES``; the model choices come from ``TesseractOCRModel``.
    """

    binarization = BooleanField('Binarization')
    files = MultipleFileField('Files', validators=[DataRequired()])
    model = SelectField(
        'Model',
        choices=[('', 'Choose your option')],
        default='',
        validators=[DataRequired()]
    )

    def validate_binarization(self, field):
        """Reject a checked box if the selected version does not offer it.

        Raises:
            ValidationError: If the box is checked and the selected version
                is unknown or does not list 'binarization' as a method.
        """
        if not field.data:
            return
        # ``.get`` so that an unknown submitted version yields a validation
        # error instead of an unhandled KeyError.
        versions = SERVICES['tesseract-ocr']['versions']
        service_info = versions.get(self.version.data)
        # Look in 'methods', exactly like __init__ does when it decides
        # whether to disable the checkbox.
        if (
            service_info is None
            or 'binarization' not in service_info['methods']
        ):
            raise ValidationError('Binarization is not available')

    def validate_files(self, field):
        """Ensure every uploaded file name ends with an approved extension.

        Raises:
            ValidationError: On the first file with another extension.
        """
        valid_extensions = ('.pdf',)
        for file in field.data:
            if not file.filename.lower().endswith(valid_extensions):
                # Explicit ``+``: adjacent string literals are merged before
                # ``.join`` is applied, which used to turn the message text
                # into the join separator.
                raise ValidationError(
                    'File does not have an approved extension: '
                    + '/'.join(valid_extensions)
                )

    def __init__(self, *args, **kwargs):
        """Set up the version dependent parts of the form.

        Args:
            version: Optional keyword argument naming the service version
                to build the form for; removed from ``kwargs`` before they
                are passed on to the parent constructor.
        """
        service = SERVICES['tesseract-ocr']
        version = kwargs.pop('version', service['latest_version'])
        super().__init__(*args, **kwargs)
        service_info = service['versions'][version]
        if 'binarization' not in service_info['methods']:
            self.binarization.render_kw = {'disabled': True}
        # Build a new list instead of ``+=`` so a choices list that may be
        # shared with the field declaration is never mutated in place.
        self.model.choices = [
            *self.model.choices,
            *((x.hashid, x.title) for x in TesseractOCRModel.query.all())
        ]
        self.version.choices = [(x, x) for x in service['versions']]
        self.version.data = version
        self.version.default = service['latest_version']
 | 
						|
 | 
						|
 | 
						|
class AddFileSetupJobForm(AddJobForm):
    """Form for submitting a job to the 'file-setup' service.

    The constructor accepts an optional ``version`` keyword argument
    (default: ``SERVICES['file-setup']['latest_version']``).
    """

    files = MultipleFileField('Files', validators=[DataRequired()])

    def validate_files(self, field):
        """Ensure every uploaded file name ends with an approved extension.

        Raises:
            ValidationError: On the first file with another extension.
        """
        valid_extensions = ('.jpeg', '.jpg', '.png', '.tiff', '.tif')
        for file in field.data:
            if not file.filename.lower().endswith(valid_extensions):
                # Explicit ``+``: adjacent string literals are merged before
                # ``.join`` is applied, which used to interleave the message
                # text between the extensions.
                raise ValidationError(
                    'File does not have an approved extension: '
                    + '/'.join(valid_extensions)
                )

    def __init__(self, *args, **kwargs):
        """Populate the version field.

        Args:
            version: Optional keyword argument naming the service version
                to preselect; removed from ``kwargs`` before they are
                passed on to the parent constructor.
        """
        service = SERVICES['file-setup']
        version = kwargs.pop('version', service['latest_version'])
        super().__init__(*args, **kwargs)
        self.version.choices = [(x, x) for x in service['versions']]
        self.version.data = version
        self.version.default = service['latest_version']
 | 
						|
 | 
						|
 | 
						|
# Registry mapping a service key (the same keys the form classes use to
# index SERVICES) to the form class that submits a job for that service.
AddJobForms = {
    'file-setup': AddFileSetupJobForm,
    'tesseract-ocr': AddTesseractOCRJobForm,
    'spacy-nlp': AddSpacyNLPJobForm,
}
 |