mirror of
				https://gitlab.ub.uni-bielefeld.de/sfb1288inf/nopaque.git
				synced 2025-10-31 02:32:45 +00:00 
			
		
		
		
	
		
			
				
	
	
		
			118 lines
		
	
	
		
			4.6 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
			
		
		
	
	
			118 lines
		
	
	
		
			4.6 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
| from app.models import TesseractOCRModel
 | |
| from flask_wtf import FlaskForm
 | |
| from wtforms import (
 | |
|     BooleanField,
 | |
|     MultipleFileField,
 | |
|     SelectField,
 | |
|     StringField,
 | |
|     SubmitField,
 | |
|     ValidationError
 | |
| )
 | |
| from wtforms.validators import DataRequired, Length
 | |
| from . import SERVICES
 | |
| 
 | |
| 
 | |
class AddJobForm(FlaskForm):
    """Base form holding the fields shared by all job submission forms.

    ``version`` is declared without choices; the subclasses in this module
    assign ``version.choices`` in their constructors.
    """

    # Declaration order is kept deliberately: WTForms orders a form's
    # fields by the order in which they are declared.
    description = StringField(
        'Description',
        validators=[DataRequired(), Length(min=1, max=255)],
    )
    submit = SubmitField()
    title = StringField(
        'Title',
        validators=[DataRequired(), Length(min=1, max=32)],
    )
    version = SelectField('Version', validators=[DataRequired()])
 | |
| 
 | |
| 
 | |
class AddSpacyNLPJobForm(AddJobForm):
    """Form for submitting a spaCy NLP job.

    The constructor accepts an optional ``version`` keyword argument
    (default: ``SERVICES['spacy-nlp']['latest_version']``). The matching
    entry of ``SERVICES['spacy-nlp']['versions']`` supplies the selectable
    models and decides whether the encoding detection checkbox is enabled.
    """

    encoding_detection = BooleanField('Encoding detection')
    files = MultipleFileField('Files', validators=[DataRequired()])
    model = SelectField(
        'Model',
        choices=[('', 'Choose your option')],
        default='',
        validators=[DataRequired()]
    )

    def validate_encoding_detection(self, field):
        """Reject encoding detection if the selected version lacks it.

        Raises:
            ValidationError: the box is ticked but the selected service
                version does not list ``encoding_detection`` in its methods.
        """
        try:
            service_info = SERVICES['spacy-nlp']['versions'][self.version.data]
        except KeyError:
            # The submitted version is not a known service version. The
            # version field's own choice validation reports that problem,
            # so do not let the lookup escape as an unhandled KeyError.
            return
        if field.data and 'encoding_detection' not in service_info['methods']:
            raise ValidationError('Encoding detection is not available')

    def validate_files(self, field):
        """Ensure every uploaded file name ends with an approved extension.

        Raises:
            ValidationError: a file name does not end in ``.txt``
                (compared case-insensitively).
        """
        valid_extensions = ('.txt',)
        for file in field.data:
            if not file.filename.lower().endswith(valid_extensions):
                # Explicit ``+``: adjacent string literals are merged before
                # ``.join`` is applied, which would turn the whole sentence
                # into the join separator and drop it from the message.
                raise ValidationError(
                    'File does not have an approved extension: '
                    + '/'.join(valid_extensions)
                )

    def __init__(self, *args, **kwargs):
        """Build the form for the service version given via ``version``.

        Raises:
            KeyError: ``version`` is not a key of
                ``SERVICES['spacy-nlp']['versions']``.
        """
        version = kwargs.pop('version', SERVICES['spacy-nlp']['latest_version'])  # noqa
        super().__init__(*args, **kwargs)
        service_info = SERVICES['spacy-nlp']['versions'][version]
        if 'encoding_detection' not in service_info['methods']:
            self.encoding_detection.render_kw = {'disabled': True}
        # Build a new list instead of extending in place, so a choices list
        # that might be shared with the class-level declaration is never
        # mutated across form instances.
        self.model.choices = (
            list(self.model.choices) + list(service_info['models'].items())
        )
        self.version.choices = [(x, x) for x in SERVICES['spacy-nlp']['versions']]  # noqa
        # NOTE(review): the other job forms in this module also assign
        # ``self.version.data = version`` and use the latest version as the
        # default; this form only sets the default. Confirm whether the
        # difference is intended.
        self.version.default = version
 | |
| 
 | |
| 
 | |
class AddTesseractOCRJobForm(AddJobForm):
    """Form for submitting a Tesseract OCR job.

    The constructor accepts an optional ``version`` keyword argument
    (default: ``SERVICES['tesseract-ocr']['latest_version']``). The matching
    entry of ``SERVICES['tesseract-ocr']['versions']`` decides whether the
    binarization checkbox is enabled; the model choices are read from
    ``TesseractOCRModel``.
    """

    binarization = BooleanField('Binarization')
    files = MultipleFileField('Files', validators=[DataRequired()])
    model = SelectField(
        'Model',
        choices=[('', 'Choose your option')],
        default='',
        validators=[DataRequired()]
    )

    def validate_binarization(self, field):
        """Reject binarization if the selected version lacks it.

        Raises:
            ValidationError: the box is ticked but the selected service
                version does not list ``binarization`` in its methods.
        """
        service_info = SERVICES['tesseract-ocr']['versions'][self.version.data]
        if field.data and 'binarization' not in service_info['methods']:
            raise ValidationError('Binarization is not available')

    def validate_files(self, field):
        """Ensure every uploaded file name ends with an approved extension.

        Raises:
            ValidationError: a file name does not end in ``.pdf``
                (compared case-insensitively).
        """
        valid_extensions = ('.pdf',)
        for file in field.data:
            if not file.filename.lower().endswith(valid_extensions):
                # Explicit ``+``: adjacent string literals are merged before
                # ``.join`` is applied, which would turn the whole sentence
                # into the join separator and drop it from the message.
                raise ValidationError(
                    'File does not have an approved extension: '
                    + '/'.join(valid_extensions)
                )

    def __init__(self, *args, **kwargs):
        """Build the form for the service version given via ``version``.

        Raises:
            KeyError: ``version`` is not a key of
                ``SERVICES['tesseract-ocr']['versions']``.
        """
        version = kwargs.pop('version', SERVICES['tesseract-ocr']['latest_version'])  # noqa
        super().__init__(*args, **kwargs)
        service_info = SERVICES['tesseract-ocr']['versions'][version]
        if 'binarization' not in service_info['methods']:
            self.binarization.render_kw = {'disabled': True}
        # Build a new list instead of extending in place, so a choices list
        # that might be shared with the class-level declaration is never
        # mutated across form instances.
        self.model.choices = list(self.model.choices) + [
            (x.hashid, x.title) for x in TesseractOCRModel.query.all()
        ]
        self.version.choices = [(x, x) for x in SERVICES['tesseract-ocr']['versions']]  # noqa
        # The version field always carries the version this form was built
        # for; the latest version is only the field's default.
        self.version.data = version
        self.version.default = SERVICES['tesseract-ocr']['latest_version']
 | |
| 
 | |
| 
 | |
class AddFileSetupJobForm(AddJobForm):
    """Form for submitting a file setup job on image files.

    The constructor accepts an optional ``version`` keyword argument
    (default: ``SERVICES['file-setup']['latest_version']``) that is written
    into the ``version`` field.
    """

    files = MultipleFileField('Files', validators=[DataRequired()])

    def validate_files(self, field):
        """Ensure every uploaded file name ends with an approved extension.

        Raises:
            ValidationError: a file name does not end in one of the
                approved image extensions (compared case-insensitively).
        """
        valid_extensions = ('.jpeg', '.jpg', '.png', '.tiff', '.tif')
        for file in field.data:
            if not file.filename.lower().endswith(valid_extensions):
                # Explicit ``+``: adjacent string literals are merged before
                # ``.join`` is applied, which would repeat the whole sentence
                # between the extensions instead of prefixing the list once.
                raise ValidationError(
                    'File does not have an approved extension: '
                    + '/'.join(valid_extensions)
                )

    def __init__(self, *args, **kwargs):
        """Build the form and set its version field from ``version``."""
        version = kwargs.pop('version', SERVICES['file-setup']['latest_version'])  # noqa
        super().__init__(*args, **kwargs)
        self.version.choices = [(x, x) for x in SERVICES['file-setup']['versions']]  # noqa
        # The version field always carries the version this form was built
        # for; the latest version is only the field's default.
        self.version.data = version
        self.version.default = SERVICES['file-setup']['latest_version']
 | |
| 
 | |
| 
 | |
# Lookup table from service key (the same keys used to index SERVICES in the
# form classes) to the form class that handles job submission for it.
AddJobForms = {
    'file-setup': AddFileSetupJobForm,
    'tesseract-ocr': AddTesseractOCRJobForm,
    'spacy-nlp': AddSpacyNLPJobForm,
}
 |