mirror of
https://gitlab.ub.uni-bielefeld.de/sfb1288inf/nopaque.git
synced 2025-06-12 00:50:40 +00:00
Disable binarization for old ocr service versions. Add new ocr service version (including binarization)
This commit is contained in:
@ -46,8 +46,12 @@ class AddTesseractOCRPipelineJobForm(AddJobForm):
|
||||
|
||||
def validate_binarization(self, field):
|
||||
service_info = SERVICES['tesseract-ocr-pipeline']['versions'][self.version.data]
|
||||
if field.data and 'binarization' not in service_info['methods']:
|
||||
raise ValidationError('Binarization is not available')
|
||||
if field.data:
|
||||
if(
|
||||
'methods' not in service_info
|
||||
or 'binarization' not in service_info['methods']
|
||||
):
|
||||
raise ValidationError('Binarization is not available')
|
||||
|
||||
def validate_pdf(self, field):
|
||||
if field.data.mimetype != 'application/pdf':
|
||||
@ -58,8 +62,13 @@ class AddTesseractOCRPipelineJobForm(AddJobForm):
|
||||
version = kwargs.pop('version', service_manifest['latest_version'])
|
||||
super().__init__(*args, **kwargs)
|
||||
service_info = service_manifest['versions'][version]
|
||||
if 'binarization' not in service_info['methods']:
|
||||
self.binarization.render_kw = {'disabled': True}
|
||||
if self.binarization.render_kw is None:
|
||||
self.binarization.render_kw = {}
|
||||
self.binarization.render_kw['disabled'] = True
|
||||
if 'methods' in service_info:
|
||||
if 'binarization' in service_info['methods']:
|
||||
if 'disabled' in self.binarization.render_kw:
|
||||
del self.binarization.render_kw['disabled']
|
||||
compatible_models = [
|
||||
x for x in TesseractOCRModel.query.filter_by(shared=True).all()
|
||||
if version in x.compatible_service_versions
|
||||
@ -83,8 +92,12 @@ class AddTranskribusHTRPipelineJobForm(AddJobForm):
|
||||
|
||||
def validate_binarization(self, field):
|
||||
service_info = SERVICES['transkribus-htr-pipeline']['versions'][self.version.data]
|
||||
if field.data and 'binarization' not in service_info['methods']:
|
||||
raise ValidationError('Binarization is not available')
|
||||
if field.data:
|
||||
if(
|
||||
'methods' not in service_info
|
||||
or 'binarization' not in service_info['methods']
|
||||
):
|
||||
raise ValidationError('Binarization is not available')
|
||||
|
||||
def validate_pdf(self, field):
|
||||
if field.data.mimetype != 'application/pdf':
|
||||
@ -95,8 +108,13 @@ class AddTranskribusHTRPipelineJobForm(AddJobForm):
|
||||
version = kwargs.pop('version', service_manifest['latest_version'])
|
||||
super().__init__(*args, **kwargs)
|
||||
service_info = service_manifest['versions'][version]
|
||||
if 'binarization' not in service_info['methods']:
|
||||
self.binarization.render_kw = {'disabled': True}
|
||||
if self.binarization.render_kw is None:
|
||||
self.binarization.render_kw = {}
|
||||
self.binarization.render_kw['disabled'] = True
|
||||
if 'methods' in service_info:
|
||||
if 'binarization' in service_info['methods']:
|
||||
if 'disabled' in self.binarization.render_kw:
|
||||
del self.binarization.render_kw['disabled']
|
||||
self.model.choices = [('', 'Choose your option')]
|
||||
self.model.choices += [
|
||||
('37569', 'Tim Model'),
|
||||
@ -109,15 +127,18 @@ class AddTranskribusHTRPipelineJobForm(AddJobForm):
|
||||
|
||||
|
||||
class AddSpacyNLPPipelineJobForm(AddJobForm):
|
||||
encoding_detection = BooleanField('Encoding detection')
|
||||
encoding_detection = BooleanField('Encoding detection', render_kw={'disabled': True})
|
||||
txt = FileField('File', validators=[FileRequired()])
|
||||
model = SelectField('Model', validators=[DataRequired()])
|
||||
|
||||
def validate_encoding_detection(self, field):
|
||||
service_manifest = SERVICES['spacy-nlp-pipeline']
|
||||
service_info = service_manifest['versions'][self.version.data]
|
||||
if field.data and 'encoding_detection' not in service_info['methods']:
|
||||
raise ValidationError('Encoding detection is not available!')
|
||||
service_info = SERVICES['spacy-nlp-pipeline']['versions'][self.version.data]
|
||||
if field.data:
|
||||
if(
|
||||
'methods' not in service_info
|
||||
or 'encoding_detection' not in service_info['methods']
|
||||
):
|
||||
raise ValidationError('Encoding detection is not available')
|
||||
|
||||
def validate_txt(form, field):
|
||||
if field.data.mimetype != 'text/plain':
|
||||
@ -128,8 +149,13 @@ class AddSpacyNLPPipelineJobForm(AddJobForm):
|
||||
version = kwargs.pop('version', service_manifest['latest_version'])
|
||||
super().__init__(*args, **kwargs)
|
||||
service_info = service_manifest['versions'][version]
|
||||
if 'encoding_detection' not in service_info['methods']:
|
||||
self.encoding_detection.render_kw = {'disabled': True}
|
||||
if self.encoding_detection.render_kw is None:
|
||||
self.encoding_detection.render_kw = {}
|
||||
self.encoding_detection.render_kw['disabled'] = True
|
||||
if 'methods' in service_info:
|
||||
if 'encoding_detection' in service_info['methods']:
|
||||
if 'disabled' in self.encoding_detection.render_kw:
|
||||
del self.encoding_detection.render_kw['disabled']
|
||||
self.model.choices = [('', 'Choose your option')]
|
||||
self.model.choices += [(x, y) for x, y in service_info['models'].items()] # noqa
|
||||
self.model.default = ''
|
||||
|
@ -10,33 +10,28 @@ file-setup-pipeline:
|
||||
tesseract-ocr-pipeline:
|
||||
name: 'Tesseract OCR Pipeline'
|
||||
publisher: 'Bielefeld University - CRC 1288 - INF'
|
||||
latest_version: '0.1.4'
|
||||
latest_version: '0.1.5'
|
||||
versions:
|
||||
0.1.0:
|
||||
methods:
|
||||
- 'binarization'
|
||||
publishing_year: 2022
|
||||
url: 'https://gitlab.ub.uni-bielefeld.de/sfb1288inf/tesseract-ocr-pipeline/-/releases/v0.1.0'
|
||||
0.1.1:
|
||||
methods:
|
||||
- 'binarization'
|
||||
publishing_year: 2022
|
||||
url: 'https://gitlab.ub.uni-bielefeld.de/sfb1288inf/tesseract-ocr-pipeline/-/releases/v0.1.1'
|
||||
0.1.2:
|
||||
methods:
|
||||
- 'binarization'
|
||||
publishing_year: 2022
|
||||
url: 'https://gitlab.ub.uni-bielefeld.de/sfb1288inf/tesseract-ocr-pipeline/-/releases/v0.1.2'
|
||||
0.1.3:
|
||||
methods:
|
||||
- 'binarization'
|
||||
publishing_year: 2022
|
||||
url: 'https://gitlab.ub.uni-bielefeld.de/sfb1288inf/tesseract-ocr-pipeline/-/releases/v0.1.3'
|
||||
0.1.4:
|
||||
publishing_year: 2022
|
||||
url: 'https://gitlab.ub.uni-bielefeld.de/sfb1288inf/tesseract-ocr-pipeline/-/releases/v0.1.4'
|
||||
0.1.5:
|
||||
methods:
|
||||
- 'binarization'
|
||||
publishing_year: 2022
|
||||
url: 'https://gitlab.ub.uni-bielefeld.de/sfb1288inf/tesseract-ocr-pipeline/-/releases/v0.1.4'
|
||||
url: 'https://gitlab.ub.uni-bielefeld.de/sfb1288inf/tesseract-ocr-pipeline/-/releases/v0.1.5'
|
||||
transkribus-htr-pipeline:
|
||||
name: 'Transkribus HTR Pipeline'
|
||||
publisher: 'Bielefeld University - CRC 1288 - INF'
|
||||
|
Reference in New Issue
Block a user