mirror of
				https://gitlab.ub.uni-bielefeld.de/sfb1288inf/nopaque.git
				synced 2025-11-03 20:02:47 +00:00 
			
		
		
		
	Disable binarization for old OCR service versions. Add new OCR service version (including binarization).
This commit is contained in:
		@@ -46,8 +46,12 @@ class AddTesseractOCRPipelineJobForm(AddJobForm):
 | 
			
		||||
 | 
			
		||||
    def validate_binarization(self, field):
 | 
			
		||||
        service_info = SERVICES['tesseract-ocr-pipeline']['versions'][self.version.data]
 | 
			
		||||
        if field.data and 'binarization' not in service_info['methods']:
 | 
			
		||||
            raise ValidationError('Binarization is not available')
 | 
			
		||||
        if field.data:
 | 
			
		||||
            if(
 | 
			
		||||
                'methods' not in service_info
 | 
			
		||||
                or 'binarization' not in service_info['methods']
 | 
			
		||||
            ):
 | 
			
		||||
                raise ValidationError('Binarization is not available')
 | 
			
		||||
 | 
			
		||||
    def validate_pdf(self, field):
 | 
			
		||||
        if field.data.mimetype != 'application/pdf':
 | 
			
		||||
@@ -58,8 +62,13 @@ class AddTesseractOCRPipelineJobForm(AddJobForm):
 | 
			
		||||
        version = kwargs.pop('version', service_manifest['latest_version'])
 | 
			
		||||
        super().__init__(*args, **kwargs)
 | 
			
		||||
        service_info = service_manifest['versions'][version]
 | 
			
		||||
        if 'binarization' not in service_info['methods']:
 | 
			
		||||
            self.binarization.render_kw = {'disabled': True}
 | 
			
		||||
        if self.binarization.render_kw is None:
 | 
			
		||||
            self.binarization.render_kw = {}
 | 
			
		||||
        self.binarization.render_kw['disabled'] = True
 | 
			
		||||
        if 'methods' in service_info:
 | 
			
		||||
            if 'binarization' in service_info['methods']:
 | 
			
		||||
                if 'disabled' in self.binarization.render_kw:
 | 
			
		||||
                    del self.binarization.render_kw['disabled']
 | 
			
		||||
        compatible_models = [
 | 
			
		||||
            x for x in TesseractOCRModel.query.filter_by(shared=True).all()
 | 
			
		||||
            if version in x.compatible_service_versions
 | 
			
		||||
@@ -83,8 +92,12 @@ class AddTranskribusHTRPipelineJobForm(AddJobForm):
 | 
			
		||||
 | 
			
		||||
    def validate_binarization(self, field):
 | 
			
		||||
        service_info = SERVICES['transkribus-htr-pipeline']['versions'][self.version.data]
 | 
			
		||||
        if field.data and 'binarization' not in service_info['methods']:
 | 
			
		||||
            raise ValidationError('Binarization is not available')
 | 
			
		||||
        if field.data:
 | 
			
		||||
            if(
 | 
			
		||||
                'methods' not in service_info
 | 
			
		||||
                or 'binarization' not in service_info['methods']
 | 
			
		||||
            ):
 | 
			
		||||
                raise ValidationError('Binarization is not available')
 | 
			
		||||
 | 
			
		||||
    def validate_pdf(self, field):
 | 
			
		||||
        if field.data.mimetype != 'application/pdf':
 | 
			
		||||
@@ -95,8 +108,13 @@ class AddTranskribusHTRPipelineJobForm(AddJobForm):
 | 
			
		||||
        version = kwargs.pop('version', service_manifest['latest_version'])
 | 
			
		||||
        super().__init__(*args, **kwargs)
 | 
			
		||||
        service_info = service_manifest['versions'][version]
 | 
			
		||||
        if 'binarization' not in service_info['methods']:
 | 
			
		||||
            self.binarization.render_kw = {'disabled': True}
 | 
			
		||||
        if self.binarization.render_kw is None:
 | 
			
		||||
            self.binarization.render_kw = {}
 | 
			
		||||
        self.binarization.render_kw['disabled'] = True
 | 
			
		||||
        if 'methods' in service_info:
 | 
			
		||||
            if 'binarization' in service_info['methods']:
 | 
			
		||||
                if 'disabled' in self.binarization.render_kw:
 | 
			
		||||
                    del self.binarization.render_kw['disabled']
 | 
			
		||||
        self.model.choices = [('', 'Choose your option')]
 | 
			
		||||
        self.model.choices += [
 | 
			
		||||
            ('37569', 'Tim Model'),
 | 
			
		||||
@@ -109,15 +127,18 @@ class AddTranskribusHTRPipelineJobForm(AddJobForm):
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class AddSpacyNLPPipelineJobForm(AddJobForm):
 | 
			
		||||
    encoding_detection = BooleanField('Encoding detection')
 | 
			
		||||
    encoding_detection = BooleanField('Encoding detection', render_kw={'disabled': True})
 | 
			
		||||
    txt = FileField('File', validators=[FileRequired()])
 | 
			
		||||
    model = SelectField('Model', validators=[DataRequired()])
 | 
			
		||||
 | 
			
		||||
    def validate_encoding_detection(self, field):
 | 
			
		||||
        service_manifest = SERVICES['spacy-nlp-pipeline']
 | 
			
		||||
        service_info = service_manifest['versions'][self.version.data]
 | 
			
		||||
        if field.data and 'encoding_detection' not in service_info['methods']:
 | 
			
		||||
            raise ValidationError('Encoding detection is not available!')
 | 
			
		||||
        service_info = SERVICES['spacy-nlp-pipeline']['versions'][self.version.data]
 | 
			
		||||
        if field.data:
 | 
			
		||||
            if(
 | 
			
		||||
                'methods' not in service_info
 | 
			
		||||
                or 'encoding_detection' not in service_info['methods']
 | 
			
		||||
            ):
 | 
			
		||||
                raise ValidationError('Encoding detection is not available')
 | 
			
		||||
 | 
			
		||||
    def validate_txt(form, field):
 | 
			
		||||
        if field.data.mimetype != 'text/plain':
 | 
			
		||||
@@ -128,8 +149,13 @@ class AddSpacyNLPPipelineJobForm(AddJobForm):
 | 
			
		||||
        version = kwargs.pop('version', service_manifest['latest_version'])
 | 
			
		||||
        super().__init__(*args, **kwargs)
 | 
			
		||||
        service_info = service_manifest['versions'][version]
 | 
			
		||||
        if 'encoding_detection' not in service_info['methods']:
 | 
			
		||||
            self.encoding_detection.render_kw = {'disabled': True}
 | 
			
		||||
        if self.encoding_detection.render_kw is None:
 | 
			
		||||
            self.encoding_detection.render_kw = {}
 | 
			
		||||
        self.encoding_detection.render_kw['disabled'] = True
 | 
			
		||||
        if 'methods' in service_info:
 | 
			
		||||
            if 'encoding_detection' in service_info['methods']:
 | 
			
		||||
                if 'disabled' in self.encoding_detection.render_kw:
 | 
			
		||||
                    del self.encoding_detection.render_kw['disabled']
 | 
			
		||||
        self.model.choices = [('', 'Choose your option')]
 | 
			
		||||
        self.model.choices += [(x, y) for x, y in service_info['models'].items()]  # noqa
 | 
			
		||||
        self.model.default = ''
 | 
			
		||||
 
 | 
			
		||||
@@ -10,33 +10,28 @@ file-setup-pipeline:
 | 
			
		||||
tesseract-ocr-pipeline:
 | 
			
		||||
  name: 'Tesseract OCR Pipeline'
 | 
			
		||||
  publisher: 'Bielefeld University - CRC 1288 - INF'
 | 
			
		||||
  latest_version: '0.1.4'
 | 
			
		||||
  latest_version: '0.1.5'
 | 
			
		||||
  versions:
 | 
			
		||||
    0.1.0:
 | 
			
		||||
      methods:
 | 
			
		||||
        - 'binarization'
 | 
			
		||||
      publishing_year: 2022
 | 
			
		||||
      url: 'https://gitlab.ub.uni-bielefeld.de/sfb1288inf/tesseract-ocr-pipeline/-/releases/v0.1.0'
 | 
			
		||||
    0.1.1:
 | 
			
		||||
      methods:
 | 
			
		||||
        - 'binarization'
 | 
			
		||||
      publishing_year: 2022
 | 
			
		||||
      url: 'https://gitlab.ub.uni-bielefeld.de/sfb1288inf/tesseract-ocr-pipeline/-/releases/v0.1.1'
 | 
			
		||||
    0.1.2:
 | 
			
		||||
      methods:
 | 
			
		||||
        - 'binarization'
 | 
			
		||||
      publishing_year: 2022
 | 
			
		||||
      url: 'https://gitlab.ub.uni-bielefeld.de/sfb1288inf/tesseract-ocr-pipeline/-/releases/v0.1.2'
 | 
			
		||||
    0.1.3:
 | 
			
		||||
      methods:
 | 
			
		||||
        - 'binarization'
 | 
			
		||||
      publishing_year: 2022
 | 
			
		||||
      url: 'https://gitlab.ub.uni-bielefeld.de/sfb1288inf/tesseract-ocr-pipeline/-/releases/v0.1.3'
 | 
			
		||||
    0.1.4:
 | 
			
		||||
      publishing_year: 2022
 | 
			
		||||
      url: 'https://gitlab.ub.uni-bielefeld.de/sfb1288inf/tesseract-ocr-pipeline/-/releases/v0.1.4'
 | 
			
		||||
    0.1.5:
 | 
			
		||||
      methods:
 | 
			
		||||
        - 'binarization'
 | 
			
		||||
      publishing_year: 2022
 | 
			
		||||
      url: 'https://gitlab.ub.uni-bielefeld.de/sfb1288inf/tesseract-ocr-pipeline/-/releases/v0.1.4'
 | 
			
		||||
      url: 'https://gitlab.ub.uni-bielefeld.de/sfb1288inf/tesseract-ocr-pipeline/-/releases/v0.1.5'
 | 
			
		||||
transkribus-htr-pipeline:
 | 
			
		||||
  name: 'Transkribus HTR Pipeline'
 | 
			
		||||
  publisher: 'Bielefeld University - CRC 1288 - INF'
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user