mirror of
				https://gitlab.ub.uni-bielefeld.de/sfb1288inf/nopaque.git
				synced 2025-11-04 04:12:45 +00:00 
			
		
		
		
	Merge branch 'development' of gitlab.ub.uni-bielefeld.de:sfb1288inf/nopaque into development
This commit is contained in:
		@@ -1,16 +1,12 @@
 | 
			
		||||
from flask_login import current_user
 | 
			
		||||
from flask_wtf import FlaskForm
 | 
			
		||||
from flask_wtf.file import FileField, FileRequired
 | 
			
		||||
from wtforms import (
 | 
			
		||||
    BooleanField,
 | 
			
		||||
    MultipleFileField,
 | 
			
		||||
    SelectField,
 | 
			
		||||
    StringField,
 | 
			
		||||
    SubmitField,
 | 
			
		||||
    ValidationError
 | 
			
		||||
)
 | 
			
		||||
from wtforms import (BooleanField, DecimalRangeField, MultipleFileField,
 | 
			
		||||
                     SelectField, StringField, SubmitField, ValidationError)
 | 
			
		||||
from wtforms.validators import InputRequired, Length
 | 
			
		||||
from app.models import TesseractOCRPipelineModel, SpaCyNLPPipelineModel
 | 
			
		||||
 | 
			
		||||
from app.models import SpaCyNLPPipelineModel, TesseractOCRPipelineModel
 | 
			
		||||
 | 
			
		||||
from . import SERVICES
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@@ -49,13 +45,16 @@ class CreateTesseractOCRPipelineJobForm(CreateJobBaseForm):
 | 
			
		||||
    binarization = BooleanField('Binarization')
 | 
			
		||||
    pdf = FileField('File', validators=[FileRequired()])
 | 
			
		||||
    model = SelectField('Model', validators=[InputRequired()])
 | 
			
		||||
    ocropus_nlbin_threshold = DecimalRangeField(
 | 
			
		||||
        render_kw={'min': 0, 'max': 1, 'step': 0.1, 'start': [0.5], 'disabled': True}
 | 
			
		||||
    )
 | 
			
		||||
 | 
			
		||||
    def validate_binarization(self, field):
        """Reject an enabled binarization option the chosen service version lacks.

        The metadata for the selected version is read from
        ``SERVICES['tesseract-ocr-pipeline']['versions']`` using
        ``self.version.data``.  When ``field.data`` is truthy, that metadata
        must contain a ``'methods'`` entry that includes ``'binarization'``.

        Raises:
            ValidationError: if binarization was requested but is not listed
                among the methods of the selected service version.
        """
        # Looked up unconditionally so an unknown version fails the same way
        # regardless of whether the option was selected.
        version_info = SERVICES['tesseract-ocr-pipeline']['versions'][self.version.data]
        if not field.data:
            return
        available_methods = version_info['methods'] if 'methods' in version_info else ()
        if 'binarization' in available_methods:
            return
        raise ValidationError('Binarization is not available')
 | 
			
		||||
 | 
			
		||||
              
 | 
			
		||||
    def validate_pdf(self, field):
 | 
			
		||||
        if field.data.mimetype != 'application/pdf':
 | 
			
		||||
            raise ValidationError('PDF files only!')
 | 
			
		||||
@@ -68,10 +67,16 @@ class CreateTesseractOCRPipelineJobForm(CreateJobBaseForm):
 | 
			
		||||
        if self.binarization.render_kw is None:
 | 
			
		||||
            self.binarization.render_kw = {}
 | 
			
		||||
        self.binarization.render_kw['disabled'] = True
 | 
			
		||||
        if self.ocropus_nlbin_threshold.render_kw is None:
 | 
			
		||||
            self.ocropus_nlbin_threshold.render_kw = {}
 | 
			
		||||
        self.ocropus_nlbin_threshold.render_kw['disabled'] = True
 | 
			
		||||
        if 'methods' in service_info:
 | 
			
		||||
            if 'binarization' in service_info['methods']:
 | 
			
		||||
                if 'disabled' in self.binarization.render_kw:
 | 
			
		||||
                    del self.binarization.render_kw['disabled']
 | 
			
		||||
            if 'ocropus_nlbin_threshold' in service_info['methods']:
 | 
			
		||||
                if 'disabled' in self.ocropus_nlbin_threshold.render_kw:
 | 
			
		||||
                    del self.ocropus_nlbin_threshold.render_kw['disabled']
 | 
			
		||||
        models = [
 | 
			
		||||
            x for x in TesseractOCRPipelineModel.query.order_by(TesseractOCRPipelineModel.title).all()
 | 
			
		||||
            if version in x.compatible_service_versions and (x.shared == True or x.user == current_user)
 | 
			
		||||
 
 | 
			
		||||
@@ -79,7 +79,8 @@ def tesseract_ocr_pipeline():
 | 
			
		||||
                service=service_name,
 | 
			
		||||
                service_args={
 | 
			
		||||
                    'binarization': form.binarization.data,
 | 
			
		||||
                    'model': hashids.decode(form.model.data)
 | 
			
		||||
                    'model': hashids.decode(form.model.data),
 | 
			
		||||
                    'ocropus_nlbin_threshold': float(form.ocropus_nlbin_threshold.data)
 | 
			
		||||
                },
 | 
			
		||||
                service_version=form.version.data,
 | 
			
		||||
                user=current_user
 | 
			
		||||
 
 | 
			
		||||
@@ -20,6 +20,7 @@ tesseract-ocr-pipeline:
 | 
			
		||||
    0.1.1:
 | 
			
		||||
      methods:
 | 
			
		||||
        - 'binarization'
 | 
			
		||||
        - 'ocropus_nlbin_threshold'
 | 
			
		||||
      publishing_year: 2022
 | 
			
		||||
      url: 'https://gitlab.ub.uni-bielefeld.de/sfb1288inf/tesseract-ocr-pipeline/-/releases/v0.1.1'
 | 
			
		||||
transkribus-htr-pipeline:
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user