mirror of
				https://gitlab.ub.uni-bielefeld.de/sfb1288inf/nopaque.git
				synced 2025-11-03 20:02:47 +00:00 
			
		
		
		
	move blueprints in dedicated folder
This commit is contained in:
		
							
								
								
									
										24
									
								
								app/blueprints/services/__init__.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										24
									
								
								app/blueprints/services/__init__.py
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,24 @@
 | 
			
		||||
from flask import Blueprint
 | 
			
		||||
from flask_login import login_required
 | 
			
		||||
from pathlib import Path
 | 
			
		||||
import yaml
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
# Service manifest shipped next to this module; it is parsed once at import
# time and shared (read-only) by the forms and routes of this package.
services_file = Path(__file__).parent / 'services.yml'
# Pin the encoding so parsing does not depend on the process locale.
with services_file.open('r', encoding='utf-8') as f:
    SERVICES = yaml.safe_load(f)

bp = Blueprint('services', __name__)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@bp.before_request
@login_required
def before_request():
    '''
    Gate every route of this blueprint behind a login.

    The function is registered as the blueprint's before-request hook and is
    wrapped in ``login_required``; the decorator does all the work, so the
    body itself is intentionally empty.
    '''
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
from . import routes  # noqa
 | 
			
		||||
							
								
								
									
										192
									
								
								app/blueprints/services/forms.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										192
									
								
								app/blueprints/services/forms.py
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,192 @@
 | 
			
		||||
from flask_wtf import FlaskForm
 | 
			
		||||
from flask_login import current_user
 | 
			
		||||
from flask_wtf.file import FileField, FileRequired
 | 
			
		||||
from wtforms import (
 | 
			
		||||
    BooleanField,
 | 
			
		||||
    DecimalRangeField,
 | 
			
		||||
    MultipleFileField,
 | 
			
		||||
    SelectField,
 | 
			
		||||
    StringField,
 | 
			
		||||
    SubmitField,
 | 
			
		||||
    ValidationError
 | 
			
		||||
)
 | 
			
		||||
from wtforms.validators import InputRequired, Length
 | 
			
		||||
from app.models import SpaCyNLPPipelineModel, TesseractOCRPipelineModel
 | 
			
		||||
from . import SERVICES
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class CreateJobBaseForm(FlaskForm):
    '''
    Fields common to all "create job" forms of this module.

    ``version`` is declared without choices here; the subclasses in this
    module populate them from their service manifest in ``__init__``.
    '''
    # Free-text description, required, at most 255 characters.
    description = StringField(
        label='Description',
        validators=[InputRequired(), Length(max=255)]
    )
    # Job title, required, at most 32 characters.
    title = StringField(
        label='Title',
        validators=[InputRequired(), Length(max=32)]
    )
    version = SelectField(label='Version', validators=[InputRequired()])
    submit = SubmitField()
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class CreateFileSetupPipelineJobForm(CreateJobBaseForm):
    '''
    Job form for the 'file-setup-pipeline' service: one or more image files.
    '''
    images = MultipleFileField('File(s)', validators=[InputRequired()])

    def validate_images(form, field):
        '''
        Reject the submission if any uploaded file reports a mimetype other
        than JPEG, PNG or TIFF.
        '''
        accepted = ('image/jpeg', 'image/png', 'image/tiff')
        if any(upload.mimetype not in accepted for upload in field.data):
            raise ValidationError('JPEG, PNG and TIFF files only!')

    def __init__(self, *args, **kwargs):
        '''
        Takes the usual form arguments plus an optional ``version`` keyword
        that selects the service version (default: the manifest's latest).
        A default form prefix is applied when the caller passes none.
        '''
        kwargs.setdefault('prefix', 'create-file-setup-pipeline-job-form')
        manifest = SERVICES['file-setup-pipeline']
        selected_version = kwargs.pop('version', manifest['latest_version'])
        super().__init__(*args, **kwargs)
        version_field = self.version
        version_field.choices = [(v, v) for v in manifest['versions']]
        # Assigned after the base constructor ran, so this value wins over
        # whatever the constructor put into the field.
        version_field.data = selected_version
        version_field.default = manifest['latest_version']
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class CreateTesseractOCRPipelineJobForm(CreateJobBaseForm):
    '''
    Job form for the 'tesseract-ocr-pipeline' service: one PDF, a model and
    optional binarization settings.

    Which optional inputs are enabled depends on the ``methods`` listed for
    the selected service version in the service manifest.
    '''
    binarization = BooleanField('Binarization')
    pdf = FileField('File', validators=[FileRequired()])
    model = SelectField('Model', validators=[InputRequired()])
    ocropus_nlbin_threshold = DecimalRangeField(
        render_kw={'min': 0, 'max': 1, 'step': 0.1, 'start': [0.5], 'disabled': True}
    )

    def validate_binarization(self, field):
        '''
        Raise a ValidationError if binarization was requested but the
        selected service version does not list it among its methods.
        '''
        service_info = SERVICES['tesseract-ocr-pipeline']['versions'][self.version.data]
        if field.data and 'binarization' not in service_info.get('methods', ()):
            raise ValidationError('Binarization is not available')

    def validate_pdf(self, field):
        '''Raise a ValidationError unless the upload reports a PDF mimetype.'''
        if field.data.mimetype != 'application/pdf':
            raise ValidationError('PDF files only!')

    def __init__(self, *args, **kwargs):
        '''
        Takes the usual form arguments plus an optional ``version`` keyword
        that selects the service version (default: the manifest's latest).
        A default form prefix is applied when the caller passes none.
        '''
        kwargs.setdefault('prefix', 'create-tesseract-ocr-pipeline-job-form')
        service_manifest = SERVICES['tesseract-ocr-pipeline']
        version = kwargs.pop('version', service_manifest['latest_version'])
        super().__init__(*args, **kwargs)
        service_info = service_manifest['versions'][version]
        methods = service_info.get('methods', ())
        # A render_kw dict given at class level is the very same object on
        # every form instance. Work on per-instance copies so toggling
        # 'disabled' here cannot leak into other (concurrent) instances.
        binarization_kw = dict(self.binarization.render_kw or {})
        threshold_kw = dict(self.ocropus_nlbin_threshold.render_kw or {})
        binarization_kw['disabled'] = True
        threshold_kw['disabled'] = True
        if 'binarization' in methods:
            del binarization_kw['disabled']
            # The threshold is only enabled together with binarization.
            if 'ocropus_nlbin_threshold' in methods:
                del threshold_kw['disabled']
        self.binarization.render_kw = binarization_kw
        self.ocropus_nlbin_threshold.render_kw = threshold_kw
        user_models = current_user.tesseract_ocr_pipeline_models.order_by(TesseractOCRPipelineModel.title).all()
        # NOTE(review): this list also contains the current user's own
        # models, although it is shown under 'Public models' - confirm that
        # the overlap with 'Your models' is intended.
        models = [
            x for x in TesseractOCRPipelineModel.query.order_by(TesseractOCRPipelineModel.title).all()
            if version in x.compatible_service_versions and (x.is_public or x.user == current_user)
        ]
        self.model.choices = {
            '': [('', 'Choose your option')],
            'Your models': [(x.hashid, f'{x.title} [{x.version}]') for x in user_models] if user_models else [(0, 'Nothing here yet...')],
            'Public models': [(x.hashid, f'{x.title} [{x.version}]') for x in models]
        }
        self.model.default = ''
        self.version.choices = [(x, x) for x in service_manifest['versions']]
        # Assigned after the base constructor ran, so this value wins over
        # whatever the constructor put into the field.
        self.version.data = version
        self.version.default = service_manifest['latest_version']
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class CreateTranskribusHTRPipelineJobForm(CreateJobBaseForm):
    '''
    Job form for the 'transkribus-htr-pipeline' service: one PDF, a model and
    an optional binarization switch.
    '''
    binarization = BooleanField('Binarization')
    pdf = FileField('File', validators=[FileRequired()])
    model = SelectField('Model', validators=[InputRequired()])

    def validate_binarization(self, field):
        '''
        Raise a ValidationError if binarization was requested but the
        selected service version does not list it among its methods.
        '''
        version_info = SERVICES['transkribus-htr-pipeline']['versions'][self.version.data]
        if not field.data:
            return
        supported = 'methods' in version_info and 'binarization' in version_info['methods']
        if not supported:
            raise ValidationError('Binarization is not available')

    def validate_pdf(self, field):
        '''Raise a ValidationError unless the upload reports a PDF mimetype.'''
        is_pdf = field.data.mimetype == 'application/pdf'
        if not is_pdf:
            raise ValidationError('PDF files only!')

    def __init__(self, *args, **kwargs):
        '''
        Takes the usual form arguments plus two optional keywords:
        ``transkribus_htr_pipeline_models`` (an iterable of mappings with
        'modelId' and 'name' keys, used as model choices) and ``version``
        (the service version, default: the manifest's latest).
        '''
        kwargs.setdefault('prefix', 'create-transkribus-htr-pipeline-job-form')
        available_models = kwargs.pop('transkribus_htr_pipeline_models', [])
        manifest = SERVICES['transkribus-htr-pipeline']
        selected_version = kwargs.pop('version', manifest['latest_version'])
        super().__init__(*args, **kwargs)
        version_info = manifest['versions'][selected_version]
        if self.binarization.render_kw is None:
            self.binarization.render_kw = {}
        # The checkbox is rendered disabled unless this version offers
        # binarization.
        if 'methods' in version_info and 'binarization' in version_info['methods']:
            self.binarization.render_kw.pop('disabled', None)
        else:
            self.binarization.render_kw['disabled'] = True
        placeholder = [('', 'Choose your option')]
        self.model.choices = placeholder + [(m['modelId'], m['name']) for m in available_models]
        self.model.default = ''
        self.version.choices = [(v, v) for v in manifest['versions']]
        self.version.data = selected_version
        self.version.default = manifest['latest_version']
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class CreateSpacyNLPPipelineJobForm(CreateJobBaseForm):
    '''
    Job form for the 'spacy-nlp-pipeline' service: one plain text file, a
    model and an optional encoding detection switch.

    Whether encoding detection is enabled depends on the ``methods`` listed
    for the selected service version in the service manifest.
    '''
    encoding_detection = BooleanField('Encoding detection', render_kw={'disabled': True})
    txt = FileField('File', validators=[FileRequired()])
    model = SelectField('Model', validators=[InputRequired()])

    def validate_encoding_detection(self, field):
        '''
        Raise a ValidationError if encoding detection was requested but the
        selected service version does not list it among its methods.
        '''
        service_info = SERVICES['spacy-nlp-pipeline']['versions'][self.version.data]
        if field.data and 'encoding_detection' not in service_info.get('methods', ()):
            raise ValidationError('Encoding detection is not available')

    def validate_txt(form, field):
        '''Raise a ValidationError unless the upload reports a plain text mimetype.'''
        if field.data.mimetype != 'text/plain':
            raise ValidationError('Plain text files only!')

    def __init__(self, *args, **kwargs):
        '''
        Takes the usual form arguments plus an optional ``version`` keyword
        that selects the service version (default: the manifest's latest).
        A default form prefix is applied when the caller passes none.
        '''
        kwargs.setdefault('prefix', 'create-spacy-nlp-pipeline-job-form')
        service_manifest = SERVICES['spacy-nlp-pipeline']
        version = kwargs.pop('version', service_manifest['latest_version'])
        super().__init__(*args, **kwargs)
        service_info = service_manifest['versions'][version]
        # The render_kw dict given at class level is the very same object on
        # every form instance. Work on a per-instance copy so toggling
        # 'disabled' here cannot leak into other (concurrent) instances.
        encoding_detection_kw = dict(self.encoding_detection.render_kw or {})
        encoding_detection_kw['disabled'] = True
        if 'encoding_detection' in service_info.get('methods', ()):
            del encoding_detection_kw['disabled']
        self.encoding_detection.render_kw = encoding_detection_kw
        user_models = current_user.spacy_nlp_pipeline_models.order_by(SpaCyNLPPipelineModel.title).all()
        # `== True` is required here: it builds a SQL expression, it is not a
        # plain Python comparison.
        public_models_query = SpaCyNLPPipelineModel.query.filter(
            SpaCyNLPPipelineModel.user != current_user,
            SpaCyNLPPipelineModel.is_public == True  # noqa: E712
        ).order_by(SpaCyNLPPipelineModel.title)
        models = [
            x for x in public_models_query.all()
            if version in x.compatible_service_versions
        ]
        self.model.choices = {
            '': [('', 'Choose your option')],
            'Your models': [(x.hashid, f'{x.title} [{x.version}]') for x in user_models] if user_models else [(0, 'Nothing here yet...')],
            'Public models': [(x.hashid, f'{x.title} [{x.version}]') for x in models]
        }
        self.model.default = ''
        self.version.choices = [(x, x) for x in service_manifest['versions']]
        # Assigned after the base constructor ran, so this value wins over
        # whatever the constructor put into the field.
        self.version.data = version
        self.version.default = version
 | 
			
		||||
							
								
								
									
										224
									
								
								app/blueprints/services/routes.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										224
									
								
								app/blueprints/services/routes.py
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,224 @@
 | 
			
		||||
from html import escape

from flask import abort, current_app, flash, redirect, render_template, request, url_for
from flask_login import current_user
import requests

from app import db, hashids
from app.models import (
    Job,
    JobInput,
    JobStatus,
    TesseractOCRPipelineModel,
    SpaCyNLPPipelineModel
)
from . import bp, SERVICES
from .forms import (
    CreateFileSetupPipelineJobForm,
    CreateTesseractOCRPipelineJobForm,
    CreateTranskribusHTRPipelineJobForm,
    CreateSpacyNLPPipelineJobForm
)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@bp.route('/services')
def services():
    '''Redirect the bare services URL to the main dashboard.'''
    dashboard_url = url_for('main.dashboard')
    return redirect(dashboard_url)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@bp.route('/file-setup-pipeline', methods=['GET', 'POST'])
def file_setup_pipeline():
    '''
    Show the File Setup Pipeline job form and create a job from it.

    The service version is taken from the ``version`` query argument
    (default: the manifest's latest); an unknown version aborts with 404.
    A submitted form that fails validation yields ``{'errors': ...}`` with
    status 400. On success the job is created, its inputs are stored, it is
    marked as submitted and an empty body with status 201 and a ``Location``
    header pointing at the job is returned. Failures while creating the job
    or its inputs abort with 500. Otherwise the form page is rendered.
    '''
    service = 'file-setup-pipeline'
    service_manifest = SERVICES[service]
    version = request.args.get('version', service_manifest['latest_version'])
    if version not in service_manifest['versions']:
        abort(404)
    form = CreateFileSetupPipelineJobForm(prefix='create-job-form', version=version)
    if form.is_submitted():
        if not form.validate():
            return {'errors': form.errors}, 400
        try:
            job = Job.create(
                title=form.title.data,
                description=form.description.data,
                service=service,
                service_args={},
                service_version=form.version.data,
                user=current_user
            )
        except OSError:
            abort(500)
        try:
            for input_file in form.images.data:
                JobInput.create(input_file, job=job)
        except (AttributeError, OSError):
            abort(500)
        job.status = JobStatus.SUBMITTED
        db.session.commit()
        # The message is HTML and the title is user input, so the title has
        # to be escaped before it is embedded.
        message = f'Job "<a href="{job.url}">{escape(job.title)}</a>" created'
        flash(message, 'job')
        return {}, 201, {'Location': job.url}
    return render_template(
        'services/file_setup_pipeline.html.j2',
        title=service_manifest['name'],
        form=form
    )
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@bp.route('/tesseract-ocr-pipeline', methods=['GET', 'POST'])
def tesseract_ocr_pipeline():
    '''
    Show the Tesseract OCR Pipeline job form and create a job from it.

    The service version is taken from the ``version`` query argument
    (default: the manifest's latest); an unknown version aborts with 404.
    A submitted form that fails validation yields ``{'errors': ...}`` with
    status 400. On success the job is created, its input is stored, it is
    marked as submitted and an empty body with status 201 and a ``Location``
    header pointing at the job is returned. Failures while creating the job
    or its input abort with 500. Otherwise the form page is rendered together
    with the models usable for the selected version.
    '''
    service_name = 'tesseract-ocr-pipeline'
    service_manifest = SERVICES[service_name]
    version = request.args.get('version', service_manifest['latest_version'])
    if version not in service_manifest['versions']:
        abort(404)
    form = CreateTesseractOCRPipelineJobForm(prefix='create-job-form', version=version)
    if form.is_submitted():
        if not form.validate():
            return {'errors': form.errors}, 400
        service_args = {
            'binarization': form.binarization.data,
            'model': hashids.decode(form.model.data)
        }
        # The threshold input is rendered disabled for versions that do not
        # offer it; in that case no value arrives and the field data is None,
        # which float() would reject with a TypeError.
        # NOTE(review): the key is omitted then - confirm that consumers of
        # service_args treat a missing threshold as "use the default".
        threshold = form.ocropus_nlbin_threshold.data
        if threshold is not None:
            service_args['ocropus_nlbin_threshold'] = float(threshold)
        try:
            job = Job.create(
                title=form.title.data,
                description=form.description.data,
                service=service_name,
                service_args=service_args,
                service_version=form.version.data,
                user=current_user
            )
        except OSError:
            abort(500)
        try:
            JobInput.create(form.pdf.data, job=job)
        except (AttributeError, OSError):
            abort(500)
        job.status = JobStatus.SUBMITTED
        db.session.commit()
        # The message is HTML and the title is user input, so the title has
        # to be escaped before it is embedded.
        message = f'Job "<a href="{job.url}">{escape(job.title)}</a>" created'
        flash(message, 'job')
        return {}, 201, {'Location': job.url}
    tesseract_ocr_pipeline_models = [
        x for x in TesseractOCRPipelineModel.query.all()
        if version in x.compatible_service_versions and (x.is_public or x.user == current_user)
    ]
    # Let the database count instead of loading every row just to measure
    # the length of the result list.
    user_tesseract_ocr_pipeline_models_count = current_user.tesseract_ocr_pipeline_models.count()
    return render_template(
        'services/tesseract_ocr_pipeline.html.j2',
        title=service_manifest['name'],
        form=form,
        tesseract_ocr_pipeline_models=tesseract_ocr_pipeline_models,
        user_tesseract_ocr_pipeline_models_count=user_tesseract_ocr_pipeline_models_count
    )
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@bp.route('/transkribus-htr-pipeline', methods=['GET', 'POST'])
def transkribus_htr_pipeline():
    '''
    Show the Transkribus HTR Pipeline job form and create a job from it.

    Aborts with 404 unless ``NOPAQUE_TRANSKRIBUS_ENABLED`` is set in the app
    config or when the requested ``version`` query argument is unknown. The
    list of selectable models is fetched from the Transkribus REST API on
    every request; a non-200 answer aborts with 500. A submitted form that
    fails validation yields ``{'errors': ...}`` with status 400. On success
    the job is created, its input is stored, it is marked as submitted and an
    empty body with status 201 and a ``Location`` header pointing at the job
    is returned. Otherwise the form page is rendered.
    '''
    if not current_app.config.get('NOPAQUE_TRANSKRIBUS_ENABLED'):
        abort(404)
    service = 'transkribus-htr-pipeline'
    service_manifest = SERVICES[service]
    version = request.args.get('version', service_manifest['latest_version'])
    if version not in service_manifest['versions']:
        abort(404)
    # Without a timeout an unresponsive remote server would block this
    # request (and its worker) indefinitely. A timeout raises an exception
    # that is left to the application's regular error handling.
    r = requests.get(
        'https://transkribus.eu/TrpServer/rest/models/text',
        headers={'Accept': 'application/json'},
        timeout=10
    )
    if r.status_code != 200:
        abort(500)
    transkribus_htr_pipeline_models = r.json()['trpModelMetadata']
    # One model is always offered in addition to those returned by the API.
    transkribus_htr_pipeline_models.append({'modelId': 48513, 'name': 'Caroline Minuscle', 'language': 'lat', 'isoLanguages': ['lat']})
    form = CreateTranskribusHTRPipelineJobForm(
        prefix='create-job-form',
        transkribus_htr_pipeline_models=transkribus_htr_pipeline_models,
        version=version
    )
    if form.is_submitted():
        if not form.validate():
            return {'errors': form.errors}, 400
        try:
            job = Job.create(
                title=form.title.data,
                description=form.description.data,
                service=service,
                service_args={
                    'binarization': form.binarization.data,
                    'model': form.model.data
                },
                service_version=form.version.data,
                user=current_user
            )
        except OSError:
            abort(500)
        try:
            JobInput.create(form.pdf.data, job=job)
        except (AttributeError, OSError):
            abort(500)
        job.status = JobStatus.SUBMITTED
        db.session.commit()
        # The message is HTML and the title is user input, so the title has
        # to be escaped before it is embedded.
        message = f'Job "<a href="{job.url}">{escape(job.title)}</a>" created'
        flash(message, 'job')
        return {}, 201, {'Location': job.url}
    return render_template(
        'services/transkribus_htr_pipeline.html.j2',
        title=service_manifest['name'],
        form=form,
        transkribus_htr_pipeline_models=transkribus_htr_pipeline_models
    )
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@bp.route('/spacy-nlp-pipeline', methods=['GET', 'POST'])
def spacy_nlp_pipeline():
    '''
    Show the SpaCy NLP Pipeline job form and create a job from it.

    The service version is taken from the ``version`` query argument
    (default: the manifest's latest); an unknown version aborts with 404.
    A submitted form that fails validation yields ``{'errors': ...}`` with
    status 400. On success the job is created, its input is stored, it is
    marked as submitted and an empty body with status 201 and a ``Location``
    header pointing at the job is returned. Failures while creating the job
    or its input abort with 500. Otherwise the form page is rendered together
    with the available models.
    '''
    service = 'spacy-nlp-pipeline'
    service_manifest = SERVICES[service]
    version = request.args.get('version', service_manifest['latest_version'])
    if version not in service_manifest['versions']:
        abort(404)
    form = CreateSpacyNLPPipelineJobForm(prefix='create-job-form', version=version)
    if form.is_submitted():
        if not form.validate():
            return {'errors': form.errors}, 400
        try:
            job = Job.create(
                title=form.title.data,
                description=form.description.data,
                service=service,
                service_args={
                    'encoding_detection': form.encoding_detection.data,
                    'model': form.model.data
                },
                service_version=form.version.data,
                user=current_user
            )
        except OSError:
            abort(500)
        try:
            JobInput.create(form.txt.data, job=job)
        except (AttributeError, OSError):
            abort(500)
        job.status = JobStatus.SUBMITTED
        db.session.commit()
        # The message is HTML and the title is user input, so the title has
        # to be escaped before it is embedded.
        message = f'Job "<a href="{job.url}">{escape(job.title)}</a>" created'
        flash(message, 'job')
        return {}, 201, {'Location': job.url}
    # Only the rendered page needs the model data, so it is queried after
    # the submission branch has returned.
    # NOTE(review): unlike the Tesseract route, this passes every model to
    # the template without filtering by visibility or version - confirm the
    # template does not expose other users' private models.
    spacy_nlp_pipeline_models = SpaCyNLPPipelineModel.query.all()
    # Let the database count instead of loading every row just to measure
    # the length of the result list.
    user_spacy_nlp_pipeline_models_count = current_user.spacy_nlp_pipeline_models.count()
    return render_template(
        'services/spacy_nlp_pipeline.html.j2',
        title=service_manifest['name'],
        form=form,
        spacy_nlp_pipeline_models=spacy_nlp_pipeline_models,
        user_spacy_nlp_pipeline_models_count=user_spacy_nlp_pipeline_models_count
    )
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@bp.route('/corpus-analysis')
def corpus_analysis():
    '''Render the Corpus Analysis service page.'''
    template_name = 'services/corpus_analysis.html.j2'
    return render_template(template_name, title='Corpus Analysis')
 | 
			
		||||
							
								
								
									
										66
									
								
								app/blueprints/services/services.yml
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										66
									
								
								app/blueprints/services/services.yml
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,66 @@
 | 
			
		||||
# TODO: This could also be done via GitLab/GitHub APIs
 | 
			
		||||
file-setup-pipeline:
 | 
			
		||||
  name: 'File Setup Pipeline'
 | 
			
		||||
  publisher: 'Bielefeld University - CRC 1288 - INF'
 | 
			
		||||
  latest_version: '0.1.0'
 | 
			
		||||
  versions:
 | 
			
		||||
    0.1.0:
 | 
			
		||||
      publishing_year: 2022
 | 
			
		||||
      url: 'https://gitlab.ub.uni-bielefeld.de/sfb1288inf/file-setup-pipeline/-/releases/v0.1.0'
 | 
			
		||||
tesseract-ocr-pipeline:
 | 
			
		||||
  name: 'Tesseract OCR Pipeline'
 | 
			
		||||
  publisher: 'Bielefeld University - CRC 1288 - INF'
 | 
			
		||||
  latest_version: '0.1.2'
 | 
			
		||||
  versions:
 | 
			
		||||
    0.1.0:
 | 
			
		||||
      methods:
 | 
			
		||||
        - 'binarization'
 | 
			
		||||
      publishing_year: 2022
 | 
			
		||||
      url: 'https://gitlab.ub.uni-bielefeld.de/sfb1288inf/tesseract-ocr-pipeline/-/releases/v0.1.0'
 | 
			
		||||
    0.1.1:
 | 
			
		||||
      methods:
 | 
			
		||||
        - 'binarization'
 | 
			
		||||
        - 'ocropus_nlbin_threshold'
 | 
			
		||||
      publishing_year: 2022
 | 
			
		||||
      url: 'https://gitlab.ub.uni-bielefeld.de/sfb1288inf/tesseract-ocr-pipeline/-/releases/v0.1.1'
 | 
			
		||||
    0.1.2:
 | 
			
		||||
      methods:
 | 
			
		||||
        - 'binarization'
 | 
			
		||||
        - 'ocropus_nlbin_threshold'
 | 
			
		||||
      publishing_year: 2023
 | 
			
		||||
      url: 'https://gitlab.ub.uni-bielefeld.de/sfb1288inf/tesseract-ocr-pipeline/-/releases/v0.1.2'
 | 
			
		||||
transkribus-htr-pipeline:
 | 
			
		||||
  name: 'Transkribus HTR Pipeline'
 | 
			
		||||
  publisher: 'Bielefeld University - CRC 1288 - INF'
 | 
			
		||||
  latest_version: '0.1.1'
 | 
			
		||||
  versions:
 | 
			
		||||
    0.1.0:
 | 
			
		||||
      methods:
 | 
			
		||||
        - 'binarization'
 | 
			
		||||
      publishing_year: 2022
 | 
			
		||||
      url: 'https://gitlab.ub.uni-bielefeld.de/sfb1288inf/transkribus-htr-pipeline/-/releases/v0.1.0'
 | 
			
		||||
    0.1.1:
 | 
			
		||||
      methods:
 | 
			
		||||
        - 'binarization'
 | 
			
		||||
      publishing_year: 2022
 | 
			
		||||
      url: 'https://gitlab.ub.uni-bielefeld.de/sfb1288inf/transkribus-htr-pipeline/-/releases/v0.1.1'
 | 
			
		||||
spacy-nlp-pipeline:
 | 
			
		||||
  name: 'SpaCy NLP Pipeline'
 | 
			
		||||
  publisher: 'Bielefeld University - CRC 1288 - INF'
 | 
			
		||||
  latest_version: '0.1.1'
 | 
			
		||||
  versions:
 | 
			
		||||
    0.1.0:
 | 
			
		||||
      methods:
 | 
			
		||||
        - 'encoding_detection'
 | 
			
		||||
      publishing_year: 2022
 | 
			
		||||
      url: 'https://gitlab.ub.uni-bielefeld.de/sfb1288inf/spacy-nlp-pipeline/-/releases/v0.1.0'
 | 
			
		||||
    0.1.1:
 | 
			
		||||
      methods:
 | 
			
		||||
        - 'encoding_detection'
 | 
			
		||||
      publishing_year: 2022
 | 
			
		||||
      url: 'https://gitlab.ub.uni-bielefeld.de/sfb1288inf/spacy-nlp-pipeline/-/releases/v0.1.1'
 | 
			
		||||
    0.1.2:
 | 
			
		||||
      methods:
 | 
			
		||||
        - 'encoding_detection'
 | 
			
		||||
      publishing_year: 2024
 | 
			
		||||
      url: 'https://gitlab.ub.uni-bielefeld.de/sfb1288inf/spacy-nlp-pipeline/-/releases/v0.1.2'
 | 
			
		||||
		Reference in New Issue
	
	Block a user