mirror of
				https://gitlab.ub.uni-bielefeld.de/sfb1288inf/nopaque.git
				synced 2025-11-03 20:02:47 +00:00 
			
		
		
		
	Contribution Package Tesseract OCR
This commit is contained in:
		@@ -1,3 +1,4 @@
 | 
			
		||||
from xml.dom import ValidationErr
 | 
			
		||||
from flask_wtf import FlaskForm
 | 
			
		||||
from flask_wtf.file import FileField, FileRequired
 | 
			
		||||
from wtforms import (
 | 
			
		||||
@@ -5,13 +6,13 @@ from wtforms import (
 | 
			
		||||
    StringField,
 | 
			
		||||
    SubmitField,
 | 
			
		||||
    SelectMultipleField,
 | 
			
		||||
    IntegerField
 | 
			
		||||
    IntegerField,
 | 
			
		||||
    ValidationError
 | 
			
		||||
)
 | 
			
		||||
from wtforms.validators import InputRequired, Length
 | 
			
		||||
from app.services import SERVICES
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class TesseractOCRModelContributionForm(FlaskForm):
 | 
			
		||||
class CreateContributionBaseForm(FlaskForm):
 | 
			
		||||
    title = StringField(
 | 
			
		||||
        'Title',
 | 
			
		||||
        validators=[InputRequired(), Length(max=64)]
 | 
			
		||||
@@ -24,9 +25,6 @@ class TesseractOCRModelContributionForm(FlaskForm):
 | 
			
		||||
        'Version',
 | 
			
		||||
        validators=[InputRequired(), Length(max=16)]
 | 
			
		||||
    )
 | 
			
		||||
    compatible_service_versions = SelectMultipleField(
 | 
			
		||||
        'Compatible service versions'
 | 
			
		||||
    )
 | 
			
		||||
    publisher = StringField(
 | 
			
		||||
        'Publisher',
 | 
			
		||||
        validators=[InputRequired(), Length(max=128)]
 | 
			
		||||
@@ -43,10 +41,22 @@ class TesseractOCRModelContributionForm(FlaskForm):
 | 
			
		||||
        'Publishing year',
 | 
			
		||||
        validators=[InputRequired()]
 | 
			
		||||
    )
 | 
			
		||||
    shared = BooleanField('Shared', validators=[InputRequired()])
 | 
			
		||||
    model_file = FileField('File',validators=[FileRequired()])
 | 
			
		||||
    shared = BooleanField(
 | 
			
		||||
        'Shared'
 | 
			
		||||
    )
 | 
			
		||||
    submit = SubmitField()
 | 
			
		||||
 | 
			
		||||
class TesseractOCRModelContributionForm(CreateContributionBaseForm):
 | 
			
		||||
    tesseract_model_file = FileField(
 | 
			
		||||
        'File',
 | 
			
		||||
        validators=[FileRequired()]
 | 
			
		||||
    )
 | 
			
		||||
    compatible_service_versions = SelectMultipleField(
 | 
			
		||||
        'Compatible service versions'
 | 
			
		||||
    )
 | 
			
		||||
    def validate_tesseract_model_file(self, field):
        '''
        Inline validator for the ``tesseract_model_file`` upload field.

        WTForms only calls inline validators named ``validate_<fieldname>``,
        so the method has to carry the field's name to be executed at all.

        The check is done on the uploaded file's name: ``.traineddata`` is a
        file extension, not a MIME type, so comparing it to ``mimetype``
        could never succeed.

        :param field: The bound file field; ``field.data`` is expected to be
            the uploaded file object exposing ``filename``.
        :raises ValidationError: If the file name does not end with
            ``.traineddata`` (case-insensitive) or no file name is present.
        '''
        # Treat a missing upload / missing file name like a wrong extension
        # instead of failing with an AttributeError.
        filename = getattr(field.data, 'filename', None) or ''
        if not filename.lower().endswith('.traineddata'):
            raise ValidationError('traineddata files only!')

    # Backward-compatible alias for the validator's previous name.
    validate_traineddata = validate_tesseract_model_file
 | 
			
		||||
 | 
			
		||||
    def __init__(self, *args, **kwargs):
 | 
			
		||||
        service_manifest = SERVICES['tesseract-ocr-pipeline']
 | 
			
		||||
@@ -56,3 +66,17 @@ class TesseractOCRModelContributionForm(FlaskForm):
 | 
			
		||||
            (x, x) for x in service_manifest['versions'].keys()
 | 
			
		||||
        ]
 | 
			
		||||
        self.compatible_service_versions.default = ''
 | 
			
		||||
 | 
			
		||||
class TesseractOCRModelEditForm(CreateContributionBaseForm):
    '''
    Edit form for an existing Tesseract OCR model.

    Declares no fields of its own; everything comes from
    CreateContributionBaseForm.
    '''

    def prefill(self, model_file):
        '''
        Pre-fill the form with the data of an existing model.

        Each listed form field's ``data`` is set to the attribute of the
        same name on ``model_file``.

        :param model_file: Object providing the attributes named below
            (presumably a TesseractOCRPipelineModel - confirm with callers).
        '''
        field_names = (
            'title',
            'description',
            'publisher',
            'publishing_year',
            'publisher_url',
            'publishing_url',
            'version',
            'shared',
        )
        for field_name in field_names:
            getattr(self, field_name).data = getattr(model_file, field_name)
 | 
			
		||||
    
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
@@ -1,10 +1,11 @@
 | 
			
		||||
from flask import abort, flash, Markup, render_template, url_for
 | 
			
		||||
from flask_login import login_required
 | 
			
		||||
from flask import abort, current_app, flash, Markup, redirect, render_template, url_for
 | 
			
		||||
from flask_login import login_required, current_user
 | 
			
		||||
from threading import Thread
 | 
			
		||||
from app import db
 | 
			
		||||
from app.decorators import permission_required
 | 
			
		||||
from app.decorators import admin_required, permission_required 
 | 
			
		||||
from app.models import TesseractOCRPipelineModel, Permission
 | 
			
		||||
from . import bp
 | 
			
		||||
from .forms import TesseractOCRModelContributionForm
 | 
			
		||||
from .forms import TesseractOCRModelContributionForm, TesseractOCRModelEditForm
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@bp.before_request
 | 
			
		||||
@@ -14,13 +15,77 @@ def before_request():
 | 
			
		||||
    pass
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@bp.route('')
@bp.route('/')
@login_required
@admin_required
def contributions():
    '''
    Render the contribution overview page.

    The template receives the current user's Tesseract OCR pipeline models
    (materialised as a list) and the current user's hashid.
    '''
    return render_template(
        'contributions/contribution_overview.html.j2',
        tesseractOCRUserModels=list(
            current_user.tesseract_ocr_pipeline_models
        ),
        userId=current_user.hashid,
        title='Contribution Overview'
    )
 | 
			
		||||
 | 
			
		||||
@bp.route('/<hashid:tesseract_ocr_pipeline_model_id>', methods=['GET', 'POST'])
@login_required
def tesseract_ocr_pipeline_model(tesseract_ocr_pipeline_model_id):
    '''
    Show and process the edit form of a single Tesseract OCR model.

    GET renders the form pre-filled with the stored values. A valid POST
    writes the changed values to the model, commits, flashes a confirmation
    and answers with 201 plus a ``Location`` header pointing at the
    contribution overview. An invalid POST re-renders the form with the
    submitted values.

    :param tesseract_ocr_pipeline_model_id: Identifier of the model as
        delivered by the ``hashid`` URL converter.
    :raises 404: If no model with this identifier exists.
    :raises 403: If the current user is neither the model's owner nor an
        administrator.
    '''
    tesseract_ocr_pipeline_model = TesseractOCRPipelineModel.query.get_or_404(
        tesseract_ocr_pipeline_model_id
    )
    # Same access rule as delete_tesseract_model: only the owner or an
    # administrator may modify a model.
    if not (
        tesseract_ocr_pipeline_model.user == current_user
        or current_user.is_administrator()
    ):
        abort(403)
    form = TesseractOCRModelEditForm(prefix='tesseract-ocr-model-edit-form')
    if form.validate_on_submit():
        editable_fields = (
            'title',
            'description',
            'publisher',
            'publishing_year',
            'publisher_url',
            'publishing_url',
            'version',
            'shared',
        )
        # Only touch attributes whose value actually changed.
        for field_name in editable_fields:
            submitted_value = getattr(form, field_name).data
            if getattr(tesseract_ocr_pipeline_model, field_name) != submitted_value:
                setattr(tesseract_ocr_pipeline_model, field_name, submitted_value)
        db.session.commit()
        # Build the link with url_for instead of a hard-coded relative path
        # so it resolves independently of the page the message is shown on.
        model_url = url_for(
            'contributions.tesseract_ocr_pipeline_model',
            tesseract_ocr_pipeline_model_id=tesseract_ocr_pipeline_model_id
        )
        # Markup.format() escapes its arguments; the title is user input and
        # must not be interpolated into the HTML unescaped.
        message = Markup('Model "<a href="{}">{}</a>" updated').format(
            model_url,
            tesseract_ocr_pipeline_model.title
        )
        flash(message, category='corpus')
        return {}, 201, {'Location': url_for('contributions.contributions')}
    # Pre-fill only when nothing was submitted; after a failed POST the
    # user's input has to stay in the form next to the validation errors.
    if not form.is_submitted():
        form.prefill(tesseract_ocr_pipeline_model)
    return render_template(
        'contributions/tesseract_ocr_pipeline_model.html.j2',
        tesseract_ocr_pipeline_model=tesseract_ocr_pipeline_model,
        form=form,
        title='Edit your Tesseract OCR model'
    )
 | 
			
		||||
 | 
			
		||||
@bp.route('/tesseract-ocr-pipeline-models', methods=['GET', 'POST'])
 | 
			
		||||
def tesseract_ocr_pipeline_models():
 | 
			
		||||
@bp.route('/<hashid:tesseract_ocr_pipeline_model_id>', methods=['DELETE'])
@login_required
def delete_tesseract_model(tesseract_ocr_pipeline_model_id):
    '''
    Delete a Tesseract OCR model in a background thread.

    The request is answered with 202 as soon as the worker thread has been
    started; the deletion itself happens inside that thread.

    :param tesseract_ocr_pipeline_model_id: Identifier of the model as
        delivered by the ``hashid`` URL converter.
    :raises 404: If no model with this identifier exists.
    :raises 403: If the current user is neither the model's owner nor an
        administrator.
    '''
    def _delete_tesseract_model(app, tesseract_ocr_pipeline_model_id):
        # The worker runs outside the request, so it needs its own
        # application context.
        with app.app_context():
            model = TesseractOCRPipelineModel.query.get(
                tesseract_ocr_pipeline_model_id
            )
            if model is None:
                # Already removed (e.g. by a duplicate DELETE request);
                # nothing left to do.
                return
            model.delete()
            db.session.commit()

    model = TesseractOCRPipelineModel.query.get_or_404(
        tesseract_ocr_pipeline_model_id
    )
    if not (model.user == current_user or current_user.is_administrator()):
        abort(403)
    # Hand the real application object (not the context-bound proxy) to the
    # thread.
    thread = Thread(
        target=_delete_tesseract_model,
        args=(current_app._get_current_object(), tesseract_ocr_pipeline_model_id)
    )
    thread.start()
    return {}, 202
 | 
			
		||||
 | 
			
		||||
@bp.route('/add-tesseract-ocr-pipeline-model', methods=['GET', 'POST'])
 | 
			
		||||
def add_tesseract_ocr_pipeline_model():
 | 
			
		||||
    form = TesseractOCRModelContributionForm(
 | 
			
		||||
        prefix='contribute-tesseract-ocr-pipeline-model-form'
 | 
			
		||||
    )
 | 
			
		||||
@@ -30,7 +95,7 @@ def tesseract_ocr_pipeline_models():
 | 
			
		||||
            return response, 400
 | 
			
		||||
        try:
 | 
			
		||||
            tesseract_ocr_model = TesseractOCRPipelineModel.create(
 | 
			
		||||
                form.file.data,
 | 
			
		||||
                form.tesseract_model_file.data,
 | 
			
		||||
                compatible_service_versions=form.compatible_service_versions.data,
 | 
			
		||||
                description=form.description.data,
 | 
			
		||||
                publisher=form.publisher.data,
 | 
			
		||||
@@ -39,7 +104,8 @@ def tesseract_ocr_pipeline_models():
 | 
			
		||||
                publishing_year=form.publishing_year.data,
 | 
			
		||||
                shared=form.shared.data,
 | 
			
		||||
                title=form.title.data,
 | 
			
		||||
                version=form.version.data
 | 
			
		||||
                version=form.version.data,
 | 
			
		||||
                user=current_user
 | 
			
		||||
            )
 | 
			
		||||
        except OSError:
 | 
			
		||||
            abort(500)
 | 
			
		||||
@@ -47,8 +113,13 @@ def tesseract_ocr_pipeline_models():
 | 
			
		||||
        message = Markup(f'Model "{tesseract_ocr_model.title}" created')
 | 
			
		||||
        flash(message)
 | 
			
		||||
        return {}, 201, {'Location': url_for('contributions.contributions')}
 | 
			
		||||
    tesseract_ocr_pipeline_models = [
 | 
			
		||||
        x for x in TesseractOCRPipelineModel.query.all()
 | 
			
		||||
    ]
 | 
			
		||||
    
 | 
			
		||||
    return render_template(
 | 
			
		||||
        'contributions/contribute.html.j2',
 | 
			
		||||
        'contributions/contribute_tesseract_ocr_models.html.j2',
 | 
			
		||||
        form=form,
 | 
			
		||||
        title='Contribution'
 | 
			
		||||
        tesseract_ocr_pipeline_models=tesseract_ocr_pipeline_models,
 | 
			
		||||
        title='Tesseract OCR Model Contribution'
 | 
			
		||||
    )
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user