mirror of
https://gitlab.ub.uni-bielefeld.de/sfb1288inf/nopaque.git
synced 2025-06-12 00:50:40 +00:00
Contribution Package Tesseract OCR
This commit is contained in:
@ -1,3 +1,4 @@
|
||||
from xml.dom import ValidationErr
|
||||
from flask_wtf import FlaskForm
|
||||
from flask_wtf.file import FileField, FileRequired
|
||||
from wtforms import (
|
||||
@ -5,13 +6,13 @@ from wtforms import (
|
||||
StringField,
|
||||
SubmitField,
|
||||
SelectMultipleField,
|
||||
IntegerField
|
||||
IntegerField,
|
||||
ValidationError
|
||||
)
|
||||
from wtforms.validators import InputRequired, Length
|
||||
from app.services import SERVICES
|
||||
|
||||
|
||||
class TesseractOCRModelContributionForm(FlaskForm):
|
||||
class CreateContributionBaseForm(FlaskForm):
|
||||
title = StringField(
|
||||
'Title',
|
||||
validators=[InputRequired(), Length(max=64)]
|
||||
@ -24,9 +25,6 @@ class TesseractOCRModelContributionForm(FlaskForm):
|
||||
'Version',
|
||||
validators=[InputRequired(), Length(max=16)]
|
||||
)
|
||||
compatible_service_versions = SelectMultipleField(
|
||||
'Compatible service versions'
|
||||
)
|
||||
publisher = StringField(
|
||||
'Publisher',
|
||||
validators=[InputRequired(), Length(max=128)]
|
||||
@ -43,10 +41,22 @@ class TesseractOCRModelContributionForm(FlaskForm):
|
||||
'Publishing year',
|
||||
validators=[InputRequired()]
|
||||
)
|
||||
shared = BooleanField('Shared', validators=[InputRequired()])
|
||||
model_file = FileField('File',validators=[FileRequired()])
|
||||
shared = BooleanField(
|
||||
'Shared'
|
||||
)
|
||||
submit = SubmitField()
|
||||
|
||||
class TesseractOCRModelContributionForm(CreateContributionBaseForm):
|
||||
tesseract_model_file = FileField(
|
||||
'File',
|
||||
validators=[FileRequired()]
|
||||
)
|
||||
compatible_service_versions = SelectMultipleField(
|
||||
'Compatible service versions'
|
||||
)
|
||||
def validate_tesseract_model_file(self, field):
    '''
    Inline validator that only accepts Tesseract ``.traineddata`` uploads.

    WTForms automatically runs inline validators only when they are named
    ``validate_<field name>``, so this one is named after the
    ``tesseract_model_file`` field. The check looks at the uploaded file's
    name because ``.traineddata`` is a file suffix, not a MIME type.

    :param field: the file field being validated; ``field.data`` is
        expected to expose a ``filename`` attribute
    :raises ValidationError: if the filename does not end in
        ``.traineddata``
    '''
    # Tolerate a missing upload object or filename instead of crashing with
    # an AttributeError; an empty name simply fails the suffix check.
    filename = getattr(field.data, 'filename', None) or ''
    if not filename.lower().endswith('.traineddata'):
        raise ValidationError('traineddata files only!')

# Backward-compatible alias for the previous method name.
validate_traineddata = validate_tesseract_model_file
|
||||
|
||||
def __init__(self, *args, **kwargs):
|
||||
service_manifest = SERVICES['tesseract-ocr-pipeline']
|
||||
@ -56,3 +66,17 @@ class TesseractOCRModelContributionForm(FlaskForm):
|
||||
(x, x) for x in service_manifest['versions'].keys()
|
||||
]
|
||||
self.compatible_service_versions.default = ''
|
||||
|
||||
class TesseractOCRModelEditForm(CreateContributionBaseForm):
    ''' Edit form; declares no fields of its own beyond the base form's '''

    def prefill(self, model_file):
        ''' Copy the attribute values of model_file into the form fields '''
        # Same fields, in the same order, as they are assigned one by one
        # when written out long-hand.
        field_names = (
            'title',
            'description',
            'publisher',
            'publishing_year',
            'publisher_url',
            'publishing_url',
            'version',
            'shared',
        )
        for field_name in field_names:
            getattr(self, field_name).data = getattr(model_file, field_name)
|
||||
|
||||
|
||||
|
@ -1,10 +1,11 @@
|
||||
from flask import abort, flash, Markup, render_template, url_for
|
||||
from flask_login import login_required
|
||||
from flask import abort, current_app, flash, Markup, redirect, render_template, url_for
|
||||
from flask_login import login_required, current_user
|
||||
from threading import Thread
|
||||
from app import db
|
||||
from app.decorators import permission_required
|
||||
from app.decorators import admin_required, permission_required
|
||||
from app.models import TesseractOCRPipelineModel, Permission
|
||||
from . import bp
|
||||
from .forms import TesseractOCRModelContributionForm
|
||||
from .forms import TesseractOCRModelContributionForm, TesseractOCRModelEditForm
|
||||
|
||||
|
||||
@bp.before_request
|
||||
@ -14,13 +15,77 @@ def before_request():
|
||||
pass
|
||||
|
||||
|
||||
@bp.route('')
|
||||
@bp.route('/')
|
||||
@login_required
|
||||
@admin_required
|
||||
def contributions():
|
||||
pass
|
||||
tesseract_ocr_user_models = [
|
||||
x for x in current_user.tesseract_ocr_pipeline_models
|
||||
]
|
||||
return render_template(
|
||||
'contributions/contribution_overview.html.j2',
|
||||
tesseractOCRUserModels=tesseract_ocr_user_models,
|
||||
userId = current_user.hashid,
|
||||
title='Contribution Overview'
|
||||
)
|
||||
|
||||
@bp.route('/<hashid:tesseract_ocr_pipeline_model_id>', methods=['GET', 'POST'])
@login_required
def tesseract_ocr_pipeline_model(tesseract_ocr_pipeline_model_id):
    '''
    Show and process the edit form for one Tesseract OCR pipeline model.

    Responds with 404 if the model does not exist and with 403 if the
    current user is neither its owner nor an administrator. A valid
    submission writes the changed values to the model, commits, flashes a
    notice and returns an empty body with status 201 and a ``Location``
    header for the contributions overview. In every other case the form,
    pre-filled from the stored model, is rendered.
    '''
    tesseract_ocr_pipeline_model = TesseractOCRPipelineModel.query.get_or_404(
        tesseract_ocr_pipeline_model_id
    )
    # Apply the same ownership rule the DELETE endpoint for this resource
    # uses, so users cannot edit models that belong to someone else.
    if not (
        tesseract_ocr_pipeline_model.user == current_user
        or current_user.is_administrator()
    ):
        abort(403)
    form = TesseractOCRModelEditForm(prefix='tesseract-ocr-model-edit-form')
    if form.validate_on_submit():
        editable_fields = (
            'title',
            'description',
            'publisher',
            'publishing_year',
            'publisher_url',
            'publishing_url',
            'version',
            'shared',
        )
        # Only touch attributes whose submitted value actually differs.
        for field_name in editable_fields:
            submitted_value = getattr(form, field_name).data
            if getattr(tesseract_ocr_pipeline_model, field_name) != submitted_value:
                setattr(tesseract_ocr_pipeline_model, field_name, submitted_value)
        db.session.commit()
        # Markup.format() HTML-escapes its arguments, so a user-supplied
        # title cannot inject markup into the flashed message.
        message = Markup(
            'Model "<a href="contribute/{}">{}</a>" updated'
        ).format(
            tesseract_ocr_pipeline_model.hashid,
            tesseract_ocr_pipeline_model.title
        )
        flash(message, category='corpus')
        return {}, 201, {'Location': url_for('contributions.contributions')}
    form.prefill(tesseract_ocr_pipeline_model)
    return render_template(
        'contributions/tesseract_ocr_pipeline_model.html.j2',
        tesseract_ocr_pipeline_model=tesseract_ocr_pipeline_model,
        form=form,
        title='Edit your Tesseract OCR model'
    )
|
||||
|
||||
@bp.route('/tesseract-ocr-pipeline-models', methods=['GET', 'POST'])
|
||||
def tesseract_ocr_pipeline_models():
|
||||
@bp.route('/<hashid:tesseract_ocr_pipeline_model_id>', methods=['DELETE'])
@login_required
def delete_tesseract_model(tesseract_ocr_pipeline_model_id):
    '''
    Delete one Tesseract OCR pipeline model in a background thread.

    Responds with 404 if the model does not exist and with 403 if the
    current user is neither its owner nor an administrator. Otherwise the
    deletion is handed to a thread and an empty body with status 202 is
    returned without waiting for it to finish.
    '''
    def _delete_tesseract_model(app, tesseract_ocr_pipeline_model_id):
        # The thread runs outside the request, so it needs its own
        # application context.
        with app.app_context():
            model = TesseractOCRPipelineModel.query.get(
                tesseract_ocr_pipeline_model_id
            )
            if model is None:
                # The row vanished between the request and the thread
                # start; there is nothing left to delete.
                return
            model.delete()
            db.session.commit()

    model = TesseractOCRPipelineModel.query.get_or_404(
        tesseract_ocr_pipeline_model_id
    )
    if not (model.user == current_user or current_user.is_administrator()):
        abort(403)
    thread = Thread(
        target=_delete_tesseract_model,
        # Pass the real application object; the current_app proxy is bound
        # to this request's context and is not usable from another thread.
        args=(current_app._get_current_object(), tesseract_ocr_pipeline_model_id)
    )
    thread.start()
    return {}, 202
|
||||
|
||||
@bp.route('/add-tesseract-ocr-pipeline-model', methods=['GET', 'POST'])
|
||||
def add_tesseract_ocr_pipeline_model():
|
||||
form = TesseractOCRModelContributionForm(
|
||||
prefix='contribute-tesseract-ocr-pipeline-model-form'
|
||||
)
|
||||
@ -30,7 +95,7 @@ def tesseract_ocr_pipeline_models():
|
||||
return response, 400
|
||||
try:
|
||||
tesseract_ocr_model = TesseractOCRPipelineModel.create(
|
||||
form.file.data,
|
||||
form.tesseract_model_file.data,
|
||||
compatible_service_versions=form.compatible_service_versions.data,
|
||||
description=form.description.data,
|
||||
publisher=form.publisher.data,
|
||||
@ -39,7 +104,8 @@ def tesseract_ocr_pipeline_models():
|
||||
publishing_year=form.publishing_year.data,
|
||||
shared=form.shared.data,
|
||||
title=form.title.data,
|
||||
version=form.version.data
|
||||
version=form.version.data,
|
||||
user=current_user
|
||||
)
|
||||
except OSError:
|
||||
abort(500)
|
||||
@ -47,8 +113,13 @@ def tesseract_ocr_pipeline_models():
|
||||
message = Markup(f'Model "{tesseract_ocr_model.title}" created')
|
||||
flash(message)
|
||||
return {}, 201, {'Location': url_for('contributions.contributions')}
|
||||
tesseract_ocr_pipeline_models = [
|
||||
x for x in TesseractOCRPipelineModel.query.all()
|
||||
]
|
||||
|
||||
return render_template(
|
||||
'contributions/contribute.html.j2',
|
||||
'contributions/contribute_tesseract_ocr_models.html.j2',
|
||||
form=form,
|
||||
title='Contribution'
|
||||
tesseract_ocr_pipeline_models=tesseract_ocr_pipeline_models,
|
||||
title='Tesseract OCR Model Contribution'
|
||||
)
|
||||
|
Reference in New Issue
Block a user