Remove the TranskribusHTRPipelineModel and fetch data on request.

This commit is contained in:
Patrick Jentsch 2022-10-12 15:10:55 +02:00
parent 236d3e7ee4
commit dc3709decb
8 changed files with 64 additions and 92 deletions

View File

@ -5,8 +5,7 @@ import os
from app.models import (
Role,
User,
TesseractOCRPipelineModel,
TranskribusHTRPipelineModel
TesseractOCRPipelineModel
)
@ -42,8 +41,6 @@ def register(app):
User.insert_defaults()
current_app.logger.info('Insert/Update default TesseractOCRPipelineModels')
TesseractOCRPipelineModel.insert_defaults()
current_app.logger.info('Insert/Update default TranskribusHTRPipelineModels')
TranskribusHTRPipelineModel.insert_defaults()
@app.cli.group()
def converter():

View File

@ -3,8 +3,7 @@ from app.models import (
Job,
JobResult,
JobStatus,
TesseractOCRPipelineModel,
TranskribusHTRPipelineModel
TesseractOCRPipelineModel
)
from datetime import datetime
from flask import current_app
@ -61,8 +60,8 @@ def _create_job_service(job):
if 'binarization' in job.service_args and job.service_args['binarization']:
command += ' --binarize'
elif job.service == 'transkribus-htr-pipeline':
transkribus_htr_model = TranskribusHTRPipelineModel.query.get(job.service_args['model'])
command += f' -m {transkribus_htr_model.transkribus_model_id}'
transkribus_htr_pipeline_model_id = job.service_args['model']
command += f' -m {transkribus_htr_pipeline_model_id}'
readcoop_username = current_app.config.get('NOPAQUE_READCOOP_USERNAME')
command += f' --readcoop-username "{readcoop_username}"'
readcoop_password = current_app.config.get('NOPAQUE_READCOOP_PASSWORD')

View File

@ -1,5 +1,6 @@
from datetime import datetime, timedelta
from enum import Enum, IntEnum
import re
from flask import current_app, url_for
from flask_hashids import HashidMixin
from flask_login import UserMixin
@ -20,9 +21,6 @@ from app.converters.vrt import normalize_vrt_file
from app.email import create_message
TRANSKRIBUS_HTR_MODELS = \
json.loads(requests.get('https://transkribus.eu/TrpServer/rest/models/text', params={'docType': 'handwritten'}).content)['trpModelMetadata'] # noqa
##############################################################################
# enums #
##############################################################################
@ -279,12 +277,6 @@ class User(HashidMixin, UserMixin, db.Model):
cascade='all, delete-orphan',
lazy='dynamic'
)
transkribus_htr_pipeline_models = db.relationship(
'TranskribusHTRPipelineModel',
backref='user',
cascade='all, delete-orphan',
lazy='dynamic'
)
corpora = db.relationship(
'Corpus',
backref='user',
@ -622,51 +614,6 @@ class TesseractOCRPipelineModel(FileMixin, HashidMixin, db.Model):
return _json
# Database model (db.Model) stored in the 'transkribus_htr_pipeline_models'
# table. Each row pairs a Transkribus model id with an owning user and a
# "shared" flag.
class TranskribusHTRPipelineModel(HashidMixin, db.Model):
__tablename__ = 'transkribus_htr_pipeline_models'
# Primary key
id = db.Column(db.Integer, primary_key=True)
# Foreign keys
user_id = db.Column(db.Integer, db.ForeignKey('users.id'))
# Fields
shared = db.Column(db.Boolean, default=False)
transkribus_model_id = db.Column(db.Integer)
# Backrefs: user: User
# Create or refresh one shared row per entry of TRANSKRIBUS_HTR_MODELS,
# matching existing rows by the entry's 'modelId'.
# NOTE(review): indentation is not preserved in this view, so it cannot be
# determined here whether the commit at the end runs once per entry or once
# after the loop -- confirm against the original file.
@staticmethod
def insert_defaults():
# The user named 'nopaque' becomes the owner of newly created rows.
nopaque_user = User.query.filter_by(username='nopaque').first()
# models = [
# m for m in TRANSKRIBUS_HTR_MODELS if True
# and 'creator' in m and m['creator'] == 'Transkribus Team'
# and 'docType' in m and m['docType'] == 'handwritten'
# ]
for m in TRANSKRIBUS_HTR_MODELS:
model = TranskribusHTRPipelineModel.query.filter_by(transkribus_model_id=m['modelId']).first() # noqa
# Row already exists: mark it shared and skip creating a new one.
if model is not None:
model.shared = True
model.transkribus_model_id = m['modelId']
continue
# No row yet for this modelId: create a shared one owned by nopaque_user.
model = TranskribusHTRPipelineModel(
transkribus_model_id=m['modelId'],
shared=True,
user=nopaque_user,
)
db.session.add(model)
db.session.commit()
# Build a plain dict describing this row; 'id' and 'user_id' are taken from
# the respective `hashid` attributes. With backrefs=True the owning user's
# own to_json(backrefs=True) output is embedded under 'user'.
# The `relationships` parameter is accepted but not used in this method.
def to_json(self, backrefs=False, relationships=False):
_json = {
'id': self.hashid,
'user_id': self.user.hashid,
'shared': self.shared,
'transkribus_model_id': self.transkribus_model_id,
}
if backrefs:
_json['user'] = self.user.to_json(backrefs=True)
return _json
class JobInput(FileMixin, HashidMixin, db.Model):
__tablename__ = 'job_inputs'
# Primary key

View File

@ -10,11 +10,7 @@ from wtforms import (
ValidationError
)
from wtforms.validators import InputRequired, Length
from app.models import (
TRANSKRIBUS_HTR_MODELS,
TesseractOCRPipelineModel,
TranskribusHTRPipelineModel
)
from app.models import TesseractOCRPipelineModel
from . import SERVICES
@ -107,6 +103,7 @@ class CreateTranskribusHTRPipelineJobForm(CreateJobBaseForm):
raise ValidationError('PDF files only!')
def __init__(self, *args, **kwargs):
transkribus_htr_pipeline_models = kwargs.pop('transkribus_htr_pipeline_models', [])
service_manifest = SERVICES['transkribus-htr-pipeline']
version = kwargs.pop('version', service_manifest['latest_version'])
super().__init__(*args, **kwargs)
@ -118,12 +115,8 @@ class CreateTranskribusHTRPipelineJobForm(CreateJobBaseForm):
if 'binarization' in service_info['methods']:
if 'disabled' in self.binarization.render_kw:
del self.binarization.render_kw['disabled']
models = [
x for x in TranskribusHTRPipelineModel.query.filter().all()
if x.shared == True or x.user == current_user
]
self.model.choices = [('', 'Choose your option')]
self.model.choices += [(x.hashid, [y['name'] for y in TRANSKRIBUS_HTR_MODELS if y['modelId'] == x.transkribus_model_id ][0]) for x in models]
self.model.choices += [(x['modelId'], x['name']) for x in transkribus_htr_pipeline_models]
self.model.default = ''
self.version.choices = [(x, x) for x in service_manifest['versions']]
self.version.data = version

View File

@ -1,13 +1,12 @@
from flask import abort, current_app, flash, Markup, render_template, request
from flask import abort, current_app, flash, make_response, Markup, render_template, request
from flask_login import current_user, login_required
import requests
from app import db, hashids
from app.models import (
Job,
JobInput,
JobStatus,
TesseractOCRPipelineModel,
TRANSKRIBUS_HTR_MODELS,
TranskribusHTRPipelineModel
TesseractOCRPipelineModel
)
from . import bp, SERVICES
from .forms import (
@ -117,7 +116,18 @@ def transkribus_htr_pipeline():
version = request.args.get('version', service_manifest['latest_version'])
if version not in service_manifest['versions']:
abort(404)
form = CreateTranskribusHTRPipelineJobForm(prefix='create-job-form', version=version)
r = requests.get(
'https://transkribus.eu/TrpServer/rest/models/text',
headers={'Accept': 'application/json'}
)
if r.status_code != 200:
abort(500)
transkribus_htr_pipeline_models = r.json()['trpModelMetadata']
form = CreateTranskribusHTRPipelineJobForm(
transkribus_htr_pipeline_models=transkribus_htr_pipeline_models,
prefix='create-job-form',
version=version
)
if form.is_submitted():
if not form.validate():
response = {'errors': form.errors}
@ -129,7 +139,7 @@ def transkribus_htr_pipeline():
service=service,
service_args={
'binarization': form.binarization.data,
'model': hashids.decode(form.model.data)
'model': form.model.data
},
service_version=form.version.data,
user=current_user
@ -145,15 +155,10 @@ def transkribus_htr_pipeline():
message = Markup(f'Job "<a href="{job.url}">{job.title}</a>" created')
flash(message, 'job')
return {}, 201, {'Location': job.url}
transkribus_htr_pipeline_models = [
x for x in TranskribusHTRPipelineModel.query.all()
if x.shared == True or x.user == current_user
]
return render_template(
'services/transkribus_htr_pipeline.html.j2',
form=form,
title=service_manifest['name'],
TRANSKRIBUS_HTR_MODELS=TRANSKRIBUS_HTR_MODELS,
transkribus_htr_pipeline_models=transkribus_htr_pipeline_models
)

View File

@ -156,15 +156,13 @@
<div id="models-modal" class="modal">
<div class="modal-content">
<h4>Transkribus HTR Pipeline models</h4>
<ul class="collapsible popout" id="transkribus-htr-models">
{% for m in transkribus_htr_pipeline_models %}
<li id="transkribus-htr-pipeline-model-{{ m.hashid }}">
{% for m_info in TRANSKRIBUS_HTR_MODELS if m_info['modelId'] == m.transkribus_model_id %}
<div class="collapsible-header"><i class="material-icons">widgets</i>{{ m_info.name }}</div>
<ul class="collapsible popout" id="transkribus-htr-pipeline-models">
{% for x in transkribus_htr_pipeline_models %}
<li id="transkribus-htr-pipeline-model-{{ x.modelId }}">
<div class="collapsible-header"><i class="material-icons">widgets</i>{{ x.name }}</div>
<div class="collapsible-body">
{{ m_info|tojson }}
{{ x|tojson }}
</div>
{% endfor %}
</li>
{% endfor %}
</ul>

View File

@ -0,0 +1,35 @@
"""Remove transkribus_htr_pipeline_models table
Revision ID: a3b727e3ff71
Revises: 63b2cc26a01f
Create Date: 2022-10-12 13:08:19.065218
"""
from alembic import op
import sqlalchemy as sa
# revision identifiers, used by Alembic.
revision = 'a3b727e3ff71'
down_revision = '63b2cc26a01f'
branch_labels = None
depends_on = None
# Apply revision a3b727e3ff71: drop the 'transkribus_htr_pipeline_models'
# table.
def upgrade():
# ### commands auto generated by Alembic - please adjust! ###
op.drop_table('transkribus_htr_pipeline_models')
# ### end Alembic commands ###
# Revert revision a3b727e3ff71: recreate the 'transkribus_htr_pipeline_models'
# table with its id, user_id, shared and transkribus_model_id columns.
# Only the table structure is recreated here; no rows are inserted.
# NOTE(review): the constraint names below use the prefix
# 'transkribus_htr_models_', which differs from the table name -- presumably
# these are the names present in the existing database; confirm.
def downgrade():
# ### commands auto generated by Alembic - please adjust! ###
op.create_table('transkribus_htr_pipeline_models',
sa.Column('id', sa.INTEGER(), autoincrement=True, nullable=False),
sa.Column('user_id', sa.INTEGER(), autoincrement=False, nullable=True),
sa.Column('shared', sa.BOOLEAN(), autoincrement=False, nullable=True),
sa.Column('transkribus_model_id', sa.INTEGER(), autoincrement=False, nullable=True),
# user_id references users.id
sa.ForeignKeyConstraint(['user_id'], ['users.id'], name='transkribus_htr_models_user_id_fkey'),
sa.PrimaryKeyConstraint('id', name='transkribus_htr_models_pkey')
)
# ### end Alembic commands ###

View File

@ -13,7 +13,6 @@ from app.models import (
Permission,
Role,
TesseractOCRPipelineModel,
TranskribusHTRPipelineModel,
User
) # noqa
from flask import Flask # noqa
@ -43,7 +42,6 @@ def make_shell_context() -> Dict[str, Any]:
'Permission': Permission,
'Role': Role,
'TesseractOCRPipelineModel': TesseractOCRPipelineModel,
'TranskribusHTRPipelineModel': TranskribusHTRPipelineModel,
'User': User
}