From dc3709decbe6e82428f437195fcf99f6628c1e13 Mon Sep 17 00:00:00 2001
From: Patrick Jentsch
Date: Wed, 12 Oct 2022 15:10:55 +0200
Subject: [PATCH] Remove the TranskribusHTRPipelineModel and fetch data on
request.
---
app/cli.py | 5 +-
app/daemon/job_utils.py | 7 +--
app/models.py | 55 +------------------
app/services/forms.py | 13 +----
app/services/routes.py | 27 +++++----
.../services/transkribus_htr_pipeline.html.j2 | 12 ++--
migrations/versions/a3b727e3ff71_.py | 35 ++++++++++++
nopaque.py | 2 -
8 files changed, 64 insertions(+), 92 deletions(-)
create mode 100644 migrations/versions/a3b727e3ff71_.py
diff --git a/app/cli.py b/app/cli.py
index bfaf8754..54226707 100644
--- a/app/cli.py
+++ b/app/cli.py
@@ -5,8 +5,7 @@ import os
from app.models import (
Role,
User,
- TesseractOCRPipelineModel,
- TranskribusHTRPipelineModel
+ TesseractOCRPipelineModel
)
@@ -42,8 +41,6 @@ def register(app):
User.insert_defaults()
current_app.logger.info('Insert/Update default TesseractOCRPipelineModels')
TesseractOCRPipelineModel.insert_defaults()
- current_app.logger.info('Insert/Update default TranskribusHTRPipelineModels')
- TranskribusHTRPipelineModel.insert_defaults()
@app.cli.group()
def converter():
diff --git a/app/daemon/job_utils.py b/app/daemon/job_utils.py
index 0f74f3a0..32def73d 100644
--- a/app/daemon/job_utils.py
+++ b/app/daemon/job_utils.py
@@ -3,8 +3,7 @@ from app.models import (
Job,
JobResult,
JobStatus,
- TesseractOCRPipelineModel,
- TranskribusHTRPipelineModel
+ TesseractOCRPipelineModel
)
from datetime import datetime
from flask import current_app
@@ -61,8 +60,8 @@ def _create_job_service(job):
if 'binarization' in job.service_args and job.service_args['binarization']:
command += ' --binarize'
elif job.service == 'transkribus-htr-pipeline':
- transkribus_htr_model = TranskribusHTRPipelineModel.query.get(job.service_args['model'])
- command += f' -m {transkribus_htr_model.transkribus_model_id}'
+ transkribus_htr_pipeline_model_id = job.service_args['model']
+ command += f' -m {transkribus_htr_pipeline_model_id}'
readcoop_username = current_app.config.get('NOPAQUE_READCOOP_USERNAME')
command += f' --readcoop-username "{readcoop_username}"'
readcoop_password = current_app.config.get('NOPAQUE_READCOOP_PASSWORD')
diff --git a/app/models.py b/app/models.py
index beab7d6e..18188b49 100644
--- a/app/models.py
+++ b/app/models.py
@@ -1,5 +1,6 @@
from datetime import datetime, timedelta
from enum import Enum, IntEnum
+import re
from flask import current_app, url_for
from flask_hashids import HashidMixin
from flask_login import UserMixin
@@ -20,9 +21,6 @@ from app.converters.vrt import normalize_vrt_file
from app.email import create_message
-TRANSKRIBUS_HTR_MODELS = \
- json.loads(requests.get('https://transkribus.eu/TrpServer/rest/models/text', params={'docType': 'handwritten'}).content)['trpModelMetadata'] # noqa
-
##############################################################################
# enums #
##############################################################################
@@ -279,12 +277,6 @@ class User(HashidMixin, UserMixin, db.Model):
cascade='all, delete-orphan',
lazy='dynamic'
)
- transkribus_htr_pipeline_models = db.relationship(
- 'TranskribusHTRPipelineModel',
- backref='user',
- cascade='all, delete-orphan',
- lazy='dynamic'
- )
corpora = db.relationship(
'Corpus',
backref='user',
@@ -622,51 +614,6 @@ class TesseractOCRPipelineModel(FileMixin, HashidMixin, db.Model):
return _json
-class TranskribusHTRPipelineModel(HashidMixin, db.Model):
- __tablename__ = 'transkribus_htr_pipeline_models'
- # Primary key
- id = db.Column(db.Integer, primary_key=True)
- # Foreign keys
- user_id = db.Column(db.Integer, db.ForeignKey('users.id'))
- # Fields
- shared = db.Column(db.Boolean, default=False)
- transkribus_model_id = db.Column(db.Integer)
- # Backrefs: user: User
-
- @staticmethod
- def insert_defaults():
- nopaque_user = User.query.filter_by(username='nopaque').first()
- # models = [
- # m for m in TRANSKRIBUS_HTR_MODELS if True
- # and 'creator' in m and m['creator'] == 'Transkribus Team'
- # and 'docType' in m and m['docType'] == 'handwritten'
- # ]
- for m in TRANSKRIBUS_HTR_MODELS:
- model = TranskribusHTRPipelineModel.query.filter_by(transkribus_model_id=m['modelId']).first() # noqa
- if model is not None:
- model.shared = True
- model.transkribus_model_id = m['modelId']
- continue
- model = TranskribusHTRPipelineModel(
- transkribus_model_id=m['modelId'],
- shared=True,
- user=nopaque_user,
- )
- db.session.add(model)
- db.session.commit()
-
- def to_json(self, backrefs=False, relationships=False):
- _json = {
- 'id': self.hashid,
- 'user_id': self.user.hashid,
- 'shared': self.shared,
- 'transkribus_model_id': self.transkribus_model_id,
- }
- if backrefs:
- _json['user'] = self.user.to_json(backrefs=True)
- return _json
-
-
class JobInput(FileMixin, HashidMixin, db.Model):
__tablename__ = 'job_inputs'
# Primary key
diff --git a/app/services/forms.py b/app/services/forms.py
index ea1c1e70..5c0af906 100644
--- a/app/services/forms.py
+++ b/app/services/forms.py
@@ -10,11 +10,7 @@ from wtforms import (
ValidationError
)
from wtforms.validators import InputRequired, Length
-from app.models import (
- TRANSKRIBUS_HTR_MODELS,
- TesseractOCRPipelineModel,
- TranskribusHTRPipelineModel
-)
+from app.models import TesseractOCRPipelineModel
from . import SERVICES
@@ -107,6 +103,7 @@ class CreateTranskribusHTRPipelineJobForm(CreateJobBaseForm):
raise ValidationError('PDF files only!')
def __init__(self, *args, **kwargs):
+ transkribus_htr_pipeline_models = kwargs.pop('transkribus_htr_pipeline_models', [])
service_manifest = SERVICES['transkribus-htr-pipeline']
version = kwargs.pop('version', service_manifest['latest_version'])
super().__init__(*args, **kwargs)
@@ -118,12 +115,8 @@ class CreateTranskribusHTRPipelineJobForm(CreateJobBaseForm):
if 'binarization' in service_info['methods']:
if 'disabled' in self.binarization.render_kw:
del self.binarization.render_kw['disabled']
- models = [
- x for x in TranskribusHTRPipelineModel.query.filter().all()
- if x.shared == True or x.user == current_user
- ]
self.model.choices = [('', 'Choose your option')]
- self.model.choices += [(x.hashid, [y['name'] for y in TRANSKRIBUS_HTR_MODELS if y['modelId'] == x.transkribus_model_id ][0]) for x in models]
+ self.model.choices += [(x['modelId'], x['name']) for x in transkribus_htr_pipeline_models]
self.model.default = ''
self.version.choices = [(x, x) for x in service_manifest['versions']]
self.version.data = version
diff --git a/app/services/routes.py b/app/services/routes.py
index 4f019525..b34d0619 100644
--- a/app/services/routes.py
+++ b/app/services/routes.py
@@ -1,13 +1,12 @@
-from flask import abort, current_app, flash, Markup, render_template, request
+from flask import abort, current_app, flash, make_response, Markup, render_template, request
from flask_login import current_user, login_required
+import requests
from app import db, hashids
from app.models import (
Job,
JobInput,
JobStatus,
- TesseractOCRPipelineModel,
- TRANSKRIBUS_HTR_MODELS,
- TranskribusHTRPipelineModel
+ TesseractOCRPipelineModel
)
from . import bp, SERVICES
from .forms import (
@@ -117,7 +116,18 @@ def transkribus_htr_pipeline():
version = request.args.get('version', service_manifest['latest_version'])
if version not in service_manifest['versions']:
abort(404)
- form = CreateTranskribusHTRPipelineJobForm(prefix='create-job-form', version=version)
+ r = requests.get(
+ 'https://transkribus.eu/TrpServer/rest/models/text',
+ headers={'Accept': 'application/json'}
+ )
+ if r.status_code != 200:
+ abort(500)
+ transkribus_htr_pipeline_models = r.json()['trpModelMetadata']
+ form = CreateTranskribusHTRPipelineJobForm(
+ transkribus_htr_pipeline_models=transkribus_htr_pipeline_models,
+ prefix='create-job-form',
+ version=version
+ )
if form.is_submitted():
if not form.validate():
response = {'errors': form.errors}
@@ -129,7 +139,7 @@ def transkribus_htr_pipeline():
service=service,
service_args={
'binarization': form.binarization.data,
- 'model': hashids.decode(form.model.data)
+ 'model': form.model.data
},
service_version=form.version.data,
user=current_user
@@ -145,15 +155,10 @@ def transkribus_htr_pipeline():
message = Markup(f'Job "{job.title}" created')
flash(message, 'job')
return {}, 201, {'Location': job.url}
- transkribus_htr_pipeline_models = [
- x for x in TranskribusHTRPipelineModel.query.all()
- if x.shared == True or x.user == current_user
- ]
return render_template(
'services/transkribus_htr_pipeline.html.j2',
form=form,
title=service_manifest['name'],
- TRANSKRIBUS_HTR_MODELS=TRANSKRIBUS_HTR_MODELS,
transkribus_htr_pipeline_models=transkribus_htr_pipeline_models
)
diff --git a/app/templates/services/transkribus_htr_pipeline.html.j2 b/app/templates/services/transkribus_htr_pipeline.html.j2
index 79519f3a..d54d9906 100644
--- a/app/templates/services/transkribus_htr_pipeline.html.j2
+++ b/app/templates/services/transkribus_htr_pipeline.html.j2
@@ -156,15 +156,13 @@
Transkribus HTR Pipeline models
-
- {% for m in transkribus_htr_pipeline_models %}
- -
- {% for m_info in TRANSKRIBUS_HTR_MODELS if m_info['modelId'] == m.transkribus_model_id %}
-
+
diff --git a/migrations/versions/a3b727e3ff71_.py b/migrations/versions/a3b727e3ff71_.py
new file mode 100644
index 00000000..a2fb37fb
--- /dev/null
+++ b/migrations/versions/a3b727e3ff71_.py
@@ -0,0 +1,35 @@
+"""Remove transkribus_htr_pipeline_models table
+
+Revision ID: a3b727e3ff71
+Revises: 63b2cc26a01f
+Create Date: 2022-10-12 13:08:19.065218
+
+"""
+from alembic import op
+import sqlalchemy as sa
+
+
+# revision identifiers, used by Alembic.
+revision = 'a3b727e3ff71'
+down_revision = '63b2cc26a01f'
+branch_labels = None
+depends_on = None
+
+
+def upgrade():
+ # Drop the table that backed the removed TranskribusHTRPipelineModel.
+ op.drop_table('transkribus_htr_pipeline_models')
+ # Existing rows are discarded; downgrade() only recreates the empty table.
+
+
+def downgrade():
+ # Recreate the table dropped by upgrade(); only the schema is restored, no rows.
+ op.create_table('transkribus_htr_pipeline_models',
+ sa.Column('id', sa.INTEGER(), autoincrement=True, nullable=False),
+ sa.Column('user_id', sa.INTEGER(), autoincrement=False, nullable=True),
+ sa.Column('shared', sa.BOOLEAN(), autoincrement=False, nullable=True),
+ sa.Column('transkribus_model_id', sa.INTEGER(), autoincrement=False, nullable=True),
+ sa.ForeignKeyConstraint(['user_id'], ['users.id'], name='transkribus_htr_models_user_id_fkey'),
+ sa.PrimaryKeyConstraint('id', name='transkribus_htr_models_pkey')
+ )
+ # NOTE(review): constraint names use the 'transkribus_htr_models_' prefix, not the table name - presumably intentional; confirm.
diff --git a/nopaque.py b/nopaque.py
index 457add2c..bff58ae1 100644
--- a/nopaque.py
+++ b/nopaque.py
@@ -13,7 +13,6 @@ from app.models import (
Permission,
Role,
TesseractOCRPipelineModel,
- TranskribusHTRPipelineModel,
User
) # noqa
from flask import Flask # noqa
@@ -43,7 +42,6 @@ def make_shell_context() -> Dict[str, Any]:
'Permission': Permission,
'Role': Role,
'TesseractOCRPipelineModel': TesseractOCRPipelineModel,
- 'TranskribusHTRPipelineModel': TranskribusHTRPipelineModel,
'User': User
}