Rename tables for pipeline models

This commit is contained in:
Patrick Jentsch 2022-10-12 10:23:05 +02:00
parent 2470c8dfef
commit 236d3e7ee4
14 changed files with 127 additions and 58 deletions

View File

@ -1,4 +1,3 @@
from flask import current_app
from flask_httpauth import HTTPBasicAuth, HTTPTokenAuth
from werkzeug.exceptions import Forbidden, Unauthorized
from app.models import User

View File

@ -4,8 +4,8 @@ from apifairy.decorators import body, other_responses
from flask import abort, Blueprint
from werkzeug.exceptions import InternalServerError
from app import db, hashids
from app.models import Job, JobInput, JobStatus, TesseractOCRModel
from .schemas import EmptySchema, JobSchema, SpaCyNLPPipelineJobSchema, TesseractOCRPipelineJobSchema, TesseractOCRModelSchema
from app.models import Job, JobInput, JobStatus, TesseractOCRPipelineModel
from .schemas import EmptySchema, JobSchema, SpaCyNLPPipelineJobSchema, TesseractOCRPipelineJobSchema, TesseractOCRPipelineModelSchema
from .auth import auth_error_responses, token_auth
@ -14,8 +14,8 @@ job_schema = JobSchema()
jobs_schema = JobSchema(many=True)
spacy_nlp_pipeline_job_schema = SpaCyNLPPipelineJobSchema()
tesseract_ocr_pipeline_job_schema = TesseractOCRPipelineJobSchema()
tesseract_ocr_model_schema = TesseractOCRModelSchema()
tesseract_ocr_models_schema = TesseractOCRModelSchema(many=True)
tesseract_ocr_pipeline_model_schema = TesseractOCRPipelineModelSchema()
tesseract_ocr_pipeline_models_schema = TesseractOCRPipelineModelSchema(many=True)
@bp.route('', methods=['GET'])
@ -60,11 +60,11 @@ def create_tesseract_ocr_pipeline_job(args):
@bp.route('/tesseract-ocr-pipeline/models', methods=['GET'])
@authenticate(token_auth)
@response(tesseract_ocr_models_schema)
@response(tesseract_ocr_pipeline_models_schema)
@other_responses(auth_error_responses)
def get_tesseract_ocr_models():
"""Get all Tesseract OCR Models"""
return TesseractOCRModel.query.all()
return TesseractOCRPipelineModel.query.all()
@bp.route('/<hashid:job_id>', methods=['DELETE'])

View File

@ -3,7 +3,14 @@ from marshmallow import validate, validates, ValidationError
from marshmallow.decorators import post_dump
from app import ma
from app.auth import USERNAME_REGEX
from app.models import Job, JobStatus, TesseractOCRModel, Token, User, UserSettingJobStatusMailNotificationLevel
from app.models import (
Job,
JobStatus,
TesseractOCRPipelineModel,
Token,
User,
UserSettingJobStatusMailNotificationLevel
)
from app.services import SERVICES
@ -21,9 +28,9 @@ class TokenSchema(ma.SQLAlchemySchema):
refresh_token = ma.String()
class TesseractOCRModelSchema(ma.SQLAlchemySchema):
class TesseractOCRPipelineModelSchema(ma.SQLAlchemySchema):
class Meta:
model = TesseractOCRModel
model = TesseractOCRPipelineModel
ordered = True
hashid = ma.String(data_key='id', dump_only=True)

View File

@ -1,7 +1,7 @@
from apifairy import authenticate, body, response
from apifairy.decorators import other_responses
from flask import abort, Blueprint, current_app
from flask import abort, Blueprint
from werkzeug.exceptions import InternalServerError
from app import db
from app.email import create_message, send

View File

@ -2,7 +2,12 @@ from flask import current_app
from flask_migrate import upgrade
import click
import os
from app.models import Role, User, TesseractOCRModel, TranskribusHTRModel
from app.models import (
Role,
User,
TesseractOCRPipelineModel,
TranskribusHTRPipelineModel
)
def _make_default_dirs():
@ -35,10 +40,10 @@ def register(app):
Role.insert_defaults()
current_app.logger.info('Insert/Update default users')
User.insert_defaults()
current_app.logger.info('Insert/Update default TesseractOCRModels')
TesseractOCRModel.insert_defaults()
current_app.logger.info('Insert/Update default TranskribusHTRModels')
TranskribusHTRModel.insert_defaults()
current_app.logger.info('Insert/Update default TesseractOCRPipelineModels')
TesseractOCRPipelineModel.insert_defaults()
current_app.logger.info('Insert/Update default TranskribusHTRPipelineModels')
TranskribusHTRPipelineModel.insert_defaults()
@app.cli.group()
def converter():

View File

@ -3,8 +3,8 @@ from app.models import (
Job,
JobResult,
JobStatus,
TesseractOCRModel,
TranskribusHTRModel
TesseractOCRPipelineModel,
TranskribusHTRPipelineModel
)
from datetime import datetime
from flask import current_app
@ -61,7 +61,7 @@ def _create_job_service(job):
if 'binarization' in job.service_args and job.service_args['binarization']:
command += ' --binarize'
elif job.service == 'transkribus-htr-pipeline':
transkribus_htr_model = TranskribusHTRModel.query.get(job.service_args['model'])
transkribus_htr_model = TranskribusHTRPipelineModel.query.get(job.service_args['model'])
command += f' -m {transkribus_htr_model.transkribus_model_id}'
readcoop_username = current_app.config.get('NOPAQUE_READCOOP_USERNAME')
command += f' --readcoop-username "{readcoop_username}"'
@ -96,7 +96,7 @@ def _create_job_service(job):
else:
job.status = JobStatus.FAILED
return
model = TesseractOCRModel.query.get(model_id)
model = TesseractOCRPipelineModel.query.get(model_id)
if model is None:
job.status = JobStatus.FAILED
return

View File

@ -23,7 +23,6 @@ from app.email import create_message
TRANSKRIBUS_HTR_MODELS = \
json.loads(requests.get('https://transkribus.eu/TrpServer/rest/models/text', params={'docType': 'handwritten'}).content)['trpModelMetadata'] # noqa
##############################################################################
# enums #
##############################################################################
@ -274,14 +273,14 @@ class User(HashidMixin, UserMixin, db.Model):
last_seen = db.Column(db.DateTime())
# Backrefs: role: Role
# Relationships
tesseract_ocr_models = db.relationship(
'TesseractOCRModel',
tesseract_ocr_pipeline_models = db.relationship(
'TesseractOCRPipelineModel',
backref='user',
cascade='all, delete-orphan',
lazy='dynamic'
)
transkribus_htr_models = db.relationship(
'TranskribusHTRModel',
transkribus_htr_pipeline_models = db.relationship(
'TranskribusHTRPipelineModel',
backref='user',
cascade='all, delete-orphan',
lazy='dynamic'
@ -342,7 +341,7 @@ class User(HashidMixin, UserMixin, db.Model):
db.session.refresh(user)
try:
os.mkdir(user.path)
os.mkdir(os.path.join(user.path, 'tesseract_ocr_models'))
os.mkdir(os.path.join(user.path, 'tesseract_ocr_pipeline_models'))
os.mkdir(os.path.join(user.path, 'corpora'))
os.mkdir(os.path.join(user.path, 'jobs'))
except OSError as e:
@ -518,14 +517,14 @@ class User(HashidMixin, UserMixin, db.Model):
x.hashid: x.to_json(relationships=True)
for x in self.jobs
}
_json['tesseract_ocr_models'] = {
_json['tesseract_ocr_pipeline_models'] = {
x.hashid: x.to_json(relationships=True)
for x in self.tesseract_ocr_models
for x in self.tesseract_ocr_pipeline_models
}
return _json
class TesseractOCRModel(FileMixin, HashidMixin, db.Model):
__tablename__ = 'tesseract_ocr_models'
class TesseractOCRPipelineModel(FileMixin, HashidMixin, db.Model):
__tablename__ = 'tesseract_ocr_pipeline_models'
# Primary key
id = db.Column(db.Integer, primary_key=True)
# Foreign keys
@ -546,7 +545,7 @@ class TesseractOCRModel(FileMixin, HashidMixin, db.Model):
def path(self):
return os.path.join(
self.user.path,
'tesseract_ocr_models',
'tesseract_ocr_pipeline_models',
str(self.id)
)
@ -555,12 +554,12 @@ class TesseractOCRModel(FileMixin, HashidMixin, db.Model):
nopaque_user = User.query.filter_by(username='nopaque').first()
defaults_file = os.path.join(
os.path.dirname(os.path.abspath(__file__)),
'TesseractOCRModel.defaults.yml'
'TesseractOCRPipelineModel.defaults.yml'
)
with open(defaults_file, 'r') as f:
defaults = yaml.safe_load(f)
for m in defaults:
model = TesseractOCRModel.query.filter_by(title=m['title'], version=m['version']).first() # noqa
model = TesseractOCRPipelineModel.query.filter_by(title=m['title'], version=m['version']).first() # noqa
if model is not None:
model.compatible_service_versions = m['compatible_service_versions']
model.description = m['description']
@ -572,7 +571,7 @@ class TesseractOCRModel(FileMixin, HashidMixin, db.Model):
model.title = m['title']
model.version = m['version']
continue
model = TesseractOCRModel(
model = TesseractOCRPipelineModel(
compatible_service_versions=m['compatible_service_versions'],
description=m['description'],
publisher=m['publisher'],
@ -623,8 +622,8 @@ class TesseractOCRModel(FileMixin, HashidMixin, db.Model):
return _json
class TranskribusHTRModel(HashidMixin, db.Model):
__tablename__ = 'transkribus_htr_models'
class TranskribusHTRPipelineModel(HashidMixin, db.Model):
__tablename__ = 'transkribus_htr_pipeline_models'
# Primary key
id = db.Column(db.Integer, primary_key=True)
# Foreign keys
@ -643,12 +642,12 @@ class TranskribusHTRModel(HashidMixin, db.Model):
# and 'docType' in m and m['docType'] == 'handwritten'
# ]
for m in TRANSKRIBUS_HTR_MODELS:
model = TranskribusHTRModel.query.filter_by(transkribus_model_id=m['modelId']).first() # noqa
model = TranskribusHTRPipelineModel.query.filter_by(transkribus_model_id=m['modelId']).first() # noqa
if model is not None:
model.shared = True
model.transkribus_model_id = m['modelId']
continue
model = TranskribusHTRModel(
model = TranskribusHTRPipelineModel(
transkribus_model_id=m['modelId'],
shared=True,
user=nopaque_user,

View File

@ -12,8 +12,8 @@ from wtforms import (
from wtforms.validators import InputRequired, Length
from app.models import (
TRANSKRIBUS_HTR_MODELS,
TesseractOCRModel,
TranskribusHTRModel
TesseractOCRPipelineModel,
TranskribusHTRPipelineModel
)
from . import SERVICES
@ -77,7 +77,7 @@ class CreateTesseractOCRPipelineJobForm(CreateJobBaseForm):
if 'disabled' in self.binarization.render_kw:
del self.binarization.render_kw['disabled']
models = [
x for x in TesseractOCRModel.query.filter().all()
x for x in TesseractOCRPipelineModel.query.filter().all()
if version in x.compatible_service_versions and (x.shared == True or x.user == current_user)
]
self.model.choices = [('', 'Choose your option')]
@ -119,7 +119,7 @@ class CreateTranskribusHTRPipelineJobForm(CreateJobBaseForm):
if 'disabled' in self.binarization.render_kw:
del self.binarization.render_kw['disabled']
models = [
x for x in TranskribusHTRModel.query.filter().all()
x for x in TranskribusHTRPipelineModel.query.filter().all()
if x.shared == True or x.user == current_user
]
self.model.choices = [('', 'Choose your option')]

View File

@ -5,9 +5,9 @@ from app.models import (
Job,
JobInput,
JobStatus,
TesseractOCRModel,
TesseractOCRPipelineModel,
TRANSKRIBUS_HTR_MODELS,
TranskribusHTRModel
TranskribusHTRPipelineModel
)
from . import bp, SERVICES
from .forms import (
@ -95,14 +95,14 @@ def tesseract_ocr_pipeline():
message = Markup(f'Job "<a href="{job.url}">{job.title}</a>" created')
flash(message, 'job')
return {}, 201, {'Location': job.url}
tesseract_ocr_models = [
x for x in TesseractOCRModel.query.all()
tesseract_ocr_pipeline_models = [
x for x in TesseractOCRPipelineModel.query.all()
if version in x.compatible_service_versions and (x.shared == True or x.user == current_user)
]
return render_template(
'services/tesseract_ocr_pipeline.html.j2',
form=form,
tesseract_ocr_models=tesseract_ocr_models,
tesseract_ocr_pipeline_models=tesseract_ocr_pipeline_models,
title=service_manifest['name']
)
@ -145,8 +145,8 @@ def transkribus_htr_pipeline():
message = Markup(f'Job "<a href="{job.url}">{job.title}</a>" created')
flash(message, 'job')
return {}, 201, {'Location': job.url}
transkribus_htr_models = [
x for x in TranskribusHTRModel.query.all()
transkribus_htr_pipeline_models = [
x for x in TranskribusHTRPipelineModel.query.all()
if x.shared == True or x.user == current_user
]
return render_template(
@ -154,7 +154,7 @@ def transkribus_htr_pipeline():
form=form,
title=service_manifest['name'],
TRANSKRIBUS_HTR_MODELS=TRANSKRIBUS_HTR_MODELS,
transkribus_htr_models=transkribus_htr_models
transkribus_htr_pipeline_models=transkribus_htr_pipeline_models
)

View File

@ -160,8 +160,8 @@
</tr>
</thead>
<tbody>
{% for m in tesseract_ocr_models %}
<tr id="tesseract-ocr-model-{{ m.hashid }}">
{% for m in tesseract_ocr_pipeline_models %}
<tr id="tesseract-ocr-pipeline-model-{{ m.hashid }}">
<td>{{ m.title }}</td>
{% if m.description == '' %}
<td>Description is not available.</td>

View File

@ -157,8 +157,8 @@
<div class="modal-content">
<h4>Transkribus HTR Pipeline models</h4>
<ul class="collapsible popout" id="transkribus-htr-models">
{% for m in transkribus_htr_models %}
<li id="transkribus-htr-model-{{ m.hashid }}">
{% for m in transkribus_htr_pipeline_models %}
<li id="transkribus-htr-pipeline-model-{{ m.hashid }}">
{% for m_info in TRANSKRIBUS_HTR_MODELS if m_info['modelId'] == m.transkribus_model_id %}
<div class="collapsible-header"><i class="material-icons">widgets</i>{{ m_info.name }}</div>
<div class="collapsible-body">

View File

@ -0,0 +1,59 @@
"""Rename pipeline model tables
Revision ID: 63b2cc26a01f
Revises: 260b57d5f4e7
Create Date: 2022-10-11 14:32:13.227364
"""
from alembic import op
from flask import current_app
import os
from app.models import User
# revision identifiers, used by Alembic.
revision = '63b2cc26a01f'
down_revision = '260b57d5f4e7'
branch_labels = None
depends_on = None
def upgrade():
    """Apply the migration: move model directories, then rename tables.

    For every user, the ``tesseract_ocr_models`` directory below
    ``user.path`` is renamed to ``tesseract_ocr_pipeline_models``.
    Afterwards the ``tesseract_ocr_models`` and ``transkribus_htr_models``
    tables are renamed to their ``*_pipeline_models`` counterparts.

    The directory renames are not covered by the database transaction, so
    the step is written to be repeatable: users without the old directory
    are skipped with a warning instead of aborting the migration.
    """
    for user in User.query.all():
        source_dir = os.path.join(user.path, 'tesseract_ocr_models')
        target_dir = os.path.join(user.path, 'tesseract_ocr_pipeline_models')
        # os.rename() raises if the source is missing. Skipping keeps a
        # re-run after a partial failure from dying on directories that
        # were already moved by the previous attempt.
        if not os.path.isdir(source_dir):
            current_app.logger.warning(
                'Skipping rename, directory not found: %s',
                source_dir
            )
            continue
        os.rename(source_dir, target_dir)
    op.rename_table('tesseract_ocr_models', 'tesseract_ocr_pipeline_models')
    op.rename_table('transkribus_htr_models', 'transkribus_htr_pipeline_models')
def downgrade():
    """Revert the migration: restore model directories and table names.

    For every user, the ``tesseract_ocr_pipeline_models`` directory below
    ``user.path`` is renamed back to ``tesseract_ocr_models``. Afterwards
    the ``tesseract_ocr_pipeline_models`` and
    ``transkribus_htr_pipeline_models`` tables get their previous names
    back.

    The directory renames are not covered by the database transaction, so
    the step is written to be repeatable: users without the pipeline
    directory are skipped with a warning instead of aborting the downgrade.
    """
    for user in User.query.all():
        source_dir = os.path.join(user.path, 'tesseract_ocr_pipeline_models')
        target_dir = os.path.join(user.path, 'tesseract_ocr_models')
        # os.rename() raises if the source is missing. Skipping keeps a
        # re-run after a partial failure from dying on directories that
        # were already moved back by the previous attempt.
        if not os.path.isdir(source_dir):
            current_app.logger.warning(
                'Skipping rename, directory not found: %s',
                source_dir
            )
            continue
        os.rename(source_dir, target_dir)
    op.rename_table('tesseract_ocr_pipeline_models', 'tesseract_ocr_models')
    op.rename_table('transkribus_htr_pipeline_models', 'transkribus_htr_models')

View File

@ -12,8 +12,8 @@ from app.models import (
JobResult,
Permission,
Role,
TesseractOCRModel,
TranskribusHTRModel,
TesseractOCRPipelineModel,
TranskribusHTRPipelineModel,
User
) # noqa
from flask import Flask # noqa
@ -42,8 +42,8 @@ def make_shell_context() -> Dict[str, Any]:
'JobResult': JobResult,
'Permission': Permission,
'Role': Role,
'TesseractOCRModel': TesseractOCRModel,
'TranskribusHTRModel': TranskribusHTRModel,
'TesseractOCRPipelineModel': TesseractOCRPipelineModel,
'TranskribusHTRPipelineModel': TranskribusHTRPipelineModel,
'User': User
}