Rename tables for pipeline models

This commit is contained in:
Patrick Jentsch 2022-10-12 10:23:05 +02:00
parent 2470c8dfef
commit 236d3e7ee4
14 changed files with 127 additions and 58 deletions

View File

@ -1,4 +1,3 @@
from flask import current_app
from flask_httpauth import HTTPBasicAuth, HTTPTokenAuth from flask_httpauth import HTTPBasicAuth, HTTPTokenAuth
from werkzeug.exceptions import Forbidden, Unauthorized from werkzeug.exceptions import Forbidden, Unauthorized
from app.models import User from app.models import User

View File

@ -4,8 +4,8 @@ from apifairy.decorators import body, other_responses
from flask import abort, Blueprint from flask import abort, Blueprint
from werkzeug.exceptions import InternalServerError from werkzeug.exceptions import InternalServerError
from app import db, hashids from app import db, hashids
from app.models import Job, JobInput, JobStatus, TesseractOCRModel from app.models import Job, JobInput, JobStatus, TesseractOCRPipelineModel
from .schemas import EmptySchema, JobSchema, SpaCyNLPPipelineJobSchema, TesseractOCRPipelineJobSchema, TesseractOCRModelSchema from .schemas import EmptySchema, JobSchema, SpaCyNLPPipelineJobSchema, TesseractOCRPipelineJobSchema, TesseractOCRPipelineModelSchema
from .auth import auth_error_responses, token_auth from .auth import auth_error_responses, token_auth
@ -14,8 +14,8 @@ job_schema = JobSchema()
jobs_schema = JobSchema(many=True) jobs_schema = JobSchema(many=True)
spacy_nlp_pipeline_job_schema = SpaCyNLPPipelineJobSchema() spacy_nlp_pipeline_job_schema = SpaCyNLPPipelineJobSchema()
tesseract_ocr_pipeline_job_schema = TesseractOCRPipelineJobSchema() tesseract_ocr_pipeline_job_schema = TesseractOCRPipelineJobSchema()
tesseract_ocr_model_schema = TesseractOCRModelSchema() tesseract_ocr_pipeline_model_schema = TesseractOCRPipelineModelSchema()
tesseract_ocr_models_schema = TesseractOCRModelSchema(many=True) tesseract_ocr_pipeline_models_schema = TesseractOCRPipelineModelSchema(many=True)
@bp.route('', methods=['GET']) @bp.route('', methods=['GET'])
@ -60,11 +60,11 @@ def create_tesseract_ocr_pipeline_job(args):
@bp.route('/tesseract-ocr-pipeline/models', methods=['GET']) @bp.route('/tesseract-ocr-pipeline/models', methods=['GET'])
@authenticate(token_auth) @authenticate(token_auth)
@response(tesseract_ocr_models_schema) @response(tesseract_ocr_pipeline_models_schema)
@other_responses(auth_error_responses) @other_responses(auth_error_responses)
def get_tesseract_ocr_models(): def get_tesseract_ocr_models():
"""Get all Tesseract OCR Models""" """Get all Tesseract OCR Models"""
return TesseractOCRModel.query.all() return TesseractOCRPipelineModel.query.all()
@bp.route('/<hashid:job_id>', methods=['DELETE']) @bp.route('/<hashid:job_id>', methods=['DELETE'])

View File

@ -3,7 +3,14 @@ from marshmallow import validate, validates, ValidationError
from marshmallow.decorators import post_dump from marshmallow.decorators import post_dump
from app import ma from app import ma
from app.auth import USERNAME_REGEX from app.auth import USERNAME_REGEX
from app.models import Job, JobStatus, TesseractOCRModel, Token, User, UserSettingJobStatusMailNotificationLevel from app.models import (
Job,
JobStatus,
TesseractOCRPipelineModel,
Token,
User,
UserSettingJobStatusMailNotificationLevel
)
from app.services import SERVICES from app.services import SERVICES
@ -21,9 +28,9 @@ class TokenSchema(ma.SQLAlchemySchema):
refresh_token = ma.String() refresh_token = ma.String()
class TesseractOCRModelSchema(ma.SQLAlchemySchema): class TesseractOCRPipelineModelSchema(ma.SQLAlchemySchema):
class Meta: class Meta:
model = TesseractOCRModel model = TesseractOCRPipelineModel
ordered = True ordered = True
hashid = ma.String(data_key='id', dump_only=True) hashid = ma.String(data_key='id', dump_only=True)

View File

@ -1,7 +1,7 @@
from apifairy import authenticate, body, response from apifairy import authenticate, body, response
from apifairy.decorators import other_responses from apifairy.decorators import other_responses
from flask import abort, Blueprint, current_app from flask import abort, Blueprint
from werkzeug.exceptions import InternalServerError from werkzeug.exceptions import InternalServerError
from app import db from app import db
from app.email import create_message, send from app.email import create_message, send

View File

@ -2,7 +2,12 @@ from flask import current_app
from flask_migrate import upgrade from flask_migrate import upgrade
import click import click
import os import os
from app.models import Role, User, TesseractOCRModel, TranskribusHTRModel from app.models import (
Role,
User,
TesseractOCRPipelineModel,
TranskribusHTRPipelineModel
)
def _make_default_dirs(): def _make_default_dirs():
@ -35,10 +40,10 @@ def register(app):
Role.insert_defaults() Role.insert_defaults()
current_app.logger.info('Insert/Update default users') current_app.logger.info('Insert/Update default users')
User.insert_defaults() User.insert_defaults()
current_app.logger.info('Insert/Update default TesseractOCRModels') current_app.logger.info('Insert/Update default TesseractOCRPipelineModels')
TesseractOCRModel.insert_defaults() TesseractOCRPipelineModel.insert_defaults()
current_app.logger.info('Insert/Update default TranskribusHTRModels') current_app.logger.info('Insert/Update default TranskribusHTRPipelineModels')
TranskribusHTRModel.insert_defaults() TranskribusHTRPipelineModel.insert_defaults()
@app.cli.group() @app.cli.group()
def converter(): def converter():

View File

@ -3,8 +3,8 @@ from app.models import (
Job, Job,
JobResult, JobResult,
JobStatus, JobStatus,
TesseractOCRModel, TesseractOCRPipelineModel,
TranskribusHTRModel TranskribusHTRPipelineModel
) )
from datetime import datetime from datetime import datetime
from flask import current_app from flask import current_app
@ -61,7 +61,7 @@ def _create_job_service(job):
if 'binarization' in job.service_args and job.service_args['binarization']: if 'binarization' in job.service_args and job.service_args['binarization']:
command += ' --binarize' command += ' --binarize'
elif job.service == 'transkribus-htr-pipeline': elif job.service == 'transkribus-htr-pipeline':
transkribus_htr_model = TranskribusHTRModel.query.get(job.service_args['model']) transkribus_htr_model = TranskribusHTRPipelineModel.query.get(job.service_args['model'])
command += f' -m {transkribus_htr_model.transkribus_model_id}' command += f' -m {transkribus_htr_model.transkribus_model_id}'
readcoop_username = current_app.config.get('NOPAQUE_READCOOP_USERNAME') readcoop_username = current_app.config.get('NOPAQUE_READCOOP_USERNAME')
command += f' --readcoop-username "{readcoop_username}"' command += f' --readcoop-username "{readcoop_username}"'
@ -96,7 +96,7 @@ def _create_job_service(job):
else: else:
job.status = JobStatus.FAILED job.status = JobStatus.FAILED
return return
model = TesseractOCRModel.query.get(model_id) model = TesseractOCRPipelineModel.query.get(model_id)
if model is None: if model is None:
job.status = JobStatus.FAILED job.status = JobStatus.FAILED
return return

View File

@ -23,7 +23,6 @@ from app.email import create_message
TRANSKRIBUS_HTR_MODELS = \ TRANSKRIBUS_HTR_MODELS = \
json.loads(requests.get('https://transkribus.eu/TrpServer/rest/models/text', params={'docType': 'handwritten'}).content)['trpModelMetadata'] # noqa json.loads(requests.get('https://transkribus.eu/TrpServer/rest/models/text', params={'docType': 'handwritten'}).content)['trpModelMetadata'] # noqa
############################################################################## ##############################################################################
# enums # # enums #
############################################################################## ##############################################################################
@ -274,14 +273,14 @@ class User(HashidMixin, UserMixin, db.Model):
last_seen = db.Column(db.DateTime()) last_seen = db.Column(db.DateTime())
# Backrefs: role: Role # Backrefs: role: Role
# Relationships # Relationships
tesseract_ocr_models = db.relationship( tesseract_ocr_pipeline_models = db.relationship(
'TesseractOCRModel', 'TesseractOCRPipelineModel',
backref='user', backref='user',
cascade='all, delete-orphan', cascade='all, delete-orphan',
lazy='dynamic' lazy='dynamic'
) )
transkribus_htr_models = db.relationship( transkribus_htr_pipeline_models = db.relationship(
'TranskribusHTRModel', 'TranskribusHTRPipelineModel',
backref='user', backref='user',
cascade='all, delete-orphan', cascade='all, delete-orphan',
lazy='dynamic' lazy='dynamic'
@ -342,7 +341,7 @@ class User(HashidMixin, UserMixin, db.Model):
db.session.refresh(user) db.session.refresh(user)
try: try:
os.mkdir(user.path) os.mkdir(user.path)
os.mkdir(os.path.join(user.path, 'tesseract_ocr_models')) os.mkdir(os.path.join(user.path, 'tesseract_ocr_pipeline_models'))
os.mkdir(os.path.join(user.path, 'corpora')) os.mkdir(os.path.join(user.path, 'corpora'))
os.mkdir(os.path.join(user.path, 'jobs')) os.mkdir(os.path.join(user.path, 'jobs'))
except OSError as e: except OSError as e:
@ -518,14 +517,14 @@ class User(HashidMixin, UserMixin, db.Model):
x.hashid: x.to_json(relationships=True) x.hashid: x.to_json(relationships=True)
for x in self.jobs for x in self.jobs
} }
_json['tesseract_ocr_models'] = { _json['tesseract_ocr_pipeline_models'] = {
x.hashid: x.to_json(relationships=True) x.hashid: x.to_json(relationships=True)
for x in self.tesseract_ocr_models for x in self.tesseract_ocr_pipeline_models
} }
return _json return _json
class TesseractOCRModel(FileMixin, HashidMixin, db.Model): class TesseractOCRPipelineModel(FileMixin, HashidMixin, db.Model):
__tablename__ = 'tesseract_ocr_models' __tablename__ = 'tesseract_ocr_pipeline_models'
# Primary key # Primary key
id = db.Column(db.Integer, primary_key=True) id = db.Column(db.Integer, primary_key=True)
# Foreign keys # Foreign keys
@ -546,7 +545,7 @@ class TesseractOCRModel(FileMixin, HashidMixin, db.Model):
def path(self): def path(self):
return os.path.join( return os.path.join(
self.user.path, self.user.path,
'tesseract_ocr_models', 'tesseract_ocr_pipeline_models',
str(self.id) str(self.id)
) )
@ -555,12 +554,12 @@ class TesseractOCRModel(FileMixin, HashidMixin, db.Model):
nopaque_user = User.query.filter_by(username='nopaque').first() nopaque_user = User.query.filter_by(username='nopaque').first()
defaults_file = os.path.join( defaults_file = os.path.join(
os.path.dirname(os.path.abspath(__file__)), os.path.dirname(os.path.abspath(__file__)),
'TesseractOCRModel.defaults.yml' 'TesseractOCRPipelineModel.defaults.yml'
) )
with open(defaults_file, 'r') as f: with open(defaults_file, 'r') as f:
defaults = yaml.safe_load(f) defaults = yaml.safe_load(f)
for m in defaults: for m in defaults:
model = TesseractOCRModel.query.filter_by(title=m['title'], version=m['version']).first() # noqa model = TesseractOCRPipelineModel.query.filter_by(title=m['title'], version=m['version']).first() # noqa
if model is not None: if model is not None:
model.compatible_service_versions = m['compatible_service_versions'] model.compatible_service_versions = m['compatible_service_versions']
model.description = m['description'] model.description = m['description']
@ -572,7 +571,7 @@ class TesseractOCRModel(FileMixin, HashidMixin, db.Model):
model.title = m['title'] model.title = m['title']
model.version = m['version'] model.version = m['version']
continue continue
model = TesseractOCRModel( model = TesseractOCRPipelineModel(
compatible_service_versions=m['compatible_service_versions'], compatible_service_versions=m['compatible_service_versions'],
description=m['description'], description=m['description'],
publisher=m['publisher'], publisher=m['publisher'],
@ -623,8 +622,8 @@ class TesseractOCRModel(FileMixin, HashidMixin, db.Model):
return _json return _json
class TranskribusHTRModel(HashidMixin, db.Model): class TranskribusHTRPipelineModel(HashidMixin, db.Model):
__tablename__ = 'transkribus_htr_models' __tablename__ = 'transkribus_htr_pipeline_models'
# Primary key # Primary key
id = db.Column(db.Integer, primary_key=True) id = db.Column(db.Integer, primary_key=True)
# Foreign keys # Foreign keys
@ -643,12 +642,12 @@ class TranskribusHTRModel(HashidMixin, db.Model):
# and 'docType' in m and m['docType'] == 'handwritten' # and 'docType' in m and m['docType'] == 'handwritten'
# ] # ]
for m in TRANSKRIBUS_HTR_MODELS: for m in TRANSKRIBUS_HTR_MODELS:
model = TranskribusHTRModel.query.filter_by(transkribus_model_id=m['modelId']).first() # noqa model = TranskribusHTRPipelineModel.query.filter_by(transkribus_model_id=m['modelId']).first() # noqa
if model is not None: if model is not None:
model.shared = True model.shared = True
model.transkribus_model_id = m['modelId'] model.transkribus_model_id = m['modelId']
continue continue
model = TranskribusHTRModel( model = TranskribusHTRPipelineModel(
transkribus_model_id=m['modelId'], transkribus_model_id=m['modelId'],
shared=True, shared=True,
user=nopaque_user, user=nopaque_user,

View File

@ -12,8 +12,8 @@ from wtforms import (
from wtforms.validators import InputRequired, Length from wtforms.validators import InputRequired, Length
from app.models import ( from app.models import (
TRANSKRIBUS_HTR_MODELS, TRANSKRIBUS_HTR_MODELS,
TesseractOCRModel, TesseractOCRPipelineModel,
TranskribusHTRModel TranskribusHTRPipelineModel
) )
from . import SERVICES from . import SERVICES
@ -77,7 +77,7 @@ class CreateTesseractOCRPipelineJobForm(CreateJobBaseForm):
if 'disabled' in self.binarization.render_kw: if 'disabled' in self.binarization.render_kw:
del self.binarization.render_kw['disabled'] del self.binarization.render_kw['disabled']
models = [ models = [
x for x in TesseractOCRModel.query.filter().all() x for x in TesseractOCRPipelineModel.query.filter().all()
if version in x.compatible_service_versions and (x.shared == True or x.user == current_user) if version in x.compatible_service_versions and (x.shared == True or x.user == current_user)
] ]
self.model.choices = [('', 'Choose your option')] self.model.choices = [('', 'Choose your option')]
@ -119,7 +119,7 @@ class CreateTranskribusHTRPipelineJobForm(CreateJobBaseForm):
if 'disabled' in self.binarization.render_kw: if 'disabled' in self.binarization.render_kw:
del self.binarization.render_kw['disabled'] del self.binarization.render_kw['disabled']
models = [ models = [
x for x in TranskribusHTRModel.query.filter().all() x for x in TranskribusHTRPipelineModel.query.filter().all()
if x.shared == True or x.user == current_user if x.shared == True or x.user == current_user
] ]
self.model.choices = [('', 'Choose your option')] self.model.choices = [('', 'Choose your option')]

View File

@ -5,9 +5,9 @@ from app.models import (
Job, Job,
JobInput, JobInput,
JobStatus, JobStatus,
TesseractOCRModel, TesseractOCRPipelineModel,
TRANSKRIBUS_HTR_MODELS, TRANSKRIBUS_HTR_MODELS,
TranskribusHTRModel TranskribusHTRPipelineModel
) )
from . import bp, SERVICES from . import bp, SERVICES
from .forms import ( from .forms import (
@ -95,14 +95,14 @@ def tesseract_ocr_pipeline():
message = Markup(f'Job "<a href="{job.url}">{job.title}</a>" created') message = Markup(f'Job "<a href="{job.url}">{job.title}</a>" created')
flash(message, 'job') flash(message, 'job')
return {}, 201, {'Location': job.url} return {}, 201, {'Location': job.url}
tesseract_ocr_models = [ tesseract_ocr_pipeline_models = [
x for x in TesseractOCRModel.query.all() x for x in TesseractOCRPipelineModel.query.all()
if version in x.compatible_service_versions and (x.shared == True or x.user == current_user) if version in x.compatible_service_versions and (x.shared == True or x.user == current_user)
] ]
return render_template( return render_template(
'services/tesseract_ocr_pipeline.html.j2', 'services/tesseract_ocr_pipeline.html.j2',
form=form, form=form,
tesseract_ocr_models=tesseract_ocr_models, tesseract_ocr_pipeline_models=tesseract_ocr_pipeline_models,
title=service_manifest['name'] title=service_manifest['name']
) )
@ -145,8 +145,8 @@ def transkribus_htr_pipeline():
message = Markup(f'Job "<a href="{job.url}">{job.title}</a>" created') message = Markup(f'Job "<a href="{job.url}">{job.title}</a>" created')
flash(message, 'job') flash(message, 'job')
return {}, 201, {'Location': job.url} return {}, 201, {'Location': job.url}
transkribus_htr_models = [ transkribus_htr_pipeline_models = [
x for x in TranskribusHTRModel.query.all() x for x in TranskribusHTRPipelineModel.query.all()
if x.shared == True or x.user == current_user if x.shared == True or x.user == current_user
] ]
return render_template( return render_template(
@ -154,7 +154,7 @@ def transkribus_htr_pipeline():
form=form, form=form,
title=service_manifest['name'], title=service_manifest['name'],
TRANSKRIBUS_HTR_MODELS=TRANSKRIBUS_HTR_MODELS, TRANSKRIBUS_HTR_MODELS=TRANSKRIBUS_HTR_MODELS,
transkribus_htr_models=transkribus_htr_models transkribus_htr_pipeline_models=transkribus_htr_pipeline_models
) )

View File

@ -160,8 +160,8 @@
</tr> </tr>
</thead> </thead>
<tbody> <tbody>
{% for m in tesseract_ocr_models %} {% for m in tesseract_ocr_pipeline_models %}
<tr id="tesseract-ocr-model-{{ m.hashid }}"> <tr id="tesseract-ocr-pipeline-model-{{ m.hashid }}">
<td>{{ m.title }}</td> <td>{{ m.title }}</td>
{% if m.description == '' %} {% if m.description == '' %}
<td>Description is not available.</td> <td>Description is not available.</td>

View File

@ -157,8 +157,8 @@
<div class="modal-content"> <div class="modal-content">
<h4>Transkribus HTR Pipeline models</h4> <h4>Transkribus HTR Pipeline models</h4>
<ul class="collapsible popout" id="transkribus-htr-models"> <ul class="collapsible popout" id="transkribus-htr-models">
{% for m in transkribus_htr_models %} {% for m in transkribus_htr_pipeline_models %}
<li id="transkribus-htr-model-{{ m.hashid }}"> <li id="transkribus-htr-pipeline-model-{{ m.hashid }}">
{% for m_info in TRANSKRIBUS_HTR_MODELS if m_info['modelId'] == m.transkribus_model_id %} {% for m_info in TRANSKRIBUS_HTR_MODELS if m_info['modelId'] == m.transkribus_model_id %}
<div class="collapsible-header"><i class="material-icons">widgets</i>{{ m_info.name }}</div> <div class="collapsible-header"><i class="material-icons">widgets</i>{{ m_info.name }}</div>
<div class="collapsible-body"> <div class="collapsible-body">

View File

@ -0,0 +1,59 @@
"""Rename pipeline model tables
Revision ID: 63b2cc26a01f
Revises: 260b57d5f4e7
Create Date: 2022-10-11 14:32:13.227364
"""
from alembic import op
from flask import current_app
import os
from app.models import User
# revision identifiers, used by Alembic.
revision = '63b2cc26a01f'
down_revision = '260b57d5f4e7'
branch_labels = None
depends_on = None
def upgrade():
    """Rename the pipeline model directories and tables to their new names.

    For every user, the directory ``tesseract_ocr_models`` inside
    ``user.path`` is renamed to ``tesseract_ocr_pipeline_models``. Afterwards
    the tables ``tesseract_ocr_models`` and ``transkribus_htr_models`` are
    renamed to ``tesseract_ocr_pipeline_models`` and
    ``transkribus_htr_pipeline_models``.
    """
    # NOTE(review): this queries through the application's current ``User``
    # model, which may drift from the schema at this revision - confirm
    # before copying this pattern into later migrations.
    for user in User.query.all():
        old_path = os.path.join(user.path, 'tesseract_ocr_models')
        new_path = os.path.join(user.path, 'tesseract_ocr_pipeline_models')
        try:
            os.rename(old_path, new_path)
        except FileNotFoundError:
            # File system renames are not undone when the migration aborts
            # later on. Skipping a missing source directory keeps the
            # migration re-runnable after a partial run and tolerates users
            # that never had this directory.
            continue
    op.rename_table('tesseract_ocr_models', 'tesseract_ocr_pipeline_models')
    op.rename_table('transkribus_htr_models', 'transkribus_htr_pipeline_models')
def downgrade():
    """Revert the pipeline model directories and tables to their old names.

    For every user, the directory ``tesseract_ocr_pipeline_models`` inside
    ``user.path`` is renamed back to ``tesseract_ocr_models``. Afterwards the
    tables ``tesseract_ocr_pipeline_models`` and
    ``transkribus_htr_pipeline_models`` are renamed back to
    ``tesseract_ocr_models`` and ``transkribus_htr_models``.
    """
    # NOTE(review): this queries through the application's current ``User``
    # model, which may drift from the schema at this revision - confirm
    # before copying this pattern into later migrations.
    for user in User.query.all():
        current_path = os.path.join(user.path, 'tesseract_ocr_pipeline_models')
        previous_path = os.path.join(user.path, 'tesseract_ocr_models')
        try:
            os.rename(current_path, previous_path)
        except FileNotFoundError:
            # File system renames are not undone when the migration aborts
            # later on. Skipping a missing source directory keeps the
            # downgrade re-runnable after a partial run and tolerates users
            # that never had this directory.
            continue
    op.rename_table('tesseract_ocr_pipeline_models', 'tesseract_ocr_models')
    op.rename_table('transkribus_htr_pipeline_models', 'transkribus_htr_models')

View File

@ -12,8 +12,8 @@ from app.models import (
JobResult, JobResult,
Permission, Permission,
Role, Role,
TesseractOCRModel, TesseractOCRPipelineModel,
TranskribusHTRModel, TranskribusHTRPipelineModel,
User User
) # noqa ) # noqa
from flask import Flask # noqa from flask import Flask # noqa
@ -42,8 +42,8 @@ def make_shell_context() -> Dict[str, Any]:
'JobResult': JobResult, 'JobResult': JobResult,
'Permission': Permission, 'Permission': Permission,
'Role': Role, 'Role': Role,
'TesseractOCRModel': TesseractOCRModel, 'TesseractOCRPipelineModel': TesseractOCRPipelineModel,
'TranskribusHTRModel': TranskribusHTRModel, 'TranskribusHTRPipelineModel': TranskribusHTRPipelineModel,
'User': User 'User': User
} }