From 61098535bea7cfbc4bff3d82f8e9317d04898cee Mon Sep 17 00:00:00 2001 From: dameyer Date: Sun, 2 Oct 2022 23:47:15 +0200 Subject: [PATCH 01/19] Update gitignore. --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index 76c4e06b..14a22fe1 100644 --- a/.gitignore +++ b/.gitignore @@ -39,3 +39,4 @@ __pycache__ # Virtual environment venv +.idea From 9802fdd175028c5e75c41358b935dd9c6e29c743 Mon Sep 17 00:00:00 2001 From: dameyer Date: Sun, 2 Oct 2022 23:49:14 +0200 Subject: [PATCH 02/19] Add form and template for adding a model. --- app/contributions/forms.py | 75 +++++++++++++++++++ app/contributions/routes.py | 11 ++- .../contributions/contribute.html.j2 | 32 ++++++++ 3 files changed, 116 insertions(+), 2 deletions(-) create mode 100644 app/contributions/forms.py create mode 100644 app/templates/contributions/contribute.html.j2 diff --git a/app/contributions/forms.py b/app/contributions/forms.py new file mode 100644 index 00000000..f00e49c3 --- /dev/null +++ b/app/contributions/forms.py @@ -0,0 +1,75 @@ +from flask_wtf import FlaskForm +from wtforms import ( + BooleanField, + PasswordField, + StringField, + SubmitField, + SelectMultipleField, + ValidationError, FieldList, IntegerField +) +from wtforms.validators import InputRequired, Email, EqualTo, Length, Regexp +from app.models import User + + +class ContributionForm(FlaskForm): + + # The id field will be generated on insert. + # The user_id will be retrieved from the user who is performing this operation. + + # title = db.Column(db.String(64)) + title = StringField( + 'Title', + validators=[InputRequired(), Length(max=64)] + ) + + # description = db.Column(db.String(255)) + description = StringField( + 'Description', + validators=[InputRequired(), Length(max=255)] + ) + + # version = db.Column(db.String(16)) + version = StringField( + 'Version', + validators=[InputRequired(), Length(max=16)] + ) + + # compatible_service_versions = db.Column(ContainerColumn(list, 255)) + compatible_service_versions = SelectMultipleField( + 'Compatible Service Versions', + choices=["asd", "blub", "bla"] + ) + + # publisher = db.Column(db.String(128)) + publisher = StringField( + 'Publisher', + validators=[InputRequired(), Length(max=128)] + ) + + # publisher_url = db.Column(db.String(512)) + publisher_url = StringField( + 'Publisher URL', + validators=[InputRequired(), Length(max=512)] + ) + + # publishing_url = db.Column(db.String(512)) + publishing_url = StringField( + 'Publishing URL', + validators=[InputRequired(), Length(max=512)] + ) + + # publishing_year = db.Column(db.Integer) + publishing_year = IntegerField( + 'Publishing year', + validators=[InputRequired()] + ) + + # shared = db.Column(db.Boolean, default=False) + shared = BooleanField( + 'Shared', + validators=[InputRequired()] + ) + + submit = SubmitField() + + diff --git a/app/contributions/routes.py b/app/contributions/routes.py index 80c6a82d..34d54d41 100644 --- a/app/contributions/routes.py +++ b/app/contributions/routes.py @@ -1,7 +1,9 @@ +from flask import render_template from flask_login import login_required from app.decorators import permission_required from app.models import Permission from . import bp +from .forms import ContributionForm @bp.before_request @@ -11,6 +13,11 @@ def before_request(): pass -@bp.route('') +@bp.route('', methods=['GET', 'POST']) def contributions(): - pass + form = ContributionForm(prefix='contribution-form') + return render_template( + 'contributions/contribute.html.j2', + form=form, + title='Contribution' + ) diff --git a/app/templates/contributions/contribute.html.j2 b/app/templates/contributions/contribute.html.j2 new file mode 100644 index 00000000..6789e1f8 --- /dev/null +++ b/app/templates/contributions/contribute.html.j2 @@ -0,0 +1,32 @@ +{% extends "base.html.j2" %} +{% import "materialize/wtf.html.j2" as wtf %} + + +{% block page_content %} +
+
+
+

{{ title }}

+

+ In order to add a new model, please fill in the form below. +

+ +
+
+ {{ form.hidden_tag() }} + {{ wtf.render_field(form.title) }} + {{ wtf.render_field(form.description) }} + {{ wtf.render_field(form.publisher) }} + {{ wtf.render_field(form.publisher_url) }} + {{ wtf.render_field(form.publishing_url) }} + {{ wtf.render_field(form.publishing_year) }} + {{ wtf.render_field(form.shared) }} + {{ wtf.render_field(form.version) }} + {{ wtf.render_field(form.compatible_service_versions) }} + {{ wtf.render_field(form.submit, class_='width-100', material_icon='send') }} + +
+
+
+
+{% endblock page_content %} \ No newline at end of file From 236d3e7ee40cfaf757549e6e6255b26fec3fa21d Mon Sep 17 00:00:00 2001 From: Patrick Jentsch Date: Wed, 12 Oct 2022 10:23:05 +0200 Subject: [PATCH 03/19] Rename tables for pipeline models --- ...=> TesseractOCRPipelineModel.defaults.yml} | 0 app/api/auth.py | 1 - app/api/jobs.py | 12 ++-- app/api/schemas.py | 13 +++- app/api/users.py | 2 +- app/cli.py | 15 +++-- app/daemon/job_utils.py | 8 +-- app/models.py | 35 ++++++----- app/services/forms.py | 8 +-- app/services/routes.py | 16 ++--- .../services/tesseract_ocr_pipeline.html.j2 | 4 +- .../services/transkribus_htr_pipeline.html.j2 | 4 +- migrations/versions/63b2cc26a01f_.py | 59 +++++++++++++++++++ nopaque.py | 8 +-- 14 files changed, 127 insertions(+), 58 deletions(-) rename app/{TesseractOCRModel.defaults.yml => TesseractOCRPipelineModel.defaults.yml} (100%) create mode 100644 migrations/versions/63b2cc26a01f_.py diff --git a/app/TesseractOCRModel.defaults.yml b/app/TesseractOCRPipelineModel.defaults.yml similarity index 100% rename from app/TesseractOCRModel.defaults.yml rename to app/TesseractOCRPipelineModel.defaults.yml diff --git a/app/api/auth.py b/app/api/auth.py index afda3a30..398052f5 100644 --- a/app/api/auth.py +++ b/app/api/auth.py @@ -1,4 +1,3 @@ -from flask import current_app from flask_httpauth import HTTPBasicAuth, HTTPTokenAuth from werkzeug.exceptions import Forbidden, Unauthorized from app.models import User diff --git a/app/api/jobs.py b/app/api/jobs.py index e730f2e6..2eaecd3f 100644 --- a/app/api/jobs.py +++ b/app/api/jobs.py @@ -4,8 +4,8 @@ from apifairy.decorators import body, other_responses from flask import abort, Blueprint from werkzeug.exceptions import InternalServerError from app import db, hashids -from app.models import Job, JobInput, JobStatus, TesseractOCRModel -from .schemas import EmptySchema, JobSchema, SpaCyNLPPipelineJobSchema, TesseractOCRPipelineJobSchema, TesseractOCRModelSchema +from app.models import Job, JobInput, JobStatus, TesseractOCRPipelineModel +from .schemas import EmptySchema, JobSchema, SpaCyNLPPipelineJobSchema, TesseractOCRPipelineJobSchema, TesseractOCRPipelineModelSchema from .auth import auth_error_responses, token_auth @@ -14,8 +14,8 @@ job_schema = JobSchema() jobs_schema = JobSchema(many=True) spacy_nlp_pipeline_job_schema = SpaCyNLPPipelineJobSchema() tesseract_ocr_pipeline_job_schema = TesseractOCRPipelineJobSchema() -tesseract_ocr_model_schema = TesseractOCRModelSchema() -tesseract_ocr_models_schema = TesseractOCRModelSchema(many=True) +tesseract_ocr_pipeline_model_schema = TesseractOCRPipelineModelSchema() +tesseract_ocr_pipeline_models_schema = TesseractOCRPipelineModelSchema(many=True) @bp.route('', methods=['GET']) @@ -60,11 +60,11 @@ def create_tesseract_ocr_pipeline_job(args): @bp.route('/tesseract-ocr-pipeline/models', methods=['GET']) @authenticate(token_auth) -@response(tesseract_ocr_models_schema) +@response(tesseract_ocr_pipeline_models_schema) @other_responses(auth_error_responses) def get_tesseract_ocr_models(): """Get all Tesseract OCR Models""" - return TesseractOCRModel.query.all() + return TesseractOCRPipelineModel.query.all() @bp.route('/', methods=['DELETE']) diff --git a/app/api/schemas.py b/app/api/schemas.py index 394b1ebb..9474bd1a 100644 --- a/app/api/schemas.py +++ b/app/api/schemas.py @@ -3,7 +3,14 @@ from marshmallow import validate, validates, ValidationError from marshmallow.decorators import post_dump from app import ma from app.auth import USERNAME_REGEX -from app.models import Job, JobStatus, TesseractOCRModel, Token, User, UserSettingJobStatusMailNotificationLevel +from app.models import ( + Job, + JobStatus, + TesseractOCRPipelineModel, + Token, + User, + UserSettingJobStatusMailNotificationLevel +) from app.services import SERVICES @@ -21,9 +28,9 @@ class TokenSchema(ma.SQLAlchemySchema): refresh_token = ma.String() -class TesseractOCRModelSchema(ma.SQLAlchemySchema): +class TesseractOCRPipelineModelSchema(ma.SQLAlchemySchema): class Meta: - model = TesseractOCRModel + model = TesseractOCRPipelineModel ordered = True hashid = ma.String(data_key='id', dump_only=True) diff --git a/app/api/users.py b/app/api/users.py index fc180df0..c9ea5d39 100644 --- a/app/api/users.py +++ b/app/api/users.py @@ -1,7 +1,7 @@ from apifairy import authenticate, body, response from apifairy.decorators import other_responses -from flask import abort, Blueprint, current_app +from flask import abort, Blueprint from werkzeug.exceptions import InternalServerError from app import db from app.email import create_message, send diff --git a/app/cli.py b/app/cli.py index d9b4fdf0..bfaf8754 100644 --- a/app/cli.py +++ b/app/cli.py @@ -2,7 +2,12 @@ from flask import current_app from flask_migrate import upgrade import click import os -from app.models import Role, User, TesseractOCRModel, TranskribusHTRModel +from app.models import ( + Role, + User, + TesseractOCRPipelineModel, + TranskribusHTRPipelineModel +) def _make_default_dirs(): @@ -35,10 +40,10 @@ def register(app): Role.insert_defaults() current_app.logger.info('Insert/Update default users') User.insert_defaults() - current_app.logger.info('Insert/Update default TesseractOCRModels') - TesseractOCRModel.insert_defaults() - current_app.logger.info('Insert/Update default TranskribusHTRModels') - TranskribusHTRModel.insert_defaults() + current_app.logger.info('Insert/Update default TesseractOCRPipelineModels') + TesseractOCRPipelineModel.insert_defaults() + current_app.logger.info('Insert/Update default TranskribusHTRPipelineModels') + TranskribusHTRPipelineModel.insert_defaults() @app.cli.group() def converter(): diff --git a/app/daemon/job_utils.py b/app/daemon/job_utils.py index 38d6c48b..0f74f3a0 100644 --- a/app/daemon/job_utils.py +++ b/app/daemon/job_utils.py @@ -3,8 +3,8 @@ from app.models import ( Job, JobResult, JobStatus, - TesseractOCRModel, - TranskribusHTRModel + TesseractOCRPipelineModel, + TranskribusHTRPipelineModel ) from datetime import datetime from flask import current_app @@ -61,7 +61,7 @@ def _create_job_service(job): if 'binarization' in job.service_args and job.service_args['binarization']: command += ' --binarize' elif job.service == 'transkribus-htr-pipeline': - transkribus_htr_model = TranskribusHTRModel.query.get(job.service_args['model']) + transkribus_htr_model = TranskribusHTRPipelineModel.query.get(job.service_args['model']) command += f' -m {transkribus_htr_model.transkribus_model_id}' readcoop_username = current_app.config.get('NOPAQUE_READCOOP_USERNAME') command += f' --readcoop-username "{readcoop_username}"' @@ -96,7 +96,7 @@ def _create_job_service(job): else: job.status = JobStatus.FAILED return - model = TesseractOCRModel.query.get(model_id) + model = TesseractOCRPipelineModel.query.get(model_id) if model is None: job.status = JobStatus.FAILED return diff --git a/app/models.py b/app/models.py index 21a64d35..beab7d6e 100644 --- a/app/models.py +++ b/app/models.py @@ -23,7 +23,6 @@ from app.email import create_message TRANSKRIBUS_HTR_MODELS = \ json.loads(requests.get('https://transkribus.eu/TrpServer/rest/models/text', params={'docType': 'handwritten'}).content)['trpModelMetadata'] # noqa - ############################################################################## # enums # ############################################################################## @@ -274,14 +273,14 @@ class User(HashidMixin, UserMixin, db.Model): last_seen = db.Column(db.DateTime()) # Backrefs: role: Role # Relationships - tesseract_ocr_models = db.relationship( - 'TesseractOCRModel', + tesseract_ocr_pipeline_models = db.relationship( + 'TesseractOCRPipelineModel', backref='user', cascade='all, delete-orphan', lazy='dynamic' ) - transkribus_htr_models = db.relationship( - 'TranskribusHTRModel', + transkribus_htr_pipeline_models = db.relationship( + 'TranskribusHTRPipelineModel', backref='user', cascade='all, delete-orphan', lazy='dynamic' @@ -342,7 +341,7 @@ class User(HashidMixin, UserMixin, db.Model): db.session.refresh(user) try: os.mkdir(user.path) - os.mkdir(os.path.join(user.path, 'tesseract_ocr_models')) + os.mkdir(os.path.join(user.path, 'tesseract_ocr_pipeline_models')) os.mkdir(os.path.join(user.path, 'corpora')) os.mkdir(os.path.join(user.path, 'jobs')) except OSError as e: @@ -518,14 +517,14 @@ class User(HashidMixin, UserMixin, db.Model): x.hashid: x.to_json(relationships=True) for x in self.jobs } - _json['tesseract_ocr_models'] = { + _json['tesseract_ocr_pipeline_models'] = { x.hashid: x.to_json(relationships=True) - for x in self.tesseract_ocr_models + for x in self.tesseract_ocr_pipeline_models } return _json -class TesseractOCRModel(FileMixin, HashidMixin, db.Model): - __tablename__ = 'tesseract_ocr_models' +class TesseractOCRPipelineModel(FileMixin, HashidMixin, db.Model): + __tablename__ = 'tesseract_ocr_pipeline_models' # Primary key id = db.Column(db.Integer, primary_key=True) # Foreign keys @@ -546,7 +545,7 @@ class TesseractOCRModel(FileMixin, HashidMixin, db.Model): def path(self): return os.path.join( self.user.path, - 'tesseract_ocr_models', + 'tesseract_ocr_pipeline_models', str(self.id) ) @@ -555,12 +554,12 @@ class TesseractOCRModel(FileMixin, HashidMixin, db.Model): nopaque_user = User.query.filter_by(username='nopaque').first() defaults_file = os.path.join( os.path.dirname(os.path.abspath(__file__)), - 'TesseractOCRModel.defaults.yml' + 'TesseractOCRPipelineModel.defaults.yml' ) with open(defaults_file, 'r') as f: defaults = yaml.safe_load(f) for m in defaults: - model = TesseractOCRModel.query.filter_by(title=m['title'], version=m['version']).first() # noqa + model = TesseractOCRPipelineModel.query.filter_by(title=m['title'], version=m['version']).first() # noqa if model is not None: model.compatible_service_versions = m['compatible_service_versions'] model.description = m['description'] @@ -572,7 +571,7 @@ class TesseractOCRModel(FileMixin, HashidMixin, db.Model): model.title = m['title'] model.version = m['version'] continue - model = TesseractOCRModel( + model = TesseractOCRPipelineModel( compatible_service_versions=m['compatible_service_versions'], description=m['description'], publisher=m['publisher'], @@ -623,8 +622,8 @@ class TesseractOCRModel(FileMixin, HashidMixin, db.Model): return _json -class TranskribusHTRModel(HashidMixin, db.Model): - __tablename__ = 'transkribus_htr_models' +class TranskribusHTRPipelineModel(HashidMixin, db.Model): + __tablename__ = 'transkribus_htr_pipeline_models' # Primary key id = db.Column(db.Integer, primary_key=True) # Foreign keys @@ -643,12 +642,12 @@ class TranskribusHTRModel(HashidMixin, db.Model): # and 'docType' in m and m['docType'] == 'handwritten' # ] for m in TRANSKRIBUS_HTR_MODELS: - model = TranskribusHTRModel.query.filter_by(transkribus_model_id=m['modelId']).first() # noqa + model = TranskribusHTRPipelineModel.query.filter_by(transkribus_model_id=m['modelId']).first() # noqa if model is not None: model.shared = True model.transkribus_model_id = m['modelId'] continue - model = TranskribusHTRModel( + model = TranskribusHTRPipelineModel( transkribus_model_id=m['modelId'], shared=True, user=nopaque_user, diff --git a/app/services/forms.py b/app/services/forms.py index 008e0d0a..ea1c1e70 100644 --- a/app/services/forms.py +++ b/app/services/forms.py @@ -12,8 +12,8 @@ from wtforms import ( from wtforms.validators import InputRequired, Length from app.models import ( TRANSKRIBUS_HTR_MODELS, - TesseractOCRModel, - TranskribusHTRModel + TesseractOCRPipelineModel, + TranskribusHTRPipelineModel ) from . import SERVICES @@ -77,7 +77,7 @@ class CreateTesseractOCRPipelineJobForm(CreateJobBaseForm): if 'disabled' in self.binarization.render_kw: del self.binarization.render_kw['disabled'] models = [ - x for x in TesseractOCRModel.query.filter().all() + x for x in TesseractOCRPipelineModel.query.filter().all() if version in x.compatible_service_versions and (x.shared == True or x.user == current_user) ] self.model.choices = [('', 'Choose your option')] @@ -119,7 +119,7 @@ class CreateTranskribusHTRPipelineJobForm(CreateJobBaseForm): if 'disabled' in self.binarization.render_kw: del self.binarization.render_kw['disabled'] models = [ - x for x in TranskribusHTRModel.query.filter().all() + x for x in TranskribusHTRPipelineModel.query.filter().all() if x.shared == True or x.user == current_user ] self.model.choices = [('', 'Choose your option')] diff --git a/app/services/routes.py b/app/services/routes.py index 913acbb7..4f019525 100644 --- a/app/services/routes.py +++ b/app/services/routes.py @@ -5,9 +5,9 @@ from app.models import ( Job, JobInput, JobStatus, - TesseractOCRModel, + TesseractOCRPipelineModel, TRANSKRIBUS_HTR_MODELS, - TranskribusHTRModel + TranskribusHTRPipelineModel ) from . import bp, SERVICES from .forms import ( @@ -95,14 +95,14 @@ def tesseract_ocr_pipeline(): message = Markup(f'Job "{job.title}" created') flash(message, 'job') return {}, 201, {'Location': job.url} - tesseract_ocr_models = [ - x for x in TesseractOCRModel.query.all() + tesseract_ocr_pipeline_models = [ + x for x in TesseractOCRPipelineModel.query.all() if version in x.compatible_service_versions and (x.shared == True or x.user == current_user) ] return render_template( 'services/tesseract_ocr_pipeline.html.j2', form=form, - tesseract_ocr_models=tesseract_ocr_models, + tesseract_ocr_pipeline_models=tesseract_ocr_pipeline_models, title=service_manifest['name'] ) @@ -145,8 +145,8 @@ def transkribus_htr_pipeline(): message = Markup(f'Job "{job.title}" created') flash(message, 'job') return {}, 201, {'Location': job.url} - transkribus_htr_models = [ - x for x in TranskribusHTRModel.query.all() + transkribus_htr_pipeline_models = [ + x for x in TranskribusHTRPipelineModel.query.all() if x.shared == True or x.user == current_user ] return render_template( @@ -154,7 +154,7 @@ def transkribus_htr_pipeline(): form=form, title=service_manifest['name'], TRANSKRIBUS_HTR_MODELS=TRANSKRIBUS_HTR_MODELS, - transkribus_htr_models=transkribus_htr_models + transkribus_htr_pipeline_models=transkribus_htr_pipeline_models ) diff --git a/app/templates/services/tesseract_ocr_pipeline.html.j2 b/app/templates/services/tesseract_ocr_pipeline.html.j2 index c38c3965..982265bc 100644 --- a/app/templates/services/tesseract_ocr_pipeline.html.j2 +++ b/app/templates/services/tesseract_ocr_pipeline.html.j2 @@ -160,8 +160,8 @@ - {% for m in tesseract_ocr_models %} - + {% for m in tesseract_ocr_pipeline_models %} + {{ m.title }} {% if m.description == '' %} Description is not available. diff --git a/app/templates/services/transkribus_htr_pipeline.html.j2 b/app/templates/services/transkribus_htr_pipeline.html.j2 index 7aedbd4f..79519f3a 100644 --- a/app/templates/services/transkribus_htr_pipeline.html.j2 +++ b/app/templates/services/transkribus_htr_pipeline.html.j2 @@ -157,8 +157,8 @@