Merge branch 'development' into contributions

This commit is contained in:
Inga Kirschnick 2022-10-26 12:13:39 +02:00
commit 024eeaa063
38 changed files with 1535 additions and 703 deletions

View File

@ -21,6 +21,9 @@ HOST_DOCKER_GID=
# NOTES: Use `.` as <project-basedir> # NOTES: Use `.` as <project-basedir>
# HOST_LOG_DIR= # HOST_LOG_DIR=
# DEFAULT: nopaque_default
# DOCKER_NETWORK_NAME=
################################################################################ ################################################################################
# Flask # # Flask #
# https://flask.palletsprojects.com/en/1.1.x/config/ # # https://flask.palletsprojects.com/en/1.1.x/config/ #

1
.gitignore vendored
View File

@ -39,3 +39,4 @@ __pycache__
# Virtual environment # Virtual environment
venv venv
.idea

View File

@ -0,0 +1,10 @@
- title: 'de_core_news_md-3.4.0'
description: 'German pipeline optimized for CPU. Components: tok2vec, tagger, morphologizer, parser, lemmatizer (trainable_lemmatizer), senter, ner.'
url: 'https://github.com/explosion/spacy-models/releases/download/de_core_news_md-3.4.0/de_core_news_md-3.4.0.tar.gz'
publisher: 'Explosion'
publisher_url: 'https://github.com/explosion'
publishing_url: 'https://github.com/explosion/spacy-models/releases/tag/de_core_news_md-3.4.0'
publishing_year: 2022
version: '3.4.0'
compatible_service_versions:
- '0.1.0'

View File

@ -1,4 +1,3 @@
from flask import current_app
from flask_httpauth import HTTPBasicAuth, HTTPTokenAuth from flask_httpauth import HTTPBasicAuth, HTTPTokenAuth
from werkzeug.exceptions import Forbidden, Unauthorized from werkzeug.exceptions import Forbidden, Unauthorized
from app.models import User from app.models import User

View File

@ -4,8 +4,8 @@ from apifairy.decorators import body, other_responses
from flask import abort, Blueprint from flask import abort, Blueprint
from werkzeug.exceptions import InternalServerError from werkzeug.exceptions import InternalServerError
from app import db, hashids from app import db, hashids
from app.models import Job, JobInput, JobStatus, TesseractOCRModel from app.models import Job, JobInput, JobStatus, TesseractOCRPipelineModel
from .schemas import EmptySchema, JobSchema, SpaCyNLPPipelineJobSchema, TesseractOCRPipelineJobSchema, TesseractOCRModelSchema from .schemas import EmptySchema, JobSchema, SpaCyNLPPipelineJobSchema, TesseractOCRPipelineJobSchema, TesseractOCRPipelineModelSchema
from .auth import auth_error_responses, token_auth from .auth import auth_error_responses, token_auth
@ -14,8 +14,8 @@ job_schema = JobSchema()
jobs_schema = JobSchema(many=True) jobs_schema = JobSchema(many=True)
spacy_nlp_pipeline_job_schema = SpaCyNLPPipelineJobSchema() spacy_nlp_pipeline_job_schema = SpaCyNLPPipelineJobSchema()
tesseract_ocr_pipeline_job_schema = TesseractOCRPipelineJobSchema() tesseract_ocr_pipeline_job_schema = TesseractOCRPipelineJobSchema()
tesseract_ocr_model_schema = TesseractOCRModelSchema() tesseract_ocr_pipeline_model_schema = TesseractOCRPipelineModelSchema()
tesseract_ocr_models_schema = TesseractOCRModelSchema(many=True) tesseract_ocr_pipeline_models_schema = TesseractOCRPipelineModelSchema(many=True)
@bp.route('', methods=['GET']) @bp.route('', methods=['GET'])
@ -60,11 +60,11 @@ def create_tesseract_ocr_pipeline_job(args):
@bp.route('/tesseract-ocr-pipeline/models', methods=['GET']) @bp.route('/tesseract-ocr-pipeline/models', methods=['GET'])
@authenticate(token_auth) @authenticate(token_auth)
@response(tesseract_ocr_models_schema) @response(tesseract_ocr_pipeline_models_schema)
@other_responses(auth_error_responses) @other_responses(auth_error_responses)
def get_tesseract_ocr_models(): def get_tesseract_ocr_models():
"""Get all Tesseract OCR Models""" """Get all Tesseract OCR Models"""
return TesseractOCRModel.query.all() return TesseractOCRPipelineModel.query.all()
@bp.route('/<hashid:job_id>', methods=['DELETE']) @bp.route('/<hashid:job_id>', methods=['DELETE'])

View File

@ -3,7 +3,14 @@ from marshmallow import validate, validates, ValidationError
from marshmallow.decorators import post_dump from marshmallow.decorators import post_dump
from app import ma from app import ma
from app.auth import USERNAME_REGEX from app.auth import USERNAME_REGEX
from app.models import Job, JobStatus, TesseractOCRModel, Token, User, UserSettingJobStatusMailNotificationLevel from app.models import (
Job,
JobStatus,
TesseractOCRPipelineModel,
Token,
User,
UserSettingJobStatusMailNotificationLevel
)
from app.services import SERVICES from app.services import SERVICES
@ -21,9 +28,9 @@ class TokenSchema(ma.SQLAlchemySchema):
refresh_token = ma.String() refresh_token = ma.String()
class TesseractOCRModelSchema(ma.SQLAlchemySchema): class TesseractOCRPipelineModelSchema(ma.SQLAlchemySchema):
class Meta: class Meta:
model = TesseractOCRModel model = TesseractOCRPipelineModel
ordered = True ordered = True
hashid = ma.String(data_key='id', dump_only=True) hashid = ma.String(data_key='id', dump_only=True)

View File

@ -1,7 +1,7 @@
from apifairy import authenticate, body, response from apifairy import authenticate, body, response
from apifairy.decorators import other_responses from apifairy.decorators import other_responses
from flask import abort, Blueprint, current_app from flask import abort, Blueprint
from werkzeug.exceptions import InternalServerError from werkzeug.exceptions import InternalServerError
from app import db from app import db
from app.email import create_message, send from app.email import create_message, send

View File

@ -2,7 +2,12 @@ from flask import current_app
from flask_migrate import upgrade from flask_migrate import upgrade
import click import click
import os import os
from app.models import Role, User, TesseractOCRModel, TranskribusHTRModel from app.models import (
Role,
User,
TesseractOCRPipelineModel,
SpaCyNLPPipelineModel
)
def _make_default_dirs(): def _make_default_dirs():
@ -35,10 +40,10 @@ def register(app):
Role.insert_defaults() Role.insert_defaults()
current_app.logger.info('Insert/Update default users') current_app.logger.info('Insert/Update default users')
User.insert_defaults() User.insert_defaults()
current_app.logger.info('Insert/Update default TesseractOCRModels') current_app.logger.info('Insert/Update default SpaCyNLPPipelineModels')
TesseractOCRModel.insert_defaults() SpaCyNLPPipelineModel.insert_defaults()
current_app.logger.info('Insert/Update default TranskribusHTRModels') current_app.logger.info('Insert/Update default TesseractOCRPipelineModels')
TranskribusHTRModel.insert_defaults() TesseractOCRPipelineModel.insert_defaults()
@app.cli.group() @app.cli.group()
def converter(): def converter():

View File

@ -0,0 +1,58 @@
from flask_wtf import FlaskForm
from flask_wtf.file import FileField, FileRequired
from wtforms import (
BooleanField,
StringField,
SubmitField,
SelectMultipleField,
IntegerField
)
from wtforms.validators import InputRequired, Length
from app.services import SERVICES
class TesseractOCRModelContributionForm(FlaskForm):
    """Form for contributing a Tesseract OCR pipeline model.

    Collects the descriptive metadata of a model (title, description,
    version, publisher information), the service versions it is compatible
    with, whether it is shared, and the model file itself.

    The choices of ``compatible_service_versions`` are filled in at
    construction time from the ``tesseract-ocr-pipeline`` entry of
    ``SERVICES``.
    """

    title = StringField(
        'Title',
        validators=[InputRequired(), Length(max=64)]
    )
    description = StringField(
        'Description',
        validators=[InputRequired(), Length(max=255)]
    )
    version = StringField(
        'Version',
        validators=[InputRequired(), Length(max=16)]
    )
    compatible_service_versions = SelectMultipleField(
        'Compatible service versions'
    )
    publisher = StringField(
        'Publisher',
        validators=[InputRequired(), Length(max=128)]
    )
    publisher_url = StringField(
        'Publisher URL',
        validators=[InputRequired(), Length(max=512)]
    )
    publishing_url = StringField(
        'Publishing URL',
        validators=[InputRequired(), Length(max=512)]
    )
    publishing_year = IntegerField(
        'Publishing year',
        validators=[InputRequired()]
    )
    # Deliberately no InputRequired: browsers omit unchecked checkboxes from
    # the submitted data, so InputRequired would reject every submission that
    # leaves "Shared" unticked and a non-shared model could never be created.
    shared = BooleanField('Shared')
    model_file = FileField('File', validators=[FileRequired()])
    submit = SubmitField()

    def __init__(self, *args, **kwargs):
        """Initialise the form and populate the service version choices."""
        service_manifest = SERVICES['tesseract-ocr-pipeline']
        super().__init__(*args, **kwargs)
        # The empty first entry is the placeholder option of the select.
        self.compatible_service_versions.choices = [('', 'Choose your option')]
        self.compatible_service_versions.choices += [
            (x, x) for x in service_manifest['versions']
        ]
        self.compatible_service_versions.default = ''

View File

@ -1,7 +1,10 @@
from flask import abort, flash, Markup, render_template, url_for
from flask_login import login_required from flask_login import login_required
from app import db
from app.decorators import permission_required from app.decorators import permission_required
from app.models import Permission from app.models import TesseractOCRPipelineModel, Permission
from . import bp from . import bp
from .forms import TesseractOCRModelContributionForm
@bp.before_request @bp.before_request
@ -14,3 +17,38 @@ def before_request():
@bp.route('') @bp.route('')
def contributions(): def contributions():
pass pass
@bp.route('/tesseract-ocr-pipeline-models', methods=['GET', 'POST'])
def tesseract_ocr_pipeline_models():
    """Show the model contribution form and create a model on submission.

    On a non-submitted request the contribution template is rendered.
    On submission the form is validated; validation errors are returned as
    ``{'errors': ...}`` with status 400. A valid submission stores the
    uploaded file as a new ``TesseractOCRPipelineModel`` owned by the current
    user, commits it, flashes a confirmation and answers with status 201 and
    a ``Location`` header pointing at the contributions overview.

    Aborts with 500 if the model file cannot be stored.
    """
    # Imported locally because this module only imports ``login_required``
    # from flask_login at file level.
    from flask_login import current_user

    form = TesseractOCRModelContributionForm(
        prefix='contribute-tesseract-ocr-pipeline-model-form'
    )
    if form.is_submitted():
        if not form.validate():
            response = {'errors': form.errors}
            return response, 400
        try:
            tesseract_ocr_model = TesseractOCRPipelineModel.create(
                # The upload field is declared as ``model_file`` on the form.
                form.model_file.data,
                compatible_service_versions=form.compatible_service_versions.data,
                description=form.description.data,
                publisher=form.publisher.data,
                publisher_url=form.publisher_url.data,
                publishing_url=form.publishing_url.data,
                publishing_year=form.publishing_year.data,
                shared=form.shared.data,
                title=form.title.data,
                # The model's storage path is derived from its owner, so the
                # owner has to be set before the file can be saved.
                user=current_user,
                version=form.version.data
            )
        except (AttributeError, OSError):
            # FileMixin.create re-raises both exception types after rolling
            # back the session.
            abort(500)
        db.session.commit()
        # Markup.format escapes its arguments; the title is user input and
        # must not be injected into the flashed HTML verbatim.
        message = Markup('Model "{}" created').format(
            tesseract_ocr_model.title
        )
        flash(message)
        return {}, 201, {'Location': url_for('contributions.contributions')}
    return render_template(
        'contributions/contribute.html.j2',
        form=form,
        title='Contribution'
    )

View File

@ -149,7 +149,7 @@ def create_corpus_file(corpus_id):
mimetype='application/vrt+xml', mimetype='application/vrt+xml',
corpus=corpus corpus=corpus
) )
except OSError: except (AttributeError, OSError):
abort(500) abort(500)
corpus.status = CorpusStatus.UNPREPARED corpus.status = CorpusStatus.UNPREPARED
db.session.commit() db.session.commit()

View File

@ -143,7 +143,7 @@ def _create_cqpserver_container(corpus):
''' ## Name ## ''' ''' ## Name ## '''
name = f'cqpserver_{corpus.id}' name = f'cqpserver_{corpus.id}'
''' ## Network ## ''' ''' ## Network ## '''
network = 'nopaque_default' network = f'{current_app.config["DOCKER_NETWORK_NAME"]}'
''' ## Volumes ## ''' ''' ## Volumes ## '''
volumes = [] volumes = []
''' ### Corpus data volume ### ''' ''' ### Corpus data volume ### '''

View File

@ -3,8 +3,7 @@ from app.models import (
Job, Job,
JobResult, JobResult,
JobStatus, JobStatus,
TesseractOCRModel, TesseractOCRPipelineModel
TranskribusHTRModel
) )
from datetime import datetime from datetime import datetime
from flask import current_app from flask import current_app
@ -61,8 +60,8 @@ def _create_job_service(job):
if 'binarization' in job.service_args and job.service_args['binarization']: if 'binarization' in job.service_args and job.service_args['binarization']:
command += ' --binarize' command += ' --binarize'
elif job.service == 'transkribus-htr-pipeline': elif job.service == 'transkribus-htr-pipeline':
transkribus_htr_model = TranskribusHTRModel.query.get(job.service_args['model']) transkribus_htr_pipeline_model_id = job.service_args['model']
command += f' -m {transkribus_htr_model.transkribus_model_id}' command += f' -m {transkribus_htr_pipeline_model_id}'
readcoop_username = current_app.config.get('NOPAQUE_READCOOP_USERNAME') readcoop_username = current_app.config.get('NOPAQUE_READCOOP_USERNAME')
command += f' --readcoop-username "{readcoop_username}"' command += f' --readcoop-username "{readcoop_username}"'
readcoop_password = current_app.config.get('NOPAQUE_READCOOP_PASSWORD') readcoop_password = current_app.config.get('NOPAQUE_READCOOP_PASSWORD')
@ -96,7 +95,7 @@ def _create_job_service(job):
else: else:
job.status = JobStatus.FAILED job.status = JobStatus.FAILED
return return
model = TesseractOCRModel.query.get(model_id) model = TesseractOCRPipelineModel.query.get(model_id)
if model is None: if model is None:
job.status = JobStatus.FAILED job.status = JobStatus.FAILED
return return

View File

@ -1,5 +1,6 @@
from datetime import datetime, timedelta from datetime import datetime, timedelta
from enum import Enum, IntEnum from enum import Enum, IntEnum
import re
from flask import current_app, url_for from flask import current_app, url_for
from flask_hashids import HashidMixin from flask_hashids import HashidMixin
from flask_login import UserMixin from flask_login import UserMixin
@ -20,10 +21,6 @@ from app.converters.vrt import normalize_vrt_file
from app.email import create_message from app.email import create_message
TRANSKRIBUS_HTR_MODELS = \
json.loads(requests.get('https://transkribus.eu/TrpServer/rest/models/text', params={'docType': 'handwritten'}).content)['trpModelMetadata'] # noqa
############################################################################## ##############################################################################
# enums # # enums #
############################################################################## ##############################################################################
@ -91,6 +88,26 @@ class FileMixin:
), ),
'mimetype': self.mimetype 'mimetype': self.mimetype
} }
@classmethod
def create(cls, file_storage, **kwargs):
    """Create an instance of ``cls`` and store the uploaded file for it.

    ``filename`` and ``mimetype`` may be passed as keyword arguments; when
    omitted they are taken from ``file_storage``. All remaining keyword
    arguments are forwarded to the ``cls`` constructor.

    The new object is added to the session, flushed and refreshed before
    the file is written to ``obj.path``. If saving raises
    ``AttributeError`` or ``OSError`` the error is logged, the session is
    rolled back and the exception is re-raised. The session is not
    committed here.
    """
    raw_filename = kwargs.pop('filename', file_storage.filename)
    mimetype = kwargs.pop('mimetype', file_storage.mimetype)
    constructor_args = {
        'filename': secure_filename(raw_filename),
        'mimetype': mimetype,
        **kwargs
    }
    instance = cls(**constructor_args)
    session = db.session
    session.add(instance)
    # Flush and refresh before ``instance.path`` is read below.
    session.flush(objects=[instance])
    session.refresh(instance)
    try:
        file_storage.save(instance.path)
    except (AttributeError, OSError) as error:
        current_app.logger.error(error)
        session.rollback()
        raise
    return instance
# endregion mixins # endregion mixins
@ -254,14 +271,14 @@ class User(HashidMixin, UserMixin, db.Model):
last_seen = db.Column(db.DateTime()) last_seen = db.Column(db.DateTime())
# Backrefs: role: Role # Backrefs: role: Role
# Relationships # Relationships
tesseract_ocr_models = db.relationship( tesseract_ocr_pipeline_models = db.relationship(
'TesseractOCRModel', 'TesseractOCRPipelineModel',
backref='user', backref='user',
cascade='all, delete-orphan', cascade='all, delete-orphan',
lazy='dynamic' lazy='dynamic'
) )
transkribus_htr_models = db.relationship( spacy_nlp_pipeline_models = db.relationship(
'TranskribusHTRModel', 'SpaCyNLPPipelineModel',
backref='user', backref='user',
cascade='all, delete-orphan', cascade='all, delete-orphan',
lazy='dynamic' lazy='dynamic'
@ -322,7 +339,8 @@ class User(HashidMixin, UserMixin, db.Model):
db.session.refresh(user) db.session.refresh(user)
try: try:
os.mkdir(user.path) os.mkdir(user.path)
os.mkdir(os.path.join(user.path, 'tesseract_ocr_models')) os.mkdir(os.path.join(user.path, 'spacy_nlp_pipeline_models'))
os.mkdir(os.path.join(user.path, 'tesseract_ocr_pipeline_models'))
os.mkdir(os.path.join(user.path, 'corpora')) os.mkdir(os.path.join(user.path, 'corpora'))
os.mkdir(os.path.join(user.path, 'jobs')) os.mkdir(os.path.join(user.path, 'jobs'))
except OSError as e: except OSError as e:
@ -498,14 +516,14 @@ class User(HashidMixin, UserMixin, db.Model):
x.hashid: x.to_json(relationships=True) x.hashid: x.to_json(relationships=True)
for x in self.jobs for x in self.jobs
} }
_json['tesseract_ocr_models'] = { _json['tesseract_ocr_pipeline_models'] = {
x.hashid: x.to_json(relationships=True) x.hashid: x.to_json(relationships=True)
for x in self.tesseract_ocr_models for x in self.tesseract_ocr_pipeline_models
} }
return _json return _json
class TesseractOCRModel(FileMixin, HashidMixin, db.Model): class TesseractOCRPipelineModel(FileMixin, HashidMixin, db.Model):
__tablename__ = 'tesseract_ocr_models' __tablename__ = 'tesseract_ocr_pipeline_models'
# Primary key # Primary key
id = db.Column(db.Integer, primary_key=True) id = db.Column(db.Integer, primary_key=True)
# Foreign keys # Foreign keys
@ -526,7 +544,7 @@ class TesseractOCRModel(FileMixin, HashidMixin, db.Model):
def path(self): def path(self):
return os.path.join( return os.path.join(
self.user.path, self.user.path,
'tesseract_ocr_models', 'tesseract_ocr_pipeline_models',
str(self.id) str(self.id)
) )
@ -535,12 +553,12 @@ class TesseractOCRModel(FileMixin, HashidMixin, db.Model):
nopaque_user = User.query.filter_by(username='nopaque').first() nopaque_user = User.query.filter_by(username='nopaque').first()
defaults_file = os.path.join( defaults_file = os.path.join(
os.path.dirname(os.path.abspath(__file__)), os.path.dirname(os.path.abspath(__file__)),
'TesseractOCRModel.defaults.yml' 'TesseractOCRPipelineModel.defaults.yml'
) )
with open(defaults_file, 'r') as f: with open(defaults_file, 'r') as f:
defaults = yaml.safe_load(f) defaults = yaml.safe_load(f)
for m in defaults: for m in defaults:
model = TesseractOCRModel.query.filter_by(title=m['title'], version=m['version']).first() # noqa model = TesseractOCRPipelineModel.query.filter_by(title=m['title'], version=m['version']).first() # noqa
if model is not None: if model is not None:
model.compatible_service_versions = m['compatible_service_versions'] model.compatible_service_versions = m['compatible_service_versions']
model.description = m['description'] model.description = m['description']
@ -552,7 +570,7 @@ class TesseractOCRModel(FileMixin, HashidMixin, db.Model):
model.title = m['title'] model.title = m['title']
model.version = m['version'] model.version = m['version']
continue continue
model = TesseractOCRModel( model = TesseractOCRPipelineModel(
compatible_service_versions=m['compatible_service_versions'], compatible_service_versions=m['compatible_service_versions'],
description=m['description'], description=m['description'],
publisher=m['publisher'], publisher=m['publisher'],
@ -603,45 +621,99 @@ class TesseractOCRModel(FileMixin, HashidMixin, db.Model):
return _json return _json
class TranskribusHTRModel(HashidMixin, db.Model): class SpaCyNLPPipelineModel(FileMixin, HashidMixin, db.Model):
__tablename__ = 'transkribus_htr_models' __tablename__ = 'spacy_nlp_pipeline_models'
# Primary key # Primary key
id = db.Column(db.Integer, primary_key=True) id = db.Column(db.Integer, primary_key=True)
# Foreign keys # Foreign keys
user_id = db.Column(db.Integer, db.ForeignKey('users.id')) user_id = db.Column(db.Integer, db.ForeignKey('users.id'))
# Fields # Fields
title = db.Column(db.String(64))
description = db.Column(db.String(255))
version = db.Column(db.String(16))
compatible_service_versions = db.Column(ContainerColumn(list, 255))
publisher = db.Column(db.String(128))
publisher_url = db.Column(db.String(512))
publishing_url = db.Column(db.String(512))
publishing_year = db.Column(db.Integer)
shared = db.Column(db.Boolean, default=False) shared = db.Column(db.Boolean, default=False)
transkribus_model_id = db.Column(db.Integer)
# Backrefs: user: User # Backrefs: user: User
@property
def path(self):
return os.path.join(
self.user.path,
'spacy_nlp_pipeline_models',
str(self.id)
)
@staticmethod @staticmethod
def insert_defaults(): def insert_defaults():
nopaque_user = User.query.filter_by(username='nopaque').first() nopaque_user = User.query.filter_by(username='nopaque').first()
# models = [ defaults_file = os.path.join(
# m for m in TRANSKRIBUS_HTR_MODELS if True os.path.dirname(os.path.abspath(__file__)),
# and 'creator' in m and m['creator'] == 'Transkribus Team' 'SpaCyNLPPipelineModel.defaults.yml'
# and 'docType' in m and m['docType'] == 'handwritten' )
# ] with open(defaults_file, 'r') as f:
for m in TRANSKRIBUS_HTR_MODELS: defaults = yaml.safe_load(f)
model = TranskribusHTRModel.query.filter_by(transkribus_model_id=m['modelId']).first() # noqa for m in defaults:
model = SpaCyNLPPipelineModel.query.filter_by(title=m['title'], version=m['version']).first() # noqa
if model is not None: if model is not None:
model.compatible_service_versions = m['compatible_service_versions']
model.description = m['description']
model.publisher = m['publisher']
model.publisher_url = m['publisher_url']
model.publishing_url = m['publishing_url']
model.publishing_year = m['publishing_year']
model.shared = True model.shared = True
model.transkribus_model_id = m['modelId'] model.title = m['title']
model.version = m['version']
continue continue
model = TranskribusHTRModel( model = SpaCyNLPPipelineModel(
transkribus_model_id=m['modelId'], compatible_service_versions=m['compatible_service_versions'],
description=m['description'],
publisher=m['publisher'],
publisher_url=m['publisher_url'],
publishing_url=m['publishing_url'],
publishing_year=m['publishing_year'],
shared=True, shared=True,
title=m['title'],
user=nopaque_user, user=nopaque_user,
version=m['version']
) )
db.session.add(model) db.session.add(model)
db.session.flush(objects=[model])
db.session.refresh(model)
model.filename = f'{model.id}.traineddata'
r = requests.get(m['url'], stream=True)
pbar = tqdm(
desc=f'{model.title} ({model.filename})',
unit="B",
unit_scale=True,
unit_divisor=1024,
total=int(r.headers['Content-Length'])
)
pbar.clear()
with open(model.path, 'wb') as f:
for chunk in r.iter_content(chunk_size=1024):
if chunk: # filter out keep-alive new chunks
pbar.update(len(chunk))
f.write(chunk)
pbar.close()
db.session.commit() db.session.commit()
def to_json(self, backrefs=False, relationships=False): def to_json(self, backrefs=False, relationships=False):
_json = { _json = {
'id': self.hashid, 'id': self.hashid,
'user_id': self.user.hashid, 'compatible_service_versions': self.compatible_service_versions,
'description': self.description,
'publisher': self.publisher,
'publisher_url': self.publisher_url,
'publishing_url': self.publishing_url,
'publishing_year': self.publishing_year,
'shared': self.shared, 'shared': self.shared,
'transkribus_model_id': self.transkribus_model_id, 'title': self.title,
**self.file_mixin_to_json()
} }
if backrefs: if backrefs:
_json['user'] = self.user.to_json(backrefs=True) _json['user'] = self.user.to_json(backrefs=True)
@ -691,26 +763,6 @@ class JobInput(FileMixin, HashidMixin, db.Model):
def user_id(self): def user_id(self):
return self.job.user_id return self.job.user_id
@staticmethod
def create(input_file, **kwargs):
filename = kwargs.get('filename', input_file.filename)
mimetype = kwargs.get('mimetype', input_file.mimetype)
job_input = JobInput(
filename=secure_filename(filename),
mimetype=mimetype,
**kwargs
)
db.session.add(job_input)
db.session.flush(objects=[job_input])
db.session.refresh(job_input)
try:
input_file.save(job_input.path)
except OSError as e:
current_app.logger.error(e)
db.session.rollback()
raise e
return job_input
def to_json(self, backrefs=False, relationships=False): def to_json(self, backrefs=False, relationships=False):
_json = { _json = {
'id': self.hashid, 'id': self.hashid,
@ -766,26 +818,6 @@ class JobResult(FileMixin, HashidMixin, db.Model):
def user_id(self): def user_id(self):
return self.job.user_id return self.job.user_id
@staticmethod
def create(input_file, **kwargs):
filename = kwargs.get('filename', input_file.filename)
mimetype = kwargs.get('mimetype', input_file.mimetype)
job_result = JobResult(
filename=secure_filename(filename),
mimetype=mimetype,
**kwargs
)
db.session.add(job_result)
db.session.flush(objects=[job_result])
db.session.refresh(job_result)
try:
input_file.save(job_result.path)
except OSError as e:
current_app.logger.error(e)
db.session.rollback()
raise e
return job_result
def to_json(self, backrefs=False, relationships=False): def to_json(self, backrefs=False, relationships=False):
_json = { _json = {
'id': self.hashid, 'id': self.hashid,
@ -1024,26 +1056,6 @@ class CorpusFile(FileMixin, HashidMixin, db.Model):
_json['corpus'] = self.corpus.to_json(backrefs=True) _json['corpus'] = self.corpus.to_json(backrefs=True)
return _json return _json
@staticmethod
def create(input_file, **kwargs):
filename = kwargs.pop('filename', input_file.filename)
mimetype = kwargs.pop('mimetype', input_file.mimetype)
corpus_file = CorpusFile(
filename=secure_filename(filename),
mimetype=mimetype,
**kwargs,
)
db.session.add(corpus_file)
db.session.flush(objects=[corpus_file])
db.session.refresh(corpus_file)
try:
input_file.save(corpus_file.path)
except OSError as e:
current_app.logger.error(e)
db.session.rollback()
raise e
return corpus_file
class Corpus(HashidMixin, db.Model): class Corpus(HashidMixin, db.Model):
''' '''
Class to define a corpus. Class to define a corpus.

View File

@ -10,11 +10,7 @@ from wtforms import (
ValidationError ValidationError
) )
from wtforms.validators import InputRequired, Length from wtforms.validators import InputRequired, Length
from app.models import ( from app.models import TesseractOCRPipelineModel
TRANSKRIBUS_HTR_MODELS,
TesseractOCRModel,
TranskribusHTRModel
)
from . import SERVICES from . import SERVICES
@ -77,7 +73,7 @@ class CreateTesseractOCRPipelineJobForm(CreateJobBaseForm):
if 'disabled' in self.binarization.render_kw: if 'disabled' in self.binarization.render_kw:
del self.binarization.render_kw['disabled'] del self.binarization.render_kw['disabled']
models = [ models = [
x for x in TesseractOCRModel.query.filter().all() x for x in TesseractOCRPipelineModel.query.filter().all()
if version in x.compatible_service_versions and (x.shared == True or x.user == current_user) if version in x.compatible_service_versions and (x.shared == True or x.user == current_user)
] ]
self.model.choices = [('', 'Choose your option')] self.model.choices = [('', 'Choose your option')]
@ -107,6 +103,7 @@ class CreateTranskribusHTRPipelineJobForm(CreateJobBaseForm):
raise ValidationError('PDF files only!') raise ValidationError('PDF files only!')
def __init__(self, *args, **kwargs): def __init__(self, *args, **kwargs):
transkribus_htr_pipeline_models = kwargs.pop('transkribus_htr_pipeline_models', [])
service_manifest = SERVICES['transkribus-htr-pipeline'] service_manifest = SERVICES['transkribus-htr-pipeline']
version = kwargs.pop('version', service_manifest['latest_version']) version = kwargs.pop('version', service_manifest['latest_version'])
super().__init__(*args, **kwargs) super().__init__(*args, **kwargs)
@ -118,12 +115,8 @@ class CreateTranskribusHTRPipelineJobForm(CreateJobBaseForm):
if 'binarization' in service_info['methods']: if 'binarization' in service_info['methods']:
if 'disabled' in self.binarization.render_kw: if 'disabled' in self.binarization.render_kw:
del self.binarization.render_kw['disabled'] del self.binarization.render_kw['disabled']
models = [
x for x in TranskribusHTRModel.query.filter().all()
if x.shared == True or x.user == current_user
]
self.model.choices = [('', 'Choose your option')] self.model.choices = [('', 'Choose your option')]
self.model.choices += [(x.hashid, [y['name'] for y in TRANSKRIBUS_HTR_MODELS if y['modelId'] == x.transkribus_model_id ][0]) for x in models] self.model.choices += [(x['modelId'], x['name']) for x in transkribus_htr_pipeline_models]
self.model.default = '' self.model.default = ''
self.version.choices = [(x, x) for x in service_manifest['versions']] self.version.choices = [(x, x) for x in service_manifest['versions']]
self.version.data = version self.version.data = version

View File

@ -1,13 +1,12 @@
from flask import abort, current_app, flash, Markup, render_template, request from flask import abort, current_app, flash, make_response, Markup, render_template, request
from flask_login import current_user, login_required from flask_login import current_user, login_required
import requests
from app import db, hashids from app import db, hashids
from app.models import ( from app.models import (
Job, Job,
JobInput, JobInput,
JobStatus, JobStatus,
TesseractOCRModel, TesseractOCRPipelineModel
TRANSKRIBUS_HTR_MODELS,
TranskribusHTRModel
) )
from . import bp, SERVICES from . import bp, SERVICES
from .forms import ( from .forms import (
@ -45,7 +44,7 @@ def file_setup_pipeline():
for input_file in form.images.data: for input_file in form.images.data:
try: try:
JobInput.create(input_file, job=job) JobInput.create(input_file, job=job)
except OSError: except (AttributeError, OSError):
abort(500) abort(500)
job.status = JobStatus.SUBMITTED job.status = JobStatus.SUBMITTED
db.session.commit() db.session.commit()
@ -88,21 +87,21 @@ def tesseract_ocr_pipeline():
abort(500) abort(500)
try: try:
JobInput.create(form.pdf.data, job=job) JobInput.create(form.pdf.data, job=job)
except OSError: except (AttributeError, OSError):
abort(500) abort(500)
job.status = JobStatus.SUBMITTED job.status = JobStatus.SUBMITTED
db.session.commit() db.session.commit()
message = Markup(f'Job "<a href="{job.url}">{job.title}</a>" created') message = Markup(f'Job "<a href="{job.url}">{job.title}</a>" created')
flash(message, 'job') flash(message, 'job')
return {}, 201, {'Location': job.url} return {}, 201, {'Location': job.url}
tesseract_ocr_models = [ tesseract_ocr_pipeline_models = [
x for x in TesseractOCRModel.query.all() x for x in TesseractOCRPipelineModel.query.all()
if version in x.compatible_service_versions and (x.shared == True or x.user == current_user) if version in x.compatible_service_versions and (x.shared == True or x.user == current_user)
] ]
return render_template( return render_template(
'services/tesseract_ocr_pipeline.html.j2', 'services/tesseract_ocr_pipeline.html.j2',
form=form, form=form,
tesseract_ocr_models=tesseract_ocr_models, tesseract_ocr_pipeline_models=tesseract_ocr_pipeline_models,
title=service_manifest['name'] title=service_manifest['name']
) )
@ -117,7 +116,18 @@ def transkribus_htr_pipeline():
version = request.args.get('version', service_manifest['latest_version']) version = request.args.get('version', service_manifest['latest_version'])
if version not in service_manifest['versions']: if version not in service_manifest['versions']:
abort(404) abort(404)
form = CreateTranskribusHTRPipelineJobForm(prefix='create-job-form', version=version) r = requests.get(
'https://transkribus.eu/TrpServer/rest/models/text',
headers={'Accept': 'application/json'}
)
if r.status_code != 200:
abort(500)
transkribus_htr_pipeline_models = r.json()['trpModelMetadata']
form = CreateTranskribusHTRPipelineJobForm(
transkribus_htr_pipeline_models=transkribus_htr_pipeline_models,
prefix='create-job-form',
version=version
)
if form.is_submitted(): if form.is_submitted():
if not form.validate(): if not form.validate():
response = {'errors': form.errors} response = {'errors': form.errors}
@ -129,7 +139,7 @@ def transkribus_htr_pipeline():
service=service, service=service,
service_args={ service_args={
'binarization': form.binarization.data, 'binarization': form.binarization.data,
'model': hashids.decode(form.model.data) 'model': form.model.data
}, },
service_version=form.version.data, service_version=form.version.data,
user=current_user user=current_user
@ -138,23 +148,18 @@ def transkribus_htr_pipeline():
abort(500) abort(500)
try: try:
JobInput.create(form.pdf.data, job=job) JobInput.create(form.pdf.data, job=job)
except OSError: except (AttributeError, OSError):
abort(500) abort(500)
job.status = JobStatus.SUBMITTED job.status = JobStatus.SUBMITTED
db.session.commit() db.session.commit()
message = Markup(f'Job "<a href="{job.url}">{job.title}</a>" created') message = Markup(f'Job "<a href="{job.url}">{job.title}</a>" created')
flash(message, 'job') flash(message, 'job')
return {}, 201, {'Location': job.url} return {}, 201, {'Location': job.url}
transkribus_htr_models = [
x for x in TranskribusHTRModel.query.all()
if x.shared == True or x.user == current_user
]
return render_template( return render_template(
'services/transkribus_htr_pipeline.html.j2', 'services/transkribus_htr_pipeline.html.j2',
form=form, form=form,
title=service_manifest['name'], title=service_manifest['name'],
TRANSKRIBUS_HTR_MODELS=TRANSKRIBUS_HTR_MODELS, transkribus_htr_pipeline_models=transkribus_htr_pipeline_models
transkribus_htr_models=transkribus_htr_models
) )
@ -187,7 +192,7 @@ def spacy_nlp_pipeline():
abort(500) abort(500)
try: try:
JobInput.create(form.txt.data, job=job) JobInput.create(form.txt.data, job=job)
except OSError: except (AttributeError, OSError):
abort(500) abort(500)
job.status = JobStatus.SUBMITTED job.status = JobStatus.SUBMITTED
db.session.commit() db.session.commit()

View File

@ -0,0 +1,146 @@
/*
 * Styles for the concordance Query Builder modal and its tutorial modal.
 * These rules replace the inline `style` attributes that were previously
 * attached to the elements of the query builder markup.
 */

/* ---- Modal frame ------------------------------------------------------- */

/* Only the query builder's own content pane hides horizontal overflow; the
   child combinator keeps the rule away from the nested n / n-m modals. */
#concordance-query-builder > .modal-content {
  overflow-x: hidden;
}

#concordance-query-builder {
  width: 70%;
}

/* The nav bar is pulled over the modal padding so it spans the full width. */
#concordance-query-builder nav {
  background-color: #6B3F89;
  margin-top: -25px;
  margin-left: -25px;
  width: 105%;
}

#query-builder-nav {
  padding-left: 15px;
}

#close-query-builder {
  margin-right: 50px;
  cursor: pointer;
}

/* ---- Query display ----------------------------------------------------- */

#general-options-query-builder-tutorial-info-icon {
  color: black;
}

#your-query {
  border-bottom-style: solid;
  border-bottom-width: 1px;
}

#insert-query-button {
  background-color: #00426f;
  text-align: center;
}

/* ---- Structural attributes --------------------------------------------- */

#structural-attr h6 {
  margin-left: 15px;
}

#add-structural-attribute-tutorial-info-icon {
  color: black;
}

#sentence {
  background-color: #FD9720;
}

#entity {
  background-color: #A6E22D;
}

#text-annotation {
  background-color: #2FBBAB;
}

#no-value-metadata-message {
  padding-top: 25px;
  margin-left: -20px;
}

/* ---- Positional attributes (token builder) ----------------------------- */

#token-kind-selector {
  background-color: #f2eff7;
  padding: 15px;
  border-top-style: solid;
  border-color: #6B3F89;
}

/* Descendant selector: targets the `.col.s5` column inside the selector row
   (the row itself never carries the `s5` class). */
#token-kind-selector .s5 {
  margin-top: 15px;
}

#token-kind-selector h6 {
  margin-left: 15px;
}

#token-tutorial-info-icon {
  color: black;
}

#no-value-message {
  padding-top: 25px;
  margin-left: -20px;
}

/* ---- Token edit options ------------------------------------------------ */

#token-edit-options h6 {
  margin-left: 15px;
}

#edit-options-tutorial-info-icon {
  color: black;
}

#incidence-modifiers-button a {
  background-color: #2FBBAB;
}

#incidence-modifiers a {
  background-color: white;
}

#ignore-case {
  margin-left: 5px;
}

#or,
#and {
  background-color: #fc0;
}

#betweenNM {
  width: 60%;
}

/* ---- Tutorial modal ---------------------------------------------------- */

#query-builder-tutorial-modal {
  width: 60%;
}

#query-builder-tutorial-modal ul {
  margin-top: 10px;
}

#query-builder-tutorial {
  padding: 15px;
}

#scroll-up-button-query-builder-tutorial {
  background-color: #28B3D1;
}

/* ---- Query chips (colour-coded by element type) ------------------------ */

[data-type="start-sentence"],
[data-type="end-sentence"] {
  background-color: #FD9720;
}

[data-type="start-empty-entity"],
[data-type="start-entity"],
[data-type="end-entity"] {
  background-color: #A6E22D;
}

[data-type="start-text-annotation"] {
  background-color: #2FBBAB;
}

[data-type="token"] {
  background-color: #28B3D1;
}

File diff suppressed because it is too large Load Diff

View File

@ -4,6 +4,7 @@
<link href="{{ url_for('static', filename='css/materialize/sticky_footer.css') }}" media="screen,projection" rel="stylesheet"> <link href="{{ url_for('static', filename='css/materialize/sticky_footer.css') }}" media="screen,projection" rel="stylesheet">
<link href="{{ url_for('static', filename='css/materialize/fixes.css') }}" media="screen,projection" rel="stylesheet"> <link href="{{ url_for('static', filename='css/materialize/fixes.css') }}" media="screen,projection" rel="stylesheet">
<link href="{{ url_for('static', filename='css/nopaque_icons.css') }}" media="screen,projection" rel="stylesheet"> <link href="{{ url_for('static', filename='css/nopaque_icons.css') }}" media="screen,projection" rel="stylesheet">
<link href="{{ url_for('static', filename='css/queryBuilder.css') }}" media="screen,projection" rel="stylesheet">
{%- assets {%- assets
filters='pyscss', filters='pyscss',
output='gen/app.%(version)s.css', output='gen/app.%(version)s.css',

View File

@ -0,0 +1,32 @@
{% extends "base.html.j2" %}
{% import "materialize/wtf.html.j2" as wtf %}

{# Form page for adding a new model; every field is rendered through the
   shared `wtf.render_field` macro inside a single card panel. #}
{% block page_content %}
<div class="container">
  <div class="row">
    <div class="col s12 m8 offset-m2">
      <h1 id="title">{{ title }}</h1>
      <p>
        In order to add a new model, please fill in the form below.
      </p>
      <form method="POST">
        <div class="card-panel">
          {{ form.hidden_tag() }}
          {{ wtf.render_field(form.title) }}
          {{ wtf.render_field(form.description) }}
          {{ wtf.render_field(form.publisher) }}
          {{ wtf.render_field(form.publisher_url) }}
          {{ wtf.render_field(form.publishing_url) }}
          {{ wtf.render_field(form.publishing_year) }}
          {{ wtf.render_field(form.shared) }}
          {{ wtf.render_field(form.version) }}
          {{ wtf.render_field(form.compatible_service_versions) }}
          {{ wtf.render_field(form.submit, class_='width-100', material_icon='send') }}
        </div>
      </form>
    </div>
  </div>
</div>{# closes .container, which was previously left open #}
{% endblock page_content %}

View File

@ -64,9 +64,9 @@
</div> </div>
<div class="col s12 m3 l3 right-align"> <div class="col s12 m3 l3 right-align">
<p class="hide-on-small-only">&nbsp;</p> <p class="hide-on-small-only">&nbsp;</p>
<a class="btn disabled waves-effect waves-light"> <a class="btn waves-effect waves-light modal-trigger" href="#concordance-query-builder" id="concordance-query-builder-button">
<i class="material-icons left">build</i> <i class="material-icons left">build</i>
Query builder Query builder (beta)
</a> </a>
<button class="btn waves-effect waves-light corpus-analysis-action" id="concordance-extension-form-submit" type="submit" name="submit"> <button class="btn waves-effect waves-light corpus-analysis-action" id="concordance-extension-form-submit" type="submit" name="submit">
Send Send

View File

@ -248,6 +248,361 @@
</div> </div>
</div> </div>
</div> </div>
<div class="modal" id="concordance-query-builder">
<div class="modal-content">
<div>
<nav>
<div class="nav-wrapper" id="query-builder-nav">
<a href="#!" class="brand-logo"><i class="material-icons">build</i>Query Builder (beta)</a>
<i class="material-icons close right" id="close-query-builder">close</i>
<a class="modal-trigger" href="#query-builder-tutorial-modal" >
<i class="material-icons right tooltipped" id="query-builder-tutorial-info-icon" data-position="bottom" data-tooltip="Click here if you are unsure how to use the Query Builder <br>and want to find out what other options it offers.">help</i>
</a>
</div>
</nav>
</div>
<p></p>
<div id="query-container" class="hide">
<div class="row">
<h6 class="col s2">Your Query:
<a class="modal-trigger" href="#query-builder-tutorial-modal">
<i class="material-icons left" id="general-options-query-builder-tutorial-info-icon">help_outline</i></a>
</h6>
</div>
<div class="row">
<div class="col s10" id="your-query"></div>
<a class="btn-small waves-effect waves-teal col s1" id="insert-query-button">
<i class="material-icons">send</i>
</a>
</div>
<p><i> Preview:</i></p>
<p id="query-preview"></p>
<br>
</div>
<h6>Use the following options to build your query. If you need help, click on the question mark in the upper right corner!</h6>
<p></p>
<a class="btn-large waves-effect waves-light tooltipped" id="positional-attr-button" data-position="bottom" data-tooltip="Search for any token, for example a word, a lemma or a part-of-speech tag">Add new token to your query</a>
<a class="btn-large waves-effect waves-light tooltipped" id="structural-attr-button" data-position="bottom" data-tooltip="Structure your query with structural attributes, for example sentences, entities or annotate the text">Add structural attributes to your query</a>
<div id="structural-attr" class="hide">
<p></p>
<h6>Which structural attribute do you want to add to your query?<a class="modal-trigger" href="#query-builder-tutorial-modal"><i class="material-icons left" id="add-structural-attribute-tutorial-info-icon">help_outline</i></a></h6>
<p></p>
<div class="row">
<div class="col s12">
<a class="btn-small waves-effect waves-light" id="sentence">sentence</a>
<a class="btn-small waves-effect waves-light" id="entity">entity</a>
<a class="btn-small waves-effect waves-light" id="text-annotation">Meta Data</a>
</div>
</div>
<div id="entity-builder" class="hide">
<p></p>
<br>
<div class="row">
<a class="btn waves-effect waves-light col s4" id="empty-entity">Add Entity of any type</a>
<p class="col s1 l1"></p>
<div class= "input-field col s3">
<select name="englishenttype" id="english-ent-type">
<option value="" disabled selected>English ent_type</option>
<option value="CARDINAL">CARDINAL</option>
<option value="DATE">DATE</option>
<option value="EVENT">EVENT</option>
<option value="FAC">FAC</option>
<option value="GPE">GPE</option>
<option value="LANGUAGE">LANGUAGE</option>
<option value="LAW">LAW</option>
<option value="LOC">LOC</option>
<option value="MONEY">MONEY</option>
<option value="NORP">NORP</option>
<option value="ORDINAL">ORDINAL</option>
<option value="ORG">ORG</option>
<option value="PERCENT">PERCENT</option>
<option value="PERSON">PERSON</option>
<option value="PRODUCT">PRODUCT</option>
<option value="QUANTITY">QUANTITY</option>
<option value="TIME">TIME</option>
<option value="WORK_OF_ART">WORK_OF_ART</option>
</select>
<label>Entity Type</label>
</div>
<div class= "input-field col s3">
<select name="germanenttype" id="german-ent-type">
<option value="" disabled selected>German ent_type</option>
<option value="LOC">LOC</option>
<option value="MISC">MISC</option>
<option value="ORG">ORG</option>
<option value="PER">PER</option>
</select>
</div>
</div>
</div>
<div id="text-annotation-builder" class="hide">
<p></p>
<br>
<div class="row">
<div class= "input-field col s4 l3">
<select name="text-annotation-options" id="text-annotation-options">
<option class="btn-small waves-effect waves-light" value="address">address</option>
<option class="btn-small waves-effect waves-light" value="author">author</option>
<option class="btn-small waves-effect waves-light" value="booktitle">booktitle</option>
<option class="btn-small waves-effect waves-light" value="chapter">chapter</option>
<option class="btn-small waves-effect waves-light" value="editor">editor</option>
<option class="btn-small waves-effect waves-light" value="institution">institution</option>
<option class="btn-small waves-effect waves-light" value="journal">journal</option>
<option class="btn-small waves-effect waves-light" value="pages">pages</option>
<option class="btn-small waves-effect waves-light" value="publisher">publisher</option>
<option class="btn-small waves-effect waves-light" value="publishing_year">publishing year</option>
<option class="btn-small waves-effect waves-light" value="school">school</option>
<option class="btn-small waves-effect waves-light" value="title">title</option>
</select>
<label>Meta data</label>
</div>
<div class= "input-field col s7 l5">
<i class="material-icons prefix">mode_edit</i>
<input placeholder="Type in your text annotation" type="text" id="text-annotation-input">
</div>
<div class="col s1 l1 center-align">
<p class="btn-floating waves-effect waves-light" id="text-annotation-submit">
<i class="material-icons right">send</i>
</p>
</div>
<div class="hide" id="no-value-metadata-message"><i>No value entered!</i></div>
</div>
</div>
</div>
<div id="positional-attr" class="hide">
<p></p>
<div class="row" id="token-kind-selector">
<div class="col s5">
<h6>Which kind of token are you looking for? <a class="modal-trigger" href="#query-builder-tutorial-modal"><i class="material-icons left" id="token-tutorial-info-icon">help_outline</i></a></h6>
</div>
<div class="input-field col s3">
<select id="token-attr">
<option value="word" selected>word</option>
<option value="lemma">lemma</option>
<option value="english-pos">english pos</option>
<option value="german-pos">german pos</option>
<option value="simple-pos-button">simple_pos</option>
<option value="empty-token">empty token</option>
</select>
</div>
</div>
<p></p>
<div id="token-builder-content">
<div class="row" >
<div id="token-query"></div>
<div id="word-builder">
<div class= "input-field col s3 l4">
<i class="material-icons prefix">mode_edit</i>
<input placeholder="Type in your word" type="text" id="word-input">
</div>
</div>
<div id="lemma-builder" class="hide" >
<div class= "input-field col s3 l4">
<i class="material-icons prefix">mode_edit</i>
<input placeholder="Type in your lemma" type="text" id="lemma-input">
</div>
</div>
<div id="english-pos-builder" class="hide">
<div class="col s6 m4 l4">
<div class="row">
<div class= "input-field col s12">
<select name="englishpos" id="english-pos">
<option value="default" disabled selected>English pos tagset</option>
<option value="ADD">email</option>
<option value="AFX">affix</option>
<option value="CC">conjunction, coordinating</option>
<option value="CD">cardinal number</option>
<option value="DT">determiner</option>
<option value="EX">existential there</option>
<option value="FW">foreign word</option>
<option value="HYPH">punctuation mark, hyphen</option>
<option value="IN">conjunction, subordinating or preposition</option>
<option value="JJ">adjective</option>
<option value="JJR">adjective, comparative</option>
<option value="JJS">adjective, superlative</option>
</select>
<label>Part-of-speech tags</label>
</div>
</div>
</div>
</div>
<div id="german-pos-builder" class="hide">
<div class="col s6 m4 l4">
<div class="row">
<div class= "input-field col s12">
<select name="germanpos" id="german-pos">
<option value="default" disabled selected>German pos tagset</option>
<option value="ADJA">adjective, attributive</option>
<option value="ADJD">adjective, adverbial or predicative</option>
<option value="ADV">adverb</option>
<option value="APPO">postposition</option>
<option value="APPR">preposition; circumposition left</option>
<option value="APPRART">preposition with article</option>
<option value="APZR">circumposition right</option>
<option value="ART">definite or indefinite article</option>
</select>
<label>Part-of-speech tags</label>
</div>
</div>
</div>
</div>
<div id="simplepos-builder" class="hide">
<div class="col s6 m4 l4">
<div class="row">
<div class= "input-field col s12">
<select name="simplepos" id="simple-pos">
<option value="default" disabled selected>simple_pos tagset</option>
<option value="ADJ">adjective</option>
<option value="ADP">adposition</option>
<option value="ADV">adverb</option>
<option value="AUX">auxiliary verb</option>
<option value="CONJ">coordinating conjunction</option>
<option value="DET">determiner</option>
<option value="INTJ">interjection</option>
<option value="NOUN">noun</option>
<option value="NUM">numeral</option>
<option value="PART">particle</option>
<option value="PRON">pronoun</option>
<option value="PROPN">proper noun</option>
<option value="PUNCT">punctuation</option>
<option value="SCONJ">subordinating conjunction</option>
<option value="SYM">symbol</option>
<option value="VERB">verb</option>
<option value="X">other</option>
</select>
<label>Simple part-of-speech tags</label>
</div>
</div>
</div>
</div>
<div class="col s1 l1 center-align">
<p class="btn-floating waves-effect waves-light" id="token-submit">
<i class="material-icons right">send</i>
</p>
</div>
<div class="hide" id="no-value-message"><i>No value entered!</i></div>
</div>
<div id="token-edit-options">
<div class="row">
<h6>Options to edit your token: <a class="modal-trigger" href="#query-builder-tutorial-modal"><i class="material-icons left" id="edit-options-tutorial-info-icon">help_outline</i></a></h6>
</div>
<p></p>
<div class="row">
<div id="input-options" class="col s5 m5 l5 xl4">
<a id="wildcard-char" class="btn-small waves-effect waves-light tooltipped" data-position="top" data-tooltip="Look for a variable character (also called wildcard character)">Wildcard character</a>
<a id="option-group" class="btn-small waves-effect waves-light tooltipped" data-position="top" data-tooltip="Find character sequences from a list of options">Option Group</a>
</div>
<div class="col s3 m3 l3 xl3" id="incidence-modifiers-button">
<a class="dropdown-trigger btn-small waves-effect waves-light" href="#" data-target="incidence-modifiers" data-position="top" data-tooltip="Incidence Modifiers are special characters or patterns, <br>which determine how often a character represented previously should occur.">incidence modifiers</a>
</div>
{# Dropdown with the incidence modifiers; each entry carries the CQL token it
   inserts in `data-token`. The n / n-m entries open their own input modals. #}
<ul id="incidence-modifiers" class="dropdown-content">
  <li><a id="one-or-more" data-token="+" class="tooltipped" data-position="top" data-tooltip="...occurrences of the character/token before">one or more (+)</a></li>
  <li><a id="zero-or-more" data-token="*" class="tooltipped" data-position="top" data-tooltip="...occurrences of the character/token before">zero or more (*)</a></li>
  <li><a id="zero-or-one" data-token="?" class="tooltipped" data-position="top" data-tooltip="...occurrences of the character/token before">zero or one (?)</a></li>
  <li><a id="exactly-n" class="modal-trigger tooltipped" href="#exactlyN" data-token="{n}" data-position="top" data-tooltip="...occurrences of the character/token before">exactly n ({n})</a></li>
  <li><a id="between-n-m" class="modal-trigger tooltipped" href="#betweenNM" data-token="{n,m}" data-position="top" data-tooltip="...occurrences of the character/token before">between n and m ({n,m})</a></li>
</ul>
<div id="ignore-case-checkbox" class="col s2 m2 l2 xl2">
<p id="ignore-case">
<label>
<input type="checkbox" class="filled-in" />
<span>Ignore Case</span>
</label>
</p>
</div>
<div class="col s2 m2 l2 xl2" id="condition-container">
<a class="btn-small tooltipped waves-effect waves-light" id="or" data-position="bottom" data-tooltip="You can add another condition to your token. <br>At least one must be fulfilled">or</a>
<a class="btn-small tooltipped waves-effect waves-light" id="and" data-position="bottom" data-tooltip="You can add another condition to your token. <br>Both must be fulfilled">and</a>
</div>
</div>
</div>
</div>
<div id="exactlyN" class="modal">
<div class="row modal-content">
<div class="input-field col s10">
<i class="material-icons prefix">mode_edit</i>
<input placeholder="type in a number for 'n'" type="text" id="n-input">
</div>
<div class="col s2">
<p class="btn-floating waves-effect waves-light" id="n-submit">
<i class="material-icons right">send</i>
</p>
</div>
</div>
</div>
<div id="betweenNM" class="modal">
<div class="row modal-content">
<div class= "input-field col s5">
<i class="material-icons prefix">mode_edit</i>
<input placeholder="number for 'n'" type="text" id="n-m-input">
</div>
<div class= "input-field col s5">
<i class="material-icons prefix">mode_edit</i>
<input placeholder="number for 'm'" type="text" id="m-input">
</div>
<div class="col s2">
<p class="btn-floating waves-effect waves-light" id="n-m-submit">
<i class="material-icons right">send</i>
</p>
</div>
</div>
</div>
</div>
</div>
</div>
<div class="modal modal-fixed-footer" id="query-builder-tutorial-modal">
<div class="modal-content" >
<div id="query-builder-tutorial-start"></div>
<ul class="tabs">
<li class="tab"><a class="active" href="#query-builder-tutorial">Query Builder Tutorial</a></li>
{# <li class="tab"><a href="#qb-examples">Examples</a></li> #}
<li class="tab"><a href="#cql-cb-tutorial">Corpus Query Language Tutorial</a></li>
<li class="tab"><a href="#tagsets-cb-tutorial">Tagsets</a></li>
</ul>
<div id="query-builder-tutorial">
{% include "main/manual/_09_query_builder.html.j2" %}
</div>
{# <div id="qb-examples"></div> #}
<div id ="cql-cb-tutorial">
{% with headline_num=4 %}
{% include "main/manual/_08_cqp_query_language.html.j2" %}
{% endwith %}
</div>
<div id="tagsets-cb-tutorial">
<h4>Tagsets</h4>
{% include "main/manual/_10_tagsets.html.j2" %}
</div>
<div class="fixed-action-btn">
<a class="btn-floating btn-large teal" id="scroll-up-button-query-builder-tutorial" href='#query-builder-tutorial-start'>
<i class="large material-icons">arrow_upward</i>
</a>
</div>
</div>
</div>
{% endblock modals %} {% endblock modals %}
{% block scripts %} {% block scripts %}
@ -256,6 +611,7 @@
const corpusAnalysisApp = new CorpusAnalysisApp({{ corpus.hashid|tojson }}); const corpusAnalysisApp = new CorpusAnalysisApp({{ corpus.hashid|tojson }});
const corpusAnalysisConcordance = new CorpusAnalysisConcordance(corpusAnalysisApp); const corpusAnalysisConcordance = new CorpusAnalysisConcordance(corpusAnalysisApp);
const corpusAnalysisReader = new CorpusAnalysisReader(corpusAnalysisApp); const corpusAnalysisReader = new CorpusAnalysisReader(corpusAnalysisApp);
const concordanceQueryBuilder = new ConcordanceQueryBuilder();
corpusAnalysisApp.init(); corpusAnalysisApp.init();
</script> </script>

View File

@ -38,14 +38,14 @@ under the tab "Examples".</p>
Submit button on the right. You can also use the options below to modify your Submit button on the right. You can also use the options below to modify your
token request before pressing the submit button. These options are explained token request before pressing the submit button. These options are explained
further here.</p> further here.</p>
<img src="static/images/manual/query_builder/word_lemma.gif" alt="word and lemma explanation" width="100%;" style="margin-bottom:20px;"> <img src="{{ url_for('static', filename='images/manual/query_builder/word_lemma.gif') }}" alt="word and lemma explanation" width="100%;" style="margin-bottom:20px;">
<br> <br>
<h4>English pos, german pos or simple_pos</h4> <h4>English pos, german pos or simple_pos</h4>
<p>You can choose between the options "english pos", "german pos" and <p>You can choose between the options "english pos", "german pos" and
"simple_pos" to search for different parts-of-speech. You can find an overview "simple_pos" to search for different parts-of-speech. You can find an overview
of all tags under the "Tagsets" tab.</p> of all tags under the "Tagsets" tab.</p>
<img src="static/images/manual/query_builder/pos.gif" alt="part-of-speech-tag explanation" width="100%;" style="margin-bottom:20px;"> <img src="{{ url_for('static', filename='images/manual/query_builder/pos.gif') }}" alt="part-of-speech-tag explanation" width="100%;" style="margin-bottom:20px;">
<h4>Empty Token</h4> <h4>Empty Token</h4>
<p>Here you can search for an empty token. This selection should never stand <p>Here you can search for an empty token. This selection should never stand
@ -75,7 +75,7 @@ under the tab "Examples".</p>
<p>With an option group you can search for different variants of a token. The <p>With an option group you can search for different variants of a token. The
variants are not limited, so you can manually enter more options in the same variants are not limited, so you can manually enter more options in the same
format. "Option1" and "option2" must be replaced accordingly. </p> format. "Option1" and "option2" must be replaced accordingly. </p>
<img src="static/images/manual/query_builder/option_group.gif" alt="option group explanation" width="100%;" style="margin-bottom:20px;"> <img src="{{ url_for('static', filename='images/manual/query_builder/option_group.gif') }}" alt="option group explanation" width="100%;" style="margin-bottom:20px;">
<p></p> <p></p>
<br> <br>
@ -100,7 +100,7 @@ under the tab "Examples".</p>
it will be displayed. Note that "and" is not responsible for lining up tokens in it will be displayed. Note that "and" is not responsible for lining up tokens in
this case. For this you can simply string them together: <br> this case. For this you can simply string them together: <br>
[word="I"] [word="will" & simple_pos="VERB"] [word="go"].</p> [word="I"] [word="will" & simple_pos="VERB"] [word="go"].</p>
<img src="static/images/manual/query_builder/or_and.gif" alt="part-of-speech-tag explanation" width="100%;" style="margin-bottom:20px;"> <img src="{{ url_for('static', filename='images/manual/query_builder/or_and.gif') }}" alt="OR/AND explanation" width="100%;" style="margin-bottom:20px;">
<p></p> <p></p>
<br> <br>
</div> </div>
@ -134,7 +134,7 @@ under the tab "Examples".</p>
the respective abbreviations under the tab "Tagsets". <br> the respective abbreviations under the tab "Tagsets". <br>
You can also search for unspecified entities by selecting "Add entity of any type".</p> You can also search for unspecified entities by selecting "Add entity of any type".</p>
To close the entity query you started, you have to click the entity button one more time. This will make the <div class="chip" style="background-color:#A6E22D;">Entity End</div> element appear in your query. To close the entity query you started, you have to click the entity button one more time. This will make the <div class="chip" style="background-color:#A6E22D;">Entity End</div> element appear in your query.
<img src="static/images/manual/query_builder/entity.gif" alt="entity explanation" width="100%;" style="margin-bottom:20px;"> <img src="{{ url_for('static', filename='images/manual/query_builder/entity.gif') }}" alt="entity explanation" width="100%;" style="margin-bottom:20px;">
<p></p> <p></p>
<br> <br>
@ -142,7 +142,7 @@ under the tab "Examples".</p>
<p>With the meta data you can annotate your text and add specific conditions. <p>With the meta data you can annotate your text and add specific conditions.
You can select a category on the left and enter your desired value on the right. You can select a category on the left and enter your desired value on the right.
The selected metadata will apply to your entire request and will be added at the end.</p> The selected metadata will apply to your entire request and will be added at the end.</p>
<img src="static/images/manual/query_builder/meta_data.gif" alt="meta data explanation" width="100%;" style="margin-bottom:20px;"> <img src="{{ url_for('static', filename='images/manual/query_builder/meta_data.gif') }}" alt="meta data explanation" width="100%;" style="margin-bottom:20px;">
<p></p> <p></p>
<br> <br>
@ -158,11 +158,11 @@ under the tab "Examples".</p>
<h4>Deleting the elements</h4> <h4>Deleting the elements</h4>
<p>You can delete the added elements from the query by clicking the X behind the respective content.</p> <p>You can delete the added elements from the query by clicking the X behind the respective content.</p>
<img src="static/images/manual/query_builder/delete.gif" alt="delete explanation" width="100%;" style="margin-bottom:20px;"> <img src="{{ url_for('static', filename='images/manual/query_builder/delete.gif') }}" alt="delete explanation" width="100%;" style="margin-bottom:20px;">
<h4>Move the elements of your query</h4> <h4>Move the elements of your query</h4>
<p>You can drag and drop elements to customize your query.</p> <p>You can drag and drop elements to customize your query.</p>
<img src="static/images/manual/query_builder/drag_and_drop.gif" alt="Drag&Drop explanation" width="100%;" style="margin-bottom:20px;"> <img src="{{ url_for('static', filename='images/manual/query_builder/drag_and_drop.gif') }}" alt="Drag&Drop explanation" width="100%;" style="margin-bottom:20px;">
</div> </div>

View File

@ -160,8 +160,8 @@
</tr> </tr>
</thead> </thead>
<tbody> <tbody>
{% for m in tesseract_ocr_models %} {% for m in tesseract_ocr_pipeline_models %}
<tr id="tesseract-ocr-model-{{ m.hashid }}"> <tr id="tesseract-ocr-pipeline-model-{{ m.hashid }}">
<td>{{ m.title }}</td> <td>{{ m.title }}</td>
{% if m.description == '' %} {% if m.description == '' %}
<td>Description is not available.</td> <td>Description is not available.</td>

View File

@ -156,15 +156,13 @@
<div id="models-modal" class="modal"> <div id="models-modal" class="modal">
<div class="modal-content"> <div class="modal-content">
<h4>Transkribus HTR Pipeline models</h4> <h4>Transkribus HTR Pipeline models</h4>
<ul class="collapsible popout" id="transkribus-htr-models"> <ul class="collapsible popout" id="transkribus-htr-pipeline-models">
{% for m in transkribus_htr_models %} {% for x in transkribus_htr_pipeline_models %}
<li id="transkribus-htr-model-{{ m.hashid }}"> <li id="transkribus-htr-pipeline-model-{{ x.modelId }}">
{% for m_info in TRANSKRIBUS_HTR_MODELS if m_info['modelId'] == m.transkribus_model_id %} <div class="collapsible-header"><i class="material-icons">widgets</i>{{ x.name }}</div>
<div class="collapsible-header"><i class="material-icons">widgets</i>{{ m_info.name }}</div>
<div class="collapsible-body"> <div class="collapsible-body">
{{ m_info|tojson }} {{ x|tojson }}
</div> </div>
{% endfor %}
</li> </li>
{% endfor %} {% endfor %}
</ul> </ul>

View File

@ -4,7 +4,6 @@
a {color: #FFFFFF;} a {color: #FFFFFF;}
</style> </style>
{% block main_attribs %} class="service-scheme" data-service="corpus-analysis" id="corpus-analysis-app-container"{% endblock main_attribs %} {% block main_attribs %} class="service-scheme" data-service="corpus-analysis" id="corpus-analysis-app-container"{% endblock main_attribs %}
{% block page_content %} {% block page_content %}
@ -82,13 +81,13 @@
</div> </div>
<div class="modal" id="concordance-query-builder" style="width:70%;"> <div class="modal" id="concordance-query-builder">
<div class="modal-content" style="overflow-x:hidden;"> <div class="modal-content">
<div > <div>
<nav style="background-color:#6B3F89; margin-top:-25px; margin-left:-25px; width:105%;"> <nav>
<div class="nav-wrapper" style="padding-left:15px;"> <div class="nav-wrapper" id="query-builder-nav">
<a href="#!" class="brand-logo"><i class="material-icons">build</i>Query Builder</a> <a href="#!" class="brand-logo"><i class="material-icons">build</i>Query Builder</a>
<i class="material-icons close right" style="margin-right: 50px; cursor:pointer;" id="close-query-builder">close</i> <i class="material-icons close right" id="close-query-builder">close</i>
<a class="modal-trigger" href="#query-builder-tutorial-modal" > <a class="modal-trigger" href="#query-builder-tutorial-modal" >
<i class="material-icons right tooltipped" id="query-builder-tutorial-info-icon" data-position="bottom" data-tooltip="Click here if you are unsure how to use the Query Builder <br>and want to find out what other options it offers.">help</i> <i class="material-icons right tooltipped" id="query-builder-tutorial-info-icon" data-position="bottom" data-tooltip="Click here if you are unsure how to use the Query Builder <br>and want to find out what other options it offers.">help</i>
</a> </a>
@ -103,12 +102,12 @@
<div class="row"> <div class="row">
<h6 class="col s2">Your Query: <h6 class="col s2">Your Query:
<a class="modal-trigger" href="#query-builder-tutorial-modal"> <a class="modal-trigger" href="#query-builder-tutorial-modal">
<i class="material-icons left" id="general-options-query-builder-tutorial-info-icon" style="color:black;">help_outline</i></a> <i class="material-icons left" id="general-options-query-builder-tutorial-info-icon">help_outline</i></a>
</h6> </h6>
</div> </div>
<div class="row"> <div class="row">
<div class="col s10 tooltipped" id="your-query" style="border-bottom-style: solid; border-bottom-width:1px;" data-position="bottom" data-tooltip="You can edit your query by deleting individual elements or moving them via drag and drop."></div> <div class="col s10" id="your-query" data-position="bottom" data-tooltip="You can edit your query by deleting individual elements or moving them via drag and drop."></div>
<a class="btn-small waves-effect waves-teal col s1" id="insert-query-button" style="background-color:#00426f; text-align:center"> <a class="btn-small waves-effect waves-teal col s1" id="insert-query-button">
<i class="material-icons">send</i> <i class="material-icons">send</i>
</a> </a>
</div> </div>
@ -125,13 +124,13 @@
<div id="structural-attr" class="hide"> <div id="structural-attr" class="hide">
<p></p> <p></p>
<h6 style="margin-left:15px;">Which structural attribute do you want to add to your query?<a class="modal-trigger" href="#query-builder-tutorial-modal"><i class="material-icons left" id="add-structural-attribute-tutorial-info-icon" style="color:black;">help_outline</i></a></h6> <h6>Which structural attribute do you want to add to your query?<a class="modal-trigger" href="#query-builder-tutorial-modal"><i class="material-icons left" id="add-structural-attribute-tutorial-info-icon">help_outline</i></a></h6>
<p></p> <p></p>
<div class="row"> <div class="row">
<div class="col s12"> <div class="col s12">
<a class="btn-small waves-effect waves-light" id="sentence" style="background-color:#FD9720">sentence</a> <a class="btn-small waves-effect waves-light" id="sentence">sentence</a>
<a class="btn-small waves-effect waves-light" id="entity" style="background-color:#A6E22D">entity</a> <a class="btn-small waves-effect waves-light" id="entity">entity</a>
<a class="btn-small waves-effect waves-light" id="text-annotation" style="background-color:#2FBBAB">Meta Data</a> <a class="btn-small waves-effect waves-light" id="text-annotation">Meta Data</a>
</div> </div>
</div> </div>
@ -209,7 +208,7 @@
<i class="material-icons right">send</i> <i class="material-icons right">send</i>
</p> </p>
</div> </div>
<div class="hide" id="no-value-metadata-message" style="padding-top:25px; margin-left:-20px;"><i>No value entered!</i></div> <div class="hide" id="no-value-metadata-message"><i>No value entered!</i></div>
</div> </div>
</div> </div>
@ -217,9 +216,9 @@
<div id="positional-attr" class="hide"> <div id="positional-attr" class="hide">
<p></p> <p></p>
<div class="row" style="background-color:#f2eff7; padding:15px; border-top-style: solid; border-color:#6B3F89"> <div class="row" id="token-kind-selector">
<div class="col s5" style="margin-top:15px;"> <div class="col s5">
<h6 style="margin-left:15px;">Which kind of token are you looking for? <a class="modal-trigger" href="#query-builder-tutorial-modal"><i class="material-icons left" id="token-tutorial-info-icon" style="color:black;">help_outline</i></a></h6> <h6>Which kind of token are you looking for? <a class="modal-trigger" href="#query-builder-tutorial-modal"><i class="material-icons left" id="token-tutorial-info-icon">help_outline</i></a></h6>
</div> </div>
<div class="input-field col s3"> <div class="input-field col s3">
<select id="token-attr"> <select id="token-attr">
@ -331,13 +330,13 @@
<i class="material-icons right">send</i> <i class="material-icons right">send</i>
</p> </p>
</div> </div>
<div class="hide" id="no-value-message" style="padding-top:25px; margin-left:-20px;"><i>No value entered!</i></div> <div class="hide" id="no-value-message"><i>No value entered!</i></div>
</div> </div>
<div id="token-edit-options"> <div id="token-edit-options">
<div class="row"> <div class="row">
<h6 style="margin-left:15px;">Options to edit your token: <a class="modal-trigger" href="#query-builder-tutorial-modal" style="color:black"><i class="material-icons left" id="edit-options-tutorial-info-icon">help_outline</i></a></h6> <h6>Options to edit your token: <a class="modal-trigger" href="#query-builder-tutorial-modal"><i class="material-icons left" id="edit-options-tutorial-info-icon">help_outline</i></a></h6>
</div> </div>
<p></p> <p></p>
<div class="row"> <div class="row">
@ -346,7 +345,7 @@
<a id="option-group" class="btn-small waves-effect waves-light tooltipped" data-position="top" data-tooltip="Find character sequences from a list of options">Option Group</a> <a id="option-group" class="btn-small waves-effect waves-light tooltipped" data-position="top" data-tooltip="Find character sequences from a list of options">Option Group</a>
</div> </div>
<div class="col s3 m3 l3 xl3" id="incidence-modifiers-button"> <div class="col s3 m3 l3 xl3" id="incidence-modifiers-button">
<a class="dropdown-trigger btn-small waves-effect waves-light" href="#" data-target="incidence-modifiers" style="background-color:#2fbbab" data-position="top" data-tooltip="Incidence Modifiers are special characters or patterns, <br>which determine how often a character represented previously should occur.">incidence modifiers</a> <a class="dropdown-trigger btn-small waves-effect waves-light" href="#" data-target="incidence-modifiers" data-position="top" data-tooltip="Incidence Modifiers are special characters or patterns, <br>which determine how often a character represented previously should occur.">incidence modifiers</a>
</div> </div>
<ul id="incidence-modifiers" class="dropdown-content"> <ul id="incidence-modifiers" class="dropdown-content">
@ -358,7 +357,7 @@
</ul> </ul>
<div id="ignore-case-checkbox" class="col s2 m2 l2 xl2"> <div id="ignore-case-checkbox" class="col s2 m2 l2 xl2">
<p id="ignore-case" style="margin-top:5px;"> <p id="ignore-case">
<label> <label>
<input type="checkbox" class="filled-in" /> <input type="checkbox" class="filled-in" />
<span>Ignore Case</span> <span>Ignore Case</span>
@ -366,17 +365,17 @@
</p> </p>
</div> </div>
<div class="col s2 m2 l2 xl2" id="condition-container"> <div class="col s2 m2 l2 xl2" id="condition-container">
<a class="btn-small tooltipped waves-effect waves-light" id="or" style="background-color:#fc0" data-position="bottom" data-tooltip="You can add another condition to your token. <br>At least one must be fulfilled">or</a> <a class="btn-small tooltipped waves-effect waves-light" id="or" data-position="bottom" data-tooltip="You can add another condition to your token. <br>At least one must be fulfilled">or</a>
<a class="btn-small tooltipped waves-effect waves-light" id="and" style="background-color:#fc0" data-position="bottom" data-tooltip="You can add another condition to your token. <br>Both must be fulfilled">and</a> <a class="btn-small tooltipped waves-effect waves-light" id="and" data-position="bottom" data-tooltip="You can add another condition to your token. <br>Both must be fulfilled">and</a>
</div> </div>
</div> </div>
</div> </div>
</div> </div>
<div id ="exactlyN" class="modal"> <div id="exactlyN" class="modal">
<div class="row modal-content"> <div class="row modal-content">
<div class= "input-field col s10"> <div class="input-field col s10">
<i class="material-icons prefix">mode_edit</i> <i class="material-icons prefix">mode_edit</i>
<input placeholder="type in a number for 'n'" type="text" id="n-input"> <input placeholder="type in a number for 'n'" type="text" id="n-input">
</div> </div>
@ -388,7 +387,7 @@
</div> </div>
</div> </div>
<div id ="betweenNM" class="modal" style="width:60%;"> <div id="betweenNM" class="modal">
<div class="row modal-content"> <div class="row modal-content">
<div class= "input-field col s5"> <div class= "input-field col s5">
<i class="material-icons prefix">mode_edit</i> <i class="material-icons prefix">mode_edit</i>
@ -410,17 +409,17 @@
</div> </div>
</div> </div>
<div class="modal modal-fixed-footer" id="query-builder-tutorial-modal" style="width:60%;"> <div class="modal modal-fixed-footer" id="query-builder-tutorial-modal">
<div class="modal-content" > <div class="modal-content" >
<div id="query-builder-tutorial-start"></div> <div id="query-builder-tutorial-start"></div>
<ul class="tabs" style="margin-top:10px;"> <ul class="tabs">
<li class="tab"><a class="active" href="#query-builder-tutorial">Query Builder Tutorial</a></li> <li class="tab"><a class="active" href="#query-builder-tutorial">Query Builder Tutorial</a></li>
{# <li class="tab"><a href="#qb-examples">Examples</a></li> #} {# <li class="tab"><a href="#qb-examples">Examples</a></li> #}
<li class="tab"><a href="#cql-cb-tutorial">Corpus Query Language Tutorial</a></li> <li class="tab"><a href="#cql-cb-tutorial">Corpus Query Language Tutorial</a></li>
<li class="tab"><a href="#tagsets-cb-tutorial">Tagsets</a></li> <li class="tab"><a href="#tagsets-cb-tutorial">Tagsets</a></li>
</ul> </ul>
<div id="query-builder-tutorial" style="padding:15px;"> <div id="query-builder-tutorial">
{% include "main/manual/_09_query_builder.html.j2" %} {% include "main/manual/_09_query_builder.html.j2" %}
</div> </div>
{# <div id="qb-examples"></div> #} {# <div id="qb-examples"></div> #}
@ -434,7 +433,7 @@
{% include "main/manual/_10_tagsets.html.j2" %} {% include "main/manual/_10_tagsets.html.j2" %}
</div> </div>
<div class="fixed-action-btn"> <div class="fixed-action-btn">
<a class="btn-floating btn-large teal" id="scroll-up-button-query-builder-tutorial" href='#query-builder-tutorial-start' style="background:#28B3D1"> <a class="btn-floating btn-large teal" id="scroll-up-button-query-builder-tutorial" href='#query-builder-tutorial-start'>
<i class="large material-icons">arrow_upward</i> <i class="large material-icons">arrow_upward</i>
</a> </a>
</div> </div>

View File

@ -11,6 +11,9 @@ load_dotenv(os.path.join(basedir, '.env'))
class Config: class Config:
''' Docker '''
DOCKER_NETWORK_NAME = os.environ.get('DOCKER_NETWORK_NAME', 'nopaque_default')
''' APIFairy ''' ''' APIFairy '''
APIFAIRY_TITLE = 'nopaque' APIFAIRY_TITLE = 'nopaque'
APIFAIRY_VERSION = '0.0.1' APIFAIRY_VERSION = '0.0.1'

View File

@ -1,5 +1,9 @@
version: "3.5" version: "3.5"
networks:
default:
name: "${DOCKER_NETWORK_NAME:-nopaque_default}"
services: services:
db: db:
env_file: db.env env_file: db.env

View File

@ -17,8 +17,8 @@ depends_on = None
def upgrade(): def upgrade():
# ### commands auto generated by Alembic - please adjust! ### op.create_table(
op.create_table('tokens', 'tokens',
sa.Column('id', sa.Integer(), nullable=False), sa.Column('id', sa.Integer(), nullable=False),
sa.Column('user_id', sa.Integer(), nullable=True), sa.Column('user_id', sa.Integer(), nullable=True),
sa.Column('access_token', sa.String(length=64), nullable=True), sa.Column('access_token', sa.String(length=64), nullable=True),
@ -30,12 +30,9 @@ def upgrade():
) )
op.create_index(op.f('ix_tokens_access_token'), 'tokens', ['access_token'], unique=False) op.create_index(op.f('ix_tokens_access_token'), 'tokens', ['access_token'], unique=False)
op.create_index(op.f('ix_tokens_refresh_token'), 'tokens', ['refresh_token'], unique=False) op.create_index(op.f('ix_tokens_refresh_token'), 'tokens', ['refresh_token'], unique=False)
# ### end Alembic commands ###
def downgrade(): def downgrade():
# ### commands auto generated by Alembic - please adjust! ###
op.drop_index(op.f('ix_tokens_refresh_token'), table_name='tokens') op.drop_index(op.f('ix_tokens_refresh_token'), table_name='tokens')
op.drop_index(op.f('ix_tokens_access_token'), table_name='tokens') op.drop_index(op.f('ix_tokens_access_token'), table_name='tokens')
op.drop_table('tokens') op.drop_table('tokens')
# ### end Alembic commands ###

View File

@ -17,12 +17,10 @@ depends_on = None
def upgrade(): def upgrade():
# ### commands auto generated by Alembic - please adjust! ###
op.drop_column('transkribus_htr_models', 'transkribus_name') op.drop_column('transkribus_htr_models', 'transkribus_name')
# ### end Alembic commands ###
def downgrade(): def downgrade():
# ### commands auto generated by Alembic - please adjust! ### op.add_column('transkribus_htr_models',
op.add_column('transkribus_htr_models', sa.Column('transkribus_name', sa.VARCHAR(length=64), autoincrement=False, nullable=True)) sa.Column('transkribus_name', sa.String(length=64), autoincrement=False, nullable=True)
# ### end Alembic commands ### )

View File

@ -17,13 +17,12 @@ depends_on = None
def upgrade(): def upgrade():
# ### commands auto generated by Alembic - please adjust! ### op.add_column(
op.add_column('corpora', sa.Column('is_public', sa.Boolean(), nullable=True)) 'corpora',
sa.Column('is_public', sa.Boolean(), nullable=True)
)
op.execute('UPDATE corpora SET is_public = false;') op.execute('UPDATE corpora SET is_public = false;')
# ### end Alembic commands ###
def downgrade(): def downgrade():
# ### commands auto generated by Alembic - please adjust! ###
op.drop_column('corpora', 'is_public') op.drop_column('corpora', 'is_public')
# ### end Alembic commands ###

View File

@ -0,0 +1,63 @@
"""Add spacy_nlp_pipeline_models table
Revision ID: 31dd42e5ea6f
Revises: a3b727e3ff71
Create Date: 2022-10-13 12:47:50.870474
"""
from alembic import op
import shutil
import sqlalchemy as sa
import os
from app.models import User
# revision identifiers, used by Alembic.
revision = '31dd42e5ea6f'
down_revision = 'a3b727e3ff71'
branch_labels = None
depends_on = None
def upgrade():
    """Prepare per-user model directories and create the table.

    For every user, a ``spacy_nlp_pipeline_models`` directory is ensured
    below ``user.path``: a missing directory is created and an existing,
    empty directory is accepted. Afterwards the
    ``spacy_nlp_pipeline_models`` table is created.

    Raises:
        OSError: If the path exists but is not a directory, or if the
            directory already contains entries.
    """
    for user in User.query.all():
        spacy_nlp_pipeline_models_dir = os.path.join(user.path, 'spacy_nlp_pipeline_models')
        if os.path.exists(spacy_nlp_pipeline_models_dir):
            if not os.path.isdir(spacy_nlp_pipeline_models_dir):
                raise OSError(f'Not a directory: {spacy_nlp_pipeline_models_dir}')
            # os.listdir() returns a non-empty (truthy) list when the
            # directory holds entries; only then is it "not empty". The
            # previous ``not os.listdir(...)`` test had this inverted and
            # rejected empty directories while accepting populated ones.
            if os.listdir(spacy_nlp_pipeline_models_dir):
                raise OSError(f'Directory not empty: {spacy_nlp_pipeline_models_dir}')
        else:
            os.mkdir(spacy_nlp_pipeline_models_dir)

    op.create_table(
        'spacy_nlp_pipeline_models',
        sa.Column('creation_date', sa.DateTime(), nullable=True),
        sa.Column('filename', sa.String(length=255), nullable=True),
        sa.Column('last_edited_date', sa.DateTime(), nullable=True),
        sa.Column('mimetype', sa.String(length=255), nullable=True),
        sa.Column('id', sa.Integer(), nullable=False),
        sa.Column('user_id', sa.Integer(), nullable=True),
        sa.Column('title', sa.String(length=64), nullable=True),
        sa.Column('description', sa.String(length=255), nullable=True),
        sa.Column('version', sa.String(length=16), nullable=True),
        sa.Column('compatible_service_versions', sa.String(length=255), nullable=True),
        sa.Column('publisher', sa.String(length=128), nullable=True),
        sa.Column('publisher_url', sa.String(length=512), nullable=True),
        sa.Column('publishing_url', sa.String(length=512), nullable=True),
        sa.Column('publishing_year', sa.Integer(), nullable=True),
        sa.Column('shared', sa.Boolean(), nullable=True),
        sa.ForeignKeyConstraint(['user_id'], ['users.id'], ),
        sa.PrimaryKeyConstraint('id')
    )
def downgrade():
    """Remove the per-user model directories and drop the table.

    Each user's ``spacy_nlp_pipeline_models`` directory is deleted
    recursively when it exists; users without one are skipped. The
    ``spacy_nlp_pipeline_models`` table is dropped afterwards.
    """
    for account in User.query.all():
        models_dir = os.path.join(account.path, 'spacy_nlp_pipeline_models')
        if not os.path.exists(models_dir):
            continue
        shutil.rmtree(models_dir)

    op.drop_table('spacy_nlp_pipeline_models')

View File

@ -0,0 +1,36 @@
"""Rename pipeline model tables
Revision ID: 63b2cc26a01f
Revises: 260b57d5f4e7
Create Date: 2022-10-11 14:32:13.227364
"""
from genericpath import isdir
from alembic import op
import os
from app.models import User
# revision identifiers, used by Alembic.
revision = '63b2cc26a01f'
down_revision = '260b57d5f4e7'
branch_labels = None
depends_on = None
def upgrade():
    """Rename the pipeline model directories and tables.

    Moves every user's ``tesseract_ocr_models`` directory to
    ``tesseract_ocr_pipeline_models`` and then renames the Tesseract and
    Transkribus model tables to their ``*_pipeline_models`` names.
    """
    for account in User.query.all():
        source_dir = os.path.join(account.path, 'tesseract_ocr_models')
        target_dir = os.path.join(account.path, 'tesseract_ocr_pipeline_models')
        os.rename(source_dir, target_dir)

    table_renames = (
        ('tesseract_ocr_models', 'tesseract_ocr_pipeline_models'),
        ('transkribus_htr_models', 'transkribus_htr_pipeline_models'),
    )
    for current_name, renamed_to in table_renames:
        op.rename_table(current_name, renamed_to)
def downgrade():
    """Revert the directory and table renames performed by ``upgrade``.

    Moves every user's ``tesseract_ocr_pipeline_models`` directory back to
    ``tesseract_ocr_models`` and restores the original table names.

    Raises:
        OSError: If a user's ``tesseract_ocr_pipeline_models`` directory
            cannot be renamed (for example because it does not exist).
    """
    for user in User.query.all():
        old_tesseract_ocr_pipeline_model_path = os.path.join(user.path, 'tesseract_ocr_models')
        new_tesseract_ocr_pipeline_model_path = os.path.join(user.path, 'tesseract_ocr_pipeline_models')
        # Only the new -> old move belongs in a downgrade. The former
        # additional old -> new rename ran first and failed because the old
        # path no longer exists once ``upgrade`` has been applied.
        os.rename(new_tesseract_ocr_pipeline_model_path, old_tesseract_ocr_pipeline_model_path)
    op.rename_table('tesseract_ocr_pipeline_models', 'tesseract_ocr_models')
    op.rename_table('transkribus_htr_pipeline_models', 'transkribus_htr_models')

View File

@ -6,7 +6,10 @@ Create Date: 2022-04-22 09:38:49.527498
""" """
from alembic import op from alembic import op
from flask import current_app
import sqlalchemy as sa import sqlalchemy as sa
import os
import shutil
# revision identifiers, used by Alembic. # revision identifiers, used by Alembic.
@ -17,8 +20,18 @@ depends_on = None
def upgrade(): def upgrade():
# ### commands auto generated by Alembic - please adjust! ### users_dir = os.path.join(current_app.config['NOPAQUE_DATA_DIR'], 'users')
op.create_table('roles', if os.path.exists(users_dir):
if not os.path.isdir(users_dir):
raise OSError(f'Not a directory: {users_dir}')
if not os.listdir(users_dir):
raise OSError(f'Directory not empty: {users_dir}')
else:
os.mkdir(users_dir)
op.create_table(
'roles',
sa.Column('id', sa.Integer(), nullable=False), sa.Column('id', sa.Integer(), nullable=False),
sa.Column('default', sa.Boolean(), nullable=True), sa.Column('default', sa.Boolean(), nullable=True),
sa.Column('name', sa.String(length=64), nullable=True), sa.Column('name', sa.String(length=64), nullable=True),
@ -27,7 +40,9 @@ def upgrade():
sa.UniqueConstraint('name') sa.UniqueConstraint('name')
) )
op.create_index(op.f('ix_roles_default'), 'roles', ['default'], unique=False) op.create_index(op.f('ix_roles_default'), 'roles', ['default'], unique=False)
op.create_table('users',
op.create_table(
'users',
sa.Column('id', sa.Integer(), nullable=False), sa.Column('id', sa.Integer(), nullable=False),
sa.Column('role_id', sa.Integer(), nullable=True), sa.Column('role_id', sa.Integer(), nullable=True),
sa.Column('confirmed', sa.Boolean(), nullable=True), sa.Column('confirmed', sa.Boolean(), nullable=True),
@ -46,7 +61,9 @@ def upgrade():
op.create_index(op.f('ix_users_email'), 'users', ['email'], unique=True) op.create_index(op.f('ix_users_email'), 'users', ['email'], unique=True)
op.create_index(op.f('ix_users_token'), 'users', ['token'], unique=True) op.create_index(op.f('ix_users_token'), 'users', ['token'], unique=True)
op.create_index(op.f('ix_users_username'), 'users', ['username'], unique=True) op.create_index(op.f('ix_users_username'), 'users', ['username'], unique=True)
op.create_table('corpora',
op.create_table(
'corpora',
sa.Column('id', sa.Integer(), nullable=False), sa.Column('id', sa.Integer(), nullable=False),
sa.Column('user_id', sa.Integer(), nullable=True), sa.Column('user_id', sa.Integer(), nullable=True),
sa.Column('creation_date', sa.DateTime(), nullable=True), sa.Column('creation_date', sa.DateTime(), nullable=True),
@ -59,7 +76,9 @@ def upgrade():
sa.ForeignKeyConstraint(['user_id'], ['users.id'], ), sa.ForeignKeyConstraint(['user_id'], ['users.id'], ),
sa.PrimaryKeyConstraint('id') sa.PrimaryKeyConstraint('id')
) )
op.create_table('jobs',
op.create_table(
'jobs',
sa.Column('id', sa.Integer(), nullable=False), sa.Column('id', sa.Integer(), nullable=False),
sa.Column('user_id', sa.Integer(), nullable=True), sa.Column('user_id', sa.Integer(), nullable=True),
sa.Column('creation_date', sa.DateTime(), nullable=True), sa.Column('creation_date', sa.DateTime(), nullable=True),
@ -73,7 +92,9 @@ def upgrade():
sa.ForeignKeyConstraint(['user_id'], ['users.id'], ), sa.ForeignKeyConstraint(['user_id'], ['users.id'], ),
sa.PrimaryKeyConstraint('id') sa.PrimaryKeyConstraint('id')
) )
op.create_table('tesseract_ocr_models',
op.create_table(
'tesseract_ocr_models',
sa.Column('creation_date', sa.DateTime(), nullable=True), sa.Column('creation_date', sa.DateTime(), nullable=True),
sa.Column('filename', sa.String(length=255), nullable=True), sa.Column('filename', sa.String(length=255), nullable=True),
sa.Column('last_edited_date', sa.DateTime(), nullable=True), sa.Column('last_edited_date', sa.DateTime(), nullable=True),
@ -92,7 +113,9 @@ def upgrade():
sa.ForeignKeyConstraint(['user_id'], ['users.id'], ), sa.ForeignKeyConstraint(['user_id'], ['users.id'], ),
sa.PrimaryKeyConstraint('id') sa.PrimaryKeyConstraint('id')
) )
op.create_table('transkribus_htr_models',
op.create_table(
'transkribus_htr_models',
sa.Column('id', sa.Integer(), nullable=False), sa.Column('id', sa.Integer(), nullable=False),
sa.Column('user_id', sa.Integer(), nullable=True), sa.Column('user_id', sa.Integer(), nullable=True),
sa.Column('shared', sa.Boolean(), nullable=True), sa.Column('shared', sa.Boolean(), nullable=True),
@ -101,7 +124,9 @@ def upgrade():
sa.ForeignKeyConstraint(['user_id'], ['users.id'], ), sa.ForeignKeyConstraint(['user_id'], ['users.id'], ),
sa.PrimaryKeyConstraint('id') sa.PrimaryKeyConstraint('id')
) )
op.create_table('corpus_files',
op.create_table(
'corpus_files',
sa.Column('creation_date', sa.DateTime(), nullable=True), sa.Column('creation_date', sa.DateTime(), nullable=True),
sa.Column('filename', sa.String(length=255), nullable=True), sa.Column('filename', sa.String(length=255), nullable=True),
sa.Column('last_edited_date', sa.DateTime(), nullable=True), sa.Column('last_edited_date', sa.DateTime(), nullable=True),
@ -123,7 +148,9 @@ def upgrade():
sa.ForeignKeyConstraint(['corpus_id'], ['corpora.id'], ), sa.ForeignKeyConstraint(['corpus_id'], ['corpora.id'], ),
sa.PrimaryKeyConstraint('id') sa.PrimaryKeyConstraint('id')
) )
op.create_table('job_inputs',
op.create_table(
'job_inputs',
sa.Column('creation_date', sa.DateTime(), nullable=True), sa.Column('creation_date', sa.DateTime(), nullable=True),
sa.Column('filename', sa.String(length=255), nullable=True), sa.Column('filename', sa.String(length=255), nullable=True),
sa.Column('last_edited_date', sa.DateTime(), nullable=True), sa.Column('last_edited_date', sa.DateTime(), nullable=True),
@ -133,7 +160,9 @@ def upgrade():
sa.ForeignKeyConstraint(['job_id'], ['jobs.id'], ), sa.ForeignKeyConstraint(['job_id'], ['jobs.id'], ),
sa.PrimaryKeyConstraint('id') sa.PrimaryKeyConstraint('id')
) )
op.create_table('job_results',
op.create_table(
'job_results',
sa.Column('creation_date', sa.DateTime(), nullable=True), sa.Column('creation_date', sa.DateTime(), nullable=True),
sa.Column('filename', sa.String(length=255), nullable=True), sa.Column('filename', sa.String(length=255), nullable=True),
sa.Column('last_edited_date', sa.DateTime(), nullable=True), sa.Column('last_edited_date', sa.DateTime(), nullable=True),
@ -144,11 +173,14 @@ def upgrade():
sa.ForeignKeyConstraint(['job_id'], ['jobs.id'], ), sa.ForeignKeyConstraint(['job_id'], ['jobs.id'], ),
sa.PrimaryKeyConstraint('id') sa.PrimaryKeyConstraint('id')
) )
# ### end Alembic commands ###
def downgrade(): def downgrade():
# ### commands auto generated by Alembic - please adjust! ### users_dir = os.path.join(current_app.config['NOPAQUE_DATA_DIR'], 'users')
if os.path.exists(users_dir):
shutil.rmtree(users_dir)
op.drop_table('job_results') op.drop_table('job_results')
op.drop_table('job_inputs') op.drop_table('job_inputs')
op.drop_table('corpus_files') op.drop_table('corpus_files')
@ -162,4 +194,3 @@ def downgrade():
op.drop_table('users') op.drop_table('users')
op.drop_index(op.f('ix_roles_default'), table_name='roles') op.drop_index(op.f('ix_roles_default'), table_name='roles')
op.drop_table('roles') op.drop_table('roles')
# ### end Alembic commands ###

View File

@ -0,0 +1,32 @@
"""Remove transkribus_htr_pipeline_models table
Revision ID: a3b727e3ff71
Revises: 63b2cc26a01f
Create Date: 2022-10-12 13:08:19.065218
"""
from alembic import op
import sqlalchemy as sa
# revision identifiers, used by Alembic.
revision = 'a3b727e3ff71'
down_revision = '63b2cc26a01f'
branch_labels = None
depends_on = None
def upgrade():
    """Drop the ``transkribus_htr_pipeline_models`` table."""
    obsolete_table = 'transkribus_htr_pipeline_models'
    op.drop_table(obsolete_table)
def downgrade():
    """Recreate the ``transkribus_htr_pipeline_models`` table.

    The primary key and foreign key constraints are created with the
    ``transkribus_htr_models_*`` names given below.
    """
    columns = [
        sa.Column('id', sa.Integer(), autoincrement=True, nullable=False),
        sa.Column('user_id', sa.Integer(), autoincrement=False, nullable=True),
        sa.Column('shared', sa.Boolean(), autoincrement=False, nullable=True),
        sa.Column('transkribus_model_id', sa.Integer(), autoincrement=False, nullable=True),
    ]
    constraints = [
        sa.ForeignKeyConstraint(['user_id'], ['users.id'], name='transkribus_htr_models_user_id_fkey'),
        sa.PrimaryKeyConstraint('id', name='transkribus_htr_models_pkey'),
    ]
    op.create_table('transkribus_htr_pipeline_models', *columns, *constraints)

View File

@ -16,16 +16,18 @@ depends_on = None
def upgrade(): def upgrade():
# ### commands auto generated by Alembic - please adjust! ###
op.drop_index('ix_users_token', table_name='users') op.drop_index('ix_users_token', table_name='users')
op.drop_column('users', 'token') op.drop_column('users', 'token')
op.drop_column('users', 'token_expiration') op.drop_column('users', 'token_expiration')
# ### end Alembic commands ###
def downgrade(): def downgrade():
# ### commands auto generated by Alembic - please adjust! ### op.add_column(
op.add_column('users', sa.Column('token_expiration', sa.DateTime(), autoincrement=False, nullable=True)) 'users',
op.add_column('users', sa.Column('token', sa.VARCHAR(length=32), autoincrement=False, nullable=True)) sa.Column('token_expiration', sa.DateTime(), autoincrement=False, nullable=True)
)
op.add_column(
'users',
sa.Column('token', sa.String(length=32), autoincrement=False, nullable=True)
)
op.create_index('ix_users_token', 'users', ['token'], unique=False) op.create_index('ix_users_token', 'users', ['token'], unique=False)
# ### end Alembic commands ###

View File

@ -12,8 +12,8 @@ from app.models import (
JobResult, JobResult,
Permission, Permission,
Role, Role,
TesseractOCRModel, TesseractOCRPipelineModel,
TranskribusHTRModel, SpaCyNLPPipelineModel,
User User
) # noqa ) # noqa
from flask import Flask # noqa from flask import Flask # noqa
@ -42,8 +42,8 @@ def make_shell_context() -> Dict[str, Any]:
'JobResult': JobResult, 'JobResult': JobResult,
'Permission': Permission, 'Permission': Permission,
'Role': Role, 'Role': Role,
'TesseractOCRModel': TesseractOCRModel, 'TesseractOCRPipelineModel': TesseractOCRPipelineModel,
'TranskribusHTRModel': TranskribusHTRModel, 'SpaCyNLPPipelineModel': SpaCyNLPPipelineModel,
'User': User 'User': User
} }