66 Commits

Author SHA1 Message Date
713a7645db Bump nopaque version 2024-12-05 15:34:11 +01:00
0c64c07925 Update corpus analysis loading modal 2024-12-05 15:33:15 +01:00
a6ddf4c980 Remove import corpus button 2024-12-05 15:12:53 +01:00
cab5f7ea05 More js enhancements 2024-12-05 15:07:13 +01:00
07f09cdbd9 fix cqi_over_socketio 2024-12-05 15:07:03 +01:00
c97b2a886e Further js refactoring 2024-12-05 14:26:05 +01:00
df2bffe0fd implement first version of jobs socketio namespace 2024-12-03 16:09:14 +01:00
aafb3ca3ec Update javascript app structure 2024-12-03 15:59:08 +01:00
12a3ac1d5d Update JS code structure 2024-12-02 09:34:17 +01:00
a2904caea2 Update cqpserver image version 2024-11-28 10:02:27 +01:00
e325552100 Update corpus analysis tabs to look the same as before base template update 2024-11-28 10:02:00 +01:00
e269156925 fix socketio emits from database event listeners 2024-11-27 15:46:54 +01:00
9c9de242ca Remove unsed css 2024-11-27 11:35:51 +01:00
ec54fdc3bb Restore service scheme on pages 2024-11-27 11:34:21 +01:00
2263a8d27d codestyle enhancements in base template 2024-11-21 11:22:57 +01:00
143cdd91f9 update workspace settings 2024-11-21 11:22:46 +01:00
b5f7478e14 Update templates 2024-11-21 11:12:11 +01:00
a95b8d979d Fix forms 2024-11-20 15:56:48 +01:00
18d5ab160e Optimize jinja wtf macros 2024-11-20 15:56:29 +01:00
7439edacef Add background color to job list entries 2024-11-20 15:55:59 +01:00
99d7a8bdfc Some fixes and improve jinja2 template performance by reducing include statements 2024-11-19 15:28:43 +01:00
54c4295bf7 Fixes and more descriptions 2024-11-18 13:32:55 +01:00
1e5c26b8e3 Reorganize Socket.IO code 2024-11-18 12:36:37 +01:00
9f56647cf7 highlight active items in top navbar 2024-11-18 12:35:53 +01:00
460257294d Use relative import for sub blueprints 2024-11-18 11:08:28 +01:00
2c43333c94 Check tos accepted in registration form 2024-11-18 11:03:29 +01:00
fc8b11fa66 update auth package 2024-11-15 16:07:29 +01:00
a8ab1bee71 Move some blueprints and rename routes 2024-11-15 15:59:08 +01:00
ee7f64f5be Design update 2024-11-15 15:21:26 +01:00
6aacac2419 flatten the contributions blueprint 2024-11-14 14:36:18 +01:00
ce253f4a65 Make the header span over the complete width 2024-11-13 16:08:18 +01:00
7b604ce4f2 Remove manual-modal references 2024-11-11 14:51:17 +01:00
98b20e5cab Remove colors from social area 2024-11-11 13:38:47 +01:00
a322ffb2f1 Fix README 2024-11-11 12:05:03 +01:00
29365984a3 fix some namespace responses 2024-11-11 08:45:16 +01:00
bd0a9c60f8 strictly use socket.io class based namespaces 2024-11-07 12:12:42 +01:00
d41ebc6efe Fix project vscode settings 2024-11-07 10:51:35 +01:00
63690222ed Rename cqi extensions file 2024-11-07 10:44:27 +01:00
b4faa1c695 Code enhancements in vrt file normalizer module 2024-11-07 10:40:25 +01:00
909b130285 Fix wrong import 2024-11-07 09:48:40 +01:00
c223f07289 Codestyle enhancements 2024-11-07 08:57:32 +01:00
fcb49025e9 remove unused socketio event handlers 2024-11-07 08:51:49 +01:00
191d7813a7 prefix extension name with "nopaque_" 2024-11-07 08:35:02 +01:00
f255fef631 Remove debug print statement 2024-11-07 08:32:20 +01:00
76171f306d Remove debug print statements 2024-11-07 08:31:52 +01:00
5ea6d45f46 Reset all corpora on deploy cli command 2024-11-07 08:31:31 +01:00
289a551122 Create dedicated '/users' Socket.IO Namespace 2024-11-06 13:04:30 +01:00
2a28f19660 Move Socket.IO Namespaces to dedicated directory 2024-11-06 12:27:49 +01:00
fc2ace4b9e Remove unused Socket.IO AdminNamespace 2024-11-05 14:55:48 +01:00
a174bf968f Remove unused config entry 2024-11-05 14:02:45 +01:00
551b928dca Add typehints to email code 2024-11-05 09:05:31 +01:00
eeb5a280b3 move blueprints in dedicated folder 2024-09-30 13:30:13 +02:00
5fc3015bf1 rename functions to indicate that they should not be imported directly 2024-09-26 15:34:52 +02:00
5f05cedf5e Make the "daemon" (now tasks) more understandable 2024-09-26 15:33:32 +02:00
aabea234fe More simplification 2024-09-26 14:45:05 +02:00
492fdc9d28 modernize type hinting 2024-09-25 17:46:53 +02:00
02e6c7c16c various updates 2024-09-25 12:08:20 +02:00
c7ca674b2f Streamline setup process and init code 2024-09-25 10:45:53 +02:00
81c6f32a35 Simplify logging configuration 2024-08-01 16:29:06 +02:00
94548ac30c Move sheduler start logic 2024-08-01 12:10:33 +02:00
158190de1a Codesstyle enhancements 2024-08-01 12:00:52 +02:00
13e4d461c7 Update .env.tpl 2024-08-01 12:00:34 +02:00
e51dcafa6f Update vscode settings.json 2024-08-01 11:59:50 +02:00
f79c6d48b2 Going back to vanilla css 2024-07-01 15:37:34 +02:00
5ee9edef9f Fix multiple db event listener registrations 2024-06-03 11:08:21 +02:00
f1ccda6ad7 Fix colors in corpus analysis 2024-06-03 11:03:57 +02:00
221 changed files with 12476 additions and 2776 deletions

View File

@ -1,32 +1,20 @@
##############################################################################
# Variables for use in Docker Compose YAML files #
# Environment variables used by Docker Compose config files. #
##############################################################################
# HINT: Use this bash command `id -u`
# NOTE: 0 (= root user) is not allowed
HOST_UID=
# HINT: Use this bash command `id -g`
# NOTE: 0 (= root group) is not allowed
HOST_GID=
# HINT: Use this bash command `getent group docker | cut -d: -f3`
HOST_DOCKER_GID=
# DEFAULT: nopaque
# DOCKER_DEFAULT_NETWORK_NAME=
# DEFAULT: ./volumes/db/data
# NOTE: Use `.` as <project-basedir>
# DOCKER_DB_SERVICE_DATA_VOLUME_SOURCE_PATH=
# DEFAULT: ./volumes/mq/data
# NOTE: Use `.` as <project-basedir>
# DOCKER_MQ_SERVICE_DATA_VOLUME_SOURCE_PATH=
NOPAQUE_DOCKER_NETWORK_NAME=nopaque
# NOTE: This must be a network share and it must be available on all
# Docker Swarm nodes, mounted to the same path with the same
# user and group ownership.
DOCKER_NOPAQUE_SERVICE_DATA_VOLUME_SOURCE_PATH=
# DEFAULT: ./volumes/nopaque/logs
# NOTE: Use `.` as <project-basedir>
# DOCKER_NOPAQUE_SERVICE_LOGS_VOLUME_SOURCE_PATH=.
# Docker Swarm nodes, mounted to the same path.
HOST_NOPAQUE_DATA_PATH=/mnt/nopaque

2
.gitignore vendored
View File

@ -2,8 +2,6 @@
app/static/gen/
volumes/
docker-compose.override.yml
logs/
!logs/dummy
*.env
*.pjentsch-testing

17
.vscode/settings.json vendored
View File

@ -1,9 +1,17 @@
{
"editor.rulers": [79],
"files.insertFinalNewline": true,
"[css]": {
"editor.tabSize": 2
"editor.tabSize": 4,
"emmet.includeLanguages": {
"jinja-html": "html"
},
"files.associations": {
".flaskenv": "env",
"*.env.tpl": "env",
"*.txt.j2": "jinja"
},
"files.insertFinalNewline": true,
"files.trimFinalNewlines": true,
"files.trimTrailingWhitespace": true,
"[html]": {
"editor.tabSize": 2
},
@ -12,8 +20,5 @@
},
"[jinja-html]": {
"editor.tabSize": 2
},
"[scss]": {
"editor.tabSize": 2
}
}

View File

@ -46,7 +46,6 @@ COPY --chown=nopaque:nopaque app app
COPY --chown=nopaque:nopaque migrations migrations
COPY --chown=nopaque:nopaque tests tests
COPY --chown=nopaque:nopaque boot.sh config.py wsgi.py ./
RUN mkdir logs
EXPOSE 5000

View File

@ -35,7 +35,7 @@ username@hostname:~$ sudo mount --types cifs --options gid=${USER},password=nopa
# Clone the nopaque repository
username@hostname:~$ git clone https://gitlab.ub.uni-bielefeld.de/sfb1288inf/nopaque.git
# Create data directories
username@hostname:~$ mkdir data/{db,logs,mq}
username@hostname:~$ mkdir -p volumes/{db,mq}
username@hostname:~$ cp db.env.tpl db.env
username@hostname:~$ cp .env.tpl .env
# Fill out the variables within these files.

View File

@ -2,6 +2,7 @@ from apifairy import APIFairy
from config import Config
from docker import DockerClient
from flask import Flask
from flask.logging import default_handler
from flask_apscheduler import APScheduler
from flask_assets import Environment
from flask_login import LoginManager
@ -12,90 +13,142 @@ from flask_paranoid import Paranoid
from flask_socketio import SocketIO
from flask_sqlalchemy import SQLAlchemy
from flask_hashids import Hashids
from logging import Formatter, StreamHandler
from werkzeug.middleware.proxy_fix import ProxyFix
docker_client = DockerClient.from_env()
apifairy = APIFairy()
assets = Environment()
db = SQLAlchemy()
docker_client = DockerClient()
hashids = Hashids()
login = LoginManager()
login.login_view = 'auth.login'
login.login_message = 'Please log in to access this page.'
ma = Marshmallow()
mail = Mail()
migrate = Migrate(compare_type=True)
paranoid = Paranoid()
paranoid.redirect_view = '/'
scheduler = APScheduler()
socketio = SocketIO()
# TODO: Create export for lemmatized corpora
def create_app(config: Config = Config) -> Flask:
''' Creates an initialized Flask (WSGI Application) object. '''
''' Creates an initialized Flask object. '''
app = Flask(__name__)
app.config.from_object(config)
config.init_app(app)
# region Logging
log_formatter = Formatter(
fmt=app.config['NOPAQUE_LOG_FORMAT'],
datefmt=app.config['NOPAQUE_LOG_DATE_FORMAT']
)
log_handler = StreamHandler()
log_handler.setFormatter(log_formatter)
log_handler.setLevel(app.config['NOPAQUE_LOG_LEVEL'])
app.logger.setLevel('DEBUG')
app.logger.removeHandler(default_handler)
app.logger.addHandler(log_handler)
# endregion Logging
# region Middlewares
if app.config['NOPAQUE_PROXY_FIX_ENABLED']:
app.wsgi_app = ProxyFix(
app.wsgi_app,
x_for=app.config['NOPAQUE_PROXY_FIX_X_FOR'],
x_host=app.config['NOPAQUE_PROXY_FIX_X_HOST'],
x_port=app.config['NOPAQUE_PROXY_FIX_X_PORT'],
x_prefix=app.config['NOPAQUE_PROXY_FIX_X_PREFIX'],
x_proto=app.config['NOPAQUE_PROXY_FIX_X_PROTO']
)
# endregion Middlewares
# region Extensions
docker_client.login(
app.config['NOPAQUE_DOCKER_REGISTRY_USERNAME'],
password=app.config['NOPAQUE_DOCKER_REGISTRY_PASSWORD'],
registry=app.config['NOPAQUE_DOCKER_REGISTRY']
)
from .models import AnonymousUser, User
apifairy.init_app(app)
assets.init_app(app)
db.init_app(app)
hashids.init_app(app)
login.init_app(app)
login.anonymous_user = AnonymousUser
login.login_view = 'auth.login'
login.user_loader(lambda user_id: User.query.get(int(user_id)))
ma.init_app(app)
mail.init_app(app)
migrate.init_app(app, db)
paranoid.init_app(app)
paranoid.redirect_view = '/'
scheduler.init_app(app)
socketio.init_app(app, message_queue=app.config['NOPAQUE_SOCKETIO_MESSAGE_QUEUE_URI']) # noqa
socketio.init_app(app, message_queue=app.config['NOPAQUE_SOCKETIO_MESSAGE_QUEUE_URI'])
# endregion Extensions
from .models.event_listeners import register_event_listeners
register_event_listeners()
from .admin import bp as admin_blueprint
# region Blueprints
from .blueprints.admin import bp as admin_blueprint
app.register_blueprint(admin_blueprint, url_prefix='/admin')
from .api import bp as api_blueprint
from .blueprints.api import bp as api_blueprint
app.register_blueprint(api_blueprint, url_prefix='/api')
from .auth import bp as auth_blueprint
from .blueprints.auth import bp as auth_blueprint
app.register_blueprint(auth_blueprint)
from .contributions import bp as contributions_blueprint
from .blueprints.contributions import bp as contributions_blueprint
app.register_blueprint(contributions_blueprint, url_prefix='/contributions')
from .corpora import bp as corpora_blueprint
from .corpora.cqi_over_sio import CQiNamespace
from .blueprints.corpora import bp as corpora_blueprint
app.register_blueprint(corpora_blueprint, cli_group='corpus', url_prefix='/corpora')
socketio.on_namespace(CQiNamespace('/cqi_over_sio'))
from .errors import bp as errors_bp
from .blueprints.errors import bp as errors_bp
app.register_blueprint(errors_bp)
from .jobs import bp as jobs_blueprint
from .blueprints.jobs import bp as jobs_blueprint
app.register_blueprint(jobs_blueprint, url_prefix='/jobs')
from .main import bp as main_blueprint
from .blueprints.main import bp as main_blueprint
app.register_blueprint(main_blueprint, cli_group=None)
from .services import bp as services_blueprint
from .blueprints.services import bp as services_blueprint
app.register_blueprint(services_blueprint, url_prefix='/services')
from .settings import bp as settings_blueprint
from .blueprints.settings import bp as settings_blueprint
app.register_blueprint(settings_blueprint, url_prefix='/settings')
from .users import bp as users_blueprint
from .blueprints.users import bp as users_blueprint
app.register_blueprint(users_blueprint, cli_group='user', url_prefix='/users')
from .workshops import bp as workshops_blueprint
from .blueprints.workshops import bp as workshops_blueprint
app.register_blueprint(workshops_blueprint, url_prefix='/workshops')
# endregion Blueprints
# region SocketIO Namespaces
from .namespaces.cqi_over_sio import CQiOverSocketIONamespace
socketio.on_namespace(CQiOverSocketIONamespace('/cqi_over_sio'))
from .namespaces.users import UsersNamespace
socketio.on_namespace(UsersNamespace('/users'))
# endregion SocketIO Namespaces
# region Database event Listeners
from .models.event_listeners import register_event_listeners
register_event_listeners()
# endregion Database event Listeners
# region Add scheduler jobs
if app.config['NOPAQUE_IS_PRIMARY_INSTANCE']:
from .jobs import handle_corpora
scheduler.add_job('handle_corpora', handle_corpora, seconds=3, trigger='interval')
from .jobs import handle_jobs
scheduler.add_job('handle_jobs', handle_jobs, seconds=3, trigger='interval')
# endregion Add scheduler jobs
return app

View File

@ -1,49 +0,0 @@
from flask_login import current_user
from flask_socketio import disconnect, Namespace
from app import db, hashids
from app.decorators import socketio_admin_required
from app.models import User
class AdminNamespace(Namespace):
def on_connect(self):
# Check if the user is authenticated and is an administrator
if not (current_user.is_authenticated and current_user.is_administrator):
disconnect()
@socketio_admin_required
def on_set_user_confirmed(self, user_hashid: str, confirmed_value: bool):
# Decode the user hashid
user_id = hashids.decode(user_hashid)
# Validate user_id
if not isinstance(user_id, int):
return {
'code': 400,
'body': 'user_id is invalid'
}
# Validate confirmed_value
if not isinstance(confirmed_value, bool):
return {
'code': 400,
'body': 'confirmed_value is invalid'
}
# Load user from database
user = User.query.get(user_id)
if user is None:
return {
'code': 404,
'body': 'User not found'
}
# Update user confirmed status
user.confirmed = confirmed_value
db.session.commit()
return {
'code': 200,
'body': f'User "{user.username}" is now {"confirmed" if confirmed_value else "unconfirmed"}'
}

View File

@ -1,5 +0,0 @@
from flask import Blueprint
bp = Blueprint('auth', __name__)
from . import routes

View File

@ -1,7 +1,7 @@
from flask import abort, flash, redirect, render_template, url_for
from app import db, hashids
from app.models import Avatar, Corpus, Role, User
from app.users.settings.forms import (
from app.blueprints.users.settings.forms import (
UpdateAvatarForm,
UpdatePasswordForm,
UpdateNotificationsForm,

View File

@ -10,7 +10,7 @@ from app.models import (
User,
UserSettingJobStatusMailNotificationLevel
)
from app.services import SERVICES
from app.blueprints.services import SERVICES

View File

@ -0,0 +1,29 @@
from flask import Blueprint, redirect, request, url_for
from flask_login import current_user
from app import db
bp = Blueprint('auth', __name__)
@bp.before_app_request
def before_request():
if not current_user.is_authenticated:
return
current_user.ping()
db.session.commit()
if (
not current_user.confirmed
and request.endpoint
and request.blueprint != 'auth'
and request.endpoint != 'static'
):
return redirect(url_for('auth.unconfirmed'))
if not current_user.terms_of_use_accepted:
return redirect(url_for('main.terms_of_use'))
from . import routes

View File

@ -60,7 +60,11 @@ class RegistrationForm(FlaskForm):
def validate_username(self, field):
if User.query.filter_by(username=field.data).first():
raise ValidationError('Username already in use')
raise ValidationError('Username already registered')
def validate_terms_of_use_accepted(self, field):
if not field.data:
raise ValidationError('Terms of Use not accepted')
class LoginForm(FlaskForm):

View File

@ -12,26 +12,6 @@ from .forms import (
)
@bp.before_app_request
def before_request():
"""
Checks if a user is unconfirmed when visiting specific sites. Redirects to
unconfirmed view if user is unconfirmed.
"""
if not current_user.is_authenticated:
return
current_user.ping()
db.session.commit()
if (not current_user.confirmed
and request.endpoint
and request.blueprint != 'auth'
and request.endpoint != 'static'):
return redirect(url_for('auth.unconfirmed'))
if not current_user.terms_of_use_accepted:
return redirect(url_for('main.terms_of_use'))
@bp.route('/register', methods=['GET', 'POST'])
def register():
if current_user.is_authenticated:

View File

@ -0,0 +1,25 @@
from flask import Blueprint
from flask_login import login_required
bp = Blueprint('contributions', __name__)
@bp.before_request
@login_required
def before_request():
'''
Ensures that the routes in this package can only be visited by users that
are logged in.
'''
pass
from . import routes
from .spacy_nlp_pipeline_models import bp as spacy_nlp_pipeline_models_bp
bp.register_blueprint(spacy_nlp_pipeline_models_bp, url_prefix='/spacy-nlp-pipeline-models')
from .tesseract_ocr_pipeline_models import bp as tesseract_ocr_pipeline_models_bp
bp.register_blueprint(tesseract_ocr_pipeline_models_bp, url_prefix='/tesseract-ocr-pipeline-models')

View File

@ -0,0 +1,7 @@
from flask import render_template
from . import bp
@bp.route('')
def index():
return render_template('contributions/index.html.j2', title='Contributions')

View File

@ -0,0 +1,18 @@
from flask import current_app, Blueprint
from flask_login import login_required
bp = Blueprint('spacy_nlp_pipeline_models', __name__)
@bp.before_request
@login_required
def before_request():
'''
Ensures that the routes in this package can only be visited by users that
are logged in.
'''
pass
from . import routes, json_routes

View File

@ -1,7 +1,7 @@
from flask_wtf.file import FileField, FileRequired
from wtforms import StringField, ValidationError
from wtforms.validators import InputRequired, Length
from app.services import SERVICES
from app.blueprints.services import SERVICES
from ..forms import ContributionBaseForm, UpdateContributionBaseForm

View File

@ -1,5 +1,5 @@
from flask import abort, current_app, request
from flask_login import current_user
from flask_login import current_user, login_required
from threading import Thread
from app import db
from app.decorators import content_negotiation, permission_required
@ -7,7 +7,8 @@ from app.models import SpaCyNLPPipelineModel
from . import bp
@bp.route('/spacy-nlp-pipeline-models/<hashid:spacy_nlp_pipeline_model_id>', methods=['DELETE'])
@bp.route('/<hashid:spacy_nlp_pipeline_model_id>', methods=['DELETE'])
@login_required
@content_negotiation(produces='application/json')
def delete_spacy_model(spacy_nlp_pipeline_model_id):
def _delete_spacy_model(app, spacy_nlp_pipeline_model_id):
@ -15,7 +16,7 @@ def delete_spacy_model(spacy_nlp_pipeline_model_id):
snpm = SpaCyNLPPipelineModel.query.get(spacy_nlp_pipeline_model_id)
snpm.delete()
db.session.commit()
snpm = SpaCyNLPPipelineModel.query.get_or_404(spacy_nlp_pipeline_model_id)
if not (snpm.user == current_user or current_user.is_administrator):
abort(403)
@ -31,7 +32,7 @@ def delete_spacy_model(spacy_nlp_pipeline_model_id):
return response_data, 202
@bp.route('/spacy-nlp-pipeline-models/<hashid:spacy_nlp_pipeline_model_id>/is_public', methods=['PUT'])
@bp.route('/<hashid:spacy_nlp_pipeline_model_id>/is_public', methods=['PUT'])
@permission_required('CONTRIBUTE')
@content_negotiation(consumes='application/json', produces='application/json')
def update_spacy_nlp_pipeline_model_is_public(spacy_nlp_pipeline_model_id):

View File

@ -1,5 +1,5 @@
from flask import abort, flash, redirect, render_template, url_for
from flask_login import current_user
from flask_login import current_user, login_required
from app import db
from app.models import SpaCyNLPPipelineModel
from . import bp
@ -9,16 +9,15 @@ from .forms import (
)
@bp.route('/spacy-nlp-pipeline-models')
def spacy_nlp_pipeline_models():
return render_template(
'contributions/spacy_nlp_pipeline_models/spacy_nlp_pipeline_models.html.j2',
title='SpaCy NLP Pipeline Models'
)
@bp.route('/')
@login_required
def index():
return redirect(url_for('contributions.index', _anchor='spacy-nlp-pipeline-models'))
@bp.route('/spacy-nlp-pipeline-models/create', methods=['GET', 'POST'])
def create_spacy_nlp_pipeline_model():
@bp.route('/create', methods=['GET', 'POST'])
@login_required
def create():
form = CreateSpaCyNLPPipelineModelForm()
if form.is_submitted():
if not form.validate():
@ -42,7 +41,7 @@ def create_spacy_nlp_pipeline_model():
abort(500)
db.session.commit()
flash(f'SpaCy NLP Pipeline model "{snpm.title}" created')
return {}, 201, {'Location': url_for('.spacy_nlp_pipeline_models')}
return {}, 201, {'Location': url_for('.index')}
return render_template(
'contributions/spacy_nlp_pipeline_models/create.html.j2',
title='Create SpaCy NLP Pipeline Model',
@ -50,8 +49,9 @@ def create_spacy_nlp_pipeline_model():
)
@bp.route('/spacy-nlp-pipeline-models/<hashid:spacy_nlp_pipeline_model_id>', methods=['GET', 'POST'])
def spacy_nlp_pipeline_model(spacy_nlp_pipeline_model_id):
@bp.route('/<hashid:spacy_nlp_pipeline_model_id>', methods=['GET', 'POST'])
@login_required
def entity(spacy_nlp_pipeline_model_id):
snpm = SpaCyNLPPipelineModel.query.get_or_404(spacy_nlp_pipeline_model_id)
if not (snpm.user == current_user or current_user.is_administrator):
abort(403)
@ -61,9 +61,9 @@ def spacy_nlp_pipeline_model(spacy_nlp_pipeline_model_id):
if db.session.is_modified(snpm):
flash(f'SpaCy NLP Pipeline model "{snpm.title}" updated')
db.session.commit()
return redirect(url_for('.spacy_nlp_pipeline_models'))
return redirect(url_for('.index'))
return render_template(
'contributions/spacy_nlp_pipeline_models/spacy_nlp_pipeline_model.html.j2',
'contributions/spacy_nlp_pipeline_models/entity.html.j2',
title=f'{snpm.title} {snpm.version}',
form=form,
spacy_nlp_pipeline_model=snpm

View File

@ -2,7 +2,7 @@ from flask import Blueprint
from flask_login import login_required
bp = Blueprint('contributions', __name__)
bp = Blueprint('tesseract_ocr_pipeline_models', __name__)
@bp.before_request
@ -15,9 +15,4 @@ def before_request():
pass
from . import (
routes,
spacy_nlp_pipeline_models,
tesseract_ocr_pipeline_models,
transkribus_htr_pipeline_models
)
from . import json_routes, routes

View File

@ -1,6 +1,6 @@
from flask_wtf.file import FileField, FileRequired
from wtforms import ValidationError
from app.services import SERVICES
from app.blueprints.services import SERVICES
from ..forms import ContributionBaseForm, UpdateContributionBaseForm
@ -9,7 +9,7 @@ class CreateTesseractOCRPipelineModelForm(ContributionBaseForm):
'File',
validators=[FileRequired()]
)
def validate_tesseract_model_file(self, field):
if not field.data.filename.lower().endswith('.traineddata'):
raise ValidationError('traineddata files only!')

View File

@ -7,7 +7,7 @@ from app.models import TesseractOCRPipelineModel
from . import bp
@bp.route('/tesseract-ocr-pipeline-models/<hashid:tesseract_ocr_pipeline_model_id>', methods=['DELETE'])
@bp.route('/<hashid:tesseract_ocr_pipeline_model_id>', methods=['DELETE'])
@content_negotiation(produces='application/json')
def delete_tesseract_model(tesseract_ocr_pipeline_model_id):
def _delete_tesseract_ocr_pipeline_model(app, tesseract_ocr_pipeline_model_id):
@ -31,7 +31,7 @@ def delete_tesseract_model(tesseract_ocr_pipeline_model_id):
return response_data, 202
@bp.route('/tesseract-ocr-pipeline-models/<hashid:tesseract_ocr_pipeline_model_id>/is_public', methods=['PUT'])
@bp.route('/<hashid:tesseract_ocr_pipeline_model_id>/is_public', methods=['PUT'])
@permission_required('CONTRIBUTE')
@content_negotiation(consumes='application/json', produces='application/json')
def update_tesseract_ocr_pipeline_model_is_public(tesseract_ocr_pipeline_model_id):

View File

@ -9,16 +9,13 @@ from .forms import (
)
@bp.route('/tesseract-ocr-pipeline-models')
def tesseract_ocr_pipeline_models():
return render_template(
'contributions/tesseract_ocr_pipeline_models/tesseract_ocr_pipeline_models.html.j2',
title='Tesseract OCR Pipeline Models'
)
@bp.route('/')
def index():
return redirect(url_for('contributions.index', _anchor='tesseract-ocr-pipeline-models'))
@bp.route('/tesseract-ocr-pipeline-models/create', methods=['GET', 'POST'])
def create_tesseract_ocr_pipeline_model():
@bp.route('/create', methods=['GET', 'POST'])
def create():
form = CreateTesseractOCRPipelineModelForm()
if form.is_submitted():
if not form.validate():
@ -41,7 +38,7 @@ def create_tesseract_ocr_pipeline_model():
abort(500)
db.session.commit()
flash(f'Tesseract OCR Pipeline model "{topm.title}" created')
return {}, 201, {'Location': url_for('.tesseract_ocr_pipeline_models')}
return {}, 201, {'Location': url_for('.index')}
return render_template(
'contributions/tesseract_ocr_pipeline_models/create.html.j2',
title='Create Tesseract OCR Pipeline Model',
@ -49,8 +46,8 @@ def create_tesseract_ocr_pipeline_model():
)
@bp.route('/tesseract-ocr-pipeline-models/<hashid:tesseract_ocr_pipeline_model_id>', methods=['GET', 'POST'])
def tesseract_ocr_pipeline_model(tesseract_ocr_pipeline_model_id):
@bp.route('/<hashid:tesseract_ocr_pipeline_model_id>', methods=['GET', 'POST'])
def entity(tesseract_ocr_pipeline_model_id):
topm = TesseractOCRPipelineModel.query.get_or_404(tesseract_ocr_pipeline_model_id)
if not (topm.user == current_user or current_user.is_administrator):
abort(403)
@ -60,9 +57,9 @@ def tesseract_ocr_pipeline_model(tesseract_ocr_pipeline_model_id):
if db.session.is_modified(topm):
flash(f'Tesseract OCR Pipeline model "{topm.title}" updated')
db.session.commit()
return redirect(url_for('.tesseract_ocr_pipeline_models'))
return redirect(url_for('.index'))
return render_template(
'contributions/tesseract_ocr_pipeline_models/tesseract_ocr_pipeline_model.html.j2',
'contributions/tesseract_ocr_pipeline_models/entity.html.j2',
title=f'{topm.title} {topm.version}',
form=form,
tesseract_ocr_pipeline_model=topm

View File

@ -0,0 +1,18 @@
from flask import Blueprint
from flask_login import login_required
bp = Blueprint('jobs', __name__)
@bp.before_request
@login_required
def before_request():
'''
Ensures that the routes in this package can only be visited by users that
are logged in.
'''
pass
from . import routes, json_routes

View File

@ -11,7 +11,7 @@ from . import bp
@bp.route('')
def corpora():
def jobs():
return redirect(url_for('main.dashboard', _anchor='jobs'))

View File

@ -1,8 +1,9 @@
from flask import current_app
from flask_migrate import upgrade
from pathlib import Path
from typing import List
from app import db
from app.models import (
Corpus,
CorpusFollowerRole,
Role,
SpaCyNLPPipelineModel,
@ -15,10 +16,10 @@ from . import bp
@bp.cli.command('deploy')
def deploy():
''' Run deployment tasks. '''
# Make default directories
print('Make default directories')
base_dir = current_app.config['NOPAQUE_DATA_DIR']
default_dirs: List[Path] = [
default_dirs: list[Path] = [
base_dir / 'tmp',
base_dir / 'users'
]
@ -28,11 +29,9 @@ def deploy():
if not default_dir.is_dir():
raise NotADirectoryError(f'{default_dir} is not a directory')
# migrate database to latest revision
print('Migrate database to latest revision')
upgrade()
# Insert/Update default database values
print('Insert/Update default Roles')
Role.insert_defaults()
print('Insert/Update default Users')
@ -44,4 +43,9 @@ def deploy():
print('Insert/Update default TesseractOCRPipelineModels')
TesseractOCRPipelineModel.insert_defaults()
print('Stop running analysis sessions')
for corpus in Corpus.query.all():
corpus.num_analysis_sessions = 0
db.session.commit()
# TODO: Implement checks for if the nopaque network exists

View File

@ -1,6 +1,6 @@
from flask import flash, redirect, render_template, url_for
from flask_login import current_user, login_required, login_user
from app.auth.forms import LoginForm
from app.blueprints.auth.forms import LoginForm
from app.models import Corpus, User
from . import bp
@ -72,16 +72,14 @@ def terms_of_use():
)
@bp.route('/social-area')
@bp.route('/social')
@login_required
def social_area():
print('test')
def social():
corpora = Corpus.query.filter(Corpus.is_public == True, Corpus.user != current_user).all()
print(corpora)
users = User.query.filter(User.is_public == True, User.id != current_user.id).all()
return render_template(
'main/social_area.html.j2',
title='Social Area',
'main/social.html.j2',
title='Social',
corpora=corpora,
users=users
)

View File

@ -61,7 +61,7 @@ class CreateTesseractOCRPipelineJobForm(CreateJobBaseForm):
if field.data:
if not('methods' in service_info and 'binarization' in service_info['methods']):
raise ValidationError('Binarization is not available')
def validate_pdf(self, field):
if field.data.mimetype != 'application/pdf':
raise ValidationError('PDF files only!')
@ -146,7 +146,7 @@ class CreateSpacyNLPPipelineJobForm(CreateJobBaseForm):
encoding_detection = BooleanField('Encoding detection', render_kw={'disabled': True})
txt = FileField('File', validators=[FileRequired()])
model = SelectField('Model', validators=[InputRequired()])
def validate_encoding_detection(self, field):
service_info = SERVICES['spacy-nlp-pipeline']['versions'][self.version.data]
if field.data:
@ -167,7 +167,6 @@ class CreateSpacyNLPPipelineJobForm(CreateJobBaseForm):
version = kwargs.pop('version', service_manifest['latest_version'])
super().__init__(*args, **kwargs)
service_info = service_manifest['versions'][version]
print(service_info)
if self.encoding_detection.render_kw is None:
self.encoding_detection.render_kw = {}
self.encoding_detection.render_kw['disabled'] = True

View File

@ -1,6 +1,6 @@
from flask import g, url_for
from flask_login import current_user
from app.users.settings.routes import settings as settings_route
from app.blueprints.users.settings.routes import settings as settings_route
from . import bp

View File

@ -15,4 +15,4 @@ def before_request():
pass
from . import cli, events, json_routes, routes, settings
from . import cli, json_routes, routes, settings

View File

@ -1,7 +0,0 @@
from flask import redirect, url_for
from . import bp
@bp.route('')
def contributions():
return redirect(url_for('main.dashboard', _anchor='contributions'))

View File

@ -1,2 +0,0 @@
from .. import bp
from . import routes

View File

@ -1,7 +0,0 @@
from flask import abort
from . import bp
@bp.route('/transkribus_htr_pipeline_models')
def transkribus_htr_pipeline_models():
return abort(503)

View File

@ -1,7 +1,6 @@
from datetime import datetime
from flask import current_app
from pathlib import Path
from typing import Dict, List
import json
import shutil
from app import db
@ -15,7 +14,7 @@ class SandpaperConverter:
def run(self):
with self.json_db_file.open('r') as f:
json_db: List[Dict] = json.load(f)
json_db: list[dict] = json.load(f)
for json_user in json_db:
if not json_user['confirmed']:
@ -26,7 +25,7 @@ class SandpaperConverter:
db.session.commit()
def convert_user(self, json_user: Dict, user_dir: Path):
def convert_user(self, json_user: dict, user_dir: Path):
current_app.logger.info(f'Create User {json_user["username"]}...')
try:
user = User.create(
@ -48,7 +47,7 @@ class SandpaperConverter:
current_app.logger.info('Done')
def convert_corpus(self, json_corpus: Dict, user: User, corpus_dir: Path):
def convert_corpus(self, json_corpus: dict, user: User, corpus_dir: Path):
current_app.logger.info(f'Create Corpus {json_corpus["title"]}...')
try:
corpus = Corpus.create(
@ -64,7 +63,7 @@ class SandpaperConverter:
current_app.logger.info('Done')
def convert_corpus_file(self, json_corpus_file: Dict, corpus: Corpus, corpus_dir: Path):
def convert_corpus_file(self, json_corpus_file: dict, corpus: Corpus, corpus_dir: Path):
current_app.logger.info(f'Create CorpusFile {json_corpus_file["title"]}...')
corpus_file = CorpusFile(
corpus=corpus,

View File

@ -1,69 +1,25 @@
from flask import current_app
from pathlib import Path
def normalize_vrt_file(input_file, output_file):
def check_pos_attribute_order(vrt_lines):
# The following orders are possible:
# since 26.02.2019: 'word,lemma,simple_pos,pos,ner'
# since 26.03.2021: 'word,pos,lemma,simple_pos,ner'
# since 27.01.2022: 'word,pos,lemma,simple_pos'
# This Function tries to find out which order we have by looking at the
# number of attributes and the position of the simple_pos attribute
SIMPLE_POS_LABELS = [
'ADJ', 'ADP', 'ADV', 'AUX', 'CONJ',
'DET', 'INTJ', 'NOUN', 'NUM', 'PART',
'PRON', 'PROPN', 'PUNCT', 'SCONJ', 'SYM',
'VERB', 'X'
]
for line in vrt_lines:
if line.startswith('<'):
continue
pos_attrs = line.rstrip('\n').split('\t')
num_pos_attrs = len(pos_attrs)
if num_pos_attrs == 4:
if pos_attrs[3] in SIMPLE_POS_LABELS:
return ['word', 'pos', 'lemma', 'simple_pos']
continue
elif num_pos_attrs == 5:
if pos_attrs[2] in SIMPLE_POS_LABELS:
return ['word', 'lemma', 'simple_pos', 'pos', 'ner']
elif pos_attrs[3] in SIMPLE_POS_LABELS:
return ['word', 'pos', 'lemma', 'simple_pos', 'ner']
continue
return None
def check_has_ent_as_s_attr(vrt_lines):
for line in vrt_lines:
if line.startswith('<ent'):
return True
return False
def pos_attrs_to_string_1(pos_attrs):
return f'{pos_attrs[0]}\t{pos_attrs[3]}\t{pos_attrs[1]}\t{pos_attrs[2]}\n'
def pos_attrs_to_string_2(pos_attrs):
return f'{pos_attrs[0]}\t{pos_attrs[1]}\t{pos_attrs[2]}\t{pos_attrs[3]}\n'
def normalize_vrt_file(input_file: Path, output_file: Path):
current_app.logger.info(f'Converting {input_file}...')
with open(input_file) as f:
with input_file.open() as f:
input_vrt_lines = f.readlines()
pos_attr_order = check_pos_attribute_order(input_vrt_lines)
has_ent_as_s_attr = check_has_ent_as_s_attr(input_vrt_lines)
pos_attr_order = _check_pos_attribute_order(input_vrt_lines)
has_ent_as_s_attr = _check_has_ent_as_s_attr(input_vrt_lines)
current_app.logger.info(f'Detected pos_attr_order: [{",".join(pos_attr_order)}]')
current_app.logger.info(f'Detected has_ent_as_s_attr: {has_ent_as_s_attr}')
if pos_attr_order == ['word', 'lemma', 'simple_pos', 'pos', 'ner']:
pos_attrs_to_string_function = pos_attrs_to_string_1
pos_attrs_to_string_function = _pos_attrs_to_string_1
elif pos_attr_order == ['word', 'pos', 'lemma', 'simple_pos', 'ner']:
pos_attrs_to_string_function = pos_attrs_to_string_2
pos_attrs_to_string_function = _pos_attrs_to_string_2
elif pos_attr_order == ['word', 'pos', 'lemma', 'simple_pos']:
pos_attrs_to_string_function = pos_attrs_to_string_2
pos_attrs_to_string_function = _pos_attrs_to_string_2
else:
raise Exception('Can not handle format')
@ -113,5 +69,49 @@ def normalize_vrt_file(input_file, output_file):
current_ent = pos_attrs[4]
output_vrt += pos_attrs_to_string_function(pos_attrs)
with open(output_file, 'w') as f:
with output_file.open(mode='w') as f:
f.write(output_vrt)
def _check_pos_attribute_order(vrt_lines: list[str]) -> list[str]:
# The following orders are possible:
# since 26.02.2019: 'word,lemma,simple_pos,pos,ner'
# since 26.03.2021: 'word,pos,lemma,simple_pos,ner'
# since 27.01.2022: 'word,pos,lemma,simple_pos'
# This Function tries to find out which order we have by looking at the
# number of attributes and the position of the simple_pos attribute
SIMPLE_POS_LABELS = [
'ADJ', 'ADP', 'ADV', 'AUX', 'CONJ', 'DET', 'INTJ', 'NOUN', 'NUM',
'PART', 'PRON', 'PROPN', 'PUNCT', 'SCONJ', 'SYM', 'VERB', 'X'
]
for line in vrt_lines:
if line.startswith('<'):
continue
pos_attrs = line.rstrip('\n').split('\t')
num_pos_attrs = len(pos_attrs)
if num_pos_attrs == 4:
if pos_attrs[3] in SIMPLE_POS_LABELS:
return ['word', 'pos', 'lemma', 'simple_pos']
continue
elif num_pos_attrs == 5:
if pos_attrs[2] in SIMPLE_POS_LABELS:
return ['word', 'lemma', 'simple_pos', 'pos', 'ner']
elif pos_attrs[3] in SIMPLE_POS_LABELS:
return ['word', 'pos', 'lemma', 'simple_pos', 'ner']
continue
# TODO: raise exception "can't determine attribute order"
def _check_has_ent_as_s_attr(vrt_lines: list[str]) -> bool:
for line in vrt_lines:
if line.startswith('<ent'):
return True
return False
def _pos_attrs_to_string_1(pos_attrs: list[str]) -> str:
return f'{pos_attrs[0]}\t{pos_attrs[3]}\t{pos_attrs[1]}\t{pos_attrs[2]}\n'
def _pos_attrs_to_string_2(pos_attrs: list[str]) -> str:
return f'{pos_attrs[0]}\t{pos_attrs[1]}\t{pos_attrs[2]}\t{pos_attrs[3]}\n'

View File

@ -1,131 +0,0 @@
from cqi.models.corpora import Corpus as CQiCorpus
from cqi.models.subcorpora import Subcorpus as CQiSubcorpus
from typing import Dict, List
def lookups_by_cpos(corpus: CQiCorpus, cpos_list: List[int]) -> Dict:
lookups = {}
lookups['cpos_lookup'] = {cpos: {} for cpos in cpos_list}
for attr in corpus.positional_attributes.list():
cpos_attr_values: List[str] = attr.values_by_cpos(cpos_list)
for i, cpos in enumerate(cpos_list):
lookups['cpos_lookup'][cpos][attr.name] = cpos_attr_values[i]
for attr in corpus.structural_attributes.list():
# We only want to iterate over non subattributes, identifiable by
# attr.has_values == False
if attr.has_values:
continue
cpos_attr_ids: List[int] = attr.ids_by_cpos(cpos_list)
for i, cpos in enumerate(cpos_list):
if cpos_attr_ids[i] == -1:
continue
lookups['cpos_lookup'][cpos][attr.name] = cpos_attr_ids[i]
occured_attr_ids = [x for x in set(cpos_attr_ids) if x != -1]
if len(occured_attr_ids) == 0:
continue
subattrs = corpus.structural_attributes.list(filters={'part_of': attr})
if len(subattrs) == 0:
continue
lookup_name: str = f'{attr.name}_lookup'
lookups[lookup_name] = {}
for attr_id in occured_attr_ids:
lookups[lookup_name][attr_id] = {}
for subattr in subattrs:
subattr_name = subattr.name[(len(attr.name) + 1):] # noqa
for i, subattr_value in enumerate(subattr.values_by_ids(occured_attr_ids)): # noqa
lookups[lookup_name][occured_attr_ids[i]][subattr_name] = subattr_value # noqa
return lookups
def partial_export_subcorpus(
subcorpus: CQiSubcorpus,
match_id_list: List[int],
context: int = 25
) -> Dict:
if subcorpus.size == 0:
return {"matches": []}
match_boundaries = []
for match_id in match_id_list:
if match_id < 0 or match_id >= subcorpus.size:
continue
match_boundaries.append(
(
match_id,
subcorpus.dump(subcorpus.fields['match'], match_id, match_id)[0],
subcorpus.dump(subcorpus.fields['matchend'], match_id, match_id)[0]
)
)
cpos_set = set()
matches = []
for match_boundary in match_boundaries:
match_num, match_start, match_end = match_boundary
c = (match_start, match_end)
if match_start == 0 or context == 0:
lc = None
cpos_list_lbound = match_start
else:
lc_lbound = max(0, (match_start - context))
lc_rbound = match_start - 1
lc = (lc_lbound, lc_rbound)
cpos_list_lbound = lc_lbound
if match_end == (subcorpus.collection.corpus.size - 1) or context == 0:
rc = None
cpos_list_rbound = match_end
else:
rc_lbound = match_end + 1
rc_rbound = min(
(match_end + context),
(subcorpus.collection.corpus.size - 1)
)
rc = (rc_lbound, rc_rbound)
cpos_list_rbound = rc_rbound
match = {'num': match_num, 'lc': lc, 'c': c, 'rc': rc}
matches.append(match)
cpos_set.update(range(cpos_list_lbound, cpos_list_rbound + 1))
lookups = lookups_by_cpos(subcorpus.collection.corpus, list(cpos_set))
return {'matches': matches, **lookups}
def export_subcorpus(
subcorpus: CQiSubcorpus,
context: int = 25,
cutoff: float = float('inf'),
offset: int = 0
) -> Dict:
if subcorpus.size == 0:
return {"matches": []}
first_match = max(0, offset)
last_match = min((offset + cutoff - 1), (subcorpus.size - 1))
match_boundaries = zip(
range(first_match, last_match + 1),
subcorpus.dump(subcorpus.fields['match'], first_match, last_match),
subcorpus.dump(subcorpus.fields['matchend'], first_match, last_match)
)
cpos_set = set()
matches = []
for match_num, match_start, match_end in match_boundaries:
c = (match_start, match_end)
if match_start == 0 or context == 0:
lc = None
cpos_list_lbound = match_start
else:
lc_lbound = max(0, (match_start - context))
lc_rbound = match_start - 1
lc = (lc_lbound, lc_rbound)
cpos_list_lbound = lc_lbound
if match_end == (subcorpus.collection.corpus.size - 1) or context == 0:
rc = None
cpos_list_rbound = match_end
else:
rc_lbound = match_end + 1
rc_rbound = min(
(match_end + context),
(subcorpus.collection.corpus.size - 1)
)
rc = (rc_lbound, rc_rbound)
cpos_list_rbound = rc_rbound
match = {'num': match_num, 'lc': lc, 'c': c, 'rc': rc}
matches.append(match)
cpos_set.update(range(cpos_list_lbound, cpos_list_rbound + 1))
lookups = lookups_by_cpos(subcorpus.collection.corpus, list(cpos_set))
return {'matches': matches, **lookups}

View File

@ -1,2 +0,0 @@
from .. import bp
from . import json_routes, routes

View File

@ -1,11 +0,0 @@
from app import db
from flask import Flask
from .corpus_utils import check_corpora
from .job_utils import check_jobs
def daemon(app: Flask):
with app.app_context():
check_corpora()
check_jobs()
db.session.commit()

View File

@ -1,7 +1,7 @@
from flask import abort, request
from flask_login import current_user
from functools import wraps
from typing import List, Union
from typing import Optional
from werkzeug.exceptions import NotAcceptable
from app.models import Permission
@ -26,7 +26,7 @@ def socketio_login_required(f):
def wrapper(*args, **kwargs):
if current_user.is_authenticated:
return f(*args, **kwargs)
return {'code': 401, 'body': 'Unauthorized'}
return {'status': 401, 'statusText': 'Unauthorized'}
return wrapper
@ -35,7 +35,7 @@ def socketio_permission_required(permission):
@wraps(f)
def wrapper(*args, **kwargs):
if not current_user.can(permission):
return {'code': 403, 'body': 'Forbidden'}
return {'status': 403, 'statusText': 'Forbidden'}
return f(*args, **kwargs)
return wrapper
return decorator
@ -46,8 +46,8 @@ def socketio_admin_required(f):
def content_negotiation(
produces: Union[str, List[str], None] = None,
consumes: Union[str, List[str], None] = None
produces: Optional[str | list[str]] = None,
consumes: Optional[str | list[str]] = None
):
def decorator(f):
@wraps(f)

View File

@ -1,25 +1,32 @@
from flask import current_app, render_template
from flask import current_app, Flask, render_template
from flask_mail import Message
from threading import Thread
from app import mail
def create_message(recipient, subject, template, **kwargs):
subject_prefix: str = current_app.config['NOPAQUE_MAIL_SUBJECT_PREFIX']
msg: Message = Message(
body=render_template(f'{template}.txt.j2', **kwargs),
html=render_template(f'{template}.html.j2', **kwargs),
def create_message(
recipient: str,
subject: str,
template: str,
**context
) -> Message:
message = Message(
body=render_template(f'{template}.txt.j2', **context),
html=render_template(f'{template}.html.j2', **context),
recipients=[recipient],
subject=f'{subject_prefix} {subject}'
subject=f'[nopaque] {subject}'
)
return msg
return message
def send(msg, *args, **kwargs):
def _send(app, msg):
def send(message: Message) -> Thread:
def _send(app: Flask, message: Message):
with app.app_context():
mail.send(msg)
mail.send(message)
thread = Thread(target=_send, args=[current_app._get_current_object(), msg])
thread = Thread(
target=_send,
args=[current_app._get_current_object(), message]
)
thread.start()
return thread

View File

@ -1,18 +1,2 @@
from flask import Blueprint
from flask_login import login_required
bp = Blueprint('jobs', __name__)
@bp.before_request
@login_required
def before_request():
'''
Ensures that the routes in this package can only be visited by users that
are logged in.
'''
pass
from . import routes, json_routes
from .handle_corpora import handle_corpora
from .handle_jobs import handle_jobs

View File

@ -1,138 +0,0 @@
from flask import current_app
from flask_login import current_user
from flask_socketio import Namespace
from app import db, hashids, socketio
from app.extensions.flask_socketio import admin_required, login_required
from app.models import Job, JobStatus
class JobsNamespace(Namespace):
@login_required
def on_delete(self, job_hashid: str):
# Decode the job hashid
job_id = hashids.decode(job_hashid)
# Validate job_id
if not isinstance(job_id, int):
return {
'code': 400,
'body': 'job_id is invalid'
}
# Load job from database
job = Job.query.get(job_id)
if job is None:
return {
'code': 404,
'body': 'Job not found'
}
# Check if the current user is allowed to delete the job
if not (job.user == current_user or current_user.is_administrator):
return {
'code': 403,
'body': 'Forbidden'
}
# TODO: This should be a method in the Job model
def _delete_job(app, job_id):
with app.app_context():
job = Job.query.get(job_id)
job.delete()
db.session.commit()
# Delete the job in a background task
socketio.start_background_task(
target=_delete_job,
app=current_app._get_current_object(),
job_id=job_id
)
return {
'code': 202,
'body': f'Job "{job.title}" marked for deletion'
}
@admin_required
def on_get_log(self, job_hashid: str):
# Decode the job hashid
job_id = hashids.decode(job_hashid)
# Validate job_id
if not isinstance(job_id, int):
return {
'code': 400,
'body': 'job_id is invalid'
}
# Load job from database
job = Job.query.get(job_id)
if job is None:
return {
'code': 404,
'body': 'Job not found'
}
# Check if the job is already processed
if job.status not in [JobStatus.COMPLETED, JobStatus.FAILED]:
return {
'code': 409,
'body': 'Job is not done processing'
}
# Read the log file
with open(job.path / 'pipeline_data' / 'logs' / 'pyflow_log.txt') as log_file:
job_log = log_file.read()
return {
'code': 200,
'body': job_log
}
@login_required
def on_restart(self, job_hashid: str):
# Decode the job hashid
job_id = hashids.decode(job_hashid)
# Validate job_id
if not isinstance(job_id, int):
return {
'code': 400,
'body': 'job_id is invalid'
}
# Load job from database
job = Job.query.get(job_id)
if job is None:
return {
'code': 404,
'body': 'Job not found'
}
# Check if the current user is allowed to restart the job
if not (job.user == current_user or current_user.is_administrator):
return {
'code': 403,
'body': 'Forbidden'
}
# TODO: This should be a method in the Job model
def _restart_job(app, job_id):
with app.app_context():
job = Job.query.get(job_id)
job.restart()
db.session.commit()
# Restart the job in a background task
socketio.start_background_task(
target=_restart_job,
app=current_app._get_current_object(),
job_id=job_id
)
return {
'code': 202,
'body': f'Job "{job.title}" restarted'
}

View File

@ -1,12 +1,16 @@
from app import docker_client
from app.models import Corpus, CorpusStatus
from flask import current_app
import docker
import os
import shutil
from app import db, docker_client, scheduler
from app.models import Corpus, CorpusStatus
def check_corpora():
def handle_corpora():
with scheduler.app.app_context():
_handle_corpora()
def _handle_corpora():
corpora = Corpus.query.all()
for corpus in [x for x in corpora if x.status == CorpusStatus.SUBMITTED]:
_create_build_corpus_service(corpus)
@ -17,13 +21,14 @@ def check_corpora():
for corpus in [x for x in corpora if x.status == CorpusStatus.RUNNING_ANALYSIS_SESSION and x.num_analysis_sessions == 0]:
corpus.status = CorpusStatus.CANCELING_ANALYSIS_SESSION
for corpus in [x for x in corpora if x.status == CorpusStatus.RUNNING_ANALYSIS_SESSION]:
_checkout_analysing_corpus_container(corpus)
_checkout_cqpserver_container(corpus)
for corpus in [x for x in corpora if x.status == CorpusStatus.STARTING_ANALYSIS_SESSION]:
_create_cqpserver_container(corpus)
for corpus in [x for x in corpora if x.status == CorpusStatus.CANCELING_ANALYSIS_SESSION]:
_remove_cqpserver_container(corpus)
db.session.commit()
def _create_build_corpus_service(corpus):
def _create_build_corpus_service(corpus: Corpus):
''' # Docker service settings # '''
''' ## Command ## '''
command = ['bash', '-c']
@ -45,12 +50,10 @@ def _create_build_corpus_service(corpus):
''' ## Constraints ## '''
constraints = ['node.role==worker']
''' ## Image ## '''
image = f'{current_app.config["NOPAQUE_DOCKER_IMAGE_PREFIX"]}cwb:r1879'
image = f'{current_app.config["NOPAQUE_DOCKER_IMAGE_PREFIX"]}cwb:r1887'
''' ## Labels ## '''
labels = {
'origin': current_app.config['SERVER_NAME'],
'type': 'corpus.build',
'corpus_id': str(corpus.id)
'nopaque.server_name': current_app.config['SERVER_NAME']
}
''' ## Mounts ## '''
mounts = []
@ -95,7 +98,7 @@ def _create_build_corpus_service(corpus):
return
corpus.status = CorpusStatus.QUEUED
def _checkout_build_corpus_service(corpus):
def _checkout_build_corpus_service(corpus: Corpus):
service_name = f'build-corpus_{corpus.id}'
try:
service = docker_client.services.get(service_name)
@ -123,8 +126,7 @@ def _checkout_build_corpus_service(corpus):
except docker.errors.DockerException as e:
current_app.logger.error(f'Remove service "{service_name}" failed: {e}')
def _create_cqpserver_container(corpus):
''' # Docker container settings # '''
def _create_cqpserver_container(corpus: Corpus):
''' ## Command ## '''
command = []
command.append(
@ -139,9 +141,9 @@ def _create_cqpserver_container(corpus):
''' ## Entrypoint ## '''
entrypoint = ['bash', '-c']
''' ## Image ## '''
image = f'{current_app.config["NOPAQUE_DOCKER_IMAGE_PREFIX"]}cwb:r1879'
image = f'{current_app.config["NOPAQUE_DOCKER_IMAGE_PREFIX"]}cwb:r1887'
''' ## Name ## '''
name = f'cqpserver_{corpus.id}'
name = f'nopaque-cqpserver-{corpus.id}'
''' ## Network ## '''
network = f'{current_app.config["NOPAQUE_DOCKER_NETWORK_NAME"]}'
''' ## Volumes ## '''
@ -198,8 +200,8 @@ def _create_cqpserver_container(corpus):
return
corpus.status = CorpusStatus.RUNNING_ANALYSIS_SESSION
def _checkout_analysing_corpus_container(corpus):
container_name = f'cqpserver_{corpus.id}'
def _checkout_cqpserver_container(corpus: Corpus):
container_name = f'nopaque-cqpserver-{corpus.id}'
try:
docker_client.containers.get(container_name)
except docker.errors.NotFound as e:
@ -209,8 +211,8 @@ def _checkout_analysing_corpus_container(corpus):
except docker.errors.DockerException as e:
current_app.logger.error(f'Get container "{container_name}" failed: {e}')
def _remove_cqpserver_container(corpus):
container_name = f'cqpserver_{corpus.id}'
def _remove_cqpserver_container(corpus: Corpus):
container_name = f'nopaque-cqpserver-{corpus.id}'
try:
container = docker_client.containers.get(container_name)
except docker.errors.NotFound:

View File

@ -1,11 +1,3 @@
from app import db, docker_client, hashids
from app.models import (
Job,
JobResult,
JobStatus,
TesseractOCRPipelineModel,
SpaCyNLPPipelineModel
)
from datetime import datetime
from flask import current_app
from werkzeug.utils import secure_filename
@ -13,9 +5,21 @@ import docker
import json
import os
import shutil
from app import db, docker_client, hashids, scheduler
from app.models import (
Job,
JobResult,
JobStatus,
TesseractOCRPipelineModel,
SpaCyNLPPipelineModel
)
def check_jobs():
def handle_jobs():
with scheduler.app.app_context():
_handle_jobs()
def _handle_jobs():
jobs = Job.query.all()
for job in [x for x in jobs if x.status == JobStatus.SUBMITTED]:
_create_job_service(job)
@ -23,8 +27,9 @@ def check_jobs():
_checkout_job_service(job)
for job in [x for x in jobs if x.status == JobStatus.CANCELING]:
_remove_job_service(job)
db.session.commit()
def _create_job_service(job):
def _create_job_service(job: Job):
''' # Docker service settings # '''
''' ## Service specific settings ## '''
if job.service == 'file-setup-pipeline':
@ -81,9 +86,7 @@ def _create_job_service(job):
constraints = ['node.role==worker']
''' ## Labels ## '''
labels = {
'origin': current_app.config['SERVER_NAME'],
'type': 'job',
'job_id': str(job.id)
'origin': current_app.config['SERVER_NAME']
}
''' ## Mounts ## '''
mounts = []
@ -164,7 +167,7 @@ def _create_job_service(job):
return
job.status = JobStatus.QUEUED
def _checkout_job_service(job):
def _checkout_job_service(job: Job):
service_name = f'job_{job.id}'
try:
service = docker_client.services.get(service_name)
@ -213,7 +216,7 @@ def _checkout_job_service(job):
except docker.errors.DockerException as e:
current_app.logger.error(f'Remove service "{service_name}" failed: {e}')
def _remove_job_service(job):
def _remove_job_service(job: Job):
service_name = f'job_{job.id}'
try:
service = docker_client.services.get(service_name)

View File

@ -1,6 +1,3 @@
from enum import Enum
from app import db, login, mail, socketio
from app.email import create_message
from .anonymous_user import *
from .avatar import *
from .corpus_file import *
@ -15,136 +12,3 @@ from .spacy_nlp_pipeline_model import *
from .tesseract_ocr_pipeline_model import *
from .token import *
from .user import *
@db.event.listens_for(Corpus, 'after_delete')
@db.event.listens_for(CorpusFile, 'after_delete')
@db.event.listens_for(Job, 'after_delete')
@db.event.listens_for(JobInput, 'after_delete')
@db.event.listens_for(JobResult, 'after_delete')
@db.event.listens_for(SpaCyNLPPipelineModel, 'after_delete')
@db.event.listens_for(TesseractOCRPipelineModel, 'after_delete')
def resource_after_delete(mapper, connection, resource):
print('[START] resource_after_delete')
jsonpatch = [
{
'op': 'remove',
'path': resource.jsonpatch_path
}
]
room = f'/users/{resource.user_hashid}'
print('[EMIT] PATCH', jsonpatch)
socketio.emit('PATCH', jsonpatch, room=room)
print('[END] resource_after_delete')
@db.event.listens_for(CorpusFollowerAssociation, 'after_delete')
def cfa_after_delete_handler(mapper, connection, cfa):
jsonpatch_path = f'/users/{cfa.corpus.user.hashid}/corpora/{cfa.corpus.hashid}/corpus_follower_associations/{cfa.hashid}'
jsonpatch = [
{
'op': 'remove',
'path': jsonpatch_path
}
]
room = f'/users/{cfa.corpus.user.hashid}'
socketio.emit('PATCH', jsonpatch, room=room)
@db.event.listens_for(Corpus, 'after_insert')
@db.event.listens_for(CorpusFile, 'after_insert')
@db.event.listens_for(Job, 'after_insert')
@db.event.listens_for(JobInput, 'after_insert')
@db.event.listens_for(JobResult, 'after_insert')
@db.event.listens_for(SpaCyNLPPipelineModel, 'after_insert')
@db.event.listens_for(TesseractOCRPipelineModel, 'after_insert')
def resource_after_insert_handler(mapper, connection, resource):
jsonpatch_value = resource.to_json_serializeable()
for attr in mapper.relationships:
jsonpatch_value[attr.key] = {}
jsonpatch = [
{
'op': 'add',
'path': resource.jsonpatch_path,
'value': jsonpatch_value
}
]
room = f'/users/{resource.user_hashid}'
socketio.emit('PATCH', jsonpatch, room=room)
@db.event.listens_for(CorpusFollowerAssociation, 'after_insert')
def cfa_after_insert_handler(mapper, connection, cfa):
jsonpatch_value = cfa.to_json_serializeable()
jsonpatch_path = f'/users/{cfa.corpus.user.hashid}/corpora/{cfa.corpus.hashid}/corpus_follower_associations/{cfa.hashid}'
jsonpatch = [
{
'op': 'add',
'path': jsonpatch_path,
'value': jsonpatch_value
}
]
room = f'/users/{cfa.corpus.user.hashid}'
socketio.emit('PATCH', jsonpatch, room=room)
@db.event.listens_for(Corpus, 'after_update')
@db.event.listens_for(CorpusFile, 'after_update')
@db.event.listens_for(Job, 'after_update')
@db.event.listens_for(JobInput, 'after_update')
@db.event.listens_for(JobResult, 'after_update')
@db.event.listens_for(SpaCyNLPPipelineModel, 'after_update')
@db.event.listens_for(TesseractOCRPipelineModel, 'after_update')
def resource_after_update_handler(mapper, connection, resource):
jsonpatch = []
for attr in db.inspect(resource).attrs:
if attr.key in mapper.relationships:
continue
if not attr.load_history().has_changes():
continue
jsonpatch_path = f'{resource.jsonpatch_path}/{attr.key}'
if isinstance(attr.value, datetime):
jsonpatch_value = f'{attr.value.isoformat()}Z'
elif isinstance(attr.value, Enum):
jsonpatch_value = attr.value.name
else:
jsonpatch_value = attr.value
jsonpatch.append(
{
'op': 'replace',
'path': jsonpatch_path,
'value': jsonpatch_value
}
)
if jsonpatch:
room = f'/users/{resource.user_hashid}'
socketio.emit('PATCH', jsonpatch, room=room)
@db.event.listens_for(Job, 'after_update')
def job_after_update_handler(mapper, connection, job):
for attr in db.inspect(job).attrs:
if attr.key != 'status':
continue
if not attr.load_history().has_changes():
return
if job.user.setting_job_status_mail_notification_level == UserSettingJobStatusMailNotificationLevel.NONE:
return
if job.user.setting_job_status_mail_notification_level == UserSettingJobStatusMailNotificationLevel.END:
if job.status not in [JobStatus.COMPLETED, JobStatus.FAILED]:
return
msg = create_message(
job.user.email,
f'Status update for your Job "{job.title}"',
'tasks/email/notification',
job=job
)
mail.send(msg)
login.anonymous_user = AnonymousUser
@login.user_loader
def load_user(user_id):
return User.query.get(int(user_id))

View File

@ -3,13 +3,12 @@ from enum import IntEnum
from flask import current_app, url_for
from flask_hashids import HashidMixin
from sqlalchemy.ext.associationproxy import association_proxy
from typing import Union
from pathlib import Path
import shutil
import xml.etree.ElementTree as ET
from app import db
from app.converters.vrt import normalize_vrt_file
from app.extensions.sqlalchemy_extras import IntEnumColumn
from app.extensions.nopaque_sqlalchemy_extras import IntEnumColumn
from .corpus_follower_association import CorpusFollowerAssociation
@ -25,7 +24,7 @@ class CorpusStatus(IntEnum):
CANCELING_ANALYSIS_SESSION = 9
@staticmethod
def get(corpus_status: Union['CorpusStatus', int, str]) -> 'CorpusStatus':
def get(corpus_status: 'CorpusStatus | int | str') -> 'CorpusStatus':
if isinstance(corpus_status, CorpusStatus):
return corpus_status
if isinstance(corpus_status, int):

View File

@ -1,6 +1,5 @@
from flask_hashids import HashidMixin
from enum import IntEnum
from typing import Union
from app import db
@ -11,7 +10,7 @@ class CorpusFollowerPermission(IntEnum):
MANAGE_CORPUS = 8
@staticmethod
def get(corpus_follower_permission: Union['CorpusFollowerPermission', int, str]) -> 'CorpusFollowerPermission':
def get(corpus_follower_permission: 'CorpusFollowerPermission | int | str') -> 'CorpusFollowerPermission':
if isinstance(corpus_follower_permission, CorpusFollowerPermission):
return corpus_follower_permission
if isinstance(corpus_follower_permission, int):
@ -38,16 +37,16 @@ class CorpusFollowerRole(HashidMixin, db.Model):
def __repr__(self):
return f'<CorpusFollowerRole {self.name}>'
def has_permission(self, permission: Union[CorpusFollowerPermission, int, str]):
def has_permission(self, permission: CorpusFollowerPermission | int | str):
perm = CorpusFollowerPermission.get(permission)
return self.permissions & perm.value == perm.value
def add_permission(self, permission: Union[CorpusFollowerPermission, int, str]):
def add_permission(self, permission: CorpusFollowerPermission | int | str):
perm = CorpusFollowerPermission.get(permission)
if not self.has_permission(perm):
self.permissions += perm.value
def remove_permission(self, permission: Union[CorpusFollowerPermission, int, str]):
def remove_permission(self, permission: CorpusFollowerPermission | int | str):
perm = CorpusFollowerPermission.get(permission)
if self.has_permission(perm):
self.permissions -= perm.value

View File

@ -10,6 +10,7 @@
# - '0.1.0'
# - '0.1.1'
# - '0.1.2'
# - '0.1.3b'
# - title: 'Amharic'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/amh.traineddata'
@ -22,6 +23,7 @@
# - '0.1.0'
# - '0.1.1'
# - '0.1.2'
# - '0.1.3b'
- title: 'Arabic'
description: ''
url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/ara.traineddata'
@ -34,6 +36,7 @@
- '0.1.0'
- '0.1.1'
- '0.1.2'
- '0.1.3b'
# - title: 'Assamese'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/asm.traineddata'
@ -46,6 +49,7 @@
# - '0.1.0'
# - '0.1.1'
# - '0.1.2'
# - '0.1.3b'
# - title: 'Azerbaijani'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/aze.traineddata'
@ -58,6 +62,7 @@
# - '0.1.0'
# - '0.1.1'
# - '0.1.2'
# - '0.1.3b'
# - title: 'Azerbaijani - Cyrillic'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/aze_cyrl.traineddata'
@ -70,6 +75,7 @@
# - '0.1.0'
# - '0.1.1'
# - '0.1.2'
# - '0.1.3b'
# - title: 'Belarusian'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/bel.traineddata'
@ -82,6 +88,7 @@
# - '0.1.0'
# - '0.1.1'
# - '0.1.2'
# - '0.1.3b'
# - title: 'Bengali'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/ben.traineddata'
@ -94,6 +101,7 @@
# - '0.1.0'
# - '0.1.1'
# - '0.1.2'
# - '0.1.3b'
# - title: 'Tibetan'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/bod.traineddata'
@ -106,6 +114,7 @@
# - '0.1.0'
# - '0.1.1'
# - '0.1.2'
# - '0.1.3b'
# - title: 'Bosnian'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/bos.traineddata'
@ -118,6 +127,7 @@
# - '0.1.0'
# - '0.1.1'
# - '0.1.2'
# - '0.1.3b'
# - title: 'Bulgarian'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/bul.traineddata'
@ -130,6 +140,7 @@
# - '0.1.0'
# - '0.1.1'
# - '0.1.2'
# - '0.1.3b'
# - title: 'Catalan; Valencian'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/cat.traineddata'
@ -142,6 +153,7 @@
# - '0.1.0'
# - '0.1.1'
# - '0.1.2'
# - '0.1.3b'
# - title: 'Cebuano'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/ceb.traineddata'
@ -154,6 +166,7 @@
# - '0.1.0'
# - '0.1.1'
# - '0.1.2'
# - '0.1.3b'
# - title: 'Czech'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/ces.traineddata'
@ -166,6 +179,7 @@
# - '0.1.0'
# - '0.1.1'
# - '0.1.2'
# - '0.1.3b'
# - title: 'Chinese - Simplified'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/chi_sim.traineddata'
@ -178,6 +192,7 @@
# - '0.1.0'
# - '0.1.1'
# - '0.1.2'
# - '0.1.3b'
- title: 'Chinese - Traditional'
description: ''
url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/chi_tra.traineddata'
@ -190,6 +205,7 @@
- '0.1.0'
- '0.1.1'
- '0.1.2'
- '0.1.3b'
# - title: 'Cherokee'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/chr.traineddata'
@ -202,6 +218,7 @@
# - '0.1.0'
# - '0.1.1'
# - '0.1.2'
# - '0.1.3b'
# - title: 'Welsh'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/cym.traineddata'
@ -214,6 +231,7 @@
# - '0.1.0'
# - '0.1.1'
# - '0.1.2'
# - '0.1.3b'
- title: 'Danish'
description: ''
url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/dan.traineddata'
@ -226,6 +244,7 @@
- '0.1.0'
- '0.1.1'
- '0.1.2'
- '0.1.3b'
- title: 'German'
description: ''
url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/deu.traineddata'
@ -238,6 +257,7 @@
- '0.1.0'
- '0.1.1'
- '0.1.2'
- '0.1.3b'
# - title: 'Dzongkha'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/dzo.traineddata'
@ -250,6 +270,7 @@
# - '0.1.0'
# - '0.1.1'
# - '0.1.2'
# - '0.1.3b'
- title: 'Greek, Modern (1453-)'
description: ''
url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/ell.traineddata'
@ -262,6 +283,7 @@
- '0.1.0'
- '0.1.1'
- '0.1.2'
- '0.1.3b'
- title: 'English'
description: ''
url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/eng.traineddata'
@ -274,6 +296,7 @@
- '0.1.0'
- '0.1.1'
- '0.1.2'
- '0.1.3b'
- title: 'English, Middle (1100-1500)'
description: ''
url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/enm.traineddata'
@ -286,6 +309,7 @@
- '0.1.0'
- '0.1.1'
- '0.1.2'
- '0.1.3b'
# - title: 'Esperanto'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/epo.traineddata'
@ -298,6 +322,7 @@
# - '0.1.0'
# - '0.1.1'
# - '0.1.2'
# - '0.1.3b'
# - title: 'Estonian'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/est.traineddata'
@ -310,6 +335,7 @@
# - '0.1.0'
# - '0.1.1'
# - '0.1.2'
# - '0.1.3b'
# - title: 'Basque'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/eus.traineddata'
@ -322,6 +348,7 @@
# - '0.1.0'
# - '0.1.1'
# - '0.1.2'
# - '0.1.3b'
# - title: 'Persian'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/fas.traineddata'
@ -334,6 +361,7 @@
# - '0.1.0'
# - '0.1.1'
# - '0.1.2'
# - '0.1.3b'
# - title: 'Finnish'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/fin.traineddata'
@ -346,6 +374,7 @@
# - '0.1.0'
# - '0.1.1'
# - '0.1.2'
# - '0.1.3b'
- title: 'French'
description: ''
url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/fra.traineddata'
@ -358,6 +387,7 @@
- '0.1.0'
- '0.1.1'
- '0.1.2'
- '0.1.3b'
- title: 'German Fraktur'
description: ''
url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/frk.traineddata'
@ -370,6 +400,7 @@
- '0.1.0'
- '0.1.1'
- '0.1.2'
- '0.1.3b'
- title: 'French, Middle (ca. 1400-1600)'
description: ''
url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/frm.traineddata'
@ -382,6 +413,7 @@
- '0.1.0'
- '0.1.1'
- '0.1.2'
- '0.1.3b'
# - title: 'Irish'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/gle.traineddata'
@ -394,6 +426,7 @@
# - '0.1.0'
# - '0.1.1'
# - '0.1.2'
# - '0.1.3b'
# - title: 'Galician'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/glg.traineddata'
@ -406,6 +439,7 @@
# - '0.1.0'
# - '0.1.1'
# - '0.1.2'
# - '0.1.3b'
- title: 'Greek, Ancient (-1453)'
description: ''
url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/grc.traineddata'
@ -418,6 +452,7 @@
- '0.1.0'
- '0.1.1'
- '0.1.2'
- '0.1.3b'
# - title: 'Gujarati'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/guj.traineddata'
@ -430,6 +465,7 @@
# - '0.1.0'
# - '0.1.1'
# - '0.1.2'
# - '0.1.3b'
# - title: 'Haitian; Haitian Creole'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/hat.traineddata'
@ -442,6 +478,7 @@
# - '0.1.0'
# - '0.1.1'
# - '0.1.2'
# - '0.1.3b'
# - title: 'Hebrew'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/heb.traineddata'
@ -454,6 +491,7 @@
# - '0.1.0'
# - '0.1.1'
# - '0.1.2'
# - '0.1.3b'
# - title: 'Hindi'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/hin.traineddata'
@ -466,6 +504,7 @@
# - '0.1.0'
# - '0.1.1'
# - '0.1.2'
# - '0.1.3b'
# - title: 'Croatian'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/hrv.traineddata'
@ -478,6 +517,7 @@
# - '0.1.0'
# - '0.1.1'
# - '0.1.2'
# - '0.1.3b'
# - title: 'Hungarian'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/hun.traineddata'
@ -490,6 +530,7 @@
# - '0.1.0'
# - '0.1.1'
# - '0.1.2'
# - '0.1.3b'
# - title: 'Inuktitut'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/iku.traineddata'
@ -502,6 +543,7 @@
# - '0.1.0'
# - '0.1.1'
# - '0.1.2'
# - '0.1.3b'
# - title: 'Indonesian'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/ind.traineddata'
@ -514,6 +556,7 @@
# - '0.1.0'
# - '0.1.1'
# - '0.1.2'
# - '0.1.3b'
# - title: 'Icelandic'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/isl.traineddata'
@ -526,6 +569,7 @@
# - '0.1.0'
# - '0.1.1'
# - '0.1.2'
# - '0.1.3b'
- title: 'Italian'
description: ''
url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/ita.traineddata'
@ -538,6 +582,7 @@
- '0.1.0'
- '0.1.1'
- '0.1.2'
- '0.1.3b'
- title: 'Italian - Old'
description: ''
url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/ita_old.traineddata'
@ -550,6 +595,7 @@
- '0.1.0'
- '0.1.1'
- '0.1.2'
- '0.1.3b'
# - title: 'Javanese'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/jav.traineddata'
@ -562,6 +608,7 @@
# - '0.1.0'
# - '0.1.1'
# - '0.1.2'
# - '0.1.3b'
# - title: 'Japanese'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/jpn.traineddata'
@ -574,6 +621,7 @@
# - '0.1.0'
# - '0.1.1'
# - '0.1.2'
# - '0.1.3b'
# - title: 'Kannada'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/kan.traineddata'
@ -586,6 +634,7 @@
# - '0.1.0'
# - '0.1.1'
# - '0.1.2'
# - '0.1.3b'
# - title: 'Georgian'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/kat.traineddata'
@ -598,6 +647,7 @@
# - '0.1.0'
# - '0.1.1'
# - '0.1.2'
# - '0.1.3b'
# - title: 'Georgian - Old'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/kat_old.traineddata'
@ -610,6 +660,7 @@
# - '0.1.0'
# - '0.1.1'
# - '0.1.2'
# - '0.1.3b'
# - title: 'Kazakh'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/kaz.traineddata'
@ -622,6 +673,7 @@
# - '0.1.0'
# - '0.1.1'
# - '0.1.2'
# - '0.1.3b'
# - title: 'Central Khmer'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/khm.traineddata'
@ -634,6 +686,7 @@
# - '0.1.0'
# - '0.1.1'
# - '0.1.2'
# - '0.1.3b'
# - title: 'Kirghiz; Kyrgyz'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/kir.traineddata'
@ -646,6 +699,7 @@
# - '0.1.0'
# - '0.1.1'
# - '0.1.2'
# - '0.1.3b'
# - title: 'Korean'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/kor.traineddata'
@ -658,6 +712,7 @@
# - '0.1.0'
# - '0.1.1'
# - '0.1.2'
# - '0.1.3b'
# - title: 'Kurdish'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/kur.traineddata'
@ -670,6 +725,7 @@
# - '0.1.0'
# - '0.1.1'
# - '0.1.2'
# - '0.1.3b'
# - title: 'Lao'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/lao.traineddata'
@ -682,6 +738,7 @@
# - '0.1.0'
# - '0.1.1'
# - '0.1.2'
# - '0.1.3b'
# - title: 'Latin'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/lat.traineddata'
@ -694,6 +751,7 @@
# - '0.1.0'
# - '0.1.1'
# - '0.1.2'
# - '0.1.3b'
# - title: 'Latvian'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/lav.traineddata'
@ -706,6 +764,7 @@
# - '0.1.0'
# - '0.1.1'
# - '0.1.2'
# - '0.1.3b'
# - title: 'Lithuanian'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/lit.traineddata'
@ -718,6 +777,7 @@
# - '0.1.0'
# - '0.1.1'
# - '0.1.2'
# - '0.1.3b'
# - title: 'Malayalam'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/mal.traineddata'
@ -730,6 +790,7 @@
# - '0.1.0'
# - '0.1.1'
# - '0.1.2'
# - '0.1.3b'
# - title: 'Marathi'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/mar.traineddata'
@ -742,6 +803,7 @@
# - '0.1.0'
# - '0.1.1'
# - '0.1.2'
# - '0.1.3b'
# - title: 'Macedonian'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/mkd.traineddata'
@ -754,6 +816,7 @@
# - '0.1.0'
# - '0.1.1'
# - '0.1.2'
# - '0.1.3b'
# - title: 'Maltese'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/mlt.traineddata'
@ -766,6 +829,7 @@
# - '0.1.0'
# - '0.1.1'
# - '0.1.2'
# - '0.1.3b'
# - title: 'Malay'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/msa.traineddata'
@ -778,6 +842,7 @@
# - '0.1.0'
# - '0.1.1'
# - '0.1.2'
# - '0.1.3b'
# - title: 'Burmese'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/mya.traineddata'
@ -790,6 +855,7 @@
# - '0.1.0'
# - '0.1.1'
# - '0.1.2'
# - '0.1.3b'
# - title: 'Nepali'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/nep.traineddata'
@ -802,6 +868,7 @@
# - '0.1.0'
# - '0.1.1'
# - '0.1.2'
# - '0.1.3b'
# - title: 'Dutch; Flemish'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/nld.traineddata'
@ -814,6 +881,7 @@
# - '0.1.0'
# - '0.1.1'
# - '0.1.2'
# - '0.1.3b'
# - title: 'Norwegian'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/nor.traineddata'
@ -826,6 +894,7 @@
# - '0.1.0'
# - '0.1.1'
# - '0.1.2'
# - '0.1.3b'
# - title: 'Oriya'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/ori.traineddata'
@ -838,6 +907,7 @@
# - '0.1.0'
# - '0.1.1'
# - '0.1.2'
# - '0.1.3b'
# - title: 'Panjabi; Punjabi'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/pan.traineddata'
@ -850,6 +920,7 @@
# - '0.1.0'
# - '0.1.1'
# - '0.1.2'
# - '0.1.3b'
# - title: 'Polish'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/pol.traineddata'
@ -862,6 +933,7 @@
# - '0.1.0'
# - '0.1.1'
# - '0.1.2'
# - '0.1.3b'
- title: 'Portuguese'
description: ''
url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/por.traineddata'
@ -874,6 +946,7 @@
- '0.1.0'
- '0.1.1'
- '0.1.2'
- '0.1.3b'
# - title: 'Pushto; Pashto'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/pus.traineddata'
@ -886,6 +959,7 @@
# - '0.1.0'
# - '0.1.1'
# - '0.1.2'
# - '0.1.3b'
# - title: 'Romanian; Moldavian; Moldovan'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/ron.traineddata'
@ -898,6 +972,7 @@
# - '0.1.0'
# - '0.1.1'
# - '0.1.2'
# - '0.1.3b'
- title: 'Russian'
description: ''
url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/rus.traineddata'
@ -910,6 +985,7 @@
- '0.1.0'
- '0.1.1'
- '0.1.2'
- '0.1.3b'
# - title: 'Sanskrit'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/san.traineddata'
@ -922,6 +998,7 @@
# - '0.1.0'
# - '0.1.1'
# - '0.1.2'
# - '0.1.3b'
# - title: 'Sinhala; Sinhalese'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/sin.traineddata'
@ -934,6 +1011,7 @@
# - '0.1.0'
# - '0.1.1'
# - '0.1.2'
# - '0.1.3b'
# - title: 'Slovak'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/slk.traineddata'
@ -946,6 +1024,7 @@
# - '0.1.0'
# - '0.1.1'
# - '0.1.2'
# - '0.1.3b'
# - title: 'Slovenian'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/slv.traineddata'
@ -958,6 +1037,7 @@
# - '0.1.0'
# - '0.1.1'
# - '0.1.2'
# - '0.1.3b'
- title: 'Spanish; Castilian'
description: ''
url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/spa.traineddata'
@ -970,6 +1050,7 @@
- '0.1.0'
- '0.1.1'
- '0.1.2'
- '0.1.3b'
- title: 'Spanish; Castilian - Old'
description: ''
url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/spa_old.traineddata'
@ -982,6 +1063,7 @@
- '0.1.0'
- '0.1.1'
- '0.1.2'
- '0.1.3b'
# - title: 'Albanian'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/sqi.traineddata'
@ -994,6 +1076,7 @@
# - '0.1.0'
# - '0.1.1'
# - '0.1.2'
# - '0.1.3b'
# - title: 'Serbian'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/srp.traineddata'
@ -1006,6 +1089,7 @@
# - '0.1.0'
# - '0.1.1'
# - '0.1.2'
# - '0.1.3b'
# - title: 'Serbian - Latin'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/srp_latn.traineddata'
@ -1018,6 +1102,7 @@
# - '0.1.0'
# - '0.1.1'
# - '0.1.2'
# - '0.1.3b'
# - title: 'Swahili'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/swa.traineddata'
@ -1030,6 +1115,7 @@
# - '0.1.0'
# - '0.1.1'
# - '0.1.2'
# - '0.1.3b'
# - title: 'Swedish'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/swe.traineddata'
@ -1042,6 +1128,7 @@
# - '0.1.0'
# - '0.1.1'
# - '0.1.2'
# - '0.1.3b'
# - title: 'Syriac'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/syr.traineddata'
@ -1054,6 +1141,7 @@
# - '0.1.0'
# - '0.1.1'
# - '0.1.2'
# - '0.1.3b'
# - title: 'Tamil'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/tam.traineddata'
@ -1066,6 +1154,7 @@
# - '0.1.0'
# - '0.1.1'
# - '0.1.2'
# - '0.1.3b'
# - title: 'Telugu'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/tel.traineddata'
@ -1078,6 +1167,7 @@
# - '0.1.0'
# - '0.1.1'
# - '0.1.2'
# - '0.1.3b'
# - title: 'Tajik'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/tgk.traineddata'
@ -1090,6 +1180,7 @@
# - '0.1.0'
# - '0.1.1'
# - '0.1.2'
# - '0.1.3b'
# - title: 'Tagalog'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/tgl.traineddata'
@ -1102,6 +1193,7 @@
# - '0.1.0'
# - '0.1.1'
# - '0.1.2'
# - '0.1.3b'
# - title: 'Thai'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/tha.traineddata'
@ -1114,6 +1206,7 @@
# - '0.1.0'
# - '0.1.1'
# - '0.1.2'
# - '0.1.3b'
# - title: 'Tigrinya'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/tir.traineddata'
@ -1126,6 +1219,7 @@
# - '0.1.0'
# - '0.1.1'
# - '0.1.2'
# - '0.1.3b'
# - title: 'Turkish'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/tur.traineddata'
@ -1138,6 +1232,7 @@
# - '0.1.0'
# - '0.1.1'
# - '0.1.2'
# - '0.1.3b'
# - title: 'Uighur; Uyghur'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/uig.traineddata'
@ -1150,6 +1245,7 @@
# - '0.1.0'
# - '0.1.1'
# - '0.1.2'
# - '0.1.3b'
# - title: 'Ukrainian'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/ukr.traineddata'
@ -1162,6 +1258,7 @@
# - '0.1.0'
# - '0.1.1'
# - '0.1.2'
# - '0.1.3b'
# - title: 'Urdu'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/urd.traineddata'
@ -1174,6 +1271,7 @@
# - '0.1.0'
# - '0.1.1'
# - '0.1.2'
# - '0.1.3b'
# - title: 'Uzbek'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/uzb.traineddata'
@ -1186,6 +1284,7 @@
# - '0.1.0'
# - '0.1.1'
# - '0.1.2'
# - '0.1.3b'
# - title: 'Uzbek - Cyrillic'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/uzb_cyrl.traineddata'
@ -1198,6 +1297,7 @@
# - '0.1.0'
# - '0.1.1'
# - '0.1.2'
# - '0.1.3b'
# - title: 'Vietnamese'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/vie.traineddata'
@ -1210,6 +1310,7 @@
# - '0.1.0'
# - '0.1.1'
# - '0.1.2'
# - '0.1.3b'
# - title: 'Yiddish'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/yid.traineddata'
@ -1222,3 +1323,4 @@
# - '0.1.0'
# - '0.1.1'
# - '0.1.2'
# - '0.1.3b'

View File

@ -42,8 +42,9 @@ def resource_after_delete(mapper, connection, resource):
'path': resource.jsonpatch_path
}
]
namespace = '/users'
room = f'/users/{resource.user_hashid}'
socketio.emit('PATCH', jsonpatch, room=room)
socketio.emit('patch', jsonpatch, namespace=namespace, room=room)
def cfa_after_delete(mapper, connection, cfa):
@ -54,8 +55,9 @@ def cfa_after_delete(mapper, connection, cfa):
'path': jsonpatch_path
}
]
namespace = '/users'
room = f'/users/{cfa.corpus.user.hashid}'
socketio.emit('PATCH', jsonpatch, room=room)
socketio.emit('patch', jsonpatch, namespace=namespace, room=room)
def resource_after_insert(mapper, connection, resource):
@ -69,8 +71,9 @@ def resource_after_insert(mapper, connection, resource):
'value': jsonpatch_value
}
]
namespace = '/users'
room = f'/users/{resource.user_hashid}'
socketio.emit('PATCH', jsonpatch, room=room)
socketio.emit('patch', jsonpatch, namespace=namespace, room=room)
def cfa_after_insert(mapper, connection, cfa):
@ -83,8 +86,9 @@ def cfa_after_insert(mapper, connection, cfa):
'value': jsonpatch_value
}
]
namespace = '/users'
room = f'/users/{cfa.corpus.user.hashid}'
socketio.emit('PATCH', jsonpatch, room=room)
socketio.emit('patch', jsonpatch, namespace=namespace, room=room)
def resource_after_update(mapper, connection, resource):
@ -109,8 +113,9 @@ def resource_after_update(mapper, connection, resource):
}
)
if jsonpatch:
namespace = '/users'
room = f'/users/{resource.user_hashid}'
socketio.emit('PATCH', jsonpatch, room=room)
socketio.emit('patch', jsonpatch, namespace=namespace, room=room)
def job_after_update(mapper, connection, job):

View File

@ -3,11 +3,10 @@ from enum import IntEnum
from flask import current_app, url_for
from flask_hashids import HashidMixin
from time import sleep
from typing import Union
from pathlib import Path
import shutil
from app import db
from app.extensions.sqlalchemy_extras import ContainerColumn, IntEnumColumn
from app.extensions.nopaque_sqlalchemy_extras import ContainerColumn, IntEnumColumn
class JobStatus(IntEnum):
@ -21,7 +20,7 @@ class JobStatus(IntEnum):
FAILED = 8
@staticmethod
def get(job_status: Union['JobStatus', int, str]) -> 'JobStatus':
def get(job_status: 'JobStatus | int | str') -> 'JobStatus':
if isinstance(job_status, JobStatus):
return job_status
if isinstance(job_status, int):

View File

@ -1,6 +1,5 @@
from enum import IntEnum
from flask_hashids import HashidMixin
from typing import Union
from app import db
@ -14,7 +13,7 @@ class Permission(IntEnum):
USE_API = 4
@staticmethod
def get(permission: Union['Permission', int, str]) -> 'Permission':
def get(permission: 'Permission | int | str') -> 'Permission':
if isinstance(permission, Permission):
return permission
if isinstance(permission, int):
@ -38,16 +37,16 @@ class Role(HashidMixin, db.Model):
def __repr__(self):
return f'<Role {self.name}>'
def has_permission(self, permission: Union[Permission, int, str]):
def has_permission(self, permission: Permission | int | str):
p = Permission.get(permission)
return self.permissions & p.value == p.value
def add_permission(self, permission: Union[Permission, int, str]):
def add_permission(self, permission: Permission | int | str):
p = Permission.get(permission)
if not self.has_permission(p):
self.permissions += p.value
def remove_permission(self, permission: Union[Permission, int, str]):
def remove_permission(self, permission: Permission | int | str):
p = Permission.get(permission)
if self.has_permission(p):
self.permissions -= p.value

View File

@ -5,7 +5,7 @@ from pathlib import Path
import requests
import yaml
from app import db
from app.extensions.sqlalchemy_extras import ContainerColumn
from app.extensions.nopaque_sqlalchemy_extras import ContainerColumn
from .file_mixin import FileMixin
from .user import User
@ -41,7 +41,7 @@ class SpaCyNLPPipelineModel(FileMixin, HashidMixin, db.Model):
@property
def url(self):
return url_for(
'contributions.spacy_nlp_pipeline_model',
'contributions.spacy_nlp_pipeline_models.entity',
spacy_nlp_pipeline_model_id=self.id
)
@ -104,7 +104,7 @@ class SpaCyNLPPipelineModel(FileMixin, HashidMixin, db.Model):
f.write(chunk)
pbar.close()
db.session.commit()
def delete(self):
try:
self.path.unlink(missing_ok=True)

View File

@ -5,7 +5,7 @@ from pathlib import Path
import requests
import yaml
from app import db
from app.extensions.sqlalchemy_extras import ContainerColumn
from app.extensions.nopaque_sqlalchemy_extras import ContainerColumn
from .file_mixin import FileMixin
from .user import User
@ -40,7 +40,7 @@ class TesseractOCRPipelineModel(FileMixin, HashidMixin, db.Model):
@property
def url(self):
return url_for(
'contributions.tesseract_ocr_pipeline_model',
'contributions.tesseract_ocr_pipeline_models.entity',
tesseract_ocr_pipeline_model_id=self.id
)

View File

@ -5,14 +5,13 @@ from flask_hashids import HashidMixin
from flask_login import UserMixin
from sqlalchemy.ext.associationproxy import association_proxy
from pathlib import Path
from typing import Union
from werkzeug.security import generate_password_hash, check_password_hash
import jwt
import re
import secrets
import shutil
from app import db, hashids
from app.extensions.sqlalchemy_extras import IntEnumColumn
from app.extensions.nopaque_sqlalchemy_extras import IntEnumColumn
from .corpus import Corpus
from .corpus_follower_association import CorpusFollowerAssociation
from .corpus_follower_role import CorpusFollowerRole
@ -26,7 +25,7 @@ class ProfilePrivacySettings(IntEnum):
SHOW_MEMBER_SINCE = 4
@staticmethod
def get(profile_privacy_setting: Union['ProfilePrivacySettings', int, str]) -> 'ProfilePrivacySettings':
def get(profile_privacy_setting: 'ProfilePrivacySettings | int | str') -> 'ProfilePrivacySettings':
if isinstance(profile_privacy_setting, ProfilePrivacySettings):
return profile_privacy_setting
if isinstance(profile_privacy_setting, int):
@ -315,7 +314,7 @@ class User(HashidMixin, UserMixin, db.Model):
def has_profile_privacy_setting(self, setting):
s = ProfilePrivacySettings.get(setting)
return self.profile_privacy_settings & s.value == s.value
def add_profile_privacy_setting(self, setting):
s = ProfilePrivacySettings.get(setting)
if not self.has_profile_privacy_setting(s):
@ -350,7 +349,7 @@ class User(HashidMixin, UserMixin, db.Model):
def is_following_corpus(self, corpus):
return corpus in self.followed_corpora
def generate_follow_corpus_token(self, corpus_hashid, role_name, expiration=7):
now = datetime.utcnow()
payload = {
@ -366,7 +365,7 @@ class User(HashidMixin, UserMixin, db.Model):
current_app.config['SECRET_KEY'],
algorithm='HS256'
)
def follow_corpus_by_token(self, token):
try:
payload = jwt.decode(

View File

@ -1,17 +1,16 @@
from cqi import CQiClient
from cqi.errors import CQiException
from cqi.status import CQiStatus
from docker.models.containers import Container
from flask import current_app, session
from flask import current_app
from flask_login import current_user
from flask_socketio import Namespace
from inspect import signature
from threading import Lock
from typing import Callable, Dict, List, Optional
from app import db, docker_client, hashids, socketio
from app.decorators import socketio_login_required
from app.models import Corpus, CorpusStatus
from . import extensions
from . import cqi_extension_functions
from .utils import SessionManager
'''
@ -19,7 +18,7 @@ This package tunnels the Corpus Query interface (CQi) protocol through
Socket.IO (SIO) by tunneling CQi API calls through an event called "exec".
Basic concept:
1. A client connects to the "/cqi_over_sio" namespace.
1. A client connects to the namespace.
2. The client emits the "init" event and provides a corpus id for the corpus
that should be analysed in this session.
1.1 The analysis session counter of the corpus is incremented.
@ -28,17 +27,17 @@ Basic concept:
1.4 Connect the CQiClient to the server.
1.5 Save the CQiClient, the Lock and the corpus id in the session for
subsequential use.
2. The client emits the "exec" event provides the name of a CQi API function
arguments (optional).
- The event "exec" handler will execute the function, make sure that the
result is serializable and returns the result back to the client.
4. Wait for more events
5. The client disconnects from the "/cqi_over_sio" namespace
1.1 The analysis session counter of the corpus is decremented.
1.2 The CQiClient and (Mutex) Lock belonging to it are teared down.
3. The client emits "exec" events, within which it provides the name of a CQi
API function and the corresponding arguments.
3.1 The "exec" event handler will execute the function, make sure that
the result is serializable and returns the result back to the client.
4. The client disconnects from the namespace
4.1 The analysis session counter of the corpus is decremented.
4.2 The CQiClient and (Mutex) Lock belonging to it are teared down.
'''
CQI_API_FUNCTION_NAMES: List[str] = [
CQI_API_FUNCTION_NAMES = [
'ask_feature_cl_2_3',
'ask_feature_cqi_1_0',
'ask_feature_cqp_2_3',
@ -86,68 +85,90 @@ CQI_API_FUNCTION_NAMES: List[str] = [
]
class CQiNamespace(Namespace):
CQI_EXTENSION_FUNCTION_NAMES = [
'ext_corpus_update_db',
'ext_corpus_static_data',
'ext_corpus_paginate_corpus',
'ext_cqp_paginate_subcorpus',
'ext_cqp_partial_export_subcorpus',
'ext_cqp_export_subcorpus',
]
class CQiOverSocketIONamespace(Namespace):
@socketio_login_required
def on_connect(self):
pass
@socketio_login_required
def on_init(self, db_corpus_hashid: str):
db_corpus_id: int = hashids.decode(db_corpus_hashid)
db_corpus: Optional[Corpus] = Corpus.query.get(db_corpus_id)
if db_corpus is None:
def on_init(self, corpus_hashid: str) -> dict:
corpus_id = hashids.decode(corpus_hashid)
if not isinstance(corpus_id, int):
return {'code': 400, 'msg': 'Bad Request'}
corpus = Corpus.query.get(corpus_id)
if corpus is None:
return {'code': 404, 'msg': 'Not Found'}
if not (db_corpus.user == current_user
or current_user.is_following_corpus(db_corpus)
or current_user.is_administrator):
if not (
corpus.user == current_user
or current_user.is_following_corpus(corpus)
or current_user.is_administrator
):
return {'code': 403, 'msg': 'Forbidden'}
if db_corpus.status not in [
if corpus.status not in [
CorpusStatus.BUILT,
CorpusStatus.STARTING_ANALYSIS_SESSION,
CorpusStatus.RUNNING_ANALYSIS_SESSION,
CorpusStatus.CANCELING_ANALYSIS_SESSION
]:
return {'code': 424, 'msg': 'Failed Dependency'}
if db_corpus.num_analysis_sessions is None:
db_corpus.num_analysis_sessions = 0
db.session.commit()
db_corpus.num_analysis_sessions = Corpus.num_analysis_sessions + 1
corpus.num_analysis_sessions = Corpus.num_analysis_sessions + 1
db.session.commit()
retry_counter: int = 20
while db_corpus.status != CorpusStatus.RUNNING_ANALYSIS_SESSION:
retry_counter = 20
while corpus.status != CorpusStatus.RUNNING_ANALYSIS_SESSION:
if retry_counter == 0:
db_corpus.num_analysis_sessions = Corpus.num_analysis_sessions - 1
corpus.num_analysis_sessions = Corpus.num_analysis_sessions - 1
db.session.commit()
return {'code': 408, 'msg': 'Request Timeout'}
socketio.sleep(3)
retry_counter -= 1
db.session.refresh(db_corpus)
# cqi_client: CQiClient = CQiClient(f'cqpserver_{db_corpus_id}')
cqpserver_container_name: str = f'cqpserver_{db_corpus_id}'
cqpserver_container: Container = docker_client.containers.get(cqpserver_container_name)
cqpserver_host: str = cqpserver_container.attrs['NetworkSettings']['Networks'][current_app.config['NOPAQUE_DOCKER_NETWORK_NAME']]['IPAddress']
cqi_client: CQiClient = CQiClient(cqpserver_host)
session['cqi_over_sio'] = {
'cqi_client': cqi_client,
'cqi_client_lock': Lock(),
'db_corpus_id': db_corpus_id
}
db.session.refresh(corpus)
cqpserver_container_name = f'nopaque-cqpserver-{corpus_id}'
cqpserver_container = docker_client.containers.get(cqpserver_container_name)
cqpserver_ip_address = cqpserver_container.attrs['NetworkSettings']['Networks'][current_app.config['NOPAQUE_DOCKER_NETWORK_NAME']]['IPAddress']
cqi_client = CQiClient(cqpserver_ip_address)
cqi_client_lock = Lock()
SessionManager.setup()
SessionManager.set_corpus_id(corpus_id)
SessionManager.set_cqi_client(cqi_client)
SessionManager.set_cqi_client_lock(cqi_client_lock)
return {'code': 200, 'msg': 'OK'}
@socketio_login_required
def on_exec(self, fn_name: str, fn_args: Dict = {}):
def on_exec(self, fn_name: str, fn_args: dict = {}) -> dict:
try:
cqi_client: CQiClient = session['cqi_over_sio']['cqi_client']
cqi_client_lock: Lock = session['cqi_over_sio']['cqi_client_lock']
cqi_client = SessionManager.get_cqi_client()
cqi_client_lock = SessionManager.get_cqi_client_lock()
except KeyError:
return {'code': 424, 'msg': 'Failed Dependency'}
if fn_name in CQI_API_FUNCTION_NAMES:
fn: Callable = getattr(cqi_client.api, fn_name)
elif fn_name in extensions.CQI_EXTENSION_FUNCTION_NAMES:
fn: Callable = getattr(extensions, fn_name)
fn = getattr(cqi_client.api, fn_name)
elif fn_name in CQI_EXTENSION_FUNCTION_NAMES:
fn = getattr(cqi_extension_functions, fn_name)
else:
return {'code': 400, 'msg': 'Bad Request'}
for param in signature(fn).parameters.values():
# Check if the parameter is optional or required
if param.default is param.empty:
if param.name not in fn_args:
return {'code': 400, 'msg': 'Bad Request'}
@ -156,6 +177,7 @@ class CQiNamespace(Namespace):
continue
if type(fn_args[param.name]) is not param.annotation:
return {'code': 400, 'msg': 'Bad Request'}
cqi_client_lock.acquire()
try:
fn_return_value = fn(**fn_args)
@ -173,6 +195,7 @@ class CQiNamespace(Namespace):
}
finally:
cqi_client_lock.release()
if isinstance(fn_return_value, CQiStatus):
payload = {
'code': fn_return_value.code,
@ -180,27 +203,31 @@ class CQiNamespace(Namespace):
}
else:
payload = fn_return_value
return {'code': 200, 'msg': 'OK', 'payload': payload}
def on_disconnect(self):
try:
cqi_client: CQiClient = session['cqi_over_sio']['cqi_client']
cqi_client_lock: Lock = session['cqi_over_sio']['cqi_client_lock']
db_corpus_id: int = session['cqi_over_sio']['db_corpus_id']
corpus_id = SessionManager.get_corpus_id()
cqi_client = SessionManager.get_cqi_client()
cqi_client_lock = SessionManager.get_cqi_client_lock()
SessionManager.teardown()
except KeyError:
return
cqi_client_lock.acquire()
try:
session.pop('cqi_over_sio')
except KeyError:
pass
try:
cqi_client.api.ctrl_bye()
except (BrokenPipeError, CQiException):
pass
cqi_client_lock.release()
db_corpus: Optional[Corpus] = Corpus.query.get(db_corpus_id)
if db_corpus is None:
corpus = Corpus.query.get(corpus_id)
if corpus is None:
return
db_corpus.num_analysis_sessions = Corpus.num_analysis_sessions - 1
corpus.num_analysis_sessions = Corpus.num_analysis_sessions - 1
db.session.commit()

View File

@ -1,55 +1,39 @@
from collections import Counter
from cqi import CQiClient
from cqi.models.corpora import Corpus as CQiCorpus
from cqi.models.subcorpora import Subcorpus as CQiSubcorpus
from cqi.models.attributes import (
PositionalAttribute as CQiPositionalAttribute,
StructuralAttribute as CQiStructuralAttribute
)
from cqi.status import StatusOk as CQiStatusOk
from flask import session
from typing import Dict, List
from flask import current_app
import gzip
import json
import math
from app import db
from app.models import Corpus
from .utils import lookups_by_cpos, partial_export_subcorpus, export_subcorpus
CQI_EXTENSION_FUNCTION_NAMES: List[str] = [
'ext_corpus_update_db',
'ext_corpus_static_data',
'ext_corpus_paginate_corpus',
'ext_cqp_paginate_subcorpus',
'ext_cqp_partial_export_subcorpus',
'ext_cqp_export_subcorpus',
]
from .utils import SessionManager
def ext_corpus_update_db(corpus: str) -> CQiStatusOk:
cqi_client: CQiClient = session['cqi_over_sio']['cqi_client']
db_corpus_id: int = session['cqi_over_sio']['db_corpus_id']
db_corpus: Corpus = Corpus.query.get(db_corpus_id)
cqi_corpus: CQiCorpus = cqi_client.corpora.get(corpus)
corpus_id = SessionManager.get_corpus_id()
cqi_client = SessionManager.get_cqi_client()
db_corpus = Corpus.query.get(corpus_id)
cqi_corpus = cqi_client.corpora.get(corpus)
db_corpus.num_tokens = cqi_corpus.size
db.session.commit()
return CQiStatusOk()
def ext_corpus_static_data(corpus: str) -> Dict:
db_corpus_id: int = session['cqi_over_sio']['db_corpus_id']
db_corpus: Corpus = Corpus.query.get(db_corpus_id)
def ext_corpus_static_data(corpus: str) -> dict:
corpus_id = SessionManager.get_corpus_id()
db_corpus = Corpus.query.get(corpus_id)
static_data_file_path = db_corpus.path / 'cwb' / 'static.json.gz'
if static_data_file_path.exists():
with static_data_file_path.open('rb') as f:
return f.read()
cqi_client: CQiClient = session['cqi_over_sio']['cqi_client']
cqi_corpus: CQiCorpus = cqi_client.corpora.get(corpus)
cqi_p_attrs: List[CQiPositionalAttribute] = cqi_corpus.positional_attributes.list()
cqi_s_attrs: List[CQiStructuralAttribute] = cqi_corpus.structural_attributes.list()
cqi_client = SessionManager.get_cqi_client()
cqi_corpus = cqi_client.corpora.get(corpus)
cqi_p_attrs = cqi_corpus.positional_attributes.list()
cqi_s_attrs = cqi_corpus.structural_attributes.list()
static_data = {
'corpus': {
@ -62,21 +46,21 @@ def ext_corpus_static_data(corpus: str) -> Dict:
}
for p_attr in cqi_p_attrs:
print(f'corpus.freqs.{p_attr.name}')
current_app.logger.info(f'corpus.freqs.{p_attr.name}')
static_data['corpus']['freqs'][p_attr.name] = []
p_attr_id_list: List[int] = list(range(p_attr.lexicon_size))
p_attr_id_list = list(range(p_attr.lexicon_size))
static_data['corpus']['freqs'][p_attr.name].extend(p_attr.freqs_by_ids(p_attr_id_list))
del p_attr_id_list
print(f'p_attrs.{p_attr.name}')
current_app.logger.info(f'p_attrs.{p_attr.name}')
static_data['p_attrs'][p_attr.name] = []
cpos_list: List[int] = list(range(cqi_corpus.size))
cpos_list = list(range(cqi_corpus.size))
static_data['p_attrs'][p_attr.name].extend(p_attr.ids_by_cpos(cpos_list))
del cpos_list
print(f'values.p_attrs.{p_attr.name}')
current_app.logger.info(f'values.p_attrs.{p_attr.name}')
static_data['values']['p_attrs'][p_attr.name] = []
p_attr_id_list: List[int] = list(range(p_attr.lexicon_size))
p_attr_id_list = list(range(p_attr.lexicon_size))
static_data['values']['p_attrs'][p_attr.name].extend(p_attr.values_by_ids(p_attr_id_list))
del p_attr_id_list
@ -92,9 +76,9 @@ def ext_corpus_static_data(corpus: str) -> Dict:
# Note: Needs more testing, don't use it in production #
##############################################################
cqi_corpus.query('Last', f'<{s_attr.name}> []* </{s_attr.name}>;')
cqi_subcorpus: CQiSubcorpus = cqi_corpus.subcorpora.get('Last')
first_match: int = 0
last_match: int = cqi_subcorpus.size - 1
cqi_subcorpus = cqi_corpus.subcorpora.get('Last')
first_match = 0
last_match = cqi_subcorpus.size - 1
match_boundaries = zip(
range(first_match, last_match + 1),
cqi_subcorpus.dump(
@ -112,7 +96,7 @@ def ext_corpus_static_data(corpus: str) -> Dict:
del cqi_subcorpus, first_match, last_match
for id, lbound, rbound in match_boundaries:
static_data['s_attrs'][s_attr.name]['lexicon'].append({})
print(f's_attrs.{s_attr.name}.lexicon.{id}.bounds')
current_app.logger.info(f's_attrs.{s_attr.name}.lexicon.{id}.bounds')
static_data['s_attrs'][s_attr.name]['lexicon'][id]['bounds'] = [lbound, rbound]
del match_boundaries
@ -124,33 +108,33 @@ def ext_corpus_static_data(corpus: str) -> Dict:
# This is a very slow operation, thats why we only use it for
# the text attribute
lbound, rbound = s_attr.cpos_by_id(id)
print(f's_attrs.{s_attr.name}.lexicon.{id}.bounds')
current_app.logger.info(f's_attrs.{s_attr.name}.lexicon.{id}.bounds')
static_data['s_attrs'][s_attr.name]['lexicon'][id]['bounds'] = [lbound, rbound]
static_data['s_attrs'][s_attr.name]['lexicon'][id]['freqs'] = {}
cpos_list: List[int] = list(range(lbound, rbound + 1))
cpos_list = list(range(lbound, rbound + 1))
for p_attr in cqi_p_attrs:
p_attr_ids: List[int] = []
p_attr_ids = []
p_attr_ids.extend(p_attr.ids_by_cpos(cpos_list))
print(f's_attrs.{s_attr.name}.lexicon.{id}.freqs.{p_attr.name}')
current_app.logger.info(f's_attrs.{s_attr.name}.lexicon.{id}.freqs.{p_attr.name}')
static_data['s_attrs'][s_attr.name]['lexicon'][id]['freqs'][p_attr.name] = dict(Counter(p_attr_ids))
del p_attr_ids
del cpos_list
sub_s_attrs: List[CQiStructuralAttribute] = cqi_corpus.structural_attributes.list(filters={'part_of': s_attr})
print(f's_attrs.{s_attr.name}.values')
sub_s_attrs = cqi_corpus.structural_attributes.list(filters={'part_of': s_attr})
current_app.logger.info(f's_attrs.{s_attr.name}.values')
static_data['s_attrs'][s_attr.name]['values'] = [
sub_s_attr.name[(len(s_attr.name) + 1):]
for sub_s_attr in sub_s_attrs
]
s_attr_id_list: List[int] = list(range(s_attr.size))
sub_s_attr_values: List[str] = []
s_attr_id_list = list(range(s_attr.size))
sub_s_attr_values = []
for sub_s_attr in sub_s_attrs:
tmp = []
tmp.extend(sub_s_attr.values_by_ids(s_attr_id_list))
sub_s_attr_values.append(tmp)
del tmp
del s_attr_id_list
print(f'values.s_attrs.{s_attr.name}')
current_app.logger.info(f'values.s_attrs.{s_attr.name}')
static_data['values']['s_attrs'][s_attr.name] = [
{
s_attr_value_name: sub_s_attr_values[s_attr_value_name_idx][s_attr_id]
@ -160,11 +144,11 @@ def ext_corpus_static_data(corpus: str) -> Dict:
} for s_attr_id in range(0, s_attr.size)
]
del sub_s_attr_values
print('Saving static data to file')
current_app.logger.info('Saving static data to file')
with gzip.open(static_data_file_path, 'wt') as f:
json.dump(static_data, f)
del static_data
print('Sending static data to client')
current_app.logger.info('Sending static data to client')
with open(static_data_file_path, 'rb') as f:
return f.read()
@ -173,8 +157,8 @@ def ext_corpus_paginate_corpus(
corpus: str,
page: int = 1,
per_page: int = 20
) -> Dict:
cqi_client: CQiClient = session['cqi_over_sio']['cqi_client']
) -> dict:
cqi_client = SessionManager.get_cqi_client()
cqi_corpus = cqi_client.corpora.get(corpus)
# Sanity checks
if (
@ -189,7 +173,7 @@ def ext_corpus_paginate_corpus(
first_cpos = (page - 1) * per_page
last_cpos = min(cqi_corpus.size, first_cpos + per_page)
cpos_list = [*range(first_cpos, last_cpos)]
lookups = lookups_by_cpos(cqi_corpus, cpos_list)
lookups = _lookups_by_cpos(cqi_corpus, cpos_list)
payload = {}
# the items for the current page
payload['items'] = [cpos_list]
@ -219,9 +203,9 @@ def ext_cqp_paginate_subcorpus(
context: int = 50,
page: int = 1,
per_page: int = 20
) -> Dict:
) -> dict:
corpus_name, subcorpus_name = subcorpus.split(':', 1)
cqi_client: CQiClient = session['cqi_over_sio']['cqi_client']
cqi_client = SessionManager.get_cqi_client()
cqi_corpus = cqi_client.corpora.get(corpus_name)
cqi_subcorpus = cqi_corpus.subcorpora.get(subcorpus_name)
# Sanity checks
@ -236,7 +220,7 @@ def ext_cqp_paginate_subcorpus(
return {'code': 416, 'msg': 'Range Not Satisfiable'}
offset = (page - 1) * per_page
cutoff = per_page
cqi_results_export = export_subcorpus(
cqi_results_export = _export_subcorpus(
cqi_subcorpus, context=context, cutoff=cutoff, offset=offset)
payload = {}
# the items for the current page
@ -266,22 +250,147 @@ def ext_cqp_partial_export_subcorpus(
subcorpus: str,
match_id_list: list,
context: int = 50
) -> Dict:
) -> dict:
corpus_name, subcorpus_name = subcorpus.split(':', 1)
cqi_client: CQiClient = session['cqi_over_sio']['cqi_client']
cqi_client = SessionManager.get_cqi_client()
cqi_corpus = cqi_client.corpora.get(corpus_name)
cqi_subcorpus = cqi_corpus.subcorpora.get(subcorpus_name)
cqi_subcorpus_partial_export = partial_export_subcorpus(cqi_subcorpus, match_id_list, context=context)
cqi_subcorpus_partial_export = _partial_export_subcorpus(cqi_subcorpus, match_id_list, context=context)
return cqi_subcorpus_partial_export
def ext_cqp_export_subcorpus(
subcorpus: str,
context: int = 50
) -> Dict:
def ext_cqp_export_subcorpus(subcorpus: str, context: int = 50) -> dict:
corpus_name, subcorpus_name = subcorpus.split(':', 1)
cqi_client: CQiClient = session['cqi_over_sio']['cqi_client']
cqi_client = SessionManager.get_cqi_client()
cqi_corpus = cqi_client.corpora.get(corpus_name)
cqi_subcorpus = cqi_corpus.subcorpora.get(subcorpus_name)
cqi_subcorpus_export = export_subcorpus(cqi_subcorpus, context=context)
cqi_subcorpus_export = _export_subcorpus(cqi_subcorpus, context=context)
return cqi_subcorpus_export
def _lookups_by_cpos(corpus: CQiCorpus, cpos_list: list[int]) -> dict:
lookups = {}
lookups['cpos_lookup'] = {cpos: {} for cpos in cpos_list}
for attr in corpus.positional_attributes.list():
cpos_attr_values = attr.values_by_cpos(cpos_list)
for i, cpos in enumerate(cpos_list):
lookups['cpos_lookup'][cpos][attr.name] = cpos_attr_values[i]
for attr in corpus.structural_attributes.list():
# We only want to iterate over non subattributes, identifiable by
# attr.has_values == False
if attr.has_values:
continue
cpos_attr_ids = attr.ids_by_cpos(cpos_list)
for i, cpos in enumerate(cpos_list):
if cpos_attr_ids[i] == -1:
continue
lookups['cpos_lookup'][cpos][attr.name] = cpos_attr_ids[i]
occured_attr_ids = [x for x in set(cpos_attr_ids) if x != -1]
if len(occured_attr_ids) == 0:
continue
subattrs = corpus.structural_attributes.list(filters={'part_of': attr})
if len(subattrs) == 0:
continue
lookup_name = f'{attr.name}_lookup'
lookups[lookup_name] = {}
for attr_id in occured_attr_ids:
lookups[lookup_name][attr_id] = {}
for subattr in subattrs:
subattr_name = subattr.name[(len(attr.name) + 1):] # noqa
for i, subattr_value in enumerate(subattr.values_by_ids(occured_attr_ids)): # noqa
lookups[lookup_name][occured_attr_ids[i]][subattr_name] = subattr_value # noqa
return lookups
def _partial_export_subcorpus(
subcorpus: CQiSubcorpus,
match_id_list: list[int],
context: int = 25
) -> dict:
if subcorpus.size == 0:
return {'matches': []}
match_boundaries = []
for match_id in match_id_list:
if match_id < 0 or match_id >= subcorpus.size:
continue
match_boundaries.append(
(
match_id,
subcorpus.dump(subcorpus.fields['match'], match_id, match_id)[0],
subcorpus.dump(subcorpus.fields['matchend'], match_id, match_id)[0]
)
)
cpos_set = set()
matches = []
for match_boundary in match_boundaries:
match_num, match_start, match_end = match_boundary
c = (match_start, match_end)
if match_start == 0 or context == 0:
lc = None
cpos_list_lbound = match_start
else:
lc_lbound = max(0, (match_start - context))
lc_rbound = match_start - 1
lc = (lc_lbound, lc_rbound)
cpos_list_lbound = lc_lbound
if match_end == (subcorpus.collection.corpus.size - 1) or context == 0:
rc = None
cpos_list_rbound = match_end
else:
rc_lbound = match_end + 1
rc_rbound = min(
(match_end + context),
(subcorpus.collection.corpus.size - 1)
)
rc = (rc_lbound, rc_rbound)
cpos_list_rbound = rc_rbound
match = {'num': match_num, 'lc': lc, 'c': c, 'rc': rc}
matches.append(match)
cpos_set.update(range(cpos_list_lbound, cpos_list_rbound + 1))
lookups = _lookups_by_cpos(subcorpus.collection.corpus, list(cpos_set))
return {'matches': matches, **lookups}
def _export_subcorpus(
subcorpus: CQiSubcorpus,
context: int = 25,
cutoff: float = float('inf'),
offset: int = 0
) -> dict:
if subcorpus.size == 0:
return {'matches': []}
first_match = max(0, offset)
last_match = min((offset + cutoff - 1), (subcorpus.size - 1))
match_boundaries = zip(
range(first_match, last_match + 1),
subcorpus.dump(subcorpus.fields['match'], first_match, last_match),
subcorpus.dump(subcorpus.fields['matchend'], first_match, last_match)
)
cpos_set = set()
matches = []
for match_num, match_start, match_end in match_boundaries:
c = (match_start, match_end)
if match_start == 0 or context == 0:
lc = None
cpos_list_lbound = match_start
else:
lc_lbound = max(0, (match_start - context))
lc_rbound = match_start - 1
lc = (lc_lbound, lc_rbound)
cpos_list_lbound = lc_lbound
if match_end == (subcorpus.collection.corpus.size - 1) or context == 0:
rc = None
cpos_list_rbound = match_end
else:
rc_lbound = match_end + 1
rc_rbound = min(
(match_end + context),
(subcorpus.collection.corpus.size - 1)
)
rc = (rc_lbound, rc_rbound)
cpos_list_rbound = rc_rbound
match = {'num': match_num, 'lc': lc, 'c': c, 'rc': rc}
matches.append(match)
cpos_set.update(range(cpos_list_lbound, cpos_list_rbound + 1))
lookups = _lookups_by_cpos(subcorpus.collection.corpus, list(cpos_set))
return {'matches': matches, **lookups}

View File

@ -0,0 +1,37 @@
from cqi import CQiClient
from threading import Lock
from flask import session
class SessionManager:
@staticmethod
def setup():
session['cqi_over_sio'] = {}
@staticmethod
def teardown():
session.pop('cqi_over_sio')
@staticmethod
def set_corpus_id(corpus_id: int):
session['cqi_over_sio']['corpus_id'] = corpus_id
@staticmethod
def get_corpus_id() -> int:
return session['cqi_over_sio']['corpus_id']
@staticmethod
def set_cqi_client(cqi_client: CQiClient):
session['cqi_over_sio']['cqi_client'] = cqi_client
@staticmethod
def get_cqi_client() -> CQiClient:
return session['cqi_over_sio']['cqi_client']
@staticmethod
def set_cqi_client_lock(cqi_client_lock: Lock):
session['cqi_over_sio']['cqi_client_lock'] = cqi_client_lock
@staticmethod
def get_cqi_client_lock() -> Lock:
return session['cqi_over_sio']['cqi_client_lock']

109
app/namespaces/jobs.py Normal file
View File

@ -0,0 +1,109 @@
from flask import current_app, Flask
from flask_login import current_user
from flask_socketio import Namespace
from app import db, hashids, socketio
from app.decorators import socketio_admin_required, socketio_login_required
from app.models import Job, JobStatus
def _delete_job(app: Flask, job_id: int):
with app.app_context():
job = Job.query.get(job_id)
job.delete()
db.session.commit()
def _restart_job(app, job_id):
with app.app_context():
job = Job.query.get(job_id)
job.restart()
db.session.commit()
class UsersNamespace(Namespace):
@socketio_login_required
def on_delete(self, job_hashid: str) -> dict:
job_id = hashids.decode(job_hashid)
if not isinstance(job_id, int):
return {'status': 400, 'statusText': 'Bad Request'}
job = Job.query.get(job_id)
if job is None:
return {'status': 404, 'statusText': 'Not found'}
if not (
job.user == current_user
or current_user.is_administrator
):
return {'status': 403, 'statusText': 'Forbidden'}
socketio.start_background_task(
_delete_job,
current_app._get_current_object(),
job_id
)
return {
'body': f'Job "{job.title}" marked for deletion',
'status': 202,
'statusText': 'Accepted'
}
@socketio_admin_required
def on_log(self, job_hashid: str):
job_id = hashids.decode(job_hashid)
if not isinstance(job_id, int):
return {'status': 400, 'statusText': 'Bad Request'}
job = Job.query.get(job_id)
if job is None:
return {'status': 404, 'statusText': 'Not found'}
if job.status not in [JobStatus.COMPLETED, JobStatus.FAILED]:
return {'status': 409, 'statusText': 'Conflict'}
with open(job.path / 'pipeline_data' / 'logs' / 'pyflow_log.txt') as log_file:
log = log_file.read()
return {
'body': log,
'status': 200,
'statusText': 'Forbidden'
}
socketio_login_required
def on_restart(self, job_hashid: str):
job_id = hashids.decode(job_hashid)
if not isinstance(job_id, int):
return {'status': 400, 'statusText': 'Bad Request'}
job = Job.query.get(job_id)
if job is None:
return {'status': 404, 'statusText': 'Not found'}
if not (
job.user == current_user
or current_user.is_administrator
):
return {'status': 403, 'statusText': 'Forbidden'}
if job.status == JobStatus.FAILED:
return {'status': 409, 'statusText': 'Conflict'}
socketio.start_background_task(
_restart_job,
current_app._get_current_object(),
job_id
)
return {
'body': f'Job "{job.title}" marked for restarting',
'status': 202,
'statusText': 'Accepted'
}

116
app/namespaces/users.py Normal file
View File

@ -0,0 +1,116 @@
from flask import current_app, Flask
from flask_login import current_user
from flask_socketio import join_room, leave_room, Namespace
from app import db, hashids, socketio
from app.decorators import socketio_login_required
from app.models import User
def _delete_user(app: Flask, user_id: int):
with app.app_context():
user = User.query.get(user_id)
user.delete()
db.session.commit()
class UsersNamespace(Namespace):
@socketio_login_required
def on_get(self, user_hashid: str) -> dict:
user_id = hashids.decode(user_hashid)
if not isinstance(user_id, int):
return {'status': 400, 'statusText': 'Bad Request'}
user = User.query.get(user_id)
if user is None:
return {'status': 404, 'statusText': 'Not found'}
if not (
user == current_user
or current_user.is_administrator
):
return {'status': 403, 'statusText': 'Forbidden'}
return {
'body': user.to_json_serializeable(
backrefs=True,
relationships=True
),
'status': 200,
'statusText': 'OK'
}
@socketio_login_required
def on_subscribe(self, user_hashid: str) -> dict:
user_id = hashids.decode(user_hashid)
if not isinstance(user_id, int):
return {'status': 400, 'statusText': 'Bad Request'}
user = User.query.get(user_id)
if user is None:
return {'status': 404, 'statusText': 'Not found'}
if not (
user == current_user
or current_user.is_administrator
):
return {'status': 403, 'statusText': 'Forbidden'}
join_room(f'/users/{user.hashid}')
return {'status': 200, 'statusText': 'OK'}
@socketio_login_required
def on_unsubscribe(self, user_hashid: str) -> dict:
user_id = hashids.decode(user_hashid)
if not isinstance(user_id, int):
return {'status': 400, 'statusText': 'Bad Request'}
user = User.query.get(user_id)
if user is None:
return {'status': 404, 'statusText': 'Not found'}
if not (
user == current_user
or current_user.is_administrator
):
return {'status': 403, 'statusText': 'Forbidden'}
leave_room(f'/users/{user.hashid}')
return {'status': 200, 'statusText': 'OK'}
@socketio_login_required
def on_delete(self, user_hashid: str) -> dict:
user_id = hashids.decode(user_hashid)
if not isinstance(user_id, int):
return {'status': 400, 'statusText': 'Bad Request'}
user = User.query.get(user_id)
if user is None:
return {'status': 404, 'statusText': 'Not found'}
if not (
user == current_user
or current_user.is_administrator
):
return {'status': 403, 'statusText': 'Forbidden'}
socketio.start_background_task(
_delete_user,
current_app._get_current_object(),
user.id
)
return {
'body': f'User "{user.username}" marked for deletion',
'status': 202,
'statusText': 'Accepted'
}

Some files were not shown because too many files have changed in this diff Show More