6 Commits

452 changed files with 4848 additions and 1573939 deletions

View File

@ -5,9 +5,9 @@
!app !app
!migrations !migrations
!tests !tests
!.flaskenv
!boot.sh !boot.sh
!config.py !config.py
!docker-nopaque-entrypoint.sh !docker-nopaque-entrypoint.sh
!nopaque.py
!requirements.txt !requirements.txt
!requirements.freezed.txt
!wsgi.py

View File

@ -1,20 +1,32 @@
############################################################################## ##############################################################################
# Environment variables used by Docker Compose config files. # # Variables for use in Docker Compose YAML files #
############################################################################## ##############################################################################
# HINT: Use this bash command `id -u` # HINT: Use this bash command `id -u`
# NOTE: 0 (= root user) is not allowed # NOTE: 0 (= root user) is not allowed
HOST_UID= HOST_UID=
# HINT: Use this bash command `id -g` # HINT: Use this bash command `id -g`
# NOTE: 0 (= root group) is not allowed
HOST_GID= HOST_GID=
# HINT: Use this bash command `getent group docker | cut -d: -f3` # HINT: Use this bash command `getent group docker | cut -d: -f3`
HOST_DOCKER_GID= HOST_DOCKER_GID=
# DEFAULT: nopaque # DEFAULT: nopaque
NOPAQUE_DOCKER_NETWORK_NAME=nopaque # DOCKER_DEFAULT_NETWORK_NAME=
# DEFAULT: ./volumes/db/data
# NOTE: Use `.` as <project-basedir>
# DOCKER_DB_SERVICE_DATA_VOLUME_SOURCE_PATH=
# DEFAULT: ./volumes/mq/data
# NOTE: Use `.` as <project-basedir>
# DOCKER_MQ_SERVICE_DATA_VOLUME_SOURCE_PATH=
# NOTE: This must be a network share and it must be available on all # NOTE: This must be a network share and it must be available on all
# Docker Swarm nodes, mounted to the same path. # Docker Swarm nodes, mounted to the same path with the same
HOST_NOPAQUE_DATA_PATH=/mnt/nopaque # user and group ownership.
DOCKER_NOPAQUE_SERVICE_DATA_VOLUME_SOURCE_PATH=
# DEFAULT: ./volumes/nopaque/logs
# NOTE: Use `.` as <project-basedir>
# DOCKER_NOPAQUE_SERVICE_LOGS_VOLUME_SOURCE_PATH=.

1
.flaskenv Normal file
View File

@ -0,0 +1 @@
FLASK_APP=nopaque.py

2
.gitignore vendored
View File

@ -2,6 +2,8 @@
app/static/gen/ app/static/gen/
volumes/ volumes/
docker-compose.override.yml docker-compose.override.yml
logs/
!logs/dummy
*.env *.env
*.pjentsch-testing *.pjentsch-testing

18
.vscode/settings.json vendored
View File

@ -1,7 +1,19 @@
{ {
"editor.rulers": [79], "editor.rulers": [79],
"editor.tabSize": 2,
"files.insertFinalNewline": true, "files.insertFinalNewline": true,
"files.trimFinalNewlines": true, "[css]": {
"files.trimTrailingWhitespace": true "editor.tabSize": 2
},
"[html]": {
"editor.tabSize": 2
},
"[javascript]": {
"editor.tabSize": 2
},
"[jinja-html]": {
"editor.tabSize": 2
},
"[scss]": {
"editor.tabSize": 2
}
} }

View File

@ -4,13 +4,11 @@ FROM python:3.10.13-slim-bookworm
LABEL authors="Patrick Jentsch <p.jentsch@uni-bielefeld.de>" LABEL authors="Patrick Jentsch <p.jentsch@uni-bielefeld.de>"
# Set environment variables
ENV LANG="C.UTF-8" ENV LANG="C.UTF-8"
ENV PYTHONDONTWRITEBYTECODE="1" ENV PYTHONDONTWRITEBYTECODE="1"
ENV PYTHONUNBUFFERED="1" ENV PYTHONUNBUFFERED="1"
# Install system dependencies
RUN apt-get update \ RUN apt-get update \
&& apt-get install --no-install-recommends --yes \ && apt-get install --no-install-recommends --yes \
build-essential \ build-essential \
@ -19,39 +17,37 @@ RUN apt-get update \
&& rm --recursive /var/lib/apt/lists/* && rm --recursive /var/lib/apt/lists/*
# Create a non-root user
RUN useradd --create-home --no-log-init nopaque \ RUN useradd --create-home --no-log-init nopaque \
&& groupadd docker \ && groupadd docker \
&& usermod --append --groups docker nopaque && usermod --append --groups docker nopaque
USER nopaque USER nopaque
WORKDIR /home/nopaque WORKDIR /home/nopaque
# Create a Python virtual environment
ENV NOPAQUE_PYTHON3_VENV_PATH="/home/nopaque/.venv" ENV NOPAQUE_PYTHON3_VENV_PATH="/home/nopaque/.venv"
RUN python3 -m venv "${NOPAQUE_PYTHON3_VENV_PATH}" RUN python3 -m venv "${NOPAQUE_PYTHON3_VENV_PATH}"
ENV PATH="${NOPAQUE_PYTHON3_VENV_PATH}/bin:${PATH}" ENV PATH="${NOPAQUE_PYTHON3_VENV_PATH}/bin:${PATH}"
# Install Python dependencies
COPY --chown=nopaque:nopaque requirements.freezed.txt requirements.freezed.txt
RUN python3 -m pip install --requirement requirements.freezed.txt \
&& rm requirements.freezed.txt
# Install the application
COPY docker-nopaque-entrypoint.sh /usr/local/bin/
COPY --chown=nopaque:nopaque app app COPY --chown=nopaque:nopaque app app
COPY --chown=nopaque:nopaque migrations migrations COPY --chown=nopaque:nopaque migrations migrations
COPY --chown=nopaque:nopaque tests tests COPY --chown=nopaque:nopaque tests tests
COPY --chown=nopaque:nopaque boot.sh config.py wsgi.py ./ COPY --chown=nopaque:nopaque .flaskenv boot.sh config.py nopaque.py requirements.txt ./
EXPOSE 5000 RUN python3 -m pip install --requirement requirements.txt \
&& mkdir logs
USER root USER root
COPY docker-nopaque-entrypoint.sh /usr/local/bin/
EXPOSE 5000
ENTRYPOINT ["docker-nopaque-entrypoint.sh"] ENTRYPOINT ["docker-nopaque-entrypoint.sh"]

View File

@ -35,7 +35,7 @@ username@hostname:~$ sudo mount --types cifs --options gid=${USER},password=nopa
# Clone the nopaque repository # Clone the nopaque repository
username@hostname:~$ git clone https://gitlab.ub.uni-bielefeld.de/sfb1288inf/nopaque.git username@hostname:~$ git clone https://gitlab.ub.uni-bielefeld.de/sfb1288inf/nopaque.git
# Create data directories # Create data directories
username@hostname:~$ mkdir volumes/{db,mq} username@hostname:~$ mkdir data/{db,logs,mq}
username@hostname:~$ cp db.env.tpl db.env username@hostname:~$ cp db.env.tpl db.env
username@hostname:~$ cp .env.tpl .env username@hostname:~$ cp .env.tpl .env
# Fill out the variables within these files. # Fill out the variables within these files.

View File

@ -120,7 +120,6 @@
version: '3.4.0' version: '3.4.0'
compatible_service_versions: compatible_service_versions:
- '0.1.1' - '0.1.1'
- '0.1.2'
- title: 'German' - title: 'German'
description: 'German pipeline optimized for CPU. Components: tok2vec, tagger, morphologizer, parser, lemmatizer (trainable_lemmatizer), senter, ner.' description: 'German pipeline optimized for CPU. Components: tok2vec, tagger, morphologizer, parser, lemmatizer (trainable_lemmatizer), senter, ner.'
url: 'https://github.com/explosion/spacy-models/releases/download/de_core_news_md-3.4.0/de_core_news_md-3.4.0.tar.gz' url: 'https://github.com/explosion/spacy-models/releases/download/de_core_news_md-3.4.0/de_core_news_md-3.4.0.tar.gz'
@ -132,7 +131,6 @@
version: '3.4.0' version: '3.4.0'
compatible_service_versions: compatible_service_versions:
- '0.1.1' - '0.1.1'
- '0.1.2'
- title: 'Greek' - title: 'Greek'
description: 'Greek pipeline optimized for CPU. Components: tok2vec, morphologizer, parser, lemmatizer (trainable_lemmatizer), senter, ner, attribute_ruler.' description: 'Greek pipeline optimized for CPU. Components: tok2vec, morphologizer, parser, lemmatizer (trainable_lemmatizer), senter, ner, attribute_ruler.'
url: 'https://github.com/explosion/spacy-models/releases/download/el_core_news_md-3.4.0/el_core_news_md-3.4.0.tar.gz' url: 'https://github.com/explosion/spacy-models/releases/download/el_core_news_md-3.4.0/el_core_news_md-3.4.0.tar.gz'
@ -144,7 +142,6 @@
version: '3.4.0' version: '3.4.0'
compatible_service_versions: compatible_service_versions:
- '0.1.1' - '0.1.1'
- '0.1.2'
- title: 'English' - title: 'English'
description: 'English pipeline optimized for CPU. Components: tok2vec, tagger, parser, senter, ner, attribute_ruler, lemmatizer.' description: 'English pipeline optimized for CPU. Components: tok2vec, tagger, parser, senter, ner, attribute_ruler, lemmatizer.'
url: 'https://github.com/explosion/spacy-models/releases/download/en_core_web_md-3.4.1/en_core_web_md-3.4.1.tar.gz' url: 'https://github.com/explosion/spacy-models/releases/download/en_core_web_md-3.4.1/en_core_web_md-3.4.1.tar.gz'
@ -156,7 +153,6 @@
version: '3.4.1' version: '3.4.1'
compatible_service_versions: compatible_service_versions:
- '0.1.1' - '0.1.1'
- '0.1.2'
- title: 'Spanish' - title: 'Spanish'
description: 'Spanish pipeline optimized for CPU. Components: tok2vec, morphologizer, parser, senter, ner, attribute_ruler, lemmatizer.' description: 'Spanish pipeline optimized for CPU. Components: tok2vec, morphologizer, parser, senter, ner, attribute_ruler, lemmatizer.'
url: 'https://github.com/explosion/spacy-models/releases/download/es_core_news_md-3.4.0/es_core_news_md-3.4.0.tar.gz' url: 'https://github.com/explosion/spacy-models/releases/download/es_core_news_md-3.4.0/es_core_news_md-3.4.0.tar.gz'
@ -168,7 +164,6 @@
version: '3.4.0' version: '3.4.0'
compatible_service_versions: compatible_service_versions:
- '0.1.1' - '0.1.1'
- '0.1.2'
- title: 'French' - title: 'French'
description: 'French pipeline optimized for CPU. Components: tok2vec, morphologizer, parser, senter, ner, attribute_ruler, lemmatizer.' description: 'French pipeline optimized for CPU. Components: tok2vec, morphologizer, parser, senter, ner, attribute_ruler, lemmatizer.'
url: 'https://github.com/explosion/spacy-models/releases/download/fr_core_news_md-3.4.0/fr_core_news_md-3.4.0.tar.gz' url: 'https://github.com/explosion/spacy-models/releases/download/fr_core_news_md-3.4.0/fr_core_news_md-3.4.0.tar.gz'
@ -180,7 +175,6 @@
version: '3.4.0' version: '3.4.0'
compatible_service_versions: compatible_service_versions:
- '0.1.1' - '0.1.1'
- '0.1.2'
- title: 'Italian' - title: 'Italian'
description: 'Italian pipeline optimized for CPU. Components: tok2vec, morphologizer, tagger, parser, lemmatizer (trainable_lemmatizer), senter, ner' description: 'Italian pipeline optimized for CPU. Components: tok2vec, morphologizer, tagger, parser, lemmatizer (trainable_lemmatizer), senter, ner'
url: 'https://github.com/explosion/spacy-models/releases/download/it_core_news_md-3.4.0/it_core_news_md-3.4.0.tar.gz' url: 'https://github.com/explosion/spacy-models/releases/download/it_core_news_md-3.4.0/it_core_news_md-3.4.0.tar.gz'
@ -192,7 +186,6 @@
version: '3.4.0' version: '3.4.0'
compatible_service_versions: compatible_service_versions:
- '0.1.1' - '0.1.1'
- '0.1.2'
- title: 'Polish' - title: 'Polish'
description: 'Polish pipeline optimized for CPU. Components: tok2vec, morphologizer, parser, lemmatizer (trainable_lemmatizer), tagger, senter, ner.' description: 'Polish pipeline optimized for CPU. Components: tok2vec, morphologizer, parser, lemmatizer (trainable_lemmatizer), tagger, senter, ner.'
url: 'https://github.com/explosion/spacy-models/releases/download/pl_core_news_md-3.4.0/pl_core_news_md-3.4.0.tar.gz' url: 'https://github.com/explosion/spacy-models/releases/download/pl_core_news_md-3.4.0/pl_core_news_md-3.4.0.tar.gz'
@ -204,7 +197,6 @@
version: '3.4.0' version: '3.4.0'
compatible_service_versions: compatible_service_versions:
- '0.1.1' - '0.1.1'
- '0.1.2'
- title: 'Russian' - title: 'Russian'
description: 'Russian pipeline optimized for CPU. Components: tok2vec, morphologizer, parser, senter, ner, attribute_ruler, lemmatizer.' description: 'Russian pipeline optimized for CPU. Components: tok2vec, morphologizer, parser, senter, ner, attribute_ruler, lemmatizer.'
url: 'https://github.com/explosion/spacy-models/releases/download/ru_core_news_md-3.4.0/ru_core_news_md-3.4.0.tar.gz' url: 'https://github.com/explosion/spacy-models/releases/download/ru_core_news_md-3.4.0/ru_core_news_md-3.4.0.tar.gz'
@ -216,7 +208,6 @@
version: '3.4.0' version: '3.4.0'
compatible_service_versions: compatible_service_versions:
- '0.1.1' - '0.1.1'
- '0.1.2'
- title: 'Chinese' - title: 'Chinese'
description: 'Chinese pipeline optimized for CPU. Components: tok2vec, tagger, parser, senter, ner, attribute_ruler.' description: 'Chinese pipeline optimized for CPU. Components: tok2vec, tagger, parser, senter, ner, attribute_ruler.'
url: 'https://github.com/explosion/spacy-models/releases/download/zh_core_web_md-3.4.0/zh_core_web_md-3.4.0.tar.gz' url: 'https://github.com/explosion/spacy-models/releases/download/zh_core_web_md-3.4.0/zh_core_web_md-3.4.0.tar.gz'
@ -228,4 +219,3 @@
version: '3.4.0' version: '3.4.0'
compatible_service_versions: compatible_service_versions:
- '0.1.1' - '0.1.1'
- '0.1.2'

View File

@ -2,9 +2,9 @@ from apifairy import APIFairy
from config import Config from config import Config
from docker import DockerClient from docker import DockerClient
from flask import Flask from flask import Flask
from flask.logging import default_handler
from flask_apscheduler import APScheduler from flask_apscheduler import APScheduler
from flask_assets import Environment from flask_assets import Environment
from flask_breadcrumbs import Breadcrumbs, default_breadcrumb_root
from flask_login import LoginManager from flask_login import LoginManager
from flask_mail import Mail from flask_mail import Mail
from flask_marshmallow import Marshmallow from flask_marshmallow import Marshmallow
@ -13,139 +13,95 @@ from flask_paranoid import Paranoid
from flask_socketio import SocketIO from flask_socketio import SocketIO
from flask_sqlalchemy import SQLAlchemy from flask_sqlalchemy import SQLAlchemy
from flask_hashids import Hashids from flask_hashids import Hashids
from logging import Formatter, StreamHandler
from werkzeug.middleware.proxy_fix import ProxyFix
docker_client = DockerClient.from_env()
apifairy = APIFairy() apifairy = APIFairy()
assets = Environment() assets = Environment()
breadcrumbs = Breadcrumbs()
db = SQLAlchemy() db = SQLAlchemy()
docker_client = DockerClient()
hashids = Hashids() hashids = Hashids()
login = LoginManager() login = LoginManager()
login.login_view = 'auth.login'
login.login_message = 'Please log in to access this page.'
ma = Marshmallow() ma = Marshmallow()
mail = Mail() mail = Mail()
migrate = Migrate(compare_type=True) migrate = Migrate(compare_type=True)
paranoid = Paranoid() paranoid = Paranoid()
paranoid.redirect_view = '/'
scheduler = APScheduler() scheduler = APScheduler()
socketio = SocketIO() socketio = SocketIO()
def create_app(config: Config = Config) -> Flask: def create_app(config: Config = Config) -> Flask:
''' Creates an initialized Flask object. ''' ''' Creates an initialized Flask (WSGI Application) object. '''
app = Flask(__name__) app = Flask(__name__)
app.config.from_object(config) app.config.from_object(config)
config.init_app(app)
# region Logging
log_formatter = Formatter(
fmt=app.config['NOPAQUE_LOG_FORMAT'],
datefmt=app.config['NOPAQUE_LOG_DATE_FORMAT']
)
log_handler = StreamHandler()
log_handler.setFormatter(log_formatter)
log_handler.setLevel(app.config['NOPAQUE_LOG_LEVEL'])
app.logger.setLevel('DEBUG')
app.logger.removeHandler(default_handler)
app.logger.addHandler(log_handler)
# endregion Logging
# region Middlewares
if app.config['NOPAQUE_PROXY_FIX_ENABLED']:
app.wsgi_app = ProxyFix(
app.wsgi_app,
x_for=app.config['NOPAQUE_PROXY_FIX_X_FOR'],
x_host=app.config['NOPAQUE_PROXY_FIX_X_HOST'],
x_port=app.config['NOPAQUE_PROXY_FIX_X_PORT'],
x_prefix=app.config['NOPAQUE_PROXY_FIX_X_PREFIX'],
x_proto=app.config['NOPAQUE_PROXY_FIX_X_PROTO']
)
# endregion Middlewares
# region Extensions
docker_client.login( docker_client.login(
app.config['NOPAQUE_DOCKER_REGISTRY_USERNAME'], app.config['NOPAQUE_DOCKER_REGISTRY_USERNAME'],
password=app.config['NOPAQUE_DOCKER_REGISTRY_PASSWORD'], password=app.config['NOPAQUE_DOCKER_REGISTRY_PASSWORD'],
registry=app.config['NOPAQUE_DOCKER_REGISTRY'] registry=app.config['NOPAQUE_DOCKER_REGISTRY']
) )
from .models import AnonymousUser, User
apifairy.init_app(app) apifairy.init_app(app)
assets.init_app(app) assets.init_app(app)
breadcrumbs.init_app(app)
db.init_app(app) db.init_app(app)
hashids.init_app(app) hashids.init_app(app)
login.init_app(app) login.init_app(app)
login.anonymous_user = AnonymousUser
login.login_view = 'auth.login'
login.user_loader(lambda user_id: User.query.get(int(user_id)))
ma.init_app(app) ma.init_app(app)
mail.init_app(app) mail.init_app(app)
migrate.init_app(app, db) migrate.init_app(app, db)
paranoid.init_app(app) paranoid.init_app(app)
paranoid.redirect_view = '/'
scheduler.init_app(app) scheduler.init_app(app)
socketio.init_app(app, message_queue=app.config['NOPAQUE_SOCKETIO_MESSAGE_QUEUE_URI']) socketio.init_app(app, message_queue=app.config['NOPAQUE_SOCKETIO_MESSAGE_QUEUE_URI']) # noqa
# endregion Extensions
# region Blueprints from .admin import bp as admin_blueprint
from .blueprints.admin import bp as admin_blueprint default_breadcrumb_root(admin_blueprint, '.admin')
app.register_blueprint(admin_blueprint, url_prefix='/admin') app.register_blueprint(admin_blueprint, url_prefix='/admin')
from .blueprints.api import bp as api_blueprint from .api import bp as api_blueprint
app.register_blueprint(api_blueprint, url_prefix='/api') app.register_blueprint(api_blueprint, url_prefix='/api')
from .blueprints.auth import bp as auth_blueprint from .auth import bp as auth_blueprint
default_breadcrumb_root(auth_blueprint, '.')
app.register_blueprint(auth_blueprint) app.register_blueprint(auth_blueprint)
from .blueprints.contributions import bp as contributions_blueprint from .contributions import bp as contributions_blueprint
default_breadcrumb_root(contributions_blueprint, '.contributions')
app.register_blueprint(contributions_blueprint, url_prefix='/contributions') app.register_blueprint(contributions_blueprint, url_prefix='/contributions')
from .blueprints.corpora import bp as corpora_blueprint from .corpora import bp as corpora_blueprint
from .corpora.cqi_over_sio import CQiNamespace
default_breadcrumb_root(corpora_blueprint, '.corpora')
app.register_blueprint(corpora_blueprint, cli_group='corpus', url_prefix='/corpora') app.register_blueprint(corpora_blueprint, cli_group='corpus', url_prefix='/corpora')
socketio.on_namespace(CQiNamespace('/cqi_over_sio'))
from .blueprints.errors import bp as errors_bp from .errors import bp as errors_bp
app.register_blueprint(errors_bp) app.register_blueprint(errors_bp)
from .blueprints.jobs import bp as jobs_blueprint from .jobs import bp as jobs_blueprint
default_breadcrumb_root(jobs_blueprint, '.jobs')
app.register_blueprint(jobs_blueprint, url_prefix='/jobs') app.register_blueprint(jobs_blueprint, url_prefix='/jobs')
from .blueprints.main import bp as main_blueprint from .main import bp as main_blueprint
default_breadcrumb_root(main_blueprint, '.')
app.register_blueprint(main_blueprint, cli_group=None) app.register_blueprint(main_blueprint, cli_group=None)
from .blueprints.services import bp as services_blueprint from .services import bp as services_blueprint
default_breadcrumb_root(services_blueprint, '.services')
app.register_blueprint(services_blueprint, url_prefix='/services') app.register_blueprint(services_blueprint, url_prefix='/services')
from .blueprints.settings import bp as settings_blueprint from .settings import bp as settings_blueprint
default_breadcrumb_root(settings_blueprint, '.settings')
app.register_blueprint(settings_blueprint, url_prefix='/settings') app.register_blueprint(settings_blueprint, url_prefix='/settings')
from .blueprints.users import bp as users_blueprint from .users import bp as users_blueprint
app.register_blueprint(users_blueprint, cli_group='user', url_prefix='/users') default_breadcrumb_root(users_blueprint, '.users')
app.register_blueprint(users_blueprint, url_prefix='/users')
from .blueprints.workshops import bp as workshops_blueprint from .workshops import bp as workshops_blueprint
app.register_blueprint(workshops_blueprint, url_prefix='/workshops') app.register_blueprint(workshops_blueprint, url_prefix='/workshops')
# endregion Blueprints
# region SocketIO Namespaces
from .blueprints.corpora.cqi_over_sio import CQiOverSocketIO
socketio.on_namespace(CQiOverSocketIO('/cqi_over_sio'))
# endregion SocketIO Namespaces
# region Database event Listeners
from .models.event_listeners import register_event_listeners
register_event_listeners()
# endregion Database event Listeners
# region Add scheduler jobs
if app.config['NOPAQUE_IS_PRIMARY_INSTANCE']:
from .jobs import handle_corpora
scheduler.add_job('handle_corpora', handle_corpora, seconds=3, trigger='interval')
from .jobs import handle_jobs
scheduler.add_job('handle_jobs', handle_jobs, seconds=3, trigger='interval')
# endregion Add scheduler jobs
return app return app

View File

@ -1,6 +1,6 @@
from flask import abort, request from flask import abort, request
from app.decorators import content_negotiation
from app import db from app import db
from app.decorators import content_negotiation
from app.models import User from app.models import User
from . import bp from . import bp

View File

@ -1,7 +1,8 @@
from flask import abort, flash, redirect, render_template, url_for from flask import abort, flash, redirect, render_template, url_for
from flask_breadcrumbs import register_breadcrumb
from app import db, hashids from app import db, hashids
from app.models import Avatar, Corpus, Role, User from app.models import Avatar, Corpus, Role, User
from app.blueprints.users.settings.forms import ( from app.users.settings.forms import (
UpdateAvatarForm, UpdateAvatarForm,
UpdatePasswordForm, UpdatePasswordForm,
UpdateNotificationsForm, UpdateNotificationsForm,
@ -10,9 +11,14 @@ from app.blueprints.users.settings.forms import (
) )
from . import bp from . import bp
from .forms import UpdateUserForm from .forms import UpdateUserForm
from app.users.utils import (
user_endpoint_arguments_constructor as user_eac,
user_dynamic_list_constructor as user_dlc
)
@bp.route('') @bp.route('')
@register_breadcrumb(bp, '.', '<i class="material-icons left">admin_panel_settings</i>Administration')
def admin(): def admin():
return render_template( return render_template(
'admin/admin.html.j2', 'admin/admin.html.j2',
@ -21,6 +27,7 @@ def admin():
@bp.route('/corpora') @bp.route('/corpora')
@register_breadcrumb(bp, '.corpora', 'Corpora')
def corpora(): def corpora():
corpora = Corpus.query.all() corpora = Corpus.query.all()
return render_template( return render_template(
@ -31,6 +38,7 @@ def corpora():
@bp.route('/users') @bp.route('/users')
@register_breadcrumb(bp, '.users', '<i class="material-icons left">group</i>Users')
def users(): def users():
users = User.query.all() users = User.query.all()
return render_template( return render_template(
@ -41,6 +49,7 @@ def users():
@bp.route('/users/<hashid:user_id>') @bp.route('/users/<hashid:user_id>')
@register_breadcrumb(bp, '.users.entity', '', dynamic_list_constructor=user_dlc)
def user(user_id): def user(user_id):
user = User.query.get_or_404(user_id) user = User.query.get_or_404(user_id)
corpora = Corpus.query.filter(Corpus.user == user).all() corpora = Corpus.query.filter(Corpus.user == user).all()
@ -53,6 +62,7 @@ def user(user_id):
@bp.route('/users/<hashid:user_id>/settings', methods=['GET', 'POST']) @bp.route('/users/<hashid:user_id>/settings', methods=['GET', 'POST'])
@register_breadcrumb(bp, '.users.entity.settings', '<i class="material-icons left">settings</i>Settings')
def user_settings(user_id): def user_settings(user_id):
user = User.query.get_or_404(user_id) user = User.query.get_or_404(user_id)
update_account_information_form = UpdateAccountInformationForm(user) update_account_information_form = UpdateAccountInformationForm(user)

View File

@ -5,8 +5,8 @@ from flask import abort, Blueprint
from werkzeug.exceptions import InternalServerError from werkzeug.exceptions import InternalServerError
from app import db, hashids from app import db, hashids
from app.models import Job, JobInput, JobStatus, TesseractOCRPipelineModel from app.models import Job, JobInput, JobStatus, TesseractOCRPipelineModel
from .auth import auth_error_responses, token_auth
from .schemas import EmptySchema, JobSchema, SpaCyNLPPipelineJobSchema, TesseractOCRPipelineJobSchema, TesseractOCRPipelineModelSchema from .schemas import EmptySchema, JobSchema, SpaCyNLPPipelineJobSchema, TesseractOCRPipelineJobSchema, TesseractOCRPipelineModelSchema
from .auth import auth_error_responses, token_auth
bp = Blueprint('jobs', __name__) bp = Blueprint('jobs', __name__)
@ -77,7 +77,7 @@ def delete_job(job_id):
job = Job.query.get(job_id) job = Job.query.get(job_id)
if job is None: if job is None:
abort(404) abort(404)
if not (job.user == current_user or current_user.is_administrator): if not (job.user == current_user or current_user.is_administrator()):
abort(403) abort(403)
try: try:
job.delete() job.delete()
@ -97,6 +97,6 @@ def get_job(job_id):
job = Job.query.get(job_id) job = Job.query.get(job_id)
if job is None: if job is None:
abort(404) abort(404)
if not (job.user == current_user or current_user.is_administrator): if not (job.user == current_user or current_user.is_administrator()):
abort(403) abort(403)
return job return job

View File

@ -10,7 +10,7 @@ from app.models import (
User, User,
UserSettingJobStatusMailNotificationLevel UserSettingJobStatusMailNotificationLevel
) )
from app.blueprints.services import SERVICES from app.services import SERVICES

View File

@ -3,11 +3,11 @@ from apifairy import authenticate, body, response
from apifairy.decorators import other_responses from apifairy.decorators import other_responses
from flask import abort, Blueprint from flask import abort, Blueprint
from werkzeug.exceptions import InternalServerError from werkzeug.exceptions import InternalServerError
from app.email import create_message, send
from app import db from app import db
from app.email import create_message, send
from app.models import User from app.models import User
from .auth import auth_error_responses, token_auth
from .schemas import EmptySchema, UserSchema from .schemas import EmptySchema, UserSchema
from .auth import auth_error_responses, token_auth
bp = Blueprint('users', __name__) bp = Blueprint('users', __name__)
@ -60,7 +60,7 @@ def delete_user(user_id):
user = User.query.get(user_id) user = User.query.get(user_id)
if user is None: if user is None:
abort(404) abort(404)
if not (user == current_user or current_user.is_administrator): if not (user == current_user or current_user.is_administrator()):
abort(403) abort(403)
user.delete() user.delete()
db.session.commit() db.session.commit()
@ -78,7 +78,7 @@ def get_user(user_id):
user = User.query.get(user_id) user = User.query.get(user_id)
if user is None: if user is None:
abort(404) abort(404)
if not (user == current_user or current_user.is_administrator): if not (user == current_user or current_user.is_administrator()):
abort(403) abort(403)
return user return user
@ -94,6 +94,6 @@ def get_user_by_username(username):
user = User.query.filter(User.username == username).first() user = User.query.filter(User.username == username).first()
if user is None: if user is None:
abort(404) abort(404)
if not (user == current_user or current_user.is_administrator): if not (user == current_user or current_user.is_administrator()):
abort(403) abort(403)
return user return user

View File

@ -1,4 +1,5 @@
from flask import abort, flash, redirect, render_template, request, url_for from flask import abort, flash, redirect, render_template, request, url_for
from flask_breadcrumbs import register_breadcrumb
from flask_login import current_user, login_user, login_required, logout_user from flask_login import current_user, login_user, login_required, logout_user
from app import db from app import db
from app.email import create_message, send from app.email import create_message, send
@ -18,9 +19,7 @@ def before_request():
Checks if a user is unconfirmed when visiting specific sites. Redirects to Checks if a user is unconfirmed when visiting specific sites. Redirects to
unconfirmed view if user is unconfirmed. unconfirmed view if user is unconfirmed.
""" """
if not current_user.is_authenticated: if current_user.is_authenticated:
return
current_user.ping() current_user.ping()
db.session.commit() db.session.commit()
if (not current_user.confirmed if (not current_user.confirmed
@ -28,11 +27,10 @@ def before_request():
and request.blueprint != 'auth' and request.blueprint != 'auth'
and request.endpoint != 'static'): and request.endpoint != 'static'):
return redirect(url_for('auth.unconfirmed')) return redirect(url_for('auth.unconfirmed'))
if not current_user.terms_of_use_accepted:
return redirect(url_for('main.terms_of_use'))
@bp.route('/register', methods=['GET', 'POST']) @bp.route('/register', methods=['GET', 'POST'])
@register_breadcrumb(bp, '.register', 'Register')
def register(): def register():
if current_user.is_authenticated: if current_user.is_authenticated:
return redirect(url_for('main.dashboard')) return redirect(url_for('main.dashboard'))
@ -69,6 +67,7 @@ def register():
@bp.route('/login', methods=['GET', 'POST']) @bp.route('/login', methods=['GET', 'POST'])
@register_breadcrumb(bp, '.login', 'Login')
def login(): def login():
if current_user.is_authenticated: if current_user.is_authenticated:
return redirect(url_for('main.dashboard')) return redirect(url_for('main.dashboard'))
@ -99,6 +98,7 @@ def logout():
@bp.route('/unconfirmed') @bp.route('/unconfirmed')
@register_breadcrumb(bp, '.unconfirmed', 'Unconfirmed')
@login_required @login_required
def unconfirmed(): def unconfirmed():
if current_user.confirmed: if current_user.confirmed:
@ -141,6 +141,7 @@ def confirm(token):
@bp.route('/reset-password-request', methods=['GET', 'POST']) @bp.route('/reset-password-request', methods=['GET', 'POST'])
@register_breadcrumb(bp, '.reset_password_request', 'Password Reset')
def reset_password_request(): def reset_password_request():
if current_user.is_authenticated: if current_user.is_authenticated:
return redirect(url_for('main.dashboard')) return redirect(url_for('main.dashboard'))
@ -170,6 +171,7 @@ def reset_password_request():
@bp.route('/reset-password/<token>', methods=['GET', 'POST']) @bp.route('/reset-password/<token>', methods=['GET', 'POST'])
@register_breadcrumb(bp, '.reset_password', 'Password Reset')
def reset_password(token): def reset_password(token):
if current_user.is_authenticated: if current_user.is_authenticated:
return redirect(url_for('main.dashboard')) return redirect(url_for('main.dashboard'))

View File

@ -1,49 +0,0 @@
from flask_login import current_user
from flask_socketio import disconnect, Namespace
from app import db, hashids
from app.decorators import socketio_admin_required
from app.models import User
class AdminNamespace(Namespace):
def on_connect(self):
# Check if the user is authenticated and is an administrator
if not (current_user.is_authenticated and current_user.is_administrator):
disconnect()
@socketio_admin_required
def on_set_user_confirmed(self, user_hashid: str, confirmed_value: bool):
# Decode the user hashid
user_id = hashids.decode(user_hashid)
# Validate user_id
if not isinstance(user_id, int):
return {
'code': 400,
'body': 'user_id is invalid'
}
# Validate confirmed_value
if not isinstance(confirmed_value, bool):
return {
'code': 400,
'body': 'confirmed_value is invalid'
}
# Load user from database
user = User.query.get(user_id)
if user is None:
return {
'code': 404,
'body': 'User not found'
}
# Update user confirmed status
user.confirmed = confirmed_value
db.session.commit()
return {
'code': 200,
'body': f'User "{user.username}" is now {"confirmed" if confirmed_value else "unconfirmed"}'
}

View File

@ -1,18 +0,0 @@
from flask import Blueprint
from flask_login import login_required
bp = Blueprint('jobs', __name__)
@bp.before_request
@login_required
def before_request():
'''
Ensures that the routes in this package can only be visited by users that
are logged in.
'''
pass
from . import routes, json_routes

View File

@ -1,138 +0,0 @@
from flask import current_app
from flask_login import current_user
from flask_socketio import Namespace
from app import db, hashids, socketio
from app.extensions.flask_socketio import admin_required, login_required
from app.models import Job, JobStatus
class JobsNamespace(Namespace):
@login_required
def on_delete(self, job_hashid: str):
# Decode the job hashid
job_id = hashids.decode(job_hashid)
# Validate job_id
if not isinstance(job_id, int):
return {
'code': 400,
'body': 'job_id is invalid'
}
# Load job from database
job = Job.query.get(job_id)
if job is None:
return {
'code': 404,
'body': 'Job not found'
}
# Check if the current user is allowed to delete the job
if not (job.user == current_user or current_user.is_administrator):
return {
'code': 403,
'body': 'Forbidden'
}
# TODO: This should be a method in the Job model
def _delete_job(app, job_id):
with app.app_context():
job = Job.query.get(job_id)
job.delete()
db.session.commit()
# Delete the job in a background task
socketio.start_background_task(
target=_delete_job,
app=current_app._get_current_object(),
job_id=job_id
)
return {
'code': 202,
'body': f'Job "{job.title}" marked for deletion'
}
@admin_required
def on_get_log(self, job_hashid: str):
# Decode the job hashid
job_id = hashids.decode(job_hashid)
# Validate job_id
if not isinstance(job_id, int):
return {
'code': 400,
'body': 'job_id is invalid'
}
# Load job from database
job = Job.query.get(job_id)
if job is None:
return {
'code': 404,
'body': 'Job not found'
}
# Check if the job is already processed
if job.status not in [JobStatus.COMPLETED, JobStatus.FAILED]:
return {
'code': 409,
'body': 'Job is not done processing'
}
# Read the log file
with open(job.path / 'pipeline_data' / 'logs' / 'pyflow_log.txt') as log_file:
job_log = log_file.read()
return {
'code': 200,
'body': job_log
}
@login_required
def on_restart(self, job_hashid: str):
# Decode the job hashid
job_id = hashids.decode(job_hashid)
# Validate job_id
if not isinstance(job_id, int):
return {
'code': 400,
'body': 'job_id is invalid'
}
# Load job from database
job = Job.query.get(job_id)
if job is None:
return {
'code': 404,
'body': 'Job not found'
}
# Check if the current user is allowed to restart the job
if not (job.user == current_user or current_user.is_administrator):
return {
'code': 403,
'body': 'Forbidden'
}
# TODO: This should be a method in the Job model
def _restart_job(app, job_id):
with app.app_context():
job = Job.query.get(job_id)
job.restart()
db.session.commit()
# Restart the job in a background task
socketio.start_background_task(
target=_restart_job,
app=current_app._get_current_object(),
job_id=job_id
)
return {
'code': 202,
'body': f'Job "{job.title}" restarted'
}

View File

@ -1,12 +0,0 @@
from app.models import User
from app import db
from . import bp
@bp.cli.command('reset')
def reset():
''' Reset terms of use accept '''
for user in [x for x in User.query.all() if x.terms_of_use_accepted]:
print(f'Resetting user {user.username}')
user.terms_of_use_accepted = False
db.session.commit()

View File

@ -1,7 +1,9 @@
from flask import redirect, url_for from flask import redirect, url_for
from flask_breadcrumbs import register_breadcrumb
from . import bp from . import bp
@bp.route('') @bp.route('')
@register_breadcrumb(bp, '.', '<i class="material-icons left">new_label</i>My Contributions')
def contributions(): def contributions():
return redirect(url_for('main.dashboard', _anchor='contributions')) return redirect(url_for('main.dashboard', _anchor='contributions'))

View File

@ -1,7 +1,7 @@
from flask_wtf.file import FileField, FileRequired from flask_wtf.file import FileField, FileRequired
from wtforms import StringField, ValidationError from wtforms import StringField, ValidationError
from wtforms.validators import InputRequired, Length from wtforms.validators import InputRequired, Length
from app.blueprints.services import SERVICES from app.services import SERVICES
from ..forms import ContributionBaseForm, UpdateContributionBaseForm from ..forms import ContributionBaseForm, UpdateContributionBaseForm
@ -16,8 +16,8 @@ class CreateSpaCyNLPPipelineModelForm(ContributionBaseForm):
) )
def validate_spacy_model_file(self, field): def validate_spacy_model_file(self, field):
if not field.data.filename.lower().endswith(('.tar.gz', ('.whl'))): if not field.data.filename.lower().endswith('.tar.gz'):
raise ValidationError('.tar.gz or .whl files only!') raise ValidationError('.tar.gz files only!')
def __init__(self, *args, **kwargs): def __init__(self, *args, **kwargs):
if 'prefix' not in kwargs: if 'prefix' not in kwargs:

View File

@ -4,7 +4,7 @@ from threading import Thread
from app import db from app import db
from app.decorators import content_negotiation, permission_required from app.decorators import content_negotiation, permission_required
from app.models import SpaCyNLPPipelineModel from app.models import SpaCyNLPPipelineModel
from . import bp from .. import bp
@bp.route('/spacy-nlp-pipeline-models/<hashid:spacy_nlp_pipeline_model_id>', methods=['DELETE']) @bp.route('/spacy-nlp-pipeline-models/<hashid:spacy_nlp_pipeline_model_id>', methods=['DELETE'])
@ -17,7 +17,7 @@ def delete_spacy_model(spacy_nlp_pipeline_model_id):
db.session.commit() db.session.commit()
snpm = SpaCyNLPPipelineModel.query.get_or_404(spacy_nlp_pipeline_model_id) snpm = SpaCyNLPPipelineModel.query.get_or_404(spacy_nlp_pipeline_model_id)
if not (snpm.user == current_user or current_user.is_administrator): if not (snpm.user == current_user or current_user.is_administrator()):
abort(403) abort(403)
thread = Thread( thread = Thread(
target=_delete_spacy_model, target=_delete_spacy_model,
@ -39,7 +39,7 @@ def update_spacy_nlp_pipeline_model_is_public(spacy_nlp_pipeline_model_id):
if not isinstance(is_public, bool): if not isinstance(is_public, bool):
abort(400) abort(400)
snpm = SpaCyNLPPipelineModel.query.get_or_404(spacy_nlp_pipeline_model_id) snpm = SpaCyNLPPipelineModel.query.get_or_404(spacy_nlp_pipeline_model_id)
if not (snpm.user == current_user or current_user.is_administrator): if not (snpm.user == current_user or current_user.is_administrator()):
abort(403) abort(403)
snpm.is_public = is_public snpm.is_public = is_public
db.session.commit() db.session.commit()

View File

@ -1,4 +1,5 @@
from flask import abort, flash, redirect, render_template, url_for from flask import abort, flash, redirect, render_template, url_for
from flask_breadcrumbs import register_breadcrumb
from flask_login import current_user from flask_login import current_user
from app import db from app import db
from app.models import SpaCyNLPPipelineModel from app.models import SpaCyNLPPipelineModel
@ -7,9 +8,13 @@ from .forms import (
CreateSpaCyNLPPipelineModelForm, CreateSpaCyNLPPipelineModelForm,
UpdateSpaCyNLPPipelineModelForm UpdateSpaCyNLPPipelineModelForm
) )
from .utils import (
spacy_nlp_pipeline_model_dlc as spacy_nlp_pipeline_model_dlc
)
@bp.route('/spacy-nlp-pipeline-models') @bp.route('/spacy-nlp-pipeline-models')
@register_breadcrumb(bp, '.spacy_nlp_pipeline_models', 'SpaCy NLP Pipeline Models')
def spacy_nlp_pipeline_models(): def spacy_nlp_pipeline_models():
return render_template( return render_template(
'contributions/spacy_nlp_pipeline_models/spacy_nlp_pipeline_models.html.j2', 'contributions/spacy_nlp_pipeline_models/spacy_nlp_pipeline_models.html.j2',
@ -18,6 +23,7 @@ def spacy_nlp_pipeline_models():
@bp.route('/spacy-nlp-pipeline-models/create', methods=['GET', 'POST']) @bp.route('/spacy-nlp-pipeline-models/create', methods=['GET', 'POST'])
@register_breadcrumb(bp, '.spacy_nlp_pipeline_models.create', 'Create')
def create_spacy_nlp_pipeline_model(): def create_spacy_nlp_pipeline_model():
form = CreateSpaCyNLPPipelineModelForm() form = CreateSpaCyNLPPipelineModelForm()
if form.is_submitted(): if form.is_submitted():
@ -51,9 +57,10 @@ def create_spacy_nlp_pipeline_model():
@bp.route('/spacy-nlp-pipeline-models/<hashid:spacy_nlp_pipeline_model_id>', methods=['GET', 'POST']) @bp.route('/spacy-nlp-pipeline-models/<hashid:spacy_nlp_pipeline_model_id>', methods=['GET', 'POST'])
@register_breadcrumb(bp, '.spacy_nlp_pipeline_models.entity', '', dynamic_list_constructor=spacy_nlp_pipeline_model_dlc)
def spacy_nlp_pipeline_model(spacy_nlp_pipeline_model_id): def spacy_nlp_pipeline_model(spacy_nlp_pipeline_model_id):
snpm = SpaCyNLPPipelineModel.query.get_or_404(spacy_nlp_pipeline_model_id) snpm = SpaCyNLPPipelineModel.query.get_or_404(spacy_nlp_pipeline_model_id)
if not (snpm.user == current_user or current_user.is_administrator): if not (snpm.user == current_user or current_user.is_administrator()):
abort(403) abort(403)
form = UpdateSpaCyNLPPipelineModelForm(data=snpm.to_json_serializeable()) form = UpdateSpaCyNLPPipelineModelForm(data=snpm.to_json_serializeable())
if form.validate_on_submit(): if form.validate_on_submit():

View File

@ -0,0 +1,13 @@
from flask import request, url_for
from app.models import SpaCyNLPPipelineModel
def spacy_nlp_pipeline_model_dlc():
snpm_id = request.view_args['spacy_nlp_pipeline_model_id']
snpm = SpaCyNLPPipelineModel.query.get_or_404(snpm_id)
return [
{
'text': f'{snpm.title} {snpm.version}',
'url': url_for('.spacy_nlp_pipeline_model', spacy_nlp_pipeline_model_id=snpm_id)
}
]

View File

@ -1,6 +1,6 @@
from flask_wtf.file import FileField, FileRequired from flask_wtf.file import FileField, FileRequired
from wtforms import ValidationError from wtforms import ValidationError
from app.blueprints.services import SERVICES from app.services import SERVICES
from ..forms import ContributionBaseForm, UpdateContributionBaseForm from ..forms import ContributionBaseForm, UpdateContributionBaseForm

View File

@ -17,7 +17,7 @@ def delete_tesseract_model(tesseract_ocr_pipeline_model_id):
db.session.commit() db.session.commit()
topm = TesseractOCRPipelineModel.query.get_or_404(tesseract_ocr_pipeline_model_id) topm = TesseractOCRPipelineModel.query.get_or_404(tesseract_ocr_pipeline_model_id)
if not (topm.user == current_user or current_user.is_administrator): if not (topm.user == current_user or current_user.is_administrator()):
abort(403) abort(403)
thread = Thread( thread = Thread(
target=_delete_tesseract_ocr_pipeline_model, target=_delete_tesseract_ocr_pipeline_model,
@ -39,7 +39,7 @@ def update_tesseract_ocr_pipeline_model_is_public(tesseract_ocr_pipeline_model_i
if not isinstance(is_public, bool): if not isinstance(is_public, bool):
abort(400) abort(400)
topm = TesseractOCRPipelineModel.query.get_or_404(tesseract_ocr_pipeline_model_id) topm = TesseractOCRPipelineModel.query.get_or_404(tesseract_ocr_pipeline_model_id)
if not (topm.user == current_user or current_user.is_administrator): if not (topm.user == current_user or current_user.is_administrator()):
abort(403) abort(403)
topm.is_public = is_public topm.is_public = is_public
db.session.commit() db.session.commit()

View File

@ -1,4 +1,5 @@
from flask import abort, flash, redirect, render_template, url_for from flask import abort, flash, redirect, render_template, url_for
from flask_breadcrumbs import register_breadcrumb
from flask_login import current_user from flask_login import current_user
from app import db from app import db
from app.models import TesseractOCRPipelineModel from app.models import TesseractOCRPipelineModel
@ -7,9 +8,13 @@ from .forms import (
CreateTesseractOCRPipelineModelForm, CreateTesseractOCRPipelineModelForm,
UpdateTesseractOCRPipelineModelForm UpdateTesseractOCRPipelineModelForm
) )
from .utils import (
tesseract_ocr_pipeline_model_dlc as tesseract_ocr_pipeline_model_dlc
)
@bp.route('/tesseract-ocr-pipeline-models') @bp.route('/tesseract-ocr-pipeline-models')
@register_breadcrumb(bp, '.tesseract_ocr_pipeline_models', 'Tesseract OCR Pipeline Models')
def tesseract_ocr_pipeline_models(): def tesseract_ocr_pipeline_models():
return render_template( return render_template(
'contributions/tesseract_ocr_pipeline_models/tesseract_ocr_pipeline_models.html.j2', 'contributions/tesseract_ocr_pipeline_models/tesseract_ocr_pipeline_models.html.j2',
@ -18,6 +23,7 @@ def tesseract_ocr_pipeline_models():
@bp.route('/tesseract-ocr-pipeline-models/create', methods=['GET', 'POST']) @bp.route('/tesseract-ocr-pipeline-models/create', methods=['GET', 'POST'])
@register_breadcrumb(bp, '.tesseract_ocr_pipeline_models.create', 'Create')
def create_tesseract_ocr_pipeline_model(): def create_tesseract_ocr_pipeline_model():
form = CreateTesseractOCRPipelineModelForm() form = CreateTesseractOCRPipelineModelForm()
if form.is_submitted(): if form.is_submitted():
@ -50,9 +56,10 @@ def create_tesseract_ocr_pipeline_model():
@bp.route('/tesseract-ocr-pipeline-models/<hashid:tesseract_ocr_pipeline_model_id>', methods=['GET', 'POST']) @bp.route('/tesseract-ocr-pipeline-models/<hashid:tesseract_ocr_pipeline_model_id>', methods=['GET', 'POST'])
@register_breadcrumb(bp, '.tesseract_ocr_pipeline_models.entity', '', dynamic_list_constructor=tesseract_ocr_pipeline_model_dlc)
def tesseract_ocr_pipeline_model(tesseract_ocr_pipeline_model_id): def tesseract_ocr_pipeline_model(tesseract_ocr_pipeline_model_id):
topm = TesseractOCRPipelineModel.query.get_or_404(tesseract_ocr_pipeline_model_id) topm = TesseractOCRPipelineModel.query.get_or_404(tesseract_ocr_pipeline_model_id)
if not (topm.user == current_user or current_user.is_administrator): if not (topm.user == current_user or current_user.is_administrator()):
abort(403) abort(403)
form = UpdateTesseractOCRPipelineModelForm(data=topm.to_json_serializeable()) form = UpdateTesseractOCRPipelineModelForm(data=topm.to_json_serializeable())
if form.validate_on_submit(): if form.validate_on_submit():

View File

@ -0,0 +1,13 @@
from flask import request, url_for
from app.models import TesseractOCRPipelineModel
def tesseract_ocr_pipeline_model_dlc():
topm_id = request.view_args['tesseract_ocr_pipeline_model_id']
topm = TesseractOCRPipelineModel.query.get_or_404(topm_id)
return [
{
'text': f'{topm.title} {topm.version}',
'url': url_for('.tesseract_ocr_pipeline_model', tesseract_ocr_pipeline_model_id=topm_id)
}
]

View File

@ -1,34 +1,33 @@
from datetime import datetime
from flask import current_app from flask import current_app
from pathlib import Path
import json
import shutil
from app import db from app import db
from app.models import User, Corpus, CorpusFile from app.models import User, Corpus, CorpusFile
from datetime import datetime
import json
import os
import shutil
class SandpaperConverter: class SandpaperConverter:
def __init__(self, json_db_file: Path, data_dir: Path): def __init__(self, json_db_file, data_dir):
self.json_db_file = json_db_file self.json_db_file = json_db_file
self.data_dir = data_dir self.data_dir = data_dir
def run(self): def run(self):
with self.json_db_file.open('r') as f: with open(self.json_db_file, 'r') as f:
json_db: list[dict] = json.load(f) json_db = json.loads(f.read())
for json_user in json_db: for json_user in json_db:
if not json_user['confirmed']: if not json_user['confirmed']:
current_app.logger.info(f'Skip unconfirmed user {json_user["username"]}') current_app.logger.info(f'Skip unconfirmed user {json_user["username"]}')
continue continue
user_dir = self.data_dir / f'{json_user["id"]}' user_dir = os.path.join(self.data_dir, str(json_user['id']))
self.convert_user(json_user, user_dir) self.convert_user(json_user, user_dir)
db.session.commit() db.session.commit()
def convert_user(self, json_user: dict, user_dir: Path): def convert_user(self, json_user, user_dir):
current_app.logger.info(f'Create User {json_user["username"]}...') current_app.logger.info(f'Create User {json_user["username"]}...')
try: user = User(
user = User.create(
confirmed=json_user['confirmed'], confirmed=json_user['confirmed'],
email=json_user['email'], email=json_user['email'],
last_seen=datetime.fromtimestamp(json_user['last_seen']), last_seen=datetime.fromtimestamp(json_user['last_seen']),
@ -36,34 +35,47 @@ class SandpaperConverter:
password_hash=json_user['password_hash'], # TODO: Needs to be added manually password_hash=json_user['password_hash'], # TODO: Needs to be added manually
username=json_user['username'] username=json_user['username']
) )
except OSError: db.session.add(user)
db.session.flush(objects=[user])
db.session.refresh(user)
try:
user.makedirs()
except OSError as e:
current_app.logger.error(e)
db.session.rollback()
raise Exception('Internal Server Error') raise Exception('Internal Server Error')
for json_corpus in json_user['corpora'].values(): for json_corpus in json_user['corpora'].values():
if not json_corpus['files'].values(): if not json_corpus['files'].values():
current_app.logger.info(f'Skip empty corpus {json_corpus["title"]}') current_app.logger.info(f'Skip empty corpus {json_corpus["title"]}')
continue continue
corpus_dir = user_dir / 'corpora' / f'{json_corpus["id"]}' corpus_dir = os.path.join(user_dir, 'corpora', str(json_corpus['id']))
self.convert_corpus(json_corpus, user, corpus_dir) self.convert_corpus(json_corpus, user, corpus_dir)
current_app.logger.info('Done') current_app.logger.info('Done')
def convert_corpus(self, json_corpus: dict, user: User, corpus_dir: Path): def convert_corpus(self, json_corpus, user, corpus_dir):
current_app.logger.info(f'Create Corpus {json_corpus["title"]}...') current_app.logger.info(f'Create Corpus {json_corpus["title"]}...')
try: corpus = Corpus(
corpus = Corpus.create(
user=user, user=user,
creation_date=datetime.fromtimestamp(json_corpus['creation_date']), creation_date=datetime.fromtimestamp(json_corpus['creation_date']),
description=json_corpus['description'], description=json_corpus['description'],
title=json_corpus['title'] title=json_corpus['title']
) )
except OSError: db.session.add(corpus)
db.session.flush(objects=[corpus])
db.session.refresh(corpus)
try:
corpus.makedirs()
except OSError as e:
current_app.logger.error(e)
db.session.rollback()
raise Exception('Internal Server Error') raise Exception('Internal Server Error')
for json_corpus_file in json_corpus['files'].values(): for json_corpus_file in json_corpus['files'].values():
self.convert_corpus_file(json_corpus_file, corpus, corpus_dir) self.convert_corpus_file(json_corpus_file, corpus, corpus_dir)
current_app.logger.info('Done') current_app.logger.info('Done')
def convert_corpus_file(self, json_corpus_file: dict, corpus: Corpus, corpus_dir: Path): def convert_corpus_file(self, json_corpus_file, corpus, corpus_dir):
current_app.logger.info(f'Create CorpusFile {json_corpus_file["title"]}...') current_app.logger.info(f'Create CorpusFile {json_corpus_file["title"]}...')
corpus_file = CorpusFile( corpus_file = CorpusFile(
corpus=corpus, corpus=corpus,
@ -87,13 +99,13 @@ class SandpaperConverter:
db.session.refresh(corpus_file) db.session.refresh(corpus_file)
try: try:
shutil.copy2( shutil.copy2(
corpus_dir / json_corpus_file['filename'], os.path.join(corpus_dir, json_corpus_file['filename']),
corpus_file.path corpus_file.path
) )
except: except:
current_app.logger.warning( current_app.logger.warning(
'Can not convert corpus file: ' 'Can not convert corpus file: '
f'{corpus_dir / json_corpus_file["filename"]}' f'{os.path.join(corpus_dir, json_corpus_file["filename"])}'
' -> ' ' -> '
f'{corpus_file.path}' f'{corpus_file.path}'
) )

View File

@ -1,7 +1,7 @@
from flask import current_app from app.models import Corpus, CorpusStatus
import os
import shutil import shutil
from app import db from app import db
from app.models import Corpus, CorpusStatus
from . import bp from . import bp
@ -18,17 +18,10 @@ def reset():
] ]
for corpus in [x for x in Corpus.query.all() if x.status in status]: for corpus in [x for x in Corpus.query.all() if x.status in status]:
print(f'Resetting corpus {corpus}') print(f'Resetting corpus {corpus}')
corpus_cwb_dir = corpus.path / 'cwb' shutil.rmtree(os.path.join(corpus.path, 'cwb'), ignore_errors=True)
corpus_cwb_data_dir = corpus_cwb_dir / 'data' os.mkdir(os.path.join(corpus.path, 'cwb'))
corpus_cwb_registry_dir = corpus_cwb_dir / 'registry' os.mkdir(os.path.join(corpus.path, 'cwb', 'data'))
try: os.mkdir(os.path.join(corpus.path, 'cwb', 'registry'))
shutil.rmtree(corpus.path / 'cwb', ignore_errors=True)
corpus_cwb_dir.mkdir()
corpus_cwb_data_dir.mkdir()
corpus_cwb_registry_dir.mkdir()
except OSError as e:
current_app.logger.error(e)
raise
corpus.status = CorpusStatus.UNPREPARED corpus.status = CorpusStatus.UNPREPARED
corpus.num_analysis_sessions = 0 corpus.num_analysis_sessions = 0
db.session.commit() db.session.commit()

View File

@ -7,7 +7,7 @@ from flask_login import current_user
from flask_socketio import Namespace from flask_socketio import Namespace
from inspect import signature from inspect import signature
from threading import Lock from threading import Lock
from typing import Callable from typing import Callable, Dict, List, Optional
from app import db, docker_client, hashids, socketio from app import db, docker_client, hashids, socketio
from app.decorators import socketio_login_required from app.decorators import socketio_login_required
from app.models import Corpus, CorpusStatus from app.models import Corpus, CorpusStatus
@ -19,7 +19,7 @@ This package tunnels the Corpus Query interface (CQi) protocol through
Socket.IO (SIO) by tunneling CQi API calls through an event called "exec". Socket.IO (SIO) by tunneling CQi API calls through an event called "exec".
Basic concept: Basic concept:
1. A client connects to the namespace. 1. A client connects to the "/cqi_over_sio" namespace.
2. The client emits the "init" event and provides a corpus id for the corpus 2. The client emits the "init" event and provides a corpus id for the corpus
that should be analysed in this session. that should be analysed in this session.
1.1 The analysis session counter of the corpus is incremented. 1.1 The analysis session counter of the corpus is incremented.
@ -28,17 +28,17 @@ Basic concept:
1.4 Connect the CQiClient to the server. 1.4 Connect the CQiClient to the server.
1.5 Save the CQiClient, the Lock and the corpus id in the session for 1.5 Save the CQiClient, the Lock and the corpus id in the session for
subsequential use. subsequential use.
3. The client emits "exec" events, within which it provides the name of a CQi 2. The client emits the "exec" event provides the name of a CQi API function
API function and the corresponding arguments. arguments (optional).
3.1 The "exec" event handler will execute the function, make sure that - The event "exec" handler will execute the function, make sure that the
the result is serializable and returns the result back to the client. result is serializable and returns the result back to the client.
4. The client disconnects from the namespace 4. Wait for more events
4.1 The analysis session counter of the corpus is decremented. 5. The client disconnects from the "/cqi_over_sio" namespace
4.2 The CQiClient and (Mutex) Lock belonging to it are teared down. 1.1 The analysis session counter of the corpus is decremented.
1.2 The CQiClient and (Mutex) Lock belonging to it are teared down.
''' '''
CQI_API_FUNCTION_NAMES: List[str] = [
CQI_API_FUNCTION_NAMES: list[str] = [
'ask_feature_cl_2_3', 'ask_feature_cl_2_3',
'ask_feature_cqi_1_0', 'ask_feature_cqi_1_0',
'ask_feature_cqp_2_3', 'ask_feature_cqp_2_3',
@ -86,7 +86,7 @@ CQI_API_FUNCTION_NAMES: list[str] = [
] ]
class CQiOverSocketIO(Namespace): class CQiNamespace(Namespace):
@socketio_login_required @socketio_login_required
def on_connect(self): def on_connect(self):
pass pass
@ -94,12 +94,12 @@ class CQiOverSocketIO(Namespace):
@socketio_login_required @socketio_login_required
def on_init(self, db_corpus_hashid: str): def on_init(self, db_corpus_hashid: str):
db_corpus_id: int = hashids.decode(db_corpus_hashid) db_corpus_id: int = hashids.decode(db_corpus_hashid)
db_corpus: Corpus | None = Corpus.query.get(db_corpus_id) db_corpus: Optional[Corpus] = Corpus.query.get(db_corpus_id)
if db_corpus is None: if db_corpus is None:
return {'code': 404, 'msg': 'Not Found'} return {'code': 404, 'msg': 'Not Found'}
if not (db_corpus.user == current_user if not (db_corpus.user == current_user
or current_user.is_following_corpus(db_corpus) or current_user.is_following_corpus(db_corpus)
or current_user.is_administrator): or current_user.is_administrator()):
return {'code': 403, 'msg': 'Forbidden'} return {'code': 403, 'msg': 'Forbidden'}
if db_corpus.status not in [ if db_corpus.status not in [
CorpusStatus.BUILT, CorpusStatus.BUILT,
@ -135,7 +135,7 @@ class CQiOverSocketIO(Namespace):
return {'code': 200, 'msg': 'OK'} return {'code': 200, 'msg': 'OK'}
@socketio_login_required @socketio_login_required
def on_exec(self, fn_name: str, fn_args: dict = {}): def on_exec(self, fn_name: str, fn_args: Dict = {}):
try: try:
cqi_client: CQiClient = session['cqi_over_sio']['cqi_client'] cqi_client: CQiClient = session['cqi_over_sio']['cqi_client']
cqi_client_lock: Lock = session['cqi_over_sio']['cqi_client_lock'] cqi_client_lock: Lock = session['cqi_over_sio']['cqi_client_lock']
@ -199,7 +199,7 @@ class CQiOverSocketIO(Namespace):
except (BrokenPipeError, CQiException): except (BrokenPipeError, CQiException):
pass pass
cqi_client_lock.release() cqi_client_lock.release()
db_corpus: Corpus | None = Corpus.query.get(db_corpus_id) db_corpus: Optional[Corpus] = Corpus.query.get(db_corpus_id)
if db_corpus is None: if db_corpus is None:
return return
db_corpus.num_analysis_sessions = Corpus.num_analysis_sessions - 1 db_corpus.num_analysis_sessions = Corpus.num_analysis_sessions - 1

View File

@ -8,15 +8,17 @@ from cqi.models.attributes import (
) )
from cqi.status import StatusOk as CQiStatusOk from cqi.status import StatusOk as CQiStatusOk
from flask import session from flask import session
from typing import Dict, List
import gzip import gzip
import json import json
import math import math
import os
from app import db from app import db
from app.models import Corpus from app.models import Corpus
from .utils import lookups_by_cpos, partial_export_subcorpus, export_subcorpus from .utils import lookups_by_cpos, partial_export_subcorpus, export_subcorpus
CQI_EXTENSION_FUNCTION_NAMES: list[str] = [ CQI_EXTENSION_FUNCTION_NAMES: List[str] = [
'ext_corpus_update_db', 'ext_corpus_update_db',
'ext_corpus_static_data', 'ext_corpus_static_data',
'ext_corpus_paginate_corpus', 'ext_corpus_paginate_corpus',
@ -36,19 +38,19 @@ def ext_corpus_update_db(corpus: str) -> CQiStatusOk:
return CQiStatusOk() return CQiStatusOk()
def ext_corpus_static_data(corpus: str) -> dict: def ext_corpus_static_data(corpus: str) -> Dict:
db_corpus_id: int = session['cqi_over_sio']['db_corpus_id'] db_corpus_id: int = session['cqi_over_sio']['db_corpus_id']
db_corpus: Corpus = Corpus.query.get(db_corpus_id) db_corpus: Corpus = Corpus.query.get(db_corpus_id)
static_data_file_path = db_corpus.path / 'cwb' / 'static.json.gz' static_data_file_path: str = os.path.join(db_corpus.path, 'cwb', 'static.json.gz')
if static_data_file_path.exists(): if os.path.exists(static_data_file_path):
with static_data_file_path.open('rb') as f: with open(static_data_file_path, 'rb') as f:
return f.read() return f.read()
cqi_client: CQiClient = session['cqi_over_sio']['cqi_client'] cqi_client: CQiClient = session['cqi_over_sio']['cqi_client']
cqi_corpus: CQiCorpus = cqi_client.corpora.get(corpus) cqi_corpus: CQiCorpus = cqi_client.corpora.get(corpus)
cqi_p_attrs: list[CQiPositionalAttribute] = cqi_corpus.positional_attributes.list() cqi_p_attrs: List[CQiPositionalAttribute] = cqi_corpus.positional_attributes.list()
cqi_s_attrs: list[CQiStructuralAttribute] = cqi_corpus.structural_attributes.list() cqi_s_attrs: List[CQiStructuralAttribute] = cqi_corpus.structural_attributes.list()
static_data = { static_data = {
'corpus': { 'corpus': {
@ -63,19 +65,19 @@ def ext_corpus_static_data(corpus: str) -> dict:
for p_attr in cqi_p_attrs: for p_attr in cqi_p_attrs:
print(f'corpus.freqs.{p_attr.name}') print(f'corpus.freqs.{p_attr.name}')
static_data['corpus']['freqs'][p_attr.name] = [] static_data['corpus']['freqs'][p_attr.name] = []
p_attr_id_list: list[int] = list(range(p_attr.lexicon_size)) p_attr_id_list: List[int] = list(range(p_attr.lexicon_size))
static_data['corpus']['freqs'][p_attr.name].extend(p_attr.freqs_by_ids(p_attr_id_list)) static_data['corpus']['freqs'][p_attr.name].extend(p_attr.freqs_by_ids(p_attr_id_list))
del p_attr_id_list del p_attr_id_list
print(f'p_attrs.{p_attr.name}') print(f'p_attrs.{p_attr.name}')
static_data['p_attrs'][p_attr.name] = [] static_data['p_attrs'][p_attr.name] = []
cpos_list: list[int] = list(range(cqi_corpus.size)) cpos_list: List[int] = list(range(cqi_corpus.size))
static_data['p_attrs'][p_attr.name].extend(p_attr.ids_by_cpos(cpos_list)) static_data['p_attrs'][p_attr.name].extend(p_attr.ids_by_cpos(cpos_list))
del cpos_list del cpos_list
print(f'values.p_attrs.{p_attr.name}') print(f'values.p_attrs.{p_attr.name}')
static_data['values']['p_attrs'][p_attr.name] = [] static_data['values']['p_attrs'][p_attr.name] = []
p_attr_id_list: list[int] = list(range(p_attr.lexicon_size)) p_attr_id_list: List[int] = list(range(p_attr.lexicon_size))
static_data['values']['p_attrs'][p_attr.name].extend(p_attr.values_by_ids(p_attr_id_list)) static_data['values']['p_attrs'][p_attr.name].extend(p_attr.values_by_ids(p_attr_id_list))
del p_attr_id_list del p_attr_id_list
@ -126,23 +128,23 @@ def ext_corpus_static_data(corpus: str) -> dict:
print(f's_attrs.{s_attr.name}.lexicon.{id}.bounds') print(f's_attrs.{s_attr.name}.lexicon.{id}.bounds')
static_data['s_attrs'][s_attr.name]['lexicon'][id]['bounds'] = [lbound, rbound] static_data['s_attrs'][s_attr.name]['lexicon'][id]['bounds'] = [lbound, rbound]
static_data['s_attrs'][s_attr.name]['lexicon'][id]['freqs'] = {} static_data['s_attrs'][s_attr.name]['lexicon'][id]['freqs'] = {}
cpos_list: list[int] = list(range(lbound, rbound + 1)) cpos_list: List[int] = list(range(lbound, rbound + 1))
for p_attr in cqi_p_attrs: for p_attr in cqi_p_attrs:
p_attr_ids: list[int] = [] p_attr_ids: List[int] = []
p_attr_ids.extend(p_attr.ids_by_cpos(cpos_list)) p_attr_ids.extend(p_attr.ids_by_cpos(cpos_list))
print(f's_attrs.{s_attr.name}.lexicon.{id}.freqs.{p_attr.name}') print(f's_attrs.{s_attr.name}.lexicon.{id}.freqs.{p_attr.name}')
static_data['s_attrs'][s_attr.name]['lexicon'][id]['freqs'][p_attr.name] = dict(Counter(p_attr_ids)) static_data['s_attrs'][s_attr.name]['lexicon'][id]['freqs'][p_attr.name] = dict(Counter(p_attr_ids))
del p_attr_ids del p_attr_ids
del cpos_list del cpos_list
sub_s_attrs: list[CQiStructuralAttribute] = cqi_corpus.structural_attributes.list(filters={'part_of': s_attr}) sub_s_attrs: List[CQiStructuralAttribute] = cqi_corpus.structural_attributes.list(filters={'part_of': s_attr})
print(f's_attrs.{s_attr.name}.values') print(f's_attrs.{s_attr.name}.values')
static_data['s_attrs'][s_attr.name]['values'] = [ static_data['s_attrs'][s_attr.name]['values'] = [
sub_s_attr.name[(len(s_attr.name) + 1):] sub_s_attr.name[(len(s_attr.name) + 1):]
for sub_s_attr in sub_s_attrs for sub_s_attr in sub_s_attrs
] ]
s_attr_id_list: list[int] = list(range(s_attr.size)) s_attr_id_list: List[int] = list(range(s_attr.size))
sub_s_attr_values: list[str] = [] sub_s_attr_values: List[str] = []
for sub_s_attr in sub_s_attrs: for sub_s_attr in sub_s_attrs:
tmp = [] tmp = []
tmp.extend(sub_s_attr.values_by_ids(s_attr_id_list)) tmp.extend(sub_s_attr.values_by_ids(s_attr_id_list))
@ -172,7 +174,7 @@ def ext_corpus_paginate_corpus(
corpus: str, corpus: str,
page: int = 1, page: int = 1,
per_page: int = 20 per_page: int = 20
) -> dict: ) -> Dict:
cqi_client: CQiClient = session['cqi_over_sio']['cqi_client'] cqi_client: CQiClient = session['cqi_over_sio']['cqi_client']
cqi_corpus = cqi_client.corpora.get(corpus) cqi_corpus = cqi_client.corpora.get(corpus)
# Sanity checks # Sanity checks
@ -218,7 +220,7 @@ def ext_cqp_paginate_subcorpus(
context: int = 50, context: int = 50,
page: int = 1, page: int = 1,
per_page: int = 20 per_page: int = 20
) -> dict: ) -> Dict:
corpus_name, subcorpus_name = subcorpus.split(':', 1) corpus_name, subcorpus_name = subcorpus.split(':', 1)
cqi_client: CQiClient = session['cqi_over_sio']['cqi_client'] cqi_client: CQiClient = session['cqi_over_sio']['cqi_client']
cqi_corpus = cqi_client.corpora.get(corpus_name) cqi_corpus = cqi_client.corpora.get(corpus_name)
@ -265,7 +267,7 @@ def ext_cqp_partial_export_subcorpus(
subcorpus: str, subcorpus: str,
match_id_list: list, match_id_list: list,
context: int = 50 context: int = 50
) -> dict: ) -> Dict:
corpus_name, subcorpus_name = subcorpus.split(':', 1) corpus_name, subcorpus_name = subcorpus.split(':', 1)
cqi_client: CQiClient = session['cqi_over_sio']['cqi_client'] cqi_client: CQiClient = session['cqi_over_sio']['cqi_client']
cqi_corpus = cqi_client.corpora.get(corpus_name) cqi_corpus = cqi_client.corpora.get(corpus_name)
@ -277,7 +279,7 @@ def ext_cqp_partial_export_subcorpus(
def ext_cqp_export_subcorpus( def ext_cqp_export_subcorpus(
subcorpus: str, subcorpus: str,
context: int = 50 context: int = 50
) -> dict: ) -> Dict:
corpus_name, subcorpus_name = subcorpus.split(':', 1) corpus_name, subcorpus_name = subcorpus.split(':', 1)
cqi_client: CQiClient = session['cqi_over_sio']['cqi_client'] cqi_client: CQiClient = session['cqi_over_sio']['cqi_client']
cqi_corpus = cqi_client.corpora.get(corpus_name) cqi_corpus = cqi_client.corpora.get(corpus_name)

View File

@ -1,12 +1,13 @@
from cqi.models.corpora import Corpus as CQiCorpus from cqi.models.corpora import Corpus as CQiCorpus
from cqi.models.subcorpora import Subcorpus as CQiSubcorpus from cqi.models.subcorpora import Subcorpus as CQiSubcorpus
from typing import Dict, List
def lookups_by_cpos(corpus: CQiCorpus, cpos_list: list[int]) -> dict: def lookups_by_cpos(corpus: CQiCorpus, cpos_list: List[int]) -> Dict:
lookups = {} lookups = {}
lookups['cpos_lookup'] = {cpos: {} for cpos in cpos_list} lookups['cpos_lookup'] = {cpos: {} for cpos in cpos_list}
for attr in corpus.positional_attributes.list(): for attr in corpus.positional_attributes.list():
cpos_attr_values: list[str] = attr.values_by_cpos(cpos_list) cpos_attr_values: List[str] = attr.values_by_cpos(cpos_list)
for i, cpos in enumerate(cpos_list): for i, cpos in enumerate(cpos_list):
lookups['cpos_lookup'][cpos][attr.name] = cpos_attr_values[i] lookups['cpos_lookup'][cpos][attr.name] = cpos_attr_values[i]
for attr in corpus.structural_attributes.list(): for attr in corpus.structural_attributes.list():
@ -14,7 +15,7 @@ def lookups_by_cpos(corpus: CQiCorpus, cpos_list: list[int]) -> dict:
# attr.has_values == False # attr.has_values == False
if attr.has_values: if attr.has_values:
continue continue
cpos_attr_ids: list[int] = attr.ids_by_cpos(cpos_list) cpos_attr_ids: List[int] = attr.ids_by_cpos(cpos_list)
for i, cpos in enumerate(cpos_list): for i, cpos in enumerate(cpos_list):
if cpos_attr_ids[i] == -1: if cpos_attr_ids[i] == -1:
continue continue
@ -38,9 +39,9 @@ def lookups_by_cpos(corpus: CQiCorpus, cpos_list: list[int]) -> dict:
def partial_export_subcorpus( def partial_export_subcorpus(
subcorpus: CQiSubcorpus, subcorpus: CQiSubcorpus,
match_id_list: list[int], match_id_list: List[int],
context: int = 25 context: int = 25
) -> dict: ) -> Dict:
if subcorpus.size == 0: if subcorpus.size == 0:
return {"matches": []} return {"matches": []}
match_boundaries = [] match_boundaries = []
@ -90,7 +91,7 @@ def export_subcorpus(
context: int = 25, context: int = 25,
cutoff: float = float('inf'), cutoff: float = float('inf'),
offset: int = 0 offset: int = 0
) -> dict: ) -> Dict:
if subcorpus.size == 0: if subcorpus.size == 0:
return {"matches": []} return {"matches": []}
first_match = max(0, offset) first_match = max(0, offset)

View File

@ -10,7 +10,7 @@ def corpus_follower_permission_required(*permissions):
def decorated_function(*args, **kwargs): def decorated_function(*args, **kwargs):
corpus_id = kwargs.get('corpus_id') corpus_id = kwargs.get('corpus_id')
corpus = Corpus.query.get_or_404(corpus_id) corpus = Corpus.query.get_or_404(corpus_id)
if not (corpus.user == current_user or current_user.is_administrator): if not (corpus.user == current_user or current_user.is_administrator()):
cfa = CorpusFollowerAssociation.query.filter_by(corpus_id=corpus_id, follower_id=current_user.id).first() cfa = CorpusFollowerAssociation.query.filter_by(corpus_id=corpus_id, follower_id=current_user.id).first()
if cfa is None: if cfa is None:
abort(403) abort(403)
@ -26,7 +26,7 @@ def corpus_owner_or_admin_required(f):
def decorated_function(*args, **kwargs): def decorated_function(*args, **kwargs):
corpus_id = kwargs.get('corpus_id') corpus_id = kwargs.get('corpus_id')
corpus = Corpus.query.get_or_404(corpus_id) corpus = Corpus.query.get_or_404(corpus_id)
if not (corpus.user == current_user or current_user.is_administrator): if not (corpus.user == current_user or current_user.is_administrator()):
abort(403) abort(403)
return f(*args, **kwargs) return f(*args, **kwargs)
return decorated_function return decorated_function

View File

@ -15,7 +15,7 @@ def get_corpus(corpus_hashid):
if not ( if not (
corpus.is_public corpus.is_public
or corpus.user == current_user or corpus.user == current_user
or current_user.is_administrator or current_user.is_administrator()
): ):
return {'options': {'status': 403, 'statusText': 'Forbidden'}} return {'options': {'status': 403, 'statusText': 'Forbidden'}}
return { return {
@ -38,7 +38,7 @@ def subscribe_corpus(corpus_hashid):
if not ( if not (
corpus.is_public corpus.is_public
or corpus.user == current_user or corpus.user == current_user
or current_user.is_administrator or current_user.is_administrator()
): ):
return {'options': {'status': 403, 'statusText': 'Forbidden'}} return {'options': {'status': 403, 'statusText': 'Forbidden'}}
join_room(f'/corpora/{corpus.hashid}') join_room(f'/corpora/{corpus.hashid}')

View File

@ -1,7 +1,7 @@
from flask import current_app from flask import abort, current_app
from threading import Thread from threading import Thread
from app.decorators import content_negotiation
from app import db from app import db
from app.decorators import content_negotiation
from app.models import CorpusFile from app.models import CorpusFile
from ..decorators import corpus_follower_permission_required from ..decorators import corpus_follower_permission_required
from . import bp from . import bp

View File

@ -6,19 +6,25 @@ from flask import (
send_from_directory, send_from_directory,
url_for url_for
) )
from flask_breadcrumbs import register_breadcrumb
import os
from app import db from app import db
from app.models import Corpus, CorpusFile, CorpusStatus from app.models import Corpus, CorpusFile, CorpusStatus
from ..decorators import corpus_follower_permission_required from ..decorators import corpus_follower_permission_required
from ..utils import corpus_endpoint_arguments_constructor as corpus_eac
from . import bp from . import bp
from .forms import CreateCorpusFileForm, UpdateCorpusFileForm from .forms import CreateCorpusFileForm, UpdateCorpusFileForm
from .utils import corpus_file_dynamic_list_constructor as corpus_file_dlc
@bp.route('/<hashid:corpus_id>/files') @bp.route('/<hashid:corpus_id>/files')
@register_breadcrumb(bp, '.entity.files', 'Files', endpoint_arguments_constructor=corpus_eac)
def corpus_files(corpus_id): def corpus_files(corpus_id):
return redirect(url_for('.corpus', _anchor='files', corpus_id=corpus_id)) return redirect(url_for('.corpus', _anchor='files', corpus_id=corpus_id))
@bp.route('/<hashid:corpus_id>/files/create', methods=['GET', 'POST']) @bp.route('/<hashid:corpus_id>/files/create', methods=['GET', 'POST'])
@register_breadcrumb(bp, '.entity.files.create', 'Create', endpoint_arguments_constructor=corpus_eac)
@corpus_follower_permission_required('MANAGE_FILES') @corpus_follower_permission_required('MANAGE_FILES')
def create_corpus_file(corpus_id): def create_corpus_file(corpus_id):
corpus = Corpus.query.get_or_404(corpus_id) corpus = Corpus.query.get_or_404(corpus_id)
@ -60,6 +66,7 @@ def create_corpus_file(corpus_id):
@bp.route('/<hashid:corpus_id>/files/<hashid:corpus_file_id>', methods=['GET', 'POST']) @bp.route('/<hashid:corpus_id>/files/<hashid:corpus_file_id>', methods=['GET', 'POST'])
@register_breadcrumb(bp, '.entity.files.entity', '', dynamic_list_constructor=corpus_file_dlc)
@corpus_follower_permission_required('MANAGE_FILES') @corpus_follower_permission_required('MANAGE_FILES')
def corpus_file(corpus_id, corpus_file_id): def corpus_file(corpus_id, corpus_file_id):
corpus_file = CorpusFile.query.filter_by(corpus_id=corpus_id, id=corpus_file_id).first_or_404() corpus_file = CorpusFile.query.filter_by(corpus_id=corpus_id, id=corpus_file_id).first_or_404()
@ -85,9 +92,9 @@ def corpus_file(corpus_id, corpus_file_id):
def download_corpus_file(corpus_id, corpus_file_id): def download_corpus_file(corpus_id, corpus_file_id):
corpus_file = CorpusFile.query.filter_by(corpus_id=corpus_id, id=corpus_file_id).first_or_404() corpus_file = CorpusFile.query.filter_by(corpus_id=corpus_id, id=corpus_file_id).first_or_404()
return send_from_directory( return send_from_directory(
corpus_file.path.parent, os.path.dirname(corpus_file.path),
corpus_file.path.name, os.path.basename(corpus_file.path),
as_attachment=True, as_attachment=True,
download_name=corpus_file.filename, attachment_filename=corpus_file.filename,
mimetype=corpus_file.mimetype mimetype=corpus_file.mimetype
) )

View File

@ -0,0 +1,15 @@
from flask import request, url_for
from app.models import CorpusFile
from ..utils import corpus_endpoint_arguments_constructor as corpus_eac
def corpus_file_dynamic_list_constructor():
corpus_id = request.view_args['corpus_id']
corpus_file_id = request.view_args['corpus_file_id']
corpus_file = CorpusFile.query.filter_by(corpus_id=corpus_id, id=corpus_file_id).first_or_404()
return [
{
'text': f'{corpus_file.author}: {corpus_file.title} ({corpus_file.publishing_year})',
'url': url_for('.corpus_file', corpus_id=corpus_id, corpus_file_id=corpus_file_id)
}
]

View File

@ -58,7 +58,7 @@ def delete_corpus_follower(corpus_id, follower_id):
current_user.id == follower_id current_user.id == follower_id
or current_user == cfa.corpus.user or current_user == cfa.corpus.user
or CorpusFollowerAssociation.query.filter_by(corpus_id=corpus_id, follower_id=current_user.id).first().role.has_permission('MANAGE_FOLLOWERS') or CorpusFollowerAssociation.query.filter_by(corpus_id=corpus_id, follower_id=current_user.id).first().role.has_permission('MANAGE_FOLLOWERS')
or current_user.is_administrator): or current_user.is_administrator()):
abort(403) abort(403)
if current_user.id == follower_id: if current_user.id == follower_id:
flash(f'You are no longer following "{cfa.corpus.title}"', 'corpus') flash(f'You are no longer following "{cfa.corpus.title}"', 'corpus')

View File

@ -1,4 +1,5 @@
from flask import abort, flash, redirect, render_template, url_for from flask import abort, flash, redirect, render_template, url_for
from flask_breadcrumbs import register_breadcrumb
from flask_login import current_user from flask_login import current_user
from app import db from app import db
from app.models import ( from app.models import (
@ -10,14 +11,20 @@ from app.models import (
from . import bp from . import bp
from .decorators import corpus_follower_permission_required from .decorators import corpus_follower_permission_required
from .forms import CreateCorpusForm from .forms import CreateCorpusForm
from .utils import (
corpus_endpoint_arguments_constructor as corpus_eac,
corpus_dynamic_list_constructor as corpus_dlc
)
@bp.route('') @bp.route('')
@register_breadcrumb(bp, '.', '<i class="nopaque-icons left">I</i>My Corpora')
def corpora(): def corpora():
return redirect(url_for('main.dashboard', _anchor='corpora')) return redirect(url_for('main.dashboard', _anchor='corpora'))
@bp.route('/create', methods=['GET', 'POST']) @bp.route('/create', methods=['GET', 'POST'])
@register_breadcrumb(bp, '.create', 'Create')
def create_corpus(): def create_corpus():
form = CreateCorpusForm() form = CreateCorpusForm()
if form.validate_on_submit(): if form.validate_on_submit():
@ -40,6 +47,7 @@ def create_corpus():
@bp.route('/<hashid:corpus_id>') @bp.route('/<hashid:corpus_id>')
@register_breadcrumb(bp, '.entity', '', dynamic_list_constructor=corpus_dlc)
def corpus(corpus_id): def corpus(corpus_id):
corpus = Corpus.query.get_or_404(corpus_id) corpus = Corpus.query.get_or_404(corpus_id)
cfrs = CorpusFollowerRole.query.all() cfrs = CorpusFollowerRole.query.all()
@ -47,13 +55,13 @@ def corpus(corpus_id):
users = User.query.filter(User.is_public == True, User.id != current_user.id, User.id != corpus.user.id, User.role_id < 4).all() users = User.query.filter(User.is_public == True, User.id != current_user.id, User.id != corpus.user.id, User.role_id < 4).all()
cfa = CorpusFollowerAssociation.query.filter_by(corpus_id=corpus_id, follower_id=current_user.id).first() cfa = CorpusFollowerAssociation.query.filter_by(corpus_id=corpus_id, follower_id=current_user.id).first()
if cfa is None: if cfa is None:
if corpus.user == current_user or current_user.is_administrator: if corpus.user == current_user or current_user.is_administrator():
cfr = CorpusFollowerRole.query.filter_by(name='Administrator').first() cfr = CorpusFollowerRole.query.filter_by(name='Administrator').first()
else: else:
cfr = CorpusFollowerRole.query.filter_by(name='Anonymous').first() cfr = CorpusFollowerRole.query.filter_by(name='Anonymous').first()
else: else:
cfr = cfa.role cfr = cfa.role
if corpus.user == current_user or current_user.is_administrator: if corpus.user == current_user or current_user.is_administrator():
return render_template( return render_template(
'corpora/corpus.html.j2', 'corpora/corpus.html.j2',
title=corpus.title, title=corpus.title,
@ -79,6 +87,7 @@ def corpus(corpus_id):
@bp.route('/<hashid:corpus_id>/analysis') @bp.route('/<hashid:corpus_id>/analysis')
@corpus_follower_permission_required('VIEW') @corpus_follower_permission_required('VIEW')
@register_breadcrumb(bp, '.entity.analysis', 'Analysis', endpoint_arguments_constructor=corpus_eac)
def analysis(corpus_id): def analysis(corpus_id):
corpus = Corpus.query.get_or_404(corpus_id) corpus = Corpus.query.get_or_404(corpus_id)
return render_template( return render_template(
@ -88,22 +97,24 @@ def analysis(corpus_id):
) )
@bp.route('/<hashid:corpus_id>/follow/<token>') # @bp.route('/<hashid:corpus_id>/follow/<token>')
def follow_corpus(corpus_id, token): # def follow_corpus(corpus_id, token):
corpus = Corpus.query.get_or_404(corpus_id) # corpus = Corpus.query.get_or_404(corpus_id)
if current_user.follow_corpus_by_token(token): # if current_user.follow_corpus_by_token(token):
db.session.commit() # db.session.commit()
flash(f'You are following "{corpus.title}" now', category='corpus') # flash(f'You are following "{corpus.title}" now', category='corpus')
return redirect(url_for('corpora.corpus', corpus_id=corpus_id)) # return redirect(url_for('corpora.corpus', corpus_id=corpus_id))
abort(403) # abort(403)
@bp.route('/import', methods=['GET', 'POST']) @bp.route('/import', methods=['GET', 'POST'])
@register_breadcrumb(bp, '.import', 'Import')
def import_corpus(): def import_corpus():
abort(503) abort(503)
@bp.route('/<hashid:corpus_id>/export') @bp.route('/<hashid:corpus_id>/export')
@corpus_follower_permission_required('VIEW') @corpus_follower_permission_required('VIEW')
@register_breadcrumb(bp, '.entity.export', 'Export', endpoint_arguments_constructor=corpus_eac)
def export_corpus(corpus_id): def export_corpus(corpus_id):
abort(503) abort(503)

17
app/corpora/utils.py Normal file
View File

@ -0,0 +1,17 @@
from flask import request, url_for
from app.models import Corpus
def corpus_endpoint_arguments_constructor():
return {'corpus_id': request.view_args['corpus_id']}
def corpus_dynamic_list_constructor():
corpus_id = request.view_args['corpus_id']
corpus = Corpus.query.get_or_404(corpus_id)
return [
{
'text': f'<i class="material-icons left">book</i>{corpus.title}',
'url': url_for('.corpus', corpus_id=corpus_id)
}
]

11
app/daemon/__init__.py Normal file
View File

@ -0,0 +1,11 @@
from app import db
from flask import Flask
from .corpus_utils import check_corpora
from .job_utils import check_jobs
def daemon(app: Flask):
with app.app_context():
check_corpora()
check_jobs()
db.session.commit()

View File

@ -1,4 +1,4 @@
from app import db, docker_client, scheduler from app import docker_client
from app.models import Corpus, CorpusStatus from app.models import Corpus, CorpusStatus
from flask import current_app from flask import current_app
import docker import docker
@ -6,11 +6,7 @@ import os
import shutil import shutil
def job(): def check_corpora():
with scheduler.app.app_context():
_handle_corpora()
def _handle_corpora():
corpora = Corpus.query.all() corpora = Corpus.query.all()
for corpus in [x for x in corpora if x.status == CorpusStatus.SUBMITTED]: for corpus in [x for x in corpora if x.status == CorpusStatus.SUBMITTED]:
_create_build_corpus_service(corpus) _create_build_corpus_service(corpus)
@ -26,7 +22,6 @@ def _handle_corpora():
_create_cqpserver_container(corpus) _create_cqpserver_container(corpus)
for corpus in [x for x in corpora if x.status == CorpusStatus.CANCELING_ANALYSIS_SESSION]: for corpus in [x for x in corpora if x.status == CorpusStatus.CANCELING_ANALYSIS_SESSION]:
_remove_cqpserver_container(corpus) _remove_cqpserver_container(corpus)
db.session.commit()
def _create_build_corpus_service(corpus): def _create_build_corpus_service(corpus):
''' # Docker service settings # ''' ''' # Docker service settings # '''

View File

@ -1,4 +1,4 @@
from app import db, docker_client, hashids, scheduler from app import db, docker_client, hashids
from app.models import ( from app.models import (
Job, Job,
JobResult, JobResult,
@ -15,11 +15,7 @@ import os
import shutil import shutil
def job(): def check_jobs():
with scheduler.app.app_context():
_handle_jobs()
def _handle_jobs():
jobs = Job.query.all() jobs = Job.query.all()
for job in [x for x in jobs if x.status == JobStatus.SUBMITTED]: for job in [x for x in jobs if x.status == JobStatus.SUBMITTED]:
_create_job_service(job) _create_job_service(job)
@ -27,7 +23,6 @@ def _handle_jobs():
_checkout_job_service(job) _checkout_job_service(job)
for job in [x for x in jobs if x.status == JobStatus.CANCELING]: for job in [x for x in jobs if x.status == JobStatus.CANCELING]:
_remove_job_service(job) _remove_job_service(job)
db.session.commit()
def _create_job_service(job): def _create_job_service(job):
''' # Docker service settings # ''' ''' # Docker service settings # '''

View File

@ -1,7 +1,8 @@
from flask import abort, request from flask import abort, current_app, request
from flask_login import current_user from flask_login import current_user
from functools import wraps from functools import wraps
from typing import Optional from threading import Thread
from typing import List, Union
from werkzeug.exceptions import NotAcceptable from werkzeug.exceptions import NotAcceptable
from app.models import Permission from app.models import Permission
@ -23,21 +24,22 @@ def admin_required(f):
def socketio_login_required(f): def socketio_login_required(f):
@wraps(f) @wraps(f)
def wrapper(*args, **kwargs): def decorated_function(*args, **kwargs):
if current_user.is_authenticated: if current_user.is_authenticated:
return f(*args, **kwargs) return f(*args, **kwargs)
return {'code': 401, 'body': 'Unauthorized'} else:
return wrapper return {'code': 401, 'msg': 'Unauthorized'}
return decorated_function
def socketio_permission_required(permission): def socketio_permission_required(permission):
def decorator(f): def decorator(f):
@wraps(f) @wraps(f)
def wrapper(*args, **kwargs): def decorated_function(*args, **kwargs):
if not current_user.can(permission): if not current_user.can(permission):
return {'code': 403, 'body': 'Forbidden'} return {'code': 403, 'msg': 'Forbidden'}
return f(*args, **kwargs) return f(*args, **kwargs)
return wrapper return decorated_function
return decorator return decorator
@ -45,9 +47,27 @@ def socketio_admin_required(f):
return socketio_permission_required(Permission.ADMINISTRATE)(f) return socketio_permission_required(Permission.ADMINISTRATE)(f)
def background(f):
'''
' This decorator executes a function in a Thread.
' Decorated functions need to be executed within a code block where an
' app context exists.
'
' NOTE: An app object is passed as a keyword argument to the decorated
' function.
'''
@wraps(f)
def wrapped(*args, **kwargs):
kwargs['app'] = current_app._get_current_object()
thread = Thread(target=f, args=args, kwargs=kwargs)
thread.start()
return thread
return wrapped
def content_negotiation( def content_negotiation(
produces: Optional[str | list[str]] = None, produces: Union[str, List[str], None] = None,
consumes: Optional[str | list[str]] = None consumes: Union[str, List[str], None] = None
): ):
def decorator(f): def decorator(f):
@wraps(f) @wraps(f)

View File

@ -1,32 +1,25 @@
from flask import current_app, Flask, render_template from flask import current_app, render_template
from flask_mail import Message from flask_mail import Message
from threading import Thread from threading import Thread
from app import mail from app import mail
def create_message( def create_message(recipient, subject, template, **kwargs):
recipient: str, subject_prefix: str = current_app.config['NOPAQUE_MAIL_SUBJECT_PREFIX']
subject: str, msg: Message = Message(
template: str, body=render_template(f'{template}.txt.j2', **kwargs),
**context html=render_template(f'{template}.html.j2', **kwargs),
) -> Message:
message = Message(
body=render_template(f'{template}.txt.j2', **context),
html=render_template(f'{template}.html.j2', **context),
recipients=[recipient], recipients=[recipient],
subject=f'[nopaque] {subject}' subject=f'{subject_prefix} {subject}'
) )
return message return msg
def send(message: Message) -> Thread: def send(msg, *args, **kwargs):
def _send(app: Flask, message: Message): def _send(app, msg):
with app.app_context(): with app.app_context():
mail.send(message) mail.send(msg)
thread = Thread( thread = Thread(target=_send, args=[current_app._get_current_object(), msg])
target=_send,
args=[current_app._get_current_object(), message]
)
thread.start() thread.start()
return thread return thread

View File

@ -1,2 +0,0 @@
from .types import ContainerColumn
from .types import IntEnumColumn

View File

@ -1,42 +0,0 @@
import json
from app import db
class ContainerColumn(db.TypeDecorator):
impl = db.String
def __init__(self, container_type, *args, **kwargs):
super().__init__(*args, **kwargs)
self.container_type = container_type
def process_bind_param(self, value, dialect):
if isinstance(value, self.container_type):
return json.dumps(value)
elif isinstance(value, str) and isinstance(json.loads(value), self.container_type):
return value
else:
return TypeError()
def process_result_value(self, value, dialect):
return json.loads(value)
class IntEnumColumn(db.TypeDecorator):
impl = db.Integer
def __init__(self, enum_type, *args, **kwargs):
super().__init__(*args, **kwargs)
self.enum_type = enum_type
def process_bind_param(self, value, dialect):
if isinstance(value, self.enum_type) and isinstance(value.value, int):
return value.value
elif isinstance(value, int):
return self.enum_type(value).value
elif isinstance(value, str):
return self.enum_type[value].value
else:
return TypeError()
def process_result_value(self, value, dialect):
return self.enum_type(value)

View File

@ -1,2 +1,18 @@
from .handle_corpora import job as handle_corpora from flask import Blueprint
from .handle_jobs import job as handle_jobs from flask_login import login_required
bp = Blueprint('jobs', __name__)
@bp.before_request
@login_required
def before_request():
'''
Ensures that the routes in this package can only be visited by users that
are logged in.
'''
pass
from . import routes, json_routes

View File

@ -1,6 +1,7 @@
from flask import abort, current_app from flask import abort, current_app
from flask_login import current_user from flask_login import current_user
from threading import Thread from threading import Thread
import os
from app import db from app import db
from app.decorators import admin_required, content_negotiation from app.decorators import admin_required, content_negotiation
from app.models import Job, JobStatus from app.models import Job, JobStatus
@ -17,7 +18,7 @@ def delete_job(job_id):
db.session.commit() db.session.commit()
job = Job.query.get_or_404(job_id) job = Job.query.get_or_404(job_id)
if not (job.user == current_user or current_user.is_administrator): if not (job.user == current_user or current_user.is_administrator()):
abort(403) abort(403)
thread = Thread( thread = Thread(
target=_delete_job, target=_delete_job,
@ -38,7 +39,7 @@ def job_log(job_id):
if job.status not in [JobStatus.COMPLETED, JobStatus.FAILED]: if job.status not in [JobStatus.COMPLETED, JobStatus.FAILED]:
response = {'errors': {'message': 'Job status is not completed or failed'}} response = {'errors': {'message': 'Job status is not completed or failed'}}
return response, 409 return response, 409
with open(job.path / 'pipeline_data' / 'logs' / 'pyflow_log.txt') as log_file: with open(os.path.join(job.path, 'pipeline_data', 'logs', 'pyflow_log.txt')) as log_file:
log = log_file.read() log = log_file.read()
response_data = { response_data = {
'jobLog': log 'jobLog': log
@ -56,7 +57,7 @@ def restart_job(job_id):
db.session.commit() db.session.commit()
job = Job.query.get_or_404(job_id) job = Job.query.get_or_404(job_id)
if not (job.user == current_user or current_user.is_administrator): if not (job.user == current_user or current_user.is_administrator()):
abort(403) abort(403)
if job.status == JobStatus.FAILED: if job.status == JobStatus.FAILED:
response = {'errors': {'message': 'Job status is not "failed"'}} response = {'errors': {'message': 'Job status is not "failed"'}}

View File

@ -5,20 +5,25 @@ from flask import (
send_from_directory, send_from_directory,
url_for url_for
) )
from flask_breadcrumbs import register_breadcrumb
from flask_login import current_user from flask_login import current_user
import os
from app.models import Job, JobInput, JobResult from app.models import Job, JobInput, JobResult
from . import bp from . import bp
from .utils import job_dynamic_list_constructor as job_dlc
@bp.route('') @bp.route('')
def jobs(): @register_breadcrumb(bp, '.', '<i class="nopaque-icons left">J</i>My Jobs')
def corpora():
return redirect(url_for('main.dashboard', _anchor='jobs')) return redirect(url_for('main.dashboard', _anchor='jobs'))
@bp.route('/<hashid:job_id>') @bp.route('/<hashid:job_id>')
@register_breadcrumb(bp, '.entity', '', dynamic_list_constructor=job_dlc)
def job(job_id): def job(job_id):
job = Job.query.get_or_404(job_id) job = Job.query.get_or_404(job_id)
if not (job.user == current_user or current_user.is_administrator): if not (job.user == current_user or current_user.is_administrator()):
abort(403) abort(403)
return render_template( return render_template(
'jobs/job.html.j2', 'jobs/job.html.j2',
@ -30,13 +35,13 @@ def job(job_id):
@bp.route('/<hashid:job_id>/inputs/<hashid:job_input_id>/download') @bp.route('/<hashid:job_id>/inputs/<hashid:job_input_id>/download')
def download_job_input(job_id, job_input_id): def download_job_input(job_id, job_input_id):
job_input = JobInput.query.filter_by(job_id=job_id, id=job_input_id).first_or_404() job_input = JobInput.query.filter_by(job_id=job_id, id=job_input_id).first_or_404()
if not (job_input.job.user == current_user or current_user.is_administrator): if not (job_input.job.user == current_user or current_user.is_administrator()):
abort(403) abort(403)
return send_from_directory( return send_from_directory(
job_input.path.parent, os.path.dirname(job_input.path),
job_input.path.name, os.path.basename(job_input.path),
as_attachment=True, as_attachment=True,
download_name=job_input.filename, attachment_filename=job_input.filename,
mimetype=job_input.mimetype mimetype=job_input.mimetype
) )
@ -44,12 +49,12 @@ def download_job_input(job_id, job_input_id):
@bp.route('/<hashid:job_id>/results/<hashid:job_result_id>/download') @bp.route('/<hashid:job_id>/results/<hashid:job_result_id>/download')
def download_job_result(job_id, job_result_id): def download_job_result(job_id, job_result_id):
job_result = JobResult.query.filter_by(job_id=job_id, id=job_result_id).first_or_404() job_result = JobResult.query.filter_by(job_id=job_id, id=job_result_id).first_or_404()
if not (job_result.job.user == current_user or current_user.is_administrator): if not (job_result.job.user == current_user or current_user.is_administrator()):
abort(403) abort(403)
return send_from_directory( return send_from_directory(
job_result.path.parent, os.path.dirname(job_result.path),
job_result.path.name, os.path.basename(job_result.path),
as_attachment=True, as_attachment=True,
download_name=job_result.filename, attachment_filename=job_result.filename,
mimetype=job_result.mimetype mimetype=job_result.mimetype
) )

13
app/jobs/utils.py Normal file
View File

@ -0,0 +1,13 @@
from flask import request, url_for
from app.models import Job
def job_dynamic_list_constructor():
job_id = request.view_args['job_id']
job = Job.query.get_or_404(job_id)
return [
{
'text': f'<i class="nopaque-icons left service-icons" data-service="{job.service}"></i>{job.title}',
'url': url_for('.job', job_id=job_id)
}
]

View File

@ -1,9 +1,7 @@
from flask import current_app from flask import current_app
from flask_migrate import upgrade from flask_migrate import upgrade
from pathlib import Path import os
from app import db
from app.models import ( from app.models import (
Corpus,
CorpusFollowerRole, CorpusFollowerRole,
Role, Role,
SpaCyNLPPipelineModel, SpaCyNLPPipelineModel,
@ -16,22 +14,25 @@ from . import bp
@bp.cli.command('deploy') @bp.cli.command('deploy')
def deploy(): def deploy():
''' Run deployment tasks. ''' ''' Run deployment tasks. '''
# Make default directories
print('Make default directories') print('Make default directories')
base_dir = current_app.config['NOPAQUE_DATA_DIR'] base_dir = current_app.config['NOPAQUE_DATA_DIR']
default_dirs: list[Path] = [ default_dirs = [
base_dir / 'tmp', os.path.join(base_dir, 'tmp'),
base_dir / 'users' os.path.join(base_dir, 'users')
] ]
for default_dir in default_dirs: for dir in default_dirs:
if not default_dir.exists(): if os.path.exists(dir):
default_dir.mkdir() if not os.path.isdir(dir):
if not default_dir.is_dir(): raise NotADirectoryError(f'{dir} is not a directory')
raise NotADirectoryError(f'{default_dir} is not a directory') else:
os.mkdir(dir)
# migrate database to latest revision
print('Migrate database to latest revision') print('Migrate database to latest revision')
upgrade() upgrade()
# Insert/Update default database values
print('Insert/Update default Roles') print('Insert/Update default Roles')
Role.insert_defaults() Role.insert_defaults()
print('Insert/Update default Users') print('Insert/Update default Users')
@ -43,9 +44,4 @@ def deploy():
print('Insert/Update default TesseractOCRPipelineModels') print('Insert/Update default TesseractOCRPipelineModels')
TesseractOCRPipelineModel.insert_defaults() TesseractOCRPipelineModel.insert_defaults()
print('Stop running analysis sessions')
for corpus in Corpus.query.filter(Corpus.num_analysis_sessions > 0).all():
corpus.num_analysis_sessions = 0
db.session.commit()
# TODO: Implement checks for if the nopaque network exists # TODO: Implement checks for if the nopaque network exists

View File

@ -1,11 +1,14 @@
from flask import flash, redirect, render_template, url_for from flask import flash, redirect, render_template, url_for
from flask_breadcrumbs import register_breadcrumb
from flask_login import current_user, login_required, login_user from flask_login import current_user, login_required, login_user
from app.blueprints.auth.forms import LoginForm from app.auth.forms import LoginForm
from app.models import Corpus, User from app.models import Corpus, User
from sqlalchemy import or_
from . import bp from . import bp
@bp.route('/', methods=['GET', 'POST']) @bp.route('/', methods=['GET', 'POST'])
@register_breadcrumb(bp, '.', '<i class="material-icons">home</i>')
def index(): def index():
form = LoginForm() form = LoginForm()
if form.validate_on_submit(): if form.validate_on_submit():
@ -24,6 +27,7 @@ def index():
@bp.route('/faq') @bp.route('/faq')
@register_breadcrumb(bp, '.faq', 'Frequently Asked Questions')
def faq(): def faq():
return render_template( return render_template(
'main/faq.html.j2', 'main/faq.html.j2',
@ -32,6 +36,7 @@ def faq():
@bp.route('/dashboard') @bp.route('/dashboard')
@register_breadcrumb(bp, '.dashboard', '<i class="material-icons left">dashboard</i>Dashboard')
@login_required @login_required
def dashboard(): def dashboard():
return render_template( return render_template(
@ -40,15 +45,8 @@ def dashboard():
) )
@bp.route('/manual')
def manual():
return render_template(
'main/manual.html.j2',
title='Manual'
)
@bp.route('/news') @bp.route('/news')
@register_breadcrumb(bp, '.news', '<i class="material-icons left">email</i>News')
def news(): def news():
return render_template( return render_template(
'main/news.html.j2', 'main/news.html.j2',
@ -57,6 +55,7 @@ def news():
@bp.route('/privacy_policy') @bp.route('/privacy_policy')
@register_breadcrumb(bp, '.privacy_policy', 'Private statement (GDPR)')
def privacy_policy(): def privacy_policy():
return render_template( return render_template(
'main/privacy_policy.html.j2', 'main/privacy_policy.html.j2',
@ -65,6 +64,7 @@ def privacy_policy():
@bp.route('/terms_of_use') @bp.route('/terms_of_use')
@register_breadcrumb(bp, '.terms_of_use', 'Terms of Use')
def terms_of_use(): def terms_of_use():
return render_template( return render_template(
'main/terms_of_use.html.j2', 'main/terms_of_use.html.j2',
@ -73,6 +73,7 @@ def terms_of_use():
@bp.route('/social-area') @bp.route('/social-area')
@register_breadcrumb(bp, '.social_area', '<i class="material-icons left">group</i>Social Area')
@login_required @login_required
def social_area(): def social_area():
print('test') print('test')

1819
app/models.py Normal file

File diff suppressed because it is too large Load Diff

View File

@ -1,14 +0,0 @@
from .anonymous_user import *
from .avatar import *
from .corpus_file import *
from .corpus_follower_association import *
from .corpus_follower_role import *
from .corpus import *
from .job_input import *
from .job_result import *
from .job import *
from .role import *
from .spacy_nlp_pipeline_model import *
from .tesseract_ocr_pipeline_model import *
from .token import *
from .user import *

View File

@ -1,10 +0,0 @@
from flask_login import AnonymousUserMixin
class AnonymousUser(AnonymousUserMixin):
def can(self, permissions):
return False
@property
def is_administrator(self):
return False

View File

@ -1,40 +0,0 @@
from flask import current_app
from flask_hashids import HashidMixin
from pathlib import Path
from app import db
from .file_mixin import FileMixin
class Avatar(HashidMixin, FileMixin, db.Model):
__tablename__ = 'avatars'
# Primary key
id = db.Column(db.Integer, primary_key=True)
# Foreign keys
user_id = db.Column(db.Integer, db.ForeignKey('users.id'))
# Relationships
user = db.relationship('User', back_populates='avatar')
@property
def path(self) -> Path:
return self.user.path / 'avatar'
# return os.path.join(self.user.path, 'avatar')
def delete(self):
try:
self.path.unlink(missing_ok=True)
except OSError as e:
current_app.logger.error(e)
raise
db.session.delete(self)
def to_json_serializeable(self, backrefs=False, relationships=False):
json_serializeable = {
'id': self.hashid,
**self.file_mixin_to_json_serializeable()
}
if backrefs:
json_serializeable['user'] = \
self.user.to_json_serializeable(backrefs=True)
if relationships:
pass
return json_serializeable

View File

@ -1,199 +0,0 @@
from datetime import datetime
from enum import IntEnum
from flask import current_app, url_for
from flask_hashids import HashidMixin
from sqlalchemy.ext.associationproxy import association_proxy
from pathlib import Path
import shutil
import xml.etree.ElementTree as ET
from app import db
from app.converters.vrt import normalize_vrt_file
from app.extensions.sqlalchemy_extras import IntEnumColumn
from .corpus_follower_association import CorpusFollowerAssociation
class CorpusStatus(IntEnum):
UNPREPARED = 1
SUBMITTED = 2
QUEUED = 3
BUILDING = 4
BUILT = 5
FAILED = 6
STARTING_ANALYSIS_SESSION = 7
RUNNING_ANALYSIS_SESSION = 8
CANCELING_ANALYSIS_SESSION = 9
@staticmethod
def get(corpus_status: 'CorpusStatus | int | str') -> 'CorpusStatus':
if isinstance(corpus_status, CorpusStatus):
return corpus_status
if isinstance(corpus_status, int):
return CorpusStatus(corpus_status)
if isinstance(corpus_status, str):
return CorpusStatus[corpus_status]
raise TypeError('corpus_status must be CorpusStatus, int, or str')
class Corpus(HashidMixin, db.Model):
'''
Class to define a corpus.
'''
__tablename__ = 'corpora'
# Primary key
id = db.Column(db.Integer, primary_key=True)
# Foreign keys
user_id = db.Column(db.Integer, db.ForeignKey('users.id'))
# Fields
creation_date = db.Column(db.DateTime(), default=datetime.utcnow)
description = db.Column(db.String(255))
status = db.Column(
IntEnumColumn(CorpusStatus),
default=CorpusStatus.UNPREPARED
)
title = db.Column(db.String(32))
num_analysis_sessions = db.Column(db.Integer, default=0)
num_tokens = db.Column(db.Integer, default=0)
is_public = db.Column(db.Boolean, default=False)
# Relationships
files = db.relationship(
'CorpusFile',
back_populates='corpus',
lazy='dynamic',
cascade='all, delete-orphan'
)
corpus_follower_associations = db.relationship(
'CorpusFollowerAssociation',
back_populates='corpus',
cascade='all, delete-orphan'
)
followers = association_proxy(
'corpus_follower_associations',
'follower',
creator=lambda u: CorpusFollowerAssociation(follower=u)
)
user = db.relationship('User', back_populates='corpora')
# "static" attributes
max_num_tokens = 2_147_483_647
def __repr__(self):
return f'<Corpus {self.title}>'
@property
def analysis_url(self):
return url_for('corpora.analysis', corpus_id=self.id)
@property
def jsonpatch_path(self):
return f'{self.user.jsonpatch_path}/corpora/{self.hashid}'
@property
def path(self) -> Path:
return self.user.path / 'corpora' / f'{self.id}'
@property
def url(self):
return url_for('corpora.corpus', corpus_id=self.id)
@property
def user_hashid(self):
return self.user.hashid
@staticmethod
def create(**kwargs):
corpus = Corpus(**kwargs)
db.session.add(corpus)
db.session.flush(objects=[corpus])
db.session.refresh(corpus)
corpus_files_dir = corpus.path / 'files'
corpus_cwb_dir = corpus.path / 'cwb'
corpus_cwb_data_dir = corpus_cwb_dir / 'data'
corpus_cwb_registry_dir = corpus_cwb_dir / 'registry'
try:
corpus.path.mkdir()
corpus_files_dir.mkdir()
corpus_cwb_dir.mkdir()
corpus_cwb_data_dir.mkdir()
corpus_cwb_registry_dir.mkdir()
except OSError as e:
# TODO: Potential leftover cleanup
current_app.logger.error(e)
db.session.rollback()
raise
return corpus
def build(self):
corpus_cwb_dir = self.path / 'cwb'
corpus_cwb_data_dir = corpus_cwb_dir / 'data'
corpus_cwb_registry_dir = corpus_cwb_dir / 'registry'
try:
shutil.rmtree(corpus_cwb_dir, ignore_errors=True)
corpus_cwb_dir.mkdir()
corpus_cwb_data_dir.mkdir()
corpus_cwb_registry_dir.mkdir()
except OSError as e:
current_app.logger.error(e)
self.status = CorpusStatus.FAILED
raise
corpus_element = ET.fromstring('<corpus>\n</corpus>')
for corpus_file in self.files:
normalized_vrt_path = corpus_cwb_dir / f'{corpus_file.id}.norm.vrt'
try:
normalize_vrt_file(corpus_file.path, normalized_vrt_path)
except:
self.status = CorpusStatus.FAILED
return
element_tree = ET.parse(normalized_vrt_path)
text_element = element_tree.getroot()
text_element.set('author', corpus_file.author)
text_element.set('title', corpus_file.title)
text_element.set(
'publishing_year',
f'{corpus_file.publishing_year}'
)
text_element.set('address', corpus_file.address or 'NULL')
text_element.set('booktitle', corpus_file.booktitle or 'NULL')
text_element.set('chapter', corpus_file.chapter or 'NULL')
text_element.set('editor', corpus_file.editor or 'NULL')
text_element.set('institution', corpus_file.institution or 'NULL')
text_element.set('journal', corpus_file.journal or 'NULL')
text_element.set('pages', f'{corpus_file.pages}' or 'NULL')
text_element.set('publisher', corpus_file.publisher or 'NULL')
text_element.set('school', corpus_file.school or 'NULL')
text_element.tail = '\n'
# corpus_element.insert(1, text_element)
corpus_element.append(text_element)
ET.ElementTree(corpus_element).write(
corpus_cwb_dir / 'corpus.vrt',
encoding='utf-8'
)
self.status = CorpusStatus.SUBMITTED
def delete(self):
shutil.rmtree(self.path, ignore_errors=True)
db.session.delete(self)
def to_json_serializeable(self, backrefs=False, relationships=False):
json_serializeable = {
'id': self.hashid,
'creation_date': f'{self.creation_date.isoformat()}Z',
'description': self.description,
'max_num_tokens': self.max_num_tokens,
'num_analysis_sessions': self.num_analysis_sessions,
'num_tokens': self.num_tokens,
'status': self.status.name,
'title': self.title,
'is_public': self.is_public
}
if backrefs:
json_serializeable['user'] = \
self.user.to_json_serializeable(backrefs=True)
if relationships:
json_serializeable['corpus_follower_associations'] = {
x.hashid: x.to_json_serializeable()
for x in self.corpus_follower_associations
}
json_serializeable['files'] = {
x.hashid: x.to_json_serializeable(relationships=True)
for x in self.files
}
return json_serializeable

View File

@ -1,102 +0,0 @@
from flask import current_app, url_for
from flask_hashids import HashidMixin
from pathlib import Path
from app import db
from .corpus import CorpusStatus
from .file_mixin import FileMixin
class CorpusFile(FileMixin, HashidMixin, db.Model):
__tablename__ = 'corpus_files'
# Primary key
id = db.Column(db.Integer, primary_key=True)
# Foreign keys
corpus_id = db.Column(db.Integer, db.ForeignKey('corpora.id'))
# Fields
author = db.Column(db.String(255))
description = db.Column(db.String(255))
publishing_year = db.Column(db.Integer)
title = db.Column(db.String(255))
address = db.Column(db.String(255))
booktitle = db.Column(db.String(255))
chapter = db.Column(db.String(255))
editor = db.Column(db.String(255))
institution = db.Column(db.String(255))
journal = db.Column(db.String(255))
pages = db.Column(db.String(255))
publisher = db.Column(db.String(255))
school = db.Column(db.String(255))
# Relationships
corpus = db.relationship(
'Corpus',
back_populates='files'
)
@property
def download_url(self):
return url_for(
'corpora.download_corpus_file',
corpus_id=self.corpus_id,
corpus_file_id=self.id
)
@property
def jsonpatch_path(self):
return f'{self.corpus.jsonpatch_path}/files/{self.hashid}'
@property
def path(self) -> Path:
return self.corpus.path / 'files' / f'{self.id}'
@property
def url(self):
return url_for(
'corpora.corpus_file',
corpus_id=self.corpus_id,
corpus_file_id=self.id
)
@property
def user_hashid(self):
return self.corpus.user.hashid
@property
def user_id(self):
return self.corpus.user_id
def delete(self):
try:
self.path.unlink(missing_ok=True)
except OSError as e:
current_app.logger.error(e)
raise
db.session.delete(self)
self.corpus.status = CorpusStatus.UNPREPARED
def to_json_serializeable(self, backrefs=False, relationships=False):
json_serializeable = {
'id': self.hashid,
'address': self.address,
'author': self.author,
'description': self.description,
'booktitle': self.booktitle,
'chapter': self.chapter,
'editor': self.editor,
'institution': self.institution,
'journal': self.journal,
'pages': self.pages,
'publisher': self.publisher,
'publishing_year': self.publishing_year,
'school': self.school,
'title': self.title,
**self.file_mixin_to_json_serializeable(
backrefs=backrefs,
relationships=relationships
)
}
if backrefs:
json_serializeable['corpus'] = \
self.corpus.to_json_serializeable(backrefs=True)
if relationships:
pass
return json_serializeable

View File

@ -1,47 +0,0 @@
from flask_hashids import HashidMixin
from app import db
from .corpus_follower_role import CorpusFollowerRole
class CorpusFollowerAssociation(HashidMixin, db.Model):
__tablename__ = 'corpus_follower_associations'
# Primary key
id = db.Column(db.Integer, primary_key=True)
# Foreign keys
corpus_id = db.Column(db.Integer, db.ForeignKey('corpora.id'))
follower_id = db.Column(db.Integer, db.ForeignKey('users.id'))
role_id = db.Column(db.Integer, db.ForeignKey('corpus_follower_roles.id'))
# Relationships
corpus = db.relationship(
'Corpus',
back_populates='corpus_follower_associations'
)
follower = db.relationship(
'User',
back_populates='corpus_follower_associations'
)
role = db.relationship(
'CorpusFollowerRole',
back_populates='corpus_follower_associations'
)
def __init__(self, **kwargs):
if 'role' not in kwargs:
kwargs['role'] = CorpusFollowerRole.query.filter_by(default=True).first()
super().__init__(**kwargs)
def __repr__(self):
return f'<CorpusFollowerAssociation {self.follower.__repr__()} ~ {self.role.__repr__()} ~ {self.corpus.__repr__()}>'
def to_json_serializeable(self, backrefs=False, relationships=False):
json_serializeable = {
'id': self.hashid,
'corpus': self.corpus.to_json_serializeable(backrefs=True),
'follower': self.follower.to_json_serializeable(),
'role': self.role.to_json_serializeable()
}
if backrefs:
pass
if relationships:
pass
return json_serializeable

View File

@ -1,106 +0,0 @@
from flask_hashids import HashidMixin
from enum import IntEnum
from app import db
class CorpusFollowerPermission(IntEnum):
VIEW = 1
MANAGE_FILES = 2
MANAGE_FOLLOWERS = 4
MANAGE_CORPUS = 8
@staticmethod
def get(corpus_follower_permission: 'CorpusFollowerPermission | int | str') -> 'CorpusFollowerPermission':
if isinstance(corpus_follower_permission, CorpusFollowerPermission):
return corpus_follower_permission
if isinstance(corpus_follower_permission, int):
return CorpusFollowerPermission(corpus_follower_permission)
if isinstance(corpus_follower_permission, str):
return CorpusFollowerPermission[corpus_follower_permission]
raise TypeError('corpus_follower_permission must be CorpusFollowerPermission, int, or str')
class CorpusFollowerRole(HashidMixin, db.Model):
__tablename__ = 'corpus_follower_roles'
# Primary key
id = db.Column(db.Integer, primary_key=True)
# Fields
name = db.Column(db.String(64), unique=True)
default = db.Column(db.Boolean, default=False, index=True)
permissions = db.Column(db.Integer, default=0)
# Relationships
corpus_follower_associations = db.relationship(
'CorpusFollowerAssociation',
back_populates='role'
)
def __repr__(self):
return f'<CorpusFollowerRole {self.name}>'
def has_permission(self, permission: CorpusFollowerPermission | int | str):
perm = CorpusFollowerPermission.get(permission)
return self.permissions & perm.value == perm.value
def add_permission(self, permission: CorpusFollowerPermission | int | str):
perm = CorpusFollowerPermission.get(permission)
if not self.has_permission(perm):
self.permissions += perm.value
def remove_permission(self, permission: CorpusFollowerPermission | int | str):
perm = CorpusFollowerPermission.get(permission)
if self.has_permission(perm):
self.permissions -= perm.value
def reset_permissions(self):
self.permissions = 0
def to_json_serializeable(self, backrefs=False, relationships=False):
json_serializeable = {
'id': self.hashid,
'default': self.default,
'name': self.name,
'permissions': [
x.name
for x in CorpusFollowerPermission
if self.has_permission(x)
]
}
if backrefs:
pass
if relationships:
json_serializeable['corpus_follower_association'] = {
x.hashid: x.to_json_serializeable(relationships=True)
for x in self.corpus_follower_association
}
return json_serializeable
@staticmethod
def insert_defaults():
roles = {
'Anonymous': [],
'Viewer': [
CorpusFollowerPermission.VIEW
],
'Contributor': [
CorpusFollowerPermission.VIEW,
CorpusFollowerPermission.MANAGE_FILES
],
'Administrator': [
CorpusFollowerPermission.VIEW,
CorpusFollowerPermission.MANAGE_FILES,
CorpusFollowerPermission.MANAGE_FOLLOWERS,
CorpusFollowerPermission.MANAGE_CORPUS
]
}
default_role_name = 'Viewer'
for role_name, permissions in roles.items():
role = CorpusFollowerRole.query.filter_by(name=role_name).first()
if role is None:
role = CorpusFollowerRole(name=role_name)
role.reset_permissions()
for permission in permissions:
role.add_permission(permission)
role.default = role.name == default_role_name
db.session.add(role)
db.session.commit()

View File

@ -1,133 +0,0 @@
from datetime import datetime
from enum import Enum
from app import db, mail, socketio
from app.email import create_message
from .corpus_file import CorpusFile
from .corpus_follower_association import CorpusFollowerAssociation
from .corpus import Corpus
from .job_input import JobInput
from .job_result import JobResult
from .job import Job, JobStatus
from .spacy_nlp_pipeline_model import SpaCyNLPPipelineModel
from .tesseract_ocr_pipeline_model import TesseractOCRPipelineModel
from .user import UserSettingJobStatusMailNotificationLevel
def register_event_listeners():
resources = [
Corpus,
CorpusFile,
Job,
JobInput,
JobResult,
SpaCyNLPPipelineModel,
TesseractOCRPipelineModel
]
for resource in resources:
db.event.listen(resource, 'after_delete', resource_after_delete)
db.event.listen(resource, 'after_insert', resource_after_insert)
db.event.listen(resource, 'after_update', resource_after_update)
db.event.listen(CorpusFollowerAssociation, 'after_delete', cfa_after_delete)
db.event.listen(CorpusFollowerAssociation, 'after_insert', cfa_after_insert)
db.event.listen(Job, 'after_update', job_after_update)
def resource_after_delete(mapper, connection, resource):
jsonpatch = [
{
'op': 'remove',
'path': resource.jsonpatch_path
}
]
room = f'/users/{resource.user_hashid}'
socketio.emit('PATCH', jsonpatch, room=room)
def cfa_after_delete(mapper, connection, cfa):
jsonpatch_path = f'/users/{cfa.corpus.user.hashid}/corpora/{cfa.corpus.hashid}/corpus_follower_associations/{cfa.hashid}'
jsonpatch = [
{
'op': 'remove',
'path': jsonpatch_path
}
]
room = f'/users/{cfa.corpus.user.hashid}'
socketio.emit('PATCH', jsonpatch, room=room)
def resource_after_insert(mapper, connection, resource):
jsonpatch_value = resource.to_json_serializeable()
for attr in mapper.relationships:
jsonpatch_value[attr.key] = {}
jsonpatch = [
{
'op': 'add',
'path': resource.jsonpatch_path,
'value': jsonpatch_value
}
]
room = f'/users/{resource.user_hashid}'
socketio.emit('PATCH', jsonpatch, room=room)
def cfa_after_insert(mapper, connection, cfa):
jsonpatch_value = cfa.to_json_serializeable()
jsonpatch_path = f'/users/{cfa.corpus.user.hashid}/corpora/{cfa.corpus.hashid}/corpus_follower_associations/{cfa.hashid}'
jsonpatch = [
{
'op': 'add',
'path': jsonpatch_path,
'value': jsonpatch_value
}
]
room = f'/users/{cfa.corpus.user.hashid}'
socketio.emit('PATCH', jsonpatch, room=room)
def resource_after_update(mapper, connection, resource):
jsonpatch = []
for attr in db.inspect(resource).attrs:
if attr.key in mapper.relationships:
continue
if not attr.load_history().has_changes():
continue
jsonpatch_path = f'{resource.jsonpatch_path}/{attr.key}'
if isinstance(attr.value, datetime):
jsonpatch_value = f'{attr.value.isoformat()}Z'
elif isinstance(attr.value, Enum):
jsonpatch_value = attr.value.name
else:
jsonpatch_value = attr.value
jsonpatch.append(
{
'op': 'replace',
'path': jsonpatch_path,
'value': jsonpatch_value
}
)
if jsonpatch:
room = f'/users/{resource.user_hashid}'
socketio.emit('PATCH', jsonpatch, room=room)
def job_after_update(mapper, connection, job):
for attr in db.inspect(job).attrs:
if attr.key != 'status':
continue
if not attr.load_history().has_changes():
return
if job.user.setting_job_status_mail_notification_level == UserSettingJobStatusMailNotificationLevel.NONE:
return
if job.user.setting_job_status_mail_notification_level == UserSettingJobStatusMailNotificationLevel.END:
if job.status not in [JobStatus.COMPLETED, JobStatus.FAILED]:
return
msg = create_message(
job.user.email,
f'Status update for your Job "{job.title}"',
'tasks/email/notification',
job=job
)
mail.send(msg)

View File

@ -1,40 +0,0 @@
from datetime import datetime
from flask import current_app
from werkzeug.utils import secure_filename
from app import db
class FileMixin:
'''
Mixin for db.Model classes. All file related models should use this.
'''
creation_date = db.Column(db.DateTime, default=datetime.utcnow)
filename = db.Column(db.String(255))
mimetype = db.Column(db.String(255))
def file_mixin_to_json_serializeable(self, backrefs=False, relationships=False):
return {
'creation_date': f'{self.creation_date.isoformat()}Z',
'filename': self.filename,
'mimetype': self.mimetype
}
@classmethod
def create(cls, file_storage, **kwargs):
filename = kwargs.pop('filename', file_storage.filename)
mimetype = kwargs.pop('mimetype', file_storage.mimetype)
obj = cls(
filename=secure_filename(filename),
mimetype=mimetype,
**kwargs
)
db.session.add(obj)
db.session.flush(objects=[obj])
db.session.refresh(obj)
try:
file_storage.save(obj.path)
except (AttributeError, OSError) as e:
current_app.logger.error(e)
db.session.rollback()
raise e
return obj

View File

@ -1,171 +0,0 @@
from datetime import datetime
from enum import IntEnum
from flask import current_app, url_for
from flask_hashids import HashidMixin
from time import sleep
from pathlib import Path
import shutil
from app import db
from app.extensions.sqlalchemy_extras import ContainerColumn, IntEnumColumn
class JobStatus(IntEnum):
INITIALIZING = 1
SUBMITTED = 2
QUEUED = 3
RUNNING = 4
CANCELING = 5
CANCELED = 6
COMPLETED = 7
FAILED = 8
@staticmethod
def get(job_status: 'JobStatus | int | str') -> 'JobStatus':
if isinstance(job_status, JobStatus):
return job_status
if isinstance(job_status, int):
return JobStatus(job_status)
if isinstance(job_status, str):
return JobStatus[job_status]
raise TypeError('job_status must be JobStatus, int, or str')
class Job(HashidMixin, db.Model):
'''
Class to define Jobs.
'''
__tablename__ = 'jobs'
# Primary key
id = db.Column(db.Integer, primary_key=True)
# Foreign keys
user_id = db.Column(db.Integer, db.ForeignKey('users.id'))
# Fields
creation_date = \
db.Column(db.DateTime(), default=datetime.utcnow)
description = db.Column(db.String(255))
end_date = db.Column(db.DateTime())
service = db.Column(db.String(64))
service_args = db.Column(ContainerColumn(dict, 255))
service_version = db.Column(db.String(16))
status = db.Column(
IntEnumColumn(JobStatus),
default=JobStatus.INITIALIZING
)
title = db.Column(db.String(32))
# Relationships
inputs = db.relationship(
'JobInput',
back_populates='job',
cascade='all, delete-orphan',
lazy='dynamic'
)
results = db.relationship(
'JobResult',
back_populates='job',
cascade='all, delete-orphan',
lazy='dynamic'
)
user = db.relationship(
'User',
back_populates='jobs'
)
def __repr__(self):
return f'<Job {self.title}>'
@property
def jsonpatch_path(self):
return f'{self.user.jsonpatch_path}/jobs/{self.hashid}'
@property
def path(self) -> Path:
return self.user.path / 'jobs' / f'{self.id}'
@property
def url(self):
return url_for('jobs.job', job_id=self.id)
@property
def user_hashid(self):
return self.user.hashid
@staticmethod
def create(**kwargs):
job = Job(**kwargs)
db.session.add(job)
db.session.flush(objects=[job])
db.session.refresh(job)
job_inputs_dir = job.path / 'inputs'
job_pipeline_data_dir = job.path / 'pipeline_data'
job_results_dir = job.path / 'results'
try:
job.path.mkdir()
job_inputs_dir.mkdir()
job_pipeline_data_dir.mkdir()
job_results_dir.mkdir()
except OSError as e:
# TODO: Potential leftover cleanup
current_app.logger.error(e)
db.session.rollback()
raise
return job
def delete(self):
''' Delete the job and its inputs and results from the database. '''
if self.status not in [JobStatus.COMPLETED, JobStatus.FAILED]: # noqa
self.status = JobStatus.CANCELING
db.session.commit()
while self.status != JobStatus.CANCELED:
# In case the daemon handled a job in any way
if self.status != JobStatus.CANCELING:
self.status = JobStatus.CANCELING
db.session.commit()
sleep(1)
db.session.refresh(self)
try:
shutil.rmtree(self.path)
except OSError as e:
current_app.logger.error(e)
db.session.rollback()
raise e
db.session.delete(self)
def restart(self):
''' Restart a job - only if the status is failed '''
if self.status != JobStatus.FAILED:
raise Exception('Job status is not "failed"')
shutil.rmtree(self.path / 'results', ignore_errors=True)
shutil.rmtree(self.path / 'pyflow.data', ignore_errors=True)
for result in self.results:
db.session.delete(result)
self.end_date = None
self.status = JobStatus.SUBMITTED
def to_json_serializeable(self, backrefs=False, relationships=False):
json_serializeable = {
'id': self.hashid,
'creation_date': f'{self.creation_date.isoformat()}Z',
'description': self.description,
'end_date': (
None if self.end_date is None
else f'{self.end_date.isoformat()}Z'
),
'service': self.service,
'service_args': self.service_args,
'service_version': self.service_version,
'status': self.status.name,
'title': self.title
}
if backrefs:
json_serializeable['user'] = \
self.user.to_json_serializeable(backrefs=True)
if relationships:
json_serializeable['inputs'] = {
x.hashid: x.to_json_serializeable(relationships=True)
for x in self.inputs
}
json_serializeable['results'] = {
x.hashid: x.to_json_serializeable(relationships=True)
for x in self.results
}
return json_serializeable

View File

@ -1,65 +0,0 @@
from flask import url_for
from flask_hashids import HashidMixin
from pathlib import Path
from app import db
from .file_mixin import FileMixin
class JobInput(FileMixin, HashidMixin, db.Model):
__tablename__ = 'job_inputs'
# Primary key
id = db.Column(db.Integer, primary_key=True)
# Foreign keys
job_id = db.Column(db.Integer, db.ForeignKey('jobs.id'))
# Relationships
job = db.relationship(
'Job',
back_populates='inputs'
)
def __repr__(self):
return f'<JobInput {self.filename}>'
@property
def content_url(self):
return url_for(
'jobs.download_job_input',
job_id=self.job.id,
job_input_id=self.id
)
@property
def jsonpatch_path(self):
return f'{self.job.jsonpatch_path}/inputs/{self.hashid}'
@property
def path(self) -> Path:
return self.job.path / 'inputs' / f'{self.id}'
@property
def url(self):
return url_for(
'jobs.job',
job_id=self.job_id,
_anchor=f'job-{self.job.hashid}-input-{self.hashid}'
)
@property
def user_hashid(self):
return self.job.user.hashid
@property
def user_id(self):
return self.job.user.id
def to_json_serializeable(self, backrefs=False, relationships=False):
json_serializeable = {
'id': self.hashid,
**self.file_mixin_to_json_serializeable()
}
if backrefs:
json_serializeable['job'] = \
self.job.to_json_serializeable(backrefs=True)
if relationships:
pass
return json_serializeable

View File

@ -1,71 +0,0 @@
from flask import url_for
from flask_hashids import HashidMixin
from pathlib import Path
from app import db
from .file_mixin import FileMixin
class JobResult(FileMixin, HashidMixin, db.Model):
__tablename__ = 'job_results'
# Primary key
id = db.Column(db.Integer, primary_key=True)
# Foreign keys
job_id = db.Column(db.Integer, db.ForeignKey('jobs.id'))
# Fields
description = db.Column(db.String(255))
# Relationships
job = db.relationship(
'Job',
back_populates='results'
)
def __repr__(self):
return f'<JobResult {self.filename}>'
@property
def download_url(self):
return url_for(
'jobs.download_job_result',
job_id=self.job_id,
job_result_id=self.id
)
@property
def jsonpatch_path(self):
return f'{self.job.jsonpatch_path}/results/{self.hashid}'
@property
def path(self) -> Path:
return self.job.path / 'results' / f'{self.id}'
@property
def url(self):
return url_for(
'jobs.job',
job_id=self.job_id,
_anchor=f'job-{self.job.hashid}-result-{self.hashid}'
)
@property
def user_hashid(self):
return self.job.user.hashid
@property
def user_id(self):
return self.job.user.id
def to_json_serializeable(self, backrefs=False, relationships=False):
json_serializeable = {
'id': self.hashid,
'description': self.description,
**self.file_mixin_to_json_serializeable(
backrefs=backrefs,
relationships=relationships
)
}
if backrefs:
json_serializeable['job'] = \
self.job.to_json_serializeable(backrefs=True)
if relationships:
pass
return json_serializeable

View File

@ -1,99 +0,0 @@
from enum import IntEnum
from flask_hashids import HashidMixin
from app import db
class Permission(IntEnum):
'''
Defines User permissions as integers by the power of 2. User permission
can be evaluated using the bitwise operator &.
'''
ADMINISTRATE = 1
CONTRIBUTE = 2
USE_API = 4
@staticmethod
def get(permission: 'Permission | int | str') -> 'Permission':
if isinstance(permission, Permission):
return permission
if isinstance(permission, int):
return Permission(permission)
if isinstance(permission, str):
return Permission[permission]
raise TypeError('permission must be Permission, int, or str')
class Role(HashidMixin, db.Model):
__tablename__ = 'roles'
# Primary key
id = db.Column(db.Integer, primary_key=True)
# Fields
name = db.Column(db.String(64), unique=True)
default = db.Column(db.Boolean, default=False, index=True)
permissions = db.Column(db.Integer, default=0)
# Relationships
users = db.relationship('User', back_populates='role', lazy='dynamic')
def __repr__(self):
return f'<Role {self.name}>'
def has_permission(self, permission: Permission | int | str):
p = Permission.get(permission)
return self.permissions & p.value == p.value
def add_permission(self, permission: Permission | int | str):
p = Permission.get(permission)
if not self.has_permission(p):
self.permissions += p.value
def remove_permission(self, permission: Permission | int | str):
p = Permission.get(permission)
if self.has_permission(p):
self.permissions -= p.value
def reset_permissions(self):
self.permissions = 0
def to_json_serializeable(self, backrefs=False, relationships=False):
json_serializeable = {
'id': self.hashid,
'default': self.default,
'name': self.name,
'permissions': [
x.name for x in Permission
if self.has_permission(x.value)
]
}
if backrefs:
pass
if relationships:
json_serializeable['users'] = {
x.hashid: x.to_json_serializeable(relationships=True)
for x in self.users
}
return json_serializeable
@staticmethod
def insert_defaults():
roles = {
'User': [],
'API user': [Permission.USE_API],
'Contributor': [Permission.CONTRIBUTE],
'Administrator': [
Permission.ADMINISTRATE,
Permission.CONTRIBUTE,
Permission.USE_API
],
'System user': []
}
default_role_name = 'User'
for role_name, permissions in roles.items():
role = Role.query.filter_by(name=role_name).first()
if role is None:
role = Role(name=role_name)
role.reset_permissions()
for permission in permissions:
role.add_permission(permission)
role.default = role.name == default_role_name
db.session.add(role)
db.session.commit()

View File

@ -1,136 +0,0 @@
from flask import current_app, url_for
from flask_hashids import HashidMixin
from tqdm import tqdm
from pathlib import Path
import requests
import yaml
from app import db
from app.extensions.sqlalchemy_extras import ContainerColumn
from .file_mixin import FileMixin
from .user import User
class SpaCyNLPPipelineModel(FileMixin, HashidMixin, db.Model):
__tablename__ = 'spacy_nlp_pipeline_models'
# Primary key
id = db.Column(db.Integer, primary_key=True)
# Foreign keys
user_id = db.Column(db.Integer, db.ForeignKey('users.id'))
# Fields
title = db.Column(db.String(64))
description = db.Column(db.String(255))
version = db.Column(db.String(16))
compatible_service_versions = db.Column(ContainerColumn(list, 255))
publisher = db.Column(db.String(128))
publisher_url = db.Column(db.String(512))
publishing_url = db.Column(db.String(512))
publishing_year = db.Column(db.Integer)
pipeline_name = db.Column(db.String(64))
is_public = db.Column(db.Boolean, default=False)
# Relationships
user = db.relationship('User', back_populates='spacy_nlp_pipeline_models')
@property
def path(self) -> Path:
return self.user.path / 'spacy_nlp_pipeline_models' / f'{self.id}'
@property
def jsonpatch_path(self):
return f'{self.user.jsonpatch_path}/spacy_nlp_pipeline_models/{self.hashid}'
@property
def url(self):
return url_for(
'contributions.spacy_nlp_pipeline_model',
spacy_nlp_pipeline_model_id=self.id
)
@property
def user_hashid(self):
return self.user.hashid
@staticmethod
def insert_defaults(force_download=False):
nopaque_user = User.query.filter_by(username='nopaque').first()
default_records_file = Path(__file__).parent / 'default_records' / 'spacy_nlp_pipeline_model.yml'
with default_records_file.open('r') as f:
default_records = yaml.safe_load(f)
for m in default_records:
model = SpaCyNLPPipelineModel.query.filter_by(title=m['title'], version=m['version']).first() # noqa
if model is not None:
model.compatible_service_versions = m['compatible_service_versions']
model.description = m['description']
model.filename = m['url'].split('/')[-1]
model.publisher = m['publisher']
model.publisher_url = m['publisher_url']
model.publishing_url = m['publishing_url']
model.publishing_year = m['publishing_year']
model.is_public = True
model.title = m['title']
model.version = m['version']
model.pipeline_name = m['pipeline_name']
else:
model = SpaCyNLPPipelineModel(
compatible_service_versions=m['compatible_service_versions'],
description=m['description'],
filename=m['url'].split('/')[-1],
publisher=m['publisher'],
publisher_url=m['publisher_url'],
publishing_url=m['publishing_url'],
publishing_year=m['publishing_year'],
is_public=True,
title=m['title'],
user=nopaque_user,
version=m['version'],
pipeline_name=m['pipeline_name']
)
db.session.add(model)
db.session.flush(objects=[model])
db.session.refresh(model)
if not model.path.exists() or force_download:
r = requests.get(m['url'], stream=True)
pbar = tqdm(
desc=f'{model.title} ({model.filename})',
unit="B",
unit_scale=True,
unit_divisor=1024,
total=int(r.headers['Content-Length'])
)
pbar.clear()
with open(model.path, 'wb') as f:
for chunk in r.iter_content(chunk_size=1024):
if chunk: # filter out keep-alive new chunks
pbar.update(len(chunk))
f.write(chunk)
pbar.close()
db.session.commit()
def delete(self):
try:
self.path.unlink(missing_ok=True)
except OSError as e:
current_app.logger.error(e)
raise
db.session.delete(self)
def to_json_serializeable(self, backrefs=False, relationships=False):
json_serializeable = {
'id': self.hashid,
'compatible_service_versions': self.compatible_service_versions,
'description': self.description,
'publisher': self.publisher,
'publisher_url': self.publisher_url,
'publishing_url': self.publishing_url,
'publishing_year': self.publishing_year,
'pipeline_name': self.pipeline_name,
'is_public': self.is_public,
'title': self.title,
'version': self.version,
**self.file_mixin_to_json_serializeable()
}
if backrefs:
json_serializeable['user'] = \
self.user.to_json_serializeable(backrefs=True)
if relationships:
pass
return json_serializeable

View File

@ -1,132 +0,0 @@
from flask import current_app, url_for
from flask_hashids import HashidMixin
from tqdm import tqdm
from pathlib import Path
import requests
import yaml
from app import db
from app.extensions.sqlalchemy_extras import ContainerColumn
from .file_mixin import FileMixin
from .user import User
class TesseractOCRPipelineModel(FileMixin, HashidMixin, db.Model):
__tablename__ = 'tesseract_ocr_pipeline_models'
# Primary key
id = db.Column(db.Integer, primary_key=True)
# Foreign keys
user_id = db.Column(db.Integer, db.ForeignKey('users.id'))
# Fields
title = db.Column(db.String(64))
description = db.Column(db.String(255))
version = db.Column(db.String(16))
compatible_service_versions = db.Column(ContainerColumn(list, 255))
publisher = db.Column(db.String(128))
publisher_url = db.Column(db.String(512))
publishing_url = db.Column(db.String(512))
publishing_year = db.Column(db.Integer)
is_public = db.Column(db.Boolean, default=False)
# Relationships
user = db.relationship('User', back_populates='tesseract_ocr_pipeline_models')
@property
def path(self) -> Path:
return self.user.path / 'tesseract_ocr_pipeline_models' / f'{self.id}'
@property
def jsonpatch_path(self):
return f'{self.user.jsonpatch_path}/tesseract_ocr_pipeline_models/{self.hashid}'
@property
def url(self):
return url_for(
'contributions.tesseract_ocr_pipeline_model',
tesseract_ocr_pipeline_model_id=self.id
)
@property
def user_hashid(self):
return self.user.hashid
@staticmethod
def insert_defaults(force_download=False):
nopaque_user = User.query.filter_by(username='nopaque').first()
default_records_file = Path(__file__).parent / 'default_records' / 'tesseract_ocr_pipeline_model.yml'
with default_records_file.open('r') as f:
default_records = yaml.safe_load(f)
for m in default_records:
model = TesseractOCRPipelineModel.query.filter_by(title=m['title'], version=m['version']).first() # noqa
if model is not None:
model.compatible_service_versions = m['compatible_service_versions']
model.description = m['description']
model.filename = f'{model.id}.traineddata'
model.publisher = m['publisher']
model.publisher_url = m['publisher_url']
model.publishing_url = m['publishing_url']
model.publishing_year = m['publishing_year']
model.is_public = True
model.title = m['title']
model.version = m['version']
else:
model = TesseractOCRPipelineModel(
compatible_service_versions=m['compatible_service_versions'],
description=m['description'],
publisher=m['publisher'],
publisher_url=m['publisher_url'],
publishing_url=m['publishing_url'],
publishing_year=m['publishing_year'],
is_public=True,
title=m['title'],
user=nopaque_user,
version=m['version']
)
db.session.add(model)
db.session.flush(objects=[model])
db.session.refresh(model)
model.filename = f'{model.id}.traineddata'
if not model.path.exists() or force_download:
r = requests.get(m['url'], stream=True)
pbar = tqdm(
desc=f'{model.title} ({model.filename})',
unit="B",
unit_scale=True,
unit_divisor=1024,
total=int(r.headers['Content-Length'])
)
pbar.clear()
with open(model.path, 'wb') as f:
for chunk in r.iter_content(chunk_size=1024):
if chunk: # filter out keep-alive new chunks
pbar.update(len(chunk))
f.write(chunk)
pbar.close()
db.session.commit()
def delete(self):
try:
self.path.unlink(missing_ok=True)
except OSError as e:
current_app.logger.error(e)
raise
db.session.delete(self)
def to_json_serializeable(self, backrefs=False, relationships=False):
json_serializeable = {
'id': self.hashid,
'compatible_service_versions': self.compatible_service_versions,
'description': self.description,
'publisher': self.publisher,
'publisher_url': self.publisher_url,
'publishing_url': self.publishing_url,
'publishing_year': self.publishing_year,
'is_public': self.is_public,
'title': self.title,
'version': self.version,
**self.file_mixin_to_json_serializeable()
}
if backrefs:
json_serializeable['user'] = \
self.user.to_json_serializeable(backrefs=True)
if relationships:
pass
return json_serializeable

View File

@ -1,48 +0,0 @@
from datetime import datetime, timedelta
from app import db
class Token(db.Model):
__tablename__ = 'tokens'
# Primary key
id = db.Column(db.Integer, primary_key=True)
# Foreign keys
user_id = db.Column(db.Integer, db.ForeignKey('users.id'))
# Fields
access_token = db.Column(db.String(64), index=True)
access_expiration = db.Column(db.DateTime)
refresh_token = db.Column(db.String(64), index=True)
refresh_expiration = db.Column(db.DateTime)
# Relationships
user = db.relationship('User', back_populates='tokens')
def expire(self):
self.access_expiration = datetime.utcnow()
self.refresh_expiration = datetime.utcnow()
def to_json_serializeable(self, backrefs=False, relationships=False):
json_serializeable = {
'id': self.hashid,
'access_token': self.access_token,
'access_expiration': (
None if self.access_expiration is None
else f'{self.access_expiration.isoformat()}Z'
),
'refresh_token': self.refresh_token,
'refresh_expiration': (
None if self.refresh_expiration is None
else f'{self.refresh_expiration.isoformat()}Z'
)
}
if backrefs:
json_serializeable['user'] = \
self.user.to_json_serializeable(backrefs=True)
if relationships:
pass
return json_serializeable
@staticmethod
def clean():
"""Remove any tokens that have been expired for more than a day."""
yesterday = datetime.utcnow() - timedelta(days=1)
Token.query.filter(Token.refresh_expiration < yesterday).delete()

View File

@ -1,453 +0,0 @@
from datetime import datetime, timedelta
from enum import IntEnum
from flask import current_app, url_for
from flask_hashids import HashidMixin
from flask_login import UserMixin
from sqlalchemy.ext.associationproxy import association_proxy
from pathlib import Path
from werkzeug.security import generate_password_hash, check_password_hash
import jwt
import re
import secrets
import shutil
from app import db, hashids
from app.extensions.sqlalchemy_extras import IntEnumColumn
from .corpus import Corpus
from .corpus_follower_association import CorpusFollowerAssociation
from .corpus_follower_role import CorpusFollowerRole
from .role import Permission, Role
from .token import Token
class ProfilePrivacySettings(IntEnum):
SHOW_EMAIL = 1
SHOW_LAST_SEEN = 2
SHOW_MEMBER_SINCE = 4
@staticmethod
def get(profile_privacy_setting: 'ProfilePrivacySettings | int | str') -> 'ProfilePrivacySettings':
if isinstance(profile_privacy_setting, ProfilePrivacySettings):
return profile_privacy_setting
if isinstance(profile_privacy_setting, int):
return ProfilePrivacySettings(profile_privacy_setting)
if isinstance(profile_privacy_setting, str):
return ProfilePrivacySettings[profile_privacy_setting]
raise TypeError('profile_privacy_setting must be ProfilePrivacySettings, int, or str')
class UserSettingJobStatusMailNotificationLevel(IntEnum):
NONE = 1
END = 2
ALL = 3
class User(HashidMixin, UserMixin, db.Model):
__tablename__ = 'users'
# Primary key
id = db.Column(db.Integer, primary_key=True)
# Foreign keys
role_id = db.Column(db.Integer, db.ForeignKey('roles.id'))
# Fields
email = db.Column(db.String(254), index=True, unique=True)
username = db.Column(db.String(64), index=True, unique=True)
username_pattern = re.compile(r'^[A-Za-zÄÖÜäöüß0-9_.]*$')
password_hash = db.Column(db.String(128))
confirmed = db.Column(db.Boolean, default=False)
terms_of_use_accepted = db.Column(db.Boolean, default=False)
member_since = db.Column(db.DateTime(), default=datetime.utcnow)
setting_job_status_mail_notification_level = db.Column(
IntEnumColumn(UserSettingJobStatusMailNotificationLevel),
default=UserSettingJobStatusMailNotificationLevel.END
)
last_seen = db.Column(db.DateTime())
full_name = db.Column(db.String(64))
about_me = db.Column(db.String(256))
location = db.Column(db.String(64))
website = db.Column(db.String(128))
organization = db.Column(db.String(128))
is_public = db.Column(db.Boolean, default=False)
profile_privacy_settings = db.Column(db.Integer(), default=0)
# Relationships
avatar = db.relationship(
'Avatar',
back_populates='user',
cascade='all, delete-orphan',
uselist=False
)
corpora = db.relationship(
'Corpus',
back_populates='user',
cascade='all, delete-orphan',
lazy='dynamic'
)
corpus_follower_associations = db.relationship(
'CorpusFollowerAssociation',
back_populates='follower',
cascade='all, delete-orphan'
)
followed_corpora = association_proxy(
'corpus_follower_associations',
'corpus',
creator=lambda c: CorpusFollowerAssociation(corpus=c)
)
jobs = db.relationship(
'Job',
back_populates='user',
cascade='all, delete-orphan',
lazy='dynamic'
)
role = db.relationship(
'Role',
back_populates='users'
)
spacy_nlp_pipeline_models = db.relationship(
'SpaCyNLPPipelineModel',
back_populates='user',
cascade='all, delete-orphan',
lazy='dynamic'
)
tesseract_ocr_pipeline_models = db.relationship(
'TesseractOCRPipelineModel',
back_populates='user',
cascade='all, delete-orphan',
lazy='dynamic'
)
tokens = db.relationship(
'Token',
back_populates='user',
cascade='all, delete-orphan',
lazy='dynamic'
)
def __init__(self, **kwargs):
if 'role' not in kwargs:
kwargs['role'] = (
Role.query.filter_by(name='Administrator').first()
if kwargs['email'] == current_app.config['NOPAQUE_ADMIN']
else Role.query.filter_by(default=True).first()
)
super().__init__(**kwargs)
def __repr__(self):
return f'<User {self.username}>'
@property
def is_administrator(self):
return self.can(Permission.ADMINISTRATE)
@property
def jsonpatch_path(self):
return f'/users/{self.hashid}'
@property
def password(self):
raise AttributeError('password is not a readable attribute')
@password.setter
def password(self, password):
#pbkdf2:sha256
self.password_hash = generate_password_hash(password, method='pbkdf2')
@property
def path(self) -> Path:
return current_app.config.get('NOPAQUE_DATA_DIR') / 'users' / f'{self.id}'
@staticmethod
def create(**kwargs):
user = User(**kwargs)
db.session.add(user)
db.session.flush(objects=[user])
db.session.refresh(user)
user_spacy_nlp_pipeline_models_dir = user.path / 'spacy_nlp_pipeline_models'
user_tesseract_ocr_pipeline_models_dir = user.path / 'tesseract_ocr_pipeline_models'
user_corpora_dir = user.path / 'corpora'
user_jobs_dir = user.path / 'jobs'
try:
user.path.mkdir()
user_spacy_nlp_pipeline_models_dir.mkdir()
user_tesseract_ocr_pipeline_models_dir.mkdir()
user_corpora_dir.mkdir()
user_jobs_dir.mkdir()
except OSError as e:
# TODO: Potential leftover cleanup
current_app.logger.error(e)
db.session.rollback()
raise
return user
@staticmethod
def insert_defaults():
nopaque_user = User.query.filter_by(username='nopaque').first()
system_user_role = Role.query.filter_by(name='System user').first()
if nopaque_user is None:
nopaque_user = User.create(
username='nopaque',
role=system_user_role
)
db.session.add(nopaque_user)
elif nopaque_user.role != system_user_role:
nopaque_user.role = system_user_role
db.session.commit()
@staticmethod
def reset_password(token, new_password):
try:
payload = jwt.decode(
token,
current_app.config['SECRET_KEY'],
algorithms=['HS256'],
issuer=current_app.config['SERVER_NAME'],
options={'require': ['exp', 'iat', 'iss', 'purpose', 'sub']}
)
except jwt.PyJWTError:
return False
if payload.get('purpose') != 'User.reset_password':
return False
user_hashid = payload.get('sub')
user_id = hashids.decode(user_hashid)
user = User.query.get(user_id)
if user is None:
return False
user.password = new_password
db.session.add(user)
return True
@staticmethod
def verify_access_token(access_token, refresh_token=None):
token = Token.query.filter(Token.access_token == access_token).first()
if token is not None:
if token.access_expiration > datetime.utcnow():
token.user.ping()
db.session.commit()
if token.user.role.name != 'System user':
return token.user
@staticmethod
def verify_refresh_token(refresh_token, access_token):
token = Token.query.filter((Token.refresh_token == refresh_token) & (Token.access_token == access_token)).first()
if token is not None:
if token.refresh_expiration > datetime.utcnow():
return token
# someone tried to refresh with an expired token
# revoke all tokens from this user as a precaution
token.user.revoke_auth_tokens()
db.session.commit()
def can(self, permission):
return self.role is not None and self.role.has_permission(permission)
def confirm(self, confirmation_token):
try:
payload = jwt.decode(
confirmation_token,
current_app.config['SECRET_KEY'],
algorithms=['HS256'],
issuer=current_app.config['SERVER_NAME'],
options={'require': ['exp', 'iat', 'iss', 'purpose', 'sub']}
)
except jwt.PyJWTError:
return False
if payload.get('purpose') != 'user.confirm':
return False
if payload.get('sub') != self.hashid:
return False
self.confirmed = True
db.session.add(self)
return True
def delete(self):
shutil.rmtree(self.path, ignore_errors=True)
db.session.delete(self)
def generate_auth_token(self):
return Token(
access_token=secrets.token_urlsafe(),
access_expiration=datetime.utcnow() + timedelta(minutes=15),
refresh_token=secrets.token_urlsafe(),
refresh_expiration=datetime.utcnow() + timedelta(days=7),
user=self
)
def generate_confirm_token(self, expiration=3600):
now = datetime.utcnow()
payload = {
'exp': now + timedelta(seconds=expiration),
'iat': now,
'iss': current_app.config['SERVER_NAME'],
'purpose': 'user.confirm',
'sub': self.hashid
}
return jwt.encode(
payload,
current_app.config['SECRET_KEY'],
algorithm='HS256'
)
def generate_reset_password_token(self, expiration=3600):
now = datetime.utcnow()
payload = {
'exp': now + timedelta(seconds=expiration),
'iat': now,
'iss': current_app.config['SERVER_NAME'],
'purpose': 'User.reset_password',
'sub': self.hashid
}
return jwt.encode(
payload,
current_app.config['SECRET_KEY'],
algorithm='HS256'
)
def ping(self):
self.last_seen = datetime.utcnow()
def revoke_auth_tokens(self):
for token in self.tokens:
db.session.delete(token)
def verify_password(self, password):
if self.role.name == 'System user':
return False
return check_password_hash(self.password_hash, password)
#region Profile Privacy settings
def has_profile_privacy_setting(self, setting):
s = ProfilePrivacySettings.get(setting)
return self.profile_privacy_settings & s.value == s.value
def add_profile_privacy_setting(self, setting):
s = ProfilePrivacySettings.get(setting)
if not self.has_profile_privacy_setting(s):
self.profile_privacy_settings += s.value
def remove_profile_privacy_setting(self, setting):
s = ProfilePrivacySettings.get(setting)
if self.has_profile_privacy_setting(s):
self.profile_privacy_settings -= s.value
def reset_profile_privacy_settings(self):
self.profile_privacy_settings = 0
#endregion Profile Privacy settings
def follow_corpus(self, corpus, role=None):
if role is None:
cfr = CorpusFollowerRole.query.filter_by(default=True).first()
else:
cfr = role
if self.is_following_corpus(corpus):
cfa = CorpusFollowerAssociation.query.filter_by(corpus=corpus, follower=self).first()
if cfa.role != cfr:
cfa.role = cfr
else:
cfa = CorpusFollowerAssociation(corpus=corpus, role=cfr, follower=self)
db.session.add(cfa)
def unfollow_corpus(self, corpus):
if not self.is_following_corpus(corpus):
return
self.followed_corpora.remove(corpus)
def is_following_corpus(self, corpus):
return corpus in self.followed_corpora
def generate_follow_corpus_token(self, corpus_hashid, role_name, expiration=7):
now = datetime.utcnow()
payload = {
'exp': expiration,
'iat': now,
'iss': current_app.config['SERVER_NAME'],
'purpose': 'User.follow_corpus',
'role_name': role_name,
'sub': corpus_hashid
}
return jwt.encode(
payload,
current_app.config['SECRET_KEY'],
algorithm='HS256'
)
def follow_corpus_by_token(self, token):
try:
payload = jwt.decode(
token,
current_app.config['SECRET_KEY'],
algorithms=['HS256'],
issuer=current_app.config['SERVER_NAME'],
options={'require': ['exp', 'iat', 'iss', 'purpose', 'role_name', 'sub']}
)
except jwt.PyJWTError:
return False
if payload.get('purpose') != 'User.follow_corpus':
return False
corpus_hashid = payload.get('sub')
corpus_id = hashids.decode(corpus_hashid)
corpus = Corpus.query.get_or_404(corpus_id)
if corpus is None:
return False
role_name = payload.get('role_name')
role = CorpusFollowerRole.query.filter_by(name=role_name).first()
if role is None:
return False
self.follow_corpus(corpus, role)
# db.session.add(self)
return True
def to_json_serializeable(self, backrefs=False, relationships=False, filter_by_privacy_settings=False):
json_serializeable = {
'id': self.hashid,
'confirmed': self.confirmed,
'avatar': url_for('users.user_avatar', user_id=self.id),
'email': self.email,
'last_seen': (
None if self.last_seen is None
else f'{self.last_seen.isoformat()}Z'
),
'member_since': f'{self.member_since.isoformat()}Z',
'username': self.username,
'full_name': self.full_name,
'about_me': self.about_me,
'website': self.website,
'location': self.location,
'organization': self.organization,
'job_status_mail_notification_level': \
self.setting_job_status_mail_notification_level.name,
'profile_privacy_settings': {
'is_public': self.is_public,
'show_email': self.has_profile_privacy_setting(ProfilePrivacySettings.SHOW_EMAIL),
'show_last_seen': self.has_profile_privacy_setting(ProfilePrivacySettings.SHOW_LAST_SEEN),
'show_member_since': self.has_profile_privacy_setting(ProfilePrivacySettings.SHOW_MEMBER_SINCE)
}
}
if backrefs:
json_serializeable['role'] = \
self.role.to_json_serializeable(backrefs=True)
if relationships:
json_serializeable['corpus_follower_associations'] = {
x.hashid: x.to_json_serializeable()
for x in self.corpus_follower_associations
}
json_serializeable['corpora'] = {
x.hashid: x.to_json_serializeable(relationships=True)
for x in self.corpora
}
json_serializeable['jobs'] = {
x.hashid: x.to_json_serializeable(relationships=True)
for x in self.jobs
}
json_serializeable['tesseract_ocr_pipeline_models'] = {
x.hashid: x.to_json_serializeable(relationships=True)
for x in self.tesseract_ocr_pipeline_models
}
json_serializeable['spacy_nlp_pipeline_models'] = {
x.hashid: x.to_json_serializeable(relationships=True)
for x in self.spacy_nlp_pipeline_models
}
if filter_by_privacy_settings:
if not self.has_profile_privacy_setting(ProfilePrivacySettings.SHOW_EMAIL):
json_serializeable.pop('email')
if not self.has_profile_privacy_setting(ProfilePrivacySettings.SHOW_LAST_SEEN):
json_serializeable.pop('last_seen')
if not self.has_profile_privacy_setting(ProfilePrivacySettings.SHOW_MEMBER_SINCE):
json_serializeable.pop('member_since')
return json_serializeable

View File

@ -1,11 +1,12 @@
from flask import Blueprint from flask import Blueprint
from flask_login import login_required from flask_login import login_required
from pathlib import Path import os
import yaml import yaml
services_file = Path(__file__).parent / 'services.yml' services_file = \
with services_file.open('r') as f: os.path.join(os.path.dirname(os.path.abspath(__file__)), 'services.yml')
with open(services_file, 'r') as f:
SERVICES = yaml.safe_load(f) SERVICES = yaml.safe_load(f)
bp = Blueprint('services', __name__) bp = Blueprint('services', __name__)

View File

@ -1,4 +1,5 @@
from flask import abort, current_app, flash, redirect, render_template, request, url_for from flask import abort, current_app, flash, Markup, redirect, render_template, request, url_for
from flask_breadcrumbs import register_breadcrumb
from flask_login import current_user from flask_login import current_user
import requests import requests
from app import db, hashids from app import db, hashids
@ -19,11 +20,13 @@ from .forms import (
@bp.route('/services') @bp.route('/services')
@register_breadcrumb(bp, '.', 'Services')
def services(): def services():
return redirect(url_for('main.dashboard')) return redirect(url_for('main.dashboard'))
@bp.route('/file-setup-pipeline', methods=['GET', 'POST']) @bp.route('/file-setup-pipeline', methods=['GET', 'POST'])
@register_breadcrumb(bp, '.file_setup_pipeline', '<i class="nopaque-icons service-icons left" data-service="file-setup-pipeline"></i>File Setup')
def file_setup_pipeline(): def file_setup_pipeline():
service = 'file-setup-pipeline' service = 'file-setup-pipeline'
service_manifest = SERVICES[service] service_manifest = SERVICES[service]
@ -53,7 +56,7 @@ def file_setup_pipeline():
abort(500) abort(500)
job.status = JobStatus.SUBMITTED job.status = JobStatus.SUBMITTED
db.session.commit() db.session.commit()
message = f'Job "<a href="{job.url}">{job.title}</a>" created' message = Markup(f'Job "<a href="{job.url}">{job.title}</a>" created')
flash(message, 'job') flash(message, 'job')
return {}, 201, {'Location': job.url} return {}, 201, {'Location': job.url}
return render_template( return render_template(
@ -64,6 +67,7 @@ def file_setup_pipeline():
@bp.route('/tesseract-ocr-pipeline', methods=['GET', 'POST']) @bp.route('/tesseract-ocr-pipeline', methods=['GET', 'POST'])
@register_breadcrumb(bp, '.tesseract_ocr_pipeline', '<i class="nopaque-icons service-icons left" data-service="tesseract-ocr-pipeline"></i>Tesseract OCR Pipeline')
def tesseract_ocr_pipeline(): def tesseract_ocr_pipeline():
service_name = 'tesseract-ocr-pipeline' service_name = 'tesseract-ocr-pipeline'
service_manifest = SERVICES[service_name] service_manifest = SERVICES[service_name]
@ -96,7 +100,7 @@ def tesseract_ocr_pipeline():
abort(500) abort(500)
job.status = JobStatus.SUBMITTED job.status = JobStatus.SUBMITTED
db.session.commit() db.session.commit()
message = f'Job "<a href="{job.url}">{job.title}</a>" created' message = Markup(f'Job "<a href="{job.url}">{job.title}</a>" created')
flash(message, 'job') flash(message, 'job')
return {}, 201, {'Location': job.url} return {}, 201, {'Location': job.url}
tesseract_ocr_pipeline_models = [ tesseract_ocr_pipeline_models = [
@ -114,6 +118,7 @@ def tesseract_ocr_pipeline():
@bp.route('/transkribus-htr-pipeline', methods=['GET', 'POST']) @bp.route('/transkribus-htr-pipeline', methods=['GET', 'POST'])
@register_breadcrumb(bp, '.transkribus_htr_pipeline', '<i class="nopaque-icons service-icons left" data-service="transkribus-htr-pipeline"></i>Transkribus HTR Pipeline')
def transkribus_htr_pipeline(): def transkribus_htr_pipeline():
if not current_app.config.get('NOPAQUE_TRANSKRIBUS_ENABLED'): if not current_app.config.get('NOPAQUE_TRANSKRIBUS_ENABLED'):
abort(404) abort(404)
@ -159,7 +164,7 @@ def transkribus_htr_pipeline():
abort(500) abort(500)
job.status = JobStatus.SUBMITTED job.status = JobStatus.SUBMITTED
db.session.commit() db.session.commit()
message = f'Job "<a href="{job.url}">{job.title}</a>" created' message = Markup(f'Job "<a href="{job.url}">{job.title}</a>" created')
flash(message, 'job') flash(message, 'job')
return {}, 201, {'Location': job.url} return {}, 201, {'Location': job.url}
return render_template( return render_template(
@ -171,6 +176,7 @@ def transkribus_htr_pipeline():
@bp.route('/spacy-nlp-pipeline', methods=['GET', 'POST']) @bp.route('/spacy-nlp-pipeline', methods=['GET', 'POST'])
@register_breadcrumb(bp, '.spacy_nlp_pipeline', '<i class="nopaque-icons service-icons left" data-service="spacy-nlp-pipeline"></i>SpaCy NLP Pipeline')
def spacy_nlp_pipeline(): def spacy_nlp_pipeline():
service = 'spacy-nlp-pipeline' service = 'spacy-nlp-pipeline'
service_manifest = SERVICES[service] service_manifest = SERVICES[service]
@ -204,7 +210,7 @@ def spacy_nlp_pipeline():
abort(500) abort(500)
job.status = JobStatus.SUBMITTED job.status = JobStatus.SUBMITTED
db.session.commit() db.session.commit()
message = f'Job "<a href="{job.url}">{job.title}</a>" created' message = Markup(f'Job "<a href="{job.url}">{job.title}</a>" created')
flash(message, 'job') flash(message, 'job')
return {}, 201, {'Location': job.url} return {}, 201, {'Location': job.url}
return render_template( return render_template(
@ -217,6 +223,7 @@ def spacy_nlp_pipeline():
@bp.route('/corpus-analysis') @bp.route('/corpus-analysis')
@register_breadcrumb(bp, '.corpus_analysis', '<i class="nopaque-icons service-icons left" data-service="corpus-analysis"></i>Corpus Analysis')
def corpus_analysis(): def corpus_analysis():
return render_template( return render_template(
'services/corpus_analysis.html.j2', 'services/corpus_analysis.html.j2',

View File

@ -59,8 +59,3 @@ spacy-nlp-pipeline:
- 'encoding_detection' - 'encoding_detection'
publishing_year: 2022 publishing_year: 2022
url: 'https://gitlab.ub.uni-bielefeld.de/sfb1288inf/spacy-nlp-pipeline/-/releases/v0.1.1' url: 'https://gitlab.ub.uni-bielefeld.de/sfb1288inf/spacy-nlp-pipeline/-/releases/v0.1.1'
0.1.2:
methods:
- 'encoding_detection'
publishing_year: 2024
url: 'https://gitlab.ub.uni-bielefeld.de/sfb1288inf/spacy-nlp-pipeline/-/releases/v0.1.2'

Some files were not shown because too many files have changed in this diff Show More