From 5a06a6b241e19e8b3fa33d154740f0da9a631d1b Mon Sep 17 00:00:00 2001 From: Patrick Jentsch Date: Fri, 13 Nov 2020 10:01:51 +0100 Subject: [PATCH] More exception handling. Remove unused database models. New common view structure! --- .env.tpl | 135 +++--- docker-compose.yml | 1 + web/Dockerfile | 5 +- web/app/__init__.py | 2 +- web/app/admin/__init__.py | 2 +- web/app/admin/forms.py | 1 - web/app/admin/views.py | 14 +- web/app/auth/__init__.py | 2 +- web/app/auth/forms.py | 2 +- web/app/auth/views.py | 107 +++-- web/app/corpora/views.py | 390 ++++++++---------- web/app/email.py | 4 +- web/app/jobs/__init__.py | 2 +- web/app/jobs/views.py | 30 +- web/app/main/__init__.py | 2 +- web/app/main/views.py | 25 +- web/app/models.py | 181 +++----- web/app/query_results/views.py | 150 ------- web/app/services/__init__.py | 2 +- web/app/services/views.py | 61 ++- web/app/settings/forms.py | 2 +- web/app/settings/views.py | 19 +- web/app/tasks/__init__.py | 14 +- web/app/tasks/corpus_utils.py | 131 +++--- web/app/tasks/job_utils.py | 90 ++-- web/app/templates/auth/login.html.j2 | 10 +- web/app/templates/auth/register.html.j2 | 12 +- web/app/templates/auth/reset_password.html.j2 | 8 +- .../auth/reset_password_request.html.j2 | 6 +- web/app/templates/corpora/add_corpus.html.j2 | 8 +- .../templates/corpora/add_corpus_file.html.j2 | 14 +- .../templates/corpora/analyse_corpus.html.j2 | 2 +- web/app/templates/corpora/corpus_file.html.j2 | 12 +- .../templates/corpora/import_corpus.html.j2 | 12 +- .../query_results/add_query_result.html.j2 | 10 +- web/app/templates/main/index.html.j2 | 10 +- web/app/templates/nopaque.html.j2 | 6 +- web/app/templates/services/file-setup.html.j2 | 12 +- web/app/templates/services/nlp.html.j2 | 18 +- web/app/templates/services/ocr.html.j2 | 18 +- .../tasks/email/notification.html.j2 | 7 +- .../templates/tasks/email/notification.txt.j2 | 7 +- web/boot.sh | 1 + web/config.py | 139 +++---- web/nopaque.py | 11 +- 45 files changed, 692 insertions(+), 1005 deletions(-) delete mode 100644 web/app/query_results/views.py diff --git a/.env.tpl b/.env.tpl index 2c13eb3e..e7c2cea7 100644 --- a/.env.tpl +++ b/.env.tpl @@ -9,128 +9,116 @@ # NOTE: Use `.` as # HOST_MQ_DIR= -# Example: 999 -# HINT: Use this bash command `getent group docker | cut -d: -f3` -HOST_DOCKER_GID= +# Example: 1000 +# HINT: Use this bash command `id -u` +HOST_UID= # Example: 1000 # HINT: Use this bash command `id -g` HOST_GID= -# DEFAULT: ./nopaqued.log -# NOTES: Use `.` as , -# This file must be present on container startup -# HOST_NOPAQUE_DAEMON_LOG_FILE= +# Example: 999 +# HINT: Use this bash command `getent group docker | cut -d: -f3` +HOST_DOCKER_GID= # DEFAULT: ./nopaque.log # NOTES: Use `.` as , # This file must be present on container startup -# HOST_NOPAQUE_LOG_FILE= - -# Example: 1000 -# HINT: Use this bash command `id -u` -HOST_UID= +# HOST_LOG_FILE= ################################################################################ -# Cookies # +# Flask # +# https://flask.palletsprojects.com/en/1.1.x/config/ # ################################################################################ -# CHOOSE ONE: False, True -# DEFAULT: False -# HINT: Set to true if you redirect http to https -# NOPAQUE_REMEMBER_COOKIE_SECURE= +# DEFAULT: hard to guess string +# HINT: Use this bash command `python -c "import uuid; print(uuid.uuid4().hex)"` +# SECRET_KEY= # CHOOSE ONE: False, True # DEFAULT: False # HINT: Set to true if you redirect http to https -# NOPAQUE_SESSION_COOKIE_SECURE= +# SESSION_COOKIE_SECURE= ################################################################################ -# Database # -# DATABASE_URI blueprint: # -# - dialect[+driver]://username:password@host[:port]/database # -# - sqlite is not supported # -# - values in square brackets are optional # +# Flask-Login # +# https://flask-login.readthedocs.io/en/latest/ # ################################################################################ -# DEFAULT: postgresql://nopaque:nopaque@db/nopaque -# NOPAQUE_DATABASE_URL= - -# DEFAULT: postgresql://nopaque:nopaque@db/nopaque_dev -# NOPAQUE_DEV_DATABASE_URL= - -# DEFAULT: postgresql://nopaque:nopaque@db/nopaque_test -# NOPAQUE_TEST_DATABASE_URL= +# CHOOSE ONE: False, True +# DEFAULT: False +# HINT: Set to true if you redirect http to https +# REMEMBER_COOKIE_SECURE= ################################################################################ -# Email # +# Flask-Mail # +# https://pythonhosted.org/Flask-Mail/ # ################################################################################ # EXAMPLE: nopaque Admin -NOPAQUE_SMTP_DEFAULT_SENDER= +MAIL_DEFAULT_SENDER= -NOPAQUE_SMTP_PASSWORD= +MAIL_PASSWORD= # EXAMPLE: smtp.example.com -NOPAQUE_SMTP_SERVER= +MAIL_SERVER= # EXAMPLE: 587 -NOPAQUE_SMTP_PORT= +MAIL_PORT= # CHOOSE ONE: False, True # DEFAULT: False -# NOPAQUE_SMTP_USE_SSL= +# MAIL_USE_SSL= # CHOOSE ONE: False, True # DEFAULT: False -# NOPAQUE_SMTP_USE_TLS= +# MAIL_USE_TLS= # EXAMPLE: nopaque@example.com -NOPAQUE_SMTP_USERNAME= +MAIL_USERNAME= ################################################################################ -# General # +# Flask-SQLAlchemy # +# https://flask-sqlalchemy.palletsprojects.com/en/2.x/config/ # ################################################################################ +# DEFAULT with development config: postgresql://nopaque:nopaque@db/nopaque_dev +# DEFAULT with production config: postgresql://nopaque:nopaque@db/nopaque +# DEFAULT with testing config: postgresql://nopaque:nopaque@db/nopaque_test +# SQLALCHEMY_DATABASE_URI= + + +################################################################################ +# nopaque # +################################################################################ +# If an account is registered with this email adress gets automatically +# assigned the administrator role. # EXAMPLE: admin.nopaque@example.com -NOPAQUE_ADMIN_EMAIL_ADRESS= +NOPAQUE_ADMIN= # DEFAULT: development # CHOOSE ONE: development, production, testing # NOPAQUE_CONFIG= +# This email adress is used for the contact button in the nopaque footer. If +# not set, no contact button is displayed. # DEFAULT: None # EXAMPLE: contact.nopaque@example.com -# NOPAQUE_CONTACT_EMAIL_ADRESS= +# NOPAQUE_CONTACT= # DEFAULT: /mnt/nopaque -# NOTE: This must be a network share and it must be available on all Docker Swarm nodes +# NOTE: This must be a network share and it must be available on all Docker +# Swarm nodes # NOPAQUE_DATA_DIR= -# DEFAULT: localhost -# NOPAQUE_DOMAIN= - # DEFAULT: 0.0.0.0 # NOPAQUE_HOST= # DEFAULT: 5000 # NOPAQUE_PORT= -# CHOOSE ONE: http, https -# DEFAULT: http -# NOPAQUE_PROTOCOL= - -# DEFAULT: hard to guess string -# HINT: Use this bash command `python -c "import uuid; print(uuid.uuid4().hex)"` -# NOPAQUE_SECRET_KEY= - - -################################################################################ -# Logging # -################################################################################ -# DEFAULT: /home/nopaqued/nopaqued.log ~ /home/nopaqued/nopaqued.log -# NOTE: Use `.` as -# NOPAQUE_DAEMON_LOG_FILE= +# transport://[userid:password]@hostname[:port]/[virtual_host] +NOPAQUE_SOCKETIO_MESSAGE_QUEUE_URI= # DEFAULT: %Y-%m-%d %H:%M:%S # NOPAQUE_LOG_DATE_FORMAT= @@ -146,37 +134,22 @@ NOPAQUE_ADMIN_EMAIL_ADRESS= # CHOOSE ONE: CRITICAL, ERROR, WARNING, INFO, DEBUG # NOPAQUE_LOG_LEVEL= - -################################################################################ -# Message queue # -# MESSAGE_QUEUE_URI blueprint: # -# - transport://[userid:password]@hostname[:port]/[virtual_host] # -# - values in square brackets are optional # -################################################################################ -# DEFAULT: None -# HINT: A message queue is not required when using a single server process -# NOPAQUE_SOCKETIO_MESSAGE_QUEUE_URI= - - -################################################################################ -# Proxy fix # -################################################################################ # DEFAULT: 0 # Number of values to trust for X-Forwarded-For -# NOPAQUE_NUM_PROXIES_X_FOR= +# NOPAQUE_PROXY_FIX_X_FOR= # DEFAULT: 0 # Number of values to trust for X-Forwarded-Host -# NOPAQUE_NUM_PROXIES_X_HOST= +# NOPAQUE_PROXY_FIX_X_HOST= # DEFAULT: 0 # Number of values to trust for X-Forwarded-Port -# NOPAQUE_NUM_PROXIES_X_PORT= +# NOPAQUE_PROXY_FIX_X_PORT= # DEFAULT: 0 # Number of values to trust for X-Forwarded-Prefix -# NOPAQUE_NUM_PROXIES_X_PREFIX= +# NOPAQUE_PROXY_FIX_X_PREFIX= # DEFAULT: 0 # Number of values to trust for X-Forwarded-Proto -# NOPAQUE_NUM_PROXIES_X_PROTO= +# NOPAQUE_PROXY_FIX_X_PROTO= diff --git a/docker-compose.yml b/docker-compose.yml index 57f8b5bd..c1d75151 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -28,5 +28,6 @@ services: image: nopaque:development restart: unless-stopped volumes: + - "/var/run/docker.sock:/var/run/docker.sock" - "${NOPAQUE_DATA_DIR:-/mnt/nopaque}:${NOPAQUE_DATA_DIR:-/mnt/nopaque}" - "${HOST_NOPAQUE_LOG_FILE-./nopaque.log}:${NOPAQUE_LOG_FILE:-/home/nopaque/nopaque.log}" diff --git a/web/Dockerfile b/web/Dockerfile index 4d8037e1..3681b701 100644 --- a/web/Dockerfile +++ b/web/Dockerfile @@ -21,8 +21,9 @@ RUN apt-get update \ && rm -r /var/lib/apt/lists/* -RUN groupadd --gid ${GID} --system nopaque \ - && useradd --create-home --gid ${GID} --no-log-init --system --uid ${UID} nopaque +RUN groupadd --gid ${DOCKER_GID} --system docker \ + && groupadd --gid ${GID} --system nopaque \ + && useradd --create-home --gid ${GID} --groups ${DOCKER_GID} --no-log-init --system --uid ${UID} nopaque USER nopaque WORKDIR /home/nopaque diff --git a/web/app/__init__.py b/web/app/__init__.py index 75108bd3..9a399ddc 100644 --- a/web/app/__init__.py +++ b/web/app/__init__.py @@ -26,7 +26,7 @@ def create_app(config_name): mail.init_app(app) paranoid.init_app(app) socketio.init_app( - app, message_queue=config[config_name].SOCKETIO_MESSAGE_QUEUE_URI) + app, message_queue=app.config['NOPAQUE_SOCKETIO_MESSAGE_QUEUE_URI']) with app.app_context(): from . import events diff --git a/web/app/admin/__init__.py b/web/app/admin/__init__.py index 40fd29a4..9bb011f8 100644 --- a/web/app/admin/__init__.py +++ b/web/app/admin/__init__.py @@ -2,4 +2,4 @@ from flask import Blueprint admin = Blueprint('admin', __name__) -from . import views # noqa +from . import views diff --git a/web/app/admin/forms.py b/web/app/admin/forms.py index 42706bab..a0f796d6 100644 --- a/web/app/admin/forms.py +++ b/web/app/admin/forms.py @@ -12,4 +12,3 @@ class EditGeneralSettingsAdminForm(EditGeneralSettingsForm): super().__init__(*args, user=user, **kwargs) self.role.choices = [(role.id, role.name) for role in Role.query.order_by(Role.name).all()] - self.user = user diff --git a/web/app/admin/views.py b/web/app/admin/views.py index c3f4e875..798c0df8 100644 --- a/web/app/admin/views.py +++ b/web/app/admin/views.py @@ -29,12 +29,11 @@ def user(user_id): @admin_required def delete_user(user_id): settings_tasks.delete_user(user_id) - flash('User has been deleted!') + flash('User has been marked for deletion!') return redirect(url_for('.users')) -@admin.route('/users//edit_general_settings', - methods=['GET', 'POST']) +@admin.route('/users//edit_general_settings', methods=['GET', 'POST']) # noqa @login_required @admin_required def edit_general_settings(user_id): @@ -46,16 +45,13 @@ def edit_general_settings(user_id): user.username = form.username.data user.confirmed = form.confirmed.data user.role = Role.query.get(form.role.data) - db.session.add(user) db.session.commit() - flash('The profile has been updated.') - return redirect(url_for('admin.edit_general_settings', user_id=user.id)) + flash('Settings have been updated.') + return redirect(url_for('.edit_general_settings', user_id=user.id)) form.confirmed.data = user.confirmed form.dark_mode.data = user.setting_dark_mode form.email.data = user.email form.role.data = user.role_id form.username.data = user.username return render_template('admin/edit_general_settings.html.j2', - form=form, - title='General settings', - user=user) + form=form, title='General settings', user=user) diff --git a/web/app/auth/__init__.py b/web/app/auth/__init__.py index 893d7071..a45dc3f3 100644 --- a/web/app/auth/__init__.py +++ b/web/app/auth/__init__.py @@ -2,4 +2,4 @@ from flask import Blueprint auth = Blueprint('auth', __name__) -from . import views # noqa +from . import views diff --git a/web/app/auth/forms.py b/web/app/auth/forms.py index 3344096b..98dab654 100644 --- a/web/app/auth/forms.py +++ b/web/app/auth/forms.py @@ -18,7 +18,7 @@ class RegistrationForm(FlaskForm): username = StringField( 'Username', validators=[DataRequired(), Length(1, 64), - Regexp(current_app.config['ALLOWED_USERNAME_REGEX'], + Regexp(current_app.config['NOPAQUE_USERNAME_REGEX'], message='Usernames must have only letters, numbers,' ' dots or underscores')] ) diff --git a/web/app/auth/views.py b/web/app/auth/views.py index c1fe37ed..b6355642 100644 --- a/web/app/auth/views.py +++ b/web/app/auth/views.py @@ -1,5 +1,5 @@ -from flask import (current_app, flash, redirect, render_template, request, - url_for) +from datetime import datetime +from flask import abort, flash, redirect, render_template, request, url_for from flask_login import current_user, login_user, login_required, logout_user from . import auth from .forms import (LoginForm, ResetPasswordForm, ResetPasswordRequestForm, @@ -7,8 +7,8 @@ from .forms import (LoginForm, ResetPasswordForm, ResetPasswordRequestForm, from .. import db from ..email import create_message, send from ..models import User +import logging import os -import shutil @auth.before_app_request @@ -18,11 +18,12 @@ def before_request(): unconfirmed view if user is unconfirmed. """ if current_user.is_authenticated: - current_user.ping() - if not current_user.confirmed \ - and request.endpoint \ - and request.blueprint != 'auth' \ - and request.endpoint != 'static': + current_user.last_seen = datetime.utcnow() + db.session.commit() + if (not current_user.confirmed + and request.endpoint + and request.blueprint != 'auth' + and request.endpoint != 'static'): return redirect(url_for('auth.unconfirmed')) @@ -30,20 +31,19 @@ def before_request(): def login(): if current_user.is_authenticated: return redirect(url_for('main.dashboard')) - login_form = LoginForm(prefix='login-form') - if login_form.validate_on_submit(): - user = User.query.filter_by(username=login_form.user.data).first() + form = LoginForm(prefix='login-form') + if form.validate_on_submit(): + user = User.query.filter_by(username=form.user.data).first() if user is None: - user = User.query.filter_by(email=login_form.user.data).first() - if user is not None and user.verify_password(login_form.password.data): - login_user(user, login_form.remember_me.data) + user = User.query.filter_by(email=form.user.data.lower()).first() + if user is not None and user.verify_password(form.password.data): + login_user(user, form.remember_me.data) next = request.args.get('next') if next is None or not next.startswith('/'): next = url_for('main.dashboard') return redirect(next) flash('Invalid email/username or password.') - return render_template('auth/login.html.j2', login_form=login_form, - title='Log in') + return render_template('auth/login.html.j2', form=form, title='Log in') @auth.route('/logout') @@ -58,26 +58,28 @@ def logout(): def register(): if current_user.is_authenticated: return redirect(url_for('main.dashboard')) - registration_form = RegistrationForm(prefix='registration-form') - if registration_form.validate_on_submit(): - user = User(email=registration_form.email.data.lower(), - password=registration_form.password.data, - username=registration_form.username.data) + form = RegistrationForm(prefix='registration-form') + if form.validate_on_submit(): + user = User(email=form.email.data.lower(), + password=form.password.data, + username=form.username.data) db.session.add(user) db.session.commit() - user_dir = os.path.join(current_app.config['DATA_DIR'], - str(user.id)) - if os.path.exists(user_dir): - shutil.rmtree(user_dir) - os.mkdir(user_dir) - token = user.generate_confirmation_token() - msg = create_message(user.email, 'Confirm Your Account', - 'auth/email/confirm', token=token, user=user) - send(msg) - flash('A confirmation email has been sent to you by email.') - return redirect(url_for('auth.login')) - return render_template('auth/register.html.j2', - registration_form=registration_form, + try: + os.makedirs(user.path) + except OSError: + logging.error('Make dir {} led to an OSError!'.format(user.path)) + db.session.delete(user) + db.session.commit() + abort(500) + else: + token = user.generate_confirmation_token() + msg = create_message(user.email, 'Confirm Your Account', + 'auth/email/confirm', token=token, user=user) + send(msg) + flash('A confirmation email has been sent to you by email.') + return redirect(url_for('.login')) + return render_template('auth/register.html.j2', form=form, title='Register') @@ -92,7 +94,7 @@ def confirm(token): return redirect(url_for('main.dashboard')) else: flash('The confirmation link is invalid or has expired.') - return redirect(url_for('auth.unconfirmed')) + return redirect(url_for('.unconfirmed')) @auth.route('/unconfirmed') @@ -119,39 +121,32 @@ def resend_confirmation(): def reset_password_request(): if current_user.is_authenticated: return redirect(url_for('main.dashboard')) - reset_password_request_form = ResetPasswordRequestForm( - prefix='reset-password-request-form') - if reset_password_request_form.validate_on_submit(): - submitted_email = reset_password_request_form.email.data - user = User.query.filter_by(email=submitted_email.lower()).first() - if user: + form = ResetPasswordRequestForm(prefix='reset-password-request-form') + if form.validate_on_submit(): + user = User.query.filter_by(email=form.email.data.lower()).first() + if user is not None: token = user.generate_reset_token() msg = create_message(user.email, 'Reset Your Password', 'auth/email/reset_password', token=token, user=user) send(msg) - flash('An email with instructions to reset your password has been ' - 'sent to you.') - return redirect(url_for('auth.login')) - return render_template( - 'auth/reset_password_request.html.j2', - reset_password_request_form=reset_password_request_form, - title='Password Reset') + flash('An email with instructions to reset your password has been sent to you.') # noqa + return redirect(url_for('.login')) + return render_template('auth/reset_password_request.html.j2', form=form, + title='Password Reset') @auth.route('/reset/', methods=['GET', 'POST']) def reset_password(token): if current_user.is_authenticated: return redirect(url_for('main.dashboard')) - reset_password_form = ResetPasswordForm(prefix='reset-password-form') - if reset_password_form.validate_on_submit(): - if User.reset_password(token, reset_password_form.password.data): + form = ResetPasswordForm(prefix='reset-password-form') + if form.validate_on_submit(): + if User.reset_password(token, form.password.data): db.session.commit() flash('Your password has been updated.') - return redirect(url_for('auth.login')) + return redirect(url_for('.login')) else: return redirect(url_for('main.index')) - return render_template('auth/reset_password.html.j2', - reset_password_form=reset_password_form, - title='Password Reset', - token=token) + return render_template('auth/reset_password.html.j2', form=form, + title='Password Reset', token=token) diff --git a/web/app/corpora/views.py b/web/app/corpora/views.py index a6bef316..13874243 100644 --- a/web/app/corpora/views.py +++ b/web/app/corpora/views.py @@ -1,4 +1,4 @@ -from flask import (abort, current_app, flash, make_response, redirect, request, +from flask import (abort, flash, make_response, redirect, request, render_template, url_for, send_from_directory) from flask_login import current_user, login_required from . import corpora @@ -11,6 +11,7 @@ from jsonschema import validate from .. import db from ..models import Corpus, CorpusFile, QueryResult import json +import logging import os import shutil import glob @@ -22,106 +23,92 @@ from .import_corpus import check_zip_contents @corpora.route('/add', methods=['GET', 'POST']) @login_required def add_corpus(): - add_corpus_form = AddCorpusForm() - if add_corpus_form.validate_on_submit(): + form = AddCorpusForm() + if form.validate_on_submit(): corpus = Corpus(creator=current_user, - description=add_corpus_form.description.data, - status='unprepared', title=add_corpus_form.title.data) + description=form.description.data, + title=form.title.data) db.session.add(corpus) db.session.commit() - dir = os.path.join(current_app.config['DATA_DIR'], - str(corpus.user_id), 'corpora', str(corpus.id)) try: - os.makedirs(dir) + os.makedirs(corpus.path) except OSError: - flash('[ERROR]: Could not add corpus!', 'corpus') - corpus.delete() - else: - url = url_for('corpora.corpus', corpus_id=corpus.id) - flash('[{}] added'.format(url, corpus.title), - 'corpus') - return redirect(url_for('corpora.corpus', corpus_id=corpus.id)) - return render_template('corpora/add_corpus.html.j2', - add_corpus_form=add_corpus_form, + logging.error('Make dir {} led to an OSError!'.format(corpus.path)) + db.session.delete(corpus) + db.session.commit() + abort(500) + flash('Corpus "{}" added!'.format(corpus.title), 'corpus') + return redirect(url_for('.corpus', corpus_id=corpus.id)) + return render_template('corpora/add_corpus.html.j2', form=form, title='Add corpus') @corpora.route('/import', methods=['GET', 'POST']) @login_required def import_corpus(): - import_corpus_form = ImportCorpusForm() - if import_corpus_form.is_submitted(): - if not import_corpus_form.validate(): - return make_response(import_corpus_form.errors, 400) + form = ImportCorpusForm() + if form.is_submitted(): + if not form.validate(): + return make_response(form.errors, 400) corpus = Corpus(creator=current_user, - description=import_corpus_form.description.data, - status='unprepared', - title=import_corpus_form.title.data) + description=form.description.data, + title=form.title.data) db.session.add(corpus) db.session.commit() - dir = os.path.join(current_app.config['DATA_DIR'], - str(corpus.user_id), 'corpora', str(corpus.id)) try: - os.makedirs(dir) + os.makedirs(corpus.path) except OSError: - flash('[ERROR]: Could not import corpus!', 'corpus') - corpus.delete() + logging.error('Make dir {} led to an OSError!'.format(corpus.path)) + db.session.delete(corpus) + db.session.commit() + flash('Internal Server Error', 'error') + return make_response( + {'redirect_url': url_for('.import_corpus')}, 500) + # Upload zip + archive_file = os.path.join(corpus.path, form.file.data.filename) + form.file.data.save(archive_file) + # Some checks to verify it is a valid exported corpus + with ZipFile(archive_file, 'r') as zip: + contents = zip.namelist() + if set(check_zip_contents).issubset(contents): + # Unzip + shutil.unpack_archive(archive_file, corpus.path) + # Register vrt files to corpus + vrts = glob.glob(corpus.path + '/*.vrt') + for file in vrts: + element_tree = ET.parse(file) + text_node = element_tree.find('text') + corpus_file = CorpusFile( + address=text_node.get('address', 'NULL'), + author=text_node.get('author', 'NULL'), + booktitle=text_node.get('booktitle', 'NULL'), + chapter=text_node.get('chapter', 'NULL'), + corpus=corpus, + editor=text_node.get('editor', 'NULL'), + filename=os.path.basename(file), + institution=text_node.get('institution', 'NULL'), + journal=text_node.get('journal', 'NULL'), + pages=text_node.get('pages', 'NULL'), + publisher=text_node.get('publisher', 'NULL'), + publishing_year=text_node.get('publishing_year', ''), + school=text_node.get('school', 'NULL'), + title=text_node.get('title', 'NULL') + ) + db.session.add(corpus_file) + # finish import and redirect to imported corpus + corpus.status = 'prepared' + db.session.commit() + os.remove(archive_file) + flash('Corpus "{}" imported!'.format(corpus.title), 'corpus') + return make_response( + {'redirect_url': url_for('.corpus', corpus_id=corpus.id)}, 201) else: - # Upload zip - archive_file = os.path.join(current_app.config['DATA_DIR'], dir, - import_corpus_form.file.data.filename) - corpus_dir = os.path.dirname(archive_file) - import_corpus_form.file.data.save(archive_file) - # Some checks to verify it is a valid exported corpus - with ZipFile(archive_file, 'r') as zip: - contents = zip.namelist() - if set(check_zip_contents).issubset(contents): - # Unzip - shutil.unpack_archive(archive_file, corpus_dir) - # Register vrt files to corpus - vrts = glob.glob(corpus_dir + '/*.vrt') - for file in vrts: - element_tree = ET.parse(file) - text_node = element_tree.find('text') - corpus_file = CorpusFile( - address=text_node.get('address', 'NULL'), - author=text_node.get('author', 'NULL'), - booktitle=text_node.get('booktitle', 'NULL'), - chapter=text_node.get('chapter', 'NULL'), - corpus=corpus, - dir=dir, - editor=text_node.get('editor', 'NULL'), - filename=os.path.basename(file), - institution=text_node.get('institution', 'NULL'), - journal=text_node.get('journal', 'NULL'), - pages=text_node.get('pages', 'NULL'), - publisher=text_node.get('publisher', 'NULL'), - publishing_year=text_node.get('publishing_year', ''), - school=text_node.get('school', 'NULL'), - title=text_node.get('title', 'NULL')) - db.session.add(corpus_file) - # finish import and got to imported corpus - url = url_for('corpora.corpus', corpus_id=corpus.id) - corpus.status = 'prepared' - db.session.commit() - os.remove(archive_file) - flash('[{}] imported'.format(url, - corpus.title), - 'corpus') - return make_response( - {'redirect_url': url_for('corpora.corpus', - corpus_id=corpus.id)}, - 201) - else: - # If imported zip is not valid delete corpus and give feedback - corpus.delete() - db.session.commit() - flash('Imported corpus is not valid.', 'error') - return make_response( - {'redirect_url': url_for('corpora.import_corpus')}, - 201) - return render_template('corpora/import_corpus.html.j2', - import_corpus_form=import_corpus_form, + # If imported zip is not valid delete corpus and give feedback + flash('Can not import corpus "{}" not imported: Invalid archive file!', 'error') # noqa + tasks.delete_corpus(corpus.id) + return make_response( + {'redirect_url': url_for('.import_corpus')}, 201) + return render_template('corpora/import_corpus.html.j2', form=form, title='Import Corpus') @@ -131,17 +118,9 @@ def corpus(corpus_id): corpus = Corpus.query.get_or_404(corpus_id) if not (corpus.creator == current_user or current_user.is_administrator()): abort(403) - corpus_files = [dict(filename=corpus_file.filename, - author=corpus_file.author, - title=corpus_file.title, - publishing_year=corpus_file.publishing_year, - corpus_id=corpus.id, - id=corpus_file.id) - for corpus_file in corpus.files] - return render_template('corpora/corpus.html.j2', - corpus=corpus, - corpus_files=corpus_files, - title='Corpus') + corpus_files = [corpus_file.to_dict() for corpus_file in corpus.files] + return render_template('corpora/corpus.html.j2', corpus=corpus, + corpus_files=corpus_files, title='Corpus') @corpora.route('//export') @@ -150,12 +129,11 @@ def export_corpus(corpus_id): corpus = Corpus.query.get_or_404(corpus_id) if not (corpus.creator == current_user or current_user.is_administrator()): abort(403) + # TODO: Check what happens here dir = os.path.dirname(corpus.archive_file) filename = os.path.basename(corpus.archive_file) - return send_from_directory(directory=dir, - filename=filename, - mimetype='zip', - as_attachment=True) + return send_from_directory(as_attachment=True, directory=dir, + filename=filename, mimetype='zip') @corpora.route('//analyse') @@ -168,7 +146,8 @@ def analyse_corpus(corpus_id): display_options_form = DisplayOptionsForm( prefix='display-options-form', result_context=request.args.get('context', 20), - results_per_page=request.args.get('results_per_page', 30)) + results_per_page=request.args.get('results_per_page', 30) + ) query_form = QueryForm(prefix='query-form', query=request.args.get('query')) query_download_form = QueryDownloadForm(prefix='query-download-form') @@ -177,12 +156,12 @@ def analyse_corpus(corpus_id): return render_template( 'corpora/analyse_corpus.html.j2', corpus=corpus, - corpus_id=corpus_id, display_options_form=display_options_form, + inspect_display_options_form=inspect_display_options_form, query_form=query_form, query_download_form=query_download_form, - inspect_display_options_form=inspect_display_options_form, - title='Corpus analysis') + title='Corpus analysis' + ) @corpora.route('//delete') @@ -191,8 +170,8 @@ def delete_corpus(corpus_id): corpus = Corpus.query.get_or_404(corpus_id) if not (corpus.creator == current_user or current_user.is_administrator()): abort(403) + flash('Corpus "{}" marked for deletion!'.format(corpus.title), 'corpus') tasks.delete_corpus(corpus_id) - flash('Corpus deleted!', 'corpus') return redirect(url_for('main.dashboard')) @@ -202,43 +181,33 @@ def add_corpus_file(corpus_id): corpus = Corpus.query.get_or_404(corpus_id) if not (corpus.creator == current_user or current_user.is_administrator()): abort(403) - add_corpus_file_form = AddCorpusFileForm(corpus, - prefix='add-corpus-file-form') - if add_corpus_file_form.is_submitted(): - if not add_corpus_file_form.validate(): - return make_response(add_corpus_file_form.errors, 400) + form = AddCorpusFileForm(corpus, prefix='add-corpus-file-form') + if form.is_submitted(): + if not form.validate(): + return make_response(form.errors, 400) # Save the file - dir = os.path.join(str(corpus.user_id), 'corpora', str(corpus.id)) - add_corpus_file_form.file.data.save( - os.path.join(current_app.config['DATA_DIR'], dir, - add_corpus_file_form.file.data.filename)) - corpus_file = CorpusFile( - address=add_corpus_file_form.address.data, - author=add_corpus_file_form.author.data, - booktitle=add_corpus_file_form.booktitle.data, - chapter=add_corpus_file_form.chapter.data, - corpus=corpus, - dir=dir, - editor=add_corpus_file_form.editor.data, - filename=add_corpus_file_form.file.data.filename, - institution=add_corpus_file_form.institution.data, - journal=add_corpus_file_form.journal.data, - pages=add_corpus_file_form.pages.data, - publisher=add_corpus_file_form.publisher.data, - publishing_year=add_corpus_file_form.publishing_year.data, - school=add_corpus_file_form.school.data, - title=add_corpus_file_form.title.data) + form.file.data.save(os.path.join(corpus.path, form.file.data.filename)) + corpus_file = CorpusFile(address=form.address.data, + author=form.author.data, + booktitle=form.booktitle.data, + chapter=form.chapter.data, + corpus=corpus, + editor=form.editor.data, + filename=form.file.data.filename, + institution=form.institution.data, + journal=form.journal.data, + pages=form.pages.data, + publisher=form.publisher.data, + publishing_year=form.publishing_year.data, + school=form.school.data, + title=form.title.data) db.session.add(corpus_file) corpus.status = 'unprepared' db.session.commit() - flash('Corpus file added!', 'corpus') - return make_response( - {'redirect_url': url_for('corpora.corpus', corpus_id=corpus.id)}, - 201) - return render_template('corpora/add_corpus_file.html.j2', - corpus=corpus, - add_corpus_file_form=add_corpus_file_form, - title='Add corpus file') + flash('Corpus file "{}" added!'.format(corpus_file.filename), 'corpus') + return make_response({'redirect_url': url_for('.corpus', corpus_id=corpus.id)}, 201) # noqa + return render_template('corpora/add_corpus_file.html.j2', corpus=corpus, + form=form, title='Add corpus file') @corpora.route('//files//delete') @@ -250,9 +219,9 @@ def delete_corpus_file(corpus_id, corpus_file_id): if not (corpus_file.corpus.creator == current_user or current_user.is_administrator()): abort(403) + flash('Corpus file "{}" marked for deletion!'.format(corpus_file.filename), 'corpus') # noqa tasks.delete_corpus_file(corpus_file_id) - flash('Corpus file deleted!', 'corpus') - return redirect(url_for('corpora.corpus', corpus_id=corpus_id)) + return redirect(url_for('.corpus', corpus_id=corpus_id)) @corpora.route('//files//download') @@ -264,9 +233,8 @@ def download_corpus_file(corpus_id, corpus_file_id): if not (corpus_file.corpus.creator == current_user or current_user.is_administrator()): abort(403) - dir = os.path.join(current_app.config['DATA_DIR'], - corpus_file.dir) - return send_from_directory(as_attachment=True, directory=dir, + return send_from_directory(as_attachment=True, + directory=corpus_file.corpus.path, filename=corpus_file.filename) @@ -274,48 +242,45 @@ def download_corpus_file(corpus_id, corpus_file_id): methods=['GET', 'POST']) @login_required def corpus_file(corpus_id, corpus_file_id): - corpus = Corpus.query.get_or_404(corpus_id) corpus_file = CorpusFile.query.get_or_404(corpus_file_id) - if not corpus_file.corpus_id == corpus_id: + if corpus_file.corpus_id != corpus_id: abort(404) if not (corpus_file.corpus.creator == current_user or current_user.is_administrator()): abort(403) - edit_corpus_file_form = EditCorpusFileForm(prefix='edit-corpus-file-form') - if edit_corpus_file_form.validate_on_submit(): - corpus_file.address = edit_corpus_file_form.address.data - corpus_file.author = edit_corpus_file_form.author.data - corpus_file.booktitle = edit_corpus_file_form.booktitle.data - corpus_file.chapter = edit_corpus_file_form.chapter.data - corpus_file.editor = edit_corpus_file_form.editor.data - corpus_file.institution = edit_corpus_file_form.institution.data - corpus_file.journal = edit_corpus_file_form.journal.data - corpus_file.pages = edit_corpus_file_form.pages.data - corpus_file.publisher = edit_corpus_file_form.publisher.data - corpus_file.publishing_year = \ - edit_corpus_file_form.publishing_year.data - corpus_file.school = edit_corpus_file_form.school.data - corpus_file.title = edit_corpus_file_form.title.data + form = EditCorpusFileForm(prefix='edit-corpus-file-form') + if form.validate_on_submit(): + corpus_file.address = form.address.data + corpus_file.author = form.author.data + corpus_file.booktitle = form.booktitle.data + corpus_file.chapter = form.chapter.data + corpus_file.editor = form.editor.data + corpus_file.institution = form.institution.data + corpus_file.journal = form.journal.data + corpus_file.pages = form.pages.data + corpus_file.publisher = form.publisher.data + corpus_file.publishing_year = form.publishing_year.data + corpus_file.school = form.school.data + corpus_file.title = form.title.data corpus.status = 'unprepared' db.session.commit() - flash('Corpus file edited!', 'corpus') - return redirect(url_for('corpora.corpus', corpus_id=corpus_id)) + flash('Corpus file "{}" edited!'.format(corpus_file.filename), 'corpus') # noqa + return redirect(url_for('.corpus', corpus_id=corpus_id)) # If no form is submitted or valid, fill out fields with current values - edit_corpus_file_form.address.data = corpus_file.address - edit_corpus_file_form.author.data = corpus_file.author - edit_corpus_file_form.booktitle.data = corpus_file.booktitle - edit_corpus_file_form.chapter.data = corpus_file.chapter - edit_corpus_file_form.editor.data = corpus_file.editor - edit_corpus_file_form.institution.data = corpus_file.institution - edit_corpus_file_form.journal.data = corpus_file.journal - edit_corpus_file_form.pages.data = corpus_file.pages - edit_corpus_file_form.publisher.data = corpus_file.publisher - edit_corpus_file_form.publishing_year.data = corpus_file.publishing_year - edit_corpus_file_form.school.data = corpus_file.school - edit_corpus_file_form.title.data = corpus_file.title - return render_template('corpora/corpus_file.html.j2', - corpus_file=corpus_file, corpus=corpus, - edit_corpus_file_form=edit_corpus_file_form, + form.address.data = corpus_file.address + form.author.data = corpus_file.author + form.booktitle.data = corpus_file.booktitle + form.chapter.data = corpus_file.chapter + form.editor.data = corpus_file.editor + form.institution.data = corpus_file.institution + form.journal.data = corpus_file.journal + form.pages.data = corpus_file.pages + form.publisher.data = corpus_file.publisher + form.publishing_year.data = corpus_file.publishing_year + form.school.data = corpus_file.school + form.title.data = corpus_file.title + return render_template('corpora/corpus_file.html.j2', corpus=corpus, + corpus_file=corpus_file, form=form, title='Edit corpus file') @@ -327,10 +292,10 @@ def prepare_corpus(corpus_id): abort(403) if corpus.files.all(): tasks.build_corpus(corpus_id) - flash('Building Corpus...', 'corpus') + flash('Corpus "{}" has been marked to get build!', 'corpus') else: - flash('Can not build corpus, please add corpus file(s).', 'corpus') - return redirect(url_for('corpora.corpus', corpus_id=corpus_id)) + flash('Can not build corpus "{}": No corpus file(s)!', 'error') + return redirect(url_for('.corpus', corpus_id=corpus_id)) # Following are view functions to add, view etc. exported results. @@ -340,35 +305,29 @@ def add_query_result(): ''' View to import a result as a json file. ''' - add_query_result_form = AddQueryResultForm(prefix='add-query-result-form') - if add_query_result_form.is_submitted(): - if not add_query_result_form.validate(): - return make_response(add_query_result_form.errors, 400) - query_result = QueryResult( - creator=current_user, - description=add_query_result_form.description.data, - filename=add_query_result_form.file.data.filename, - title=add_query_result_form.title.data - ) + form = AddQueryResultForm(prefix='add-query-result-form') + if form.is_submitted(): + if not form.validate(): + return make_response(form.errors, 400) + query_result = QueryResult(creator=current_user, + description=form.description.data, + filename=form.file.data.filename, + title=form.title.data) db.session.add(query_result) db.session.commit() - # create paths to save the uploaded json file - query_result_dir = os.path.join(current_app.config['DATA_DIR'], - str(current_user.id), - 'query_results', - str(query_result.id)) try: - os.makedirs(query_result_dir) - except Exception: + os.makedirs(query_result.path) + except OSError: + logging.error('Make dir {} led to an OSError!'.format(query_result.path)) # noqa db.session.delete(query_result) db.session.commit() flash('Internal Server Error', 'error') - redirect_url = url_for('corpora.add_query_result') - return make_response({'redirect_url': redirect_url}, 500) + return make_response( + {'redirect_url': url_for('.add_query_result')}, 500) # save the uploaded file - query_result_file_path = os.path.join(query_result_dir, + query_result_file_path = os.path.join(query_result.path, query_result.filename) - add_query_result_form.file.data.save(query_result_file_path) + form.file.data.save(query_result_file_path) # parse json from file with open(query_result_file_path, 'r') as file: query_result_file_content = json.load(file) @@ -381,19 +340,16 @@ def add_query_result(): except Exception: tasks.delete_query_result(query_result.id) flash('Uploaded file is invalid', 'result') - redirect_url = url_for('corpora.add_query_result') - return make_response({'redirect_url': redirect_url}, 201) + return make_response( + {'redirect_url': url_for('.add_query_result')}, 201) query_result_file_content.pop('matches') query_result_file_content.pop('cpos_lookup') query_result.query_metadata = query_result_file_content db.session.commit() flash('Query result added!', 'result') - redirect_url = url_for('corpora.query_result', - query_result_id=query_result.id) - return make_response({'redirect_url': redirect_url}, 201) + return make_response({'redirect_url': url_for('.query_result', query_result_id=query_result.id)}, 201) # noqa return render_template('corpora/query_results/add_query_result.html.j2', - add_query_result_form=add_query_result_form, - title='Add query result') + form=form, title='Add query result') @corpora.route('/result/') @@ -404,8 +360,7 @@ def query_result(query_result_id): or current_user.is_administrator()): abort(403) return render_template('corpora/query_results/query_result.html.j2', - query_result=query_result, - title='Query result') + query_result=query_result, title='Query result') @corpora.route('/result//inspect') @@ -427,13 +382,7 @@ def inspect_query_result(query_result_id): inspect_display_options_form = InspectDisplayOptionsForm( prefix='inspect-display-options-form' ) - query_result_file_path = os.path.join( - current_app.config['DATA_DIR'], - str(current_user.id), - 'query_results', - str(query_result.id), - query_result.filename - ) + query_result_file_path = os.path.join(query_result.path, query_result.filename) # noqa with open(query_result_file_path, 'r') as query_result_file: query_result_file_content = json.load(query_result_file) return render_template('corpora/query_results/inspect.html.j2', @@ -452,8 +401,8 @@ def delete_query_result(query_result_id): if not (query_result.creator == current_user or current_user.is_administrator()): abort(403) + flash('Query result "{}" has been marked for deletion!'.format(query_result), 'result') # noqa tasks.delete_query_result(query_result_id) - flash('Query result deleted!', 'result') return redirect(url_for('services.service', service="corpus_analysis")) @@ -464,10 +413,5 @@ def download_query_result(query_result_id): if not (query_result.creator == current_user or current_user.is_administrator()): abort(403) - query_result_dir = os.path.join(current_app.config['DATA_DIR'], - str(current_user.id), - 'query_results', - str(query_result.id)) - return send_from_directory(as_attachment=True, - directory=query_result_dir, + return send_from_directory(as_attachment=True, directory=query_result.path, filename=query_result.filename) diff --git a/web/app/email.py b/web/app/email.py index 4969b05e..4d9f0036 100644 --- a/web/app/email.py +++ b/web/app/email.py @@ -1,11 +1,11 @@ -from flask import render_template +from flask import current_app, render_template from flask_mail import Message from . import mail from .decorators import background def create_message(recipient, subject, template, **kwargs): - msg = Message('[nopaque] {}'.format(subject), recipients=[recipient]) + msg = Message('{} {}'.format(current_app.config['NOPAQUE_MAIL_SUBJECT_PREFIX'], subject), recipients=[recipient]) # noqa msg.body = render_template('{}.txt.j2'.format(template), **kwargs) msg.html = render_template('{}.html.j2'.format(template), **kwargs) return msg diff --git a/web/app/jobs/__init__.py b/web/app/jobs/__init__.py index 43e34614..07e0e1bb 100644 --- a/web/app/jobs/__init__.py +++ b/web/app/jobs/__init__.py @@ -2,4 +2,4 @@ from flask import Blueprint jobs = Blueprint('jobs', __name__) -from . import views # noqa +from . import views diff --git a/web/app/jobs/views.py b/web/app/jobs/views.py index a92013f7..739f153c 100644 --- a/web/app/jobs/views.py +++ b/web/app/jobs/views.py @@ -1,11 +1,10 @@ -from flask import (abort, current_app, flash, redirect, render_template, +from flask import (abort, flash, redirect, render_template, send_from_directory, url_for) from flask_login import current_user, login_required from . import jobs from . import tasks from ..decorators import admin_required from ..models import Job, JobInput, JobResult -import os @jobs.route('/') @@ -14,13 +13,8 @@ def job(job_id): job = Job.query.get_or_404(job_id) if not (job.creator == current_user or current_user.is_administrator()): abort(403) - job_inputs = [dict(filename=input.filename, - id=input.id, - job_id=job.id) - for input in job.inputs] - return render_template('jobs/job.html.j2', - job=job, - job_inputs=job_inputs, + job_inputs = [job_input.to_dict() for job_input in job.inputs] + return render_template('jobs/job.html.j2', job=job, job_inputs=job_inputs, title='Job') @@ -31,7 +25,7 @@ def delete_job(job_id): if not (job.creator == current_user or current_user.is_administrator()): abort(403) tasks.delete_job(job_id) - flash('Job has been deleted!', 'job') + flash('Job has been marked for deletion!', 'job') return redirect(url_for('main.dashboard')) @@ -44,9 +38,8 @@ def download_job_input(job_id, job_input_id): if not (job_input.job.creator == current_user or current_user.is_administrator()): abort(403) - dir = os.path.join(current_app.config['DATA_DIR'], - job_input.dir) - return send_from_directory(as_attachment=True, directory=dir, + return send_from_directory(as_attachment=True, + directory=job_input.job.path, filename=job_input.filename) @@ -56,11 +49,11 @@ def download_job_input(job_id, job_input_id): def restart(job_id): job = Job.query.get_or_404(job_id) if job.status != 'failed': - flash('Could not restart job: status is not "failed"', 'error') + flash('Can not restart job "{}": Status is not "failed"'.format(job.title), 'error') # noqa else: tasks.restart_job(job_id) - flash('Job has been restarted!', 'job') - return redirect(url_for('jobs.job', job_id=job_id)) + flash('Job "{}" has been marked to get restarted!'.format(job.title), 'job') # noqa + return redirect(url_for('.job', job_id=job_id)) @jobs.route('//results//download') @@ -72,7 +65,6 @@ def download_job_result(job_id, job_result_id): if not (job_result.job.creator == current_user or current_user.is_administrator()): abort(403) - dir = os.path.join(current_app.config['DATA_DIR'], - job_result.dir) - return send_from_directory(as_attachment=True, directory=dir, + return send_from_directory(as_attachment=True, + directory=job_result.job.path, filename=job_result.filename) diff --git a/web/app/main/__init__.py b/web/app/main/__init__.py index 74430247..d658fca7 100644 --- a/web/app/main/__init__.py +++ b/web/app/main/__init__.py @@ -2,4 +2,4 @@ from flask import Blueprint main = Blueprint('main', __name__) -from . import views # noqa +from . import views diff --git a/web/app/main/views.py b/web/app/main/views.py index fcc0ed5d..6f3816de 100644 --- a/web/app/main/views.py +++ b/web/app/main/views.py @@ -7,17 +7,16 @@ from ..models import User @main.route('/', methods=['GET', 'POST']) def index(): - login_form = LoginForm(prefix='login-form') - if login_form.validate_on_submit(): - user = User.query.filter_by(username=login_form.user.data).first() + form = LoginForm(prefix='login-form') + if form.validate_on_submit(): + user = User.query.filter_by(username=form.user.data).first() if user is None: - user = User.query.filter_by(email=login_form.user.data).first() - if user is not None and user.verify_password(login_form.password.data): - login_user(user, login_form.remember_me.data) - return redirect(url_for('main.dashboard')) + user = User.query.filter_by(email=form.user.data.lower()).first() + if user is not None and user.verify_password(form.password.data): + login_user(user, form.remember_me.data) + return redirect(url_for('.dashboard')) flash('Invalid email/username or password.') - return render_template('main/index.html.j2', login_form=login_form, - title='nopaque') + return render_template('main/index.html.j2', form=form, title='nopaque') @main.route('/about_and_faq') @@ -31,7 +30,6 @@ def dashboard(): return render_template('main/dashboard.html.j2', title='Dashboard') - @main.route('/news') def news(): return render_template('main/news.html.j2', title='News') @@ -40,12 +38,9 @@ def news(): @main.route('/privacy_policy') def privacy_policy(): return render_template('main/privacy_policy.html.j2', - title=('Information on the processing of personal' - ' data for the nopaque platform (GDPR)')) + title='Privacy statement (GDPR)') @main.route('/terms_of_use') def terms_of_use(): - return render_template('main/terms_of_use.html.j2', - title='General Terms of Use of the platform ' - 'nopaque') + return render_template('main/terms_of_use.html.j2', title='Terms of Use') diff --git a/web/app/models.py b/web/app/models.py index 4f72960a..6af02a2e 100644 --- a/web/app/models.py +++ b/web/app/models.py @@ -7,6 +7,7 @@ from werkzeug.security import generate_password_hash, check_password_hash from werkzeug.utils import secure_filename import xml.etree.ElementTree as ET from . import db, login_manager +import logging import os import shutil @@ -54,7 +55,7 @@ class Role(db.Model): ''' String representation of the Role. For human readability. ''' - return ''.format(role_name=self.name) + return ''.format(self.name) def add_permission(self, perm): ''' @@ -138,6 +139,18 @@ class User(UserMixin, db.Model): cascade='save-update, merge, delete', lazy='dynamic') + @property + def path(self): + return os.path.join(current_app.config['NOPAQUE_DATA_DIR'], str(self.id)) + + @property + def password(self): + raise AttributeError('password is not a readable attribute') + + @password.setter + def password(self, password): + self.password_hash = generate_password_hash(password) + def to_dict(self): return {'id': self.id, 'role_id': self.role_id, @@ -162,7 +175,7 @@ class User(UserMixin, db.Model): ''' String representation of the User. For human readability. ''' - return ''.format(username=self.username) + return ''.format(self.username) def __init__(self, **kwargs): super(User, self).__init__(**kwargs) @@ -220,14 +233,6 @@ class User(UserMixin, db.Model): db.session.add(user) return True - @property - def password(self): - raise AttributeError('password is not a readable attribute') - - @password.setter - def password(self, password): - self.password_hash = generate_password_hash(password) - def verify_password(self, password): return check_password_hash(self.password_hash, password) @@ -244,17 +249,11 @@ class User(UserMixin, db.Model): ''' return self.can(Permission.ADMIN) - def ping(self): - self.last_seen = datetime.utcnow() - db.session.add(self) - def delete(self): ''' Delete the user and its corpora and jobs from database and filesystem. ''' - user_dir = os.path.join(current_app.config['DATA_DIR'], - str(self.id)) - shutil.rmtree(user_dir, ignore_errors=True) + shutil.rmtree(self.path, ignore_errors=True) db.session.delete(self) @@ -280,14 +279,17 @@ class JobInput(db.Model): # Foreign keys job_id = db.Column(db.Integer, db.ForeignKey('jobs.id')) # Fields - dir = db.Column(db.String(255)) filename = db.Column(db.String(255)) + @property + def path(self): + return os.path.join(self.job.path, self.filename) + def __repr__(self): ''' String representation of the JobInput. For human readability. ''' - return ''.format(filename=self.filename) + return ''.format(self.filename) def to_dict(self): return {'id': self.id, @@ -305,14 +307,17 @@ class JobResult(db.Model): # Foreign keys job_id = db.Column(db.Integer, db.ForeignKey('jobs.id')) # Fields - dir = db.Column(db.String(255)) filename = db.Column(db.String(255)) + @property + def path(self): + return os.path.join(self.job.path, self.filename) + def __repr__(self): ''' String representation of the JobResult. For human readability. ''' - return ''.format(filename=self.filename) + return ''.format(self.filename) def to_dict(self): return {'id': self.id, @@ -351,19 +356,16 @@ class Job(db.Model): cascade='save-update, merge, delete') results = db.relationship('JobResult', backref='job', lazy='dynamic', cascade='save-update, merge, delete') - notification_data = db.relationship('NotificationData', - cascade='save-update, merge, delete', - uselist=False, - back_populates='job') # One-to-One relationship - notification_email_data = db.relationship('NotificationEmailData', - cascade='save-update, merge, delete', - back_populates='job') + + @property + def path(self): + return os.path.join(self.creator.path, 'jobs', str(self.id)) def __repr__(self): ''' String representation of the Job. For human readability. ''' - return ''.format(job_title=self.title) + return ''.format(self.title) def create_secure_filename(self): ''' @@ -385,11 +387,7 @@ class Job(db.Model): db.session.commit() sleep(1) db.session.refresh(self) - job_dir = os.path.join(current_app.config['DATA_DIR'], - str(self.user_id), - 'jobs', - str(self.id)) - shutil.rmtree(job_dir, ignore_errors=True) + shutil.rmtree(self.path, ignore_errors=True) db.session.delete(self) def restart(self): @@ -399,12 +397,8 @@ class Job(db.Model): if self.status != 'failed': raise Exception('Could not restart job: status is not "failed"') - job_dir = os.path.join(current_app.config['DATA_DIR'], - str(self.user_id), - 'jobs', - str(self.id)) - shutil.rmtree(os.path.join(job_dir, 'output'), ignore_errors=True) - shutil.rmtree(os.path.join(job_dir, 'pyflow.data'), ignore_errors=True) + shutil.rmtree(os.path.join(self.path, 'output'), ignore_errors=True) + shutil.rmtree(os.path.join(self.path, 'pyflow.data'), ignore_errors=True) # noqa self.end_date = None self.status = 'submitted' @@ -425,63 +419,6 @@ class Job(db.Model): for result in self.results}} -class NotificationData(db.Model): - ''' - Class to define notification data used for sending a notification mail with - nopaque_notify. - ''' - __tablename__ = 'notification_data' - # Primary key - id = db.Column(db.Integer, primary_key=True) - # Foreign Key - job_id = db.Column(db.Integer, db.ForeignKey('jobs.id')) - # relationships - job = db.relationship('Job', back_populates='notification_data') - # Fields - notified_on = db.Column(db.String(16), default=None) - - def __repr__(self): - ''' - String representation of the NotificationData. For human readability. - ''' - return ''.format(id=self.id) - - def to_dict(self): - return {'id': self.id, - 'job_id': self.job_id, - 'job': self.job, - 'notified': self.notified} - - -class NotificationEmailData(db.Model): - ''' - Class to define data that will be used to send a corresponding Notification - via email. - ''' - __tablename__ = 'notification_email_data' - # Primary Key - id = db.Column(db.Integer, primary_key=True) - # Foreign Key - job_id = db.Column(db.Integer, db.ForeignKey('jobs.id')) - # relationships - job = db.relationship('Job', back_populates='notification_email_data') - notify_status = db.Column(db.String(16), default=None) - creation_date = db.Column(db.DateTime(), default=datetime.utcnow) - - def __repr__(self): - ''' - String representation of the NotificationEmailData. For human readability. - ''' - return ''.format(id=self.id) - - def to_dict(self): - return {'id': self.id, - 'job_id': self.job_id, - 'job': self.job, - 'notify_status': self.notify_status, - 'creation_date': self.creation_date} - - class CorpusFile(db.Model): ''' Class to define Files. @@ -496,7 +433,6 @@ class CorpusFile(db.Model): author = db.Column(db.String(255)) booktitle = db.Column(db.String(255)) chapter = db.Column(db.String(255)) - dir = db.Column(db.String(255)) editor = db.Column(db.String(255)) filename = db.Column(db.String(255)) institution = db.Column(db.String(255)) @@ -507,15 +443,15 @@ class CorpusFile(db.Model): school = db.Column(db.String(255)) title = db.Column(db.String(255)) + @property + def path(self): + return os.path.join(self.corpus.path, self.filename) + def delete(self): - corpus_file_path = os.path.join(current_app.config['DATA_DIR'], - str(self.corpus.user_id), - 'corpora', - str(self.corpus_id), - self.filename) try: - os.remove(corpus_file_path) + os.remove(self.path) except OSError: + logging.error('Removing {} led to an OSError!'.format(self.path)) pass db.session.delete(self) self.corpus.status = 'unprepared' @@ -553,13 +489,17 @@ class Corpus(db.Model): description = db.Column(db.String(255)) last_edited_date = db.Column(db.DateTime(), default=datetime.utcnow) max_nr_of_tokens = db.Column(db.BigInteger, default=2147483647) - status = db.Column(db.String(16)) + status = db.Column(db.String(16), default='unprepared') title = db.Column(db.String(32)) archive_file = db.Column(db.String(255)) # Relationships files = db.relationship('CorpusFile', backref='corpus', lazy='dynamic', cascade='save-update, merge, delete') + @property + def path(self): + return os.path.join(self.creator.path, 'corpora', str(self.id)) + def to_dict(self): return {'id': self.id, 'user_id': self.user_id, @@ -571,19 +511,14 @@ class Corpus(db.Model): 'files': {file.id: file.to_dict() for file in self.files}} def build(self): - corpus_dir = os.path.join(current_app.config['DATA_DIR'], - str(self.user_id), - 'corpora', - str(self.id)) - output_dir = os.path.join(corpus_dir, 'merged') + output_dir = os.path.join(self.path, 'merged') shutil.rmtree(output_dir, ignore_errors=True) os.mkdir(output_dir) master_element_tree = ET.ElementTree( ET.fromstring('\n') ) for corpus_file in self.files: - corpus_file_path = os.path.join(corpus_dir, corpus_file.filename) - element_tree = ET.parse(corpus_file_path) + element_tree = ET.parse(corpus_file.path) text_node = element_tree.find('text') text_node.set('address', corpus_file.address or "NULL") text_node.set('author', corpus_file.author) @@ -597,7 +532,7 @@ class Corpus(db.Model): text_node.set('publishing_year', str(corpus_file.publishing_year)) text_node.set('school', corpus_file.school or "NULL") text_node.set('title', corpus_file.title) - element_tree.write(corpus_file_path) + element_tree.write(corpus_file.path) master_element_tree.getroot().insert(1, text_node) output_file = os.path.join(output_dir, 'corpus.vrt') master_element_tree.write(output_file, @@ -607,18 +542,14 @@ class Corpus(db.Model): self.status = 'submitted' def delete(self): - corpus_dir = os.path.join(current_app.config['DATA_DIR'], - str(self.user_id), - 'corpora', - str(self.id)) - shutil.rmtree(corpus_dir, ignore_errors=True) + shutil.rmtree(self.path, ignore_errors=True) db.session.delete(self) def __repr__(self): ''' String representation of the corpus. For human readability. ''' - return ''.format(corpus_title=self.title) + return ''.format(self.title) class QueryResult(db.Model): @@ -636,12 +567,12 @@ class QueryResult(db.Model): query_metadata = db.Column(db.JSON()) title = db.Column(db.String(32)) + @property + def path(self): + return os.path.join(self.creator.path, 'query_results', str(self.id)) + def delete(self): - query_result_dir = os.path.join(current_app.config['DATA_DIR'], - str(self.user_id), - 'query_results', - str(self.id)) - shutil.rmtree(query_result_dir, ignore_errors=True) + shutil.rmtree(self.path, ignore_errors=True) db.session.delete(self) def to_dict(self): @@ -654,7 +585,7 @@ class QueryResult(db.Model): def __repr__(self): ''' - String representation of the CorpusAnalysisResult. For human readability. + String representation of the QueryResult. For human readability. ''' return ''.format(self.title) diff --git a/web/app/query_results/views.py b/web/app/query_results/views.py deleted file mode 100644 index ff6eae5f..00000000 --- a/web/app/query_results/views.py +++ /dev/null @@ -1,150 +0,0 @@ -from . import query_results -from . import tasks -from .. import db -from ..corpora.forms import DisplayOptionsForm, InspectDisplayOptionsForm -from ..models import QueryResult -from .forms import AddQueryResultForm -from flask import (abort, current_app, flash, make_response, redirect, - render_template, request, send_from_directory, url_for) -from flask_login import current_user, login_required -import json -import os -from jsonschema import validate - - -@query_results.route('/add', methods=['GET', 'POST']) -@login_required -def add_query_result(): - ''' - View to import a result as a json file. - ''' - add_query_result_form = AddQueryResultForm(prefix='add-query-result-form') - if add_query_result_form.is_submitted(): - if not add_query_result_form.validate(): - return make_response(add_query_result_form.errors, 400) - query_result = QueryResult( - creator=current_user, - description=add_query_result_form.description.data, - filename=add_query_result_form.file.data.filename, - title=add_query_result_form.title.data - ) - db.session.add(query_result) - db.session.commit() - # create paths to save the uploaded json file - query_result_dir = os.path.join(current_app.config['DATA_DIR'], - str(current_user.id), - 'query_results', - str(query_result.id)) - try: - os.makedirs(query_result_dir) - except Exception: - db.session.delete(query_result) - db.session.commit() - flash('Internal Server Error', 'error') - redirect_url = url_for('query_results.add_query_result') - return make_response({'redirect_url': redirect_url}, 500) - # save the uploaded file - query_result_file_path = os.path.join(query_result_dir, - query_result.filename) - add_query_result_form.file.data.save(query_result_file_path) - # parse json from file - with open(query_result_file_path, 'r') as file: - query_result_file_content = json.load(file) - # parse json schema - with open('app/static/json_schema/nopaque_cqi_py_results_schema.json', 'r') as file: # noqa - schema = json.load(file) - try: - # validate imported json file - validate(instance=query_result_file_content, schema=schema) - except Exception: - tasks.delete_query_result(query_result.id) - flash('Uploaded file is invalid', 'result') - redirect_url = url_for('query_results.add_query_result') - return make_response({'redirect_url': redirect_url}, 201) - query_result_file_content.pop('matches') - query_result_file_content.pop('cpos_lookup') - query_result.query_metadata = query_result_file_content - db.session.commit() - flash('Query result added!', 'result') - redirect_url = url_for('query_results.query_result', - query_result_id=query_result.id) - return make_response({'redirect_url': redirect_url}, 201) - return render_template('corpora/query_results/add_query_result.html.j2', - add_query_result_form=add_query_result_form, - title='Add query result') - - -@query_results.route('/') -@login_required -def query_result(query_result_id): - query_result = QueryResult.query.get_or_404(query_result_id) - if not (query_result.creator == current_user - or current_user.is_administrator()): - abort(403) - return render_template('corpora/query_results/query_result.html.j2', - query_result=query_result, - title='Query result') - - -@query_results.route('//inspect') -@login_required -def inspect_query_result(query_result_id): - ''' - View to inspect imported result file in a corpus analysis like interface - ''' - query_result = QueryResult.query.get_or_404(query_result_id) - query_metadata = query_result.query_metadata - if not (query_result.creator == current_user - or current_user.is_administrator()): - abort(403) - display_options_form = DisplayOptionsForm( - prefix='display-options-form', - results_per_page=request.args.get('results_per_page', 30), - result_context=request.args.get('context', 20) - ) - inspect_display_options_form = InspectDisplayOptionsForm( - prefix='inspect-display-options-form' - ) - query_result_file_path = os.path.join( - current_app.config['DATA_DIR'], - str(current_user.id), - 'query_results', - str(query_result.id), - query_result.filename - ) - with open(query_result_file_path, 'r') as query_result_file: - query_result_file_content = json.load(query_result_file) - return render_template('corpora/query_results/inspect.html.j2', - display_options_form=display_options_form, - inspect_display_options_form=inspect_display_options_form, - query_result_file_content=query_result_file_content, - query_metadata=query_metadata, - title='Inspect query result') - - -@query_results.route('//delete') -@login_required -def delete_query_result(query_result_id): - query_result = QueryResult.query.get_or_404(query_result_id) - if not (query_result.creator == current_user - or current_user.is_administrator()): - abort(403) - tasks.delete_query_result(query_result_id) - flash('Query result deleted!', 'result') - return redirect(url_for('services.service', service="corpus_analysis")) - - -@query_results.route('//download') -@login_required -def download_query_result(query_result_id): - query_result = QueryResult.query.get_or_404(query_result_id) - if not (query_result.creator == current_user - or current_user.is_administrator()): - abort(403) - query_result_dir = os.path.join(current_app.config['DATA_DIR'], - str(current_user.id), - 'query_results', - str(query_result.id)) - return send_from_directory(as_attachment=True, - directory=query_result_dir, - filename=query_result.filename) diff --git a/web/app/services/__init__.py b/web/app/services/__init__.py index 0bc0cfb2..ea9a403f 100644 --- a/web/app/services/__init__.py +++ b/web/app/services/__init__.py @@ -2,4 +2,4 @@ from flask import Blueprint services = Blueprint('services', __name__) -from . import views # noqa +from . import views diff --git a/web/app/services/views.py b/web/app/services/views.py index 6fbf2ef0..a6567985 100644 --- a/web/app/services/views.py +++ b/web/app/services/views.py @@ -1,5 +1,4 @@ -from flask import (abort, current_app, flash, make_response, render_template, - url_for) +from flask import abort, flash, make_response, render_template, url_for from flask_login import current_user, login_required from werkzeug.utils import secure_filename from . import services @@ -7,19 +6,20 @@ from .. import db from ..jobs.forms import AddFileSetupJobForm, AddNLPJobForm, AddOCRJobForm from ..models import Job, JobInput import json +import logging import os SERVICES = {'corpus_analysis': {'name': 'Corpus analysis'}, 'file-setup': {'name': 'File setup', 'resources': {'mem_mb': 4096, 'n_cores': 4}, - 'add_job_form': AddFileSetupJobForm}, + 'form': AddFileSetupJobForm}, 'nlp': {'name': 'Natural Language Processing', 'resources': {'mem_mb': 4096, 'n_cores': 2}, - 'add_job_form': AddNLPJobForm}, + 'form': AddNLPJobForm}, 'ocr': {'name': 'Optical Character Recognition', 'resources': {'mem_mb': 8192, 'n_cores': 4}, - 'add_job_form': AddOCRJobForm}} + 'form': AddOCRJobForm}} @services.route('/', methods=['GET', 'POST']) @@ -30,54 +30,49 @@ def service(service): if service == 'corpus_analysis': return render_template('services/{}.html.j2'.format(service), title=SERVICES[service]['name']) - add_job_form = SERVICES[service]['add_job_form'](prefix='add-job-form') - if add_job_form.is_submitted(): - if not add_job_form.validate(): - return make_response(add_job_form.errors, 400) + form = SERVICES[service]['form'](prefix='add-job-form') + if form.is_submitted(): + if not form.validate(): + return make_response(form.errors, 400) service_args = [] if service == 'nlp': - service_args.append('-l {}'.format(add_job_form.language.data)) - if add_job_form.check_encoding.data: + service_args.append('-l {}'.format(form.language.data)) + if form.check_encoding.data: service_args.append('--check-encoding') if service == 'ocr': - service_args.append('-l {}'.format(add_job_form.language.data)) - if add_job_form.binarization.data: + service_args.append('-l {}'.format(form.language.data)) + if form.binarization.data: service_args.append('--binarize') job = Job(creator=current_user, - description=add_job_form.description.data, + description=form.description.data, mem_mb=SERVICES[service]['resources']['mem_mb'], n_cores=SERVICES[service]['resources']['n_cores'], service=service, service_args=json.dumps(service_args), - service_version=add_job_form.version.data, - status='preparing', title=add_job_form.title.data) + service_version=form.version.data, + status='preparing', title=form.title.data) if job.service != 'corpus_analysis': job.create_secure_filename() db.session.add(job) db.session.commit() - relative_dir = os.path.join(str(job.user_id), 'jobs', str(job.id)) - absolut_dir = os.path.join(current_app.config['DATA_DIR'], - relative_dir) try: - os.makedirs(absolut_dir) + os.makedirs(job.path) except OSError: - job.delete() - flash('Internal Server Error', 'job') - return make_response({'redirect_url': url_for('services.service', - service=service)}, - 500) + logging.error('Make dir {} led to an OSError!'.format(job.path)) + db.session.delete(job) + db.session.commit() + flash('Internal Server Error', 'error') + return make_response( + {'redirect_url': url_for('.service', service=service)}, 500) else: - for file in add_job_form.files.data: + for file in form.files.data: filename = secure_filename(file.filename) - file.save(os.path.join(absolut_dir, filename)) - job_input = JobInput(dir=relative_dir, filename=filename, - job=job) + job_input = JobInput(dir=job.path, filename=filename, job=job) + file.save(job_input.path) db.session.add(job_input) job.status = 'submitted' db.session.commit() - url = url_for('jobs.job', job_id=job.id) - flash('[{}] added'.format(url, job.title), 'job') + flash('Job "{}" added'.format(job.title), 'job') return make_response( {'redirect_url': url_for('jobs.job', job_id=job.id)}, 201) return render_template('services/{}.html.j2'.format(service), - title=SERVICES[service]['name'], - add_job_form=add_job_form) + form=form, title=SERVICES[service]['name']) diff --git a/web/app/settings/forms.py b/web/app/settings/forms.py index 6f7abeef..5c822fd9 100644 --- a/web/app/settings/forms.py +++ b/web/app/settings/forms.py @@ -35,7 +35,7 @@ class EditGeneralSettingsForm(FlaskForm): 'Benutzername', validators=[DataRequired(), Length(1, 64), - Regexp(current_app.config['ALLOWED_USERNAME_REGEX'], + Regexp(current_app.config['NOPAQUE_USERNAME_REGEX'], message='Usernames must have only letters, numbers,' ' dots or underscores')] ) diff --git a/web/app/settings/views.py b/web/app/settings/views.py index 1bd4a07f..a7fc0b38 100644 --- a/web/app/settings/views.py +++ b/web/app/settings/views.py @@ -1,13 +1,9 @@ -from flask import current_app, flash, redirect, render_template, url_for +from flask import flash, redirect, render_template, url_for from flask_login import current_user, login_required, logout_user from . import settings, tasks from .forms import (ChangePasswordForm, EditGeneralSettingsForm, EditNotificationSettingsForm) from .. import db -from ..decorators import admin_required -from ..models import Role, User -import os -import uuid @settings.route('/') @@ -26,8 +22,7 @@ def change_password(): flash('Your password has been updated.') return redirect(url_for('.change_password')) return render_template('settings/change_password.html.j2', - form=form, - title='Change password') + form=form, title='Change password') @settings.route('/edit_general_settings', methods=['GET', 'POST']) @@ -40,12 +35,12 @@ def edit_general_settings(): current_user.username = form.username.data db.session.commit() flash('Your changes have been saved.') + return redirect(url_for('.edit_general_settings')) form.dark_mode.data = current_user.setting_dark_mode form.email.data = current_user.email form.username.data = current_user.username return render_template('settings/edit_general_settings.html.j2', - form=form, - title='General settings') + form=form, title='General settings') @settings.route('/edit_notification_settings', methods=['GET', 'POST']) @@ -59,13 +54,13 @@ def edit_notification_settings(): form.job_status_site_notifications.data db.session.commit() flash('Your changes have been saved.') + return redirect(url_for('.edit_notification_settings')) form.job_status_mail_notifications.data = \ current_user.setting_job_status_mail_notifications form.job_status_site_notifications.data = \ current_user.setting_job_status_site_notifications return render_template('settings/edit_notification_settings.html.j2', - form=form, - title='Notification settings') + form=form, title='Notification settings') @settings.route('/delete') @@ -76,5 +71,5 @@ def delete(): """ tasks.delete_user(current_user.id) logout_user() - flash('Your account has been deleted!') + flash('Your account has been marked for deletion!') return redirect(url_for('main.index')) diff --git a/web/app/tasks/__init__.py b/web/app/tasks/__init__.py index ba33a1fe..9bd21af6 100644 --- a/web/app/tasks/__init__.py +++ b/web/app/tasks/__init__.py @@ -11,15 +11,11 @@ def check_corpora(): corpora = Corpus.query.all() for corpus in filter(lambda corpus: corpus.status == 'submitted', corpora): corpus_utils.create_build_corpus_service(corpus) - for corpus in filter(lambda corpus: (corpus.status == 'queued' - or corpus.status == 'running'), - corpora): + for corpus in filter(lambda corpus: corpus.status in ['queued', 'running'], corpora): # noqa corpus_utils.checkout_build_corpus_service(corpus) - for corpus in filter(lambda corpus: corpus.status == 'start analysis', - corpora): + for corpus in filter(lambda corpus: corpus.status == 'start analysis', corpora): # noqa corpus_utils.create_cqpserver_container(corpus) - for corpus in filter(lambda corpus: corpus.status == 'stop analysis', - corpora): + for corpus in filter(lambda corpus: corpus.status == 'stop analysis', corpora): # noqa corpus_utils.remove_cqpserver_container(corpus) db.session.commit() @@ -28,8 +24,6 @@ def check_jobs(): jobs = Job.query.all() for job in filter(lambda job: job.status == 'submitted', jobs): job_utils.create_job_service(job) - for job in filter(lambda job: job.status == 'queued', jobs): - job_utils.checkout_job_service(job) - for job in filter(lambda job: job.status == 'running', jobs): + for job in filter(lambda job: job.status in ['queued', 'running'], jobs): job_utils.checkout_job_service(job) db.session.commit() diff --git a/web/app/tasks/corpus_utils.py b/web/app/tasks/corpus_utils.py index c06b19ac..dd37ad62 100644 --- a/web/app/tasks/corpus_utils.py +++ b/web/app/tasks/corpus_utils.py @@ -1,4 +1,3 @@ -from flask import current_app from . import docker_client import docker import logging @@ -7,20 +6,14 @@ import shutil def create_build_corpus_service(corpus): - corpus_dir = os.path.join(current_app.config['DATA_DIR'], - str(corpus.user_id), - 'corpora', - str(corpus.id)) - corpus_data_dir = os.path.join(corpus_dir, 'data') - corpus_file = os.path.join(corpus_dir, 'merged', 'corpus.vrt') - corpus_registry_dir = os.path.join(corpus_dir, 'registry') - if os.path.exists(corpus_data_dir): - shutil.rmtree(corpus_data_dir) - if os.path.exists(corpus_registry_dir): - shutil.rmtree(corpus_registry_dir) + corpus_data_dir = os.path.join(corpus.path, 'data') + shutil.rmtree(corpus_data_dir, ignore_errors=True) os.mkdir(corpus_data_dir) + corpus_registry_dir = os.path.join(corpus.path, 'registry') + shutil.rmtree(corpus_registry_dir, ignore_errors=True) os.mkdir(corpus_registry_dir) - service_args = { + corpus_file = os.path.join(corpus.path, 'merged', 'corpus.vrt') + service_kwargs = { 'command': 'docker-entrypoint.sh build-corpus', 'constraints': ['node.role==worker'], 'labels': {'origin': 'nopaque', @@ -32,30 +25,34 @@ def create_build_corpus_service(corpus): 'name': 'build-corpus_{}'.format(corpus.id), 'restart_policy': docker.types.RestartPolicy() } - service_image = \ - 'gitlab.ub.uni-bielefeld.de:4567/sfb1288inf/cqpserver:latest' + service_image = 'gitlab.ub.uni-bielefeld.de:4567/sfb1288inf/cqpserver:latest' # noqa try: - docker_client.services.create(service_image, **service_args) + docker_client.services.create(service_image, **service_kwargs) except docker.errors.APIError as e: - logging.error('create_build_corpus_service({}): '.format(corpus.id) - + '{} (status: {} -> failed)'.format(e, corpus.status)) - corpus.status = 'failed' + logging.error('Create "{}" service raised '.format(service_kwargs['name']) # noqa + + '[docker-APIError] The server returned an error. ' + + 'Details: {}'.format(e)) else: corpus.status = 'queued' - finally: - # TODO: send email - pass def checkout_build_corpus_service(corpus): service_name = 'build-corpus_{}'.format(corpus.id) try: service = docker_client.services.get(service_name) - except docker.errors.NotFound as e: - logging.error('checkout_build_corpus_service({}):'.format(corpus.id) - + ' {} (stauts: {} -> failed)'.format(e, corpus.status)) + except docker.errors.NotFound: + logging.error('Get "{}" service raised '.format(service_name) + + '[docker-NotFound] The service does not exist. ' + + '(corpus.status: {} -> failed)'.format(corpus.status)) corpus.status = 'failed' - # TODO: handle docker.errors.APIError and docker.errors.InvalidVersion + except docker.errors.APIError as e: + logging.error('Get "{}" service raised '.format(service_name) + + '[docker-APIError] The server returned an error. ' + + 'Details: {}'.format(e)) + except docker.errors.InvalidVersion: + logging.error('Get "{}" service raised '.format(service_name) + + '[docker-InvalidVersion] One of the arguments is ' + + 'not supported with the current API version.') else: service_tasks = service.tasks() if not service_tasks: @@ -63,25 +60,23 @@ def checkout_build_corpus_service(corpus): task_state = service_tasks[0].get('Status').get('State') if corpus.status == 'queued' and task_state != 'pending': corpus.status = 'running' - elif corpus.status == 'running' and task_state == 'complete': - service.remove() - corpus.status = 'prepared' - elif corpus.status == 'running' and task_state == 'failed': - service.remove() - corpus.status = task_state - finally: - # TODO: send email - pass + elif corpus.status == 'running' and task_state in ['complete', 'failed']: # noqa + try: + service.remove() + except docker.errors.APIError as e: + logging.error('Remove "{}" service raised '.format(service_name) # noqa + + '[docker-APIError] The server returned an error. ' # noqa + + 'Details: {}'.format(e)) + return + else: + corpus.status = 'prepared' if task_state == 'complete' \ + else 'failed' def create_cqpserver_container(corpus): - corpus_dir = os.path.join(current_app.config['DATA_DIR'], - str(corpus.user_id), - 'corpora', - str(corpus.id)) - corpus_data_dir = os.path.join(corpus_dir, 'data') - corpus_registry_dir = os.path.join(corpus_dir, 'registry') - container_args = { + corpus_data_dir = os.path.join(corpus.path, 'data') + corpus_registry_dir = os.path.join(corpus.path, 'registry') + container_kwargs = { 'command': 'cqpserver', 'detach': True, 'volumes': [corpus_data_dir + ':/corpora/data:rw', @@ -89,20 +84,43 @@ def create_cqpserver_container(corpus): 'name': 'cqpserver_{}'.format(corpus.id), 'network': 'nopaque_default' } - container_image = \ - 'gitlab.ub.uni-bielefeld.de:4567/sfb1288inf/cqpserver:latest' + container_image = 'gitlab.ub.uni-bielefeld.de:4567/sfb1288inf/cqpserver:latest' # noqa + # Check if a cqpserver container already exists. If this is the case, + # remove it and create a new one try: - container = docker_client.containers.get(container_args['name']) + container = docker_client.containers.get(container_kwargs['name']) except docker.errors.NotFound: pass - except docker.errors.DockerException: + except docker.errors.APIError as e: + logging.error('Get "{}" container raised '.format(container_kwargs['name']) + + '[docker-APIError] The server returned an error. ' + + 'Details: {}'.format(e)) return else: - container.remove(force=True) + try: + container.remove(force=True) + except docker.errors.APIError as e: + logging.error('Remove "{}" container raised '.format(container_kwargs['name']) + + '[docker-APIError] The server returned an error. ' + + 'Details: {}'.format(e)) + return try: - docker_client.containers.run(container_image, **container_args) - except docker.errors.DockerException: - return + docker_client.containers.run(container_image, **container_kwargs) + except docker.errors.ContainerError: + # This case should not occur, because detach is True. + logging.error('Run "{}" container raised '.format(container_kwargs['name']) + + '[docker-ContainerError] The container exits with a ' + + 'non-zero exit code and detach is False.') + corpus.status = 'failed' + except docker.errors.ImageNotFound: + logging.error('Run "{}" container raised '.format(container_kwargs['name']) + + '[docker-ImageNotFound] The specified image does not ' + + 'exist.') + corpus.status = 'failed' + except docker.errors.APIError as e: + logging.error('Run "{}" container raised '.format(container_kwargs['name']) + + '[docker-APIError] The server returned an error. ' + + 'Details: {}'.format(e)) else: corpus.status = 'analysing' @@ -113,8 +131,17 @@ def remove_cqpserver_container(corpus): container = docker_client.containers.get(container_name) except docker.errors.NotFound: pass - except docker.errors.DockerException: + except docker.errors.APIError as e: + logging.error('Get "{}" container raised '.format(container_name) + + '[docker-APIError] The server returned an error. ' + + 'Details: {}'.format(e)) return else: - container.remove(force=True) + try: + container.remove(force=True) + except docker.errors.APIError as e: + logging.error('Remove "{}" container raised '.format(container_name) + + '[docker-APIError] The server returned an error. ' + + 'Details: {}'.format(e)) + return corpus.status = 'prepared' diff --git a/web/app/tasks/job_utils.py b/web/app/tasks/job_utils.py index 2094e7cd..68db9507 100644 --- a/web/app/tasks/job_utils.py +++ b/web/app/tasks/job_utils.py @@ -1,7 +1,7 @@ from datetime import datetime -from flask import current_app from . import docker_client from .. import db +from ..email import create_message, send from ..models import JobResult import docker import logging @@ -10,51 +10,60 @@ import os def create_job_service(job): - job_dir = os.path.join(current_app.config['DATA_DIR'], - str(job.user_id), - 'jobs', - str(job.id)) cmd = '{} -i /files -o /files/output'.format(job.service) if job.service == 'file-setup': cmd += ' -f {}'.format(job.secure_filename) cmd += ' --log-dir /files' cmd += ' --zip [{}]_{}'.format(job.service, job.secure_filename) cmd += ' ' + ' '.join(json.loads(job.service_args)) - service_args = {'command': cmd, - 'constraints': ['node.role==worker'], - 'labels': {'origin': 'nopaque', - 'type': 'service.{}'.format(job.service), - 'job_id': str(job.id)}, - 'mounts': [job_dir + ':/files:rw'], - 'name': 'job_{}'.format(job.id), - 'resources': docker.types.Resources( - cpu_reservation=job.n_cores * (10 ** 9), - mem_reservation=job.mem_mb * (10 ** 6)), - 'restart_policy': docker.types.RestartPolicy()} + service_kwargs = {'command': cmd, + 'constraints': ['node.role==worker'], + 'labels': {'origin': 'nopaque', + 'type': 'service.{}'.format(job.service), + 'job_id': str(job.id)}, + 'mounts': [job.path + ':/files:rw'], + 'name': 'job_{}'.format(job.id), + 'resources': docker.types.Resources( + cpu_reservation=job.n_cores * (10 ** 9), + mem_reservation=job.mem_mb * (10 ** 6) + ), + 'restart_policy': docker.types.RestartPolicy()} service_image = ('gitlab.ub.uni-bielefeld.de:4567/sfb1288inf/' + job.service + ':' + job.service_version) try: - docker_client.services.create(service_image, **service_args) + docker_client.services.create(service_image, **service_kwargs) except docker.errors.APIError as e: - logging.error('create_job_service({}): {} '.format(job.id, e) - + '(status: {} -> failed)'.format(job.status)) - job.status = 'failed' + logging.error('Create "{}" service raised '.format(service_kwargs['name']) # noqa + + '[docker-APIError] The server returned an error. ' + + 'Details: {}'.format(e)) else: job.status = 'queued' - finally: - # TODO: send email - pass + msg = create_message( + job.creator.email, + 'Status update for your Job "{}"'.format(job.title), + 'tasks/email/notification', + job=job + ) + send(msg) def checkout_job_service(job): service_name = 'job_{}'.format(job.id) try: service = docker_client.services.get(service_name) - except docker.errors.NotFound as e: - logging.error('checkout_job_service({}): {} '.format(job.id, e) - + '(status: {} -> submitted)'.format(job.status)) - job.status = 'submitted' - # TODO: handle docker.errors.APIError and docker.errors.InvalidVersion + except docker.errors.NotFound: + logging.error('Get "{}" service raised '.format(service_name) + + '[docker-NotFound] The service does not exist. ' + + '(job.status: {} -> failed)'.format(job.status)) + job.status = 'failed' + except docker.errors.APIError as e: + logging.error('Get "{}" service raised '.format(service_name) + + '[docker-APIError] The server returned an error. ' + + 'Details: {}'.format(e)) + except docker.errors.InvalidVersion: + logging.error('Get "{}" service raised '.format(service_name) + + '[docker-InvalidVersion] One of the arguments is ' + + 'not supported with the current API version.') else: service_tasks = service.tasks() if not service_tasks: @@ -62,22 +71,16 @@ def checkout_job_service(job): task_state = service_tasks[0].get('Status').get('State') if job.status == 'queued' and task_state != 'pending': job.status = 'running' - elif job.status == 'queued' and task_state == 'complete': + elif job.status == 'running' and task_state == 'complete': service.remove() job.end_date = datetime.utcnow() job.status = task_state if task_state == 'complete': - results_dir = os.path.join(current_app.config['DATA_DIR'], - str(job.user_id), - 'jobs', - str(job.id), - 'output') - results = filter(lambda x: x.endswith('.zip'), - os.listdir(results_dir)) - for result in results: - job_result = JobResult(dir=results_dir, - filename=result, - job_id=job.id) + job_results_dir = os.path.join(job.path, 'output') + job_results = filter(lambda x: x.endswith('.zip'), + os.listdir(job_results_dir)) + for job_result in job_results: + job_result = JobResult(filename=job_result, job=job) db.session.add(job_result) elif job.status == 'running' and task_state == 'failed': service.remove() @@ -85,6 +88,13 @@ def checkout_job_service(job): job.status = task_state finally: # TODO: send email + msg = create_message( + job.creator.email, + '[nopaque] Status update for your Job "{}"'.format(job.title), + 'tasks/email/notification', + job=job + ) + send(msg) pass diff --git a/web/app/templates/auth/login.html.j2 b/web/app/templates/auth/login.html.j2 index fa99f31a..db83e8b0 100644 --- a/web/app/templates/auth/login.html.j2 +++ b/web/app/templates/auth/login.html.j2 @@ -35,20 +35,20 @@
- {{ login_form.hidden_tag() }} - {{ wtf.render_field(login_form.user, material_icon='person') }} - {{ wtf.render_field(login_form.password, material_icon='vpn_key') }} + {{ form.hidden_tag() }} + {{ wtf.render_field(form.user, material_icon='person') }} + {{ wtf.render_field(form.password, material_icon='vpn_key') }}
- {{ wtf.render_field(login_form.remember_me) }} + {{ wtf.render_field(form.remember_me) }}
- {{ wtf.render_field(login_form.submit, material_icon='send') }} + {{ wtf.render_field(form.submit, material_icon='send') }}
diff --git a/web/app/templates/auth/register.html.j2 b/web/app/templates/auth/register.html.j2 index e41990b3..7b8db08d 100644 --- a/web/app/templates/auth/register.html.j2 +++ b/web/app/templates/auth/register.html.j2 @@ -34,14 +34,14 @@
- {{ registration_form.hidden_tag() }} - {{ wtf.render_field(registration_form.username, data_length='64', material_icon='person') }} - {{ wtf.render_field(registration_form.password, data_length='128', material_icon='vpn_key') }} - {{ wtf.render_field(registration_form.password_confirmation, data_length='128', material_icon='vpn_key') }} - {{ wtf.render_field(registration_form.email, class_='validate', material_icon='email', type='email') }} + {{ form.hidden_tag() }} + {{ wtf.render_field(form.username, data_length='64', material_icon='person') }} + {{ wtf.render_field(form.password, data_length='128', material_icon='vpn_key') }} + {{ wtf.render_field(form.password_confirmation, data_length='128', material_icon='vpn_key') }} + {{ wtf.render_field(form.email, class_='validate', material_icon='email', type='email') }}
- {{ wtf.render_field(registration_form.submit, material_icon='send') }} + {{ wtf.render_field(form.submit, material_icon='send') }}
diff --git a/web/app/templates/auth/reset_password.html.j2 b/web/app/templates/auth/reset_password.html.j2 index 21da27db..4002c158 100644 --- a/web/app/templates/auth/reset_password.html.j2 +++ b/web/app/templates/auth/reset_password.html.j2 @@ -20,12 +20,12 @@
- {{ reset_password_form.hidden_tag() }} - {{ wtf.render_field(reset_password_form.password, data_length='128') }} - {{ wtf.render_field(reset_password_form.password_confirmation, data_length='128') }} + {{ form.hidden_tag() }} + {{ wtf.render_field(form.password, data_length='128') }} + {{ wtf.render_field(form.password_confirmation, data_length='128') }}
- {{ wtf.render_field(reset_password_form.submit, material_icon='send') }} + {{ wtf.render_field(form.submit, material_icon='send') }}
diff --git a/web/app/templates/auth/reset_password_request.html.j2 b/web/app/templates/auth/reset_password_request.html.j2 index 0a2baf9f..07a0808e 100644 --- a/web/app/templates/auth/reset_password_request.html.j2 +++ b/web/app/templates/auth/reset_password_request.html.j2 @@ -20,11 +20,11 @@
- {{ reset_password_request_form.hidden_tag() }} - {{ wtf.render_field(reset_password_request_form.email, class_='validate', material_icon='email', type='email') }} + {{ form.hidden_tag() }} + {{ wtf.render_field(form.email, class_='validate', material_icon='email', type='email') }}
- {{ wtf.render_field(reset_password_request_form.submit, material_icon='send') }} + {{ wtf.render_field(form.submit, material_icon='send') }}
diff --git a/web/app/templates/corpora/add_corpus.html.j2 b/web/app/templates/corpora/add_corpus.html.j2 index 219b098f..c5e5e3b9 100644 --- a/web/app/templates/corpora/add_corpus.html.j2 +++ b/web/app/templates/corpora/add_corpus.html.j2 @@ -27,18 +27,18 @@
- {{ add_corpus_form.hidden_tag() }} + {{ form.hidden_tag() }}
- {{ wtf.render_field(add_corpus_form.title, data_length='32', material_icon='title') }} + {{ wtf.render_field(form.title, data_length='32', material_icon='title') }}
- {{ wtf.render_field(add_corpus_form.description, data_length='255', material_icon='description') }} + {{ wtf.render_field(form.description, data_length='255', material_icon='description') }}
- {{ wtf.render_field(add_corpus_form.submit, material_icon='send') }} + {{ wtf.render_field(form.submit, material_icon='send') }}
diff --git a/web/app/templates/corpora/add_corpus_file.html.j2 b/web/app/templates/corpora/add_corpus_file.html.j2 index 4ca54776..14b09304 100644 --- a/web/app/templates/corpora/add_corpus_file.html.j2 +++ b/web/app/templates/corpora/add_corpus_file.html.j2 @@ -27,24 +27,24 @@
- {{ add_corpus_file_form.hidden_tag() }} + {{ form.hidden_tag() }}
- {{ wtf.render_field(add_corpus_file_form.author, data_length='255', material_icon='person') }} + {{ wtf.render_field(form.author, data_length='255', material_icon='person') }}
- {{ wtf.render_field(add_corpus_file_form.title, data_length='255', material_icon='title') }} + {{ wtf.render_field(form.title, data_length='255', material_icon='title') }}
- {{ wtf.render_field(add_corpus_file_form.publishing_year, material_icon='access_time') }} + {{ wtf.render_field(form.publishing_year, material_icon='access_time') }}
- {{ wtf.render_field(add_corpus_file_form.file, accept='.vrt', placeholder='Choose your .vrt file') }} + {{ wtf.render_field(form.file, accept='.vrt', placeholder='Choose your .vrt file') }}
- {{ wtf.render_field(add_corpus_file_form.submit, material_icon='send') }} + {{ wtf.render_field(form.submit, material_icon='send') }}

@@ -52,7 +52,7 @@
  • addAdd additional metadata
    - {% for field in add_corpus_file_form + {% for field in form if field.short_name not in ['author', 'csrf_token', 'file', 'publishing_year', 'submit', 'title'] %} {{ wtf.render_field(field, data_length='255', material_icon=field.label.text[0:1]) }} {% endfor %} diff --git a/web/app/templates/corpora/analyse_corpus.html.j2 b/web/app/templates/corpora/analyse_corpus.html.j2 index 29dba027..af44f86f 100644 --- a/web/app/templates/corpora/analyse_corpus.html.j2 +++ b/web/app/templates/corpora/analyse_corpus.html.j2 @@ -155,7 +155,7 @@ import { */ document.addEventListener("DOMContentLoaded", () => { // Initialize the client for server client communication in dynamic mode - let corpusId = {{ corpus_id }} + let corpusId = {{ corpus.id }} const client = new Client({'corpusId': corpusId, 'socket': nopaque.socket, 'logging': true, diff --git a/web/app/templates/corpora/corpus_file.html.j2 b/web/app/templates/corpora/corpus_file.html.j2 index 7548604b..d8022303 100644 --- a/web/app/templates/corpora/corpus_file.html.j2 +++ b/web/app/templates/corpora/corpus_file.html.j2 @@ -20,23 +20,23 @@
    - {{ edit_corpus_file_form.hidden_tag() }} + {{ form.hidden_tag() }}
    - {{ wtf.render_field(edit_corpus_file_form.author, data_length='255', material_icon='person') }} + {{ wtf.render_field(form.author, data_length='255', material_icon='person') }}
    - {{ wtf.render_field(edit_corpus_file_form.title, data_length='255', material_icon='title') }} + {{ wtf.render_field(form.title, data_length='255', material_icon='title') }}
    - {{ wtf.render_field(edit_corpus_file_form.publishing_year, material_icon='access_time') }} + {{ wtf.render_field(form.publishing_year, material_icon='access_time') }}
    - {{ wtf.render_field(edit_corpus_file_form.submit, material_icon='send') }} + {{ wtf.render_field(form.submit, material_icon='send') }}

    @@ -44,7 +44,7 @@
  • editEdit additional metadata
    - {% for field in edit_corpus_file_form + {% for field in form if field.short_name not in ['author', 'csrf_token', 'publishing_year', 'submit', 'title'] %} {{ wtf.render_field(field, data_length='255', material_icon=field.label.text[0:1]) }} {% endfor %} diff --git a/web/app/templates/corpora/import_corpus.html.j2 b/web/app/templates/corpora/import_corpus.html.j2 index 0bc47d8e..1a8caf08 100644 --- a/web/app/templates/corpora/import_corpus.html.j2 +++ b/web/app/templates/corpora/import_corpus.html.j2 @@ -1,4 +1,4 @@ -{% extends "nopaque.html.j2" %} + {% extends "nopaque.html.j2" %} {% from '_colors.html.j2' import colors %} {% import 'materialize/wtf.html.j2' as wtf %} @@ -27,23 +27,23 @@
    - {{ import_corpus_form.hidden_tag() }} + {{ form.hidden_tag() }}
    - {{ wtf.render_field(import_corpus_form.title, data_length='32', material_icon='title') }} + {{ wtf.render_field(form.title, data_length='32', material_icon='title') }}
    - {{ wtf.render_field(import_corpus_form.description, data_length='255', material_icon='description') }} + {{ wtf.render_field(form.description, data_length='255', material_icon='description') }}
    - {{ wtf.render_field(import_corpus_form.file, accept='.zip', placeholder='Choose your exported .zip file') }} + {{ wtf.render_field(form.file, accept='.zip', placeholder='Choose your exported .zip file') }}
    - {{ wtf.render_field(import_corpus_form.submit, material_icon='send') }} + {{ wtf.render_field(form.submit, material_icon='send') }}
    diff --git a/web/app/templates/corpora/query_results/add_query_result.html.j2 b/web/app/templates/corpora/query_results/add_query_result.html.j2 index 97a83983..6ad4ebd6 100644 --- a/web/app/templates/corpora/query_results/add_query_result.html.j2 +++ b/web/app/templates/corpora/query_results/add_query_result.html.j2 @@ -27,21 +27,21 @@
    - {{ add_query_result_form.hidden_tag() }} + {{ form.hidden_tag() }}
    - {{ wtf.render_field(add_query_result_form.title, data_length='32', material_icon='title') }} + {{ wtf.render_field(form.title, data_length='32', material_icon='title') }}
    - {{ wtf.render_field(add_query_result_form.description, data_length='255', material_icon='description') }} + {{ wtf.render_field(form.description, data_length='255', material_icon='description') }}
    - {{ wtf.render_field(add_query_result_form.file, accept='.json', placeholder='Choose your .json file') }} + {{ wtf.render_field(form.file, accept='.json', placeholder='Choose your .json file') }}
    - {{ wtf.render_field(add_query_result_form.submit, material_icon='send') }} + {{ wtf.render_field(form.submit, material_icon='send') }}
    diff --git a/web/app/templates/main/index.html.j2 b/web/app/templates/main/index.html.j2 index 8268de6a..0ded9824 100644 --- a/web/app/templates/main/index.html.j2 +++ b/web/app/templates/main/index.html.j2 @@ -159,20 +159,20 @@
    Log in - {{ login_form.hidden_tag() }} - {{ wtf.render_field(login_form.user, material_icon='person') }} - {{ wtf.render_field(login_form.password, material_icon='vpn_key') }} + {{ form.hidden_tag() }} + {{ wtf.render_field(form.user, material_icon='person') }} + {{ wtf.render_field(form.password, material_icon='vpn_key') }}
    - {{ wtf.render_field(login_form.remember_me) }} + {{ wtf.render_field(form.remember_me) }}
    - {{ wtf.render_field(login_form.submit, material_icon='send') }} + {{ wtf.render_field(form.submit, material_icon='send') }}
    diff --git a/web/app/templates/nopaque.html.j2 b/web/app/templates/nopaque.html.j2 index 81eadb6f..a54fda33 100644 --- a/web/app/templates/nopaque.html.j2 +++ b/web/app/templates/nopaque.html.j2 @@ -231,9 +231,9 @@
    info_outlineAbout and faq - {% if config.CONTACT_EMAIL_ADRESS %} - rate_reviewContact - feedbackFeedback + {% if config.NOPAQUE_CONTACT %} + rate_reviewContact + feedbackFeedback {% endif %} codeGitLab
    diff --git a/web/app/templates/services/file-setup.html.j2 b/web/app/templates/services/file-setup.html.j2 index 31f5e824..2674545c 100644 --- a/web/app/templates/services/file-setup.html.j2 +++ b/web/app/templates/services/file-setup.html.j2 @@ -48,24 +48,24 @@
    - {{ add_job_form.hidden_tag() }} + {{ form.hidden_tag() }}
    - {{ wtf.render_field(add_job_form.title, data_length='32', material_icon='title') }} + {{ wtf.render_field(form.title, data_length='32', material_icon='title') }}
    - {{ wtf.render_field(add_job_form.description, data_length='255', material_icon='description') }} + {{ wtf.render_field(form.description, data_length='255', material_icon='description') }}
    - {{ wtf.render_field(add_job_form.files, accept='image/jpeg, image/png, image/tiff', placeholder='Choose your .jpeg, .png or .tiff files') }} + {{ wtf.render_field(form.files, accept='image/jpeg, image/png, image/tiff', placeholder='Choose your .jpeg, .png or .tiff files') }}
    - {{ wtf.render_field(add_job_form.version, material_icon='apps') }} + {{ wtf.render_field(form.version, material_icon='apps') }}
    - {{ wtf.render_field(add_job_form.submit, material_icon='send') }} + {{ wtf.render_field(form.submit, material_icon='send') }}
    diff --git a/web/app/templates/services/nlp.html.j2 b/web/app/templates/services/nlp.html.j2 index 83d83396..4c5018bc 100644 --- a/web/app/templates/services/nlp.html.j2 +++ b/web/app/templates/services/nlp.html.j2 @@ -66,34 +66,34 @@
    - {{ add_job_form.hidden_tag() }} + {{ form.hidden_tag() }}
    - {{ wtf.render_field(add_job_form.title, data_length='32', material_icon='title') }} + {{ wtf.render_field(form.title, data_length='32', material_icon='title') }}
    - {{ wtf.render_field(add_job_form.description, data_length='255', material_icon='description') }} + {{ wtf.render_field(form.description, data_length='255', material_icon='description') }}
    - {{ wtf.render_field(add_job_form.files, accept='text/plain', placeholder='Choose your .txt files') }} + {{ wtf.render_field(form.files, accept='text/plain', placeholder='Choose your .txt files') }}
    - {{ wtf.render_field(add_job_form.language, material_icon='language') }} + {{ wtf.render_field(form.language, material_icon='language') }}
    - {{ wtf.render_field(add_job_form.version, material_icon='apps') }} + {{ wtf.render_field(form.version, material_icon='apps') }}
    Preprocessing
    -

    {{ add_job_form.check_encoding.label.text }}

    +

    {{ form.check_encoding.label.text }}

    If the input files are not created with the nopaque OCR service or you do not know if your text files are UTF-8 encoded, check this switch. We will try to automatically determine the right encoding for your texts to process them.

    @@ -107,7 +107,7 @@
    - {{ wtf.render_field(add_job_form.submit, material_icon='send') }} + {{ wtf.render_field(form.submit, material_icon='send') }}
    diff --git a/web/app/templates/services/ocr.html.j2 b/web/app/templates/services/ocr.html.j2 index 00608e0d..09759e0c 100644 --- a/web/app/templates/services/ocr.html.j2 +++ b/web/app/templates/services/ocr.html.j2 @@ -48,34 +48,34 @@
    - {{ add_job_form.hidden_tag() }} + {{ form.hidden_tag() }}
    - {{ wtf.render_field(add_job_form.title, data_length='32', material_icon='title') }} + {{ wtf.render_field(form.title, data_length='32', material_icon='title') }}
    - {{ wtf.render_field(add_job_form.description, data_length='255', material_icon='description') }} + {{ wtf.render_field(form.description, data_length='255', material_icon='description') }}
    - {{ wtf.render_field(add_job_form.files, accept='application/pdf', color=ocr_color_darken, placeholder='Choose your .pdf files') }} + {{ wtf.render_field(form.files, accept='application/pdf', color=ocr_color_darken, placeholder='Choose your .pdf files') }}
    - {{ wtf.render_field(add_job_form.language, material_icon='language') }} + {{ wtf.render_field(form.language, material_icon='language') }}
    - {{ wtf.render_field(add_job_form.version, material_icon='apps') }} + {{ wtf.render_field(form.version, material_icon='apps') }}
    Preprocessing
    -

    {{ add_job_form.binarization.label.text }}

    +

    {{ form.binarization.label.text }}

    Based on a brightness threshold pixels are converted into either black or white. It is useful to reduce noise in images. (longer duration)

    @@ -134,7 +134,7 @@
    - {{ wtf.render_field(add_job_form.submit, color=ocr_color_darken, material_icon='send') }} + {{ wtf.render_field(form.submit, color=ocr_color_darken, material_icon='send') }}
    diff --git a/web/app/templates/tasks/email/notification.html.j2 b/web/app/templates/tasks/email/notification.html.j2 index 79f0e2dd..1aac0bf7 100644 --- a/web/app/templates/tasks/email/notification.html.j2 +++ b/web/app/templates/tasks/email/notification.html.j2 @@ -1,9 +1,8 @@ -

    Dear {{ user.username }},

    +

    Dear {{ job.creator.username }},

    -

    The status of your Job/Corpus({{ job.id }}) with the title "{{ job.title }}" has changed!

    +

    The status of your Job "{{ job.title }}" has changed!

    It is now {{ job.status }}!

    -

    Time of this status update was: {time} UTC

    -

    You can access your Job/Corpus here: {{ url_for('jobs.job', job_id=job.id) }}

    +

    You can access your Job here: {{ url_for('jobs.job', job_id=job.id) }}

    Kind regards!
    Your nopaque team

    diff --git a/web/app/templates/tasks/email/notification.txt.j2 b/web/app/templates/tasks/email/notification.txt.j2 index 25d797c8..03012b3e 100644 --- a/web/app/templates/tasks/email/notification.txt.j2 +++ b/web/app/templates/tasks/email/notification.txt.j2 @@ -1,10 +1,9 @@ -Dear {{ user.username }}, +Dear {{ job.creator.username }}, -The status of your Job/Corpus({{ job.id }}) with the title "{{ job.title }}" has changed! +The status of your Job "{{ job.title }}" has changed! It is now {{ job.status }}! -Time of this status update was: {time} UTC -You can access your Job/Corpus here: {{ url_for('jobs.job', job_id=job.id) }} +You can access your Job here: {{ url_for('jobs.job', job_id=job.id) }} Kind regards! Your nopaque team diff --git a/web/boot.sh b/web/boot.sh index f39bb4c8..9c87cfd1 100755 --- a/web/boot.sh +++ b/web/boot.sh @@ -1,5 +1,6 @@ #!/bin/bash source venv/bin/activate + while true; do flask deploy if [[ "$?" == "0" ]]; then diff --git a/web/config.py b/web/config.py index 4ca3704f..97e07697 100644 --- a/web/config.py +++ b/web/config.py @@ -7,103 +7,96 @@ ROOT_DIR = os.path.abspath(os.path.dirname(__file__)) class Config: - ''' # Cookies # ''' - REMEMBER_COOKIE_HTTPONLY = True - REMEMBER_COOKIE_SECURE = os.environ.get( - 'NOPAQUE_REMEMBER_COOKIE_SECURE', 'false').lower() == 'true' - SESSION_COOKIE_SECURE = os.environ.get( - 'NOPAQUE_SESSION_COOKIE_SECURE', 'false').lower() == 'true' + ''' # Flask # ''' + SECRET_KEY = os.environ.get('SECRET_KEY', 'hard to guess string') + SESSION_COOKIE_SECURE = \ + os.environ.get('SESSION_COOKIE_SECURE', 'false').lower() == 'true' - ''' # Database # ''' + ''' # Flask-Login # ''' + REMEMBER_COOKIE_HTTPONLY = True + REMEMBER_COOKIE_SECURE = \ + os.environ.get('REMEMBER_COOKIE_SECURE', 'false').lower() == 'true' + + ''' # Flask-Mail # ''' + MAIL_DEFAULT_SENDER = os.environ.get('MAIL_DEFAULT_SENDER') + MAIL_PASSWORD = os.environ.get('MAIL_PASSWORD') + MAIL_PORT = int(os.environ.get('MAIL_PORT')) + MAIL_SERVER = os.environ.get('MAIL_SERVER') + MAIL_USERNAME = os.environ.get('MAIL_USERNAME') + MAIL_USE_SSL = os.environ.get('MAIL_USE_SSL', 'false').lower() == 'true' + MAIL_USE_TLS = os.environ.get('MAIL_USE_TLS', 'false').lower() == 'true' + + ''' # Flask-SQLAlchemy # ''' SQLALCHEMY_RECORD_QUERIES = True SQLALCHEMY_TRACK_MODIFICATIONS = False - ''' # Email # ''' - MAIL_DEFAULT_SENDER = os.environ.get('NOPAQUE_SMTP_DEFAULT_SENDER') - MAIL_PASSWORD = os.environ.get('NOPAQUE_SMTP_PASSWORD') - MAIL_PORT = int(os.environ.get('NOPAQUE_SMTP_PORT')) - MAIL_SERVER = os.environ.get('NOPAQUE_SMTP_SERVER') - MAIL_USERNAME = os.environ.get('NOPAQUE_SMTP_USERNAME') - MAIL_USE_SSL = os.environ.get( - 'NOPAQUE_SMTP_USE_SSL', 'false').lower() == 'true' - MAIL_USE_TLS = os.environ.get( - 'NOPAQUE_SMTP_USE_TLS', 'false').lower() == 'true' - - ''' # General # ''' - ADMIN_EMAIL_ADRESS = os.environ.get('NOPAQUE_ADMIN_EMAIL_ADRESS') - ALLOWED_USERNAME_REGEX = '^[A-Za-zÄÖÜäöüß0-9_.]*$' - CONTACT_EMAIL_ADRESS = os.environ.get('NOPAQUE_CONTACT_EMAIL_ADRESS') - DATA_DIR = os.environ.get('NOPAQUE_DATA_DIR', '/mnt/nopaque') - SECRET_KEY = os.environ.get('NOPAQUE_SECRET_KEY', 'hard to guess string') - - ''' # Logging # ''' - LOG_DATE_FORMAT = os.environ.get('NOPAQUE_LOG_DATE_FORMAT', - '%Y-%m-%d %H:%M:%S') - LOG_FILE = os.environ.get('NOPAQUE_LOG_FILE', - os.path.join(ROOT_DIR, 'nopaque.log')) - LOG_FORMAT = os.environ.get( - 'NOPAQUE_LOG_FORMAT', - '[%(asctime)s] %(levelname)s in ' - '%(pathname)s (function: %(funcName)s, line: %(lineno)d): %(message)s' - ) - LOG_LEVEL = os.environ.get('NOPAQUE_LOG_LEVEL', 'WARNING') - - ''' # Message queue # ''' - SOCKETIO_MESSAGE_QUEUE_URI = os.environ.get( - 'NOPAQUE_SOCKETIO_MESSAGE_QUEUE_URI') - - ''' # Proxy fix # ''' - PROXY_FIX_X_FOR = int(os.environ.get('NOPAQUE_PROXY_FIX_X_FOR', '0')) - PROXY_FIX_X_HOST = int(os.environ.get('NOPAQUE_PROXY_FIX_X_HOST', '0')) - PROXY_FIX_X_PORT = int(os.environ.get('NOPAQUE_PROXY_FIX_X_PORT', '0')) - PROXY_FIX_X_PREFIX = int(os.environ.get('NOPAQUE_PROXY_FIX_X_PREFIX', '0')) - PROXY_FIX_X_PROTO = int(os.environ.get('NOPAQUE_PROXY_FIX_X_PROTO', '0')) + ''' # nopaque # ''' + NOPAQUE_ADMIN = os.environ.get('NOPAQUE_ADMIN') + NOPAQUE_CONTACT = os.environ.get('NOPAQUE_CONTACT') + NOPAQUE_DATA_DIR = os.environ.get('NOPAQUE_DATA_DIR', '/mnt/nopaque') + NOPAQUE_MAIL_SUBJECT_PREFIX = '[nopaque]' + NOPAQUE_SOCKETIO_MESSAGE_QUEUE_URI = \ + os.environ.get('NOPAQUE_SOCKETIO_MESSAGE_QUEUE_URI') + NOPAQUE_USERNAME_REGEX = '^[A-Za-zÄÖÜäöüß0-9_.]*$' @classmethod def init_app(cls, app): - # Set up logging according to the corresponding (LOG_*) variables - logging.basicConfig(datefmt=cls.LOG_DATE_FORMAT, - filename=cls.LOG_FILE, - format=cls.LOG_FORMAT, - level=cls.LOG_LEVEL) + # Set up logging according to the corresponding (NOPAQUE_LOG_*) + # environment variables + basic_config_kwargs = { + 'datefmt': os.environ.get('NOPAQUE_LOG_DATE_FORMAT', + '%Y-%m-%d %H:%M:%S'), + 'filename': os.environ.get('NOPAQUE_LOG_FILE', + os.path.join(ROOT_DIR, 'nopaque.log')), + 'format': os.environ.get( + 'NOPAQUE_LOG_FORMAT', + '[%(asctime)s] %(levelname)s in ' + '%(pathname)s (function: %(funcName)s, line: %(lineno)d): ' + '%(message)s' + ), + 'level': os.environ.get('NOPAQUE_LOG_LEVEL', 'WARNING') + } + logging.basicConfig(**basic_config_kwargs) # Set up and apply the ProxyFix middleware according to the - # corresponding (PROXY_FIX_*) variables - app.wsgi_app = ProxyFix(app.wsgi_app, - x_for=cls.PROXY_FIX_X_FOR, - x_host=cls.PROXY_FIX_X_HOST, - x_port=cls.PROXY_FIX_X_PORT, - x_prefix=cls.PROXY_FIX_X_PREFIX, - x_proto=cls.PROXY_FIX_X_PROTO) + # corresponding (NOPAQUE_PROXY_FIX_*) environment variables + proxy_fix_kwargs = { + 'x_for': int(os.environ.get('NOPAQUE_PROXY_FIX_X_FOR', '0')), + 'x_host': int(os.environ.get('NOPAQUE_PROXY_FIX_X_HOST', '0')), + 'x_port': int(os.environ.get('NOPAQUE_PROXY_FIX_X_PORT', '0')), + 'x_prefix': int(os.environ.get('NOPAQUE_PROXY_FIX_X_PREFIX', '0')), + 'x_proto': int(os.environ.get('NOPAQUE_PROXY_FIX_X_PROTO', '0')) + } + app.wsgi_app = ProxyFix(app.wsgi_app, **proxy_fix_kwargs) class DevelopmentConfig(Config): - ''' # Database # ''' + ''' # Flask # ''' + DEBUG = True + + ''' # Flask-SQLAlchemy # ''' SQLALCHEMY_DATABASE_URI = os.environ.get( - 'NOPAQUE_DEV_DATABASE_URL', + 'SQLALCHEMY_DATABASE_URI', 'postgresql://nopaque:nopaque@db/nopaque_dev' ) - ''' # General # ''' - DEBUG = True - class ProductionConfig(Config): - ''' # Database # ''' + ''' # Flask-SQLAlchemy # ''' SQLALCHEMY_DATABASE_URI = os.environ.get( - 'NOPAQUE_DATABASE_URL', 'postgresql://nopaque:nopaque@db/nopaque') + 'SQLALCHEMY_DATABASE_URI', 'postgresql://nopaque:nopaque@db/nopaque') class TestingConfig(Config): - ''' # Database # ''' - SQLALCHEMY_DATABASE_URI = os.environ.get( - 'NOPAQUE_TEST_DATABASE_URL', - 'postgresql://nopaque:nopaque@db/nopaque_test' - ) - - ''' # General # ''' + ''' # Flask # ''' TESTING = True WTF_CSRF_ENABLED = False + ''' # Flask-SQLAlchemy # ''' + SQLALCHEMY_DATABASE_URI = os.environ.get( + 'SQLALCHEMY_DATABASE_URI', + 'postgresql://nopaque:nopaque@db/nopaque_test' + ) + config = {'development': DevelopmentConfig, 'production': ProductionConfig, diff --git a/web/nopaque.py b/web/nopaque.py index 43d69c38..5c5c5af5 100644 --- a/web/nopaque.py +++ b/web/nopaque.py @@ -17,8 +17,7 @@ if os.path.exists(DOTENV_FILE): from app import create_app, db, socketio # noqa from app.models import (Corpus, CorpusFile, Job, JobInput, JobResult, - NotificationData, NotificationEmailData, QueryResult, - Role, User) # noqa + QueryResult, Role, User) # noqa from flask_migrate import Migrate, upgrade # noqa @@ -34,8 +33,6 @@ def make_shell_context(): 'Job': Job, 'JobInput': JobInput, 'JobResult': JobResult, - 'NotificationData': NotificationData, - 'NotificationEmailData': NotificationEmailData, 'QueryResult': QueryResult, 'Role': Role, 'User': User} @@ -53,9 +50,9 @@ def deploy(): @app.cli.command() def tasks(): - from app.tasks import process_corpora, process_jobs - process_corpora() - process_jobs() + from app.tasks import check_corpora, check_jobs + check_corpora() + check_jobs() @app.cli.command()