diff --git a/LICENSE b/LICENSE new file mode 100644 index 00000000..a374dbc2 --- /dev/null +++ b/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2021 Bielefeld University - CRC 1288 - INF + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/app/cli.py b/app/cli.py index f077c645..17720f58 100644 --- a/app/cli.py +++ b/app/cli.py @@ -1,4 +1,5 @@ -from .models import Role +from . import db +from .models import Corpus, Role from flask_migrate import upgrade @@ -19,6 +20,9 @@ def register(app): @daemon.command('run') def run_daemon(): """Run daemon""" + for corpus in Corpus.query.filter(Corpus.num_analysis_sessions > 0): + corpus.num_analysis_sessions = 0 + db.session.commit() from app.daemon import Daemon daemon = Daemon() daemon.run() diff --git a/app/corpora/__init__.py b/app/corpora/__init__.py index af44719d..83cecec5 100644 --- a/app/corpora/__init__.py +++ b/app/corpora/__init__.py @@ -2,4 +2,4 @@ from flask import Blueprint bp = Blueprint('corpora', __name__) -from . 
import events, routes # noqa +from . import cqi_over_socketio, routes # noqa diff --git a/app/corpora/cqi_over_socketio/__init__.py b/app/corpora/cqi_over_socketio/__init__.py new file mode 100644 index 00000000..2cce7834 --- /dev/null +++ b/app/corpora/cqi_over_socketio/__init__.py @@ -0,0 +1,108 @@ +from app import db, socketio +from app.decorators import socketio_login_required +from app.models import Corpus +from flask import session +from flask_login import current_user +from flask_socketio import ConnectionRefusedError +from threading import Lock +import cqi + + +''' +This package tunnels the Corpus Query Interface (CQi) protocol through +Socket.IO (SIO) by wrapping each CQi function in a separate SIO event. + +This module only handles the SIO connect/disconnect, which handles the setup +and teardown of necessary resources for later use. Each CQi function has a +corresponding SIO event. The event handlers are spread across the different +modules within this package. + +Basic concept: +1. A client connects to the SIO namespace and provides the id of a corpus to be + analysed. + 1.1 The analysis session counter of the corpus is incremented. + 1.2 A CQiClient and a (Mutex) Lock belonging to it is created. + 1.3 Wait until the CQP server is running. + 1.4 Connect the CQiClient to the server. + 1.5 Save the CQiClient and the Lock in the session for subsequent use. +2. A client emits an event and may provide a single json object with necessary + arguments for the targeted CQi function. +3. A SIO event handler (decorated with cqi_over_socketio) gets executed. + - The event handler function defines all arguments. Since the arguments + are sent by the client as a single json object, the decorator decomposes + it to fit the function's signature. This also includes type checking and + proper use of the lock (acquire/release) mechanism. +4. Wait for more events. +5. The client disconnects from the SIO namespace. + 5.1 The analysis session counter of the corpus is decremented. 
+ 5.2 The CQiClient and (Mutex) Lock belonging to it are torn down. +''' + + +NAMESPACE = '/corpora/corpus/corpus_analysis' + + +# Import all CQi over Socket.IO event handlers +from .cqi_corpora_corpus_subcorpora import * # noqa +from .cqi_corpora_corpus_structural_attributes import * # noqa +from .cqi_corpora_corpus_positional_attributes import * # noqa +from .cqi_corpora_corpus_alignment_attributes import * # noqa +from .cqi_corpora_corpus import * # noqa +from .cqi_corpora import * # noqa +from .cqi import * # noqa + + +@socketio.on('connect', namespace=NAMESPACE) +@socketio_login_required +def connect(auth): + # the auth variable is used in a hacky way. It contains the corpus id for + # which a corpus analysis session should be started. + corpus_id = auth['corpus_id'] + corpus = Corpus.query.get(corpus_id) + if corpus is None: + # return {'code': 404, 'msg': 'Not Found'} + raise ConnectionRefusedError('Not Found') + if not (corpus.creator == current_user or current_user.is_administrator()): + # return {'code': 403, 'msg': 'Forbidden'} + raise ConnectionRefusedError('Forbidden') + if corpus.status not in ['prepared', 'start analysis', 'analysing', 'stop analysis']: + # return {'code': 424, 'msg': 'Failed Dependency'} + raise ConnectionRefusedError('Failed Dependency') + if corpus.num_analysis_sessions is None: + corpus.num_analysis_sessions = 0 + db.session.commit() + corpus.num_analysis_sessions = Corpus.num_analysis_sessions + 1 + db.session.commit() + retry_counter = 20 + while corpus.status != 'analysing': + if retry_counter == 0: + corpus.num_analysis_sessions = Corpus.num_analysis_sessions - 1 + db.session.commit() + return {'code': 408, 'msg': 'Request Timeout'} + socketio.sleep(3) + retry_counter -= 1 + db.session.refresh(corpus) + cqi_client = cqi.CQiClient('cqpserver_{}'.format(corpus_id)) + session['d'] = { + 'corpus_id': corpus_id, + 'cqi_client': cqi_client, + 'cqi_client_lock': Lock(), + } + # return {'code': 200, 'msg': 'OK'} + + 
+@socketio.on('disconnect', namespace=NAMESPACE) +def disconnect(): + session['d']['cqi_client_lock'].acquire() + try: + session['d']['cqi_client'].disconnect() + except cqi.errors.CQiException: + pass + except BrokenPipeError: + pass + session['d']['cqi_client_lock'].release() + corpus = Corpus.query.get(session['d']['corpus_id']) + corpus.num_analysis_sessions = Corpus.num_analysis_sessions - 1 + db.session.commit() + session.pop('d') + # return {'code': 200, 'msg': 'OK'} diff --git a/app/corpora/cqi_over_socketio/cqi.py b/app/corpora/cqi_over_socketio/cqi.py new file mode 100644 index 00000000..f6edb5fe --- /dev/null +++ b/app/corpora/cqi_over_socketio/cqi.py @@ -0,0 +1,43 @@ +from app import socketio +from app.decorators import socketio_login_required +from socket import gaierror +from . import NAMESPACE as ns +from .utils import cqi_over_socketio +import cqi + + +@socketio.on('cqi.connect', namespace=ns) +@socketio_login_required +@cqi_over_socketio +def cqi_connect(cqi_client: cqi.CQiClient): + try: + cqi_status = cqi_client.connect() + except gaierror as e: + return { + 'code': 500, + 'msg': 'Internal Server Error', + 'payload': {'code': e.args[0], 'desc': e.args[1]} + } + payload = {'code': cqi_status, + 'msg': cqi.api.specification.lookup[cqi_status]} + return {'code': 200, 'msg': 'OK', 'payload': payload} + + +@socketio.on('cqi.disconnect', namespace=ns) +@socketio_login_required +@cqi_over_socketio +def cqi_disconnect(cqi_client: cqi.CQiClient): + cqi_status = cqi_client.disconnect() + payload = {'code': cqi_status, + 'msg': cqi.api.specification.lookup[cqi_status]} + return {'code': 200, 'msg': 'OK', 'payload': payload} + + +@socketio.on('cqi.ping', namespace=ns) +@socketio_login_required +@cqi_over_socketio +def cqi_ping(cqi_client: cqi.CQiClient): + cqi_status = cqi_client.ping() + payload = {'code': cqi_status, + 'msg': cqi.api.specification.lookup[cqi_status]} + return {'code': 200, 'msg': 'OK', 'payload': payload} diff --git 
a/app/corpora/cqi_over_socketio/cqi_corpora.py b/app/corpora/cqi_over_socketio/cqi_corpora.py new file mode 100644 index 00000000..d0f82e96 --- /dev/null +++ b/app/corpora/cqi_over_socketio/cqi_corpora.py @@ -0,0 +1,22 @@ +from app import socketio +from app.decorators import socketio_login_required +from . import NAMESPACE as ns +from .utils import cqi_over_socketio +import cqi + + +@socketio.on('cqi.corpora.get', namespace=ns) +@socketio_login_required +@cqi_over_socketio +def cqi_corpora_get(cqi_client: cqi.CQiClient, corpus_name: str): + cqi_corpus = cqi_client.corpora.get(corpus_name) + payload = {**cqi_corpus.attrs} + return {'code': 200, 'msg': 'OK', 'payload': payload} + + +@socketio.on('cqi.corpora.list', namespace=ns) +@socketio_login_required +@cqi_over_socketio +def cqi_corpora_list(cqi_client: cqi.CQiClient): + payload = [{**x.attrs} for x in cqi_client.corpora.list()] + return {'code': 200, 'msg': 'OK', 'payload': payload} diff --git a/app/corpora/cqi_over_socketio/cqi_corpora_corpus.py b/app/corpora/cqi_over_socketio/cqi_corpora_corpus.py new file mode 100644 index 00000000..bfe8437c --- /dev/null +++ b/app/corpora/cqi_over_socketio/cqi_corpora_corpus.py @@ -0,0 +1,85 @@ +from app import db, socketio +from app.decorators import socketio_login_required +from app.models import Corpus +from flask import session +from . 
import NAMESPACE as ns +from .utils import cqi_over_socketio, lookups_by_cpos +import cqi +import math + + +@socketio.on('cqi.corpora.corpus.drop', namespace=ns) +@socketio_login_required +@cqi_over_socketio +def cqi_corpora_corpus_drop(cqi_client: cqi.CQiClient, corpus_name: str): + cqi_corpus = cqi_client.corpora.get(corpus_name) + cqi_status = cqi_corpus.drop() + payload = {'code': cqi_status, + 'msg': cqi.api.specification.lookup[cqi_status]} + return {'code': 200, 'msg': 'OK', 'payload': payload} + + +@socketio.on('cqi.corpora.corpus.query', namespace=ns) +@socketio_login_required +@cqi_over_socketio +def cqi_corpora_corpus_query(cqi_client: cqi.CQiClient, corpus_name: str, subcorpus_name: str, query: str): # noqa + cqi_corpus = cqi_client.corpora.get(corpus_name) + cqi_status = cqi_corpus.query(subcorpus_name, query) + payload = {'code': cqi_status, + 'msg': cqi.api.specification.lookup[cqi_status]} + return {'code': 200, 'msg': 'OK', 'payload': payload} + + +############################################################################### +# nopaque specific CQi extensions # +############################################################################### +@socketio.on('cqi.corpora.corpus.update_db', namespace=ns) +@socketio_login_required +@cqi_over_socketio +def cqi_corpora_corpus_update_db(cqi_client: cqi.CQiClient, corpus_name: str): + corpus = Corpus.query.get(session['d']['corpus_id']) + corpus.num_tokens = cqi_client.corpora.get('CORPUS').attrs['size'] + db.session.commit() + + +@socketio.on('cqi.corpora.corpus.paginate', namespace=ns) +@socketio_login_required +@cqi_over_socketio +def cqi_corpora_corpus_paginate(cqi_client: cqi.CQiClient, corpus_name: str, page: int = 1, per_page: int = 20): # noqa + cqi_corpus = cqi_client.corpora.get(corpus_name) + # Sanity checks + if ( + per_page < 1 + or page < 1 + or ( + cqi_corpus.attrs['size'] > 0 + and page > math.ceil(cqi_corpus.attrs['size'] / per_page) + ) + ): + return {'code': 416, 'msg': 'Range Not 
Satisfiable'} + first_cpos = (page - 1) * per_page + last_cpos = min(cqi_corpus.attrs['size'], first_cpos + per_page) + cpos_list = [*range(first_cpos, last_cpos)] + lookups = lookups_by_cpos(cqi_corpus, cpos_list) + payload = {} + # the items for the current page + payload['items'] = [cpos_list] + # the lookups for the items + payload['lookups'] = lookups + # the total number of items matching the query + payload['total'] = cqi_corpus.attrs['size'] + # the number of items to be displayed on a page. + payload['per_page'] = per_page + # The total number of pages + payload['pages'] = math.ceil(payload['total'] / payload['per_page']) + # the current page number (1 indexed) + payload['page'] = page if payload['pages'] > 0 else None + # True if a previous page exists + payload['has_prev'] = payload['page'] > 1 if payload['page'] else False + # True if a next page exists. + payload['has_next'] = payload['page'] < payload['pages'] if payload['page'] else False # noqa + # Number of the previous page. + payload['prev_num'] = payload['page'] - 1 if payload['has_prev'] else None + # Number of the next page + payload['next_num'] = payload['page'] + 1 if payload['has_next'] else None + return {'code': 200, 'msg': 'OK', 'payload': payload} diff --git a/app/corpora/cqi_over_socketio/cqi_corpora_corpus_alignment_attributes.py b/app/corpora/cqi_over_socketio/cqi_corpora_corpus_alignment_attributes.py new file mode 100644 index 00000000..95be6771 --- /dev/null +++ b/app/corpora/cqi_over_socketio/cqi_corpora_corpus_alignment_attributes.py @@ -0,0 +1,24 @@ +from app import socketio +from app.decorators import socketio_login_required +from . 
import NAMESPACE as ns +from .utils import cqi_over_socketio +import cqi + + +@socketio.on('cqi.corpora.corpus.alignment_attributes.get', namespace=ns) +@socketio_login_required +@cqi_over_socketio +def cqi_corpora_corpus_alignment_attributes_get(cqi_client: cqi.CQiClient, corpus_name: str, alignment_attribute_name: str): # noqa + cqi_corpus = cqi_client.corpora.get(corpus_name) + cqi_alignment_attribute = cqi_corpus.alignment_attributes.get(alignment_attribute_name) # noqa + payload = {**cqi_alignment_attribute.attrs} + return {'code': 200, 'msg': 'OK', 'payload': payload} + + +@socketio.on('cqi.corpora.corpus.alignment_attributes.list', namespace=ns) +@socketio_login_required +@cqi_over_socketio +def cqi_corpora_corpus_alignment_attributes_list(cqi_client: cqi.CQiClient, corpus_name: str): # noqa + cqi_corpus = cqi_client.corpora.get(corpus_name) + payload = [{**x.attrs} for x in cqi_corpus.alignment_attributes.list()] + return {'code': 200, 'msg': 'OK', 'payload': payload} diff --git a/app/corpora/cqi_over_socketio/cqi_corpora_corpus_positional_attributes.py b/app/corpora/cqi_over_socketio/cqi_corpora_corpus_positional_attributes.py new file mode 100644 index 00000000..e8c11677 --- /dev/null +++ b/app/corpora/cqi_over_socketio/cqi_corpora_corpus_positional_attributes.py @@ -0,0 +1,24 @@ +from app import socketio +from app.decorators import socketio_login_required +from . 
import NAMESPACE as ns +from .utils import cqi_over_socketio +import cqi + + +@socketio.on('cqi.corpora.corpus.positional_attributes.get', namespace=ns) +@socketio_login_required +@cqi_over_socketio +def cqi_corpora_corpus_positional_attributes_get(cqi_client: cqi.CQiClient, corpus_name: str, positional_attribute_name: str): # noqa + cqi_corpus = cqi_client.corpora.get(corpus_name) + cqi_positional_attribute = cqi_corpus.positional_attributes.get(positional_attribute_name) # noqa + payload = {**cqi_positional_attribute.attrs} + return {'code': 200, 'msg': 'OK', 'payload': payload} + + +@socketio.on('cqi.corpora.corpus.positional_attributes.list', namespace=ns) +@socketio_login_required +@cqi_over_socketio +def cqi_corpora_corpus_positional_attributes_list(cqi_client: cqi.CQiClient, corpus_name: str): # noqa + cqi_corpus = cqi_client.corpora.get(corpus_name) + payload = [{**x.attrs} for x in cqi_corpus.positional_attributes.list()] + return {'code': 200, 'msg': 'OK', 'payload': payload} diff --git a/app/corpora/cqi_over_socketio/cqi_corpora_corpus_structural_attributes.py b/app/corpora/cqi_over_socketio/cqi_corpora_corpus_structural_attributes.py new file mode 100644 index 00000000..2b1559f6 --- /dev/null +++ b/app/corpora/cqi_over_socketio/cqi_corpora_corpus_structural_attributes.py @@ -0,0 +1,24 @@ +from app import socketio +from app.decorators import socketio_login_required +from . 
import NAMESPACE as ns +from .utils import cqi_over_socketio +import cqi + + +@socketio.on('cqi.corpora.corpus.structural_attributes.get', namespace=ns) +@socketio_login_required +@cqi_over_socketio +def cqi_corpora_corpus_structural_attributes_get(cqi_client: cqi.CQiClient, corpus_name: str, structural_attribute_name: str): # noqa + cqi_corpus = cqi_client.corpora.get(corpus_name) + cqi_structural_attribute = cqi_corpus.structural_attributes.get(structural_attribute_name) # noqa + payload = {**cqi_structural_attribute.attrs} + return {'code': 200, 'msg': 'OK', 'payload': payload} + + +@socketio.on('cqi.corpora.corpus.structural_attributes.list', namespace=ns) +@socketio_login_required +@cqi_over_socketio +def cqi_corpora_corpus_structural_attributes_list(cqi_client: cqi.CQiClient, corpus_name: str): # noqa + cqi_corpus = cqi_client.corpora.get(corpus_name) + payload = [{**x.attrs} for x in cqi_corpus.structural_attributes.list()] + return {'code': 200, 'msg': 'OK', 'payload': payload} diff --git a/app/corpora/cqi_over_socketio/cqi_corpora_corpus_subcorpora.py b/app/corpora/cqi_over_socketio/cqi_corpora_corpus_subcorpora.py new file mode 100644 index 00000000..419be16b --- /dev/null +++ b/app/corpora/cqi_over_socketio/cqi_corpora_corpus_subcorpora.py @@ -0,0 +1,123 @@ +from app import socketio +from app.decorators import socketio_login_required +from app.models import Corpus +from flask import session +from . 
import NAMESPACE as ns +from .utils import cqi_over_socketio, export_subcorpus +import cqi +import json +import math +import os + + +@socketio.on('cqi.corpora.corpus.subcorpora.get', namespace=ns) +@socketio_login_required +@cqi_over_socketio +def cqi_corpora_corpus_subcorpora_get(cqi_client: cqi.CQiClient, corpus_name: str, subcorpus_name: str): # noqa + cqi_corpus = cqi_client.corpora.get(corpus_name) + cqi_subcorpus = cqi_corpus.subcorpora.get(subcorpus_name) + payload = {**cqi_subcorpus.attrs} + return {'code': 200, 'msg': 'OK', 'payload': payload} + + +@socketio.on('cqi.corpora.corpus.subcorpora.list', namespace=ns) +@socketio_login_required +@cqi_over_socketio +def cqi_corpora_corpus_subcorpora_list(cqi_client: cqi.CQiClient, corpus_name: str): # noqa + cqi_corpus = cqi_client.corpora.get(corpus_name) + payload = [{**x.attrs} for x in cqi_corpus.subcorpora.list()] + return {'code': 200, 'msg': 'OK', 'payload': payload} + + +@socketio.on('cqi.corpora.corpus.subcorpora.subcorpus.drop', namespace=ns) +@socketio_login_required +@cqi_over_socketio +def cqi_corpora_corpus_subcorpora_subcorpus_drop(cqi_client: cqi.CQiClient, corpus_name: str, subcorpus_name: str): # noqa + cqi_corpus = cqi_client.corpora.get(corpus_name) + cqi_subcorpus = cqi_corpus.subcorpora.get(subcorpus_name) + cqi_status = cqi_subcorpus.drop() + payload = {'code': cqi_status, + 'msg': cqi.api.specification.lookup[cqi_status]} + return {'code': 200, 'msg': 'OK', 'payload': payload} + + +@socketio.on('cqi.corpora.corpus.subcorpora.subcorpus.dump', namespace=ns) +@socketio_login_required +@cqi_over_socketio +def cqi_corpora_corpus_subcorpora_subcorpus_dump(cqi_client: cqi.CQiClient): + return {'code': 501, 'msg': 'Not Implemented'} + + +@socketio.on('cqi.corpora.corpus.subcorpora.subcorpus.fdist_1', namespace=ns) +@socketio_login_required +@cqi_over_socketio +def cqi_corpora_corpus_subcorpora_subcorpus_fdist_1(cqi_client: cqi.CQiClient): + return {'code': 501, 'msg': 'Not Implemented'} + + 
+@socketio.on('cqi.corpora.corpus.subcorpora.subcorpus.fdist_2', namespace=ns) +@socketio_login_required +@cqi_over_socketio +def cqi_corpora_corpus_subcorpora_subcorpus_fdist_2(cqi_client: cqi.CQiClient): + return {'code': 501, 'msg': 'Not Implemented'} + + +############################################################################### +# nopaque specific CQi extensions # +############################################################################### +@socketio.on('cqi.corpora.corpus.subcorpora.subcorpus.paginate', namespace=ns) +@socketio_login_required +@cqi_over_socketio +def cqi_corpora_corpus_subcorpora_subcorpus_paginate(cqi_client: cqi.CQiClient, corpus_name: str, subcorpus_name: str, context: int = 50, page: int = 1, per_page: int = 20): # noqa + cqi_corpus = cqi_client.corpora.get(corpus_name) + cqi_subcorpus = cqi_corpus.subcorpora.get(subcorpus_name) + # Sanity checks + if ( + per_page < 1 + or page < 1 + or ( + cqi_subcorpus.attrs['size'] > 0 + and page > math.ceil(cqi_subcorpus.attrs['size'] / per_page) + ) + ): + return {'code': 416, 'msg': 'Range Not Satisfiable'} + offset = (page - 1) * per_page + cutoff = per_page + cqi_results_export = export_subcorpus( + cqi_subcorpus, context=context, cutoff=cutoff, offset=offset) + payload = {} + # the items for the current page + payload['items'] = cqi_results_export.pop('matches') + # the lookups for the items + payload['lookups'] = cqi_results_export + # the total number of items matching the query + payload['total'] = cqi_subcorpus.attrs['size'] + # the number of items to be displayed on a page. + payload['per_page'] = per_page + # The total number of pages + payload['pages'] = math.ceil(payload['total'] / payload['per_page']) + # the current page number (1 indexed) + payload['page'] = page if payload['pages'] > 0 else None + # True if a previous page exists + payload['has_prev'] = payload['page'] > 1 if payload['page'] else False + # True if a next page exists. 
+ payload['has_next'] = payload['page'] < payload['pages'] if payload['page'] else False # noqa + # Number of the previous page. + payload['prev_num'] = payload['page'] - 1 if payload['has_prev'] else None + # Number of the next page + payload['next_num'] = payload['page'] + 1 if payload['has_next'] else None + return {'code': 200, 'msg': 'OK', 'payload': payload} + + +@socketio.on('cqi.corpora.corpus.subcorpora.subcorpus.export', namespace=ns) +@socketio_login_required +@cqi_over_socketio +def cqi_corpora_corpus_subcorpora_subcorpus_export(cqi_client: cqi.CQiClient, corpus_name: str, subcorpus_name: str, context: int = 50): # noqa + cqi_corpus = cqi_client.corpora.get(corpus_name) + cqi_subcorpus = cqi_corpus.subcorpora.get(subcorpus_name) + cqi_subcorpus_export = export_subcorpus(cqi_subcorpus, context=context) + corpus = Corpus.query.get(session['d']['corpus_id']) + file_path = os.path.join(corpus.path, f'{subcorpus_name}.json') + with open(file_path, 'w') as file: + json.dump(cqi_subcorpus_export, file) + return {'code': 200, 'msg': 'OK'} diff --git a/app/corpora/cqi_over_socketio/utils.py b/app/corpora/cqi_over_socketio/utils.py new file mode 100644 index 00000000..04b9f1d0 --- /dev/null +++ b/app/corpora/cqi_over_socketio/utils.py @@ -0,0 +1,129 @@ +from flask import session +from functools import wraps +from inspect import signature +import cqi + + +def cqi_over_socketio(f): + @wraps(f) + def wrapped(*args): + if 'd' not in session: + return {'code': 424, 'msg': 'Failed Dependency'} + f_args = {} + # Check for missing args and if all provided args are of the right type + for param in signature(f).parameters.values(): + if param.annotation == cqi.CQiClient: + f_args[param.name] = session['d']['cqi_client'] + continue + if param.default is param.empty: + # args + if param.name not in args[0]: + return {'code': 400, 'msg': 'Bad Request'} + arg = args[0][param.name] + if type(arg) is not param.annotation: + return {'code': 400, 'msg': 'Bad Request'} + 
f_args[param.name] = arg + else: + # kwargs + if param.name not in args[0]: + continue + arg = args[0][param.name] + if type(arg) is not param.annotation: + return {'code': 400, 'msg': 'Bad Request'} + f_args[param.name] = arg + session['d']['cqi_client_lock'].acquire() + try: + return_value = f(**f_args) + except BrokenPipeError: + pass + except cqi.errors.CQiException as e: + return_value = { + 'code': 500, + 'msg': 'Internal Server Error', + 'payload': { + 'code': e.code, + 'desc': e.description, + 'msg': e.name + } + } + finally: + session['d']['cqi_client_lock'].release() + return return_value + return wrapped + + +def lookups_by_cpos(corpus, cpos_list): + lookups = {} + lookups['cpos_lookup'] = {cpos: {} for cpos in cpos_list} + for attr in corpus.positional_attributes.list(): + cpos_attr_values = attr.values_by_cpos(cpos_list) + for i, cpos in enumerate(cpos_list): + lookups['cpos_lookup'][cpos][attr.attrs['name']] = \ + cpos_attr_values[i] + for attr in corpus.structural_attributes.list(): + # We only want to iterate over non subattributes, identifiable by + # attr.attrs['has_values']==False + if attr.attrs['has_values']: + continue + cpos_attr_ids = attr.ids_by_cpos(cpos_list) + for i, cpos in enumerate(cpos_list): + if cpos_attr_ids[i] == -1: + continue + lookups['cpos_lookup'][cpos][attr.attrs['name']] = cpos_attr_ids[i] + occured_attr_ids = [x for x in set(cpos_attr_ids) if x != -1] + if not occured_attr_ids: + continue + subattrs = corpus.structural_attributes.list(filters={'part_of': attr}) + if not subattrs: + continue + lookup_name = f'{attr.attrs["name"]}_lookup' + lookups[lookup_name] = {} + for attr_id in occured_attr_ids: + lookups[lookup_name][attr_id] = {} + for subattr in subattrs: + subattr_name = subattr.attrs['name'][(len(attr.attrs['name']) + 1):] # noqa + for i, subattr_value in enumerate(subattr.values_by_ids(occured_attr_ids)): # noqa + lookups[lookup_name][occured_attr_ids[i]][subattr_name] = subattr_value # noqa + return lookups + + 
+def export_subcorpus(subcorpus, context=25, cutoff=float('inf'), offset=0): + if subcorpus.attrs['size'] == 0: + return {"matches": []} + first_match = max(0, offset) + last_match = min((offset + cutoff - 1), (subcorpus.attrs['size'] - 1)) + match_boundaries = zip( + subcorpus.dump( + subcorpus.attrs['fields']['match'], first_match, last_match), + subcorpus.dump( + subcorpus.attrs['fields']['matchend'], first_match, last_match) + ) + cpos_set = set() + matches = [] + match_num = offset + 1 + for match_start, match_end in match_boundaries: + c = (match_start, match_end) + if match_start == 0 or context == 0: + lc = None + cpos_list_lbound = match_start + else: + lc_lbound = max(0, (match_start - 1 - context)) + lc_rbound = match_start - 1 + lc = (lc_lbound, lc_rbound) + cpos_list_lbound = lc_lbound + if (match_end == (subcorpus.collection.corpus.attrs['size'] - 1) + or context == 0): + rc = None + cpos_list_rbound = match_end + else: + rc_lbound = match_end + 1 + rc_rbound = min(match_end + 1 + context, + subcorpus.collection.corpus.attrs['size'] - 1) + rc = (rc_lbound, rc_rbound) + cpos_list_rbound = rc_rbound + match = {'num': match_num, 'lc': lc, 'c': c, 'rc': rc} + matches.append(match) + cpos_set.update(range(cpos_list_lbound, cpos_list_rbound + 1)) + match_num += 1 + lookups = lookups_by_cpos(subcorpus.collection.corpus, list(cpos_set)) + return {'matches': matches, **lookups} diff --git a/app/corpora/events.py b/app/corpora/events.py deleted file mode 100644 index 4c8bd606..00000000 --- a/app/corpora/events.py +++ /dev/null @@ -1,304 +0,0 @@ -from datetime import datetime -from flask import current_app, request -from flask_login import current_user -from socket import gaierror -from werkzeug.utils import secure_filename -from .. 
import db, socketio -from ..decorators import socketio_login_required -from ..events.socketio import sessions as socketio_sessions -from ..models import Corpus -import cqi -import math -import os -import shutil - - -''' -' A dictionary containing lists of, with corpus ids associated, Socket.IO -' session ids (sid). {: [, ...], ...} -''' -corpus_analysis_sessions = {} -''' -' A dictionary containing Socket.IO session id - CQi client pairs. -' {: CQiClient, ...} -''' -corpus_analysis_clients = {} - - -@socketio.on('corpus_analysis_init') -@socketio_login_required -def init_corpus_analysis(corpus_id): - corpus = Corpus.query.get(corpus_id) - if corpus is None: - response = {'code': 404, 'desc': None, 'msg': 'Not Found'} - socketio.emit('corpus_analysis_init', response, room=request.sid) - return - if not (corpus.creator == current_user or current_user.is_administrator()): - response = {'code': 403, 'desc': None, 'msg': 'Forbidden'} - socketio.emit('corpus_analysis_init', response, room=request.sid) - return - if corpus.status not in ['prepared', 'start analysis', 'analysing']: - response = {'code': 424, 'desc': None, 'msg': 'Failed Dependency'} - socketio.emit('corpus_analysis_init', response, room=request.sid) - return - if corpus.status == 'prepared': - corpus.status = 'start analysis' - db.session.commit() - socketio.start_background_task(corpus_analysis_session_handler, - current_app._get_current_object(), - corpus_id, current_user.id, request.sid) - - -def corpus_analysis_session_handler(app, corpus_id, user_id, session_id): - with app.app_context(): - ''' Setup analysis session ''' - corpus = Corpus.query.get(corpus_id) - retry_counter = 15 - while corpus.status != 'analysing': - db.session.refresh(corpus) - retry_counter -= 1 - if retry_counter == 0: - response = {'code': 408, 'desc': None, 'msg': 'Request Timeout'} # noqa - socketio.emit('corpus_analysis_init', response, room=session_id) # noqa - corpus.status = 'stop analysis' - db.session.commit() - return - 
socketio.sleep(3) - client = cqi.CQiClient('cqpserver_{}'.format(corpus_id)) - try: - connect_status = client.connect() - payload = {'code': connect_status, 'msg': cqi.api.specification.lookup[connect_status]} # noqa - except cqi.errors.CQiException as e: - handle_cqi_exception('corpus_analysis_init', e, session_id) - corpus.status = 'stop analysis' - db.session.commit() - return - except gaierror: - response = {'code': 500, 'desc': None, 'msg': 'Internal Server Error'} # noqa - socketio.emit('corpus_analysis_init', response, room=session_id) - corpus.status = 'stop analysis' - db.session.commit() - return - corpus_analysis_clients[session_id] = client - if corpus_id in corpus_analysis_sessions: - corpus_analysis_sessions[corpus_id].append(session_id) - else: - corpus_analysis_sessions[corpus_id] = [session_id] - client.status = 'ready' - response = {'code': 200, 'desc': None, 'msg': 'OK', 'payload': payload} - socketio.emit('corpus_analysis_init', response, room=session_id) - ''' Observe analysis session ''' - while session_id in socketio_sessions: - socketio.sleep(3) - ''' Teardown analysis session ''' - if client.status == 'running': - client.status = 'abort' - while client.status != 'ready': - socketio.sleep(0.3) - try: - client.disconnect() - except cqi.errors.CQiException: - pass - corpus_analysis_clients.pop(session_id, None) - corpus_analysis_sessions[corpus_id].remove(session_id) - if not corpus_analysis_sessions[corpus_id]: - corpus_analysis_sessions.pop(corpus_id, None) - corpus.status = 'stop analysis' - db.session.commit() - - -@socketio.on('corpus_analysis_meta_data') -@socketio_login_required -def corpus_analysis_get_meta_data(corpus_id): - # get meta data from db - db_corpus = Corpus.query.get(corpus_id) - metadata = {} - metadata['corpus_name'] = db_corpus.title - metadata['corpus_description'] = db_corpus.description - metadata['corpus_creation_date'] = db_corpus.creation_date.isoformat() + 'Z' - metadata['corpus_last_edited_date'] = \ - 
db_corpus.last_edited_date.isoformat() + 'Z' - client = corpus_analysis_clients.get(request.sid) - if client is None: - response = {'code': 424, 'desc': None, 'msg': 'Failed Dependency'} - socketio.emit('corpus_analysis_meta_data', response, room=request.sid) - return - # check if client is busy or not - if client.status == 'running': - client.status = 'abort' - while client.status != 'ready': - socketio.sleep(0.3) - # get meta data from corpus in cqp server - client.status = 'running' - try: - cwb_corpus = client.corpora.get('CORPUS') - metadata['corpus_properties'] = cwb_corpus.attrs['properties'] - metadata['corpus_size_tokens'] = cwb_corpus.attrs['size'] - - text_attr = cwb_corpus.structural_attributes.get('text') - struct_attrs = cwb_corpus.structural_attributes.list( - filters={'part_of': text_attr}) - text_ids = range(0, (text_attr.attrs['size'])) - texts_metadata = {} - for text_id in text_ids: - texts_metadata[text_id] = {} - for struct_attr in struct_attrs: - texts_metadata[text_id][struct_attr.attrs['name'][(len(text_attr.attrs['name']) + 1):]] = struct_attr.values_by_ids(list(range(struct_attr.attrs['size'])))[text_id] # noqa - metadata['corpus_all_texts'] = texts_metadata - metadata['corpus_analysis_date'] = datetime.utcnow().isoformat() + 'Z' - metadata['corpus_cqi_py_protocol_version'] = client.api.version - metadata['corpus_cqi_py_package_version'] = cqi.__version__ - # TODO: make this dynamically - metadata['corpus_cqpserver_version'] = 'CQPserver v3.4.22' - - # write some metadata to the db - db_corpus.current_nr_of_tokens = metadata['corpus_size_tokens'] - db.session.commit() - - # emit data - payload = metadata - response = {'code': 200, 'desc': None, 'msg': 'OK', 'payload': payload} - socketio.emit('corpus_analysis_meta_data', response, room=request.sid) - except cqi.errors.CQiException as e: - payload = {'code': e.code, 'desc': e.description, 'msg': e.name} - response = {'code': 500, 'desc': None, 'msg': 'Internal Server Error', - 'payload': 
payload} - socketio.emit('corpus_analysis_meta_data', response, room=request.sid) - client.status = 'ready' - - -@socketio.on('corpus_analysis_query') -@socketio_login_required -def corpus_analysis_query(query): - client = corpus_analysis_clients.get(request.sid) - if client is None: - response = {'code': 424, 'desc': None, 'msg': 'Failed Dependency'} - socketio.emit('corpus_analysis_query', response, room=request.sid) - return - if client.status == 'running': - client.status = 'abort' - while client.status != 'ready': - socketio.sleep(0.3) - client.status = 'running' - try: - corpus = client.corpora.get('CORPUS') - query_status = corpus.query(query) - results = corpus.subcorpora.get('Results') - except cqi.errors.CQiException as e: - client.status = 'ready' - handle_cqi_exception('corpus_analysis_query', e, request.sid) - return - payload = {'status': query_status, - 'msg': cqi.api.specification.lookup[query_status], - 'match_count': results.attrs['size']} - response = {'code': 200, 'desc': None, 'msg': 'OK', 'payload': payload} - socketio.emit('corpus_analysis_query', response, room=request.sid) - chunk_size = 100 - chunk_start = 0 - context = 50 - progress = 0 - while chunk_start <= results.attrs['size']: - if client.status == 'abort': - break - try: - chunk = results.export(context=context, cutoff=chunk_size, offset=chunk_start) # noqa - except cqi.errors.CQiException as e: - handle_cqi_exception('corpus_analysis_query', e, request.sid) - break - if (results.attrs['size'] == 0): - progress = 100 - else: - progress = ((chunk_start + chunk_size) / results.attrs['size']) * 100 # noqa - progress = min(100, int(math.ceil(progress))) - payload = {'chunk': chunk, 'progress': progress} - response = {'code': 200, 'desc': None, 'msg': 'OK', 'payload': payload} - socketio.emit('corpus_analysis_query_results', response, room=request.sid) # noqa - chunk_start += chunk_size - client.status = 'ready' - - -@socketio.on('corpus_analysis_get_match_with_full_context') 
-@socketio_login_required -def corpus_analysis_get_match_with_full_context(payload): - type = payload['type'] - data_indexes = payload['data_indexes'] - first_cpos = payload['first_cpos'] - last_cpos = payload['last_cpos'] - client = corpus_analysis_clients.get(request.sid) - if client is None: - response = {'code': 424, 'desc': 'No client found for this session', - 'msg': 'Failed Dependency'} - socketio.emit('corpus_analysis_get_match_with_full_context', response, - room=request.sid) - return - if client.status == 'running': - client.status = 'abort' - while client.status != 'ready': - socketio.sleep(0.3) - client.status = 'running' - try: - corpus = client.corpora.get('CORPUS') - s = corpus.structural_attributes.get('s') - except cqi.errors.CQiException as e: - handle_cqi_exception('corpus_analysis_get_match_with_full_context', e, request.sid) # noqa - return - i = 0 - # Send data one match at a time. - for index, f_cpos, l_cpos in zip(data_indexes, first_cpos, last_cpos): - if client.status == 'abort': - break - i += 1 - matches = [] - cpos_lookup = text_lookup = {} - try: - tmp = s.export(f_cpos, l_cpos, context=10) - except cqi.errors.CQiException as e: - handle_cqi_exception('corpus_analysis_get_match_with_full_context', e, request.sid) # noqa - break - matches.append(tmp['matches'][0]) - cpos_lookup.update(tmp['cpos_lookup']) - text_lookup.update(tmp['text_lookup']) - progress = i / len(data_indexes) * 100 - payload = {'matches': matches, 'progress': progress, - 'cpos_lookup': cpos_lookup, 'text_lookup': text_lookup} - response = {'code': 200, 'desc': None, 'msg': 'OK', 'payload': payload, - 'type': type, 'data_indexes': data_indexes} - socketio.emit('corpus_analysis_get_match_with_full_context', - response, room=request.sid) - client.status = 'ready' - - -@socketio.on('export_corpus') -@socketio_login_required -def export_corpus(corpus_id): - corpus = Corpus.query.get(corpus_id) - if corpus is None: - response = {'code': 404, 'msg': 'Not found'} - 
socketio.emit('export_corpus', response, room=request.sid) - return - if corpus.status != 'prepared': - response = {'code': 412, 'msg': 'Precondition Failed'} - socketio.emit('export_corpus', response, room=request.sid) - return - # delete old corpus archive if it exists/has been build before - if corpus.archive_file is not None and os.path.isfile(corpus.archive_file): - os.remove(corpus.archive_file) - archive_file_base_name = '[corpus]_' + secure_filename(corpus.title) - corpus.archive_file = archive_file_base_name + '.zip' - db.session.commit() - shutil.make_archive( - os.path.join(corpus.creator.path, 'corpora', archive_file_base_name), - 'zip', - corpus.path - ) - socketio.emit('export_corpus_{}'.format(corpus.id), room=request.sid) - - -def handle_cqi_exception(event, exception, room): - response = {'code': 500, - 'desc': None, - 'msg': 'Internal Server Error', - 'payload': {'code': exception.code, - 'desc': exception.description, - 'msg': exception.name}} - socketio.emit(event, response, room=room) diff --git a/app/corpora/forms.py b/app/corpora/forms.py index 5f2d3570..c015d87e 100644 --- a/app/corpora/forms.py +++ b/app/corpora/forms.py @@ -1,8 +1,8 @@ from flask_wtf import FlaskForm from werkzeug.utils import secure_filename -from wtforms import (BooleanField, FileField, StringField, SubmitField, - ValidationError, IntegerField, SelectField) -from wtforms.validators import DataRequired, Length, NumberRange +from wtforms import (FileField, StringField, SubmitField, + ValidationError, IntegerField) +from wtforms.validators import DataRequired, Length class AddCorpusFileForm(FlaskForm): @@ -91,76 +91,3 @@ class ImportCorpusForm(FlaskForm): raise ValidationError('File does not have an approved extension: ' '.zip') field.data.filename = secure_filename(field.data.filename) - - -class QueryForm(FlaskForm): - ''' - Form to submit a query to the server which is executed via cqi-py. 
- ''' - query = StringField('Query', - validators=[DataRequired(), Length(1, 1024)]) - submit = SubmitField('Search') - - -class DisplayOptionsForm(FlaskForm): - ''' - Form to alter how the matches are represented to the user by the user. - ''' - expert_mode = BooleanField('Expert mode') - result_context = SelectField('Result context', - choices=[('', 'Choose your option'), - ('10', '10'), - ('20', '20'), - ('30', '30'), - ('40', '40'), - ('50', '50')]) - results_per_page = SelectField('Results per page', - choices=[('', 'Choose your option'), - ('10', '10'), - ('20', '20'), - ('30', '30'), - ('40', '40'), - ('50', '50')]) - - -class InspectDisplayOptionsForm(FlaskForm): - ''' - Form for the inspect modal where the user can interact with how the current - match is being represented to him. - ''' - expert_mode_inspect = BooleanField('Expert mode') - highlight_sentences = BooleanField('Split sentences') - context_sentences = IntegerField('Context sentences', - validators=[NumberRange(min=0, max=10)], - default=3) - - -class QueryDownloadForm(FlaskForm): - ''' - Form to choose in what file format the analysis results are being - downloaded. WIP. - ''' - file_type = SelectField('File type', - choices=[('', 'Choose file type'), - ('csv', 'csv'), - ('json', 'json'), - ('excel', 'excel'), - ('html', 'html-table')], - validators=[DataRequired()]) - - -class AddQueryResultForm(FlaskForm): - ''' - Form used to import one result json file. 
- ''' - description = StringField('Description', - validators=[DataRequired(), Length(1, 255)]) - file = FileField('File', validators=[DataRequired()]) - title = StringField('Title', validators=[DataRequired(), Length(1, 32)]) - submit = SubmitField() - - def validate_file(self, field): - if not field.data.filename.lower().endswith('.json'): - raise ValidationError('File does not have an approved extension: ' - '.json') - field.data.filename = secure_filename(field.data.filename) diff --git a/app/corpora/query_results_forms.py b/app/corpora/query_results_forms.py new file mode 100644 index 00000000..bb55e513 --- /dev/null +++ b/app/corpora/query_results_forms.py @@ -0,0 +1,21 @@ +from flask_wtf import FlaskForm +from werkzeug.utils import secure_filename +from wtforms import FileField, StringField, SubmitField, ValidationError +from wtforms.validators import DataRequired, Length + + +class AddQueryResultForm(FlaskForm): + ''' + Form used to import one result json file. + ''' + description = StringField('Description', + validators=[DataRequired(), Length(1, 255)]) + file = FileField('File', validators=[DataRequired()]) + title = StringField('Title', validators=[DataRequired(), Length(1, 32)]) + submit = SubmitField() + + def validate_file(self, field): + if not field.data.filename.lower().endswith('.json'): + raise ValidationError('File does not have an approved extension: ' + '.json') + field.data.filename = secure_filename(field.data.filename) diff --git a/app/corpora/query_results_routes.py b/app/corpora/query_results_routes.py new file mode 100644 index 00000000..1ccc477e --- /dev/null +++ b/app/corpora/query_results_routes.py @@ -0,0 +1,134 @@ +from flask import (abort, current_app, flash, make_response, redirect, request, + render_template, url_for, send_from_directory) +from flask_login import current_user, login_required +from . import bp +from . import tasks +from .forms import (AddQueryResultForm, DisplayOptionsForm, + InspectDisplayOptionsForm) +from .. 
import db +from ..models import QueryResult +import json +import os + + +@bp.route('/result/add', methods=['GET', 'POST']) +@login_required +def add_query_result(): + ''' + View to import a result as a json file. + ''' + abort(503) + form = AddQueryResultForm(prefix='add-query-result-form') + if form.is_submitted(): + if not form.validate(): + return make_response(form.errors, 400) + query_result = QueryResult(creator=current_user, + description=form.description.data, + filename=form.file.data.filename, + title=form.title.data) + db.session.add(query_result) + db.session.flush() + db.session.refresh(query_result) + try: + os.makedirs(os.path.dirname(query_result.path)) + except OSError: + current_app.logger.error( + 'Make dir {} led to an OSError!'.format(query_result.path) + ) + db.session.rollback() + flash('Internal Server Error', 'error') + return make_response( + {'redirect_url': url_for('.add_query_result')}, 500) + # save the uploaded file + form.file.data.save(query_result.path) + # parse json from file + with open(query_result.path, 'r') as file: + query_result_file_content = json.load(file) + # parse json schema + # with open('app/static/json_schema/nopaque_cqi_py_results_schema.json', 'r') as file: # noqa + # schema = json.load(file) + # try: + # # validate imported json file + # validate(instance=query_result_file_content, schema=schema) + # except Exception: + # tasks.delete_query_result(query_result.id) + # flash('Uploaded file is invalid', 'result') + # return make_response( + # {'redirect_url': url_for('.add_query_result')}, 201) + query_result_file_content.pop('matches') + query_result_file_content.pop('cpos_lookup') + query_result.query_metadata = query_result_file_content + db.session.commit() + flash('Query result added!', 'result') + return make_response({'redirect_url': url_for('.query_result', query_result_id=query_result.id)}, 201) # noqa + return render_template('corpora/query_results/add_query_result.html.j2', + form=form, title='Add query 
result') + + +@bp.route('/result/') +@login_required +def query_result(query_result_id): + abort(503) + query_result = QueryResult.query.get_or_404(query_result_id) + if not (query_result.creator == current_user + or current_user.is_administrator()): + abort(403) + return render_template('corpora/query_results/query_result.html.j2', + query_result=query_result, title='Query result') + + +@bp.route('/result//inspect') +@login_required +def inspect_query_result(query_result_id): + ''' + View to inspect imported result file in a corpus analysis like interface + ''' + abort(503) + query_result = QueryResult.query.get_or_404(query_result_id) + query_metadata = query_result.query_metadata + if not (query_result.creator == current_user + or current_user.is_administrator()): + abort(403) + display_options_form = DisplayOptionsForm( + prefix='display-options-form', + results_per_page=request.args.get('results_per_page', 30), + result_context=request.args.get('context', 20) + ) + inspect_display_options_form = InspectDisplayOptionsForm( + prefix='inspect-display-options-form' + ) + with open(query_result.path, 'r') as query_result_file: + query_result_file_content = json.load(query_result_file) + return render_template('corpora/query_results/inspect.html.j2', + query_result=query_result, + display_options_form=display_options_form, + inspect_display_options_form=inspect_display_options_form, # noqa + query_result_file_content=query_result_file_content, + query_metadata=query_metadata, + title='Inspect query result') + + +@bp.route('/result//delete') +@login_required +def delete_query_result(query_result_id): + abort(503) + query_result = QueryResult.query.get_or_404(query_result_id) + if not (query_result.creator == current_user + or current_user.is_administrator()): + abort(403) + flash('Query result "{}" has been marked for deletion!'.format(query_result), 'result') # noqa + tasks.delete_query_result(query_result_id) + return redirect(url_for('services.service', 
service="corpus_analysis")) + + +@bp.route('/result//download') +@login_required +def download_query_result(query_result_id): + abort(503) + query_result = QueryResult.query.get_or_404(query_result_id) + if not (query_result.creator == current_user + or current_user.is_administrator()): + abort(403) + return send_from_directory(as_attachment=True, + directory=os.path.dirname(query_result.path), + filename=query_result.filename) diff --git a/app/corpora/routes.py b/app/corpora/routes.py index eff1be55..f700a540 100644 --- a/app/corpora/routes.py +++ b/app/corpora/routes.py @@ -1,16 +1,12 @@ -from flask import (abort, current_app, flash, make_response, redirect, request, +from flask import (abort, current_app, flash, make_response, redirect, render_template, url_for, send_from_directory) from flask_login import current_user, login_required from . import bp from . import tasks -from .forms import (AddCorpusFileForm, AddCorpusForm, AddQueryResultForm, - EditCorpusFileForm, QueryDownloadForm, QueryForm, - DisplayOptionsForm, InspectDisplayOptionsForm, +from .forms import (AddCorpusFileForm, AddCorpusForm, EditCorpusFileForm, ImportCorpusForm) -from jsonschema import validate from .. 
import db -from ..models import Corpus, CorpusFile, QueryResult -import json +from ..models import Corpus, CorpusFile import os import shutil import glob @@ -22,21 +18,22 @@ from .import_corpus import check_zip_contents @bp.route('/add', methods=['GET', 'POST']) @login_required def add_corpus(): - form = AddCorpusForm() + form = AddCorpusForm(prefix='add-corpus-form') if form.validate_on_submit(): - corpus = Corpus(creator=current_user, - description=form.description.data, - title=form.title.data) + corpus = Corpus( + creator=current_user, + description=form.description.data, + title=form.title.data + ) db.session.add(corpus) db.session.flush() db.session.refresh(corpus) try: os.makedirs(corpus.path) - except OSError: - current_app.logger.error( - 'Make dir {} led to an OSError!'.format(corpus.path) - ) + except OSError as e: + current_app.logger.error(f'Could not add corpus: {e}') db.session.rollback() + flash('Internal Server Error', 'error') abort(500) else: db.session.commit() @@ -49,22 +46,23 @@ def add_corpus(): @bp.route('/import', methods=['GET', 'POST']) @login_required def import_corpus(): + abort(503) form = ImportCorpusForm() if form.is_submitted(): if not form.validate(): return make_response(form.errors, 400) - corpus = Corpus(creator=current_user, - description=form.description.data, - title=form.title.data) + corpus = Corpus( + creator=current_user, + description=form.description.data, + title=form.title.data + ) db.session.add(corpus) db.session.flush() db.session.refresh(corpus) try: os.makedirs(corpus.path) - except OSError: - current_app.logger.error( - 'Make dir {} led to an OSError!'.format(corpus.path) - ) + except OSError as e: + current_app.logger.error(f'Could not import corpus: {e}') db.session.rollback() flash('Internal Server Error', 'error') return make_response( @@ -128,9 +126,21 @@ def corpus(corpus_id): corpus_files=corpus_files, title='Corpus') +@bp.route('//analyse') +@login_required +def analyse_corpus(corpus_id): + corpus = 
Corpus.query.get_or_404(corpus_id) + return render_template( + 'corpora/analyse_corpus.html.j2', + corpus=corpus, + title=f'Analyse Corpus {corpus.title}' + ) + + @bp.route('//download') @login_required def download_corpus(corpus_id): + abort(503) corpus = Corpus.query.get_or_404(corpus_id) if not (corpus.creator == current_user or current_user.is_administrator()): abort(403) @@ -142,31 +152,6 @@ def download_corpus(corpus_id): ) -@bp.route('//analyse') -@login_required -def analyse_corpus(corpus_id): - corpus = Corpus.query.get_or_404(corpus_id) - display_options_form = DisplayOptionsForm( - prefix='display-options-form', - result_context=request.args.get('context', 20), - results_per_page=request.args.get('results_per_page', 30) - ) - query_form = QueryForm(prefix='query-form', - query=request.args.get('query')) - query_download_form = QueryDownloadForm(prefix='query-download-form') - inspect_display_options_form = InspectDisplayOptionsForm( - prefix='inspect-display-options-form') - return render_template( - 'corpora/analyse_corpus.html.j2', - corpus=corpus, - display_options_form=display_options_form, - inspect_display_options_form=inspect_display_options_form, - query_form=query_form, - query_download_form=query_download_form, - title='Corpus analysis' - ) - - @bp.route('//delete') @login_required def delete_corpus(corpus_id): @@ -190,20 +175,22 @@ def add_corpus_file(corpus_id): return make_response(form.errors, 400) # Save the file form.file.data.save(os.path.join(corpus.path, form.file.data.filename)) - corpus_file = CorpusFile(address=form.address.data, - author=form.author.data, - booktitle=form.booktitle.data, - chapter=form.chapter.data, - corpus=corpus, - editor=form.editor.data, - filename=form.file.data.filename, - institution=form.institution.data, - journal=form.journal.data, - pages=form.pages.data, - publisher=form.publisher.data, - publishing_year=form.publishing_year.data, - school=form.school.data, - title=form.title.data) + corpus_file = 
CorpusFile( + address=form.address.data, + author=form.author.data, + booktitle=form.booktitle.data, + chapter=form.chapter.data, + corpus=corpus, + editor=form.editor.data, + filename=form.file.data.filename, + institution=form.institution.data, + journal=form.journal.data, + pages=form.pages.data, + publisher=form.publisher.data, + publishing_year=form.publishing_year.data, + school=form.school.data, + title=form.title.data + ) db.session.add(corpus_file) corpus.status = 'unprepared' db.session.commit() @@ -298,122 +285,3 @@ def prepare_corpus(corpus_id): else: flash('Can not build corpus "{}": No corpus file(s)!'.format(corpus.title), 'error') # noqa return redirect(url_for('.corpus', corpus_id=corpus_id)) - - -# Following are view functions to add, view etc. exported results. -@bp.route('/result/add', methods=['GET', 'POST']) -@login_required -def add_query_result(): - ''' - View to import a result as a json file. - ''' - form = AddQueryResultForm(prefix='add-query-result-form') - if form.is_submitted(): - if not form.validate(): - return make_response(form.errors, 400) - query_result = QueryResult(creator=current_user, - description=form.description.data, - filename=form.file.data.filename, - title=form.title.data) - db.session.add(query_result) - db.session.flush() - db.session.refresh(query_result) - try: - os.makedirs(os.path.dirname(query_result.path)) - except OSError: - current_app.logger.error( - 'Make dir {} led to an OSError!'.format(query_result.path) - ) - db.session.rollback() - flash('Internal Server Error', 'error') - return make_response( - {'redirect_url': url_for('.add_query_result')}, 500) - # save the uploaded file - form.file.data.save(query_result.path) - # parse json from file - with open(query_result.path, 'r') as file: - query_result_file_content = json.load(file) - # parse json schema - # with open('app/static/json_schema/nopaque_cqi_py_results_schema.json', 'r') as file: # noqa - # schema = json.load(file) - # try: - # # validate 
imported json file - # validate(instance=query_result_file_content, schema=schema) - # except Exception: - # tasks.delete_query_result(query_result.id) - # flash('Uploaded file is invalid', 'result') - # return make_response( - # {'redirect_url': url_for('.add_query_result')}, 201) - query_result_file_content.pop('matches') - query_result_file_content.pop('cpos_lookup') - query_result.query_metadata = query_result_file_content - db.session.commit() - flash('Query result added!', 'result') - return make_response({'redirect_url': url_for('.query_result', query_result_id=query_result.id)}, 201) # noqa - return render_template('corpora/query_results/add_query_result.html.j2', - form=form, title='Add query result') - - -@bp.route('/result/') -@login_required -def query_result(query_result_id): - query_result = QueryResult.query.get_or_404(query_result_id) - if not (query_result.creator == current_user - or current_user.is_administrator()): - abort(403) - return render_template('corpora/query_results/query_result.html.j2', - query_result=query_result, title='Query result') - - -@bp.route('/result//inspect') -@login_required -def inspect_query_result(query_result_id): - ''' - View to inspect imported result file in a corpus analysis like interface - ''' - query_result = QueryResult.query.get_or_404(query_result_id) - query_metadata = query_result.query_metadata - if not (query_result.creator == current_user - or current_user.is_administrator()): - abort(403) - display_options_form = DisplayOptionsForm( - prefix='display-options-form', - results_per_page=request.args.get('results_per_page', 30), - result_context=request.args.get('context', 20) - ) - inspect_display_options_form = InspectDisplayOptionsForm( - prefix='inspect-display-options-form' - ) - with open(query_result.path, 'r') as query_result_file: - query_result_file_content = json.load(query_result_file) - return render_template('corpora/query_results/inspect.html.j2', - query_result=query_result, - 
display_options_form=display_options_form, - inspect_display_options_form=inspect_display_options_form, # noqa - query_result_file_content=query_result_file_content, - query_metadata=query_metadata, - title='Inspect query result') - - -@bp.route('/result//delete') -@login_required -def delete_query_result(query_result_id): - query_result = QueryResult.query.get_or_404(query_result_id) - if not (query_result.creator == current_user - or current_user.is_administrator()): - abort(403) - flash('Query result "{}" has been marked for deletion!'.format(query_result), 'result') # noqa - tasks.delete_query_result(query_result_id) - return redirect(url_for('services.service', service="corpus_analysis")) - - -@bp.route('/result//download') -@login_required -def download_query_result(query_result_id): - query_result = QueryResult.query.get_or_404(query_result_id) - if not (query_result.creator == current_user - or current_user.is_administrator()): - abort(403) - return send_from_directory(as_attachment=True, - directory=os.path.dirname(query_result.path), - filename=query_result.filename) diff --git a/app/daemon/__init__.py b/app/daemon/__init__.py index 461e0ca8..60adcf2a 100644 --- a/app/daemon/__init__.py +++ b/app/daemon/__init__.py @@ -1,4 +1,5 @@ from app import db +from flask import current_app from time import sleep from .corpus_utils import CheckCorporaMixin from .job_utils import CheckJobsMixin @@ -8,6 +9,11 @@ import docker class Daemon(CheckCorporaMixin, CheckJobsMixin): def __init__(self): self.docker = docker.from_env() + self.docker.login( + username=current_app.config['NOPAQUE_DOCKER_REGISTRY_USERNAME'], + password=current_app.config['NOPAQUE_DOCKER_REGISTRY_PASSWORD'], + registry=current_app.config['NOPAQUE_DOCKER_REGISTRY'] + ) def run(self): while True: diff --git a/app/daemon/corpus_utils.py b/app/daemon/corpus_utils.py index 5bee8848..31cad929 100644 --- a/app/daemon/corpus_utils.py +++ b/app/daemon/corpus_utils.py @@ -8,21 +8,19 @@ import shutil class 
CheckCorporaMixin: def check_corpora(self): corpora = Corpus.query.all() - queued_corpora = list(filter(lambda corpus: corpus.status == 'queued', corpora)) # noqa - running_corpora = list(filter(lambda corpus: corpus.status == 'running', corpora)) # noqa - start_analysis_corpora = list(filter(lambda corpus: corpus.status == 'start analysis', corpora)) # noqa - analysing_corpora = list(filter(lambda corpus: corpus.status == 'analysing', corpora)) # noqa - stop_analysis_corpora = list(filter(lambda corpus: corpus.status == 'stop analysis', corpora)) # noqa - submitted_corpora = list(filter(lambda corpus: corpus.status == 'submitted', corpora)) # noqa - for corpus in submitted_corpora: + for corpus in (x for x in corpora if x.status == 'submitted'): self.create_build_corpus_service(corpus) - for corpus in queued_corpora + running_corpora: + for corpus in (x for x in corpora if x.status == 'queued' or x.status == 'running'): # noqa self.checkout_build_corpus_service(corpus) - for corpus in start_analysis_corpora: - self.create_cqpserver_container(corpus) - for corpus in analysing_corpora: + for corpus in (x for x in corpora if x.status == 'prepared' and x.num_analysis_sessions > 0): # noqa + corpus.status = 'start analysis' + for corpus in (x for x in corpora if x.status == 'analysing' and x.num_analysis_sessions == 0): # noqa + corpus.status = 'stop analysis' + for corpus in (x for x in corpora if x.status == 'analysing'): self.checkout_analysing_corpus_container(corpus) - for corpus in stop_analysis_corpora: + for corpus in (x for x in corpora if x.status == 'start analysis'): + self.create_cqpserver_container(corpus) + for corpus in (x for x in corpora if x.status == 'stop analysis'): self.remove_cqpserver_container(corpus) def create_build_corpus_service(self, corpus): @@ -32,7 +30,7 @@ class CheckCorporaMixin: ''' ## Constraints ## ''' constraints = ['node.role==worker'] ''' ## Image ## ''' - image = current_app.config['DOCKER_IMAGE_PREFIX'] + 'cqpserver:latest' + 
image = f'{current_app.config["NOPAQUE_DOCKER_IMAGE_PREFIX"]}cqpserver:r1674' # noqa ''' ## Labels ## ''' labels = { 'origin': current_app.config['SERVER_NAME'], @@ -43,27 +41,24 @@ class CheckCorporaMixin: ''' ### Corpus file mount ### ''' corpus_file_source = os.path.join(corpus.path, 'merged', 'corpus.vrt') corpus_file_target = '/root/files/corpus.vrt' - corpus_file_mount = \ - corpus_file_source + ':' + corpus_file_target + ':ro' + corpus_file_mount = f'{corpus_file_source}:{corpus_file_target}:ro' ''' ### Corpus data mount ### ''' corpus_data_source = os.path.join(corpus.path, 'data') corpus_data_target = '/corpora/data' - corpus_data_mount = \ - corpus_data_source + ':' + corpus_data_target + ':rw' + corpus_data_mount = f'{corpus_data_source}:{corpus_data_target}:rw' # Make sure that their is no data in the corpus data directory shutil.rmtree(corpus_data_source, ignore_errors=True) os.mkdir(corpus_data_source) ''' ### Corpus registry mount ### ''' corpus_registry_source = os.path.join(corpus.path, 'registry') corpus_registry_target = '/usr/local/share/cwb/registry' - corpus_registry_mount = \ - corpus_registry_source + ':' + corpus_registry_target + ':rw' + corpus_registry_mount = f'{corpus_registry_source}:{corpus_registry_target}:rw' # noqa # Make sure that their is no data in the corpus registry directory shutil.rmtree(corpus_registry_source, ignore_errors=True) os.mkdir(corpus_registry_source) mounts = [corpus_file_mount, corpus_data_mount, corpus_registry_mount] ''' ## Name ## ''' - name = 'build-corpus_{}'.format(corpus.id) + name = f'build-corpus_{corpus.id}' ''' ## Restart policy ## ''' restart_policy = docker.types.RestartPolicy() try: @@ -78,57 +73,48 @@ class CheckCorporaMixin: ) except docker.errors.APIError as e: current_app.logger.error( - 'Create "{}" service raised '.format(name) - + '"docker.errors.APIError" The server returned an error. 
' - + 'Details: {}'.format(e) + f'Create service "{name}" failed ' + + f'due to "docker.errors.APIError": {e}' ) - else: - corpus.status = 'queued' + return + corpus.status = 'queued' def checkout_build_corpus_service(self, corpus): - service_name = 'build-corpus_{}'.format(corpus.id) + service_name = f'build-corpus_{corpus.id}' try: service = self.docker.services.get(service_name) - except docker.errors.NotFound: + except docker.errors.NotFound as e: current_app.logger.error( - 'Get "{}" service raised '.format(service_name) - + '"docker.errors.NotFound" The service does not exist. ' - + '(corpus.status: {} -> failed)'.format(corpus.status) + f'Get service "{service_name}" failed ' + + f'due to "docker.errors.NotFound": {e}' ) corpus.status = 'failed' + return except docker.errors.APIError as e: current_app.logger.error( - 'Get "{}" service raised '.format(service_name) - + '"docker.errors.APIError" The server returned an error. ' - + 'Details: {}'.format(e) - ) - except docker.errors.InvalidVersion: - current_app.logger.error( - 'Get "{}" service raised '.format(service_name) - + '"docker.errors.InvalidVersion" One of the arguments is ' - + 'not supported with the current API version.' 
+ f'Get service "{service_name}" failed ' + + f'due to "docker.errors.APIError": {e}' ) + service_tasks = service.tasks() + if not service_tasks: + return + task_state = service_tasks[0].get('Status').get('State') + if corpus.status == 'queued' and task_state != 'pending': + corpus.status = 'running' + return + elif corpus.status == 'running' and task_state == 'complete': + corpus.status = 'prepared' + elif corpus.status == 'running' and task_state == 'failed': + corpus.status = 'failed' else: - service_tasks = service.tasks() - if not service_tasks: - return - task_state = service_tasks[0].get('Status').get('State') - if corpus.status == 'queued' and task_state != 'pending': - corpus.status = 'running' - elif (corpus.status == 'running' - and task_state in ['complete', 'failed']): - try: - service.remove() - except docker.errors.APIError as e: - current_app.logger.error( - 'Remove "{}" service raised '.format(service_name) - + '"docker.errors.APIError" The server returned an error. ' # noqa - + 'Details: {}'.format(e) - ) - return - else: - corpus.status = \ - 'prepared' if task_state == 'complete' else 'failed' + return + try: + service.remove() + except docker.errors.APIError as e: + current_app.logger.error( + f'Remove service "{service_name}" failed ' + + f'due to "docker.errors.APIError": {e}' + ) def create_cqpserver_container(self, corpus): ''' # Docker container settings # ''' @@ -137,22 +123,20 @@ class CheckCorporaMixin: ''' ## Detach ## ''' detach = True ''' ## Image ## ''' - image = current_app.config['DOCKER_IMAGE_PREFIX'] + 'cqpserver:latest' + image = f'{current_app.config["NOPAQUE_DOCKER_IMAGE_PREFIX"]}cqpserver:r1674' # noqa ''' ## Name ## ''' - name = 'cqpserver_{}'.format(corpus.id) + name = f'cqpserver_{corpus.id}' ''' ## Network ## ''' network = 'nopaque_default' ''' ## Volumes ## ''' ''' ### Corpus data volume ### ''' corpus_data_source = os.path.join(corpus.path, 'data') corpus_data_target = '/corpora/data' - corpus_data_volume = \ - 
corpus_data_source + ':' + corpus_data_target + ':rw' + corpus_data_volume = f'{corpus_data_source}:{corpus_data_target}:rw' ''' ### Corpus registry volume ### ''' corpus_registry_source = os.path.join(corpus.path, 'registry') corpus_registry_target = '/usr/local/share/cwb/registry' - corpus_registry_volume = \ - corpus_registry_source + ':' + corpus_registry_target + ':rw' + corpus_registry_volume = f'{corpus_registry_source}:{corpus_registry_target}:rw' # noqa volumes = [corpus_data_volume, corpus_registry_volume] # Check if a cqpserver container already exists. If this is the case, # remove it and create a new one @@ -162,9 +146,8 @@ class CheckCorporaMixin: pass except docker.errors.APIError as e: current_app.logger.error( - 'Get "{}" container raised '.format(name) - + '"docker.errors.APIError" The server returned an error. ' - + 'Details: {}'.format(e) + f'Get container "{name}" failed ' + + f'due to "docker.errors.APIError": {e}' ) return else: @@ -172,77 +155,68 @@ class CheckCorporaMixin: container.remove(force=True) except docker.errors.APIError as e: current_app.logger.error( - 'Remove "{}" container raised '.format(name) - + '"docker.errors.APIError" The server returned an error. ' - + 'Details: {}'.format(e) + f'Remove container "{name}" failed ' + + f'due to "docker.errors.APIError": {e}' ) return try: - self.docker.containers.run(image, command=command, detach=detach, - volumes=volumes, name=name, - network=network) - except docker.errors.ContainerError: - # This case should not occur, because detach is True. + self.docker.containers.run( + image, + command=command, + detach=detach, + volumes=volumes, + name=name, + network=network + ) + except docker.errors.ImageNotFound as e: current_app.logger.error( - 'Run "{}" container raised '.format(name) - + '"docker.errors.ContainerError" The container exits with a ' - + 'non-zero exit code and detach is False.' 
- ) - corpus.status = 'failed' - except docker.errors.ImageNotFound: - current_app.logger.error( - 'Run "{}" container raised '.format(name) - + '"docker.errors.ImageNotFound" The specified image does not ' - + 'exist.' + f'Run container "{name}" failed ' + + f'due to "docker.errors.ImageNotFound" error: {e}' ) corpus.status = 'failed' + return except docker.errors.APIError as e: current_app.logger.error( - 'Run "{}" container raised '.format(name) - + '"docker.errors.APIError" The server returned an error. ' - + 'Details: {}'.format(e) + f'Run container "{name}" failed ' + + f'due to "docker.errors.APIError" error: {e}' ) - else: - corpus.status = 'analysing' + return + corpus.status = 'analysing' def checkout_analysing_corpus_container(self, corpus): - container_name = 'cqpserver_{}'.format(corpus.id) + container_name = f'cqpserver_{corpus.id}' try: self.docker.containers.get(container_name) - except docker.errors.NotFound: + except docker.errors.NotFound as e: current_app.logger.error( - 'Could not find "{}" but the corpus state is "analysing".' + f'Get container "{container_name}" failed ' + + f'due to "docker.errors.NotFound": {e}' ) + corpus.num_analysis_sessions = 0 corpus.status = 'prepared' except docker.errors.APIError as e: current_app.logger.error( - 'Get "{}" container raised '.format(container_name) - + '"docker.errors.APIError" The server returned an error. ' - + 'Details: {}'.format(e) + f'Get container "{container_name}" failed ' + + f'due to "docker.errors.APIError": {e}' ) - return def remove_cqpserver_container(self, corpus): - container_name = 'cqpserver_{}'.format(corpus.id) + container_name = f'cqpserver_{corpus.id}' try: container = self.docker.containers.get(container_name) except docker.errors.NotFound: - pass + corpus.status = 'prepared' + return except docker.errors.APIError as e: current_app.logger.error( - 'Get "{}" container raised '.format(container_name) - + '"docker.errors.APIError" The server returned an error. 
' - + 'Details: {}'.format(e) + f'Get container "{container_name}" failed ' + + f'due to "docker.errors.APIError": {e}' ) return - else: - try: - container.remove(force=True) - except docker.errors.APIError as e: - current_app.logger.error( - 'Remove "{}" container raised '.format(container_name) - + '"docker.errors.APIError" The server returned an error. ' - + 'Details: {}'.format(e) - ) - return - corpus.status = 'prepared' + try: + container.remove(force=True) + except docker.errors.APIError as e: + current_app.logger.error( + f'Remove container "{container_name}" failed ' + + f'due to "docker.errors.APIError": {e}' + ) diff --git a/app/daemon/job_utils.py b/app/daemon/job_utils.py index 47424a81..78bae839 100644 --- a/app/daemon/job_utils.py +++ b/app/daemon/job_utils.py @@ -12,15 +12,11 @@ import shutil class CheckJobsMixin: def check_jobs(self): jobs = Job.query.all() - canceling_jobs = list(filter(lambda job: job.status == 'canceling', jobs)) # noqa - queued_jobs = list(filter(lambda job: job.status == 'queued', jobs)) - running_jobs = list(filter(lambda job: job.status == 'running', jobs)) - submitted_jobs = list(filter(lambda job: job.status == 'submitted', jobs)) # noqa - for job in submitted_jobs: + for job in (x for x in jobs if x.status == 'submitted'): self.create_job_service(job) - for job in queued_jobs + running_jobs: + for job in (x for x in jobs if x.status in ['queued', 'running']): self.checkout_job_service(job) - for job in canceling_jobs: + for job in (x for x in jobs if x.status == 'canceling'): self.remove_job_service(job) def create_job_service(self, job): @@ -30,26 +26,23 @@ class CheckJobsMixin: mem_mb = 2048 n_cores = 2 executable = 'file-setup' - image = (current_app.config['DOCKER_IMAGE_PREFIX'] - + 'file-setup:' + job.service_version) + image = f'{current_app.config["NOPAQUE_DOCKER_IMAGE_PREFIX"]}file-setup:{job.service_version}' # noqa elif job.service == 'ocr': mem_mb = 4096 n_cores = 4 executable = 'ocr' - image = 
(current_app.config['DOCKER_IMAGE_PREFIX'] - + 'ocr:' + job.service_version) + image = f'{current_app.config["NOPAQUE_DOCKER_IMAGE_PREFIX"]}ocr:{job.service_version}' # noqa elif job.service == 'nlp': mem_mb = 2048 n_cores = 2 executable = 'nlp' - image = (current_app.config['DOCKER_IMAGE_PREFIX'] - + 'nlp:' + job.service_version) + image = f'{current_app.config["NOPAQUE_DOCKER_IMAGE_PREFIX"]}nlp:{job.service_version}' # noqa ''' ## Command ## ''' - command = '{} -i /input -o /output'.format(executable) + command = f'{executable} -i /input -o /output' command += ' --log-dir /input' - command += ' --mem-mb {}'.format(mem_mb) - command += ' --n-cores {}'.format(n_cores) - command += ' --zip [' + job.service + ']_' + secure_filename(job.title) + command += f' --mem-mb {mem_mb}' + command += f' --n-cores {n_cores}' + command += f' --zip [{job.service}]_{secure_filename(job.title)}' command += ' ' + ' '.join(json.loads(job.service_args)) ''' ## Constraints ## ''' constraints = ['node.role==worker'] @@ -64,18 +57,18 @@ class CheckJobsMixin: input_mount_source = job.path input_mount_target = '/input' if job.service == 'file-setup': - input_mount_target += '/' + secure_filename(job.title) - input_mount = input_mount_source + ':' + input_mount_target + ':rw' + input_mount_target += f'/{secure_filename(job.title)}' + input_mount = f'{input_mount_source}:{input_mount_target}:rw' ''' ### Output mount ### ''' output_mount_source = os.path.join(job.path, 'output') output_mount_target = '/output' - output_mount = output_mount_source + ':' + output_mount_target + ':rw' + output_mount = f'{output_mount_source}:{output_mount_target}:rw' # Make sure that their is no data in the output directory shutil.rmtree(output_mount_source, ignore_errors=True) os.makedirs(output_mount_source) mounts = [input_mount, output_mount] ''' ## Name ## ''' - name = 'job_{}'.format(job.id) + name = f'job_{job.id}' ''' ## Resources ## ''' resources = docker.types.Resources( cpu_reservation=n_cores * (10 ** 
9), @@ -96,104 +89,83 @@ class CheckJobsMixin: ) except docker.errors.APIError as e: current_app.logger.error( - 'Create "{}" service raised '.format(name) - + '"docker.errors.APIError" The server returned an error. ' - + 'Details: {}'.format(e) + f'Create service "{name}" failed ' + + f'due to "docker.errors.APIError": {e}' ) return - else: - job.status = 'queued' + job.status = 'queued' def checkout_job_service(self, job): - service_name = 'job_{}'.format(job.id) + service_name = f'job_{job.id}' try: service = self.docker.services.get(service_name) - except docker.errors.NotFound: + except docker.errors.NotFound as e: current_app.logger.error( - 'Get "{}" service raised '.format(service_name) - + '"docker.errors.NotFound" The service does not exist. ' - + '(job.status: {} -> failed)'.format(job.status) + f'Get service "{service_name}" failed ' + + f'due to "docker.errors.NotFound": {e}' ) job.status = 'failed' + return except docker.errors.APIError as e: current_app.logger.error( - 'Get "{}" service raised '.format(service_name) - + '"docker.errors.APIError" The server returned an error. ' - + 'Details: {}'.format(e) + f'Get service "{service_name}" failed ' + + f'due to "docker.errors.APIError": {e}' ) return - except docker.errors.InvalidVersion: - current_app.logger.error( - 'Get "{}" service raised '.format(service_name) - + '"docker.errors.InvalidVersion" One of the arguments is ' - + 'not supported with the current API version.' 
- ) + service_tasks = service.tasks() + if not service_tasks: return + task_state = service_tasks[0].get('Status').get('State') + if job.status == 'queued' and task_state != 'pending': + job.status = 'running' + return + elif job.status == 'running' and task_state == 'complete': + job.status = 'complete' + results_dir = os.path.join(job.path, 'output') + result_files = [x for x in os.listdir(results_dir) if x.endswith('.zip')] # noqa + for result_file in result_files: + job_result = JobResult(filename=result_file, job=job) + db.session.add(job_result) + db.session.flush() + db.session.refresh(job_result) + elif job.status == 'running' and task_state == 'failed': + job.status = 'failed' else: - service_tasks = service.tasks() - if not service_tasks: - return - task_state = service_tasks[0].get('Status').get('State') - if job.status == 'queued' and task_state != 'pending': - job.status = 'running' - elif job.status == 'running' and task_state in ['complete', 'failed']: # noqa - try: - service.remove() - except docker.errors.APIError as e: - current_app.logger.error( - 'Remove "{}" service raised '.format(service_name) - + '"docker.errors.APIError" The server returned an error. 
' # noqa - + 'Details: {}'.format(e) - ) - return - else: - if task_state == 'complete': - results_dir = os.path.join(job.path, 'output') - result_files = filter(lambda x: x.endswith('.zip'), - os.listdir(results_dir)) - for result_file in result_files: - job_result = JobResult(filename=result_file, job=job) # noqa - db.session.add(job_result) - db.session.flush() - db.session.refresh(job_result) - job.end_date = datetime.utcnow() - job.status = task_state + return + job.end_date = datetime.utcnow() + try: + service.remove() + except docker.errors.APIError as e: + current_app.logger.error( + f'Remove service "{service_name}" failed ' + + f'due to "docker.errors.APIError": {e}' + ) def remove_job_service(self, job): - service_name = 'job_{}'.format(job.id) + service_name = f'job_{job.id}' try: service = self.docker.services.get(service_name) except docker.errors.NotFound: job.status = 'canceled' + return except docker.errors.APIError as e: current_app.logger.error( - 'Get "{}" service raised '.format(service_name) - + '"docker.errors.APIError" The server returned an error. ' - + 'Details: {}'.format(e) + f'Get service "{service_name}" failed ' + + f'due to "docker.errors.APIError": {e}' ) return - except docker.errors.InvalidVersion: + try: + service.update(mounts=None) + except docker.errors.APIError as e: current_app.logger.error( - 'Get "{}" service raised '.format(service_name) - + '"docker.errors.InvalidVersion" One of the arguments is ' - + 'not supported with the current API version.' + f'Update service "{service_name}" failed ' + + f'due to "docker.errors.APIError": {e}' ) return - else: - try: - service.update(mounts=None) - except docker.errors.APIError as e: - current_app.logger.error( - 'Update "{}" service raised '.format(service_name) - + '"docker.errors.APIError" The server returned an error. 
' - + 'Details: {}'.format(e) - ) - return - try: - service.remove() - except docker.errors.APIError as e: - current_app.logger.error( - 'Remove "{}" service raised '.format(service_name) - + '"docker.errors.APIError" The server returned an error. ' - + 'Details: {}'.format(e) - ) + try: + service.remove() + except docker.errors.APIError as e: + current_app.logger.error( + f'Remove "{service_name}" service failed ' + + f'due to "docker.errors.APIError": {e}' + ) diff --git a/app/events/socketio.py b/app/events/socketio.py index ff7f787a..81f40533 100644 --- a/app/events/socketio.py +++ b/app/events/socketio.py @@ -1,6 +1,6 @@ from flask import request from flask_login import current_user -from flask_socketio import join_room, leave_room +from flask_socketio import join_room from .. import socketio from ..decorators import socketio_login_required from ..models import User @@ -25,7 +25,7 @@ def socketio_connect(): ' On connect the sid is saved in the sessions list. ''' sessions.append(request.sid) - return {'code': 200, 'msg': 'OK'} + # return {'code': 200, 'msg': 'OK'} @socketio.on('disconnect') @@ -37,7 +37,7 @@ def socketio_disconnect(): sessions.remove(request.sid) except ValueError: pass - return {'code': 200, 'msg': 'OK'} + # return {'code': 200, 'msg': 'OK'} @socketio.on('start_user_session') diff --git a/app/models.py b/app/models.py index 91a812b6..0cc4e83c 100644 --- a/app/models.py +++ b/app/models.py @@ -567,16 +567,18 @@ class Corpus(db.Model): user_id = db.Column(db.Integer, db.ForeignKey('users.id')) # Fields creation_date = db.Column(db.DateTime(), default=datetime.utcnow) - current_nr_of_tokens = db.Column(db.Integer, default=0) description = db.Column(db.String(255)) last_edited_date = db.Column(db.DateTime(), default=datetime.utcnow) - max_nr_of_tokens = 2147483647 status = db.Column(db.String(16), default='unprepared') title = db.Column(db.String(32)) + num_analysis_sessions = db.Column(db.Integer, default=0) + num_tokens = db.Column(db.Integer, 
default=0) archive_file = db.Column(db.String(255)) # Relationships files = db.relationship('CorpusFile', backref='corpus', lazy='dynamic', cascade='save-update, merge, delete') + # Python class variables + max_num_tokens = 2147483647 @property def analysis_url(self): @@ -601,12 +603,13 @@ class Corpus(db.Model): 'id': self.id, 'user_id': self.user_id, 'creation_date': self.creation_date.isoformat() + 'Z', - 'current_nr_of_tokens': self.current_nr_of_tokens, 'description': self.description, + 'max_num_tokens': self.max_num_tokens, + 'num_analysis_sessions': self.num_analysis_sessions, + 'num_tokens': self.num_tokens, 'status': self.status, 'last_edited_date': self.last_edited_date.isoformat() + 'Z', - 'max_nr_of_tokens': self.max_nr_of_tokens, - 'title': self.title, + 'title': self.title } if include_relationships: dict_corpus['files'] = {file.id: file.to_dict() @@ -617,30 +620,25 @@ class Corpus(db.Model): output_dir = os.path.join(self.path, 'merged') shutil.rmtree(output_dir, ignore_errors=True) os.mkdir(output_dir) - master_element_tree = ET.ElementTree( - ET.fromstring('\n') - ) + output_file = os.path.join(output_dir, 'corpus.vrt') + corpus_element = ET.fromstring('\n') for corpus_file in self.files: element_tree = ET.parse(corpus_file.path) text_node = element_tree.find('text') - text_node.set('address', corpus_file.address or "NULL") + text_node.set('address', corpus_file.address or 'NULL') text_node.set('author', corpus_file.author) - text_node.set('booktitle', corpus_file.booktitle or "NULL") - text_node.set('chapter', corpus_file.chapter or "NULL") - text_node.set('editor', corpus_file.editor or "NULL") - text_node.set('institution', corpus_file.institution or "NULL") - text_node.set('journal', corpus_file.journal or "NULL") - text_node.set('pages', corpus_file.pages or "NULL") - text_node.set('publisher', corpus_file.publisher or "NULL") + text_node.set('booktitle', corpus_file.booktitle or 'NULL') + text_node.set('chapter', corpus_file.chapter or 
/*
 * Client-side counterpart of the "CQi over Socket.IO" tunnel: every CQi
 * function is exposed by the server as a Socket.IO event. Each method below
 * emits exactly one event and wraps the server's acknowledgement in a
 * Promise via the shared `cqiRequest` helper.
 */

/**
 * Emit `eventName` (with optional `args`) on `socket` and promisify the
 * CQi-style acknowledgement.
 *
 * Resolves with `response.payload` when `response.code === 200`, rejects
 * with the whole response object otherwise.
 */
function cqiRequest(socket, eventName, args) {
  return new Promise((resolve, reject) => {
    const ack = response => {
      if (response.code === 200) {
        resolve(response.payload);
      } else {
        reject(response);
      }
    };
    if (args === undefined) {
      socket.emit(eventName, ack);
    } else {
      socket.emit(eventName, args, ack);
    }
  });
}


class CQiClient {
  /**
   * @param {string|number} corpusId - id of the corpus to analyse; sent as
   *   auth payload so the server can check access rights on connect.
   */
  constructor(corpusId) {
    this.socket = io(
      '/corpora/corpus/corpus_analysis',
      {auth: {corpus_id: corpusId}, transports: ['websocket'], upgrade: false}
    );
    this.connected = false;
    this.corpora = new CQiCorpusCollection(this.socket);
  }

  /** Open the CQi session; flips `this.connected` on success. */
  connect() {
    return cqiRequest(this.socket, 'cqi.connect')
      .then(payload => {this.connected = true; return payload;});
  }

  /** Close the CQi session; flips `this.connected` on success. */
  disconnect() {
    return cqiRequest(this.socket, 'cqi.disconnect')
      .then(payload => {this.connected = false; return payload;});
  }

  /** Liveness check against the CQi backend. */
  ping() {
    return cqiRequest(this.socket, 'cqi.ping');
  }
}


class CQiCorpusCollection {
  constructor(socket) {
    this.socket = socket;
  }

  /** Fetch a single corpus by name and wrap it in a CQiCorpus. */
  get(corpusName) {
    return cqiRequest(this.socket, 'cqi.corpora.get', {corpus_name: corpusName})
      .then(attrs => new CQiCorpus(this.socket, attrs));
  }

  /**
   * List all corpora.
   *
   * Bug fix: the original wrapped each payload item in
   * `new CQiSubcorpus(this.socket, x)` — but CQiSubcorpus' constructor is
   * (socket, corpus, attrs), so the attributes landed in the `corpus` slot
   * and every object came back empty. A corpus listing must yield
   * CQiCorpus instances.
   */
  list() {
    return cqiRequest(this.socket, 'cqi.corpora.list')
      .then(payload => payload.map(attrs => new CQiCorpus(this.socket, attrs)));
  }
}


class CQiCorpus {
  constructor(socket, attrs) {
    this.socket = socket;
    this.charset = attrs.charset;
    this.name = attrs.name;
    this.properties = attrs.properties;
    this.size = attrs.size;
    this.alignmentAttributes = new CQiAlignmentAttributeCollection(this.socket, this);
    this.positionalAttributes = new CQiPositionalAttributeCollection(this.socket, this);
    this.structuralAttributes = new CQiStructuralAttributeCollection(this.socket, this);
    this.subcorpora = new CQiSubcorpusCollection(this.socket, this);
  }

  /** Delete this corpus on the server. */
  drop() {
    return cqiRequest(this.socket, 'cqi.corpora.corpus.drop',
                      {corpus_name: this.name});
  }

  /** Run a CQP query, storing the result as a named subcorpus. */
  query(subcorpus_name, queryString) {
    return cqiRequest(this.socket, 'cqi.corpora.corpus.query', {
      corpus_name: this.name,
      subcorpus_name: subcorpus_name,
      query: queryString
    });
  }

  // nopaque specific CQi extension
  paginate(page=1, perPage=20) {
    return cqiRequest(this.socket, 'cqi.corpora.corpus.paginate',
                      {corpus_name: this.name, page: page, per_page: perPage});
  }

  /** Fire-and-forget: no acknowledgement is expected from the server. */
  updateDb() {
    this.socket.emit('cqi.corpora.corpus.update_db', {corpus_name: this.name});
  }
}


class CQiAlignmentAttributeCollection {
  constructor(socket, corpus) {
    this.corpus = corpus;
    this.socket = socket;
  }

  get(alignmentAttributeName) {
    let args = {
      corpus_name: this.corpus.name,
      alignment_attribute_name: alignmentAttributeName
    };
    return cqiRequest(this.socket, 'cqi.corpora.corpus.alignment_attributes.get', args)
      .then(attrs => new CQiAlignmentAttribute(this.socket, this.corpus, attrs));
  }

  // NOTE(review): event name uses the 'cqi.corpus.' prefix while get() uses
  // 'cqi.corpora.corpus.' — verify against the server-side event registry.
  list() {
    return cqiRequest(this.socket, 'cqi.corpus.alignment_attributes.list',
                      {corpus_name: this.corpus.name})
      .then(payload => payload.map(
        attrs => new CQiAlignmentAttribute(this.socket, this.corpus, attrs)
      ));
  }
}


class CQiAlignmentAttribute {
  constructor(socket, corpus, attrs) {
    this.socket = socket;
    this.corpus = corpus;
    this.name = attrs.name;
    this.size = attrs.size;
  }
}


class CQiPositionalAttributeCollection {
  constructor(socket, corpus) {
    this.corpus = corpus;
    this.socket = socket;
  }

  get(positionalAttributeName) {
    let args = {
      corpus_name: this.corpus.name,
      positional_attribute_name: positionalAttributeName
    };
    return cqiRequest(this.socket, 'cqi.corpora.corpus.positional_attributes.get', args)
      .then(attrs => new CQiPositionalAttribute(this.socket, this.corpus, attrs));
  }

  // NOTE(review): see CQiAlignmentAttributeCollection.list about the prefix.
  list() {
    return cqiRequest(this.socket, 'cqi.corpus.positional_attributes.list',
                      {corpus_name: this.corpus.name})
      .then(payload => payload.map(
        attrs => new CQiPositionalAttribute(this.socket, this.corpus, attrs)
      ));
  }
}


class CQiPositionalAttribute {
  constructor(socket, corpus, attrs) {
    this.socket = socket;
    this.corpus = corpus;
    this.lexiconSize = attrs.lexicon_size;
    this.name = attrs.name;
    this.size = attrs.size;
  }
}


class CQiStructuralAttributeCollection {
  constructor(socket, corpus) {
    this.corpus = corpus;
    this.socket = socket;
  }

  get(structuralAttributeName) {
    let args = {
      corpus_name: this.corpus.name,
      structural_attribute_name: structuralAttributeName
    };
    return cqiRequest(this.socket, 'cqi.corpora.corpus.structural_attributes.get', args)
      .then(attrs => new CQiStructuralAttribute(this.socket, this.corpus, attrs));
  }

  // NOTE(review): see CQiAlignmentAttributeCollection.list about the prefix.
  list() {
    return cqiRequest(this.socket, 'cqi.corpus.structural_attributes.list',
                      {corpus_name: this.corpus.name})
      .then(payload => payload.map(
        attrs => new CQiStructuralAttribute(this.socket, this.corpus, attrs)
      ));
  }
}


class CQiStructuralAttribute {
  constructor(socket, corpus, attrs) {
    this.socket = socket;
    this.corpus = corpus;
    this.hasValues = attrs.has_values;
    this.name = attrs.name;
    this.size = attrs.size;
  }
}


class CQiSubcorpusCollection {
  constructor(socket, corpus) {
    this.corpus = corpus;
    this.socket = socket;
  }

  get(subcorpusName) {
    let args = {corpus_name: this.corpus.name, subcorpus_name: subcorpusName};
    return cqiRequest(this.socket, 'cqi.corpora.corpus.subcorpora.get', args)
      .then(attrs => new CQiSubcorpus(this.socket, this.corpus, attrs));
  }

  list() {
    return cqiRequest(this.socket, 'cqi.corpora.corpus.subcorpora.list',
                      {corpus_name: this.corpus.name})
      .then(payload => payload.map(
        attrs => new CQiSubcorpus(this.socket, this.corpus, attrs)
      ));
  }
}


class CQiSubcorpus {
  constructor(socket, corpus, attrs) {
    this.socket = socket;
    this.corpus = corpus;
    this.fields = attrs.fields;
    this.name = attrs.name;
    this.size = attrs.size;
  }

  /** Delete this subcorpus on the server. */
  drop() {
    let args = {corpus_name: this.corpus.name, subcorpus_name: this.name};
    return cqiRequest(this.socket, 'cqi.corpora.corpus.subcorpora.subcorpus.drop', args);
  }

  /** Dump the values of `field` for matches `first`..`last`. */
  dump(field, first, last) {
    let args = {
      corpus_name: this.corpus.name,
      subcorpus_name: this.name,
      field: field,
      first: first,
      last: last
    };
    return cqiRequest(this.socket, 'cqi.corpora.corpus.subcorpora.subcorpus.dump', args);
  }

  export(context=50) {
    let args = {
      corpus_name: this.corpus.name,
      subcorpus_name: this.name,
      context: context
    };
    return cqiRequest(this.socket, 'cqi.corpora.corpus.subcorpora.subcorpus.export', args);
  }

  /** Frequency distribution over one (field, attribute) pair. */
  fdst_1(cutoff, field, attribute) {
    let args = {
      corpus_name: this.corpus.name,
      subcorpus_name: this.name,
      cutoff: cutoff,
      field: field,
      attribute: attribute
    };
    return cqiRequest(this.socket, 'cqi.corpora.corpus.subcorpora.subcorpus.fdist_1', args);
  }

  /**
   * Frequency distribution over two (field, attribute) pairs.
   *
   * Bug fix: the original emitted the `…subcorpus.fdist_1` event (copy-paste
   * from fdst_1), so the second field/attribute pair was silently ignored.
   */
  fdst_2(cutoff, field1, attribute1, field2, attribute2) {
    let args = {
      corpus_name: this.corpus.name,
      subcorpus_name: this.name,
      cutoff: cutoff,
      field1: field1,
      attribute1: attribute1,
      field2: field2,
      attribute2: attribute2
    };
    return cqiRequest(this.socket, 'cqi.corpora.corpus.subcorpora.subcorpus.fdist_2', args);
  }

  // nopaque specific CQi extension
  paginate(page=1, perPage=20, context=50) {
    let args = {
      corpus_name: this.corpus.name,
      subcorpus_name: this.name,
      page: page,
      per_page: perPage,
      context: context
    };
    return cqiRequest(this.socket, 'cqi.corpora.corpus.subcorpora.subcorpus.paginate', args);
  }
}
/*
 * Top-level controller for the corpus analysis page: owns the CQiClient,
 * the Materialize UI elements and the registered extensions (Reader,
 * Concordance, ...).
 */
class CorpusAnalysisApp {
  // Color map for named-entity highlighting, keyed by entity type.
  // NOTE(review): the property name keeps the original misspelling
  // ("entitiy") because other extensions reference it by this name;
  // renaming it would break them.
  static entitiyColors = {
    PERSON: '#a6e22d',
    PER: '#a6e22d',
    NORP: '#ef60b4',
    FACILITY: '#43c6fc',
    ORG: '#43c6fc',
    GPE: '#fd9720',
    LOC: '#fd9720',
    PRODUCT: '#a99dfb',
    MISC: '#a99dfb',
    EVENT: '#fc0',  // bug fix: was ':#fc0' — stray colon made it an invalid CSS color
    WORK_OF_ART: '#fc0',
    LANGUAGE: '#fc0',
    DATE: '#2fbbab',
    TIME: '#2fbbab',
    PERCENT: '#bbb',
    MONEY: '#bbb',
    QUANTITY: '#bbb',
    ORDINAL: '#bbb',
    CARDINAL: '#bbb'
  };

  /**
   * @param {string|number} corpusId - id of the corpus this page analyses.
   */
  constructor(corpusId) {
    this.data = {};

    // HTML elements
    this.elements = {
      container: document.querySelector('#corpus-analysis-app-container'),
      extensionTabs: document.querySelector('#corpus-analysis-app-extension-tabs'),
      initModal: document.querySelector('#corpus-analysis-app-init-modal'),
      initError: document.querySelector('#corpus-analysis-app-init-error'),
      initProgress: document.querySelector('#corpus-analysis-app-init-progress'),
      overview: document.querySelector('#corpus-analysis-app-overview')
    };
    // Materialize elements
    this.elements.m = {
      extensionTabs: M.Tabs.init(this.elements.extensionTabs),
      initModal: M.Modal.init(this.elements.initModal, {dismissible: false})
    };

    this.extensions = {};

    this.settings = {
      corpusId: corpusId
    };
  }

  /**
   * Connect to the CQi backend, fetch the corpus and initialize all
   * registered extensions. Shows a blocking modal while connecting and an
   * error panel on failure.
   */
  init() {
    this.disableActionElements();
    this.elements.m.initModal.open();
    // Init data
    this.data.cQiClient = new CQiClient(this.settings.corpusId);
    this.data.cQiClient.connect()
      .then(cQiStatus => {
        return this.data.cQiClient.corpora.get('CORPUS');
      })
      .then(
        cQiCorpus => {
          this.data.corpus = {o: cQiCorpus};
          // TODO: Don't do this here
          cQiCorpus.updateDb();
          this.enableActionElements();
          for (let extension of Object.values(this.extensions)) {extension.init();}
          this.elements.m.initModal.close();
        },
        cQiError => {
          this.elements.initError.innerText = JSON.stringify(cQiError);
          this.elements.initError.classList.remove('hide');
          this.elements.initProgress.classList.add('hide');
          if ('payload' in cQiError && 'code' in cQiError.payload && 'msg' in cQiError.payload) {
            nopaque.appClient.flash(`${cQiError.payload.code}: ${cQiError.payload.msg}`, 'error');
          }
        }
      );
    // Clicking an extension card on the overview switches to its tab.
    for (let extensionSelectorElement of this.elements.overview.querySelectorAll('.extension-selector')) {
      extensionSelectorElement.addEventListener('click', () => {
        this.elements.m.extensionTabs.select(extensionSelectorElement.dataset.target);
      });
    }
  }

  /**
   * Register an extension under its `name`. If the CQi client is already
   * connected, the extension is initialized immediately; otherwise init()
   * will do it after connecting.
   */
  registerExtension(extension) {
    if (extension.name in this.extensions) {
      console.error(`Can't register extension ${extension.name}: Already registered`);
      return;
    }
    this.extensions[extension.name] = extension;
    if ('cQiClient' in this.data && this.data.cQiClient.connected) {extension.init();}
  }

  disableActionElements() {
    this._setActionElementsDisabled(true);
  }

  enableActionElements() {
    this._setActionElementsDisabled(false);
  }

  /**
   * Enable/disable every `.corpus-analysis-action` element in the app
   * container. Materialize renders <select> as a proxy input, so for
   * selects the generated `input.select-dropdown` sibling is toggled
   * instead; non-form elements get the `disabled` CSS class.
   *
   * (Replaces the original copy-pasted disable/enable method pair.)
   */
  _setActionElementsDisabled(disabled) {
    let actionElements = this.elements.container.querySelectorAll('.corpus-analysis-action');
    for (let actionElement of actionElements) {
      if (actionElement.nodeName === 'INPUT') {
        actionElement.disabled = disabled;
      } else if (actionElement.nodeName === 'SELECT') {
        actionElement.parentNode.querySelector('input.select-dropdown').disabled = disabled;
      } else {
        actionElement.classList.toggle('disabled', disabled);
      }
    }
  }
}
a/app/static/js/nopaque/CorpusAnalysis/CorpusAnalysisConcordance.js b/app/static/js/nopaque/CorpusAnalysis/CorpusAnalysisConcordance.js new file mode 100644 index 00000000..24a9ab53 --- /dev/null +++ b/app/static/js/nopaque/CorpusAnalysis/CorpusAnalysisConcordance.js @@ -0,0 +1,432 @@ +class CorpusAnalysisConcordance { + name = 'Concordance'; + + constructor(app) { + this.app = app; + + this.data = {}; + + this.elements = { + // TODO: Prefix elements with "corpus-analysis-app-" + container: document.querySelector('#concordance-extension-container'), + error: document.querySelector('#concordance-extension-error'), + form: document.querySelector('#concordance-extension-form'), + progress: document.querySelector('#concordance-extension-progress'), + subcorpusInfo: document.querySelector('#concordance-extension-subcorpus-info'), + subcorpusActions: document.querySelector('#concordance-extension-subcorpus-actions'), + subcorpusItems: document.querySelector('#concordance-extension-subcorpus-items'), + subcorpusList: document.querySelector('#concordance-extension-subcorpus-list'), + subcorpusPagination: document.querySelector('#concordance-extension-subcorpus-pagination') + }; + + this.settings = { + context: parseInt(this.elements.form['context'].value), + perPage: parseInt(this.elements.form['per-page'].value), + selectedSubcorpus: undefined, + textStyle: parseInt(this.elements.form['text-style'].value), + tokenRepresentation: this.elements.form['token-representation'].value + }; + + this.app.registerExtension(this); + } + + init() { + // Init data + this.data.corpus = this.app.data.corpus; + this.data.subcorpora = {}; + // Add event listeners + this.elements.form.addEventListener('submit', event => { + event.preventDefault(); + this.app.disableActionElements(); + let query = this.elements.form.query.value.trim(); + let subcorpusName = this.elements.form['subcorpus-name'].value; + this.elements.error.innerText = ''; + this.elements.error.classList.add('hide'); + 
this.elements.progress.classList.remove('hide'); + let subcorpus = {}; + this.data.corpus.o.query(subcorpusName, query) + .then(cQiStatus => { + subcorpus.q = query; + return this.data.corpus.o.subcorpora.get(subcorpusName); + }) + .then(cQiSubcorpus => { + subcorpus.o = cQiSubcorpus; + return cQiSubcorpus.paginate(1, this.settings.perPage, this.settings.context); + }) + .then( + paginatedSubcorpus => { + subcorpus.p = paginatedSubcorpus; + if (subcorpus !== 'Last') {this.data.subcorpora.Last = subcorpus;} + this.data.subcorpora[subcorpusName] = subcorpus; + this.settings.selectedSubcorpus = subcorpusName; + this.renderSubcorpusList(); + this.renderSubcorpusInfo(); + this.renderSubcorpusActions(); + this.renderSubcorpusItems(); + this.renderSubcorpusPagination(); + this.elements.progress.classList.add('hide'); + this.app.enableActionElements(); + }, + cQiError => { + this.elements.error.innerText = JSON.stringify(cQiError); + this.elements.error.classList.remove('hide'); + if ('payload' in cQiError && 'code' in cQiError.payload && 'msg' in cQiError.payload) { + nopaque.appClient.flash(`${cQiError.payload.code}: ${cQiError.payload.msg}`, 'error'); + } + this.elements.progress.classList.add('hide'); + this.app.enableActionElements(); + } + ); + }); + this.elements.form.addEventListener('change', event => { + if (event.target === this.elements.form['context']) { + this.settings.context = parseInt(this.elements.form['context'].value); + this.elements.form.submit.click(); + } + if (event.target === this.elements.form['per-page']) { + this.settings.perPage = parseInt(this.elements.form['per-page'].value); + this.elements.form.submit.click(); + } + if (event.target === this.elements.form['text-style']) { + this.settings.textStyle = parseInt(this.elements.form['text-style'].value); + this.setTextStyle(); + } + if (event.target === this.elements.form['token-representation']) { + this.settings.tokenRepresentation = this.elements.form['token-representation'].value; + 
this.setTokenRepresentation(); + } + }); + } + + clearSubcorpusList() { + this.elements.subcorpusList.innerHTML = ''; + this.elements.subcorpusList.classList.add('hide'); + } + + renderSubcorpusList() { + this.clearSubcorpusList(); + for (let subcorpusName in this.data.subcorpora) { + this.elements.subcorpusList.innerHTML += ` + bookmark${subcorpusName} + `.trim(); + } + for (let subcorpusSelectorElement of this.elements.subcorpusList.querySelectorAll('.subcorpus-selector')) { + let subcorpusName = subcorpusSelectorElement.dataset.target; + if (subcorpusName === this.settings.selectedSubcorpus) { + subcorpusSelectorElement.classList.add('disabled'); + continue; + } + subcorpusSelectorElement.addEventListener('click', () => { + this.settings.selectedSubcorpus = subcorpusName; + this.elements.progress.classList.remove('hide'); + this.renderSubcorpusList(); + this.renderSubcorpusInfo(); + this.renderSubcorpusActions(); + this.renderSubcorpusActions(); + this.renderSubcorpusItems(); + this.renderSubcorpusPagination(); + this.elements.progress.classList.add('hide'); + }); + } + this.elements.subcorpusList.classList.remove('hide'); + } + + clearSubcorpusInfo() { + this.elements.subcorpusInfo.innerHTML = ''; + this.elements.subcorpusInfo.classList.add('hide'); + } + + renderSubcorpusInfo() { + let subcorpus = this.data.subcorpora[this.settings.selectedSubcorpus]; + this.clearSubcorpusInfo(); + this.elements.subcorpusInfo.innerHTML = `${subcorpus.p.total} matches found for ${subcorpus.q.replace(//g, ">")}`; + this.elements.subcorpusInfo.classList.remove('hide'); + } + + clearSubcorpusActions() { + for (let tooltippedElement of this.elements.subcorpusActions.querySelectorAll('.tooltipped')) { + M.Tooltip.getInstance(tooltippedElement).destroy(); + } + this.elements.subcorpusActions.innerHTML = ''; + } + + renderSubcorpusActions() { + this.clearSubcorpusActions(); + this.elements.subcorpusActions.innerHTML += ` + + file_download + + + delete + + `.trim(); + 
M.Tooltip.init(this.elements.subcorpusActions.querySelectorAll('.tooltipped')); + this.elements.subcorpusActions.querySelector('.delete-subcorpus-trigger').addEventListener('click', event => { + event.preventDefault(); + let subcorpus = this.data.subcorpora[this.settings.selectedSubcorpus]; + subcorpus.o.drop().then( + cQiStatus => { + nopaque.appClient.flash(`${subcorpus.o.name} deleted`, 'corpus'); + delete this.data.subcorpora[subcorpus.o.name]; + this.settings.selectedSubcorpus = undefined; + for (let subcorpusName in this.data.subcorpora) { + this.settings.selectedSubcorpus = subcorpusName; + break; + } + this.renderSubcorpusList(); + if (this.settings.selectedSubcorpus) { + this.renderSubcorpusInfo(); + this.renderSubcorpusActions(); + this.renderSubcorpusItems(); + this.renderSubcorpusPagination(); + } else { + this.clearSubcorpusInfo(); + this.clearSubcorpusActions(); + this.clearSubcorpusItems(); + this.clearSubcorpusPagination(); + } + }, + cQiError => { + nopaque.appClient.flash(`${cQiError.payload.code}: ${cQiError.payload.msg}`, 'error'); + } + ); + }); + } + + clearSubcorpusItems() { + // Destroy with .p-attr elements associated Materialize tooltips + for (let pAttrElement of this.elements.subcorpusItems.querySelectorAll('.p-attr.tooltipped')) { + M.Tooltip.getInstance(pAttrElement)?.destroy(); + } + this.elements.subcorpusItems.innerHTML = ` + + +

+ searchNothing here...
+ No matches available. +

+ + + `.trim(); + } + + renderSubcorpusItems() { + let subcorpus = this.data.subcorpora[this.settings.selectedSubcorpus]; + this.clearSubcorpusItems(); + for (let item of subcorpus.p.items) { + this.elements.subcorpusItems.innerHTML += ` + + ${item.num} + ${this.foo(...item.c)} + ${item.lc ? this.cposRange2HTML(...item.lc) : ''} + ${this.cposRange2HTML(...item.c)} + ${item.rc ? this.cposRange2HTML(...item.rc) : ''} + + search + add + + + `.trim(); + } + this.setTextStyle(); + this.setTokenRepresentation(); + for (let gotoReaderTriggerElement of this.elements.subcorpusItems.querySelectorAll('.goto-reader-trigger')) { + gotoReaderTriggerElement.addEventListener('click', event => { + event.preventDefault(); + let corpusAnalysisReader = this.app.extensions.Reader; + let itemId = parseInt(gotoReaderTriggerElement.closest('.item').dataset.id); + let item = undefined; + for (let x of subcorpus.p.items) {if (x.num === itemId) {item = x;}} + let page = Math.max(1, Math.ceil(item.c[0] / corpusAnalysisReader.settings.perPage)); + corpusAnalysisReader.page(page, () => { + let range = new Range(); + let leftCpos = corpusAnalysisReader.data.corpus.p.items[0].includes(item.c[0]) ? item.c[0] : corpusAnalysisReader.data.corpus.p.items[0][0]; + let rightCpos = corpusAnalysisReader.data.corpus.p.items[0].includes(item.c[1]) ? 
item.c[1] : corpusAnalysisReader.data.corpus.p.items[0].at(-1); + let leftElement = corpusAnalysisReader.elements.corpus.querySelector(`.p-attr[data-cpos="${leftCpos}"]`); + let rightElement = corpusAnalysisReader.elements.corpus.querySelector(`.p-attr[data-cpos="${rightCpos}"]`); + range.setStartBefore(leftElement); + range.setEndAfter(rightElement); + document.getSelection().removeAllRanges(); + document.getSelection().addRange(range); + }); + this.app.elements.m.extensionTabs.select('reader-extension-container'); + }); + } + } + + clearSubcorpusPagination() { + this.elements.subcorpusPagination.innerHTML = ''; + this.elements.subcorpusPagination.classList.add('hide'); + } + + renderSubcorpusPagination() { + let subcorpus = this.data.subcorpora[this.settings.selectedSubcorpus]; + this.clearSubcorpusPagination(); + if (subcorpus.p.pages === 0) {return;} + this.elements.subcorpusPagination.innerHTML += ` +
  • + + first_page + +
  • + `.trim(); + this.elements.subcorpusPagination.innerHTML += ` +
  • + + chevron_left + +
  • + `.trim(); + for (let i = 1; i <= subcorpus.p.pages; i++) { + this.elements.subcorpusPagination.innerHTML += ` +
  • + ${i} +
  • + `.trim(); + } + this.elements.subcorpusPagination.innerHTML += ` +
  • + + chevron_right + +
  • + `.trim(); + this.elements.subcorpusPagination.innerHTML += ` +
  • + + last_page + +
  • + `.trim(); + for (let paginationTriggerElement of this.elements.subcorpusPagination.querySelectorAll('.pagination-trigger[data-target]')) { + paginationTriggerElement.addEventListener('click', event => { + event.preventDefault(); + this.app.disableActionElements(); + this.elements.progress.classList.remove('hide'); + let page = parseInt(paginationTriggerElement.dataset.target); + subcorpus.o.paginate(page, this.settings.perPage, this.settings.context) + .then( + paginatedSubcorpus => { + subcorpus.p = paginatedSubcorpus; + this.renderSubcorpusItems(); + this.renderSubcorpusPagination(); + this.elements.progress.classList.add('hide'); + this.app.enableActionElements(); + } + ) + }); + } + this.elements.subcorpusPagination.classList.remove('hide'); + } + + foo(firstCpos, lastCpos) { + let subcorpus = this.data.subcorpora[this.settings.selectedSubcorpus]; + /* Returns a list of texts occuring in this cpos range */ + let textIds = new Set(); + for (let cpos = firstCpos; cpos <= lastCpos; cpos++) { + textIds.add(subcorpus.p.lookups.cpos_lookup[cpos].text); + } + return [...textIds].map(x => subcorpus.p.lookups.text_lookup[x].title).join(', '); + } + + cposRange2HTML(firstCpos, lastCpos) { + let subcorpus = this.data.subcorpora[this.settings.selectedSubcorpus]; + let prevPAttr, pAttr, nextPAttr; + let isEntityStart, isEntityEnd; + let html = ''; + for (let cpos = firstCpos; cpos <= lastCpos; cpos++) { + prevPAttr = cpos > firstCpos ? subcorpus.p.lookups.cpos_lookup[cpos - 1] : null; + pAttr = subcorpus.p.lookups.cpos_lookup[cpos]; + nextPAttr = cpos < lastCpos ? 
subcorpus.p.lookups.cpos_lookup[cpos + 1] : null; + isEntityStart = 'ent' in pAttr && pAttr.ent !== prevPAttr?.ent; + isEntityEnd = 'ent' in pAttr && pAttr.ent !== nextPAttr?.ent; + // Add a space before pAttr + if (cpos !== firstCpos || pAttr.simple_pos !== 'PUNCT') {html += ' ';} + // Add entity start + if (isEntityStart) { + html += ``; + } + // Add pAttr + html += ``; + // Add entity end + if (isEntityEnd) { + html += ` ${subcorpus.p.lookups.ent_lookup[pAttr.ent].type}`; + html += ''; + } + } + return html; + } + + setTextStyle() { + let subcorpus = this.data.subcorpora[this.settings.selectedSubcorpus]; + if (this.settings.textStyle >= 0) { + // Destroy with .p-attr elements associated Materialize tooltips + for (let pAttrElement of this.elements.subcorpusItems.querySelectorAll('.p-attr.tooltipped')) { + M.Tooltip.getInstance(pAttrElement)?.destroy(); + } + // Set basic styling on .p-attr elements + for (let pAttrElement of this.elements.subcorpusItems.querySelectorAll('.p-attr')) { + pAttrElement.setAttribute('class', 'p-attr'); + } + // Set basic styling on .s-attr[data-type="ent"] elements + for (let entElement of this.elements.subcorpusItems.querySelectorAll('.s-attr[data-type="ent"]')) { + entElement.querySelector('.s-attr[data-type="ent_type"]').classList.add('hide'); + entElement.removeAttribute('style'); + entElement.setAttribute('class', 's-attr'); + } + } + if (this.settings.textStyle >= 1) { + // Set advanced styling on .s-attr[data-type="ent"] elements + for (let entElement of this.elements.subcorpusItems.querySelectorAll('.s-attr[data-type="ent"]')) { + let ent = subcorpus.p.lookups.ent_lookup[entElement.dataset.id]; + entElement.classList.add('chip'); + entElement.style.backgroundColor = CorpusAnalysisApp.entitiyColors[ent.type]; + entElement.querySelector('.s-attr[data-type="ent_type"]').classList.remove('hide'); + } + } + if (this.settings.textStyle >= 2) { + // Set advanced styling on .p-attr elements + for (let pAttrElement of 
this.elements.subcorpusItems.querySelectorAll('.p-attr')) { + pAttrElement.classList.add('chip', 'hoverable', 'tooltipped'); + let cpos = pAttrElement.dataset.cpos; + let pAttr = subcorpus.p.lookups.cpos_lookup[cpos]; + let positionalPropertiesHTML = ` +

    + Positional properties
    + Token: ${cpos} + `.trim(); + let structuralPropertiesHTML = ` +

    + Structural properties + `.trim(); + for (let [property, propertyValue] of Object.entries(pAttr)) { + if (['lemma', 'ner', 'pos', 'simple_pos', 'word'].includes(property)) { + if (propertyValue === 'None') {continue;} + positionalPropertiesHTML += `
    subdirectory_arrow_right${property}: ${propertyValue}`; + } else { + structuralPropertiesHTML += `
    ${property}: ${propertyValue}`; + if (!(`${property}_lookup` in subcorpus.p.lookups)) {continue;} + for (let [subproperty, subpropertyValue] of Object.entries(subcorpus.p.lookups[`${property}_lookup`][propertyValue])) { + if (subpropertyValue === 'NULL') {continue;} + structuralPropertiesHTML += `
    subdirectory_arrow_right${subproperty}: ${subpropertyValue}` + } + } + } + positionalPropertiesHTML += '

    '; + structuralPropertiesHTML += '

    '; + M.Tooltip.init( + pAttrElement, + {html: positionalPropertiesHTML + structuralPropertiesHTML} + ); + } + } + } + + setTokenRepresentation() { + let subcorpus = this.data.subcorpora[this.settings.selectedSubcorpus]; + for (let pAttrElement of this.elements.subcorpusItems.querySelectorAll('.p-attr')) { + let pAttr = subcorpus.p.lookups.cpos_lookup[pAttrElement.dataset.cpos]; + pAttrElement.innerText = pAttr[this.settings.tokenRepresentation]; + } + } +} diff --git a/app/static/js/nopaque/CorpusAnalysis/CorpusAnalysisReader.js b/app/static/js/nopaque/CorpusAnalysis/CorpusAnalysisReader.js new file mode 100644 index 00000000..2b2a5843 --- /dev/null +++ b/app/static/js/nopaque/CorpusAnalysis/CorpusAnalysisReader.js @@ -0,0 +1,270 @@ +class CorpusAnalysisReader { + name = 'Reader'; + + constructor(app) { + this.app = app; + + this.data = {}; + + this.elements = { + // TODO: Prefix elements with "corpus-analysis-app-" + container: document.querySelector('#reader-extension-container'), + error: document.querySelector('#reader-extension-error'), + form: document.querySelector('#reader-extension-form'), + progress: document.querySelector('#reader-extension-progress'), + corpus: document.querySelector('#reader-extension-corpus'), + corpusPagination: document.querySelector('#reader-extension-corpus-pagination') + }; + + this.settings = { + perPage: parseInt(this.elements.form['per-page'].value), + textStyle: parseInt(this.elements.form['text-style'].value), + tokenRepresentation: this.elements.form['token-representation'].value + } + + this.app.registerExtension(this); + } + + init() { + // Init data + this.data.corpus = this.app.data.corpus; + this.data.subcorpora = {}; + // Add event listeners + this.elements.form.addEventListener('submit', (event) => { + event.preventDefault(); + this.app.disableActionElements(); + this.elements.error.innerText = ''; + this.elements.error.classList.add('hide'); + this.elements.progress.classList.remove('hide'); + 
this.data.corpus.o.paginate(1, this.settings.perPage) + .then( + paginatedCorpus => { + this.data.corpus.p = paginatedCorpus; + this.renderCorpus(); + this.renderCorpusPagination(); + this.elements.progress.classList.add('hide'); + this.app.enableActionElements(); + }, + error => { + this.elements.error.innerText = JSON.stringify(error); + this.elements.error.classList.remove('hide'); + if ('payload' in error && 'code' in error.payload && 'msg' in error.payload) { + nopaque.appClient.flash(`${error.payload.code}: ${error.payload.msg}`, 'error'); + } + this.elements.progress.classList.add('hide'); + this.app.enableActionElements(); + } + ); + }); + this.elements.form.addEventListener('change', event => { + if (event.target === this.elements.form['per-page']) { + this.settings.perPage = parseInt(this.elements.form['per-page'].value); + this.elements.form.submit.click(); + } + if (event.target === this.elements.form['text-style']) { + this.settings.textStyle = parseInt(this.elements.form['text-style'].value); + this.setTextStyle(); + } + if (event.target === this.elements.form['token-representation']) { + this.settings.tokenRepresentation = this.elements.form['token-representation'].value; + this.setTokenRepresentation(); + } + }); + // Load initial data + this.elements.form.submit.click(); + } + + clearCorpus() { + // Destroy with .p-attr elements associated Materialize tooltips + for (let pAttrElement of this.elements.corpus.querySelectorAll('.p-attr.tooltipped')) { + M.Tooltip.getInstance(pAttrElement)?.destroy(); + } + this.elements.corpus.innerHTML = ` +

    + searchNothing here...
    + No text available. +

    + `.trim(); + } + + renderCorpus() { + this.clearCorpus(); + let item = this.data.corpus.p.items[0]; + this.elements.corpus.innerHTML += ` +

    ${this.cposRange2HTML(item[0], item[item.length - 1])}

    + `.trim(); + this.setTextStyle(); + this.setTokenRepresentation(); + } + + clearCorpusPagination() { + this.elements.corpusPagination.innerHTML = ''; + this.elements.corpusPagination.classList.add('hide'); + } + + renderCorpusPagination() { + this.clearCorpusPagination(); + if (this.data.corpus.p.pages === 0) {return;} + this.elements.corpusPagination.innerHTML += ` +
  • + + first_page + +
  • + `.trim(); + this.elements.corpusPagination.innerHTML += ` +
  • + + chevron_left + +
  • + `.trim(); + for (let i = 1; i <= this.data.corpus.p.pages; i++) { + this.elements.corpusPagination.innerHTML += ` +
  • + ${i} +
  • + `.trim(); + } + this.elements.corpusPagination.innerHTML += ` +
  • + + chevron_right + +
  • + `.trim(); + this.elements.corpusPagination.innerHTML += ` +
  • + + last_page + +
  • + `.trim(); + for (let paginateTriggerElement of this.elements.corpusPagination.querySelectorAll('.pagination-trigger[data-target]')) { + paginateTriggerElement.addEventListener('click', event => { + event.preventDefault(); + let page = parseInt(paginateTriggerElement.dataset.target); + this.page(page); + }); + } + this.elements.corpusPagination.classList.remove('hide'); + } + + cposRange2HTML(firstCpos, lastCpos) { + let prevPAttr, pAttr, nextPAttr; + let isEntityStart, isEntityEnd; + let html = ''; + for (let cpos = firstCpos; cpos <= lastCpos; cpos++) { + prevPAttr = cpos > firstCpos ? this.data.corpus.p.lookups.cpos_lookup[cpos - 1] : null; + pAttr = this.data.corpus.p.lookups.cpos_lookup[cpos]; + nextPAttr = cpos < lastCpos ? this.data.corpus.p.lookups.cpos_lookup[cpos + 1] : null; + isEntityStart = 'ent' in pAttr && pAttr.ent !== prevPAttr?.ent; + isEntityEnd = 'ent' in pAttr && pAttr.ent !== nextPAttr?.ent; + // Add a space before pAttr + if (cpos !== firstCpos || pAttr.simple_pos !== 'PUNCT') {html += ' ';} + // Add entity start + if (isEntityStart) { + html += ``; + } + // Add pAttr + html += ``; + // Add entity end + if (isEntityEnd) { + html += ` ${this.data.corpus.p.lookups.ent_lookup[pAttr.ent].type}`; + html += ''; + } + } + return html; + } + + page(pageNum, callback) { + if (this.data.corpus.p.page === pageNum && typeof callback === 'function') { + callback(); + return; + } + this.app.disableActionElements(); + this.elements.progress.classList.remove('hide'); + this.data.corpus.o.paginate(pageNum, this.settings.perPage) + .then( + paginatedCorpus => { + this.data.corpus.p = paginatedCorpus; + this.renderCorpus(); + this.renderCorpusPagination(); + this.elements.progress.classList.add('hide'); + this.app.enableActionElements(); + if (typeof callback === 'function') {callback();} + } + ) + } + + setTextStyle() { + if (this.settings.textStyle >= 0) { + // Destroy with .p-attr elements associated Materialize tooltips + for (let pAttrElement of 
this.elements.corpus.querySelectorAll('.p-attr.tooltipped')) { + M.Tooltip.getInstance(pAttrElement)?.destroy(); + } + // Set basic styling on .p-attr elements + for (let pAttrElement of this.elements.corpus.querySelectorAll('.p-attr')) { + pAttrElement.setAttribute('class', 'p-attr'); + } + // Set basic styling on .s-attr[data-type="ent"] elements + for (let entElement of this.elements.corpus.querySelectorAll('.s-attr[data-type="ent"]')) { + entElement.querySelector('.s-attr[data-type="ent_type"]').classList.add('hide'); + entElement.removeAttribute('style'); + entElement.setAttribute('class', 's-attr'); + } + } + if (this.settings.textStyle >= 1) { + // Set advanced styling on .s-attr[data-type="ent"] elements + for (let entElement of this.elements.corpus.querySelectorAll('.s-attr[data-type="ent"]')) { + let ent = this.data.corpus.p.lookups.ent_lookup[entElement.dataset.id]; + entElement.classList.add('chip'); + entElement.style.backgroundColor = CorpusAnalysisApp.entitiyColors[ent.type]; + entElement.querySelector('.s-attr[data-type="ent_type"]').classList.remove('hide'); + } + } + if (this.settings.textStyle >= 2) { + // Set advanced styling on .p-attr elements + for (let pAttrElement of this.elements.corpus.querySelectorAll('.p-attr')) { + pAttrElement.classList.add('chip', 'hoverable', 'tooltipped'); + let cpos = pAttrElement.dataset.cpos; + let pAttr = this.data.corpus.p.lookups.cpos_lookup[cpos]; + let positionalPropertiesHTML = ` +

    + Positional properties
    + Token: ${cpos} + `.trim(); + let structuralPropertiesHTML = ` +

    + Structural properties + `.trim(); + for (let [property, propertyValue] of Object.entries(pAttr)) { + if (['lemma', 'ner', 'pos', 'simple_pos', 'word'].includes(property)) { + if (propertyValue === 'None') {continue;} + positionalPropertiesHTML += `
    subdirectory_arrow_right${property}: ${propertyValue}`; + } else { + structuralPropertiesHTML += `
    ${property}: ${propertyValue}`; + if (!(`${property}_lookup` in this.data.corpus.p.lookups)) {continue;} + for (let [subproperty, subpropertyValue] of Object.entries(this.data.corpus.p.lookups[`${property}_lookup`][propertyValue])) { + if (subpropertyValue === 'NULL') {continue;} + structuralPropertiesHTML += `
    subdirectory_arrow_right${subproperty}: ${subpropertyValue}` + } + } + } + positionalPropertiesHTML += '

    '; + structuralPropertiesHTML += '

    '; + M.Tooltip.init( + pAttrElement, + {html: positionalPropertiesHTML + structuralPropertiesHTML} + ); + } + } + } + + setTokenRepresentation() { + for (let pAttrElement of this.elements.corpus.querySelectorAll('.p-attr')) { + let pAttr = this.data.corpus.p.lookups.cpos_lookup[pAttrElement.dataset.cpos]; + pAttrElement.innerText = pAttr[this.settings.tokenRepresentation]; + } + } +} diff --git a/app/static/js/nopaque/RessourceDisplays/CorpusDisplay.js b/app/static/js/nopaque/RessourceDisplays/CorpusDisplay.js index 827fa02b..d6038ef6 100644 --- a/app/static/js/nopaque/RessourceDisplays/CorpusDisplay.js +++ b/app/static/js/nopaque/RessourceDisplays/CorpusDisplay.js @@ -13,7 +13,7 @@ class CorpusDisplay extends RessourceDisplay { this.setLastEditedDate(this.user.data.corpora[this.corpusId].last_edited_date); this.setStatus(this.user.data.corpora[this.corpusId].status); this.setTitle(this.user.data.corpora[this.corpusId].title); - this.setTokenRatio(this.user.data.corpora[this.corpusId].current_nr_of_tokens, this.user.data.corpora[this.corpusId].max_nr_of_tokens); + this.setTokenRatio(this.user.data.corpora[this.corpusId].num_tokens, this.user.data.corpora[this.corpusId].max_num_tokens); } patch(patch) { @@ -53,9 +53,8 @@ class CorpusDisplay extends RessourceDisplay { for (let element of this.displayElement.querySelectorAll('.corpus-title')) {this.setElement(element, title);} } - setTokenRatio(currentNrOfTokens, maxNrOfTokens) { - let tokenRatio = `${currentNrOfTokens}/${maxNrOfTokens}`; - for (let element of this.displayElement.querySelectorAll('.corpus-token-ratio')) {this.setElement(element, tokenRatio);} + setTokenRatio(numTokens, maxNumTokens) { + for (let element of this.displayElement.querySelectorAll('.corpus-token-ratio')) {this.setElement(element, `${numTokens}/${maxNumTokens}`);} } setDescription(description) { diff --git a/app/templates/corpora/analyse_corpus.concordance.html.j2 b/app/templates/corpora/analyse_corpus.concordance.html.j2 new file mode 
100644 index 00000000..4a9cc139 --- /dev/null +++ b/app/templates/corpora/analyse_corpus.concordance.html.j2 @@ -0,0 +1,102 @@ +
    +
    +
    +
    +
    +
    +
    + search + + + +
    +
    + arrow_forward + + +
    +
    +
    +
    + short_text + + +
    +
    + format_list_numbered + + +
    +
    + format_shapes + + +
    +
    + format_quote + + +
    +
    +
    +
    +

     

    + build Query builder + +
    +
    +
    +
    +
    +
    + +
    +
    + +
    +
    +
    +
    +
    +
    +

    +
    +
    + + + + + + + + + + + + +
    SourceLeft contextKWICRight Context
    +
      +
      +
      +
      +
      diff --git a/app/templates/corpora/analyse_corpus.html.j2 b/app/templates/corpora/analyse_corpus.html.j2 index 7666f550..ccf682e0 100644 --- a/app/templates/corpora/analyse_corpus.html.j2 +++ b/app/templates/corpora/analyse_corpus.html.j2 @@ -1,337 +1,73 @@ {% extends "base.html.j2" %} -{% from "corpora/_breadcrumbs.html.j2" import breadcrumbs with context %} {% import "materialize/wtf.html.j2" as wtf %} -{% block main_attribs %} class="service-scheme" data-service="corpus-analysis"{% endblock main_attribs %} +{% block main_attribs %} class="service-scheme" data-service="corpus-analysis" id="corpus-analysis-app-container"{% endblock main_attribs %} {% block page_content %} -
      + + +
      -
      -
      - -
      -
      -
      -
      - search - {{ query_form.query() }} - {{ query_form.query.label }} - - - help - - CQP query language tutorial - - -
      -
      - build Query builder - {{ wtf.render_field(query_form.submit, material_icon='send') }} -
      -
      -
      -
      +

      {{ title }}

      +
      + +
      +
      +
      + list_altConcordance +

      Query your corpus with the CQP query language utilizing a KWIC view.

      - -
      -
      -
      -
      - {# Importing menus for query settings, export etc. #} - {% include 'corpora/interactions/infos.html.j2' %} - {% include 'corpora/interactions/export.html.j2' %} - {% include 'corpora/interactions/create.html.j2' %} - {% include 'corpora/interactions/display.html.j2' %} -
      - {% include 'tables/query_results.html.j2' %} +
      +
      +
      + chrome_reader_modeReader +

      Inspect your corpus in detail with a full text view, including annotations.

      - - -{% include 'corpora/interactions/scroll_to_top.html.j2' %} - - -{% include 'modals/query_builder.html.j2' %} -{% include 'modals/show_metadata.html.j2' %} -{% include 'modals/analysis_init.html.j2' %} -{% include 'modals/export_query_results.html.j2' %} -{% include 'modals/context_modal.html.j2' %} -{% include 'modals/show_corpus_files.html.j2' %} +{% include "corpora/analyse_corpus.reader.html.j2" %} +{% include "corpora/analyse_corpus.concordance.html.j2" %} {% endblock page_content %} +{% block modals %} +{{ super() }} + +{% endblock modals %} + {% block scripts %} {{ super() }} - - +{% endassets %} + -{% endblock %} +{% endblock scripts %} diff --git a/app/templates/corpora/analyse_corpus.reader.html.j2 b/app/templates/corpora/analyse_corpus.reader.html.j2 new file mode 100644 index 00000000..99cdf56e --- /dev/null +++ b/app/templates/corpora/analyse_corpus.reader.html.j2 @@ -0,0 +1,67 @@ +
      +
      +
      +
      +
      +
      +
      +
      +
      + format_list_numbered + + +
      +
      + format_shapes + + +
      +
      + format_quote + + +
      +
      + +
      +
      +
      +
      +

       

      + +
      +
      +
      +
      +
      +
      + +
      +
      +
      +
      +
      +
      + + + + +
      +
        +
        +
        +
        +
        diff --git a/app/templates/corpora/interactions/analysis.html.j2 b/app/templates/corpora/interactions/analysis.html.j2 deleted file mode 100644 index 735ef951..00000000 --- a/app/templates/corpora/interactions/analysis.html.j2 +++ /dev/null @@ -1,30 +0,0 @@ - - -
        -
        Analysis
        -
        -
        -
        - -
        -
        - -
        -
        -
        \ No newline at end of file diff --git a/app/templates/corpora/interactions/cite.html.j2 b/app/templates/corpora/interactions/cite.html.j2 deleted file mode 100644 index 45767854..00000000 --- a/app/templates/corpora/interactions/cite.html.j2 +++ /dev/null @@ -1,30 +0,0 @@ - - -
        -
        Cite Nopaque
        -
        -
        -
        - -
        -
        - -
        -
        -
        \ No newline at end of file diff --git a/app/templates/corpora/interactions/create.html.j2 b/app/templates/corpora/interactions/create.html.j2 deleted file mode 100644 index 4d4172b0..00000000 --- a/app/templates/corpora/interactions/create.html.j2 +++ /dev/null @@ -1,24 +0,0 @@ - - -
        -
        Create
        -
        -
        -
        -

        Add matches to Sub-Results with the - add - button in the list or inspect view. -

        -
        -
        -
        -

        matches added for sub-results:

        - -
        -
        -
        -
        \ No newline at end of file diff --git a/app/templates/corpora/interactions/display.html.j2 b/app/templates/corpora/interactions/display.html.j2 deleted file mode 100644 index 8eb96e8e..00000000 --- a/app/templates/corpora/interactions/display.html.j2 +++ /dev/null @@ -1,30 +0,0 @@ -{% import "materialize/wtf.html.j2" as wtf %} - - -
        -
        Display
        -
        -
        -
        -
        - {{ wtf.render_field(display_options_form.results_per_page, - material_icon='format_list_numbered') }} - {{ wtf.render_field(display_options_form.result_context, - material_icon='short_text') }} -
        -
        - {{ display_options_form.expert_mode.label.text }} -
        -
        -
        - -
        -
        -
        -
        -
        -
        -
        diff --git a/app/templates/corpora/interactions/export.html.j2 b/app/templates/corpora/interactions/export.html.j2 deleted file mode 100644 index c6102329..00000000 --- a/app/templates/corpora/interactions/export.html.j2 +++ /dev/null @@ -1,79 +0,0 @@ - - -
        -
        Export
        -
        -
        -
        -
        - Full context - - info_outline - -
        -
        -
        - -
        -
        -
        -
        - - -
        -
        -
        -
        -
        -
        -
        - - -
        -
        -
        -
        -
        -
        -
        -
        \ No newline at end of file diff --git a/app/templates/corpora/interactions/infos.html.j2 b/app/templates/corpora/interactions/infos.html.j2 deleted file mode 100644 index 83959864..00000000 --- a/app/templates/corpora/interactions/infos.html.j2 +++ /dev/null @@ -1,59 +0,0 @@ - - -
        -
        Infos
        -
        -
        -
        - - dvr - / - - matches loaded - -
        -
        -
        -
        -
        -
        -
        - -
        -
        - -
        -
        -

        - help - Server is sending your results. - Functions like "Export Results" and "Match Inspect" will be - available after all matches have been loaded. -

        -
        -
        -
        \ No newline at end of file diff --git a/app/templates/corpora/interactions/scroll_to_top.html.j2 b/app/templates/corpora/interactions/scroll_to_top.html.j2 deleted file mode 100644 index 5d0ea728..00000000 --- a/app/templates/corpora/interactions/scroll_to_top.html.j2 +++ /dev/null @@ -1,5 +0,0 @@ - \ No newline at end of file diff --git a/app/templates/corpora/query_results/_breadcrumbs.html.j2 b/app/templates/corpora/query_results/_breadcrumbs.html.j2 deleted file mode 100644 index 9fb0464a..00000000 --- a/app/templates/corpora/query_results/_breadcrumbs.html.j2 +++ /dev/null @@ -1,12 +0,0 @@ -{% set breadcrumbs %} -
      • navigate_next
      • -
      • My query results
      • -
      • navigate_next
      • -{% if request.path == url_for('.add_query_result') %} -
      • {{ title }}
      • -{% elif request.path == url_for('.query_result', query_result_id=query_result.id) %} -
      • {{ query_result.title }}
      • -{% elif request.path == url_for('.inspect_query_result', query_result_id=query_result.id) %} -
      • {{ title }}
      • -{% endif %} -{% endset %} diff --git a/app/templates/corpora/query_results/add_query_result.html.j2 b/app/templates/corpora/query_results/add_query_result.html.j2 deleted file mode 100644 index 61933816..00000000 --- a/app/templates/corpora/query_results/add_query_result.html.j2 +++ /dev/null @@ -1,56 +0,0 @@ -{% extends "base.html.j2" %} -{% from "corpora/query_results/_breadcrumbs.html.j2" import breadcrumbs with context %} -{% import "materialize/wtf.html.j2" as wtf %} - -{% block main_attribs %} class="service-scheme" data-service="corpus-analysis"{% endblock main_attribs %} - -{% block page_content %} -
        -
        -
        -

        {{ title }}

        -
        - -
        -

        Fill out the following form to upload and view your exported query data from the corpus analsis.

        - arrow_backBack to dashboard -
        - -
        -
        -
        -
        - {{ form.hidden_tag() }} -
        -
        - {{ wtf.render_field(form.title, data_length='32', material_icon='title') }} -
        -
        - {{ wtf.render_field(form.description, data_length='255', material_icon='description') }} -
        -
        - {{ wtf.render_field(form.file, accept='.json', placeholder='Choose your .json file') }} -
        -
        -
        -
        - {{ wtf.render_field(form.submit, material_icon='send') }} -
        -
        -
        -
        -
        -
        - - -{% endblock %} diff --git a/app/templates/corpora/query_results/inspect.html.j2 b/app/templates/corpora/query_results/inspect.html.j2 deleted file mode 100644 index afc00f3b..00000000 --- a/app/templates/corpora/query_results/inspect.html.j2 +++ /dev/null @@ -1,241 +0,0 @@ -{% extends "base.html.j2" %} -{% from "corpora/query_results/_breadcrumbs.html.j2" import breadcrumbs with context %} - -{% block main_attribs %} class="service-scheme" data-service="corpus-analysis"{% endblock main_attribs %} - -{% block page_content %} -
        -
        -
        -
        - -
        -
        -
        -
        - search - - -
        -
        -
        -
        -
        -
        -
        -
        -
        -
        - - -
        -
        -
        -
        - {% include 'corpora/interactions/infos.html.j2' %} - {% include 'corpora/interactions/display.html.j2' %} - {% include 'corpora/interactions/analysis.html.j2' %} - {% include 'corpora/interactions/cite.html.j2' %} -
        - {# Hide those because they are not needed when inspecting results. - But some of their elements are being asked for by the client. #} - {% include 'corpora/interactions/export.html.j2' %} - {% include 'corpora/interactions/create.html.j2' %} -
        -
        - {% include 'tables/query_results.html.j2' %} -
        -
        -
        -
        - -{# Import modals #} -{% include 'modals/show_metadata.html.j2' %} -{% include 'modals/show_corpus_files.html.j2' %} -{% include 'modals/context_modal.html.j2' %} - - -{% include 'corpora/interactions/scroll_to_top.html.j2' %} -{% endblock page_content %} - - -{% block scripts %} -{{ super() }} - -{% endblock %} diff --git a/app/templates/corpora/query_results/query_result.html.j2 b/app/templates/corpora/query_results/query_result.html.j2 deleted file mode 100644 index 51377129..00000000 --- a/app/templates/corpora/query_results/query_result.html.j2 +++ /dev/null @@ -1,131 +0,0 @@ -{% extends "base.html.j2" %} -{% from "corpora/query_results/_breadcrumbs.html.j2" import breadcrumbs with context %} - -{% block main_attribs %} class="service-scheme" data-service="corpus-analysis"{% endblock main_attribs %} - -{% block page_content %} -
        -
        -
        -

        {{ title }}

        -
        - -
        -

        Below the metadata for the results from the Corpus - {{ query_result.query_metadata.corpus_name }} generated with the query - {{ query_result.query_metadata.query }} are shown. -

        -
        - -
        -
        - -
        - - - - - - - - - {% for pair in query_result.query_metadata|dictsort %} - - - {% if pair[0] == 'corpus_all_texts' - or pair[0] == 'text_lookup' %} - - {% else %} - - {% endif %} - - {% endfor %} - -
        Metadata DescriptionValue
        {{ pair[0] }} - - {% for key, value in pair[1].items() %} - - - - {% endfor %} -
        - {{ value['title'] }} written - by {{ value['author'] }} - in {{ value['publishing_year'] }} - More - - info_outline - - -
        -
        {{ pair[1] }}
        -
        - -
        -
        -
        -
        - - -{% endblock page_content %} - -{% block scripts %} -{{ super() }} - -{% endblock %} diff --git a/app/templates/main/dashboard.html.j2 b/app/templates/main/dashboard.html.j2 index f46b1101..b7ece533 100644 --- a/app/templates/main/dashboard.html.j2 +++ b/app/templates/main/dashboard.html.j2 @@ -82,7 +82,7 @@
          diff --git a/app/templates/materialize/wtf.html.j2 b/app/templates/materialize/wtf.html.j2 index 428a4ee8..730fd337 100644 --- a/app/templates/materialize/wtf.html.j2 +++ b/app/templates/materialize/wtf.html.j2 @@ -31,7 +31,7 @@ {% endif %} {% for error in field.errors %} - {{ error }} + {{ error }} {% endfor %}
          {% endmacro %} @@ -47,7 +47,7 @@
          {% for error in field.errors %} - {{ error }} + {{ error }} {% endfor %}
          {% endmacro %} @@ -68,7 +68,7 @@ {{ field.label }} {% endif %} {% for error in field.errors %} - {{ error }} + {{ error }} {% endfor %} {% endmacro %} diff --git a/app/templates/tables/query_results.html.j2 b/app/templates/tables/query_results.html.j2 deleted file mode 100644 index 1b7503dc..00000000 --- a/app/templates/tables/query_results.html.j2 +++ /dev/null @@ -1,21 +0,0 @@ - - -
          -
            - - - - - - - - - - - - - -
            Nr.TitleLeft contextMatch{# Actions #}Right Context
            -
              -
              \ No newline at end of file diff --git a/migrations/versions/a4b3cf4ab098_.py b/migrations/versions/a4b3cf4ab098_.py new file mode 100644 index 00000000..3876e3ef --- /dev/null +++ b/migrations/versions/a4b3cf4ab098_.py @@ -0,0 +1,30 @@ +"""empty message + +Revision ID: a4b3cf4ab098 +Revises: c384d7b3268a +Create Date: 2021-09-23 13:14:16.227784 + +""" +from alembic import op +import sqlalchemy as sa + + +# revision identifiers, used by Alembic. +revision = 'a4b3cf4ab098' +down_revision = 'c384d7b3268a' +branch_labels = None +depends_on = None + + +def upgrade(): + # ### commands auto generated by Alembic - please adjust! ### + op.add_column('corpora', sa.Column('num_tokens', sa.Integer(), nullable=True)) + op.drop_column('corpora', 'current_nr_of_tokens') + # ### end Alembic commands ### + + +def downgrade(): + # ### commands auto generated by Alembic - please adjust! ### + op.add_column('corpora', sa.Column('current_nr_of_tokens', sa.INTEGER(), autoincrement=False, nullable=True)) + op.drop_column('corpora', 'num_tokens') + # ### end Alembic commands ### diff --git a/migrations/versions/be010d5d708d_.py b/migrations/versions/be010d5d708d_.py new file mode 100644 index 00000000..4a2d93b8 --- /dev/null +++ b/migrations/versions/be010d5d708d_.py @@ -0,0 +1,28 @@ +"""empty message + +Revision ID: be010d5d708d +Revises: a4b3cf4ab098 +Create Date: 2021-09-24 09:34:54.173653 + +""" +from alembic import op +import sqlalchemy as sa + + +# revision identifiers, used by Alembic. +revision = 'be010d5d708d' +down_revision = 'a4b3cf4ab098' +branch_labels = None +depends_on = None + + +def upgrade(): + # ### commands auto generated by Alembic - please adjust! ### + op.add_column('corpora', sa.Column('num_analysis_sessions', sa.Integer(), nullable=True)) + # ### end Alembic commands ### + + +def downgrade(): + # ### commands auto generated by Alembic - please adjust! ### + op.drop_column('corpora', 'num_analysis_sessions') + # ### end Alembic commands ###