diff --git a/LICENSE b/LICENSE
new file mode 100644
index 00000000..a374dbc2
--- /dev/null
+++ b/LICENSE
@@ -0,0 +1,21 @@
+MIT License
+
+Copyright (c) 2021 Bielefeld University - CRC 1288 - INF
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
diff --git a/app/cli.py b/app/cli.py
index f077c645..17720f58 100644
--- a/app/cli.py
+++ b/app/cli.py
@@ -1,4 +1,5 @@
-from .models import Role
+from . import db
+from .models import Corpus, Role
from flask_migrate import upgrade
@@ -19,6 +20,9 @@ def register(app):
@daemon.command('run')
def run_daemon():
"""Run daemon"""
+ for corpus in Corpus.query.filter(Corpus.num_analysis_sessions > 0):
+ corpus.num_analysis_sessions = 0
+ db.session.commit()
from app.daemon import Daemon
daemon = Daemon()
daemon.run()
diff --git a/app/corpora/__init__.py b/app/corpora/__init__.py
index af44719d..83cecec5 100644
--- a/app/corpora/__init__.py
+++ b/app/corpora/__init__.py
@@ -2,4 +2,4 @@ from flask import Blueprint
bp = Blueprint('corpora', __name__)
-from . import events, routes # noqa
+from . import cqi_over_socketio, routes # noqa
diff --git a/app/corpora/cqi_over_socketio/__init__.py b/app/corpora/cqi_over_socketio/__init__.py
new file mode 100644
index 00000000..2cce7834
--- /dev/null
+++ b/app/corpora/cqi_over_socketio/__init__.py
@@ -0,0 +1,108 @@
+from app import db, socketio
+from app.decorators import socketio_login_required
+from app.models import Corpus
+from flask import session
+from flask_login import current_user
+from flask_socketio import ConnectionRefusedError
+from threading import Lock
+import cqi
+
+
+'''
+This package tunnels the Corpus Query interface (CQi) protocol through
+Socket.IO (SIO) by wrapping each CQi function in a separate SIO event.
+
+This module only handles the SIO connect/disconnect, which handles the setup
+and teardown of necessary resources for later use. Each CQi function has a
+corresponding SIO event. The event handlers are spread across the different
+modules within this package.
+
+Basic concept:
+1. A client connects to the SIO namespace and provides the id of a corpus to be
+ analysed.
+ 1.1 The analysis session counter of the corpus is incremented.
+ 1.2 A CQiClient and a (Mutex) Lock belonging to it is created.
+ 1.3 Wait until the CQP server is running.
+ 1.4 Connect the CQiClient to the server.
+ 1.5 Save the CQiClient and the Lock in the session for subsequent use.
+2. A client emits an event and may provide a single json object with necessary
+ arguments for the targeted CQi function.
+3. A SIO event handler (decorated with cqi_over_socketio) gets executed.
+ - The event handler function defines all arguments. Since the client
+ sends them as a single json object, the decorator decomposes it to fit
+ the function's signature. This also includes type checking and proper
+ use of the lock (acquire/release) mechanism.
+4. Wait for more events
+5. The client disconnects from the SIO namespace
+ 5.1 The analysis session counter of the corpus is decremented.
+ 5.2 The CQiClient and (Mutex) Lock belonging to it are torn down.
+'''
+
+
+NAMESPACE = '/corpora/corpus/corpus_analysis'
+
+
+# Import all CQi over Socket.IO event handlers
+from .cqi_corpora_corpus_subcorpora import * # noqa
+from .cqi_corpora_corpus_structural_attributes import * # noqa
+from .cqi_corpora_corpus_positional_attributes import * # noqa
+from .cqi_corpora_corpus_alignment_attributes import * # noqa
+from .cqi_corpora_corpus import * # noqa
+from .cqi_corpora import * # noqa
+from .cqi import * # noqa
+
+
+@socketio.on('connect', namespace=NAMESPACE)
+@socketio_login_required
+def connect(auth):
+    """Open a corpus analysis session for the connecting client.
+
+    The auth payload is used in a hacky way: it carries the id of the corpus
+    for which an analysis session should be started. Once the access checks
+    pass, the analysis session counter of the corpus is incremented and the
+    handler polls (20 retries, 3 seconds apart) until the corpus status is
+    'analysing'. A CQiClient and the Lock guarding it are then stored in the
+    session under the key 'd' for use by the CQi event handlers.
+
+    Raises:
+        ConnectionRefusedError: with the message 'Bad Request' (no corpus id
+            in auth), 'Not Found' (unknown corpus), 'Forbidden' (user is
+            neither creator nor administrator), 'Failed Dependency' (corpus
+            status does not allow analysis) or 'Request Timeout' (corpus did
+            not reach the status 'analysing' in time).
+    """
+    # A client may connect without (or with a malformed) auth payload.
+    if not isinstance(auth, dict) or 'corpus_id' not in auth:
+        raise ConnectionRefusedError('Bad Request')
+    corpus_id = auth['corpus_id']
+    corpus = Corpus.query.get(corpus_id)
+    if corpus is None:
+        raise ConnectionRefusedError('Not Found')
+    if not (corpus.creator == current_user or current_user.is_administrator()):
+        raise ConnectionRefusedError('Forbidden')
+    if corpus.status not in ['prepared', 'start analysis', 'analysing', 'stop analysis']:
+        raise ConnectionRefusedError('Failed Dependency')
+    if corpus.num_analysis_sessions is None:
+        corpus.num_analysis_sessions = 0
+        db.session.commit()
+    # Assigning the column expression makes the database do the increment.
+    corpus.num_analysis_sessions = Corpus.num_analysis_sessions + 1
+    db.session.commit()
+    retry_counter = 20
+    while corpus.status != 'analysing':
+        if retry_counter == 0:
+            # Undo the increment from above before giving up.
+            corpus.num_analysis_sessions = Corpus.num_analysis_sessions - 1
+            db.session.commit()
+            # Returning a dict here would accept the connection without a
+            # prepared session; the connection has to be refused explicitly.
+            raise ConnectionRefusedError('Request Timeout')
+        socketio.sleep(3)
+        retry_counter -= 1
+        db.session.refresh(corpus)
+    cqi_client = cqi.CQiClient('cqpserver_{}'.format(corpus_id))
+    session['d'] = {
+        'corpus_id': corpus_id,
+        'cqi_client': cqi_client,
+        'cqi_client_lock': Lock(),
+    }
+
+
+@socketio.on('disconnect', namespace=NAMESPACE)
+def disconnect():
+    """Tear down the corpus analysis session of the disconnecting client.
+
+    Disconnects the CQiClient stored in the session (CQi errors and broken
+    pipes are ignored, the server may already be gone), decrements the
+    analysis session counter of the corpus and removes the session data.
+    Does nothing if no analysis session data was stored for this client.
+    """
+    d = session.pop('d', None)
+    if d is None:
+        # connect() never stored session data, so there is nothing to undo.
+        return
+    try:
+        # The context manager releases the lock even if disconnect() raises.
+        with d['cqi_client_lock']:
+            d['cqi_client'].disconnect()
+    except (cqi.errors.CQiException, BrokenPipeError):
+        pass
+    corpus = Corpus.query.get(d['corpus_id'])
+    if corpus is not None:
+        # Assigning the column expression makes the database do the decrement.
+        corpus.num_analysis_sessions = Corpus.num_analysis_sessions - 1
+        db.session.commit()
diff --git a/app/corpora/cqi_over_socketio/cqi.py b/app/corpora/cqi_over_socketio/cqi.py
new file mode 100644
index 00000000..f6edb5fe
--- /dev/null
+++ b/app/corpora/cqi_over_socketio/cqi.py
@@ -0,0 +1,43 @@
+from app import socketio
+from app.decorators import socketio_login_required
+from socket import gaierror
+from . import NAMESPACE as ns
+from .utils import cqi_over_socketio
+import cqi
+
+
+@socketio.on('cqi.connect', namespace=ns)
+@socketio_login_required
+@cqi_over_socketio
+def cqi_connect(cqi_client: cqi.CQiClient):
+ """Connect the CQi client and report the resulting CQi status.
+
+ Returns a 500 response carrying the first two arguments of the exception
+ as code and description when connecting raises socket.gaierror. Otherwise
+ returns a 200 response whose payload holds the CQi status code and the
+ entry found for it in cqi.api.specification.lookup.
+ """
+ try:
+ cqi_status = cqi_client.connect()
+ except gaierror as e:
+ return {
+ 'code': 500,
+ 'msg': 'Internal Server Error',
+ 'payload': {'code': e.args[0], 'desc': e.args[1]}
+ }
+ payload = {'code': cqi_status,
+ 'msg': cqi.api.specification.lookup[cqi_status]}
+ return {'code': 200, 'msg': 'OK', 'payload': payload}
+
+
+@socketio.on('cqi.disconnect', namespace=ns)
+@socketio_login_required
+@cqi_over_socketio
+def cqi_disconnect(cqi_client: cqi.CQiClient):
+ """Disconnect the CQi client and return the resulting CQi status."""
+ cqi_status = cqi_client.disconnect()
+ payload = {'code': cqi_status,
+ 'msg': cqi.api.specification.lookup[cqi_status]}
+ return {'code': 200, 'msg': 'OK', 'payload': payload}
+
+
+@socketio.on('cqi.ping', namespace=ns)
+@socketio_login_required
+@cqi_over_socketio
+def cqi_ping(cqi_client: cqi.CQiClient):
+ """Ping through the CQi client and return the resulting CQi status."""
+ cqi_status = cqi_client.ping()
+ payload = {'code': cqi_status,
+ 'msg': cqi.api.specification.lookup[cqi_status]}
+ return {'code': 200, 'msg': 'OK', 'payload': payload}
diff --git a/app/corpora/cqi_over_socketio/cqi_corpora.py b/app/corpora/cqi_over_socketio/cqi_corpora.py
new file mode 100644
index 00000000..d0f82e96
--- /dev/null
+++ b/app/corpora/cqi_over_socketio/cqi_corpora.py
@@ -0,0 +1,22 @@
+from app import socketio
+from app.decorators import socketio_login_required
+from . import NAMESPACE as ns
+from .utils import cqi_over_socketio
+import cqi
+
+
+@socketio.on('cqi.corpora.get', namespace=ns)
+@socketio_login_required
+@cqi_over_socketio
+def cqi_corpora_get(cqi_client: cqi.CQiClient, corpus_name: str):
+ """Return a copy of the attrs of the CQi corpus named corpus_name."""
+ cqi_corpus = cqi_client.corpora.get(corpus_name)
+ payload = {**cqi_corpus.attrs}
+ return {'code': 200, 'msg': 'OK', 'payload': payload}
+
+
+@socketio.on('cqi.corpora.list', namespace=ns)
+@socketio_login_required
+@cqi_over_socketio
+def cqi_corpora_list(cqi_client: cqi.CQiClient):
+ """Return a list with a copy of the attrs of every listed CQi corpus."""
+ payload = [{**x.attrs} for x in cqi_client.corpora.list()]
+ return {'code': 200, 'msg': 'OK', 'payload': payload}
diff --git a/app/corpora/cqi_over_socketio/cqi_corpora_corpus.py b/app/corpora/cqi_over_socketio/cqi_corpora_corpus.py
new file mode 100644
index 00000000..bfe8437c
--- /dev/null
+++ b/app/corpora/cqi_over_socketio/cqi_corpora_corpus.py
@@ -0,0 +1,85 @@
+from app import db, socketio
+from app.decorators import socketio_login_required
+from app.models import Corpus
+from flask import session
+from . import NAMESPACE as ns
+from .utils import cqi_over_socketio, lookups_by_cpos
+import cqi
+import math
+
+
+@socketio.on('cqi.corpora.corpus.drop', namespace=ns)
+@socketio_login_required
+@cqi_over_socketio
+def cqi_corpora_corpus_drop(cqi_client: cqi.CQiClient, corpus_name: str):
+ """Drop the CQi corpus named corpus_name and return the CQi status."""
+ cqi_corpus = cqi_client.corpora.get(corpus_name)
+ cqi_status = cqi_corpus.drop()
+ payload = {'code': cqi_status,
+ 'msg': cqi.api.specification.lookup[cqi_status]}
+ return {'code': 200, 'msg': 'OK', 'payload': payload}
+
+
+@socketio.on('cqi.corpora.corpus.query', namespace=ns)
+@socketio_login_required
+@cqi_over_socketio
+def cqi_corpora_corpus_query(cqi_client: cqi.CQiClient, corpus_name: str, subcorpus_name: str, query: str): # noqa
+ """Run query on the CQi corpus and return the resulting CQi status.
+
+ subcorpus_name and query are passed to the corpus' query method as is;
+ presumably the result is stored as a subcorpus of that name (confirm
+ against the cqi package).
+ """
+ cqi_corpus = cqi_client.corpora.get(corpus_name)
+ cqi_status = cqi_corpus.query(subcorpus_name, query)
+ payload = {'code': cqi_status,
+ 'msg': cqi.api.specification.lookup[cqi_status]}
+ return {'code': 200, 'msg': 'OK', 'payload': payload}
+
+
+###############################################################################
+# nopaque specific CQi extensions #
+###############################################################################
+@socketio.on('cqi.corpora.corpus.update_db', namespace=ns)
+@socketio_login_required
+@cqi_over_socketio
+def cqi_corpora_corpus_update_db(cqi_client: cqi.CQiClient, corpus_name: str):
+    """Store the size of a CQi corpus as token count of the database corpus.
+
+    The CQi corpus is looked up by corpus_name (like in every other handler
+    of this module), the database corpus is the one this analysis session
+    was opened for. Returns a 200 response without payload.
+    """
+    cqi_corpus = cqi_client.corpora.get(corpus_name)
+    corpus = Corpus.query.get(session['d']['corpus_id'])
+    corpus.num_tokens = cqi_corpus.attrs['size']
+    db.session.commit()
+    # Answer like the other handlers so the client's callback gets a status.
+    return {'code': 200, 'msg': 'OK'}
+
+
+@socketio.on('cqi.corpora.corpus.paginate', namespace=ns)
+@socketio_login_required
+@cqi_over_socketio
+def cqi_corpora_corpus_paginate(cqi_client: cqi.CQiClient, corpus_name: str, page: int = 1, per_page: int = 20): # noqa
+ """Return one page of corpus positions of a CQi corpus with their lookups.
+
+ Answers 416 if page or per_page is smaller than 1 or if page lies behind
+ the last page of a non-empty corpus. Otherwise the payload contains the
+ corpus positions of the page, the lookups built by lookups_by_cpos and
+ the pagination metadata (total, per_page, pages, page, has_prev,
+ has_next, prev_num, next_num).
+ """
+ cqi_corpus = cqi_client.corpora.get(corpus_name)
+ # Sanity checks
+ if (
+ per_page < 1
+ or page < 1
+ or (
+ cqi_corpus.attrs['size'] > 0
+ and page > math.ceil(cqi_corpus.attrs['size'] / per_page)
+ )
+ ):
+ return {'code': 416, 'msg': 'Range Not Satisfiable'}
+ # range() below excludes last_cpos, so the page holds at most per_page cpos
+ first_cpos = (page - 1) * per_page
+ last_cpos = min(cqi_corpus.attrs['size'], first_cpos + per_page)
+ cpos_list = [*range(first_cpos, last_cpos)]
+ lookups = lookups_by_cpos(cqi_corpus, cpos_list)
+ payload = {}
+ # the items for the current page
+ # NOTE(review): items is a one-element list wrapping the whole cpos list,
+ # confirm that the client expects this nesting
+ payload['items'] = [cpos_list]
+ # the lookups for the items
+ payload['lookups'] = lookups
+ # the total number of items matching the query
+ payload['total'] = cqi_corpus.attrs['size']
+ # the number of items to be displayed on a page.
+ payload['per_page'] = per_page
+ # The total number of pages
+ payload['pages'] = math.ceil(payload['total'] / payload['per_page'])
+ # the current page number (1 indexed)
+ payload['page'] = page if payload['pages'] > 0 else None
+ # True if a previous page exists
+ payload['has_prev'] = payload['page'] > 1 if payload['page'] else False
+ # True if a next page exists.
+ payload['has_next'] = payload['page'] < payload['pages'] if payload['page'] else False # noqa
+ # Number of the previous page.
+ payload['prev_num'] = payload['page'] - 1 if payload['has_prev'] else None
+ # Number of the next page
+ payload['next_num'] = payload['page'] + 1 if payload['has_next'] else None
+ return {'code': 200, 'msg': 'OK', 'payload': payload}
diff --git a/app/corpora/cqi_over_socketio/cqi_corpora_corpus_alignment_attributes.py b/app/corpora/cqi_over_socketio/cqi_corpora_corpus_alignment_attributes.py
new file mode 100644
index 00000000..95be6771
--- /dev/null
+++ b/app/corpora/cqi_over_socketio/cqi_corpora_corpus_alignment_attributes.py
@@ -0,0 +1,24 @@
+from app import socketio
+from app.decorators import socketio_login_required
+from . import NAMESPACE as ns
+from .utils import cqi_over_socketio
+import cqi
+
+
+@socketio.on('cqi.corpora.corpus.alignment_attributes.get', namespace=ns)
+@socketio_login_required
+@cqi_over_socketio
+def cqi_corpora_corpus_alignment_attributes_get(cqi_client: cqi.CQiClient, corpus_name: str, alignment_attribute_name: str): # noqa
+ """Return a copy of the attrs of one alignment attribute of a corpus."""
+ cqi_corpus = cqi_client.corpora.get(corpus_name)
+ cqi_alignment_attribute = cqi_corpus.alignment_attributes.get(alignment_attribute_name) # noqa
+ payload = {**cqi_alignment_attribute.attrs}
+ return {'code': 200, 'msg': 'OK', 'payload': payload}
+
+
+@socketio.on('cqi.corpora.corpus.alignment_attributes.list', namespace=ns)
+@socketio_login_required
+@cqi_over_socketio
+def cqi_corpora_corpus_alignment_attributes_list(cqi_client: cqi.CQiClient, corpus_name: str): # noqa
+ """Return copies of the attrs of all alignment attributes of a corpus."""
+ cqi_corpus = cqi_client.corpora.get(corpus_name)
+ payload = [{**x.attrs} for x in cqi_corpus.alignment_attributes.list()]
+ return {'code': 200, 'msg': 'OK', 'payload': payload}
diff --git a/app/corpora/cqi_over_socketio/cqi_corpora_corpus_positional_attributes.py b/app/corpora/cqi_over_socketio/cqi_corpora_corpus_positional_attributes.py
new file mode 100644
index 00000000..e8c11677
--- /dev/null
+++ b/app/corpora/cqi_over_socketio/cqi_corpora_corpus_positional_attributes.py
@@ -0,0 +1,24 @@
+from app import socketio
+from app.decorators import socketio_login_required
+from . import NAMESPACE as ns
+from .utils import cqi_over_socketio
+import cqi
+
+
+@socketio.on('cqi.corpora.corpus.positional_attributes.get', namespace=ns)
+@socketio_login_required
+@cqi_over_socketio
+def cqi_corpora_corpus_positional_attributes_get(cqi_client: cqi.CQiClient, corpus_name: str, positional_attribute_name: str): # noqa
+ """Return a copy of the attrs of one positional attribute of a corpus."""
+ cqi_corpus = cqi_client.corpora.get(corpus_name)
+ cqi_positional_attribute = cqi_corpus.positional_attributes.get(positional_attribute_name) # noqa
+ payload = {**cqi_positional_attribute.attrs}
+ return {'code': 200, 'msg': 'OK', 'payload': payload}
+
+
+@socketio.on('cqi.corpora.corpus.positional_attributes.list', namespace=ns)
+@socketio_login_required
+@cqi_over_socketio
+def cqi_corpora_corpus_positional_attributes_list(cqi_client: cqi.CQiClient, corpus_name: str): # noqa
+ """Return copies of the attrs of all positional attributes of a corpus."""
+ cqi_corpus = cqi_client.corpora.get(corpus_name)
+ payload = [{**x.attrs} for x in cqi_corpus.positional_attributes.list()]
+ return {'code': 200, 'msg': 'OK', 'payload': payload}
diff --git a/app/corpora/cqi_over_socketio/cqi_corpora_corpus_structural_attributes.py b/app/corpora/cqi_over_socketio/cqi_corpora_corpus_structural_attributes.py
new file mode 100644
index 00000000..2b1559f6
--- /dev/null
+++ b/app/corpora/cqi_over_socketio/cqi_corpora_corpus_structural_attributes.py
@@ -0,0 +1,24 @@
+from app import socketio
+from app.decorators import socketio_login_required
+from . import NAMESPACE as ns
+from .utils import cqi_over_socketio
+import cqi
+
+
+@socketio.on('cqi.corpora.corpus.structural_attributes.get', namespace=ns)
+@socketio_login_required
+@cqi_over_socketio
+def cqi_corpora_corpus_structural_attributes_get(cqi_client: cqi.CQiClient, corpus_name: str, structural_attribute_name: str): # noqa
+ """Return a copy of the attrs of one structural attribute of a corpus."""
+ cqi_corpus = cqi_client.corpora.get(corpus_name)
+ cqi_structural_attribute = cqi_corpus.structural_attributes.get(structural_attribute_name) # noqa
+ payload = {**cqi_structural_attribute.attrs}
+ return {'code': 200, 'msg': 'OK', 'payload': payload}
+
+
+@socketio.on('cqi.corpora.corpus.structural_attributes.list', namespace=ns)
+@socketio_login_required
+@cqi_over_socketio
+def cqi_corpora_corpus_structural_attributes_list(cqi_client: cqi.CQiClient, corpus_name: str): # noqa
+ """Return copies of the attrs of all structural attributes of a corpus."""
+ cqi_corpus = cqi_client.corpora.get(corpus_name)
+ payload = [{**x.attrs} for x in cqi_corpus.structural_attributes.list()]
+ return {'code': 200, 'msg': 'OK', 'payload': payload}
diff --git a/app/corpora/cqi_over_socketio/cqi_corpora_corpus_subcorpora.py b/app/corpora/cqi_over_socketio/cqi_corpora_corpus_subcorpora.py
new file mode 100644
index 00000000..419be16b
--- /dev/null
+++ b/app/corpora/cqi_over_socketio/cqi_corpora_corpus_subcorpora.py
@@ -0,0 +1,123 @@
+from app import socketio
+from app.decorators import socketio_login_required
+from app.models import Corpus
+from flask import session
+from . import NAMESPACE as ns
+from .utils import cqi_over_socketio, export_subcorpus
+import cqi
+import json
+import math
+import os
+
+
+@socketio.on('cqi.corpora.corpus.subcorpora.get', namespace=ns)
+@socketio_login_required
+@cqi_over_socketio
+def cqi_corpora_corpus_subcorpora_get(cqi_client: cqi.CQiClient, corpus_name: str, subcorpus_name: str): # noqa
+ """Return a copy of the attrs of one subcorpus of a CQi corpus."""
+ cqi_corpus = cqi_client.corpora.get(corpus_name)
+ cqi_subcorpus = cqi_corpus.subcorpora.get(subcorpus_name)
+ payload = {**cqi_subcorpus.attrs}
+ return {'code': 200, 'msg': 'OK', 'payload': payload}
+
+
+@socketio.on('cqi.corpora.corpus.subcorpora.list', namespace=ns)
+@socketio_login_required
+@cqi_over_socketio
+def cqi_corpora_corpus_subcorpora_list(cqi_client: cqi.CQiClient, corpus_name: str): # noqa
+ """Return copies of the attrs of all subcorpora of a CQi corpus."""
+ cqi_corpus = cqi_client.corpora.get(corpus_name)
+ payload = [{**x.attrs} for x in cqi_corpus.subcorpora.list()]
+ return {'code': 200, 'msg': 'OK', 'payload': payload}
+
+
+@socketio.on('cqi.corpora.corpus.subcorpora.subcorpus.drop', namespace=ns)
+@socketio_login_required
+@cqi_over_socketio
+def cqi_corpora_corpus_subcorpora_subcorpus_drop(cqi_client: cqi.CQiClient, corpus_name: str, subcorpus_name: str): # noqa
+ """Drop one subcorpus of a CQi corpus and return the CQi status."""
+ cqi_corpus = cqi_client.corpora.get(corpus_name)
+ cqi_subcorpus = cqi_corpus.subcorpora.get(subcorpus_name)
+ cqi_status = cqi_subcorpus.drop()
+ payload = {'code': cqi_status,
+ 'msg': cqi.api.specification.lookup[cqi_status]}
+ return {'code': 200, 'msg': 'OK', 'payload': payload}
+
+
+@socketio.on('cqi.corpora.corpus.subcorpora.subcorpus.dump', namespace=ns)
+@socketio_login_required
+@cqi_over_socketio
+def cqi_corpora_corpus_subcorpora_subcorpus_dump(cqi_client: cqi.CQiClient):
+ """Placeholder, always answers 501 Not Implemented."""
+ return {'code': 501, 'msg': 'Not Implemented'}
+
+
+@socketio.on('cqi.corpora.corpus.subcorpora.subcorpus.fdist_1', namespace=ns)
+@socketio_login_required
+@cqi_over_socketio
+def cqi_corpora_corpus_subcorpora_subcorpus_fdist_1(cqi_client: cqi.CQiClient):
+ """Placeholder, always answers 501 Not Implemented."""
+ return {'code': 501, 'msg': 'Not Implemented'}
+
+
+@socketio.on('cqi.corpora.corpus.subcorpora.subcorpus.fdist_2', namespace=ns)
+@socketio_login_required
+@cqi_over_socketio
+def cqi_corpora_corpus_subcorpora_subcorpus_fdist_2(cqi_client: cqi.CQiClient):
+ """Placeholder, always answers 501 Not Implemented."""
+ return {'code': 501, 'msg': 'Not Implemented'}
+
+
+###############################################################################
+# nopaque specific CQi extensions #
+###############################################################################
+@socketio.on('cqi.corpora.corpus.subcorpora.subcorpus.paginate', namespace=ns)
+@socketio_login_required
+@cqi_over_socketio
+def cqi_corpora_corpus_subcorpora_subcorpus_paginate(cqi_client: cqi.CQiClient, corpus_name: str, subcorpus_name: str, context: int = 50, page: int = 1, per_page: int = 20): # noqa
+ """Return one page of matches of a subcorpus together with their lookups.
+
+ Answers 416 if page or per_page is smaller than 1 or if page lies behind
+ the last page of a non-empty subcorpus. Otherwise export_subcorpus is
+ called with the page translated into offset/cutoff; its 'matches' entry
+ becomes the items of the page, the remaining entries become the lookups.
+ The payload also carries the pagination metadata (total, per_page, pages,
+ page, has_prev, has_next, prev_num, next_num).
+ """
+ cqi_corpus = cqi_client.corpora.get(corpus_name)
+ cqi_subcorpus = cqi_corpus.subcorpora.get(subcorpus_name)
+ # Sanity checks
+ if (
+ per_page < 1
+ or page < 1
+ or (
+ cqi_subcorpus.attrs['size'] > 0
+ and page > math.ceil(cqi_subcorpus.attrs['size'] / per_page)
+ )
+ ):
+ return {'code': 416, 'msg': 'Range Not Satisfiable'}
+ offset = (page - 1) * per_page
+ cutoff = per_page
+ cqi_results_export = export_subcorpus(
+ cqi_subcorpus, context=context, cutoff=cutoff, offset=offset)
+ payload = {}
+ # the items for the current page
+ payload['items'] = cqi_results_export.pop('matches')
+ # the lookups for the items
+ # (everything export_subcorpus returned except the popped 'matches')
+ payload['lookups'] = cqi_results_export
+ # the total number of items matching the query
+ payload['total'] = cqi_subcorpus.attrs['size']
+ # the number of items to be displayed on a page.
+ payload['per_page'] = per_page
+ # The total number of pages
+ payload['pages'] = math.ceil(payload['total'] / payload['per_page'])
+ # the current page number (1 indexed)
+ payload['page'] = page if payload['pages'] > 0 else None
+ # True if a previous page exists
+ payload['has_prev'] = payload['page'] > 1 if payload['page'] else False
+ # True if a next page exists.
+ payload['has_next'] = payload['page'] < payload['pages'] if payload['page'] else False # noqa
+ # Number of the previous page.
+ payload['prev_num'] = payload['page'] - 1 if payload['has_prev'] else None
+ # Number of the next page
+ payload['next_num'] = payload['page'] + 1 if payload['has_next'] else None
+ return {'code': 200, 'msg': 'OK', 'payload': payload}
+
+
+@socketio.on('cqi.corpora.corpus.subcorpora.subcorpus.export', namespace=ns)
+@socketio_login_required
+@cqi_over_socketio
+def cqi_corpora_corpus_subcorpora_subcorpus_export(cqi_client: cqi.CQiClient, corpus_name: str, subcorpus_name: str, context: int = 50): # noqa
+    """Export a subcorpus as '<subcorpus_name>.json' into the corpus directory.
+
+    The export is created by export_subcorpus and written to the path of the
+    database corpus this analysis session was opened for. Answers 400 if
+    subcorpus_name is not a plain file name, otherwise 200 without payload.
+    """
+    # subcorpus_name is client input that ends up in a file path: refuse
+    # everything that contains directory components.
+    if os.path.basename(subcorpus_name) != subcorpus_name:
+        return {'code': 400, 'msg': 'Bad Request'}
+    cqi_corpus = cqi_client.corpora.get(corpus_name)
+    cqi_subcorpus = cqi_corpus.subcorpora.get(subcorpus_name)
+    cqi_subcorpus_export = export_subcorpus(cqi_subcorpus, context=context)
+    corpus = Corpus.query.get(session['d']['corpus_id'])
+    file_path = os.path.join(corpus.path, f'{subcorpus_name}.json')
+    with open(file_path, 'w') as file:
+        json.dump(cqi_subcorpus_export, file)
+    return {'code': 200, 'msg': 'OK'}
diff --git a/app/corpora/cqi_over_socketio/utils.py b/app/corpora/cqi_over_socketio/utils.py
new file mode 100644
index 00000000..04b9f1d0
--- /dev/null
+++ b/app/corpora/cqi_over_socketio/utils.py
@@ -0,0 +1,129 @@
+from flask import session
+from functools import wraps
+from inspect import signature
+import cqi
+
+
+def cqi_over_socketio(f):
+    """Decorate a Socket.IO event handler that wraps a CQi function.
+
+    The client sends all arguments as a single json object. The wrapper
+    decomposes that object to fit the signature of f:
+
+    - a parameter annotated with cqi.CQiClient receives the client stored in
+      the session,
+    - a parameter without default is required,
+    - a parameter with default is optional,
+    - every provided value must be exactly of the annotated type.
+
+    f is called while holding the lock that belongs to the CQi client.
+
+    Returns (from the wrapper):
+        The return value of f, or a response dict with the code 424 (no
+        analysis session data in the session), 400 (missing argument or
+        argument of the wrong type) or 500 (broken pipe or CQiException
+        while executing f; the latter carries the exception details as
+        payload).
+    """
+    # Inspect the handler once at decoration time instead of on every event.
+    params = tuple(signature(f).parameters.values())
+
+    @wraps(f)
+    def wrapped(*args):
+        if 'd' not in session:
+            return {'code': 424, 'msg': 'Failed Dependency'}
+        # A client may emit the event without a payload or with something
+        # that is not a json object; both count as "no arguments provided".
+        data = args[0] if args and isinstance(args[0], dict) else {}
+        f_args = {}
+        # Check for missing args and if all provided args are of the right type
+        for param in params:
+            if param.annotation == cqi.CQiClient:
+                f_args[param.name] = session['d']['cqi_client']
+                continue
+            if param.name not in data:
+                if param.default is param.empty:
+                    return {'code': 400, 'msg': 'Bad Request'}
+                continue
+            arg = data[param.name]
+            # Exact type match on purpose: isinstance() would let a json
+            # boolean pass for an int parameter.
+            if type(arg) is not param.annotation:
+                return {'code': 400, 'msg': 'Bad Request'}
+            f_args[param.name] = arg
+        # The context manager releases the lock on every exit path.
+        with session['d']['cqi_client_lock']:
+            try:
+                return f(**f_args)
+            except BrokenPipeError:
+                # The connection to the CQP server is gone. Answer with an
+                # error instead of failing on an unbound return value.
+                return {'code': 500, 'msg': 'Internal Server Error'}
+            except cqi.errors.CQiException as e:
+                return {
+                    'code': 500,
+                    'msg': 'Internal Server Error',
+                    'payload': {
+                        'code': e.code,
+                        'desc': e.description,
+                        'msg': e.name
+                    }
+                }
+    return wrapped
+
+
+def lookups_by_cpos(corpus, cpos_list):
+ """Collect attribute lookups for the given corpus positions.
+
+ Returns a dict with the key 'cpos_lookup', which maps every cpos of
+ cpos_list to a dict holding its positional attribute values and the ids
+ of the structural attributes it lies in (ids of -1 are left out). For
+ every such structural attribute that occurs at least once and has
+ sub-attributes, a '<attribute name>_lookup' entry maps each occurring id
+ to a dict of its sub-attribute values.
+ """
+ lookups = {}
+ lookups['cpos_lookup'] = {cpos: {} for cpos in cpos_list}
+ for attr in corpus.positional_attributes.list():
+ # values are matched to cpos_list by position
+ cpos_attr_values = attr.values_by_cpos(cpos_list)
+ for i, cpos in enumerate(cpos_list):
+ lookups['cpos_lookup'][cpos][attr.attrs['name']] = \
+ cpos_attr_values[i]
+ for attr in corpus.structural_attributes.list():
+ # We only want to iterate over non subattributes, identifiable by
+ # attr.attrs['has_values']==False
+ if attr.attrs['has_values']:
+ continue
+ cpos_attr_ids = attr.ids_by_cpos(cpos_list)
+ for i, cpos in enumerate(cpos_list):
+ # -1 is skipped; presumably it marks a cpos outside of any region of
+ # this attribute (confirm against the cqi package)
+ if cpos_attr_ids[i] == -1:
+ continue
+ lookups['cpos_lookup'][cpos][attr.attrs['name']] = cpos_attr_ids[i]
+ occured_attr_ids = [x for x in set(cpos_attr_ids) if x != -1]
+ if not occured_attr_ids:
+ continue
+ subattrs = corpus.structural_attributes.list(filters={'part_of': attr})
+ if not subattrs:
+ continue
+ lookup_name = f'{attr.attrs["name"]}_lookup'
+ lookups[lookup_name] = {}
+ for attr_id in occured_attr_ids:
+ lookups[lookup_name][attr_id] = {}
+ for subattr in subattrs:
+ # cut off the name of the parent attribute plus one separator character
+ subattr_name = subattr.attrs['name'][(len(attr.attrs['name']) + 1):] # noqa
+ for i, subattr_value in enumerate(subattr.values_by_ids(occured_attr_ids)): # noqa
+ lookups[lookup_name][occured_attr_ids[i]][subattr_name] = subattr_value # noqa
+ return lookups
+
+
+def export_subcorpus(subcorpus, context=25, cutoff=float('inf'), offset=0):
+ """Export matches of a subcorpus together with lookups for their tokens.
+
+ Dumps the 'match' and 'matchend' fields of the matches first_match to
+ last_match and builds one dict per match with the keys 'num' (1 indexed
+ match number), 'c' (match), 'lc' (left context) and 'rc' (right context),
+ the latter three being (first cpos, last cpos) tuples, where 'lc' and 'rc'
+ may be None. Returns {'matches': [...]} merged with the result of
+ lookups_by_cpos for every cpos covered by a match or its context. An
+ empty subcorpus yields {"matches": []}.
+ """
+ if subcorpus.attrs['size'] == 0:
+ return {"matches": []}
+ # NOTE(review): a negative offset is clamped for first_match only, while
+ # last_match and match_num still use the raw offset, confirm that callers
+ # never pass one
+ first_match = max(0, offset)
+ last_match = min((offset + cutoff - 1), (subcorpus.attrs['size'] - 1))
+ match_boundaries = zip(
+ subcorpus.dump(
+ subcorpus.attrs['fields']['match'], first_match, last_match),
+ subcorpus.dump(
+ subcorpus.attrs['fields']['matchend'], first_match, last_match)
+ )
+ cpos_set = set()
+ matches = []
+ match_num = offset + 1
+ for match_start, match_end in match_boundaries:
+ c = (match_start, match_end)
+ # no left context at the very beginning of the corpus or if none is wanted
+ if match_start == 0 or context == 0:
+ lc = None
+ cpos_list_lbound = match_start
+ else:
+ # NOTE(review): these bounds span up to context + 1 positions, confirm
+ # that the extra position is intended (same for the right context)
+ lc_lbound = max(0, (match_start - 1 - context))
+ lc_rbound = match_start - 1
+ lc = (lc_lbound, lc_rbound)
+ cpos_list_lbound = lc_lbound
+ # no right context at the very end of the corpus or if none is wanted
+ if (match_end == (subcorpus.collection.corpus.attrs['size'] - 1)
+ or context == 0):
+ rc = None
+ cpos_list_rbound = match_end
+ else:
+ rc_lbound = match_end + 1
+ rc_rbound = min(match_end + 1 + context,
+ subcorpus.collection.corpus.attrs['size'] - 1)
+ rc = (rc_lbound, rc_rbound)
+ cpos_list_rbound = rc_rbound
+ match = {'num': match_num, 'lc': lc, 'c': c, 'rc': rc}
+ matches.append(match)
+ # remember every cpos of the match and its context for the lookups
+ cpos_set.update(range(cpos_list_lbound, cpos_list_rbound + 1))
+ match_num += 1
+ lookups = lookups_by_cpos(subcorpus.collection.corpus, list(cpos_set))
+ return {'matches': matches, **lookups}
diff --git a/app/corpora/events.py b/app/corpora/events.py
deleted file mode 100644
index 4c8bd606..00000000
--- a/app/corpora/events.py
+++ /dev/null
@@ -1,304 +0,0 @@
-from datetime import datetime
-from flask import current_app, request
-from flask_login import current_user
-from socket import gaierror
-from werkzeug.utils import secure_filename
-from .. import db, socketio
-from ..decorators import socketio_login_required
-from ..events.socketio import sessions as socketio_sessions
-from ..models import Corpus
-import cqi
-import math
-import os
-import shutil
-
-
-'''
-' A dictionary containing lists of, with corpus ids associated, Socket.IO
-' session ids (sid). {: [, ...], ...}
-'''
-corpus_analysis_sessions = {}
-'''
-' A dictionary containing Socket.IO session id - CQi client pairs.
-' {: CQiClient, ...}
-'''
-corpus_analysis_clients = {}
-
-
-@socketio.on('corpus_analysis_init')
-@socketio_login_required
-def init_corpus_analysis(corpus_id):
- corpus = Corpus.query.get(corpus_id)
- if corpus is None:
- response = {'code': 404, 'desc': None, 'msg': 'Not Found'}
- socketio.emit('corpus_analysis_init', response, room=request.sid)
- return
- if not (corpus.creator == current_user or current_user.is_administrator()):
- response = {'code': 403, 'desc': None, 'msg': 'Forbidden'}
- socketio.emit('corpus_analysis_init', response, room=request.sid)
- return
- if corpus.status not in ['prepared', 'start analysis', 'analysing']:
- response = {'code': 424, 'desc': None, 'msg': 'Failed Dependency'}
- socketio.emit('corpus_analysis_init', response, room=request.sid)
- return
- if corpus.status == 'prepared':
- corpus.status = 'start analysis'
- db.session.commit()
- socketio.start_background_task(corpus_analysis_session_handler,
- current_app._get_current_object(),
- corpus_id, current_user.id, request.sid)
-
-
-def corpus_analysis_session_handler(app, corpus_id, user_id, session_id):
- with app.app_context():
- ''' Setup analysis session '''
- corpus = Corpus.query.get(corpus_id)
- retry_counter = 15
- while corpus.status != 'analysing':
- db.session.refresh(corpus)
- retry_counter -= 1
- if retry_counter == 0:
- response = {'code': 408, 'desc': None, 'msg': 'Request Timeout'} # noqa
- socketio.emit('corpus_analysis_init', response, room=session_id) # noqa
- corpus.status = 'stop analysis'
- db.session.commit()
- return
- socketio.sleep(3)
- client = cqi.CQiClient('cqpserver_{}'.format(corpus_id))
- try:
- connect_status = client.connect()
- payload = {'code': connect_status, 'msg': cqi.api.specification.lookup[connect_status]} # noqa
- except cqi.errors.CQiException as e:
- handle_cqi_exception('corpus_analysis_init', e, session_id)
- corpus.status = 'stop analysis'
- db.session.commit()
- return
- except gaierror:
- response = {'code': 500, 'desc': None, 'msg': 'Internal Server Error'} # noqa
- socketio.emit('corpus_analysis_init', response, room=session_id)
- corpus.status = 'stop analysis'
- db.session.commit()
- return
- corpus_analysis_clients[session_id] = client
- if corpus_id in corpus_analysis_sessions:
- corpus_analysis_sessions[corpus_id].append(session_id)
- else:
- corpus_analysis_sessions[corpus_id] = [session_id]
- client.status = 'ready'
- response = {'code': 200, 'desc': None, 'msg': 'OK', 'payload': payload}
- socketio.emit('corpus_analysis_init', response, room=session_id)
- ''' Observe analysis session '''
- while session_id in socketio_sessions:
- socketio.sleep(3)
- ''' Teardown analysis session '''
- if client.status == 'running':
- client.status = 'abort'
- while client.status != 'ready':
- socketio.sleep(0.3)
- try:
- client.disconnect()
- except cqi.errors.CQiException:
- pass
- corpus_analysis_clients.pop(session_id, None)
- corpus_analysis_sessions[corpus_id].remove(session_id)
- if not corpus_analysis_sessions[corpus_id]:
- corpus_analysis_sessions.pop(corpus_id, None)
- corpus.status = 'stop analysis'
- db.session.commit()
-
-
-@socketio.on('corpus_analysis_meta_data')
-@socketio_login_required
-def corpus_analysis_get_meta_data(corpus_id):
- # get meta data from db
- db_corpus = Corpus.query.get(corpus_id)
- metadata = {}
- metadata['corpus_name'] = db_corpus.title
- metadata['corpus_description'] = db_corpus.description
- metadata['corpus_creation_date'] = db_corpus.creation_date.isoformat() + 'Z'
- metadata['corpus_last_edited_date'] = \
- db_corpus.last_edited_date.isoformat() + 'Z'
- client = corpus_analysis_clients.get(request.sid)
- if client is None:
- response = {'code': 424, 'desc': None, 'msg': 'Failed Dependency'}
- socketio.emit('corpus_analysis_meta_data', response, room=request.sid)
- return
- # check if client is busy or not
- if client.status == 'running':
- client.status = 'abort'
- while client.status != 'ready':
- socketio.sleep(0.3)
- # get meta data from corpus in cqp server
- client.status = 'running'
- try:
- cwb_corpus = client.corpora.get('CORPUS')
- metadata['corpus_properties'] = cwb_corpus.attrs['properties']
- metadata['corpus_size_tokens'] = cwb_corpus.attrs['size']
-
- text_attr = cwb_corpus.structural_attributes.get('text')
- struct_attrs = cwb_corpus.structural_attributes.list(
- filters={'part_of': text_attr})
- text_ids = range(0, (text_attr.attrs['size']))
- texts_metadata = {}
- for text_id in text_ids:
- texts_metadata[text_id] = {}
- for struct_attr in struct_attrs:
- texts_metadata[text_id][struct_attr.attrs['name'][(len(text_attr.attrs['name']) + 1):]] = struct_attr.values_by_ids(list(range(struct_attr.attrs['size'])))[text_id] # noqa
- metadata['corpus_all_texts'] = texts_metadata
- metadata['corpus_analysis_date'] = datetime.utcnow().isoformat() + 'Z'
- metadata['corpus_cqi_py_protocol_version'] = client.api.version
- metadata['corpus_cqi_py_package_version'] = cqi.__version__
- # TODO: make this dynamically
- metadata['corpus_cqpserver_version'] = 'CQPserver v3.4.22'
-
- # write some metadata to the db
- db_corpus.current_nr_of_tokens = metadata['corpus_size_tokens']
- db.session.commit()
-
- # emit data
- payload = metadata
- response = {'code': 200, 'desc': None, 'msg': 'OK', 'payload': payload}
- socketio.emit('corpus_analysis_meta_data', response, room=request.sid)
- except cqi.errors.CQiException as e:
- payload = {'code': e.code, 'desc': e.description, 'msg': e.name}
- response = {'code': 500, 'desc': None, 'msg': 'Internal Server Error',
- 'payload': payload}
- socketio.emit('corpus_analysis_meta_data', response, room=request.sid)
- client.status = 'ready'
-
-
-@socketio.on('corpus_analysis_query')
-@socketio_login_required
-def corpus_analysis_query(query):
- client = corpus_analysis_clients.get(request.sid)
- if client is None:
- response = {'code': 424, 'desc': None, 'msg': 'Failed Dependency'}
- socketio.emit('corpus_analysis_query', response, room=request.sid)
- return
- if client.status == 'running':
- client.status = 'abort'
- while client.status != 'ready':
- socketio.sleep(0.3)
- client.status = 'running'
- try:
- corpus = client.corpora.get('CORPUS')
- query_status = corpus.query(query)
- results = corpus.subcorpora.get('Results')
- except cqi.errors.CQiException as e:
- client.status = 'ready'
- handle_cqi_exception('corpus_analysis_query', e, request.sid)
- return
- payload = {'status': query_status,
- 'msg': cqi.api.specification.lookup[query_status],
- 'match_count': results.attrs['size']}
- response = {'code': 200, 'desc': None, 'msg': 'OK', 'payload': payload}
- socketio.emit('corpus_analysis_query', response, room=request.sid)
- chunk_size = 100
- chunk_start = 0
- context = 50
- progress = 0
- while chunk_start <= results.attrs['size']:
- if client.status == 'abort':
- break
- try:
- chunk = results.export(context=context, cutoff=chunk_size, offset=chunk_start) # noqa
- except cqi.errors.CQiException as e:
- handle_cqi_exception('corpus_analysis_query', e, request.sid)
- break
- if (results.attrs['size'] == 0):
- progress = 100
- else:
- progress = ((chunk_start + chunk_size) / results.attrs['size']) * 100 # noqa
- progress = min(100, int(math.ceil(progress)))
- payload = {'chunk': chunk, 'progress': progress}
- response = {'code': 200, 'desc': None, 'msg': 'OK', 'payload': payload}
- socketio.emit('corpus_analysis_query_results', response, room=request.sid) # noqa
- chunk_start += chunk_size
- client.status = 'ready'
-
-
-@socketio.on('corpus_analysis_get_match_with_full_context')
-@socketio_login_required
-def corpus_analysis_get_match_with_full_context(payload):
- type = payload['type']
- data_indexes = payload['data_indexes']
- first_cpos = payload['first_cpos']
- last_cpos = payload['last_cpos']
- client = corpus_analysis_clients.get(request.sid)
- if client is None:
- response = {'code': 424, 'desc': 'No client found for this session',
- 'msg': 'Failed Dependency'}
- socketio.emit('corpus_analysis_get_match_with_full_context', response,
- room=request.sid)
- return
- if client.status == 'running':
- client.status = 'abort'
- while client.status != 'ready':
- socketio.sleep(0.3)
- client.status = 'running'
- try:
- corpus = client.corpora.get('CORPUS')
- s = corpus.structural_attributes.get('s')
- except cqi.errors.CQiException as e:
- handle_cqi_exception('corpus_analysis_get_match_with_full_context', e, request.sid) # noqa
- return
- i = 0
- # Send data one match at a time.
- for index, f_cpos, l_cpos in zip(data_indexes, first_cpos, last_cpos):
- if client.status == 'abort':
- break
- i += 1
- matches = []
- cpos_lookup = text_lookup = {}
- try:
- tmp = s.export(f_cpos, l_cpos, context=10)
- except cqi.errors.CQiException as e:
- handle_cqi_exception('corpus_analysis_get_match_with_full_context', e, request.sid) # noqa
- break
- matches.append(tmp['matches'][0])
- cpos_lookup.update(tmp['cpos_lookup'])
- text_lookup.update(tmp['text_lookup'])
- progress = i / len(data_indexes) * 100
- payload = {'matches': matches, 'progress': progress,
- 'cpos_lookup': cpos_lookup, 'text_lookup': text_lookup}
- response = {'code': 200, 'desc': None, 'msg': 'OK', 'payload': payload,
- 'type': type, 'data_indexes': data_indexes}
- socketio.emit('corpus_analysis_get_match_with_full_context',
- response, room=request.sid)
- client.status = 'ready'
-
-
-@socketio.on('export_corpus')
-@socketio_login_required
-def export_corpus(corpus_id):
- corpus = Corpus.query.get(corpus_id)
- if corpus is None:
- response = {'code': 404, 'msg': 'Not found'}
- socketio.emit('export_corpus', response, room=request.sid)
- return
- if corpus.status != 'prepared':
- response = {'code': 412, 'msg': 'Precondition Failed'}
- socketio.emit('export_corpus', response, room=request.sid)
- return
- # delete old corpus archive if it exists/has been build before
- if corpus.archive_file is not None and os.path.isfile(corpus.archive_file):
- os.remove(corpus.archive_file)
- archive_file_base_name = '[corpus]_' + secure_filename(corpus.title)
- corpus.archive_file = archive_file_base_name + '.zip'
- db.session.commit()
- shutil.make_archive(
- os.path.join(corpus.creator.path, 'corpora', archive_file_base_name),
- 'zip',
- corpus.path
- )
- socketio.emit('export_corpus_{}'.format(corpus.id), room=request.sid)
-
-
-def handle_cqi_exception(event, exception, room):
- response = {'code': 500,
- 'desc': None,
- 'msg': 'Internal Server Error',
- 'payload': {'code': exception.code,
- 'desc': exception.description,
- 'msg': exception.name}}
- socketio.emit(event, response, room=room)
diff --git a/app/corpora/forms.py b/app/corpora/forms.py
index 5f2d3570..c015d87e 100644
--- a/app/corpora/forms.py
+++ b/app/corpora/forms.py
@@ -1,8 +1,8 @@
from flask_wtf import FlaskForm
from werkzeug.utils import secure_filename
-from wtforms import (BooleanField, FileField, StringField, SubmitField,
- ValidationError, IntegerField, SelectField)
-from wtforms.validators import DataRequired, Length, NumberRange
+from wtforms import (FileField, StringField, SubmitField,
+ ValidationError, IntegerField)
+from wtforms.validators import DataRequired, Length
class AddCorpusFileForm(FlaskForm):
@@ -91,76 +91,3 @@ class ImportCorpusForm(FlaskForm):
raise ValidationError('File does not have an approved extension: '
'.zip')
field.data.filename = secure_filename(field.data.filename)
-
-
-class QueryForm(FlaskForm):
- '''
- Form to submit a query to the server which is executed via cqi-py.
- '''
- query = StringField('Query',
- validators=[DataRequired(), Length(1, 1024)])
- submit = SubmitField('Search')
-
-
-class DisplayOptionsForm(FlaskForm):
- '''
- Form to alter how the matches are represented to the user by the user.
- '''
- expert_mode = BooleanField('Expert mode')
- result_context = SelectField('Result context',
- choices=[('', 'Choose your option'),
- ('10', '10'),
- ('20', '20'),
- ('30', '30'),
- ('40', '40'),
- ('50', '50')])
- results_per_page = SelectField('Results per page',
- choices=[('', 'Choose your option'),
- ('10', '10'),
- ('20', '20'),
- ('30', '30'),
- ('40', '40'),
- ('50', '50')])
-
-
-class InspectDisplayOptionsForm(FlaskForm):
- '''
- Form for the inspect modal where the user can interact with how the current
- match is being represented to him.
- '''
- expert_mode_inspect = BooleanField('Expert mode')
- highlight_sentences = BooleanField('Split sentences')
- context_sentences = IntegerField('Context sentences',
- validators=[NumberRange(min=0, max=10)],
- default=3)
-
-
-class QueryDownloadForm(FlaskForm):
- '''
- Form to choose in what file format the analysis results are being
- downloaded. WIP.
- '''
- file_type = SelectField('File type',
- choices=[('', 'Choose file type'),
- ('csv', 'csv'),
- ('json', 'json'),
- ('excel', 'excel'),
- ('html', 'html-table')],
- validators=[DataRequired()])
-
-
-class AddQueryResultForm(FlaskForm):
- '''
- Form used to import one result json file.
- '''
- description = StringField('Description',
- validators=[DataRequired(), Length(1, 255)])
- file = FileField('File', validators=[DataRequired()])
- title = StringField('Title', validators=[DataRequired(), Length(1, 32)])
- submit = SubmitField()
-
- def validate_file(self, field):
- if not field.data.filename.lower().endswith('.json'):
- raise ValidationError('File does not have an approved extension: '
- '.json')
- field.data.filename = secure_filename(field.data.filename)
diff --git a/app/corpora/query_results_forms.py b/app/corpora/query_results_forms.py
new file mode 100644
index 00000000..bb55e513
--- /dev/null
+++ b/app/corpora/query_results_forms.py
@@ -0,0 +1,21 @@
+from flask_wtf import FlaskForm
+from werkzeug.utils import secure_filename
+from wtforms import FileField, StringField, SubmitField, ValidationError
+from wtforms.validators import DataRequired, Length
+
+
+class AddQueryResultForm(FlaskForm):
+    '''
+    Upload form for importing a single query result stored as a json file.
+    '''
+    description = StringField(
+        'Description',
+        validators=[DataRequired(), Length(1, 255)]
+    )
+    file = FileField('File', validators=[DataRequired()])
+    title = StringField('Title', validators=[DataRequired(), Length(1, 32)])
+    submit = SubmitField()
+
+    def validate_file(self, field):
+        '''
+        Accept only uploads whose name ends in .json (case-insensitive) and
+        replace the filename with its secure_filename() form.
+        '''
+        uploaded = field.data
+        if uploaded.filename.lower().endswith('.json'):
+            uploaded.filename = secure_filename(uploaded.filename)
+            return
+        raise ValidationError('File does not have an approved extension: '
+                              '.json')
diff --git a/app/corpora/query_results_routes.py b/app/corpora/query_results_routes.py
new file mode 100644
index 00000000..1ccc477e
--- /dev/null
+++ b/app/corpora/query_results_routes.py
@@ -0,0 +1,134 @@
+from flask import (abort, current_app, flash, make_response, redirect, request,
+ render_template, url_for, send_from_directory)
+from flask_login import current_user, login_required
+from . import bp
+from . import tasks
+from .forms import (AddQueryResultForm, DisplayOptionsForm,
+ InspectDisplayOptionsForm)
+from .. import db
+from ..models import QueryResult
+import json
+import os
+
+
+@bp.route('/result/add', methods=['GET', 'POST'])
+@login_required
+def add_query_result():
+    '''
+    View to import a query result from an uploaded json file.
+
+    Currently switched off: the view aborts with 503 first, so the import
+    logic below is unreachable until that call is removed.
+    '''
+    abort(503)
+    # NOTE(review): this change set moves AddQueryResultForm into
+    # query_results_forms.py while this module still imports it from .forms -
+    # confirm the import before re-enabling the view.
+    form = AddQueryResultForm(prefix='add-query-result-form')
+    if not form.is_submitted():
+        return render_template(
+            'corpora/query_results/add_query_result.html.j2',
+            form=form,
+            title='Add query result'
+        )
+    if not form.validate():
+        return make_response(form.errors, 400)
+    query_result = QueryResult(
+        creator=current_user,
+        description=form.description.data,
+        filename=form.file.data.filename,
+        title=form.title.data
+    )
+    db.session.add(query_result)
+    # Flush/refresh before query_result.path is used (presumably the path is
+    # derived from the generated id - confirm against the model).
+    db.session.flush()
+    db.session.refresh(query_result)
+    try:
+        os.makedirs(os.path.dirname(query_result.path))
+    except OSError:
+        message = 'Make dir {} led to an OSError!'.format(query_result.path)
+        current_app.logger.error(message)
+        db.session.rollback()
+        flash('Internal Server Error', 'error')
+        error_target = {'redirect_url': url_for('.add_query_result')}
+        return make_response(error_target, 500)
+    # Store the upload, then read it back to extract the metadata.
+    form.file.data.save(query_result.path)
+    with open(query_result.path, 'r') as uploaded_file:
+        metadata = json.load(uploaded_file)
+    # TODO: validation against
+    # app/static/json_schema/nopaque_cqi_py_results_schema.json is disabled;
+    # an invalid upload was meant to be deleted again and reported with
+    # 'Uploaded file is invalid'.
+    # Everything except these two entries is kept as metadata in the database.
+    for dropped_key in ('matches', 'cpos_lookup'):
+        metadata.pop(dropped_key)
+    query_result.query_metadata = metadata
+    db.session.commit()
+    flash('Query result added!', 'result')
+    success_target = {
+        'redirect_url': url_for('.query_result',
+                                query_result_id=query_result.id)
+    }
+    return make_response(success_target, 201)
+
+
+@bp.route('/result/<int:query_result_id>')
+@login_required
+def query_result(query_result_id):
+    '''
+    View to show a single imported query result.
+
+    Currently switched off: the view aborts with 503 first, so the code
+    below is unreachable until that call is removed. Once enabled it answers
+    404 for an unknown id and 403 unless the current user created the query
+    result or is an administrator.
+    '''
+    abort(503)
+    query_result = QueryResult.query.get_or_404(query_result_id)
+    if not (query_result.creator == current_user
+            or current_user.is_administrator()):
+        abort(403)
+    return render_template('corpora/query_results/query_result.html.j2',
+                           query_result=query_result, title='Query result')
+
+
+@bp.route('/result/<int:query_result_id>/inspect')
+@login_required
+def inspect_query_result(query_result_id):
+    '''
+    View to inspect imported result file in a corpus analysis like interface.
+
+    Currently switched off: the view aborts with 503 first, so the code
+    below is unreachable until that call is removed. Once enabled it answers
+    404 for an unknown id and 403 unless the current user created the query
+    result or is an administrator.
+    '''
+    abort(503)
+    query_result = QueryResult.query.get_or_404(query_result_id)
+    query_metadata = query_result.query_metadata
+    if not (query_result.creator == current_user
+            or current_user.is_administrator()):
+        abort(403)
+    # NOTE(review): this change set deletes DisplayOptionsForm and
+    # InspectDisplayOptionsForm from forms.py while this module still imports
+    # them from there - confirm where they are meant to live before
+    # re-enabling the view.
+    display_options_form = DisplayOptionsForm(
+        prefix='display-options-form',
+        results_per_page=request.args.get('results_per_page', 30),
+        result_context=request.args.get('context', 20)
+    )
+    inspect_display_options_form = InspectDisplayOptionsForm(
+        prefix='inspect-display-options-form'
+    )
+    # The complete result file is parsed and handed to the template.
+    with open(query_result.path, 'r') as query_result_file:
+        query_result_file_content = json.load(query_result_file)
+    return render_template(
+        'corpora/query_results/inspect.html.j2',
+        query_result=query_result,
+        display_options_form=display_options_form,
+        inspect_display_options_form=inspect_display_options_form,
+        query_result_file_content=query_result_file_content,
+        query_metadata=query_metadata,
+        title='Inspect query result'
+    )
+
+
+@bp.route('/result/<int:query_result_id>/delete')
+@login_required
+def delete_query_result(query_result_id):
+    '''
+    View to mark a query result for deletion.
+
+    Currently switched off: the view aborts with 503 first, so the code
+    below is unreachable until that call is removed. Once enabled it answers
+    404 for an unknown id and 403 unless the current user created the query
+    result or is an administrator; otherwise it hands the id to
+    tasks.delete_query_result() and redirects to the corpus analysis service
+    page.
+    '''
+    abort(503)
+    query_result = QueryResult.query.get_or_404(query_result_id)
+    if not (query_result.creator == current_user
+            or current_user.is_administrator()):
+        abort(403)
+    # NOTE(review): the message formats the model object itself, not its
+    # title - confirm that this is the intended text.
+    flash('Query result "{}" has been marked for deletion!'.format(query_result), 'result')  # noqa
+    tasks.delete_query_result(query_result_id)
+    return redirect(url_for('services.service', service="corpus_analysis"))
+
+
+@bp.route('/result/<int:query_result_id>/download')
+@login_required
+def download_query_result(query_result_id):
+    '''
+    View to download the stored file of a query result as an attachment.
+
+    Currently switched off: the view aborts with 503 first, so the code
+    below is unreachable until that call is removed. Once enabled it answers
+    404 for an unknown id and 403 unless the current user created the query
+    result or is an administrator.
+    '''
+    abort(503)
+    query_result = QueryResult.query.get_or_404(query_result_id)
+    if not (query_result.creator == current_user
+            or current_user.is_administrator()):
+        abort(403)
+    return send_from_directory(as_attachment=True,
+                               directory=os.path.dirname(query_result.path),
+                               filename=query_result.filename)
diff --git a/app/corpora/routes.py b/app/corpora/routes.py
index eff1be55..f700a540 100644
--- a/app/corpora/routes.py
+++ b/app/corpora/routes.py
@@ -1,16 +1,12 @@
-from flask import (abort, current_app, flash, make_response, redirect, request,
+from flask import (abort, current_app, flash, make_response, redirect,
render_template, url_for, send_from_directory)
from flask_login import current_user, login_required
from . import bp
from . import tasks
-from .forms import (AddCorpusFileForm, AddCorpusForm, AddQueryResultForm,
- EditCorpusFileForm, QueryDownloadForm, QueryForm,
- DisplayOptionsForm, InspectDisplayOptionsForm,
+from .forms import (AddCorpusFileForm, AddCorpusForm, EditCorpusFileForm,
ImportCorpusForm)
-from jsonschema import validate
from .. import db
-from ..models import Corpus, CorpusFile, QueryResult
-import json
+from ..models import Corpus, CorpusFile
import os
import shutil
import glob
@@ -22,21 +18,22 @@ from .import_corpus import check_zip_contents
@bp.route('/add', methods=['GET', 'POST'])
@login_required
def add_corpus():
- form = AddCorpusForm()
+ form = AddCorpusForm(prefix='add-corpus-form')
if form.validate_on_submit():
- corpus = Corpus(creator=current_user,
- description=form.description.data,
- title=form.title.data)
+ corpus = Corpus(
+ creator=current_user,
+ description=form.description.data,
+ title=form.title.data
+ )
db.session.add(corpus)
db.session.flush()
db.session.refresh(corpus)
try:
os.makedirs(corpus.path)
- except OSError:
- current_app.logger.error(
- 'Make dir {} led to an OSError!'.format(corpus.path)
- )
+ except OSError as e:
+ current_app.logger.error(f'Could not add corpus: {e}')
db.session.rollback()
+ flash('Internal Server Error', 'error')
abort(500)
else:
db.session.commit()
@@ -49,22 +46,23 @@ def add_corpus():
@bp.route('/import', methods=['GET', 'POST'])
@login_required
def import_corpus():
+ abort(503)
form = ImportCorpusForm()
if form.is_submitted():
if not form.validate():
return make_response(form.errors, 400)
- corpus = Corpus(creator=current_user,
- description=form.description.data,
- title=form.title.data)
+ corpus = Corpus(
+ creator=current_user,
+ description=form.description.data,
+ title=form.title.data
+ )
db.session.add(corpus)
db.session.flush()
db.session.refresh(corpus)
try:
os.makedirs(corpus.path)
- except OSError:
- current_app.logger.error(
- 'Make dir {} led to an OSError!'.format(corpus.path)
- )
+ except OSError as e:
+ current_app.logger.error(f'Could not import corpus: {e}')
db.session.rollback()
flash('Internal Server Error', 'error')
return make_response(
@@ -128,9 +126,21 @@ def corpus(corpus_id):
corpus_files=corpus_files, title='Corpus')
+@bp.route('/<int:corpus_id>/analyse')
+@login_required
+def analyse_corpus(corpus_id):
+    '''
+    View to render the analysis page of a corpus.
+
+    Answers 404 for an unknown corpus id and 403 unless the current user
+    created the corpus or is an administrator (the same rule the
+    download_corpus view applies).
+    '''
+    corpus = Corpus.query.get_or_404(corpus_id)
+    if not (corpus.creator == current_user or current_user.is_administrator()):
+        abort(403)
+    return render_template(
+        'corpora/analyse_corpus.html.j2',
+        corpus=corpus,
+        title=f'Analyse Corpus {corpus.title}'
+    )
+
+
@bp.route('//download')
@login_required
def download_corpus(corpus_id):
+ abort(503)
corpus = Corpus.query.get_or_404(corpus_id)
if not (corpus.creator == current_user or current_user.is_administrator()):
abort(403)
@@ -142,31 +152,6 @@ def download_corpus(corpus_id):
)
-@bp.route('//analyse')
-@login_required
-def analyse_corpus(corpus_id):
- corpus = Corpus.query.get_or_404(corpus_id)
- display_options_form = DisplayOptionsForm(
- prefix='display-options-form',
- result_context=request.args.get('context', 20),
- results_per_page=request.args.get('results_per_page', 30)
- )
- query_form = QueryForm(prefix='query-form',
- query=request.args.get('query'))
- query_download_form = QueryDownloadForm(prefix='query-download-form')
- inspect_display_options_form = InspectDisplayOptionsForm(
- prefix='inspect-display-options-form')
- return render_template(
- 'corpora/analyse_corpus.html.j2',
- corpus=corpus,
- display_options_form=display_options_form,
- inspect_display_options_form=inspect_display_options_form,
- query_form=query_form,
- query_download_form=query_download_form,
- title='Corpus analysis'
- )
-
-
@bp.route('//delete')
@login_required
def delete_corpus(corpus_id):
@@ -190,20 +175,22 @@ def add_corpus_file(corpus_id):
return make_response(form.errors, 400)
# Save the file
form.file.data.save(os.path.join(corpus.path, form.file.data.filename))
- corpus_file = CorpusFile(address=form.address.data,
- author=form.author.data,
- booktitle=form.booktitle.data,
- chapter=form.chapter.data,
- corpus=corpus,
- editor=form.editor.data,
- filename=form.file.data.filename,
- institution=form.institution.data,
- journal=form.journal.data,
- pages=form.pages.data,
- publisher=form.publisher.data,
- publishing_year=form.publishing_year.data,
- school=form.school.data,
- title=form.title.data)
+ corpus_file = CorpusFile(
+ address=form.address.data,
+ author=form.author.data,
+ booktitle=form.booktitle.data,
+ chapter=form.chapter.data,
+ corpus=corpus,
+ editor=form.editor.data,
+ filename=form.file.data.filename,
+ institution=form.institution.data,
+ journal=form.journal.data,
+ pages=form.pages.data,
+ publisher=form.publisher.data,
+ publishing_year=form.publishing_year.data,
+ school=form.school.data,
+ title=form.title.data
+ )
db.session.add(corpus_file)
corpus.status = 'unprepared'
db.session.commit()
@@ -298,122 +285,3 @@ def prepare_corpus(corpus_id):
else:
flash('Can not build corpus "{}": No corpus file(s)!'.format(corpus.title), 'error') # noqa
return redirect(url_for('.corpus', corpus_id=corpus_id))
-
-
-# Following are view functions to add, view etc. exported results.
-@bp.route('/result/add', methods=['GET', 'POST'])
-@login_required
-def add_query_result():
- '''
- View to import a result as a json file.
- '''
- form = AddQueryResultForm(prefix='add-query-result-form')
- if form.is_submitted():
- if not form.validate():
- return make_response(form.errors, 400)
- query_result = QueryResult(creator=current_user,
- description=form.description.data,
- filename=form.file.data.filename,
- title=form.title.data)
- db.session.add(query_result)
- db.session.flush()
- db.session.refresh(query_result)
- try:
- os.makedirs(os.path.dirname(query_result.path))
- except OSError:
- current_app.logger.error(
- 'Make dir {} led to an OSError!'.format(query_result.path)
- )
- db.session.rollback()
- flash('Internal Server Error', 'error')
- return make_response(
- {'redirect_url': url_for('.add_query_result')}, 500)
- # save the uploaded file
- form.file.data.save(query_result.path)
- # parse json from file
- with open(query_result.path, 'r') as file:
- query_result_file_content = json.load(file)
- # parse json schema
- # with open('app/static/json_schema/nopaque_cqi_py_results_schema.json', 'r') as file: # noqa
- # schema = json.load(file)
- # try:
- # # validate imported json file
- # validate(instance=query_result_file_content, schema=schema)
- # except Exception:
- # tasks.delete_query_result(query_result.id)
- # flash('Uploaded file is invalid', 'result')
- # return make_response(
- # {'redirect_url': url_for('.add_query_result')}, 201)
- query_result_file_content.pop('matches')
- query_result_file_content.pop('cpos_lookup')
- query_result.query_metadata = query_result_file_content
- db.session.commit()
- flash('Query result added!', 'result')
- return make_response({'redirect_url': url_for('.query_result', query_result_id=query_result.id)}, 201) # noqa
- return render_template('corpora/query_results/add_query_result.html.j2',
- form=form, title='Add query result')
-
-
-@bp.route('/result/')
-@login_required
-def query_result(query_result_id):
- query_result = QueryResult.query.get_or_404(query_result_id)
- if not (query_result.creator == current_user
- or current_user.is_administrator()):
- abort(403)
- return render_template('corpora/query_results/query_result.html.j2',
- query_result=query_result, title='Query result')
-
-
-@bp.route('/result//inspect')
-@login_required
-def inspect_query_result(query_result_id):
- '''
- View to inspect imported result file in a corpus analysis like interface
- '''
- query_result = QueryResult.query.get_or_404(query_result_id)
- query_metadata = query_result.query_metadata
- if not (query_result.creator == current_user
- or current_user.is_administrator()):
- abort(403)
- display_options_form = DisplayOptionsForm(
- prefix='display-options-form',
- results_per_page=request.args.get('results_per_page', 30),
- result_context=request.args.get('context', 20)
- )
- inspect_display_options_form = InspectDisplayOptionsForm(
- prefix='inspect-display-options-form'
- )
- with open(query_result.path, 'r') as query_result_file:
- query_result_file_content = json.load(query_result_file)
- return render_template('corpora/query_results/inspect.html.j2',
- query_result=query_result,
- display_options_form=display_options_form,
- inspect_display_options_form=inspect_display_options_form, # noqa
- query_result_file_content=query_result_file_content,
- query_metadata=query_metadata,
- title='Inspect query result')
-
-
-@bp.route('/result//delete')
-@login_required
-def delete_query_result(query_result_id):
- query_result = QueryResult.query.get_or_404(query_result_id)
- if not (query_result.creator == current_user
- or current_user.is_administrator()):
- abort(403)
- flash('Query result "{}" has been marked for deletion!'.format(query_result), 'result') # noqa
- tasks.delete_query_result(query_result_id)
- return redirect(url_for('services.service', service="corpus_analysis"))
-
-
-@bp.route('/result//download')
-@login_required
-def download_query_result(query_result_id):
- query_result = QueryResult.query.get_or_404(query_result_id)
- if not (query_result.creator == current_user
- or current_user.is_administrator()):
- abort(403)
- return send_from_directory(as_attachment=True,
- directory=os.path.dirname(query_result.path),
- filename=query_result.filename)
diff --git a/app/daemon/__init__.py b/app/daemon/__init__.py
index 461e0ca8..60adcf2a 100644
--- a/app/daemon/__init__.py
+++ b/app/daemon/__init__.py
@@ -1,4 +1,5 @@
from app import db
+from flask import current_app
from time import sleep
from .corpus_utils import CheckCorporaMixin
from .job_utils import CheckJobsMixin
@@ -8,6 +9,11 @@ import docker
class Daemon(CheckCorporaMixin, CheckJobsMixin):
def __init__(self):
self.docker = docker.from_env()
+ self.docker.login(
+ username=current_app.config['NOPAQUE_DOCKER_REGISTRY_USERNAME'],
+ password=current_app.config['NOPAQUE_DOCKER_REGISTRY_PASSWORD'],
+ registry=current_app.config['NOPAQUE_DOCKER_REGISTRY']
+ )
def run(self):
while True:
diff --git a/app/daemon/corpus_utils.py b/app/daemon/corpus_utils.py
index 5bee8848..31cad929 100644
--- a/app/daemon/corpus_utils.py
+++ b/app/daemon/corpus_utils.py
@@ -8,21 +8,19 @@ import shutil
class CheckCorporaMixin:
def check_corpora(self):
corpora = Corpus.query.all()
- queued_corpora = list(filter(lambda corpus: corpus.status == 'queued', corpora)) # noqa
- running_corpora = list(filter(lambda corpus: corpus.status == 'running', corpora)) # noqa
- start_analysis_corpora = list(filter(lambda corpus: corpus.status == 'start analysis', corpora)) # noqa
- analysing_corpora = list(filter(lambda corpus: corpus.status == 'analysing', corpora)) # noqa
- stop_analysis_corpora = list(filter(lambda corpus: corpus.status == 'stop analysis', corpora)) # noqa
- submitted_corpora = list(filter(lambda corpus: corpus.status == 'submitted', corpora)) # noqa
- for corpus in submitted_corpora:
+ for corpus in (x for x in corpora if x.status == 'submitted'):
self.create_build_corpus_service(corpus)
- for corpus in queued_corpora + running_corpora:
+ for corpus in (x for x in corpora if x.status == 'queued' or x.status == 'running'): # noqa
self.checkout_build_corpus_service(corpus)
- for corpus in start_analysis_corpora:
- self.create_cqpserver_container(corpus)
- for corpus in analysing_corpora:
+ for corpus in (x for x in corpora if x.status == 'prepared' and x.num_analysis_sessions > 0): # noqa
+ corpus.status = 'start analysis'
+ for corpus in (x for x in corpora if x.status == 'analysing' and x.num_analysis_sessions == 0): # noqa
+ corpus.status = 'stop analysis'
+ for corpus in (x for x in corpora if x.status == 'analysing'):
self.checkout_analysing_corpus_container(corpus)
- for corpus in stop_analysis_corpora:
+ for corpus in (x for x in corpora if x.status == 'start analysis'):
+ self.create_cqpserver_container(corpus)
+ for corpus in (x for x in corpora if x.status == 'stop analysis'):
self.remove_cqpserver_container(corpus)
def create_build_corpus_service(self, corpus):
@@ -32,7 +30,7 @@ class CheckCorporaMixin:
''' ## Constraints ## '''
constraints = ['node.role==worker']
''' ## Image ## '''
- image = current_app.config['DOCKER_IMAGE_PREFIX'] + 'cqpserver:latest'
+ image = f'{current_app.config["NOPAQUE_DOCKER_IMAGE_PREFIX"]}cqpserver:r1674' # noqa
''' ## Labels ## '''
labels = {
'origin': current_app.config['SERVER_NAME'],
@@ -43,27 +41,24 @@ class CheckCorporaMixin:
''' ### Corpus file mount ### '''
corpus_file_source = os.path.join(corpus.path, 'merged', 'corpus.vrt')
corpus_file_target = '/root/files/corpus.vrt'
- corpus_file_mount = \
- corpus_file_source + ':' + corpus_file_target + ':ro'
+ corpus_file_mount = f'{corpus_file_source}:{corpus_file_target}:ro'
''' ### Corpus data mount ### '''
corpus_data_source = os.path.join(corpus.path, 'data')
corpus_data_target = '/corpora/data'
- corpus_data_mount = \
- corpus_data_source + ':' + corpus_data_target + ':rw'
+ corpus_data_mount = f'{corpus_data_source}:{corpus_data_target}:rw'
# Make sure that their is no data in the corpus data directory
shutil.rmtree(corpus_data_source, ignore_errors=True)
os.mkdir(corpus_data_source)
''' ### Corpus registry mount ### '''
corpus_registry_source = os.path.join(corpus.path, 'registry')
corpus_registry_target = '/usr/local/share/cwb/registry'
- corpus_registry_mount = \
- corpus_registry_source + ':' + corpus_registry_target + ':rw'
+ corpus_registry_mount = f'{corpus_registry_source}:{corpus_registry_target}:rw' # noqa
# Make sure that their is no data in the corpus registry directory
shutil.rmtree(corpus_registry_source, ignore_errors=True)
os.mkdir(corpus_registry_source)
mounts = [corpus_file_mount, corpus_data_mount, corpus_registry_mount]
''' ## Name ## '''
- name = 'build-corpus_{}'.format(corpus.id)
+ name = f'build-corpus_{corpus.id}'
''' ## Restart policy ## '''
restart_policy = docker.types.RestartPolicy()
try:
@@ -78,57 +73,49 @@ class CheckCorporaMixin:
)
except docker.errors.APIError as e:
current_app.logger.error(
- 'Create "{}" service raised '.format(name)
- + '"docker.errors.APIError" The server returned an error. '
- + 'Details: {}'.format(e)
+ f'Create service "{name}" failed '
+ + f'due to "docker.errors.APIError": {e}'
)
- else:
- corpus.status = 'queued'
+ return
+ corpus.status = 'queued'
def checkout_build_corpus_service(self, corpus):
- service_name = 'build-corpus_{}'.format(corpus.id)
+ service_name = f'build-corpus_{corpus.id}'
try:
service = self.docker.services.get(service_name)
- except docker.errors.NotFound:
+ except docker.errors.NotFound as e:
current_app.logger.error(
- 'Get "{}" service raised '.format(service_name)
- + '"docker.errors.NotFound" The service does not exist. '
- + '(corpus.status: {} -> failed)'.format(corpus.status)
+ f'Get service "{service_name}" failed '
+ + f'due to "docker.errors.NotFound": {e}'
)
corpus.status = 'failed'
+ return
except docker.errors.APIError as e:
current_app.logger.error(
- 'Get "{}" service raised '.format(service_name)
- + '"docker.errors.APIError" The server returned an error. '
- + 'Details: {}'.format(e)
- )
- except docker.errors.InvalidVersion:
- current_app.logger.error(
- 'Get "{}" service raised '.format(service_name)
- + '"docker.errors.InvalidVersion" One of the arguments is '
- + 'not supported with the current API version.'
+ f'Get service "{service_name}" failed '
+ + f'due to "docker.errors.APIError": {e}'
)
+ return
+ service_tasks = service.tasks()
+ if not service_tasks:
+ return
+ task_state = service_tasks[0].get('Status').get('State')
+ if corpus.status == 'queued' and task_state != 'pending':
+ corpus.status = 'running'
+ return
+ elif corpus.status == 'running' and task_state == 'complete':
+ corpus.status = 'prepared'
+ elif corpus.status == 'running' and task_state == 'failed':
+ corpus.status = 'failed'
else:
- service_tasks = service.tasks()
- if not service_tasks:
- return
- task_state = service_tasks[0].get('Status').get('State')
- if corpus.status == 'queued' and task_state != 'pending':
- corpus.status = 'running'
- elif (corpus.status == 'running'
- and task_state in ['complete', 'failed']):
- try:
- service.remove()
- except docker.errors.APIError as e:
- current_app.logger.error(
- 'Remove "{}" service raised '.format(service_name)
- + '"docker.errors.APIError" The server returned an error. ' # noqa
- + 'Details: {}'.format(e)
- )
- return
- else:
- corpus.status = \
- 'prepared' if task_state == 'complete' else 'failed'
+ return
+ try:
+ service.remove()
+ except docker.errors.APIError as e:
+ current_app.logger.error(
+ f'Remove service "{service_name}" failed '
+ + f'due to "docker.errors.APIError": {e}'
+ )
def create_cqpserver_container(self, corpus):
''' # Docker container settings # '''
@@ -137,22 +123,20 @@ class CheckCorporaMixin:
''' ## Detach ## '''
detach = True
''' ## Image ## '''
- image = current_app.config['DOCKER_IMAGE_PREFIX'] + 'cqpserver:latest'
+ image = f'{current_app.config["NOPAQUE_DOCKER_IMAGE_PREFIX"]}cqpserver:r1674' # noqa
''' ## Name ## '''
- name = 'cqpserver_{}'.format(corpus.id)
+ name = f'cqpserver_{corpus.id}'
''' ## Network ## '''
network = 'nopaque_default'
''' ## Volumes ## '''
''' ### Corpus data volume ### '''
corpus_data_source = os.path.join(corpus.path, 'data')
corpus_data_target = '/corpora/data'
- corpus_data_volume = \
- corpus_data_source + ':' + corpus_data_target + ':rw'
+ corpus_data_volume = f'{corpus_data_source}:{corpus_data_target}:rw'
''' ### Corpus registry volume ### '''
corpus_registry_source = os.path.join(corpus.path, 'registry')
corpus_registry_target = '/usr/local/share/cwb/registry'
- corpus_registry_volume = \
- corpus_registry_source + ':' + corpus_registry_target + ':rw'
+ corpus_registry_volume = f'{corpus_registry_source}:{corpus_registry_target}:rw' # noqa
volumes = [corpus_data_volume, corpus_registry_volume]
# Check if a cqpserver container already exists. If this is the case,
# remove it and create a new one
@@ -162,9 +146,8 @@ class CheckCorporaMixin:
pass
except docker.errors.APIError as e:
current_app.logger.error(
- 'Get "{}" container raised '.format(name)
- + '"docker.errors.APIError" The server returned an error. '
- + 'Details: {}'.format(e)
+ f'Get container "{name}" failed '
+ + f'due to "docker.errors.APIError": {e}'
)
return
else:
@@ -172,77 +155,68 @@ class CheckCorporaMixin:
container.remove(force=True)
except docker.errors.APIError as e:
current_app.logger.error(
- 'Remove "{}" container raised '.format(name)
- + '"docker.errors.APIError" The server returned an error. '
- + 'Details: {}'.format(e)
+ f'Remove container "{name}" failed '
+ + f'due to "docker.errors.APIError": {e}'
)
return
try:
- self.docker.containers.run(image, command=command, detach=detach,
- volumes=volumes, name=name,
- network=network)
- except docker.errors.ContainerError:
- # This case should not occur, because detach is True.
+ self.docker.containers.run(
+ image,
+ command=command,
+ detach=detach,
+ volumes=volumes,
+ name=name,
+ network=network
+ )
+ except docker.errors.ImageNotFound as e:
current_app.logger.error(
- 'Run "{}" container raised '.format(name)
- + '"docker.errors.ContainerError" The container exits with a '
- + 'non-zero exit code and detach is False.'
- )
- corpus.status = 'failed'
- except docker.errors.ImageNotFound:
- current_app.logger.error(
- 'Run "{}" container raised '.format(name)
- + '"docker.errors.ImageNotFound" The specified image does not '
- + 'exist.'
+ f'Run container "{name}" failed '
+ + f'due to "docker.errors.ImageNotFound" error: {e}'
)
corpus.status = 'failed'
+ return
except docker.errors.APIError as e:
current_app.logger.error(
- 'Run "{}" container raised '.format(name)
- + '"docker.errors.APIError" The server returned an error. '
- + 'Details: {}'.format(e)
+ f'Run container "{name}" failed '
+ + f'due to "docker.errors.APIError" error: {e}'
)
- else:
- corpus.status = 'analysing'
+ return
+ corpus.status = 'analysing'
def checkout_analysing_corpus_container(self, corpus):
- container_name = 'cqpserver_{}'.format(corpus.id)
+ container_name = f'cqpserver_{corpus.id}'
try:
self.docker.containers.get(container_name)
- except docker.errors.NotFound:
+ except docker.errors.NotFound as e:
current_app.logger.error(
- 'Could not find "{}" but the corpus state is "analysing".'
+ f'Get container "{container_name}" failed '
+ + f'due to "docker.errors.NotFound": {e}'
)
+ corpus.num_analysis_sessions = 0
corpus.status = 'prepared'
except docker.errors.APIError as e:
current_app.logger.error(
- 'Get "{}" container raised '.format(container_name)
- + '"docker.errors.APIError" The server returned an error. '
- + 'Details: {}'.format(e)
+ f'Get container "{container_name}" failed '
+ + f'due to "docker.errors.APIError": {e}'
)
- return
def remove_cqpserver_container(self, corpus):
- container_name = 'cqpserver_{}'.format(corpus.id)
+ container_name = f'cqpserver_{corpus.id}'
try:
container = self.docker.containers.get(container_name)
except docker.errors.NotFound:
- pass
+ corpus.status = 'prepared'
+ return
except docker.errors.APIError as e:
current_app.logger.error(
- 'Get "{}" container raised '.format(container_name)
- + '"docker.errors.APIError" The server returned an error. '
- + 'Details: {}'.format(e)
+ f'Get container "{container_name}" failed '
+ + f'due to "docker.errors.APIError": {e}'
)
return
- else:
- try:
- container.remove(force=True)
- except docker.errors.APIError as e:
- current_app.logger.error(
- 'Remove "{}" container raised '.format(container_name)
- + '"docker.errors.APIError" The server returned an error. '
- + 'Details: {}'.format(e)
- )
- return
- corpus.status = 'prepared'
+ try:
+ container.remove(force=True)
+ except docker.errors.APIError as e:
+ current_app.logger.error(
+ f'Remove container "{container_name}" failed '
+ + f'due to "docker.errors.APIError": {e}'
+ )
diff --git a/app/daemon/job_utils.py b/app/daemon/job_utils.py
index 47424a81..78bae839 100644
--- a/app/daemon/job_utils.py
+++ b/app/daemon/job_utils.py
@@ -12,15 +12,11 @@ import shutil
class CheckJobsMixin:
def check_jobs(self):
jobs = Job.query.all()
- canceling_jobs = list(filter(lambda job: job.status == 'canceling', jobs)) # noqa
- queued_jobs = list(filter(lambda job: job.status == 'queued', jobs))
- running_jobs = list(filter(lambda job: job.status == 'running', jobs))
- submitted_jobs = list(filter(lambda job: job.status == 'submitted', jobs)) # noqa
- for job in submitted_jobs:
+ for job in (x for x in jobs if x.status == 'submitted'):
self.create_job_service(job)
- for job in queued_jobs + running_jobs:
+ for job in (x for x in jobs if x.status in ['queued', 'running']):
self.checkout_job_service(job)
- for job in canceling_jobs:
+ for job in (x for x in jobs if x.status == 'canceling'):
self.remove_job_service(job)
def create_job_service(self, job):
@@ -30,26 +26,23 @@ class CheckJobsMixin:
mem_mb = 2048
n_cores = 2
executable = 'file-setup'
- image = (current_app.config['DOCKER_IMAGE_PREFIX']
- + 'file-setup:' + job.service_version)
+ image = f'{current_app.config["NOPAQUE_DOCKER_IMAGE_PREFIX"]}file-setup:{job.service_version}' # noqa
elif job.service == 'ocr':
mem_mb = 4096
n_cores = 4
executable = 'ocr'
- image = (current_app.config['DOCKER_IMAGE_PREFIX']
- + 'ocr:' + job.service_version)
+ image = f'{current_app.config["NOPAQUE_DOCKER_IMAGE_PREFIX"]}ocr:{job.service_version}' # noqa
elif job.service == 'nlp':
mem_mb = 2048
n_cores = 2
executable = 'nlp'
- image = (current_app.config['DOCKER_IMAGE_PREFIX']
- + 'nlp:' + job.service_version)
+ image = f'{current_app.config["NOPAQUE_DOCKER_IMAGE_PREFIX"]}nlp:{job.service_version}' # noqa
''' ## Command ## '''
- command = '{} -i /input -o /output'.format(executable)
+ command = f'{executable} -i /input -o /output'
command += ' --log-dir /input'
- command += ' --mem-mb {}'.format(mem_mb)
- command += ' --n-cores {}'.format(n_cores)
- command += ' --zip [' + job.service + ']_' + secure_filename(job.title)
+ command += f' --mem-mb {mem_mb}'
+ command += f' --n-cores {n_cores}'
+ command += f' --zip [{job.service}]_{secure_filename(job.title)}'
command += ' ' + ' '.join(json.loads(job.service_args))
''' ## Constraints ## '''
constraints = ['node.role==worker']
@@ -64,18 +57,18 @@ class CheckJobsMixin:
input_mount_source = job.path
input_mount_target = '/input'
if job.service == 'file-setup':
- input_mount_target += '/' + secure_filename(job.title)
- input_mount = input_mount_source + ':' + input_mount_target + ':rw'
+ input_mount_target += f'/{secure_filename(job.title)}'
+ input_mount = f'{input_mount_source}:{input_mount_target}:rw'
''' ### Output mount ### '''
output_mount_source = os.path.join(job.path, 'output')
output_mount_target = '/output'
- output_mount = output_mount_source + ':' + output_mount_target + ':rw'
+ output_mount = f'{output_mount_source}:{output_mount_target}:rw'
# Make sure that their is no data in the output directory
shutil.rmtree(output_mount_source, ignore_errors=True)
os.makedirs(output_mount_source)
mounts = [input_mount, output_mount]
''' ## Name ## '''
- name = 'job_{}'.format(job.id)
+ name = f'job_{job.id}'
''' ## Resources ## '''
resources = docker.types.Resources(
cpu_reservation=n_cores * (10 ** 9),
@@ -96,104 +89,83 @@ class CheckJobsMixin:
)
except docker.errors.APIError as e:
current_app.logger.error(
- 'Create "{}" service raised '.format(name)
- + '"docker.errors.APIError" The server returned an error. '
- + 'Details: {}'.format(e)
+ f'Create service "{name}" failed '
+ + f'due to "docker.errors.APIError": {e}'
)
return
- else:
- job.status = 'queued'
+ job.status = 'queued'
def checkout_job_service(self, job):
- service_name = 'job_{}'.format(job.id)
+ service_name = f'job_{job.id}'
try:
service = self.docker.services.get(service_name)
- except docker.errors.NotFound:
+ except docker.errors.NotFound as e:
current_app.logger.error(
- 'Get "{}" service raised '.format(service_name)
- + '"docker.errors.NotFound" The service does not exist. '
- + '(job.status: {} -> failed)'.format(job.status)
+ f'Get service "{service_name}" failed '
+ + f'due to "docker.errors.NotFound": {e}'
)
job.status = 'failed'
+ return
except docker.errors.APIError as e:
current_app.logger.error(
- 'Get "{}" service raised '.format(service_name)
- + '"docker.errors.APIError" The server returned an error. '
- + 'Details: {}'.format(e)
+ f'Get service "{service_name}" failed '
+ + f'due to "docker.errors.APIError": {e}'
)
return
- except docker.errors.InvalidVersion:
- current_app.logger.error(
- 'Get "{}" service raised '.format(service_name)
- + '"docker.errors.InvalidVersion" One of the arguments is '
- + 'not supported with the current API version.'
- )
+ service_tasks = service.tasks()
+ if not service_tasks:
return
+ task_state = service_tasks[0].get('Status').get('State')
+ if job.status == 'queued' and task_state != 'pending':
+ job.status = 'running'
+ return
+ elif job.status == 'running' and task_state == 'complete':
+ job.status = 'complete'
+ results_dir = os.path.join(job.path, 'output')
+ result_files = [x for x in os.listdir(results_dir) if x.endswith('.zip')] # noqa
+ for result_file in result_files:
+ job_result = JobResult(filename=result_file, job=job)
+ db.session.add(job_result)
+ db.session.flush()
+ db.session.refresh(job_result)
+ elif job.status == 'running' and task_state == 'failed':
+ job.status = 'failed'
else:
- service_tasks = service.tasks()
- if not service_tasks:
- return
- task_state = service_tasks[0].get('Status').get('State')
- if job.status == 'queued' and task_state != 'pending':
- job.status = 'running'
- elif job.status == 'running' and task_state in ['complete', 'failed']: # noqa
- try:
- service.remove()
- except docker.errors.APIError as e:
- current_app.logger.error(
- 'Remove "{}" service raised '.format(service_name)
- + '"docker.errors.APIError" The server returned an error. ' # noqa
- + 'Details: {}'.format(e)
- )
- return
- else:
- if task_state == 'complete':
- results_dir = os.path.join(job.path, 'output')
- result_files = filter(lambda x: x.endswith('.zip'),
- os.listdir(results_dir))
- for result_file in result_files:
- job_result = JobResult(filename=result_file, job=job) # noqa
- db.session.add(job_result)
- db.session.flush()
- db.session.refresh(job_result)
- job.end_date = datetime.utcnow()
- job.status = task_state
+ return
+ job.end_date = datetime.utcnow()
+ try:
+ service.remove()
+ except docker.errors.APIError as e:
+ current_app.logger.error(
+ f'Remove service "{service_name}" failed '
+ + f'due to "docker.errors.APIError": {e}'
+ )
def remove_job_service(self, job):
- service_name = 'job_{}'.format(job.id)
+ service_name = f'job_{job.id}'
try:
service = self.docker.services.get(service_name)
except docker.errors.NotFound:
job.status = 'canceled'
+ return
except docker.errors.APIError as e:
current_app.logger.error(
- 'Get "{}" service raised '.format(service_name)
- + '"docker.errors.APIError" The server returned an error. '
- + 'Details: {}'.format(e)
+ f'Get service "{service_name}" failed '
+ + f'due to "docker.errors.APIError": {e}'
)
return
- except docker.errors.InvalidVersion:
+ try:
+ service.update(mounts=None)
+ except docker.errors.APIError as e:
current_app.logger.error(
- 'Get "{}" service raised '.format(service_name)
- + '"docker.errors.InvalidVersion" One of the arguments is '
- + 'not supported with the current API version.'
+ f'Update service "{service_name}" failed '
+ + f'due to "docker.errors.APIError": {e}'
)
return
- else:
- try:
- service.update(mounts=None)
- except docker.errors.APIError as e:
- current_app.logger.error(
- 'Update "{}" service raised '.format(service_name)
- + '"docker.errors.APIError" The server returned an error. '
- + 'Details: {}'.format(e)
- )
- return
- try:
- service.remove()
- except docker.errors.APIError as e:
- current_app.logger.error(
- 'Remove "{}" service raised '.format(service_name)
- + '"docker.errors.APIError" The server returned an error. '
- + 'Details: {}'.format(e)
- )
+ try:
+ service.remove()
+ except docker.errors.APIError as e:
+ current_app.logger.error(
+ f'Remove "{service_name}" service failed '
+ + f'due to "docker.errors.APIError": {e}'
+ )
diff --git a/app/events/socketio.py b/app/events/socketio.py
index ff7f787a..81f40533 100644
--- a/app/events/socketio.py
+++ b/app/events/socketio.py
@@ -1,6 +1,6 @@
from flask import request
from flask_login import current_user
-from flask_socketio import join_room, leave_room
+from flask_socketio import join_room
from .. import socketio
from ..decorators import socketio_login_required
from ..models import User
@@ -25,7 +25,7 @@ def socketio_connect():
' On connect the sid is saved in the sessions list.
'''
sessions.append(request.sid)
- return {'code': 200, 'msg': 'OK'}
+ # return {'code': 200, 'msg': 'OK'}
@socketio.on('disconnect')
@@ -37,7 +37,7 @@ def socketio_disconnect():
sessions.remove(request.sid)
except ValueError:
pass
- return {'code': 200, 'msg': 'OK'}
+ # return {'code': 200, 'msg': 'OK'}
@socketio.on('start_user_session')
diff --git a/app/models.py b/app/models.py
index 91a812b6..0cc4e83c 100644
--- a/app/models.py
+++ b/app/models.py
@@ -567,16 +567,18 @@ class Corpus(db.Model):
user_id = db.Column(db.Integer, db.ForeignKey('users.id'))
# Fields
creation_date = db.Column(db.DateTime(), default=datetime.utcnow)
- current_nr_of_tokens = db.Column(db.Integer, default=0)
description = db.Column(db.String(255))
last_edited_date = db.Column(db.DateTime(), default=datetime.utcnow)
- max_nr_of_tokens = 2147483647
status = db.Column(db.String(16), default='unprepared')
title = db.Column(db.String(32))
+ num_analysis_sessions = db.Column(db.Integer, default=0)
+ num_tokens = db.Column(db.Integer, default=0)
archive_file = db.Column(db.String(255))
# Relationships
files = db.relationship('CorpusFile', backref='corpus', lazy='dynamic',
cascade='save-update, merge, delete')
+ # Python class variables
+ max_num_tokens = 2147483647
@property
def analysis_url(self):
@@ -601,12 +603,13 @@ class Corpus(db.Model):
'id': self.id,
'user_id': self.user_id,
'creation_date': self.creation_date.isoformat() + 'Z',
- 'current_nr_of_tokens': self.current_nr_of_tokens,
'description': self.description,
+ 'max_num_tokens': self.max_num_tokens,
+ 'num_analysis_sessions': self.num_analysis_sessions,
+ 'num_tokens': self.num_tokens,
'status': self.status,
'last_edited_date': self.last_edited_date.isoformat() + 'Z',
- 'max_nr_of_tokens': self.max_nr_of_tokens,
- 'title': self.title,
+ 'title': self.title
}
if include_relationships:
dict_corpus['files'] = {file.id: file.to_dict()
@@ -617,30 +620,25 @@ class Corpus(db.Model):
output_dir = os.path.join(self.path, 'merged')
shutil.rmtree(output_dir, ignore_errors=True)
os.mkdir(output_dir)
- master_element_tree = ET.ElementTree(
- ET.fromstring('<corpus>\n</corpus>')
- )
+ output_file = os.path.join(output_dir, 'corpus.vrt')
+ corpus_element = ET.fromstring('<corpus>\n</corpus>')
for corpus_file in self.files:
element_tree = ET.parse(corpus_file.path)
text_node = element_tree.find('text')
- text_node.set('address', corpus_file.address or "NULL")
+ text_node.set('address', corpus_file.address or 'NULL')
text_node.set('author', corpus_file.author)
- text_node.set('booktitle', corpus_file.booktitle or "NULL")
- text_node.set('chapter', corpus_file.chapter or "NULL")
- text_node.set('editor', corpus_file.editor or "NULL")
- text_node.set('institution', corpus_file.institution or "NULL")
- text_node.set('journal', corpus_file.journal or "NULL")
- text_node.set('pages', corpus_file.pages or "NULL")
- text_node.set('publisher', corpus_file.publisher or "NULL")
+ text_node.set('booktitle', corpus_file.booktitle or 'NULL')
+ text_node.set('chapter', corpus_file.chapter or 'NULL')
+ text_node.set('editor', corpus_file.editor or 'NULL')
+ text_node.set('institution', corpus_file.institution or 'NULL')
+ text_node.set('journal', corpus_file.journal or 'NULL')
+ text_node.set('pages', corpus_file.pages or 'NULL')
+ text_node.set('publisher', corpus_file.publisher or 'NULL')
text_node.set('publishing_year', str(corpus_file.publishing_year))
- text_node.set('school', corpus_file.school or "NULL")
+ text_node.set('school', corpus_file.school or 'NULL')
text_node.set('title', corpus_file.title)
- element_tree.write(corpus_file.path)
- master_element_tree.getroot().insert(1, text_node)
- output_file = os.path.join(output_dir, 'corpus.vrt')
- master_element_tree.write(output_file,
- xml_declaration=True,
- encoding='utf-8')
+ corpus_element.insert(1, text_node)
+ ET.ElementTree(corpus_element).write(output_file, encoding='utf-8')
self.last_edited_date = datetime.utcnow()
self.status = 'submitted'
diff --git a/app/static/css/nopaque.css b/app/static/css/nopaque.css
index 862d1d5d..41dfd628 100644
--- a/app/static/css/nopaque.css
+++ b/app/static/css/nopaque.css
@@ -112,3 +112,6 @@ h1 .nopaque-icons, h2 .nopaque-icons, h3 .nopaque-icons, h4 .nopaque-icons,
.nopaque-icons.service-icon[data-service="ocr"]:empty:before {content: "F";}
.status-text[data-status]:empty:before {content: attr(data-status);}
+
+.hoverable {cursor: pointer;}
+.s-attr.chip .p-attr.chip {background-color: inherit;}
diff --git a/app/static/js/nopaque/CorpusAnalysis/CQiClient.js b/app/static/js/nopaque/CorpusAnalysis/CQiClient.js
new file mode 100644
index 00000000..42efef51
--- /dev/null
+++ b/app/static/js/nopaque/CorpusAnalysis/CQiClient.js
@@ -0,0 +1,439 @@
+class CQiClient {
+ constructor(corpusId) {
+ this.socket = io(
+ '/corpora/corpus/corpus_analysis',
+ {auth: {corpus_id: corpusId}, transports: ['websocket'], upgrade: false}
+ );
+ this.connected = false;
+ this.corpora = new CQiCorpusCollection(this.socket);
+ }
+
+ connect() {
+ return new Promise((resolve, reject) => {
+ this.socket.emit('cqi.connect', response => {
+ if (response.code === 200) {
+ this.connected = true;
+ resolve(response.payload);
+ } else {
+ reject(response);
+ }
+ });
+ });
+ }
+
+ disconnect() {
+ return new Promise((resolve, reject) => {
+ this.socket.emit('cqi.disconnect', response => {
+ if (response.code === 200) {
+ this.connected = false;
+ resolve(response.payload);
+ } else {
+ reject(response);
+ }
+ });
+ });
+ }
+
+ ping() {
+ return new Promise((resolve, reject) => {
+ this.socket.emit('cqi.ping', response => {
+ if (response.code === 200) {
+ resolve(response.payload);
+ } else {
+ reject(response);
+ }
+ });
+ });
+ }
+}
+
+
+class CQiCorpusCollection {
+ constructor(socket) {
+ this.socket = socket;
+ }
+
+ get(corpusName) {
+ return new Promise((resolve, reject) => {
+ let args = {corpus_name: corpusName};
+ this.socket.emit('cqi.corpora.get', args, response => {
+ if (response.code === 200) {
+ resolve(new CQiCorpus(this.socket, response.payload));
+ } else {
+ reject(response);
+ }
+ });
+ });
+ }
+
+ list() {
+ return new Promise((resolve, reject) => {
+ this.socket.emit('cqi.corpora.list', response => {
+ if (response.code === 200) {
+ resolve(response.payload.map(x => {return new CQiCorpus(this.socket, x);}));
+ } else {
+ reject(response);
+ }
+ });
+ });
+ }
+}
+
+
+class CQiCorpus {
+ constructor(socket, attrs) {
+ this.socket = socket;
+ this.charset = attrs.charset;
+ this.name = attrs.name;
+ this.properties = attrs.properties;
+ this.size = attrs.size;
+ this.alignmentAttributes = new CQiAlignmentAttributeCollection(this.socket, this);
+ this.positionalAttributes = new CQiPositionalAttributeCollection(this.socket, this);
+ this.structuralAttributes = new CQiStructuralAttributeCollection(this.socket, this);
+ this.subcorpora = new CQiSubcorpusCollection(this.socket, this);
+ }
+
+ drop() {
+ return new Promise((resolve, reject) => {
+ let args = {corpus_name: this.name};
+ this.socket.emit('cqi.corpora.corpus.drop', args, response => {
+ if (response.code === 200) {
+ resolve(response.payload);
+ } else {
+ reject(response);
+ }
+ });
+ });
+ }
+
+ query(subcorpus_name, queryString) {
+ return new Promise((resolve, reject) => {
+ let args = {
+ corpus_name: this.name,
+ subcorpus_name: subcorpus_name,
+ query: queryString
+ };
+ this.socket.emit('cqi.corpora.corpus.query', args, response => {
+ if (response.code === 200) {
+ resolve(response.payload);
+ } else {
+ reject(response);
+ }
+ });
+ });
+ }
+
+ // nopaque specific CQi extension
+ paginate(page=1, perPage=20) {
+ return new Promise((resolve, reject) => {
+ let args = {corpus_name: this.name, page: page, per_page: perPage};
+ this.socket.emit('cqi.corpora.corpus.paginate', args, response => {
+ if (response.code === 200) {
+ resolve(response.payload);
+ } else {
+ reject(response);
+ }
+ });
+ });
+ }
+
+ updateDb() {
+ let args = {corpus_name: this.name};
+ this.socket.emit('cqi.corpora.corpus.update_db', args);
+ }
+}
+
+
+class CQiAlignmentAttributeCollection {
+ constructor(socket, corpus) {
+ this.corpus = corpus;
+ this.socket = socket;
+ }
+
+ get(alignmentAttributeName) {
+ return new Promise((resolve, reject) => {
+ let args = {corpus_name: this.corpus.name,
+ alignment_attribute_name: alignmentAttributeName};
+ this.socket.emit('cqi.corpora.corpus.alignment_attributes.get', args, response => {
+ if (response.code === 200) {
+ resolve(new CQiAlignmentAttribute(this.socket, this.corpus, response.payload));
+ } else {
+ reject(response);
+ }
+ });
+ });
+ }
+
+ list() {
+ return new Promise((resolve, reject) => {
+ let args = {corpus_name: this.corpus.name};
+ this.socket.emit('cqi.corpus.alignment_attributes.list', args, response => {
+ if (response.code === 200) {
+ resolve(response.payload.map(x => {return new CQiAlignmentAttribute(this.socket, this.corpus, x);}));
+ } else {
+ reject(response);
+ }
+ });
+ });
+ }
+}
+
+
+class CQiAlignmentAttribute {
+ constructor(socket, corpus, attrs) {
+ this.socket = socket;
+ this.corpus = corpus;
+ this.name = attrs.name;
+ this.size = attrs.size;
+ }
+}
+
+
+class CQiPositionalAttributeCollection {
+ constructor(socket, corpus) {
+ this.corpus = corpus;
+ this.socket = socket;
+ }
+
+ get(positionalAttributeName) {
+ return new Promise((resolve, reject) => {
+ let args = {
+ corpus_name: this.corpus.name,
+ positional_attribute_name: positionalAttributeName
+ };
+ this.socket.emit('cqi.corpora.corpus.positional_attributes.get', args, response => {
+ if (response.code === 200) {
+ resolve(new CQiPositionalAttribute(this.socket, this.corpus, response.payload));
+ } else {
+ reject(response);
+ }
+ });
+ });
+ }
+
+ list() {
+ return new Promise((resolve, reject) => {
+ let args = {corpus_name: this.corpus.name};
+ this.socket.emit('cqi.corpus.positional_attributes.list', args, response => {
+ if (response.code === 200) {
+ resolve(response.payload.map(x => {return new CQiPositionalAttribute(this.socket, this.corpus, x);}));
+ } else {
+ reject(response);
+ }
+ });
+ });
+ }
+}
+
+
+class CQiPositionalAttribute {
+ constructor(socket, corpus, attrs) {
+ this.socket = socket;
+ this.corpus = corpus;
+ this.lexiconSize = attrs.lexicon_size;
+ this.name = attrs.name;
+ this.size = attrs.size;
+ }
+}
+
+
+class CQiStructuralAttributeCollection {
+ constructor(socket, corpus) {
+ this.corpus = corpus;
+ this.socket = socket;
+ }
+
+ get(structuralAttributeName) {
+ return new Promise((resolve, reject) => {
+ let args = {
+ corpus_name: this.corpus.name,
+ structural_attribute_name: structuralAttributeName
+ };
+ this.socket.emit('cqi.corpora.corpus.structural_attributes.get', args, response => {
+ if (response.code === 200) {
+ resolve(new CQiStructuralAttribute(this.socket, this.corpus, response.payload));
+ } else {
+ reject(response);
+ }
+ });
+ });
+ }
+
+ list() {
+ return new Promise((resolve, reject) => {
+ let args = {corpus_name: this.corpus.name};
+ this.socket.emit('cqi.corpus.structural_attributes.list', args, response => {
+ if (response.code === 200) {
+ resolve(response.payload.map(x => {return new CQiStructuralAttribute(this.socket, this.corpus, x);}));
+ } else {
+ reject(response);
+ }
+ });
+ });
+ }
+}
+
+
+class CQiStructuralAttribute {
+ constructor(socket, corpus, attrs) {
+ this.socket = socket;
+ this.corpus = corpus;
+ this.hasValues = attrs.has_values;
+ this.name = attrs.name;
+ this.size = attrs.size;
+ }
+}
+
+
+class CQiSubcorpusCollection {
+ constructor(socket, corpus) {
+ this.corpus = corpus;
+ this.socket = socket;
+ }
+
+ get(subcorpusName) {
+ return new Promise((resolve, reject) => {
+ let args = {corpus_name: this.corpus.name, subcorpus_name: subcorpusName};
+ this.socket.emit('cqi.corpora.corpus.subcorpora.get', args, response => {
+ if (response.code === 200) {
+ resolve(new CQiSubcorpus(this.socket, this.corpus, response.payload));
+ } else {
+ reject(response);
+ }
+ });
+ });
+ }
+
+ list() {
+ return new Promise((resolve, reject) => {
+ let args = {corpus_name: this.corpus.name};
+ this.socket.emit('cqi.corpora.corpus.subcorpora.list', args, response => {
+ if (response.code === 200) {
+ resolve(response.payload.map(x => {return new CQiSubcorpus(this.socket, this.corpus, x);}));
+ } else {
+ reject(response);
+ }
+ });
+ });
+ }
+}
+
+
+class CQiSubcorpus {
+ constructor(socket, corpus, attrs) {
+ this.socket = socket;
+ this.corpus = corpus;
+ this.fields = attrs.fields;
+ this.name = attrs.name;
+ this.size = attrs.size;
+ }
+
+ drop() {
+ return new Promise((resolve, reject) => {
+ let args = {corpus_name: this.corpus.name, subcorpus_name: this.name};
+ this.socket.emit('cqi.corpora.corpus.subcorpora.subcorpus.drop', args, response => {
+ if (response.code === 200) {
+ resolve(response.payload);
+ } else {
+ reject(response);
+ }
+ });
+ });
+ }
+
+ dump(field, first, last) {
+ return new Promise((resolve, reject) => {
+ let args = {
+ corpus_name: this.corpus.name,
+ subcorpus_name: this.name,
+ field: field,
+ first: first,
+ last: last
+ };
+ this.socket.emit('cqi.corpora.corpus.subcorpora.subcorpus.dump', args, response => {
+ if (response.code === 200) {
+ resolve(response.payload);
+ } else {
+ reject(response);
+ }
+ });
+ });
+ }
+
+ export(context=50) {
+ return new Promise((resolve, reject) => {
+ let args = {
+ corpus_name: this.corpus.name,
+ subcorpus_name: this.name,
+ context: context
+ };
+ this.socket.emit('cqi.corpora.corpus.subcorpora.subcorpus.export', args, response => {
+ if (response.code === 200) {
+ resolve(response.payload);
+ } else {
+ reject(response);
+ }
+ });
+ });
+ }
+
+ fdst_1(cutoff, field, attribute) {
+ return new Promise((resolve, reject) => {
+ let args = {
+ corpus_name: this.corpus.name,
+ subcorpus_name: this.name,
+ cutoff: cutoff,
+ field: field,
+ attribute: attribute
+ };
+ this.socket.emit('cqi.corpora.corpus.subcorpora.subcorpus.fdist_1', args, response => {
+ if (response.code === 200) {
+ resolve(response.payload);
+ } else {
+ reject(response);
+ }
+ });
+ });
+ }
+
+ fdst_2(cutoff, field1, attribute1, field2, attribute2) {
+ return new Promise((resolve, reject) => {
+ let args = {
+ corpus_name: this.corpus.name,
+ subcorpus_name: this.name,
+ cutoff: cutoff,
+ field1: field1,
+ attribute1: attribute1,
+ field2: field2,
+ attribute2: attribute2
+ };
+ this.socket.emit('cqi.corpora.corpus.subcorpora.subcorpus.fdist_2', args, response => {
+ if (response.code === 200) {
+ resolve(response.payload);
+ } else {
+ reject(response);
+ }
+ });
+ });
+ }
+
+ // nopaque specific CQi extension
+ paginate(page=1, perPage=20, context=50) {
+ return new Promise((resolve, reject) => {
+ let args = {
+ corpus_name: this.corpus.name,
+ subcorpus_name: this.name,
+ page: page,
+ per_page: perPage,
+ context: context
+ };
+ this.socket.emit('cqi.corpora.corpus.subcorpora.subcorpus.paginate', args, response => {
+ if (response.code === 200) {
+ resolve(response.payload);
+ } else {
+ reject(response);
+ }
+ });
+ });
+ }
+}
diff --git a/app/static/js/nopaque/CorpusAnalysis/CorpusAnalysisApp.js b/app/static/js/nopaque/CorpusAnalysis/CorpusAnalysisApp.js
new file mode 100644
index 00000000..71a4780a
--- /dev/null
+++ b/app/static/js/nopaque/CorpusAnalysis/CorpusAnalysisApp.js
@@ -0,0 +1,118 @@
+class CorpusAnalysisApp {
+ static entitiyColors = {
+ PERSON: '#a6e22d',
+ PER: '#a6e22d',
+ NORP: '#ef60b4',
+ FACILITY: '#43c6fc',
+ ORG: '#43c6fc',
+ GPE: '#fd9720',
+ LOC: '#fd9720',
+ PRODUCT: '#a99dfb',
+ MISC: '#a99dfb',
+ EVENT: ':#fc0',
+ WORK_OF_ART: '#fc0',
+ LANGUAGE: '#fc0',
+ DATE: '#2fbbab',
+ TIME: '#2fbbab',
+ PERCENT: '#bbb',
+ MONEY: '#bbb',
+ QUANTITY: '#bbb',
+ ORDINAL: '#bbb',
+ CARDINAL: '#bbb'
+ };
+
+ constructor(corpusId) {
+ this.data = {};
+
+ // HTML elements
+ this.elements = {
+ container: document.querySelector('#corpus-analysis-app-container'),
+ extensionTabs: document.querySelector('#corpus-analysis-app-extension-tabs'),
+ initModal: document.querySelector('#corpus-analysis-app-init-modal'),
+ initError: document.querySelector('#corpus-analysis-app-init-error'),
+ initProgress: document.querySelector('#corpus-analysis-app-init-progress'),
+ overview: document.querySelector('#corpus-analysis-app-overview')
+ };
+ // Materialize elements
+ this.elements.m = {
+ extensionTabs: M.Tabs.init(this.elements.extensionTabs),
+ initModal: M.Modal.init(this.elements.initModal, {dismissible: false})
+ };
+
+ this.extensions = {};
+
+ this.settings = {
+ corpusId: corpusId
+ };
+ }
+
+ init() {
+ this.disableActionElements();
+ this.elements.m.initModal.open();
+ // Init data
+ this.data.cQiClient = new CQiClient(this.settings.corpusId);
+ this.data.cQiClient.connect()
+ .then(cQiStatus => {
+ return this.data.cQiClient.corpora.get('CORPUS');
+ })
+ .then(
+ cQiCorpus => {
+ this.data.corpus = {o: cQiCorpus};
+ // TODO: Don't do this here
+ cQiCorpus.updateDb();
+ this.enableActionElements();
+ for (let extension of Object.values(this.extensions)) {extension.init();}
+ this.elements.m.initModal.close();
+ },
+ cQiError => {
+ this.elements.initError.innerText = JSON.stringify(cQiError);
+ this.elements.initError.classList.remove('hide');
+ this.elements.initProgress.classList.add('hide');
+ if ('payload' in cQiError && 'code' in cQiError.payload && 'msg' in cQiError.payload) {
+ nopaque.appClient.flash(`${cQiError.payload.code}: ${cQiError.payload.msg}`, 'error');
+ }
+ }
+ );
+ // Add event listeners
+ for (let extensionSelectorElement of this.elements.overview.querySelectorAll('.extension-selector')) {
+ extensionSelectorElement.addEventListener('click', () => {
+ this.elements.m.extensionTabs.select(extensionSelectorElement.dataset.target);
+ });
+ }
+ }
+
+ registerExtension(extension) {
+ if (extension.name in this.extensions) {
+ console.error(`Can't register extension ${extension.name}: Already registered`);
+ return;
+ }
+ this.extensions[extension.name] = extension;
+ if ('cQiClient' in this.data && this.data.cQiClient.connected) {extension.init();}
+ }
+
+ disableActionElements() {
+ let actionElements = this.elements.container.querySelectorAll('.corpus-analysis-action');
+ for (let actionElement of actionElements) {
+ if (actionElement.nodeName === 'INPUT') {
+ actionElement.disabled = true;
+ } else if (actionElement.nodeName === 'SELECT') {
+ actionElement.parentNode.querySelector('input.select-dropdown').disabled = true;
+ } else {
+ actionElement.classList.add('disabled');
+ }
+ }
+ }
+
+ enableActionElements() {
+ let actionElements = this.elements.container.querySelectorAll('.corpus-analysis-action');
+ for (let actionElement of actionElements) {
+ if (actionElement.nodeName === 'INPUT') {
+ actionElement.disabled = false;
+ } else if (actionElement.nodeName === 'SELECT') {
+ actionElement.parentNode.querySelector('input.select-dropdown').disabled = false;
+ } else {
+ actionElement.classList.remove('disabled');
+ }
+ }
+ }
+}
diff --git a/app/static/js/nopaque/CorpusAnalysis/CorpusAnalysisConcordance.js b/app/static/js/nopaque/CorpusAnalysis/CorpusAnalysisConcordance.js
new file mode 100644
index 00000000..24a9ab53
--- /dev/null
+++ b/app/static/js/nopaque/CorpusAnalysis/CorpusAnalysisConcordance.js
@@ -0,0 +1,432 @@
+class CorpusAnalysisConcordance {
+ name = 'Concordance';
+
+ constructor(app) {
+ this.app = app;
+
+ this.data = {};
+
+ this.elements = {
+ // TODO: Prefix elements with "corpus-analysis-app-"
+ container: document.querySelector('#concordance-extension-container'),
+ error: document.querySelector('#concordance-extension-error'),
+ form: document.querySelector('#concordance-extension-form'),
+ progress: document.querySelector('#concordance-extension-progress'),
+ subcorpusInfo: document.querySelector('#concordance-extension-subcorpus-info'),
+ subcorpusActions: document.querySelector('#concordance-extension-subcorpus-actions'),
+ subcorpusItems: document.querySelector('#concordance-extension-subcorpus-items'),
+ subcorpusList: document.querySelector('#concordance-extension-subcorpus-list'),
+ subcorpusPagination: document.querySelector('#concordance-extension-subcorpus-pagination')
+ };
+
+ this.settings = {
+ context: parseInt(this.elements.form['context'].value),
+ perPage: parseInt(this.elements.form['per-page'].value),
+ selectedSubcorpus: undefined,
+ textStyle: parseInt(this.elements.form['text-style'].value),
+ tokenRepresentation: this.elements.form['token-representation'].value
+ };
+
+ this.app.registerExtension(this);
+ }
+
+ init() {
+ // Init data
+ this.data.corpus = this.app.data.corpus;
+ this.data.subcorpora = {};
+ // Add event listeners
+ this.elements.form.addEventListener('submit', event => {
+ event.preventDefault();
+ this.app.disableActionElements();
+ let query = this.elements.form.query.value.trim();
+ let subcorpusName = this.elements.form['subcorpus-name'].value;
+ this.elements.error.innerText = '';
+ this.elements.error.classList.add('hide');
+ this.elements.progress.classList.remove('hide');
+ let subcorpus = {};
+ this.data.corpus.o.query(subcorpusName, query)
+ .then(cQiStatus => {
+ subcorpus.q = query;
+ return this.data.corpus.o.subcorpora.get(subcorpusName);
+ })
+ .then(cQiSubcorpus => {
+ subcorpus.o = cQiSubcorpus;
+ return cQiSubcorpus.paginate(1, this.settings.perPage, this.settings.context);
+ })
+ .then(
+ paginatedSubcorpus => {
+ subcorpus.p = paginatedSubcorpus;
+ if (subcorpus !== 'Last') {this.data.subcorpora.Last = subcorpus;}
+ this.data.subcorpora[subcorpusName] = subcorpus;
+ this.settings.selectedSubcorpus = subcorpusName;
+ this.renderSubcorpusList();
+ this.renderSubcorpusInfo();
+ this.renderSubcorpusActions();
+ this.renderSubcorpusItems();
+ this.renderSubcorpusPagination();
+ this.elements.progress.classList.add('hide');
+ this.app.enableActionElements();
+ },
+ cQiError => {
+ this.elements.error.innerText = JSON.stringify(cQiError);
+ this.elements.error.classList.remove('hide');
+ if ('payload' in cQiError && 'code' in cQiError.payload && 'msg' in cQiError.payload) {
+ nopaque.appClient.flash(`${cQiError.payload.code}: ${cQiError.payload.msg}`, 'error');
+ }
+ this.elements.progress.classList.add('hide');
+ this.app.enableActionElements();
+ }
+ );
+ });
+ this.elements.form.addEventListener('change', event => {
+ if (event.target === this.elements.form['context']) {
+ this.settings.context = parseInt(this.elements.form['context'].value);
+ this.elements.form.submit.click();
+ }
+ if (event.target === this.elements.form['per-page']) {
+ this.settings.perPage = parseInt(this.elements.form['per-page'].value);
+ this.elements.form.submit.click();
+ }
+ if (event.target === this.elements.form['text-style']) {
+ this.settings.textStyle = parseInt(this.elements.form['text-style'].value);
+ this.setTextStyle();
+ }
+ if (event.target === this.elements.form['token-representation']) {
+ this.settings.tokenRepresentation = this.elements.form['token-representation'].value;
+ this.setTokenRepresentation();
+ }
+ });
+ }
+
+ clearSubcorpusList() {
+ this.elements.subcorpusList.innerHTML = '';
+ this.elements.subcorpusList.classList.add('hide');
+ }
+
+ renderSubcorpusList() {
+ this.clearSubcorpusList();
+ for (let subcorpusName in this.data.subcorpora) {
+ this.elements.subcorpusList.innerHTML += `
+ bookmark ${subcorpusName}
+ `.trim();
+ }
+ for (let subcorpusSelectorElement of this.elements.subcorpusList.querySelectorAll('.subcorpus-selector')) {
+ let subcorpusName = subcorpusSelectorElement.dataset.target;
+ if (subcorpusName === this.settings.selectedSubcorpus) {
+ subcorpusSelectorElement.classList.add('disabled');
+ continue;
+ }
+ subcorpusSelectorElement.addEventListener('click', () => {
+ this.settings.selectedSubcorpus = subcorpusName;
+ this.elements.progress.classList.remove('hide');
+ this.renderSubcorpusList();
+ this.renderSubcorpusInfo();
+ this.renderSubcorpusActions();
+ this.renderSubcorpusActions();
+ this.renderSubcorpusItems();
+ this.renderSubcorpusPagination();
+ this.elements.progress.classList.add('hide');
+ });
+ }
+ this.elements.subcorpusList.classList.remove('hide');
+ }
+
+ clearSubcorpusInfo() {
+ this.elements.subcorpusInfo.innerHTML = '';
+ this.elements.subcorpusInfo.classList.add('hide');
+ }
+
+ renderSubcorpusInfo() {
+ let subcorpus = this.data.subcorpora[this.settings.selectedSubcorpus];
+ this.clearSubcorpusInfo();
+ this.elements.subcorpusInfo.innerHTML = `${subcorpus.p.total} matches found for ${subcorpus.q.replace(//g, ">")}
`;
+ this.elements.subcorpusInfo.classList.remove('hide');
+ }
+
+ clearSubcorpusActions() {
+ for (let tooltippedElement of this.elements.subcorpusActions.querySelectorAll('.tooltipped')) {
+ M.Tooltip.getInstance(tooltippedElement).destroy();
+ }
+ this.elements.subcorpusActions.innerHTML = '';
+ }
+
+ renderSubcorpusActions() {
+ this.clearSubcorpusActions();
+ this.elements.subcorpusActions.innerHTML += `
+
+ file_download
+
+
+ delete
+
+ `.trim();
+ M.Tooltip.init(this.elements.subcorpusActions.querySelectorAll('.tooltipped'));
+ this.elements.subcorpusActions.querySelector('.delete-subcorpus-trigger').addEventListener('click', event => {
+ event.preventDefault();
+ let subcorpus = this.data.subcorpora[this.settings.selectedSubcorpus];
+ subcorpus.o.drop().then(
+ cQiStatus => {
+ nopaque.appClient.flash(`${subcorpus.o.name} deleted`, 'corpus');
+ delete this.data.subcorpora[subcorpus.o.name];
+ this.settings.selectedSubcorpus = undefined;
+ for (let subcorpusName in this.data.subcorpora) {
+ this.settings.selectedSubcorpus = subcorpusName;
+ break;
+ }
+ this.renderSubcorpusList();
+ if (this.settings.selectedSubcorpus) {
+ this.renderSubcorpusInfo();
+ this.renderSubcorpusActions();
+ this.renderSubcorpusItems();
+ this.renderSubcorpusPagination();
+ } else {
+ this.clearSubcorpusInfo();
+ this.clearSubcorpusActions();
+ this.clearSubcorpusItems();
+ this.clearSubcorpusPagination();
+ }
+ },
+ cQiError => {
+ nopaque.appClient.flash(`${cQiError.payload.code}: ${cQiError.payload.msg}`, 'error');
+ }
+ );
+ });
+ }
+
+ // Reset the items element to its placeholder content ("Nothing here...").
+ // Tooltips attached to token elements are destroyed first, before the
+ // elements they belong to are replaced.
+ clearSubcorpusItems() {
+ // Destroy the Materialize tooltips associated with .p-attr elements
+ for (let pAttrElement of this.elements.subcorpusItems.querySelectorAll('.p-attr.tooltipped')) {
+ M.Tooltip.getInstance(pAttrElement)?.destroy();
+ }
+ // NOTE(review): the markup of this placeholder appears to be missing from
+ // the literal below - confirm against the original file.
+ this.elements.subcorpusItems.innerHTML = `
+
+
+
+ search Nothing here...
+ No matches available.
+
+
+
+ `.trim();
+ }
+
+ // Render the matches of the current page of the selected subcorpus and bind
+ // the "go to reader" triggers. For every item the source texts (foo), the
+ // left context (lc), the match (c) and the right context (rc) are rendered;
+ // lc and rc are optional.
+ renderSubcorpusItems() {
+ let subcorpus = this.data.subcorpora[this.settings.selectedSubcorpus];
+ this.clearSubcorpusItems();
+ // NOTE(review): the row markup appears to be missing from the literal
+ // below (the code further down looks for `.item` and `.goto-reader-trigger`
+ // elements) - confirm against the original file.
+ for (let item of subcorpus.p.items) {
+ this.elements.subcorpusItems.innerHTML += `
+
+ ${item.num}
+ ${this.foo(...item.c)}
+ ${item.lc ? this.cposRange2HTML(...item.lc) : ''}
+ ${this.cposRange2HTML(...item.c)}
+ ${item.rc ? this.cposRange2HTML(...item.rc) : ''}
+
+ search
+ add
+
+
+ `.trim();
+ }
+ // Apply the current display settings to the freshly rendered tokens.
+ this.setTextStyle();
+ this.setTokenRepresentation();
+ for (let gotoReaderTriggerElement of this.elements.subcorpusItems.querySelectorAll('.goto-reader-trigger')) {
+ gotoReaderTriggerElement.addEventListener('click', event => {
+ event.preventDefault();
+ let corpusAnalysisReader = this.app.extensions.Reader;
+ // Find the item whose number matches the data-id of the clicked row.
+ let itemId = parseInt(gotoReaderTriggerElement.closest('.item').dataset.id);
+ let item = undefined;
+ for (let x of subcorpus.p.items) {if (x.num === itemId) {item = x;}}
+ // Reader page derived from the first cpos of the match.
+ // NOTE(review): presumably reader pages hold perPage consecutive cpos
+ // starting at 0; if so, a cpos that is an exact multiple of perPage maps
+ // to the preceding page here - verify against the server-side pagination.
+ let page = Math.max(1, Math.ceil(item.c[0] / corpusAnalysisReader.settings.perPage));
+ corpusAnalysisReader.page(page, () => {
+ // Select the match in the reader. If a boundary of the match is not on
+ // the loaded page, the first / last cpos of the page is used instead.
+ let range = new Range();
+ let leftCpos = corpusAnalysisReader.data.corpus.p.items[0].includes(item.c[0]) ? item.c[0] : corpusAnalysisReader.data.corpus.p.items[0][0];
+ let rightCpos = corpusAnalysisReader.data.corpus.p.items[0].includes(item.c[1]) ? item.c[1] : corpusAnalysisReader.data.corpus.p.items[0].at(-1);
+ let leftElement = corpusAnalysisReader.elements.corpus.querySelector(`.p-attr[data-cpos="${leftCpos}"]`);
+ let rightElement = corpusAnalysisReader.elements.corpus.querySelector(`.p-attr[data-cpos="${rightCpos}"]`);
+ range.setStartBefore(leftElement);
+ range.setEndAfter(rightElement);
+ document.getSelection().removeAllRanges();
+ document.getSelection().addRange(range);
+ });
+ this.app.elements.m.extensionTabs.select('reader-extension-container');
+ });
+ }
+ }
+
+ clearSubcorpusPagination() {
+ this.elements.subcorpusPagination.innerHTML = '';
+ this.elements.subcorpusPagination.classList.add('hide');
+ }
+
+ renderSubcorpusPagination() {
+ let subcorpus = this.data.subcorpora[this.settings.selectedSubcorpus];
+ this.clearSubcorpusPagination();
+ if (subcorpus.p.pages === 0) {return;}
+ this.elements.subcorpusPagination.innerHTML += `
+
+
+
+ `.trim();
+ this.elements.subcorpusPagination.innerHTML += `
+
+
+
+ `.trim();
+ for (let i = 1; i <= subcorpus.p.pages; i++) {
+ this.elements.subcorpusPagination.innerHTML += `
+
+
+
+ `.trim();
+ }
+ this.elements.subcorpusPagination.innerHTML += `
+
+
+
+ `.trim();
+ this.elements.subcorpusPagination.innerHTML += `
+
+
+
+ `.trim();
+ for (let paginationTriggerElement of this.elements.subcorpusPagination.querySelectorAll('.pagination-trigger[data-target]')) {
+ paginationTriggerElement.addEventListener('click', event => {
+ event.preventDefault();
+ this.app.disableActionElements();
+ this.elements.progress.classList.remove('hide');
+ let page = parseInt(paginationTriggerElement.dataset.target);
+ subcorpus.o.paginate(page, this.settings.perPage, this.settings.context)
+ .then(
+ paginatedSubcorpus => {
+ subcorpus.p = paginatedSubcorpus;
+ this.renderSubcorpusItems();
+ this.renderSubcorpusPagination();
+ this.elements.progress.classList.add('hide');
+ this.app.enableActionElements();
+ }
+ )
+ });
+ }
+ this.elements.subcorpusPagination.classList.remove('hide');
+ }
+
+ foo(firstCpos, lastCpos) {
+ let subcorpus = this.data.subcorpora[this.settings.selectedSubcorpus];
+ /* Returns a list of texts occuring in this cpos range */
+ let textIds = new Set();
+ for (let cpos = firstCpos; cpos <= lastCpos; cpos++) {
+ textIds.add(subcorpus.p.lookups.cpos_lookup[cpos].text);
+ }
+ return [...textIds].map(x => subcorpus.p.lookups.text_lookup[x].title).join(', ');
+ }
+
+ // Build the HTML string for the tokens from firstCpos to lastCpos (both
+ // inclusive) of the selected subcorpus. Consecutive tokens sharing the same
+ // `ent` value are treated as one entity: something is emitted before its
+ // first and after its last token, the latter including the entity type.
+ // NOTE(review): the element markup appears to be missing from the template
+ // literals below - confirm against the original file.
+ cposRange2HTML(firstCpos, lastCpos) {
+ let subcorpus = this.data.subcorpora[this.settings.selectedSubcorpus];
+ let prevPAttr, pAttr, nextPAttr;
+ let isEntityStart, isEntityEnd;
+ let html = '';
+ for (let cpos = firstCpos; cpos <= lastCpos; cpos++) {
+ // Neighbours outside the requested range count as "no entity" (null).
+ prevPAttr = cpos > firstCpos ? subcorpus.p.lookups.cpos_lookup[cpos - 1] : null;
+ pAttr = subcorpus.p.lookups.cpos_lookup[cpos];
+ nextPAttr = cpos < lastCpos ? subcorpus.p.lookups.cpos_lookup[cpos + 1] : null;
+ isEntityStart = 'ent' in pAttr && pAttr.ent !== prevPAttr?.ent;
+ isEntityEnd = 'ent' in pAttr && pAttr.ent !== nextPAttr?.ent;
+ // Add a space before pAttr (skipped only when the first token of the
+ // range is punctuation)
+ if (cpos !== firstCpos || pAttr.simple_pos !== 'PUNCT') {html += ' ';}
+ // Add entity start
+ if (isEntityStart) {
+ html += ``;
+ }
+ // Add pAttr
+ html += ` `;
+ // Add entity end
+ if (isEntityEnd) {
+ html += ` ${subcorpus.p.lookups.ent_lookup[pAttr.ent].type} `;
+ html += ' ';
+ }
+ }
+ return html;
+ }
+
+ // Apply this.settings.textStyle to the rendered items. The levels are
+ // cumulative: >= 0 resets tokens and entities to their basic classes,
+ // >= 1 additionally shows entities as colored chips with their type,
+ // >= 2 additionally turns every token into a chip with a tooltip listing
+ // its positional and structural properties.
+ // NOTE(review): the HTML inside the string/template literals of the tooltip
+ // section appears to be missing (two string literals even span a line
+ // break) - confirm against the original file.
+ setTextStyle() {
+ let subcorpus = this.data.subcorpora[this.settings.selectedSubcorpus];
+ if (this.settings.textStyle >= 0) {
+ // Destroy the Materialize tooltips associated with .p-attr elements
+ for (let pAttrElement of this.elements.subcorpusItems.querySelectorAll('.p-attr.tooltipped')) {
+ M.Tooltip.getInstance(pAttrElement)?.destroy();
+ }
+ // Set basic styling on .p-attr elements
+ for (let pAttrElement of this.elements.subcorpusItems.querySelectorAll('.p-attr')) {
+ pAttrElement.setAttribute('class', 'p-attr');
+ }
+ // Set basic styling on .s-attr[data-type="ent"] elements
+ for (let entElement of this.elements.subcorpusItems.querySelectorAll('.s-attr[data-type="ent"]')) {
+ entElement.querySelector('.s-attr[data-type="ent_type"]').classList.add('hide');
+ entElement.removeAttribute('style');
+ entElement.setAttribute('class', 's-attr');
+ }
+ }
+ if (this.settings.textStyle >= 1) {
+ // Set advanced styling on .s-attr[data-type="ent"] elements
+ for (let entElement of this.elements.subcorpusItems.querySelectorAll('.s-attr[data-type="ent"]')) {
+ let ent = subcorpus.p.lookups.ent_lookup[entElement.dataset.id];
+ entElement.classList.add('chip');
+ entElement.style.backgroundColor = CorpusAnalysisApp.entitiyColors[ent.type];
+ entElement.querySelector('.s-attr[data-type="ent_type"]').classList.remove('hide');
+ }
+ }
+ if (this.settings.textStyle >= 2) {
+ // Set advanced styling on .p-attr elements
+ for (let pAttrElement of this.elements.subcorpusItems.querySelectorAll('.p-attr')) {
+ pAttrElement.classList.add('chip', 'hoverable', 'tooltipped');
+ let cpos = pAttrElement.dataset.cpos;
+ let pAttr = subcorpus.p.lookups.cpos_lookup[cpos];
+ let positionalPropertiesHTML = `
+
+ Positional properties
+ Token: ${cpos}
+ `.trim();
+ let structuralPropertiesHTML = `
+
+ Structural properties
+ `.trim();
+ for (let [property, propertyValue] of Object.entries(pAttr)) {
+ // The listed properties are positional; everything else is treated as a
+ // structural property and expanded through its `<property>_lookup`
+ // table when one exists.
+ if (['lemma', 'ner', 'pos', 'simple_pos', 'word'].includes(property)) {
+ if (propertyValue === 'None') {continue;}
+ positionalPropertiesHTML += `subdirectory_arrow_right ${property}: ${propertyValue}`;
+ } else {
+ structuralPropertiesHTML += `${property}: ${propertyValue} `;
+ if (!(`${property}_lookup` in subcorpus.p.lookups)) {continue;}
+ for (let [subproperty, subpropertyValue] of Object.entries(subcorpus.p.lookups[`${property}_lookup`][propertyValue])) {
+ if (subpropertyValue === 'NULL') {continue;}
+ structuralPropertiesHTML += `subdirectory_arrow_right ${subproperty}: ${subpropertyValue}`
+ }
+ }
+ }
+ positionalPropertiesHTML += '
';
+ structuralPropertiesHTML += '
';
+ M.Tooltip.init(
+ pAttrElement,
+ {html: positionalPropertiesHTML + structuralPropertiesHTML}
+ );
+ }
+ }
+ }
+
+ setTokenRepresentation() {
+ let subcorpus = this.data.subcorpora[this.settings.selectedSubcorpus];
+ for (let pAttrElement of this.elements.subcorpusItems.querySelectorAll('.p-attr')) {
+ let pAttr = subcorpus.p.lookups.cpos_lookup[pAttrElement.dataset.cpos];
+ pAttrElement.innerText = pAttr[this.settings.tokenRepresentation];
+ }
+ }
+}
diff --git a/app/static/js/nopaque/CorpusAnalysis/CorpusAnalysisReader.js b/app/static/js/nopaque/CorpusAnalysis/CorpusAnalysisReader.js
new file mode 100644
index 00000000..2b2a5843
--- /dev/null
+++ b/app/static/js/nopaque/CorpusAnalysis/CorpusAnalysisReader.js
@@ -0,0 +1,270 @@
+class CorpusAnalysisReader {
+ name = 'Reader';
+
+ constructor(app) {
+ this.app = app;
+
+ this.data = {};
+
+ this.elements = {
+ // TODO: Prefix elements with "corpus-analysis-app-"
+ container: document.querySelector('#reader-extension-container'),
+ error: document.querySelector('#reader-extension-error'),
+ form: document.querySelector('#reader-extension-form'),
+ progress: document.querySelector('#reader-extension-progress'),
+ corpus: document.querySelector('#reader-extension-corpus'),
+ corpusPagination: document.querySelector('#reader-extension-corpus-pagination')
+ };
+
+ this.settings = {
+ perPage: parseInt(this.elements.form['per-page'].value),
+ textStyle: parseInt(this.elements.form['text-style'].value),
+ tokenRepresentation: this.elements.form['token-representation'].value
+ }
+
+ this.app.registerExtension(this);
+ }
+
+ init() {
+ // Init data
+ this.data.corpus = this.app.data.corpus;
+ this.data.subcorpora = {};
+ // Add event listeners
+ this.elements.form.addEventListener('submit', (event) => {
+ event.preventDefault();
+ this.app.disableActionElements();
+ this.elements.error.innerText = '';
+ this.elements.error.classList.add('hide');
+ this.elements.progress.classList.remove('hide');
+ this.data.corpus.o.paginate(1, this.settings.perPage)
+ .then(
+ paginatedCorpus => {
+ this.data.corpus.p = paginatedCorpus;
+ this.renderCorpus();
+ this.renderCorpusPagination();
+ this.elements.progress.classList.add('hide');
+ this.app.enableActionElements();
+ },
+ error => {
+ this.elements.error.innerText = JSON.stringify(error);
+ this.elements.error.classList.remove('hide');
+ if ('payload' in error && 'code' in error.payload && 'msg' in error.payload) {
+ nopaque.appClient.flash(`${error.payload.code}: ${error.payload.msg}`, 'error');
+ }
+ this.elements.progress.classList.add('hide');
+ this.app.enableActionElements();
+ }
+ );
+ });
+ this.elements.form.addEventListener('change', event => {
+ if (event.target === this.elements.form['per-page']) {
+ this.settings.perPage = parseInt(this.elements.form['per-page'].value);
+ this.elements.form.submit.click();
+ }
+ if (event.target === this.elements.form['text-style']) {
+ this.settings.textStyle = parseInt(this.elements.form['text-style'].value);
+ this.setTextStyle();
+ }
+ if (event.target === this.elements.form['token-representation']) {
+ this.settings.tokenRepresentation = this.elements.form['token-representation'].value;
+ this.setTokenRepresentation();
+ }
+ });
+ // Load initial data
+ this.elements.form.submit.click();
+ }
+
+ // Reset the corpus element to its placeholder content ("Nothing here...").
+ // Tooltips attached to token elements are destroyed first, before the
+ // elements they belong to are replaced.
+ clearCorpus() {
+ // Destroy the Materialize tooltips associated with .p-attr elements
+ for (let pAttrElement of this.elements.corpus.querySelectorAll('.p-attr.tooltipped')) {
+ M.Tooltip.getInstance(pAttrElement)?.destroy();
+ }
+ // NOTE(review): the markup of this placeholder appears to be missing from
+ // the literal below - confirm against the original file.
+ this.elements.corpus.innerHTML = `
+
+ search Nothing here...
+ No text available.
+
+ `.trim();
+ }
+
+ // Render the currently loaded page: the first entry of p.items is read as a
+ // list of corpus positions and everything from its first to its last
+ // element is rendered, then the current display settings are applied.
+ renderCorpus() {
+ this.clearCorpus();
+ let item = this.data.corpus.p.items[0];
+ // Appended to the placeholder content written by clearCorpus().
+ this.elements.corpus.innerHTML += `
+ ${this.cposRange2HTML(item[0], item[item.length - 1])}
+ `.trim();
+ this.setTextStyle();
+ this.setTokenRepresentation();
+ }
+
+ clearCorpusPagination() {
+ this.elements.corpusPagination.innerHTML = '';
+ this.elements.corpusPagination.classList.add('hide');
+ }
+
+ // Render the pagination of the corpus and bind its triggers. Nothing is
+ // rendered (and the element stays hidden) when the corpus has no pages.
+ // Clicking a trigger loads the page named by its data-target via page().
+ // NOTE(review): the markup of the pagination entries appears to be missing
+ // from the literals below - confirm against the original file.
+ renderCorpusPagination() {
+ this.clearCorpusPagination();
+ if (this.data.corpus.p.pages === 0) {return;}
+ this.elements.corpusPagination.innerHTML += `
+
+
+
+ `.trim();
+ this.elements.corpusPagination.innerHTML += `
+
+
+
+ `.trim();
+ // One entry per page
+ for (let i = 1; i <= this.data.corpus.p.pages; i++) {
+ this.elements.corpusPagination.innerHTML += `
+
+
+
+ `.trim();
+ }
+ this.elements.corpusPagination.innerHTML += `
+
+
+
+ `.trim();
+ this.elements.corpusPagination.innerHTML += `
+
+
+
+ `.trim();
+ for (let paginateTriggerElement of this.elements.corpusPagination.querySelectorAll('.pagination-trigger[data-target]')) {
+ paginateTriggerElement.addEventListener('click', event => {
+ event.preventDefault();
+ let page = parseInt(paginateTriggerElement.dataset.target);
+ this.page(page);
+ });
+ }
+ this.elements.corpusPagination.classList.remove('hide');
+ }
+
+ // Build the HTML string for the tokens from firstCpos to lastCpos (both
+ // inclusive) of the loaded corpus page. Consecutive tokens sharing the same
+ // `ent` value are treated as one entity: something is emitted before its
+ // first and after its last token, the latter including the entity type.
+ // NOTE(review): the element markup appears to be missing from the template
+ // literals below - confirm against the original file.
+ cposRange2HTML(firstCpos, lastCpos) {
+ let prevPAttr, pAttr, nextPAttr;
+ let isEntityStart, isEntityEnd;
+ let html = '';
+ for (let cpos = firstCpos; cpos <= lastCpos; cpos++) {
+ // Neighbours outside the requested range count as "no entity" (null).
+ prevPAttr = cpos > firstCpos ? this.data.corpus.p.lookups.cpos_lookup[cpos - 1] : null;
+ pAttr = this.data.corpus.p.lookups.cpos_lookup[cpos];
+ nextPAttr = cpos < lastCpos ? this.data.corpus.p.lookups.cpos_lookup[cpos + 1] : null;
+ isEntityStart = 'ent' in pAttr && pAttr.ent !== prevPAttr?.ent;
+ isEntityEnd = 'ent' in pAttr && pAttr.ent !== nextPAttr?.ent;
+ // Add a space before pAttr (skipped only when the first token of the
+ // range is punctuation)
+ if (cpos !== firstCpos || pAttr.simple_pos !== 'PUNCT') {html += ' ';}
+ // Add entity start
+ if (isEntityStart) {
+ html += ``;
+ }
+ // Add pAttr
+ html += ` `;
+ // Add entity end
+ if (isEntityEnd) {
+ html += ` ${this.data.corpus.p.lookups.ent_lookup[pAttr.ent].type} `;
+ html += ' ';
+ }
+ }
+ return html;
+ }
+
+ page(pageNum, callback) {
+ if (this.data.corpus.p.page === pageNum && typeof callback === 'function') {
+ callback();
+ return;
+ }
+ this.app.disableActionElements();
+ this.elements.progress.classList.remove('hide');
+ this.data.corpus.o.paginate(pageNum, this.settings.perPage)
+ .then(
+ paginatedCorpus => {
+ this.data.corpus.p = paginatedCorpus;
+ this.renderCorpus();
+ this.renderCorpusPagination();
+ this.elements.progress.classList.add('hide');
+ this.app.enableActionElements();
+ if (typeof callback === 'function') {callback();}
+ }
+ )
+ }
+
+ // Apply this.settings.textStyle to the rendered corpus text. The levels are
+ // cumulative: >= 0 resets tokens and entities to their basic classes,
+ // >= 1 additionally shows entities as colored chips with their type,
+ // >= 2 additionally turns every token into a chip with a tooltip listing
+ // its positional and structural properties.
+ // NOTE(review): the HTML inside the string/template literals of the tooltip
+ // section appears to be missing (one string literal even spans a line
+ // break) - confirm against the original file.
+ setTextStyle() {
+ if (this.settings.textStyle >= 0) {
+ // Destroy the Materialize tooltips associated with .p-attr elements
+ for (let pAttrElement of this.elements.corpus.querySelectorAll('.p-attr.tooltipped')) {
+ M.Tooltip.getInstance(pAttrElement)?.destroy();
+ }
+ // Set basic styling on .p-attr elements
+ for (let pAttrElement of this.elements.corpus.querySelectorAll('.p-attr')) {
+ pAttrElement.setAttribute('class', 'p-attr');
+ }
+ // Set basic styling on .s-attr[data-type="ent"] elements
+ for (let entElement of this.elements.corpus.querySelectorAll('.s-attr[data-type="ent"]')) {
+ entElement.querySelector('.s-attr[data-type="ent_type"]').classList.add('hide');
+ entElement.removeAttribute('style');
+ entElement.setAttribute('class', 's-attr');
+ }
+ }
+ if (this.settings.textStyle >= 1) {
+ // Set advanced styling on .s-attr[data-type="ent"] elements
+ for (let entElement of this.elements.corpus.querySelectorAll('.s-attr[data-type="ent"]')) {
+ let ent = this.data.corpus.p.lookups.ent_lookup[entElement.dataset.id];
+ entElement.classList.add('chip');
+ entElement.style.backgroundColor = CorpusAnalysisApp.entitiyColors[ent.type];
+ entElement.querySelector('.s-attr[data-type="ent_type"]').classList.remove('hide');
+ }
+ }
+ if (this.settings.textStyle >= 2) {
+ // Set advanced styling on .p-attr elements
+ for (let pAttrElement of this.elements.corpus.querySelectorAll('.p-attr')) {
+ pAttrElement.classList.add('chip', 'hoverable', 'tooltipped');
+ let cpos = pAttrElement.dataset.cpos;
+ let pAttr = this.data.corpus.p.lookups.cpos_lookup[cpos];
+ let positionalPropertiesHTML = `
+
+ Positional properties
+ Token: ${cpos}
+ `.trim();
+ let structuralPropertiesHTML = `
+
+ Structural properties
+ `.trim();
+ for (let [property, propertyValue] of Object.entries(pAttr)) {
+ // The listed properties are positional; everything else is treated as a
+ // structural property and expanded through its `<property>_lookup`
+ // table when one exists.
+ if (['lemma', 'ner', 'pos', 'simple_pos', 'word'].includes(property)) {
+ if (propertyValue === 'None') {continue;}
+ positionalPropertiesHTML += `subdirectory_arrow_right ${property}: ${propertyValue}`;
+ } else {
+ structuralPropertiesHTML += `${property}: ${propertyValue} `;
+ if (!(`${property}_lookup` in this.data.corpus.p.lookups)) {continue;}
+ for (let [subproperty, subpropertyValue] of Object.entries(this.data.corpus.p.lookups[`${property}_lookup`][propertyValue])) {
+ if (subpropertyValue === 'NULL') {continue;}
+ structuralPropertiesHTML += `subdirectory_arrow_right ${subproperty}: ${subpropertyValue}`
+ }
+ }
+ }
+ positionalPropertiesHTML += '
';
+ structuralPropertiesHTML += '';
+ M.Tooltip.init(
+ pAttrElement,
+ {html: positionalPropertiesHTML + structuralPropertiesHTML}
+ );
+ }
+ }
+ }
+
+ setTokenRepresentation() {
+ for (let pAttrElement of this.elements.corpus.querySelectorAll('.p-attr')) {
+ let pAttr = this.data.corpus.p.lookups.cpos_lookup[pAttrElement.dataset.cpos];
+ pAttrElement.innerText = pAttr[this.settings.tokenRepresentation];
+ }
+ }
+}
diff --git a/app/static/js/nopaque/RessourceDisplays/CorpusDisplay.js b/app/static/js/nopaque/RessourceDisplays/CorpusDisplay.js
index 827fa02b..d6038ef6 100644
--- a/app/static/js/nopaque/RessourceDisplays/CorpusDisplay.js
+++ b/app/static/js/nopaque/RessourceDisplays/CorpusDisplay.js
@@ -13,7 +13,7 @@ class CorpusDisplay extends RessourceDisplay {
this.setLastEditedDate(this.user.data.corpora[this.corpusId].last_edited_date);
this.setStatus(this.user.data.corpora[this.corpusId].status);
this.setTitle(this.user.data.corpora[this.corpusId].title);
- this.setTokenRatio(this.user.data.corpora[this.corpusId].current_nr_of_tokens, this.user.data.corpora[this.corpusId].max_nr_of_tokens);
+ this.setTokenRatio(this.user.data.corpora[this.corpusId].num_tokens, this.user.data.corpora[this.corpusId].max_num_tokens);
}
patch(patch) {
@@ -53,9 +53,8 @@ class CorpusDisplay extends RessourceDisplay {
for (let element of this.displayElement.querySelectorAll('.corpus-title')) {this.setElement(element, title);}
}
- setTokenRatio(currentNrOfTokens, maxNrOfTokens) {
- let tokenRatio = `${currentNrOfTokens}/${maxNrOfTokens}`;
- for (let element of this.displayElement.querySelectorAll('.corpus-token-ratio')) {this.setElement(element, tokenRatio);}
+ setTokenRatio(numTokens, maxNumTokens) {
+ for (let element of this.displayElement.querySelectorAll('.corpus-token-ratio')) {this.setElement(element, `${numTokens}/${maxNumTokens}`);}
}
setDescription(description) {
diff --git a/app/templates/corpora/analyse_corpus.concordance.html.j2 b/app/templates/corpora/analyse_corpus.concordance.html.j2
new file mode 100644
index 00000000..4a9cc139
--- /dev/null
+++ b/app/templates/corpora/analyse_corpus.concordance.html.j2
@@ -0,0 +1,102 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Source
+ Left context
+ KWIC
+ Right Context
+
+
+
+
+
+
+
+
+
+
diff --git a/app/templates/corpora/analyse_corpus.html.j2 b/app/templates/corpora/analyse_corpus.html.j2
index 7666f550..ccf682e0 100644
--- a/app/templates/corpora/analyse_corpus.html.j2
+++ b/app/templates/corpora/analyse_corpus.html.j2
@@ -1,337 +1,73 @@
{% extends "base.html.j2" %}
-{% from "corpora/_breadcrumbs.html.j2" import breadcrumbs with context %}
{% import "materialize/wtf.html.j2" as wtf %}
-{% block main_attribs %} class="service-scheme" data-service="corpus-analysis"{% endblock main_attribs %}
+{% block main_attribs %} class="service-scheme" data-service="corpus-analysis" id="corpus-analysis-app-container"{% endblock main_attribs %}
{% block page_content %}
-
+
+
+
-
-
+
+
+
+
+
list_alt Concordance
+
Query your corpus with the CQP query language utilizing a KWIC view.
-
-
-
-
-
- {% include 'tables/query_results.html.j2' %}
+
+
+
+
chrome_reader_mode Reader
+
Inspect your corpus in detail with a full text view, including annotations.
-
-
-{% include 'corpora/interactions/scroll_to_top.html.j2' %}
-
-
-{% include 'modals/query_builder.html.j2' %}
-{% include 'modals/show_metadata.html.j2' %}
-{% include 'modals/analysis_init.html.j2' %}
-{% include 'modals/export_query_results.html.j2' %}
-{% include 'modals/context_modal.html.j2' %}
-{% include 'modals/show_corpus_files.html.j2' %}
+{% include "corpora/analyse_corpus.reader.html.j2" %}
+{% include "corpora/analyse_corpus.concordance.html.j2" %}
{% endblock page_content %}
+{% block modals %}
+{{ super() }}
+
+
+
Initializing session...
+
If the loading takes too long or an error occurred,
+ click here
+ to refresh your session or
+ go back !
+
+
+
+
+
+{% endblock modals %}
+
{% block scripts %}
{{ super() }}
-
-
+{% endassets %}
+
-{% endblock %}
+{% endblock scripts %}
diff --git a/app/templates/corpora/analyse_corpus.reader.html.j2 b/app/templates/corpora/analyse_corpus.reader.html.j2
new file mode 100644
index 00000000..99cdf56e
--- /dev/null
+++ b/app/templates/corpora/analyse_corpus.reader.html.j2
@@ -0,0 +1,67 @@
+
diff --git a/app/templates/corpora/interactions/analysis.html.j2 b/app/templates/corpora/interactions/analysis.html.j2
deleted file mode 100644
index 735ef951..00000000
--- a/app/templates/corpora/interactions/analysis.html.j2
+++ /dev/null
@@ -1,30 +0,0 @@
-
-
-
-
Analysis
-
-
-
- Action One
- cloud
-
-
-
- Action Two
- add
-
-
-
-
\ No newline at end of file
diff --git a/app/templates/corpora/interactions/cite.html.j2 b/app/templates/corpora/interactions/cite.html.j2
deleted file mode 100644
index 45767854..00000000
--- a/app/templates/corpora/interactions/cite.html.j2
+++ /dev/null
@@ -1,30 +0,0 @@
-
-
-
-
Cite Nopaque
-
-
-
- Action One
- cloud
-
-
-
- Action Two
- add
-
-
-
-
\ No newline at end of file
diff --git a/app/templates/corpora/interactions/create.html.j2 b/app/templates/corpora/interactions/create.html.j2
deleted file mode 100644
index 4d4172b0..00000000
--- a/app/templates/corpora/interactions/create.html.j2
+++ /dev/null
@@ -1,24 +0,0 @@
-
-
-
-
Create
-
-
-
-
Add matches to Sub-Results with the
- add
- button in the list or inspect view.
-
-
-
-
-
\ No newline at end of file
diff --git a/app/templates/corpora/interactions/display.html.j2 b/app/templates/corpora/interactions/display.html.j2
deleted file mode 100644
index 8eb96e8e..00000000
--- a/app/templates/corpora/interactions/display.html.j2
+++ /dev/null
@@ -1,30 +0,0 @@
-{% import "materialize/wtf.html.j2" as wtf %}
-
-
-
diff --git a/app/templates/corpora/interactions/export.html.j2 b/app/templates/corpora/interactions/export.html.j2
deleted file mode 100644
index c6102329..00000000
--- a/app/templates/corpora/interactions/export.html.j2
+++ /dev/null
@@ -1,79 +0,0 @@
-
-
-
-
Export
-
-
-
-
- Create Results
- build
-
- Results
- file_download
-
-
-
-
- Create Sub-Results
- build
-
- Sub-Results
- file_download
-
-
-
-
-
\ No newline at end of file
diff --git a/app/templates/corpora/interactions/infos.html.j2 b/app/templates/corpora/interactions/infos.html.j2
deleted file mode 100644
index 83959864..00000000
--- a/app/templates/corpora/interactions/infos.html.j2
+++ /dev/null
@@ -1,59 +0,0 @@
-
-
-
-
Infos
-
-
-
-
- dvr
- /
-
- matches loaded
-
-
-
-
- Corpus Meta Data
- info_outline
-
-
-
-
- info_outline
- Matches in
-
- files
-
-
-
-
- help
- Server is sending your results.
- Functions like "Export Results" and "Match Inspect" will be
- available after all matches have been loaded.
-
-
-
-
\ No newline at end of file
diff --git a/app/templates/corpora/interactions/scroll_to_top.html.j2 b/app/templates/corpora/interactions/scroll_to_top.html.j2
deleted file mode 100644
index 5d0ea728..00000000
--- a/app/templates/corpora/interactions/scroll_to_top.html.j2
+++ /dev/null
@@ -1,5 +0,0 @@
-
\ No newline at end of file
diff --git a/app/templates/corpora/query_results/_breadcrumbs.html.j2 b/app/templates/corpora/query_results/_breadcrumbs.html.j2
deleted file mode 100644
index 9fb0464a..00000000
--- a/app/templates/corpora/query_results/_breadcrumbs.html.j2
+++ /dev/null
@@ -1,12 +0,0 @@
-{% set breadcrumbs %}
-
navigate_next
-
My query results
-
navigate_next
-{% if request.path == url_for('.add_query_result') %}
-
{{ title }}
-{% elif request.path == url_for('.query_result', query_result_id=query_result.id) %}
-
{{ query_result.title }}
-{% elif request.path == url_for('.inspect_query_result', query_result_id=query_result.id) %}
-
{{ title }}
-{% endif %}
-{% endset %}
diff --git a/app/templates/corpora/query_results/add_query_result.html.j2 b/app/templates/corpora/query_results/add_query_result.html.j2
deleted file mode 100644
index 61933816..00000000
--- a/app/templates/corpora/query_results/add_query_result.html.j2
+++ /dev/null
@@ -1,56 +0,0 @@
-{% extends "base.html.j2" %}
-{% from "corpora/query_results/_breadcrumbs.html.j2" import breadcrumbs with context %}
-{% import "materialize/wtf.html.j2" as wtf %}
-
-{% block main_attribs %} class="service-scheme" data-service="corpus-analysis"{% endblock main_attribs %}
-
-{% block page_content %}
-
-
-
-
{{ title }}
-
-
-
-
-
-
-
-
-
-
-
file_upload Uploading file...
-
-
-
-
-{% endblock %}
diff --git a/app/templates/corpora/query_results/inspect.html.j2 b/app/templates/corpora/query_results/inspect.html.j2
deleted file mode 100644
index afc00f3b..00000000
--- a/app/templates/corpora/query_results/inspect.html.j2
+++ /dev/null
@@ -1,241 +0,0 @@
-{% extends "base.html.j2" %}
-{% from "corpora/query_results/_breadcrumbs.html.j2" import breadcrumbs with context %}
-
-{% block main_attribs %} class="service-scheme" data-service="corpus-analysis"{% endblock main_attribs %}
-
-{% block page_content %}
-
-
-
-
-
-
-
-
- {% include 'tables/query_results.html.j2' %}
-
-
-
-
-
-{# Import modals #}
-{% include 'modals/show_metadata.html.j2' %}
-{% include 'modals/show_corpus_files.html.j2' %}
-{% include 'modals/context_modal.html.j2' %}
-
-
-{% include 'corpora/interactions/scroll_to_top.html.j2' %}
-{% endblock page_content %}
-
-
-{% block scripts %}
-{{ super() }}
-
-{% endblock %}
diff --git a/app/templates/corpora/query_results/query_result.html.j2 b/app/templates/corpora/query_results/query_result.html.j2
deleted file mode 100644
index 51377129..00000000
--- a/app/templates/corpora/query_results/query_result.html.j2
+++ /dev/null
@@ -1,131 +0,0 @@
-{% extends "base.html.j2" %}
-{% from "corpora/query_results/_breadcrumbs.html.j2" import breadcrumbs with context %}
-
-{% block main_attribs %} class="service-scheme" data-service="corpus-analysis"{% endblock main_attribs %}
-
-{% block page_content %}
-
-
-
-
{{ title }}
-
-
-
-
Below the metadata for the results from the Corpus
- {{ query_result.query_metadata.corpus_name }} generated with the query
- {{ query_result.query_metadata.query }} are shown.
-
-
-
-
-
-
-
-
-
-
- Metadata Description
- Value
-
-
-
- {% for pair in query_result.query_metadata|dictsort %}
-
- {{ pair[0] }}
- {% if pair[0] == 'corpus_all_texts'
- or pair[0] == 'text_lookup' %}
-
-
- {% for key, value in pair[1].items() %}
-
-
- {{ value['title'] }} written
- by {{ value['author'] }}
- in {{ value['publishing_year'] }}
- More
-
- info_outline
-
-
-
-
- {% endfor %}
-
-
- {% else %}
- {{ pair[1] }}
- {% endif %}
-
- {% endfor %}
-
-
-
-
-
-
-
-
-
-
-{% endblock page_content %}
-
-{% block scripts %}
-{{ super() }}
-
-{% endblock %}
diff --git a/app/templates/main/dashboard.html.j2 b/app/templates/main/dashboard.html.j2
index f46b1101..b7ece533 100644
--- a/app/templates/main/dashboard.html.j2
+++ b/app/templates/main/dashboard.html.j2
@@ -82,7 +82,7 @@
diff --git a/app/templates/materialize/wtf.html.j2 b/app/templates/materialize/wtf.html.j2
index 428a4ee8..730fd337 100644
--- a/app/templates/materialize/wtf.html.j2
+++ b/app/templates/materialize/wtf.html.j2
@@ -31,7 +31,7 @@
{% endif %}
{% for error in field.errors %}
-
{{ error }}
+
{{ error }}
{% endfor %}
{% endmacro %}
@@ -47,7 +47,7 @@
{% for error in field.errors %}
-
{{ error }}
+
{{ error }}
{% endfor %}
{% endmacro %}
@@ -68,7 +68,7 @@
{{ field.label }}
{% endif %}
{% for error in field.errors %}
- {{ error }}
+ {{ error }}
{% endfor %}
{% endmacro %}
diff --git a/app/templates/tables/query_results.html.j2 b/app/templates/tables/query_results.html.j2
deleted file mode 100644
index 1b7503dc..00000000
--- a/app/templates/tables/query_results.html.j2
+++ /dev/null
@@ -1,21 +0,0 @@
-
-
-
-
-
-
-
- Nr.
- Title
- Left context
- Match
- {# Actions #}
- Right Context
-
-
-
-
-
-
-
\ No newline at end of file
diff --git a/migrations/versions/a4b3cf4ab098_.py b/migrations/versions/a4b3cf4ab098_.py
new file mode 100644
index 00000000..3876e3ef
--- /dev/null
+++ b/migrations/versions/a4b3cf4ab098_.py
@@ -0,0 +1,30 @@
+"""empty message
+
+Revision ID: a4b3cf4ab098
+Revises: c384d7b3268a
+Create Date: 2021-09-23 13:14:16.227784
+
+"""
+from alembic import op
+import sqlalchemy as sa
+
+
+# revision identifiers, used by Alembic.
+revision = 'a4b3cf4ab098'
+down_revision = 'c384d7b3268a'
+branch_labels = None
+depends_on = None
+
+
+def upgrade():
+ # ### commands auto generated by Alembic - please adjust! ###
+ op.add_column('corpora', sa.Column('num_tokens', sa.Integer(), nullable=True))
+ op.drop_column('corpora', 'current_nr_of_tokens')
+ # ### end Alembic commands ###
+
+
+def downgrade():
+ # ### commands auto generated by Alembic - please adjust! ###
+ op.add_column('corpora', sa.Column('current_nr_of_tokens', sa.INTEGER(), autoincrement=False, nullable=True))
+ op.drop_column('corpora', 'num_tokens')
+ # ### end Alembic commands ###
diff --git a/migrations/versions/be010d5d708d_.py b/migrations/versions/be010d5d708d_.py
new file mode 100644
index 00000000..4a2d93b8
--- /dev/null
+++ b/migrations/versions/be010d5d708d_.py
@@ -0,0 +1,28 @@
+"""empty message
+
+Revision ID: be010d5d708d
+Revises: a4b3cf4ab098
+Create Date: 2021-09-24 09:34:54.173653
+
+"""
+from alembic import op
+import sqlalchemy as sa
+
+
+# revision identifiers, used by Alembic.
+revision = 'be010d5d708d'
+down_revision = 'a4b3cf4ab098'
+branch_labels = None
+depends_on = None
+
+
+def upgrade():
+ # ### commands auto generated by Alembic - please adjust! ###
+ op.add_column('corpora', sa.Column('num_analysis_sessions', sa.Integer(), nullable=True))
+ # ### end Alembic commands ###
+
+
+def downgrade():
+ # ### commands auto generated by Alembic - please adjust! ###
+ op.drop_column('corpora', 'num_analysis_sessions')
+ # ### end Alembic commands ###