Big Corpus analysis update

This commit is contained in:
Patrick Jentsch 2021-11-16 15:23:57 +01:00
parent c1436c2a5d
commit f6c2292e03
47 changed files with 2549 additions and 1840 deletions

21
LICENSE Normal file
View File

@ -0,0 +1,21 @@
MIT License
Copyright (c) 2021 Bielefeld University - CRC 1288 - INF
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

View File

@ -1,4 +1,5 @@
from .models import Role
from . import db
from .models import Corpus, Role
from flask_migrate import upgrade
@ -19,6 +20,9 @@ def register(app):
@daemon.command('run')
def run_daemon():
    """Run daemon"""
    # Reset every positive analysis session counter before the daemon is
    # started.
    open_sessions = Corpus.query.filter(Corpus.num_analysis_sessions > 0)
    for stale_corpus in open_sessions:
        stale_corpus.num_analysis_sessions = 0
    db.session.commit()
    # Imported here rather than at module level, as in the original code.
    from app.daemon import Daemon
    Daemon().run()

View File

@ -2,4 +2,4 @@ from flask import Blueprint
bp = Blueprint('corpora', __name__)
from . import events, routes # noqa
from . import cqi_over_socketio, routes # noqa

View File

@ -0,0 +1,108 @@
from app import db, socketio
from app.decorators import socketio_login_required
from app.models import Corpus
from flask import session
from flask_login import current_user
from flask_socketio import ConnectionRefusedError
from threading import Lock
import cqi
'''
This package tunnels the Corpus Query interface (CQi) protocol through
Socket.IO (SIO) by wrapping each CQi function in a separate SIO event.
This module only handles the SIO connect/disconnect, which handles the setup
and teardown of necessary resources for later use. Each CQi function has a
corresponding SIO event. The event handlers are spread across the different
modules within this package.
Basic concept:
1. A client connects to the SIO namespace and provides the id of a corpus to be
analysed.
1.1 The analysis session counter of the corpus is incremented.
1.2 A CQiClient and a (Mutex) Lock belonging to it is created.
1.3 Wait until the CQP server is running.
1.4 Connect the CQiClient to the server.
1.5 Save the CQiClient and the Lock in the session for subsequent use.
2. A client emits an event and may provide a single json object with necessary
arguments for the targeted CQi function.
3. A SIO event handler (decorated with cqi_over_socketio) gets executed.
- The event handler function defines all arguments. Since the client
sends them as a single json object, the decorator decomposes it to fit
the function's signature. This also includes type checking and proper
use of the lock (acquire/release) mechanism.
4. Wait for more events
5. The client disconnects from the SIO namespace
5.1 The analysis session counter of the corpus is decremented.
5.2 The CQiClient and (Mutex) Lock belonging to it are torn down.
'''
NAMESPACE = '/corpora/corpus/corpus_analysis'
# Import all CQi over Socket.IO event handlers
from .cqi_corpora_corpus_subcorpora import * # noqa
from .cqi_corpora_corpus_structural_attributes import * # noqa
from .cqi_corpora_corpus_positional_attributes import * # noqa
from .cqi_corpora_corpus_alignment_attributes import * # noqa
from .cqi_corpora_corpus import * # noqa
from .cqi_corpora import * # noqa
from .cqi import * # noqa
@socketio.on('connect', namespace=NAMESPACE)
@socketio_login_required
def connect(auth):
    """Open a corpus analysis session for the connecting client.

    ``auth`` must be a dict carrying the id of the corpus to analyse under
    the key ``corpus_id``. On success the CQi client, its lock and the
    corpus id are stored in ``session['d']`` for the later event handlers.

    Raises:
        ConnectionRefusedError: if ``auth`` is unusable, the corpus does not
            exist, the user is neither creator nor administrator, the
            corpus status does not allow an analysis, or the corpus did not
            reach the 'analysing' status in time.
    """
    # the auth variable is used in a hacky way. It contains the corpus id for
    # which a corpus analysis session should be started.
    if not isinstance(auth, dict) or 'corpus_id' not in auth:
        raise ConnectionRefusedError('Bad Request')
    corpus_id = auth['corpus_id']
    corpus = Corpus.query.get(corpus_id)
    if corpus is None:
        raise ConnectionRefusedError('Not Found')
    if not (corpus.creator == current_user or current_user.is_administrator()):
        raise ConnectionRefusedError('Forbidden')
    if corpus.status not in ['prepared', 'start analysis', 'analysing', 'stop analysis']:  # noqa
        raise ConnectionRefusedError('Failed Dependency')
    if corpus.num_analysis_sessions is None:
        corpus.num_analysis_sessions = 0
        db.session.commit()
    # Uses the column expression so the increment happens in the database.
    corpus.num_analysis_sessions = Corpus.num_analysis_sessions + 1
    db.session.commit()
    retry_counter = 20
    while corpus.status != 'analysing':
        if retry_counter == 0:
            # Undo the increment from above before giving up.
            corpus.num_analysis_sessions = Corpus.num_analysis_sessions - 1
            db.session.commit()
            # A returned dict would not reject the connection; the client
            # would stay connected without any session data. Refuse it
            # explicitly, like every other failure above.
            raise ConnectionRefusedError('Request Timeout')
        socketio.sleep(3)
        retry_counter -= 1
        db.session.refresh(corpus)
    cqi_client = cqi.CQiClient('cqpserver_{}'.format(corpus_id))
    session['d'] = {
        'corpus_id': corpus_id,
        'cqi_client': cqi_client,
        'cqi_client_lock': Lock(),
    }
@socketio.on('disconnect', namespace=NAMESPACE)
def disconnect():
    """Tear down the corpus analysis session of the disconnecting client.

    Disconnects the CQi client (best effort), decrements the analysis
    session counter of the corpus and removes the session data. Does nothing
    when no session data was stored for this client.
    """
    if 'd' not in session:
        # No analysis session was set up for this client, so there is
        # nothing to tear down.
        return
    session_data = session['d']
    try:
        # The context manager releases the lock even if disconnecting fails
        # with an unexpected exception.
        with session_data['cqi_client_lock']:
            try:
                session_data['cqi_client'].disconnect()
            except (cqi.errors.CQiException, BrokenPipeError):
                # Best effort: these errors are deliberately ignored, the
                # teardown continues either way.
                pass
    finally:
        # Always give the session slot back, otherwise the counter would
        # stay incremented after a failed CQi disconnect.
        corpus = Corpus.query.get(session_data['corpus_id'])
        if corpus is not None:
            corpus.num_analysis_sessions = Corpus.num_analysis_sessions - 1
            db.session.commit()
        session.pop('d')

View File

@ -0,0 +1,43 @@
from app import socketio
from app.decorators import socketio_login_required
from socket import gaierror
from . import NAMESPACE as ns
from .utils import cqi_over_socketio
import cqi
@socketio.on('cqi.connect', namespace=ns)
@socketio_login_required
@cqi_over_socketio
def cqi_connect(cqi_client: cqi.CQiClient):
    """Connect the CQi client and report the resulting CQi status.

    An address resolution error (``gaierror``) is answered with a 500
    response that carries the error's first two arguments.
    """
    try:
        status = cqi_client.connect()
    except gaierror as error:
        failure = {'code': error.args[0], 'desc': error.args[1]}
        return {
            'code': 500,
            'msg': 'Internal Server Error',
            'payload': failure
        }
    status_name = cqi.api.specification.lookup[status]
    return {
        'code': 200,
        'msg': 'OK',
        'payload': {'code': status, 'msg': status_name}
    }
@socketio.on('cqi.disconnect', namespace=ns)
@socketio_login_required
@cqi_over_socketio
def cqi_disconnect(cqi_client: cqi.CQiClient):
    """Disconnect the CQi client and report the resulting CQi status."""
    status = cqi_client.disconnect()
    status_name = cqi.api.specification.lookup[status]
    return {
        'code': 200,
        'msg': 'OK',
        'payload': {'code': status, 'msg': status_name}
    }
@socketio.on('cqi.ping', namespace=ns)
@socketio_login_required
@cqi_over_socketio
def cqi_ping(cqi_client: cqi.CQiClient):
    """Ping through the CQi client and report the resulting CQi status."""
    status = cqi_client.ping()
    status_name = cqi.api.specification.lookup[status]
    return {
        'code': 200,
        'msg': 'OK',
        'payload': {'code': status, 'msg': status_name}
    }

View File

@ -0,0 +1,22 @@
from app import socketio
from app.decorators import socketio_login_required
from . import NAMESPACE as ns
from .utils import cqi_over_socketio
import cqi
@socketio.on('cqi.corpora.get', namespace=ns)
@socketio_login_required
@cqi_over_socketio
def cqi_corpora_get(cqi_client: cqi.CQiClient, corpus_name: str):
    """Return a copy of the attributes of the CQi corpus ``corpus_name``."""
    found = cqi_client.corpora.get(corpus_name)
    return {'code': 200, 'msg': 'OK', 'payload': dict(found.attrs)}
@socketio.on('cqi.corpora.list', namespace=ns)
@socketio_login_required
@cqi_over_socketio
def cqi_corpora_list(cqi_client: cqi.CQiClient):
    """Return the attributes of every corpus listed by the CQi client."""
    attrs_per_corpus = []
    for listed in cqi_client.corpora.list():
        attrs_per_corpus.append(dict(listed.attrs))
    return {'code': 200, 'msg': 'OK', 'payload': attrs_per_corpus}

View File

@ -0,0 +1,85 @@
from app import db, socketio
from app.decorators import socketio_login_required
from app.models import Corpus
from flask import session
from . import NAMESPACE as ns
from .utils import cqi_over_socketio, lookups_by_cpos
import cqi
import math
@socketio.on('cqi.corpora.corpus.drop', namespace=ns)
@socketio_login_required
@cqi_over_socketio
def cqi_corpora_corpus_drop(cqi_client: cqi.CQiClient, corpus_name: str):
    """Drop the CQi corpus ``corpus_name`` and report the CQi status."""
    status = cqi_client.corpora.get(corpus_name).drop()
    status_name = cqi.api.specification.lookup[status]
    return {
        'code': 200,
        'msg': 'OK',
        'payload': {'code': status, 'msg': status_name}
    }
@socketio.on('cqi.corpora.corpus.query', namespace=ns)
@socketio_login_required
@cqi_over_socketio
def cqi_corpora_corpus_query(cqi_client: cqi.CQiClient, corpus_name: str, subcorpus_name: str, query: str):  # noqa
    """Run ``query`` on the corpus ``corpus_name``.

    ``subcorpus_name`` and ``query`` are handed to the CQi corpus' ``query``
    method; the CQi status it returns is reported back.
    """
    target = cqi_client.corpora.get(corpus_name)
    status = target.query(subcorpus_name, query)
    status_name = cqi.api.specification.lookup[status]
    return {
        'code': 200,
        'msg': 'OK',
        'payload': {'code': status, 'msg': status_name}
    }
###############################################################################
# nopaque specific CQi extensions #
###############################################################################
@socketio.on('cqi.corpora.corpus.update_db', namespace=ns)
@socketio_login_required
@cqi_over_socketio
def cqi_corpora_corpus_update_db(cqi_client: cqi.CQiClient, corpus_name: str):
    """Store the size reported for the CQi corpus in the database.

    The size is read from the CQi corpus named 'CORPUS'; ``corpus_name`` is
    required by the signature but not used. Nothing is returned.
    """
    db_corpus = Corpus.query.get(session['d']['corpus_id'])
    cqi_corpus = cqi_client.corpora.get('CORPUS')
    db_corpus.num_tokens = cqi_corpus.attrs['size']
    db.session.commit()
@socketio.on('cqi.corpora.corpus.paginate', namespace=ns)
@socketio_login_required
@cqi_over_socketio
def cqi_corpora_corpus_paginate(cqi_client: cqi.CQiClient, corpus_name: str, page: int = 1, per_page: int = 20):  # noqa
    """Return one page of corpus positions together with their lookups.

    Answers with 416 when ``page`` or ``per_page`` is below 1, or when
    ``page`` lies beyond the last page of a non-empty corpus.
    """
    cqi_corpus = cqi_client.corpora.get(corpus_name)
    total = cqi_corpus.attrs['size']
    # Sanity checks
    if per_page < 1 or page < 1:
        return {'code': 416, 'msg': 'Range Not Satisfiable'}
    if total > 0 and page > math.ceil(total / per_page):
        return {'code': 416, 'msg': 'Range Not Satisfiable'}
    start = (page - 1) * per_page
    stop = min(total, start + per_page)
    cpos_list = list(range(start, stop))
    lookups = lookups_by_cpos(cqi_corpus, cpos_list)
    pages = math.ceil(total / per_page)
    # The page number is None when there are no pages at all.
    current = page if pages > 0 else None
    has_prev = current > 1 if current else False
    has_next = current < pages if current else False
    payload = {
        # the items for the current page
        'items': [cpos_list],
        # the lookups for the items
        'lookups': lookups,
        # the total number of items
        'total': total,
        # the number of items to be displayed on a page
        'per_page': per_page,
        # the total number of pages
        'pages': pages,
        # the current page number (1 indexed)
        'page': current,
        # True if a previous page exists
        'has_prev': has_prev,
        # True if a next page exists
        'has_next': has_next,
        # number of the previous page
        'prev_num': current - 1 if has_prev else None,
        # number of the next page
        'next_num': current + 1 if has_next else None,
    }
    return {'code': 200, 'msg': 'OK', 'payload': payload}

View File

@ -0,0 +1,24 @@
from app import socketio
from app.decorators import socketio_login_required
from . import NAMESPACE as ns
from .utils import cqi_over_socketio
import cqi
@socketio.on('cqi.corpora.corpus.alignment_attributes.get', namespace=ns)
@socketio_login_required
@cqi_over_socketio
def cqi_corpora_corpus_alignment_attributes_get(cqi_client: cqi.CQiClient, corpus_name: str, alignment_attribute_name: str):  # noqa
    """Return a copy of the attributes of one alignment attribute."""
    owner = cqi_client.corpora.get(corpus_name)
    found = owner.alignment_attributes.get(alignment_attribute_name)
    return {'code': 200, 'msg': 'OK', 'payload': dict(found.attrs)}
@socketio.on('cqi.corpora.corpus.alignment_attributes.list', namespace=ns)
@socketio_login_required
@cqi_over_socketio
def cqi_corpora_corpus_alignment_attributes_list(cqi_client: cqi.CQiClient, corpus_name: str):  # noqa
    """Return the attributes of every alignment attribute of a corpus."""
    owner = cqi_client.corpora.get(corpus_name)
    attrs_per_item = []
    for listed in owner.alignment_attributes.list():
        attrs_per_item.append(dict(listed.attrs))
    return {'code': 200, 'msg': 'OK', 'payload': attrs_per_item}

View File

@ -0,0 +1,24 @@
from app import socketio
from app.decorators import socketio_login_required
from . import NAMESPACE as ns
from .utils import cqi_over_socketio
import cqi
@socketio.on('cqi.corpora.corpus.positional_attributes.get', namespace=ns)
@socketio_login_required
@cqi_over_socketio
def cqi_corpora_corpus_positional_attributes_get(cqi_client: cqi.CQiClient, corpus_name: str, positional_attribute_name: str):  # noqa
    """Return a copy of the attributes of one positional attribute."""
    owner = cqi_client.corpora.get(corpus_name)
    found = owner.positional_attributes.get(positional_attribute_name)
    return {'code': 200, 'msg': 'OK', 'payload': dict(found.attrs)}
@socketio.on('cqi.corpora.corpus.positional_attributes.list', namespace=ns)
@socketio_login_required
@cqi_over_socketio
def cqi_corpora_corpus_positional_attributes_list(cqi_client: cqi.CQiClient, corpus_name: str):  # noqa
    """Return the attributes of every positional attribute of a corpus."""
    owner = cqi_client.corpora.get(corpus_name)
    attrs_per_item = []
    for listed in owner.positional_attributes.list():
        attrs_per_item.append(dict(listed.attrs))
    return {'code': 200, 'msg': 'OK', 'payload': attrs_per_item}

View File

@ -0,0 +1,24 @@
from app import socketio
from app.decorators import socketio_login_required
from . import NAMESPACE as ns
from .utils import cqi_over_socketio
import cqi
@socketio.on('cqi.corpora.corpus.structural_attributes.get', namespace=ns)
@socketio_login_required
@cqi_over_socketio
def cqi_corpora_corpus_structural_attributes_get(cqi_client: cqi.CQiClient, corpus_name: str, structural_attribute_name: str):  # noqa
    """Return a copy of the attributes of one structural attribute."""
    owner = cqi_client.corpora.get(corpus_name)
    found = owner.structural_attributes.get(structural_attribute_name)
    return {'code': 200, 'msg': 'OK', 'payload': dict(found.attrs)}
@socketio.on('cqi.corpora.corpus.structural_attributes.list', namespace=ns)
@socketio_login_required
@cqi_over_socketio
def cqi_corpora_corpus_structural_attributes_list(cqi_client: cqi.CQiClient, corpus_name: str):  # noqa
    """Return the attributes of every structural attribute of a corpus."""
    owner = cqi_client.corpora.get(corpus_name)
    attrs_per_item = []
    for listed in owner.structural_attributes.list():
        attrs_per_item.append(dict(listed.attrs))
    return {'code': 200, 'msg': 'OK', 'payload': attrs_per_item}

View File

@ -0,0 +1,123 @@
from app import socketio
from app.decorators import socketio_login_required
from app.models import Corpus
from flask import session
from . import NAMESPACE as ns
from .utils import cqi_over_socketio, export_subcorpus
import cqi
import json
import math
import os
@socketio.on('cqi.corpora.corpus.subcorpora.get', namespace=ns)
@socketio_login_required
@cqi_over_socketio
def cqi_corpora_corpus_subcorpora_get(cqi_client: cqi.CQiClient, corpus_name: str, subcorpus_name: str):  # noqa
    """Return a copy of the attributes of one subcorpus."""
    owner = cqi_client.corpora.get(corpus_name)
    found = owner.subcorpora.get(subcorpus_name)
    return {'code': 200, 'msg': 'OK', 'payload': dict(found.attrs)}
@socketio.on('cqi.corpora.corpus.subcorpora.list', namespace=ns)
@socketio_login_required
@cqi_over_socketio
def cqi_corpora_corpus_subcorpora_list(cqi_client: cqi.CQiClient, corpus_name: str):  # noqa
    """Return the attributes of every subcorpus of a corpus."""
    owner = cqi_client.corpora.get(corpus_name)
    attrs_per_item = []
    for listed in owner.subcorpora.list():
        attrs_per_item.append(dict(listed.attrs))
    return {'code': 200, 'msg': 'OK', 'payload': attrs_per_item}
@socketio.on('cqi.corpora.corpus.subcorpora.subcorpus.drop', namespace=ns)
@socketio_login_required
@cqi_over_socketio
def cqi_corpora_corpus_subcorpora_subcorpus_drop(cqi_client: cqi.CQiClient, corpus_name: str, subcorpus_name: str):  # noqa
    """Drop one subcorpus and report the resulting CQi status."""
    owner = cqi_client.corpora.get(corpus_name)
    status = owner.subcorpora.get(subcorpus_name).drop()
    status_name = cqi.api.specification.lookup[status]
    return {
        'code': 200,
        'msg': 'OK',
        'payload': {'code': status, 'msg': status_name}
    }
@socketio.on('cqi.corpora.corpus.subcorpora.subcorpus.dump', namespace=ns)
@socketio_login_required
@cqi_over_socketio
def cqi_corpora_corpus_subcorpora_subcorpus_dump(cqi_client: cqi.CQiClient):
    """Placeholder handler: always answers with 501 Not Implemented."""
    not_implemented = {'code': 501, 'msg': 'Not Implemented'}
    return not_implemented
@socketio.on('cqi.corpora.corpus.subcorpora.subcorpus.fdist_1', namespace=ns)
@socketio_login_required
@cqi_over_socketio
def cqi_corpora_corpus_subcorpora_subcorpus_fdist_1(cqi_client: cqi.CQiClient):
    """Placeholder handler: always answers with 501 Not Implemented."""
    not_implemented = {'code': 501, 'msg': 'Not Implemented'}
    return not_implemented
@socketio.on('cqi.corpora.corpus.subcorpora.subcorpus.fdist_2', namespace=ns)
@socketio_login_required
@cqi_over_socketio
def cqi_corpora_corpus_subcorpora_subcorpus_fdist_2(cqi_client: cqi.CQiClient):
    """Placeholder handler: always answers with 501 Not Implemented."""
    not_implemented = {'code': 501, 'msg': 'Not Implemented'}
    return not_implemented
###############################################################################
# nopaque specific CQi extensions #
###############################################################################
@socketio.on('cqi.corpora.corpus.subcorpora.subcorpus.paginate', namespace=ns)
@socketio_login_required
@cqi_over_socketio
def cqi_corpora_corpus_subcorpora_subcorpus_paginate(cqi_client: cqi.CQiClient, corpus_name: str, subcorpus_name: str, context: int = 50, page: int = 1, per_page: int = 20):  # noqa
    """Return one page of subcorpus matches together with their lookups.

    Answers with 416 when ``page`` or ``per_page`` is below 1, or when
    ``page`` lies beyond the last page of a non-empty subcorpus.
    """
    cqi_corpus = cqi_client.corpora.get(corpus_name)
    cqi_subcorpus = cqi_corpus.subcorpora.get(subcorpus_name)
    total = cqi_subcorpus.attrs['size']
    # Sanity checks
    if per_page < 1 or page < 1:
        return {'code': 416, 'msg': 'Range Not Satisfiable'}
    if total > 0 and page > math.ceil(total / per_page):
        return {'code': 416, 'msg': 'Range Not Satisfiable'}
    exported = export_subcorpus(
        cqi_subcorpus,
        context=context,
        cutoff=per_page,
        offset=(page - 1) * per_page
    )
    # After the matches are taken out, only the lookups remain in the export.
    items = exported.pop('matches')
    pages = math.ceil(total / per_page)
    # The page number is None when there are no pages at all.
    current = page if pages > 0 else None
    has_prev = current > 1 if current else False
    has_next = current < pages if current else False
    payload = {
        # the items for the current page
        'items': items,
        # the lookups for the items
        'lookups': exported,
        # the total number of items matching the query
        'total': total,
        # the number of items to be displayed on a page
        'per_page': per_page,
        # the total number of pages
        'pages': pages,
        # the current page number (1 indexed)
        'page': current,
        # True if a previous page exists
        'has_prev': has_prev,
        # True if a next page exists
        'has_next': has_next,
        # number of the previous page
        'prev_num': current - 1 if has_prev else None,
        # number of the next page
        'next_num': current + 1 if has_next else None,
    }
    return {'code': 200, 'msg': 'OK', 'payload': payload}
@socketio.on('cqi.corpora.corpus.subcorpora.subcorpus.export', namespace=ns)
@socketio_login_required
@cqi_over_socketio
def cqi_corpora_corpus_subcorpora_subcorpus_export(cqi_client: cqi.CQiClient, corpus_name: str, subcorpus_name: str, context: int = 50):  # noqa
    """Export a subcorpus as ``<subcorpus_name>.json`` into the corpus path.

    ``subcorpus_name`` comes from the client and becomes part of a file
    name, so names containing a directory component are answered with 400
    before anything is written.
    """
    # Reject empty names and anything that would leave the corpus directory
    # (e.g. '../x' or an absolute path).
    if not subcorpus_name or os.path.basename(subcorpus_name) != subcorpus_name:  # noqa
        return {'code': 400, 'msg': 'Bad Request'}
    cqi_corpus = cqi_client.corpora.get(corpus_name)
    cqi_subcorpus = cqi_corpus.subcorpora.get(subcorpus_name)
    cqi_subcorpus_export = export_subcorpus(cqi_subcorpus, context=context)
    corpus = Corpus.query.get(session['d']['corpus_id'])
    file_path = os.path.join(corpus.path, f'{subcorpus_name}.json')
    with open(file_path, 'w') as file:
        json.dump(cqi_subcorpus_export, file)
    return {'code': 200, 'msg': 'OK'}

View File

@ -0,0 +1,129 @@
from flask import session
from functools import wraps
from inspect import signature
import cqi
def cqi_over_socketio(f):
    """Decorate a Socket.IO event handler that wraps a CQi function.

    The wrapper
    - answers with 424 when no analysis session data is in the session,
    - injects the session's CQi client into the parameter annotated with
      ``cqi.CQiClient``,
    - fills the remaining parameters from the single json object sent by the
      client, answering with 400 when a parameter without default is missing
      or a provided value is not exactly of the annotated type,
    - calls ``f`` while holding the session's CQi client lock,
    - turns a ``cqi.errors.CQiException`` or ``BrokenPipeError`` into a 500
      response.
    """
    # The signature never changes, so inspect it once at decoration time.
    params = tuple(signature(f).parameters.values())

    @wraps(f)
    def wrapped(*args):
        if 'd' not in session:
            return {'code': 424, 'msg': 'Failed Dependency'}
        # The client may emit an event without data (or with something that
        # is not a json object); treat that as "no arguments provided".
        data = args[0] if args and isinstance(args[0], dict) else {}
        f_args = {}
        # Check for missing args and if all provided args are of the right type
        for param in params:
            if param.annotation == cqi.CQiClient:
                f_args[param.name] = session['d']['cqi_client']
                continue
            if param.name not in data:
                if param.default is param.empty:
                    # a parameter without default is missing
                    return {'code': 400, 'msg': 'Bad Request'}
                # a parameter with default keeps its default
                continue
            arg = data[param.name]
            # Deliberately an exact type match, as in the original check.
            if type(arg) is not param.annotation:
                return {'code': 400, 'msg': 'Bad Request'}
            f_args[param.name] = arg
        with session['d']['cqi_client_lock']:
            try:
                return f(**f_args)
            except BrokenPipeError:
                # Previously this path fell through to an unbound
                # return value; report the failure instead.
                return {'code': 500, 'msg': 'Internal Server Error'}
            except cqi.errors.CQiException as e:
                return {
                    'code': 500,
                    'msg': 'Internal Server Error',
                    'payload': {
                        'code': e.code,
                        'desc': e.description,
                        'msg': e.name
                    }
                }
    return wrapped
def lookups_by_cpos(corpus, cpos_list):
    """Collect attribute lookups for the given corpus positions.

    The result always contains ``'cpos_lookup'``, which maps each position
    to its positional attribute values and to the ids of the structural
    attributes covering it. For every structural attribute without values
    that occurs at the positions and has sub-attributes, an additional
    ``'<name>_lookup'`` maps each occurring id to its sub-attribute values.
    """
    cpos_lookup = {cpos: {} for cpos in cpos_list}
    lookups = {'cpos_lookup': cpos_lookup}
    for p_attr in corpus.positional_attributes.list():
        p_values = p_attr.values_by_cpos(cpos_list)
        for idx, cpos in enumerate(cpos_list):
            cpos_lookup[cpos][p_attr.attrs['name']] = p_values[idx]
    for s_attr in corpus.structural_attributes.list():
        # Only non sub-attributes are iterated, identifiable by
        # attrs['has_values'] being False.
        if s_attr.attrs['has_values']:
            continue
        s_ids = s_attr.ids_by_cpos(cpos_list)
        for idx, cpos in enumerate(cpos_list):
            # An id of -1 means the position is skipped for this attribute.
            if s_ids[idx] != -1:
                cpos_lookup[cpos][s_attr.attrs['name']] = s_ids[idx]
        seen_ids = [s_id for s_id in set(s_ids) if s_id != -1]
        if not seen_ids:
            continue
        sub_attrs = corpus.structural_attributes.list(
            filters={'part_of': s_attr})
        if not sub_attrs:
            continue
        id_lookup = {s_id: {} for s_id in seen_ids}
        lookups[f'{s_attr.attrs["name"]}_lookup'] = id_lookup
        for sub_attr in sub_attrs:
            # Strip the parent's name and the separator character.
            short_name = sub_attr.attrs['name'][(len(s_attr.attrs['name']) + 1):]  # noqa
            for idx, value in enumerate(sub_attr.values_by_ids(seen_ids)):
                id_lookup[seen_ids[idx]][short_name] = value
    return lookups
def export_subcorpus(subcorpus, context=25, cutoff=float('inf'), offset=0):
    """Export matches of a subcorpus together with their lookups.

    At most ``cutoff`` matches starting at match index ``offset`` are
    exported. Every match is a dict with its 1-based number ('num'), its
    boundaries ('c') and the boundaries of its left and right context ('lc',
    'rc'; None when there is no context). The lookups for all positions
    involved are merged into the returned dict next to 'matches'. An empty
    subcorpus yields ``{"matches": []}`` only.
    """
    size = subcorpus.attrs['size']
    if size == 0:
        return {"matches": []}
    first_match = max(0, offset)
    last_match = min((offset + cutoff - 1), (size - 1))
    fields = subcorpus.attrs['fields']
    starts = subcorpus.dump(fields['match'], first_match, last_match)
    ends = subcorpus.dump(fields['matchend'], first_match, last_match)
    corpus = subcorpus.collection.corpus
    last_cpos = corpus.attrs['size'] - 1
    involved_cpos = set()
    matches = []
    for num, (match_start, match_end) in enumerate(zip(starts, ends),
                                                   start=offset + 1):
        # Left context: none at the corpus start or without context.
        lc = None
        leftmost = match_start
        if not (match_start == 0 or context == 0):
            leftmost = max(0, (match_start - 1 - context))
            lc = (leftmost, match_start - 1)
        # Right context: none at the corpus end or without context.
        rc = None
        rightmost = match_end
        if not (match_end == last_cpos or context == 0):
            rightmost = min(match_end + 1 + context, last_cpos)
            rc = (match_end + 1, rightmost)
        matches.append(
            {'num': num, 'lc': lc, 'c': (match_start, match_end), 'rc': rc})
        involved_cpos.update(range(leftmost, rightmost + 1))
    lookups = lookups_by_cpos(corpus, list(involved_cpos))
    return {'matches': matches, **lookups}

View File

@ -1,304 +0,0 @@
from datetime import datetime
from flask import current_app, request
from flask_login import current_user
from socket import gaierror
from werkzeug.utils import secure_filename
from .. import db, socketio
from ..decorators import socketio_login_required
from ..events.socketio import sessions as socketio_sessions
from ..models import Corpus
import cqi
import math
import os
import shutil
'''
' A dictionary containing lists of, with corpus ids associated, Socket.IO
' session ids (sid). {<corpus_id>: [<sid>, ...], ...}
'''
corpus_analysis_sessions = {}
'''
' A dictionary containing Socket.IO session id - CQi client pairs.
' {<sid>: CQiClient, ...}
'''
corpus_analysis_clients = {}
@socketio.on('corpus_analysis_init')
@socketio_login_required
def init_corpus_analysis(corpus_id):
    """Validate an analysis request and start the session handler.

    Emits a 404, 403 or 424 response on 'corpus_analysis_init' when the
    corpus is unknown, the user is neither creator nor administrator, or the
    corpus status does not allow an analysis. Otherwise a 'prepared' corpus
    is switched to 'start analysis' and the session handler is started as a
    background task.
    """
    corpus = Corpus.query.get(corpus_id)
    if corpus is None:
        failure = {'code': 404, 'desc': None, 'msg': 'Not Found'}
    elif not (corpus.creator == current_user
              or current_user.is_administrator()):
        failure = {'code': 403, 'desc': None, 'msg': 'Forbidden'}
    elif corpus.status not in ['prepared', 'start analysis', 'analysing']:
        failure = {'code': 424, 'desc': None, 'msg': 'Failed Dependency'}
    else:
        failure = None
    if failure is not None:
        socketio.emit('corpus_analysis_init', failure, room=request.sid)
        return
    if corpus.status == 'prepared':
        corpus.status = 'start analysis'
        db.session.commit()
    # The app object is passed explicitly; the handler opens its own
    # application context with it.
    socketio.start_background_task(
        corpus_analysis_session_handler,
        current_app._get_current_object(),
        corpus_id,
        current_user.id,
        request.sid
    )
def corpus_analysis_session_handler(app, corpus_id, user_id, session_id):
    """Set up, observe and tear down one corpus analysis session.

    Waits until the corpus status is 'analysing', connects a CQi client and
    registers it for ``session_id``. It then idles while the Socket.IO
    session exists and finally disconnects and unregisters the client.
    Failures during setup are reported on 'corpus_analysis_init'.
    """
    init_event = 'corpus_analysis_init'
    with app.app_context():
        # Setup analysis session
        corpus = Corpus.query.get(corpus_id)

        def stop_analysis():
            # Persist the 'stop analysis' status for the corpus.
            corpus.status = 'stop analysis'
            db.session.commit()

        attempts_left = 15
        while corpus.status != 'analysing':
            db.session.refresh(corpus)
            attempts_left -= 1
            if attempts_left == 0:
                timeout = {'code': 408, 'desc': None, 'msg': 'Request Timeout'}  # noqa
                socketio.emit(init_event, timeout, room=session_id)
                stop_analysis()
                return
            socketio.sleep(3)
        client = cqi.CQiClient(f'cqpserver_{corpus_id}')
        try:
            connect_status = client.connect()
            payload = {
                'code': connect_status,
                'msg': cqi.api.specification.lookup[connect_status]
            }
        except cqi.errors.CQiException as e:
            handle_cqi_exception(init_event, e, session_id)
            stop_analysis()
            return
        except gaierror:
            failure = {'code': 500, 'desc': None, 'msg': 'Internal Server Error'}  # noqa
            socketio.emit(init_event, failure, room=session_id)
            stop_analysis()
            return
        corpus_analysis_clients[session_id] = client
        if corpus_id not in corpus_analysis_sessions:
            corpus_analysis_sessions[corpus_id] = [session_id]
        else:
            corpus_analysis_sessions[corpus_id].append(session_id)
        client.status = 'ready'
        success = {'code': 200, 'desc': None, 'msg': 'OK', 'payload': payload}
        socketio.emit(init_event, success, room=session_id)
        # Observe analysis session
        while session_id in socketio_sessions:
            socketio.sleep(3)
        # Teardown analysis session
        if client.status == 'running':
            # Ask the running handler to abort and wait until it is done.
            client.status = 'abort'
            while client.status != 'ready':
                socketio.sleep(0.3)
        try:
            client.disconnect()
        except cqi.errors.CQiException:
            pass
        corpus_analysis_clients.pop(session_id, None)
        corpus_analysis_sessions[corpus_id].remove(session_id)
        if not corpus_analysis_sessions[corpus_id]:
            corpus_analysis_sessions.pop(corpus_id, None)
            stop_analysis()
@socketio.on('corpus_analysis_meta_data')
@socketio_login_required
def corpus_analysis_get_meta_data(corpus_id):
    """Emit the meta data of a corpus on 'corpus_analysis_meta_data'.

    Combines the database fields of the corpus with properties, size and
    per-text meta data read through the CQi client of the requesting
    session, and stores the token count in the database. Answers with 424
    when the session has no CQi client and with 500 on a CQi error.
    """
    # get meta data from db
    db_corpus = Corpus.query.get(corpus_id)
    metadata = {}
    metadata['corpus_name'] = db_corpus.title
    metadata['corpus_description'] = db_corpus.description
    metadata['corpus_creation_date'] = db_corpus.creation_date.isoformat() + 'Z'  # noqa
    metadata['corpus_last_edited_date'] = \
        db_corpus.last_edited_date.isoformat() + 'Z'
    client = corpus_analysis_clients.get(request.sid)
    if client is None:
        response = {'code': 424, 'desc': None, 'msg': 'Failed Dependency'}
        socketio.emit('corpus_analysis_meta_data', response, room=request.sid)
        return
    # check if client is busy or not
    if client.status == 'running':
        client.status = 'abort'
        while client.status != 'ready':
            socketio.sleep(0.3)
    # get meta data from corpus in cqp server
    client.status = 'running'
    try:
        cwb_corpus = client.corpora.get('CORPUS')
        metadata['corpus_properties'] = cwb_corpus.attrs['properties']
        metadata['corpus_size_tokens'] = cwb_corpus.attrs['size']
        text_attr = cwb_corpus.structural_attributes.get('text')
        struct_attrs = cwb_corpus.structural_attributes.list(
            filters={'part_of': text_attr})
        text_ids = range(0, (text_attr.attrs['size']))
        texts_metadata = {text_id: {} for text_id in text_ids}
        # Length of the parent's name plus the separator character.
        prefix_length = len(text_attr.attrs['name']) + 1
        if texts_metadata:
            for struct_attr in struct_attrs:
                key = struct_attr.attrs['name'][prefix_length:]
                # Fetch all values of this attribute once instead of once
                # per text.
                values = struct_attr.values_by_ids(
                    list(range(struct_attr.attrs['size'])))
                for text_id in text_ids:
                    texts_metadata[text_id][key] = values[text_id]
        metadata['corpus_all_texts'] = texts_metadata
        metadata['corpus_analysis_date'] = datetime.utcnow().isoformat() + 'Z'
        metadata['corpus_cqi_py_protocol_version'] = client.api.version
        metadata['corpus_cqi_py_package_version'] = cqi.__version__
        # TODO: make this dynamically
        metadata['corpus_cqpserver_version'] = 'CQPserver v3.4.22'
        # write some metadata to the db
        db_corpus.current_nr_of_tokens = metadata['corpus_size_tokens']
        db.session.commit()
        # emit data
        payload = metadata
        response = {'code': 200, 'desc': None, 'msg': 'OK', 'payload': payload}
        socketio.emit('corpus_analysis_meta_data', response, room=request.sid)
    except cqi.errors.CQiException as e:
        payload = {'code': e.code, 'desc': e.description, 'msg': e.name}
        response = {'code': 500, 'desc': None, 'msg': 'Internal Server Error',
                    'payload': payload}
        socketio.emit('corpus_analysis_meta_data', response, room=request.sid)
    finally:
        # Always hand the client back; other handlers wait for 'ready' and
        # would wait forever if an unexpected exception left it 'running'.
        client.status = 'ready'
@socketio.on('corpus_analysis_query')
@socketio_login_required
def corpus_analysis_query(query):
    """Run a query and stream its results to the requesting session.

    The query status and match count are emitted on
    'corpus_analysis_query'. The results follow in chunks of 100 matches on
    'corpus_analysis_query_results', each with a progress percentage.
    Streaming stops early when the client status is set to 'abort' or a CQi
    error occurs.
    """
    client = corpus_analysis_clients.get(request.sid)
    if client is None:
        response = {'code': 424, 'desc': None, 'msg': 'Failed Dependency'}
        socketio.emit('corpus_analysis_query', response, room=request.sid)
        return
    if client.status == 'running':
        client.status = 'abort'
        while client.status != 'ready':
            socketio.sleep(0.3)
    client.status = 'running'
    try:
        corpus = client.corpora.get('CORPUS')
        query_status = corpus.query(query)
        results = corpus.subcorpora.get('Results')
    except cqi.errors.CQiException as e:
        client.status = 'ready'
        handle_cqi_exception('corpus_analysis_query', e, request.sid)
        return
    total = results.attrs['size']
    payload = {'status': query_status,
               'msg': cqi.api.specification.lookup[query_status],
               'match_count': total}
    response = {'code': 200, 'desc': None, 'msg': 'OK', 'payload': payload}
    socketio.emit('corpus_analysis_query', response, room=request.sid)
    chunk_size = 100
    chunk_start = 0
    context = 50
    progress = 0
    # '<' instead of '<=': when the match count is a positive multiple of
    # the chunk size, '<=' requested one more chunk starting past the last
    # match. The first iteration always runs, so an empty result still
    # emits its single chunk with a progress of 100.
    while chunk_start == 0 or chunk_start < total:
        if client.status == 'abort':
            break
        try:
            chunk = results.export(context=context, cutoff=chunk_size, offset=chunk_start)  # noqa
        except cqi.errors.CQiException as e:
            handle_cqi_exception('corpus_analysis_query', e, request.sid)
            break
        if total == 0:
            progress = 100
        else:
            progress = ((chunk_start + chunk_size) / total) * 100
            progress = min(100, int(math.ceil(progress)))
        payload = {'chunk': chunk, 'progress': progress}
        response = {'code': 200, 'desc': None, 'msg': 'OK', 'payload': payload}
        socketio.emit('corpus_analysis_query_results', response, room=request.sid)  # noqa
        chunk_start += chunk_size
    client.status = 'ready'
@socketio.on('corpus_analysis_get_match_with_full_context')
@socketio_login_required
def corpus_analysis_get_match_with_full_context(payload):
    """Emit the requested matches, one at a time, with sentence context.

    ``payload`` provides 'type', 'data_indexes', 'first_cpos' and
    'last_cpos'. For every (index, first, last) triple the 's' structural
    attribute is exported with a context of 10 and emitted together with its
    lookups and a progress percentage. Stops early when the client status is
    set to 'abort' or a CQi error occurs.
    """
    event = 'corpus_analysis_get_match_with_full_context'
    # Not named 'type', so the builtin is not shadowed; the response key
    # below stays 'type'.
    request_type = payload['type']
    data_indexes = payload['data_indexes']
    first_cpos = payload['first_cpos']
    last_cpos = payload['last_cpos']
    client = corpus_analysis_clients.get(request.sid)
    if client is None:
        response = {'code': 424, 'desc': 'No client found for this session',
                    'msg': 'Failed Dependency'}
        socketio.emit(event, response, room=request.sid)
        return
    if client.status == 'running':
        client.status = 'abort'
        while client.status != 'ready':
            socketio.sleep(0.3)
    client.status = 'running'
    try:
        corpus = client.corpora.get('CORPUS')
        s = corpus.structural_attributes.get('s')
    except cqi.errors.CQiException as e:
        # Hand the client back before leaving; otherwise it would stay
        # 'running' and later handlers would wait for 'ready' forever.
        client.status = 'ready'
        handle_cqi_exception(event, e, request.sid)
        return
    i = 0
    # Send data one match at a time.
    for index, f_cpos, l_cpos in zip(data_indexes, first_cpos, last_cpos):
        if client.status == 'abort':
            break
        i += 1
        matches = []
        # Two separate dicts: a chained assignment would make both names
        # refer to one dict and mix cpos and text lookups.
        cpos_lookup = {}
        text_lookup = {}
        try:
            tmp = s.export(f_cpos, l_cpos, context=10)
        except cqi.errors.CQiException as e:
            handle_cqi_exception(event, e, request.sid)
            break
        matches.append(tmp['matches'][0])
        cpos_lookup.update(tmp['cpos_lookup'])
        text_lookup.update(tmp['text_lookup'])
        progress = i / len(data_indexes) * 100
        match_payload = {'matches': matches, 'progress': progress,
                         'cpos_lookup': cpos_lookup,
                         'text_lookup': text_lookup}
        response = {'code': 200, 'desc': None, 'msg': 'OK',
                    'payload': match_payload, 'type': request_type,
                    'data_indexes': data_indexes}
        socketio.emit(event, response, room=request.sid)
    client.status = 'ready'
@socketio.on('export_corpus')
@socketio_login_required
def export_corpus(corpus_id):
    '''
    Pack the directory of a corpus into a zip archive.

    Emits a 404 response on "export_corpus" if the corpus does not exist and
    a 412 response if its status is not "prepared". After the archive has
    been written, the event "export_corpus_<corpus id>" is emitted to the
    requesting client.
    '''
    corpus = Corpus.query.get(corpus_id)
    if corpus is None:
        response = {'code': 404, 'msg': 'Not found'}
        socketio.emit('export_corpus', response, room=request.sid)
        return
    if corpus.status != 'prepared':
        response = {'code': 412, 'msg': 'Precondition Failed'}
        socketio.emit('export_corpus', response, room=request.sid)
        return
    archive_dir = os.path.join(corpus.creator.path, 'corpora')
    # Delete the old corpus archive if it has been built before.
    # archive_file only holds the file name, so it has to be resolved against
    # the directory the archive is written to. os.path.join keeps an absolute
    # archive_file untouched.
    if corpus.archive_file is not None:
        old_archive = os.path.join(archive_dir, corpus.archive_file)
        if os.path.isfile(old_archive):
            os.remove(old_archive)
    archive_file_base_name = '[corpus]_' + secure_filename(corpus.title)
    corpus.archive_file = archive_file_base_name + '.zip'
    db.session.commit()
    shutil.make_archive(
        os.path.join(archive_dir, archive_file_base_name),
        'zip',
        corpus.path
    )
    socketio.emit('export_corpus_{}'.format(corpus.id), room=request.sid)
def handle_cqi_exception(event, exception, room):
    '''
    Report a CQi exception to a client as a 500 response.

    The code, description and name of the exception are sent as payload of
    the response, which is emitted as the given event to the given room.
    '''
    details = {
        'code': exception.code,
        'desc': exception.description,
        'msg': exception.name
    }
    socketio.emit(
        event,
        {
            'code': 500,
            'desc': None,
            'msg': 'Internal Server Error',
            'payload': details
        },
        room=room
    )

View File

@ -1,8 +1,8 @@
from flask_wtf import FlaskForm
from werkzeug.utils import secure_filename
from wtforms import (BooleanField, FileField, StringField, SubmitField,
ValidationError, IntegerField, SelectField)
from wtforms.validators import DataRequired, Length, NumberRange
from wtforms import (FileField, StringField, SubmitField,
ValidationError, IntegerField)
from wtforms.validators import DataRequired, Length
class AddCorpusFileForm(FlaskForm):
@ -91,76 +91,3 @@ class ImportCorpusForm(FlaskForm):
raise ValidationError('File does not have an approved extension: '
'.zip')
field.data.filename = secure_filename(field.data.filename)
class QueryForm(FlaskForm):
    '''
    Form to submit a query to the server, where it is executed via cqi-py.
    '''
    # A query is mandatory and limited to 1024 characters.
    query = StringField(
        'Query',
        validators=[DataRequired(), Length(min=1, max=1024)]
    )
    submit = SubmitField('Search')
class DisplayOptionsForm(FlaskForm):
    '''
    Form that lets the user alter how the matches are represented.
    '''
    expert_mode = BooleanField('Expert mode')
    # Both selects offer a placeholder followed by 10, 20, 30, 40 and 50.
    result_context = SelectField(
        'Result context',
        choices=[('', 'Choose your option')]
        + [(str(n), str(n)) for n in range(10, 51, 10)]
    )
    results_per_page = SelectField(
        'Results per page',
        choices=[('', 'Choose your option')]
        + [(str(n), str(n)) for n in range(10, 51, 10)]
    )
class InspectDisplayOptionsForm(FlaskForm):
    '''
    Form of the inspect modal, used to control how the currently inspected
    match is represented.
    '''
    expert_mode_inspect = BooleanField('Expert mode')
    highlight_sentences = BooleanField('Split sentences')
    # Accepts 0 to 10 context sentences, 3 is preselected.
    context_sentences = IntegerField(
        'Context sentences',
        validators=[NumberRange(min=0, max=10)],
        default=3
    )
class QueryDownloadForm(FlaskForm):
    '''
    Form to choose the file format the analysis results are downloaded in.
    WIP.
    '''
    # The empty value is the placeholder, a real choice is required.
    file_type = SelectField(
        'File type',
        choices=[
            ('', 'Choose file type'),
            ('csv', 'csv'),
            ('json', 'json'),
            ('excel', 'excel'),
            ('html', 'html-table')
        ],
        validators=[DataRequired()]
    )
class AddQueryResultForm(FlaskForm):
    '''
    Form used to import a single result json file.
    '''
    description = StringField(
        'Description',
        validators=[DataRequired(), Length(min=1, max=255)]
    )
    file = FileField('File', validators=[DataRequired()])
    title = StringField(
        'Title',
        validators=[DataRequired(), Length(min=1, max=32)]
    )
    submit = SubmitField()

    def validate_file(self, field):
        '''
        Only accept uploads with a .json extension (case insensitive) and
        replace the file name of an accepted upload with its secured form.
        '''
        upload = field.data
        if upload.filename.lower().endswith('.json'):
            upload.filename = secure_filename(upload.filename)
            return
        raise ValidationError(
            'File does not have an approved extension: .json'
        )

View File

@ -0,0 +1,21 @@
from flask_wtf import FlaskForm
from werkzeug.utils import secure_filename
from wtforms import FileField, StringField, SubmitField, ValidationError
from wtforms.validators import DataRequired, Length
class AddQueryResultForm(FlaskForm):
    '''
    Form used to import a single result json file.
    '''
    description = StringField(
        'Description',
        validators=[DataRequired(), Length(min=1, max=255)]
    )
    file = FileField('File', validators=[DataRequired()])
    title = StringField(
        'Title',
        validators=[DataRequired(), Length(min=1, max=32)]
    )
    submit = SubmitField()

    def validate_file(self, field):
        '''
        Only accept uploads with a .json extension (case insensitive) and
        replace the file name of an accepted upload with its secured form.
        '''
        upload = field.data
        if upload.filename.lower().endswith('.json'):
            upload.filename = secure_filename(upload.filename)
            return
        raise ValidationError(
            'File does not have an approved extension: .json'
        )

View File

@ -0,0 +1,134 @@
from flask import (abort, current_app, flash, make_response, redirect, request,
render_template, url_for, send_from_directory)
from flask_login import current_user, login_required
from . import bp
from . import tasks
from .forms import (AddQueryResultForm, DisplayOptionsForm,
InspectDisplayOptionsForm)
from .. import db
from ..models import QueryResult
import json
import os
@bp.route('/result/add', methods=['GET', 'POST'])
@login_required
def add_query_result():
    '''
    View to import a result as a json file.

    The view is currently disabled and aborts with 503 right away.

    Without a submitted form the upload page is rendered. A submitted form is
    validated, the upload is saved to the path of a new QueryResult and its
    content, without the "matches" and "cpos_lookup" entries, is stored as
    query metadata. Responds with 400 for form errors or an unusable file,
    with 500 if the target directory can not be created and with 201 and a
    redirect url on success.
    '''
    abort(503)
    form = AddQueryResultForm(prefix='add-query-result-form')
    if not form.is_submitted():
        return render_template(
            'corpora/query_results/add_query_result.html.j2',
            form=form,
            title='Add query result'
        )
    if not form.validate():
        return make_response(form.errors, 400)
    query_result = QueryResult(creator=current_user,
                               description=form.description.data,
                               filename=form.file.data.filename,
                               title=form.title.data)
    db.session.add(query_result)
    db.session.flush()
    db.session.refresh(query_result)
    # Resolve the paths once, they are still needed after a rollback.
    query_result_path = query_result.path
    query_result_dir = os.path.dirname(query_result_path)
    try:
        os.makedirs(query_result_dir)
    except OSError:
        current_app.logger.error(
            'Make dir {} led to an OSError!'.format(query_result_path)
        )
        db.session.rollback()
        flash('Internal Server Error', 'error')
        return make_response(
            {'redirect_url': url_for('.add_query_result')}, 500)
    # save the uploaded file
    form.file.data.save(query_result_path)
    # TODO: validate the content against the json schema
    # app/static/json_schema/nopaque_cqi_py_results_schema.json
    try:
        # parse json from file
        with open(query_result_path, 'r') as file:
            query_result_file_content = json.load(file)
        query_result_file_content.pop('matches')
        query_result_file_content.pop('cpos_lookup')
    except (AttributeError, KeyError, TypeError, ValueError) as e:
        # The upload is untrusted: it may not be json at all (ValueError),
        # not a json object (AttributeError, TypeError) or lack the expected
        # entries (KeyError). Undo everything that was created for it.
        current_app.logger.warning(
            'Uploaded query result is invalid: {}'.format(e)
        )
        db.session.rollback()
        try:
            os.remove(query_result_path)
            os.rmdir(query_result_dir)
        except OSError as cleanup_error:
            current_app.logger.error(
                'Could not clean up {}: {}'.format(query_result_dir,
                                                   cleanup_error)
            )
        # Same shape as form.errors: field name -> list of messages.
        return make_response({'file': ['Uploaded file is invalid']}, 400)
    query_result.query_metadata = query_result_file_content
    db.session.commit()
    flash('Query result added!', 'result')
    return make_response(
        {'redirect_url': url_for('.query_result',
                                 query_result_id=query_result.id)},
        201
    )
@bp.route('/result/<int:query_result_id>')
@login_required
def query_result(query_result_id):
    '''
    Render the detail page of a query result.

    The view is currently disabled and aborts with 503 right away. Otherwise
    it responds with 404 for an unknown id and with 403 if the current user
    is neither the creator nor an administrator.
    '''
    abort(503)
    result = QueryResult.query.get_or_404(query_result_id)
    is_creator = result.creator == current_user
    if not is_creator and not current_user.is_administrator():
        abort(403)
    return render_template(
        'corpora/query_results/query_result.html.j2',
        query_result=result,
        title='Query result'
    )
@bp.route('/result/<int:query_result_id>/inspect')
@login_required
def inspect_query_result(query_result_id):
    '''
    View to inspect an imported result file in a corpus analysis like
    interface.

    The view is currently disabled and aborts with 503 right away. Otherwise
    it responds with 404 for an unknown id and with 403 if the current user
    is neither the creator nor an administrator.
    '''
    abort(503)
    result = QueryResult.query.get_or_404(query_result_id)
    metadata = result.query_metadata
    is_creator = result.creator == current_user
    if not is_creator and not current_user.is_administrator():
        abort(403)
    # The display options are preset from the query string.
    per_page = request.args.get('results_per_page', 30)
    context = request.args.get('context', 20)
    display_options_form = DisplayOptionsForm(
        prefix='display-options-form',
        results_per_page=per_page,
        result_context=context
    )
    inspect_display_options_form = InspectDisplayOptionsForm(
        prefix='inspect-display-options-form'
    )
    with open(result.path, 'r') as result_file:
        content = json.load(result_file)
    template_context = {
        'query_result': result,
        'display_options_form': display_options_form,
        'inspect_display_options_form': inspect_display_options_form,
        'query_result_file_content': content,
        'query_metadata': metadata,
        'title': 'Inspect query result'
    }
    return render_template('corpora/query_results/inspect.html.j2',
                           **template_context)
@bp.route('/result/<int:query_result_id>/delete')
@login_required
def delete_query_result(query_result_id):
    '''
    Hand a query result over to the deletion task.

    The view is currently disabled and aborts with 503 right away. Otherwise
    it responds with 404 for an unknown id and with 403 if the current user
    is neither the creator nor an administrator. Afterwards the user is
    redirected to the corpus analysis service page.
    '''
    abort(503)
    result = QueryResult.query.get_or_404(query_result_id)
    is_creator = result.creator == current_user
    if not is_creator and not current_user.is_administrator():
        abort(403)
    message = 'Query result "{}" has been marked for deletion!'.format(result)
    flash(message, 'result')
    tasks.delete_query_result(query_result_id)
    target = url_for('services.service', service="corpus_analysis")
    return redirect(target)
@bp.route('/result/<int:query_result_id>/download')
@login_required
def download_query_result(query_result_id):
    '''
    Send the file of a query result as an attachment.

    The view is currently disabled and aborts with 503 right away. Otherwise
    it responds with 404 for an unknown id and with 403 if the current user
    is neither the creator nor an administrator.
    '''
    abort(503)
    result = QueryResult.query.get_or_404(query_result_id)
    is_creator = result.creator == current_user
    if not is_creator and not current_user.is_administrator():
        abort(403)
    directory = os.path.dirname(result.path)
    return send_from_directory(
        as_attachment=True,
        directory=directory,
        filename=result.filename
    )

View File

@ -1,16 +1,12 @@
from flask import (abort, current_app, flash, make_response, redirect, request,
from flask import (abort, current_app, flash, make_response, redirect,
render_template, url_for, send_from_directory)
from flask_login import current_user, login_required
from . import bp
from . import tasks
from .forms import (AddCorpusFileForm, AddCorpusForm, AddQueryResultForm,
EditCorpusFileForm, QueryDownloadForm, QueryForm,
DisplayOptionsForm, InspectDisplayOptionsForm,
from .forms import (AddCorpusFileForm, AddCorpusForm, EditCorpusFileForm,
ImportCorpusForm)
from jsonschema import validate
from .. import db
from ..models import Corpus, CorpusFile, QueryResult
import json
from ..models import Corpus, CorpusFile
import os
import shutil
import glob
@ -22,21 +18,22 @@ from .import_corpus import check_zip_contents
@bp.route('/add', methods=['GET', 'POST'])
@login_required
def add_corpus():
form = AddCorpusForm()
form = AddCorpusForm(prefix='add-corpus-form')
if form.validate_on_submit():
corpus = Corpus(creator=current_user,
description=form.description.data,
title=form.title.data)
corpus = Corpus(
creator=current_user,
description=form.description.data,
title=form.title.data
)
db.session.add(corpus)
db.session.flush()
db.session.refresh(corpus)
try:
os.makedirs(corpus.path)
except OSError:
current_app.logger.error(
'Make dir {} led to an OSError!'.format(corpus.path)
)
except OSError as e:
current_app.logger.error(f'Could not add corpus: {e}')
db.session.rollback()
flash('Internal Server Error', 'error')
abort(500)
else:
db.session.commit()
@ -49,22 +46,23 @@ def add_corpus():
@bp.route('/import', methods=['GET', 'POST'])
@login_required
def import_corpus():
abort(503)
form = ImportCorpusForm()
if form.is_submitted():
if not form.validate():
return make_response(form.errors, 400)
corpus = Corpus(creator=current_user,
description=form.description.data,
title=form.title.data)
corpus = Corpus(
creator=current_user,
description=form.description.data,
title=form.title.data
)
db.session.add(corpus)
db.session.flush()
db.session.refresh(corpus)
try:
os.makedirs(corpus.path)
except OSError:
current_app.logger.error(
'Make dir {} led to an OSError!'.format(corpus.path)
)
except OSError as e:
current_app.logger.error(f'Could not import corpus: {e}')
db.session.rollback()
flash('Internal Server Error', 'error')
return make_response(
@ -128,9 +126,21 @@ def corpus(corpus_id):
corpus_files=corpus_files, title='Corpus')
@bp.route('/<int:corpus_id>/analyse')
@login_required
def analyse_corpus(corpus_id):
    '''
    Render the analysis page of a corpus.

    Responds with 404 for an unknown corpus id and with 403 if the current
    user is neither the creator of the corpus nor an administrator.
    '''
    corpus = Corpus.query.get_or_404(corpus_id)
    # Same access rule as the other corpus views: a corpus is only available
    # to its creator and to administrators.
    if not (corpus.creator == current_user or current_user.is_administrator()):
        abort(403)
    return render_template(
        'corpora/analyse_corpus.html.j2',
        corpus=corpus,
        title=f'Analyse Corpus {corpus.title}'
    )
@bp.route('/<int:corpus_id>/download')
@login_required
def download_corpus(corpus_id):
abort(503)
corpus = Corpus.query.get_or_404(corpus_id)
if not (corpus.creator == current_user or current_user.is_administrator()):
abort(403)
@ -142,31 +152,6 @@ def download_corpus(corpus_id):
)
@bp.route('/<int:corpus_id>/analyse')
@login_required
def analyse_corpus(corpus_id):
    '''
    Render the analysis page of a corpus together with its forms.

    Responds with 404 for an unknown corpus id and with 403 if the current
    user is neither the creator of the corpus nor an administrator. The
    query and the display options are preset from the query string.
    '''
    corpus = Corpus.query.get_or_404(corpus_id)
    # Same access rule as the other corpus views: a corpus is only available
    # to its creator and to administrators.
    if not (corpus.creator == current_user or current_user.is_administrator()):
        abort(403)
    display_options_form = DisplayOptionsForm(
        prefix='display-options-form',
        result_context=request.args.get('context', 20),
        results_per_page=request.args.get('results_per_page', 30)
    )
    query_form = QueryForm(
        prefix='query-form',
        query=request.args.get('query')
    )
    query_download_form = QueryDownloadForm(prefix='query-download-form')
    inspect_display_options_form = InspectDisplayOptionsForm(
        prefix='inspect-display-options-form'
    )
    return render_template(
        'corpora/analyse_corpus.html.j2',
        corpus=corpus,
        display_options_form=display_options_form,
        inspect_display_options_form=inspect_display_options_form,
        query_form=query_form,
        query_download_form=query_download_form,
        title='Corpus analysis'
    )
@bp.route('/<int:corpus_id>/delete')
@login_required
def delete_corpus(corpus_id):
@ -190,20 +175,22 @@ def add_corpus_file(corpus_id):
return make_response(form.errors, 400)
# Save the file
form.file.data.save(os.path.join(corpus.path, form.file.data.filename))
corpus_file = CorpusFile(address=form.address.data,
author=form.author.data,
booktitle=form.booktitle.data,
chapter=form.chapter.data,
corpus=corpus,
editor=form.editor.data,
filename=form.file.data.filename,
institution=form.institution.data,
journal=form.journal.data,
pages=form.pages.data,
publisher=form.publisher.data,
publishing_year=form.publishing_year.data,
school=form.school.data,
title=form.title.data)
corpus_file = CorpusFile(
address=form.address.data,
author=form.author.data,
booktitle=form.booktitle.data,
chapter=form.chapter.data,
corpus=corpus,
editor=form.editor.data,
filename=form.file.data.filename,
institution=form.institution.data,
journal=form.journal.data,
pages=form.pages.data,
publisher=form.publisher.data,
publishing_year=form.publishing_year.data,
school=form.school.data,
title=form.title.data
)
db.session.add(corpus_file)
corpus.status = 'unprepared'
db.session.commit()
@ -298,122 +285,3 @@ def prepare_corpus(corpus_id):
else:
flash('Can not build corpus "{}": No corpus file(s)!'.format(corpus.title), 'error') # noqa
return redirect(url_for('.corpus', corpus_id=corpus_id))
# Following are view functions to add, view etc. exported results.
@bp.route('/result/add', methods=['GET', 'POST'])
@login_required
def add_query_result():
    '''
    View to import a result as a json file.

    Without a submitted form the upload page is rendered. A submitted form is
    validated, the upload is saved to the path of a new QueryResult and its
    content, without the "matches" and "cpos_lookup" entries, is stored as
    query metadata. Responds with 400 for form errors or an unusable file,
    with 500 if the target directory can not be created and with 201 and a
    redirect url on success.
    '''
    form = AddQueryResultForm(prefix='add-query-result-form')
    if not form.is_submitted():
        return render_template(
            'corpora/query_results/add_query_result.html.j2',
            form=form,
            title='Add query result'
        )
    if not form.validate():
        return make_response(form.errors, 400)
    query_result = QueryResult(creator=current_user,
                               description=form.description.data,
                               filename=form.file.data.filename,
                               title=form.title.data)
    db.session.add(query_result)
    db.session.flush()
    db.session.refresh(query_result)
    # Resolve the paths once, they are still needed after a rollback.
    query_result_path = query_result.path
    query_result_dir = os.path.dirname(query_result_path)
    try:
        os.makedirs(query_result_dir)
    except OSError:
        current_app.logger.error(
            'Make dir {} led to an OSError!'.format(query_result_path)
        )
        db.session.rollback()
        flash('Internal Server Error', 'error')
        return make_response(
            {'redirect_url': url_for('.add_query_result')}, 500)
    # save the uploaded file
    form.file.data.save(query_result_path)
    # TODO: validate the content against the json schema
    # app/static/json_schema/nopaque_cqi_py_results_schema.json
    try:
        # parse json from file
        with open(query_result_path, 'r') as file:
            query_result_file_content = json.load(file)
        query_result_file_content.pop('matches')
        query_result_file_content.pop('cpos_lookup')
    except (AttributeError, KeyError, TypeError, ValueError) as e:
        # The upload is untrusted: it may not be json at all (ValueError),
        # not a json object (AttributeError, TypeError) or lack the expected
        # entries (KeyError). Undo everything that was created for it.
        current_app.logger.warning(
            'Uploaded query result is invalid: {}'.format(e)
        )
        db.session.rollback()
        try:
            os.remove(query_result_path)
            os.rmdir(query_result_dir)
        except OSError as cleanup_error:
            current_app.logger.error(
                'Could not clean up {}: {}'.format(query_result_dir,
                                                   cleanup_error)
            )
        # Same shape as form.errors: field name -> list of messages.
        return make_response({'file': ['Uploaded file is invalid']}, 400)
    query_result.query_metadata = query_result_file_content
    db.session.commit()
    flash('Query result added!', 'result')
    return make_response(
        {'redirect_url': url_for('.query_result',
                                 query_result_id=query_result.id)},
        201
    )
@bp.route('/result/<int:query_result_id>')
@login_required
def query_result(query_result_id):
    '''
    Render the detail page of a query result.

    Responds with 404 for an unknown id and with 403 if the current user is
    neither the creator nor an administrator.
    '''
    result = QueryResult.query.get_or_404(query_result_id)
    is_creator = result.creator == current_user
    if not is_creator and not current_user.is_administrator():
        abort(403)
    return render_template(
        'corpora/query_results/query_result.html.j2',
        query_result=result,
        title='Query result'
    )
@bp.route('/result/<int:query_result_id>/inspect')
@login_required
def inspect_query_result(query_result_id):
    '''
    View to inspect an imported result file in a corpus analysis like
    interface.

    Responds with 404 for an unknown id and with 403 if the current user is
    neither the creator nor an administrator.
    '''
    result = QueryResult.query.get_or_404(query_result_id)
    metadata = result.query_metadata
    is_creator = result.creator == current_user
    if not is_creator and not current_user.is_administrator():
        abort(403)
    # The display options are preset from the query string.
    per_page = request.args.get('results_per_page', 30)
    context = request.args.get('context', 20)
    display_options_form = DisplayOptionsForm(
        prefix='display-options-form',
        results_per_page=per_page,
        result_context=context
    )
    inspect_display_options_form = InspectDisplayOptionsForm(
        prefix='inspect-display-options-form'
    )
    with open(result.path, 'r') as result_file:
        content = json.load(result_file)
    template_context = {
        'query_result': result,
        'display_options_form': display_options_form,
        'inspect_display_options_form': inspect_display_options_form,
        'query_result_file_content': content,
        'query_metadata': metadata,
        'title': 'Inspect query result'
    }
    return render_template('corpora/query_results/inspect.html.j2',
                           **template_context)
@bp.route('/result/<int:query_result_id>/delete')
@login_required
def delete_query_result(query_result_id):
    '''
    Hand a query result over to the deletion task.

    Responds with 404 for an unknown id and with 403 if the current user is
    neither the creator nor an administrator. Afterwards the user is
    redirected to the corpus analysis service page.
    '''
    result = QueryResult.query.get_or_404(query_result_id)
    is_creator = result.creator == current_user
    if not is_creator and not current_user.is_administrator():
        abort(403)
    message = 'Query result "{}" has been marked for deletion!'.format(result)
    flash(message, 'result')
    tasks.delete_query_result(query_result_id)
    target = url_for('services.service', service="corpus_analysis")
    return redirect(target)
@bp.route('/result/<int:query_result_id>/download')
@login_required
def download_query_result(query_result_id):
    '''
    Send the file of a query result as an attachment.

    Responds with 404 for an unknown id and with 403 if the current user is
    neither the creator nor an administrator.
    '''
    result = QueryResult.query.get_or_404(query_result_id)
    is_creator = result.creator == current_user
    if not is_creator and not current_user.is_administrator():
        abort(403)
    directory = os.path.dirname(result.path)
    return send_from_directory(
        as_attachment=True,
        directory=directory,
        filename=result.filename
    )

View File

@ -1,4 +1,5 @@
from app import db
from flask import current_app
from time import sleep
from .corpus_utils import CheckCorporaMixin
from .job_utils import CheckJobsMixin
@ -8,6 +9,11 @@ import docker
class Daemon(CheckCorporaMixin, CheckJobsMixin):
def __init__(self):
self.docker = docker.from_env()
self.docker.login(
username=current_app.config['NOPAQUE_DOCKER_REGISTRY_USERNAME'],
password=current_app.config['NOPAQUE_DOCKER_REGISTRY_PASSWORD'],
registry=current_app.config['NOPAQUE_DOCKER_REGISTRY']
)
def run(self):
while True:

View File

@ -8,21 +8,19 @@ import shutil
class CheckCorporaMixin:
def check_corpora(self):
corpora = Corpus.query.all()
queued_corpora = list(filter(lambda corpus: corpus.status == 'queued', corpora)) # noqa
running_corpora = list(filter(lambda corpus: corpus.status == 'running', corpora)) # noqa
start_analysis_corpora = list(filter(lambda corpus: corpus.status == 'start analysis', corpora)) # noqa
analysing_corpora = list(filter(lambda corpus: corpus.status == 'analysing', corpora)) # noqa
stop_analysis_corpora = list(filter(lambda corpus: corpus.status == 'stop analysis', corpora)) # noqa
submitted_corpora = list(filter(lambda corpus: corpus.status == 'submitted', corpora)) # noqa
for corpus in submitted_corpora:
for corpus in (x for x in corpora if x.status == 'submitted'):
self.create_build_corpus_service(corpus)
for corpus in queued_corpora + running_corpora:
for corpus in (x for x in corpora if x.status == 'queued' or x.status == 'running'): # noqa
self.checkout_build_corpus_service(corpus)
for corpus in start_analysis_corpora:
self.create_cqpserver_container(corpus)
for corpus in analysing_corpora:
for corpus in (x for x in corpora if x.status == 'prepared' and x.num_analysis_sessions > 0): # noqa
corpus.status = 'start analysis'
for corpus in (x for x in corpora if x.status == 'analysing' and x.num_analysis_sessions == 0): # noqa
corpus.status = 'stop analysis'
for corpus in (x for x in corpora if x.status == 'analysing'):
self.checkout_analysing_corpus_container(corpus)
for corpus in stop_analysis_corpora:
for corpus in (x for x in corpora if x.status == 'start analysis'):
self.create_cqpserver_container(corpus)
for corpus in (x for x in corpora if x.status == 'stop analysis'):
self.remove_cqpserver_container(corpus)
def create_build_corpus_service(self, corpus):
@ -32,7 +30,7 @@ class CheckCorporaMixin:
''' ## Constraints ## '''
constraints = ['node.role==worker']
''' ## Image ## '''
image = current_app.config['DOCKER_IMAGE_PREFIX'] + 'cqpserver:latest'
image = f'{current_app.config["NOPAQUE_DOCKER_IMAGE_PREFIX"]}cqpserver:r1674' # noqa
''' ## Labels ## '''
labels = {
'origin': current_app.config['SERVER_NAME'],
@ -43,27 +41,24 @@ class CheckCorporaMixin:
''' ### Corpus file mount ### '''
corpus_file_source = os.path.join(corpus.path, 'merged', 'corpus.vrt')
corpus_file_target = '/root/files/corpus.vrt'
corpus_file_mount = \
corpus_file_source + ':' + corpus_file_target + ':ro'
corpus_file_mount = f'{corpus_file_source}:{corpus_file_target}:ro'
''' ### Corpus data mount ### '''
corpus_data_source = os.path.join(corpus.path, 'data')
corpus_data_target = '/corpora/data'
corpus_data_mount = \
corpus_data_source + ':' + corpus_data_target + ':rw'
corpus_data_mount = f'{corpus_data_source}:{corpus_data_target}:rw'
# Make sure that there is no data in the corpus data directory
shutil.rmtree(corpus_data_source, ignore_errors=True)
os.mkdir(corpus_data_source)
''' ### Corpus registry mount ### '''
corpus_registry_source = os.path.join(corpus.path, 'registry')
corpus_registry_target = '/usr/local/share/cwb/registry'
corpus_registry_mount = \
corpus_registry_source + ':' + corpus_registry_target + ':rw'
corpus_registry_mount = f'{corpus_registry_source}:{corpus_registry_target}:rw' # noqa
# Make sure that there is no data in the corpus registry directory
shutil.rmtree(corpus_registry_source, ignore_errors=True)
os.mkdir(corpus_registry_source)
mounts = [corpus_file_mount, corpus_data_mount, corpus_registry_mount]
''' ## Name ## '''
name = 'build-corpus_{}'.format(corpus.id)
name = f'build-corpus_{corpus.id}'
''' ## Restart policy ## '''
restart_policy = docker.types.RestartPolicy()
try:
@ -78,57 +73,48 @@ class CheckCorporaMixin:
)
except docker.errors.APIError as e:
current_app.logger.error(
'Create "{}" service raised '.format(name)
+ '"docker.errors.APIError" The server returned an error. '
+ 'Details: {}'.format(e)
f'Create service "{name}" failed '
+ f'due to "docker.errors.APIError": {e}'
)
else:
corpus.status = 'queued'
return
corpus.status = 'queued'
def checkout_build_corpus_service(self, corpus):
service_name = 'build-corpus_{}'.format(corpus.id)
service_name = f'build-corpus_{corpus.id}'
try:
service = self.docker.services.get(service_name)
except docker.errors.NotFound:
except docker.errors.NotFound as e:
current_app.logger.error(
'Get "{}" service raised '.format(service_name)
+ '"docker.errors.NotFound" The service does not exist. '
+ '(corpus.status: {} -> failed)'.format(corpus.status)
f'Get service "{service_name}" failed '
+ f'due to "docker.errors.NotFound": {e}'
)
corpus.status = 'failed'
return
except docker.errors.APIError as e:
current_app.logger.error(
'Get "{}" service raised '.format(service_name)
+ '"docker.errors.APIError" The server returned an error. '
+ 'Details: {}'.format(e)
)
except docker.errors.InvalidVersion:
current_app.logger.error(
'Get "{}" service raised '.format(service_name)
+ '"docker.errors.InvalidVersion" One of the arguments is '
+ 'not supported with the current API version.'
f'Get service "{service_name}" failed '
+ f'due to "docker.errors.APIError": {e}'
)
service_tasks = service.tasks()
if not service_tasks:
return
task_state = service_tasks[0].get('Status').get('State')
if corpus.status == 'queued' and task_state != 'pending':
corpus.status = 'running'
return
elif corpus.status == 'running' and task_state == 'complete':
corpus.status = 'prepared'
elif corpus.status == 'running' and task_state == 'failed':
corpus.status = 'failed'
else:
service_tasks = service.tasks()
if not service_tasks:
return
task_state = service_tasks[0].get('Status').get('State')
if corpus.status == 'queued' and task_state != 'pending':
corpus.status = 'running'
elif (corpus.status == 'running'
and task_state in ['complete', 'failed']):
try:
service.remove()
except docker.errors.APIError as e:
current_app.logger.error(
'Remove "{}" service raised '.format(service_name)
+ '"docker.errors.APIError" The server returned an error. ' # noqa
+ 'Details: {}'.format(e)
)
return
else:
corpus.status = \
'prepared' if task_state == 'complete' else 'failed'
return
try:
service.remove()
except docker.errors.APIError as e:
current_app.logger.error(
f'Remove service "{service_name}" failed '
+ f'due to "docker.errors.APIError": {e}'
)
def create_cqpserver_container(self, corpus):
''' # Docker container settings # '''
@ -137,22 +123,20 @@ class CheckCorporaMixin:
''' ## Detach ## '''
detach = True
''' ## Image ## '''
image = current_app.config['DOCKER_IMAGE_PREFIX'] + 'cqpserver:latest'
image = f'{current_app.config["NOPAQUE_DOCKER_IMAGE_PREFIX"]}cqpserver:r1674' # noqa
''' ## Name ## '''
name = 'cqpserver_{}'.format(corpus.id)
name = f'cqpserver_{corpus.id}'
''' ## Network ## '''
network = 'nopaque_default'
''' ## Volumes ## '''
''' ### Corpus data volume ### '''
corpus_data_source = os.path.join(corpus.path, 'data')
corpus_data_target = '/corpora/data'
corpus_data_volume = \
corpus_data_source + ':' + corpus_data_target + ':rw'
corpus_data_volume = f'{corpus_data_source}:{corpus_data_target}:rw'
''' ### Corpus registry volume ### '''
corpus_registry_source = os.path.join(corpus.path, 'registry')
corpus_registry_target = '/usr/local/share/cwb/registry'
corpus_registry_volume = \
corpus_registry_source + ':' + corpus_registry_target + ':rw'
corpus_registry_volume = f'{corpus_registry_source}:{corpus_registry_target}:rw' # noqa
volumes = [corpus_data_volume, corpus_registry_volume]
# Check if a cqpserver container already exists. If this is the case,
# remove it and create a new one
@ -162,9 +146,8 @@ class CheckCorporaMixin:
pass
except docker.errors.APIError as e:
current_app.logger.error(
'Get "{}" container raised '.format(name)
+ '"docker.errors.APIError" The server returned an error. '
+ 'Details: {}'.format(e)
f'Get container "{name}" failed '
+ f'due to "docker.errors.APIError": {e}'
)
return
else:
@ -172,77 +155,68 @@ class CheckCorporaMixin:
container.remove(force=True)
except docker.errors.APIError as e:
current_app.logger.error(
'Remove "{}" container raised '.format(name)
+ '"docker.errors.APIError" The server returned an error. '
+ 'Details: {}'.format(e)
f'Remove container "{name}" failed '
+ f'due to "docker.errors.APIError": {e}'
)
return
try:
self.docker.containers.run(image, command=command, detach=detach,
volumes=volumes, name=name,
network=network)
except docker.errors.ContainerError:
# This case should not occur, because detach is True.
self.docker.containers.run(
image,
command=command,
detach=detach,
volumes=volumes,
name=name,
network=network
)
except docker.errors.ImageNotFound as e:
current_app.logger.error(
'Run "{}" container raised '.format(name)
+ '"docker.errors.ContainerError" The container exits with a '
+ 'non-zero exit code and detach is False.'
)
corpus.status = 'failed'
except docker.errors.ImageNotFound:
current_app.logger.error(
'Run "{}" container raised '.format(name)
+ '"docker.errors.ImageNotFound" The specified image does not '
+ 'exist.'
f'Run container "{name}" failed '
+ f'due to "docker.errors.ImageNotFound" error: {e}'
)
corpus.status = 'failed'
return
except docker.errors.APIError as e:
current_app.logger.error(
'Run "{}" container raised '.format(name)
+ '"docker.errors.APIError" The server returned an error. '
+ 'Details: {}'.format(e)
f'Run container "{name}" failed '
+ f'due to "docker.errors.APIError" error: {e}'
)
else:
corpus.status = 'analysing'
return
corpus.status = 'analysing'
def checkout_analysing_corpus_container(self, corpus):
container_name = 'cqpserver_{}'.format(corpus.id)
container_name = f'cqpserver_{corpus.id}'
try:
self.docker.containers.get(container_name)
except docker.errors.NotFound:
except docker.errors.NotFound as e:
current_app.logger.error(
'Could not find "{}" but the corpus state is "analysing".'
f'Get container "{container_name}" failed '
+ f'due to "docker.errors.NotFound": {e}'
)
corpus.num_analysis_sessions = 0
corpus.status = 'prepared'
except docker.errors.APIError as e:
current_app.logger.error(
'Get "{}" container raised '.format(container_name)
+ '"docker.errors.APIError" The server returned an error. '
+ 'Details: {}'.format(e)
f'Get container "{container_name}" failed '
+ f'due to "docker.errors.APIError": {e}'
)
return
def remove_cqpserver_container(self, corpus):
container_name = 'cqpserver_{}'.format(corpus.id)
container_name = f'cqpserver_{corpus.id}'
try:
container = self.docker.containers.get(container_name)
except docker.errors.NotFound:
pass
corpus.status = 'prepared'
return
except docker.errors.APIError as e:
current_app.logger.error(
'Get "{}" container raised '.format(container_name)
+ '"docker.errors.APIError" The server returned an error. '
+ 'Details: {}'.format(e)
f'Get container "{container_name}" failed '
+ f'due to "docker.errors.APIError": {e}'
)
return
else:
try:
container.remove(force=True)
except docker.errors.APIError as e:
current_app.logger.error(
'Remove "{}" container raised '.format(container_name)
+ '"docker.errors.APIError" The server returned an error. '
+ 'Details: {}'.format(e)
)
return
corpus.status = 'prepared'
try:
container.remove(force=True)
except docker.errors.APIError as e:
current_app.logger.error(
f'Remove container "{container_name}" failed '
+ f'due to "docker.errors.APIError": {e}'
)

View File

@ -12,15 +12,11 @@ import shutil
class CheckJobsMixin:
def check_jobs(self):
jobs = Job.query.all()
canceling_jobs = list(filter(lambda job: job.status == 'canceling', jobs)) # noqa
queued_jobs = list(filter(lambda job: job.status == 'queued', jobs))
running_jobs = list(filter(lambda job: job.status == 'running', jobs))
submitted_jobs = list(filter(lambda job: job.status == 'submitted', jobs)) # noqa
for job in submitted_jobs:
for job in (x for x in jobs if x.status == 'submitted'):
self.create_job_service(job)
for job in queued_jobs + running_jobs:
for job in (x for x in jobs if x.status in ['queued', 'running']):
self.checkout_job_service(job)
for job in canceling_jobs:
for job in (x for x in jobs if x.status == 'canceling'):
self.remove_job_service(job)
def create_job_service(self, job):
@ -30,26 +26,23 @@ class CheckJobsMixin:
mem_mb = 2048
n_cores = 2
executable = 'file-setup'
image = (current_app.config['DOCKER_IMAGE_PREFIX']
+ 'file-setup:' + job.service_version)
image = f'{current_app.config["NOPAQUE_DOCKER_IMAGE_PREFIX"]}file-setup:{job.service_version}' # noqa
elif job.service == 'ocr':
mem_mb = 4096
n_cores = 4
executable = 'ocr'
image = (current_app.config['DOCKER_IMAGE_PREFIX']
+ 'ocr:' + job.service_version)
image = f'{current_app.config["NOPAQUE_DOCKER_IMAGE_PREFIX"]}ocr:{job.service_version}' # noqa
elif job.service == 'nlp':
mem_mb = 2048
n_cores = 2
executable = 'nlp'
image = (current_app.config['DOCKER_IMAGE_PREFIX']
+ 'nlp:' + job.service_version)
image = f'{current_app.config["NOPAQUE_DOCKER_IMAGE_PREFIX"]}nlp:{job.service_version}' # noqa
''' ## Command ## '''
command = '{} -i /input -o /output'.format(executable)
command = f'{executable} -i /input -o /output'
command += ' --log-dir /input'
command += ' --mem-mb {}'.format(mem_mb)
command += ' --n-cores {}'.format(n_cores)
command += ' --zip [' + job.service + ']_' + secure_filename(job.title)
command += f' --mem-mb {mem_mb}'
command += f' --n-cores {n_cores}'
command += f' --zip [{job.service}]_{secure_filename(job.title)}'
command += ' ' + ' '.join(json.loads(job.service_args))
''' ## Constraints ## '''
constraints = ['node.role==worker']
@ -64,18 +57,18 @@ class CheckJobsMixin:
input_mount_source = job.path
input_mount_target = '/input'
if job.service == 'file-setup':
input_mount_target += '/' + secure_filename(job.title)
input_mount = input_mount_source + ':' + input_mount_target + ':rw'
input_mount_target += f'/{secure_filename(job.title)}'
input_mount = f'{input_mount_source}:{input_mount_target}:rw'
''' ### Output mount ### '''
output_mount_source = os.path.join(job.path, 'output')
output_mount_target = '/output'
output_mount = output_mount_source + ':' + output_mount_target + ':rw'
output_mount = f'{output_mount_source}:{output_mount_target}:rw'
# Make sure that there is no data in the output directory
shutil.rmtree(output_mount_source, ignore_errors=True)
os.makedirs(output_mount_source)
mounts = [input_mount, output_mount]
''' ## Name ## '''
name = 'job_{}'.format(job.id)
name = f'job_{job.id}'
''' ## Resources ## '''
resources = docker.types.Resources(
cpu_reservation=n_cores * (10 ** 9),
@ -96,104 +89,83 @@ class CheckJobsMixin:
)
except docker.errors.APIError as e:
current_app.logger.error(
'Create "{}" service raised '.format(name)
+ '"docker.errors.APIError" The server returned an error. '
+ 'Details: {}'.format(e)
f'Create service "{name}" failed '
+ f'due to "docker.errors.APIError": {e}'
)
return
else:
job.status = 'queued'
job.status = 'queued'
def checkout_job_service(self, job):
service_name = 'job_{}'.format(job.id)
service_name = f'job_{job.id}'
try:
service = self.docker.services.get(service_name)
except docker.errors.NotFound:
except docker.errors.NotFound as e:
current_app.logger.error(
'Get "{}" service raised '.format(service_name)
+ '"docker.errors.NotFound" The service does not exist. '
+ '(job.status: {} -> failed)'.format(job.status)
f'Get service "{service_name}" failed '
+ f'due to "docker.errors.NotFound": {e}'
)
job.status = 'failed'
return
except docker.errors.APIError as e:
current_app.logger.error(
'Get "{}" service raised '.format(service_name)
+ '"docker.errors.APIError" The server returned an error. '
+ 'Details: {}'.format(e)
f'Get service "{service_name}" failed '
+ f'due to "docker.errors.APIError": {e}'
)
return
except docker.errors.InvalidVersion:
current_app.logger.error(
'Get "{}" service raised '.format(service_name)
+ '"docker.errors.InvalidVersion" One of the arguments is '
+ 'not supported with the current API version.'
)
service_tasks = service.tasks()
if not service_tasks:
return
task_state = service_tasks[0].get('Status').get('State')
if job.status == 'queued' and task_state != 'pending':
job.status = 'running'
return
elif job.status == 'running' and task_state == 'complete':
job.status = 'complete'
results_dir = os.path.join(job.path, 'output')
result_files = [x for x in os.listdir(results_dir) if x.endswith('.zip')] # noqa
for result_file in result_files:
job_result = JobResult(filename=result_file, job=job)
db.session.add(job_result)
db.session.flush()
db.session.refresh(job_result)
elif job.status == 'running' and task_state == 'failed':
job.status = 'failed'
else:
service_tasks = service.tasks()
if not service_tasks:
return
task_state = service_tasks[0].get('Status').get('State')
if job.status == 'queued' and task_state != 'pending':
job.status = 'running'
elif job.status == 'running' and task_state in ['complete', 'failed']: # noqa
try:
service.remove()
except docker.errors.APIError as e:
current_app.logger.error(
'Remove "{}" service raised '.format(service_name)
+ '"docker.errors.APIError" The server returned an error. ' # noqa
+ 'Details: {}'.format(e)
)
return
else:
if task_state == 'complete':
results_dir = os.path.join(job.path, 'output')
result_files = filter(lambda x: x.endswith('.zip'),
os.listdir(results_dir))
for result_file in result_files:
job_result = JobResult(filename=result_file, job=job) # noqa
db.session.add(job_result)
db.session.flush()
db.session.refresh(job_result)
job.end_date = datetime.utcnow()
job.status = task_state
return
job.end_date = datetime.utcnow()
try:
service.remove()
except docker.errors.APIError as e:
current_app.logger.error(
f'Remove service "{service_name}" failed '
+ f'due to "docker.errors.APIError": {e}'
)
def remove_job_service(self, job):
service_name = 'job_{}'.format(job.id)
service_name = f'job_{job.id}'
try:
service = self.docker.services.get(service_name)
except docker.errors.NotFound:
job.status = 'canceled'
return
except docker.errors.APIError as e:
current_app.logger.error(
'Get "{}" service raised '.format(service_name)
+ '"docker.errors.APIError" The server returned an error. '
+ 'Details: {}'.format(e)
f'Get service "{service_name}" failed '
+ f'due to "docker.errors.APIError": {e}'
)
return
except docker.errors.InvalidVersion:
try:
service.update(mounts=None)
except docker.errors.APIError as e:
current_app.logger.error(
'Get "{}" service raised '.format(service_name)
+ '"docker.errors.InvalidVersion" One of the arguments is '
+ 'not supported with the current API version.'
f'Update service "{service_name}" failed '
+ f'due to "docker.errors.APIError": {e}'
)
return
else:
try:
service.update(mounts=None)
except docker.errors.APIError as e:
current_app.logger.error(
'Update "{}" service raised '.format(service_name)
+ '"docker.errors.APIError" The server returned an error. '
+ 'Details: {}'.format(e)
)
return
try:
service.remove()
except docker.errors.APIError as e:
current_app.logger.error(
'Remove "{}" service raised '.format(service_name)
+ '"docker.errors.APIError" The server returned an error. '
+ 'Details: {}'.format(e)
)
try:
service.remove()
except docker.errors.APIError as e:
current_app.logger.error(
f'Remove "{service_name}" service failed '
+ f'due to "docker.errors.APIError": {e}'
)

View File

@ -1,6 +1,6 @@
from flask import request
from flask_login import current_user
from flask_socketio import join_room, leave_room
from flask_socketio import join_room
from .. import socketio
from ..decorators import socketio_login_required
from ..models import User
@ -25,7 +25,7 @@ def socketio_connect():
' On connect the sid is saved in the sessions list.
'''
sessions.append(request.sid)
return {'code': 200, 'msg': 'OK'}
# return {'code': 200, 'msg': 'OK'}
@socketio.on('disconnect')
@ -37,7 +37,7 @@ def socketio_disconnect():
sessions.remove(request.sid)
except ValueError:
pass
return {'code': 200, 'msg': 'OK'}
# return {'code': 200, 'msg': 'OK'}
@socketio.on('start_user_session')

View File

@ -567,16 +567,18 @@ class Corpus(db.Model):
user_id = db.Column(db.Integer, db.ForeignKey('users.id'))
# Fields
creation_date = db.Column(db.DateTime(), default=datetime.utcnow)
current_nr_of_tokens = db.Column(db.Integer, default=0)
description = db.Column(db.String(255))
last_edited_date = db.Column(db.DateTime(), default=datetime.utcnow)
max_nr_of_tokens = 2147483647
status = db.Column(db.String(16), default='unprepared')
title = db.Column(db.String(32))
num_analysis_sessions = db.Column(db.Integer, default=0)
num_tokens = db.Column(db.Integer, default=0)
archive_file = db.Column(db.String(255))
# Relationships
files = db.relationship('CorpusFile', backref='corpus', lazy='dynamic',
cascade='save-update, merge, delete')
# Python class variables
max_num_tokens = 2147483647
@property
def analysis_url(self):
@ -601,12 +603,13 @@ class Corpus(db.Model):
'id': self.id,
'user_id': self.user_id,
'creation_date': self.creation_date.isoformat() + 'Z',
'current_nr_of_tokens': self.current_nr_of_tokens,
'description': self.description,
'max_num_tokens': self.max_num_tokens,
'num_analysis_sessions': self.num_analysis_sessions,
'num_tokens': self.num_tokens,
'status': self.status,
'last_edited_date': self.last_edited_date.isoformat() + 'Z',
'max_nr_of_tokens': self.max_nr_of_tokens,
'title': self.title,
'title': self.title
}
if include_relationships:
dict_corpus['files'] = {file.id: file.to_dict()
@ -617,30 +620,25 @@ class Corpus(db.Model):
output_dir = os.path.join(self.path, 'merged')
shutil.rmtree(output_dir, ignore_errors=True)
os.mkdir(output_dir)
master_element_tree = ET.ElementTree(
ET.fromstring('<corpus>\n</corpus>')
)
output_file = os.path.join(output_dir, 'corpus.vrt')
corpus_element = ET.fromstring('<corpus>\n</corpus>')
for corpus_file in self.files:
element_tree = ET.parse(corpus_file.path)
text_node = element_tree.find('text')
text_node.set('address', corpus_file.address or "NULL")
text_node.set('address', corpus_file.address or 'NULL')
text_node.set('author', corpus_file.author)
text_node.set('booktitle', corpus_file.booktitle or "NULL")
text_node.set('chapter', corpus_file.chapter or "NULL")
text_node.set('editor', corpus_file.editor or "NULL")
text_node.set('institution', corpus_file.institution or "NULL")
text_node.set('journal', corpus_file.journal or "NULL")
text_node.set('pages', corpus_file.pages or "NULL")
text_node.set('publisher', corpus_file.publisher or "NULL")
text_node.set('booktitle', corpus_file.booktitle or 'NULL')
text_node.set('chapter', corpus_file.chapter or 'NULL')
text_node.set('editor', corpus_file.editor or 'NULL')
text_node.set('institution', corpus_file.institution or 'NULL')
text_node.set('journal', corpus_file.journal or 'NULL')
text_node.set('pages', corpus_file.pages or 'NULL')
text_node.set('publisher', corpus_file.publisher or 'NULL')
text_node.set('publishing_year', str(corpus_file.publishing_year))
text_node.set('school', corpus_file.school or "NULL")
text_node.set('school', corpus_file.school or 'NULL')
text_node.set('title', corpus_file.title)
element_tree.write(corpus_file.path)
master_element_tree.getroot().insert(1, text_node)
output_file = os.path.join(output_dir, 'corpus.vrt')
master_element_tree.write(output_file,
xml_declaration=True,
encoding='utf-8')
corpus_element.insert(1, text_node)
ET.ElementTree(corpus_element).write(output_file, encoding='utf-8')
self.last_edited_date = datetime.utcnow()
self.status = 'submitted'

View File

@ -112,3 +112,6 @@ h1 .nopaque-icons, h2 .nopaque-icons, h3 .nopaque-icons, h4 .nopaque-icons,
.nopaque-icons.service-icon[data-service="ocr"]:empty:before {content: "F";}
.status-text[data-status]:empty:before {content: attr(data-status);}
.hoverable {cursor: pointer;}
.s-attr.chip .p-attr.chip {background-color: inherit;}

View File

@ -0,0 +1,439 @@
/**
 * Socket.IO based client for the CQi interface of a single corpus.
 *
 * Each request method returns a Promise that resolves with the payload of
 * the server response when its code is 200 and rejects with the whole
 * response object otherwise.
 */
class CQiClient {
  /**
   * @param corpusId Sent to the server as `auth.corpus_id` on connect.
   */
  constructor(corpusId) {
    const socketOptions = {
      auth: {corpus_id: corpusId},
      transports: ['websocket'],
      upgrade: false
    };
    this.socket = io('/corpora/corpus/corpus_analysis', socketOptions);
    this.connected = false;
    this.corpora = new CQiCorpusCollection(this.socket);
  }

  /** Emit `cqi.connect`; sets `connected` to true on success. */
  connect() {
    return new Promise((resolve, reject) => {
      const onResponse = response => {
        if (response.code !== 200) {
          reject(response);
          return;
        }
        this.connected = true;
        resolve(response.payload);
      };
      this.socket.emit('cqi.connect', onResponse);
    });
  }

  /** Emit `cqi.disconnect`; sets `connected` to false on success. */
  disconnect() {
    return new Promise((resolve, reject) => {
      const onResponse = response => {
        if (response.code !== 200) {
          reject(response);
          return;
        }
        this.connected = false;
        resolve(response.payload);
      };
      this.socket.emit('cqi.disconnect', onResponse);
    });
  }

  /** Emit `cqi.ping`; does not touch the `connected` flag. */
  ping() {
    return new Promise((resolve, reject) => {
      const onResponse = response => {
        if (response.code !== 200) {
          reject(response);
          return;
        }
        resolve(response.payload);
      };
      this.socket.emit('cqi.ping', onResponse);
    });
  }
}
/**
 * Access to the corpora that are reachable through a CQi socket.
 *
 * Both methods return a Promise that resolves on a response with code 200
 * and rejects with the whole response object otherwise.
 */
class CQiCorpusCollection {
  constructor(socket) {
    this.socket = socket;
  }

  /**
   * Request a single corpus by name.
   * Resolves with a CQiCorpus built from the response payload.
   */
  get(corpusName) {
    return new Promise((resolve, reject) => {
      const args = {corpus_name: corpusName};
      this.socket.emit('cqi.corpora.get', args, response => {
        if (response.code === 200) {
          resolve(new CQiCorpus(this.socket, response.payload));
        } else {
          reject(response);
        }
      });
    });
  }

  /**
   * Request all corpora.
   * Resolves with one CQiCorpus per payload entry.
   */
  list() {
    return new Promise((resolve, reject) => {
      this.socket.emit('cqi.corpora.list', response => {
        if (response.code === 200) {
          // The entries describe corpora, so they are wrapped in CQiCorpus
          // (socket, attrs) like in get(), not in CQiSubcorpus, whose
          // constructor expects (socket, corpus, attrs).
          resolve(response.payload.map(attrs => new CQiCorpus(this.socket, attrs)));
        } else {
          reject(response);
        }
      });
    });
  }
}
/**
 * A corpus as described by the server, plus its attribute and subcorpus
 * collections.
 *
 * Except for updateDb(), every method returns a Promise that resolves with
 * the response payload on code 200 and rejects with the whole response
 * object otherwise.
 */
class CQiCorpus {
  constructor(socket, attrs) {
    this.socket = socket;
    this.charset = attrs.charset;
    this.name = attrs.name;
    this.properties = attrs.properties;
    this.size = attrs.size;
    this.alignmentAttributes = new CQiAlignmentAttributeCollection(this.socket, this);
    this.positionalAttributes = new CQiPositionalAttributeCollection(this.socket, this);
    this.structuralAttributes = new CQiStructuralAttributeCollection(this.socket, this);
    this.subcorpora = new CQiSubcorpusCollection(this.socket, this);
  }

  // Emit `event` with `args` and translate the acknowledgement into a Promise.
  _request(event, args) {
    return new Promise((resolve, reject) => {
      this.socket.emit(event, args, response => {
        if (response.code !== 200) {
          reject(response);
          return;
        }
        resolve(response.payload);
      });
    });
  }

  drop() {
    return this._request('cqi.corpora.corpus.drop', {corpus_name: this.name});
  }

  /** Run `queryString` on this corpus and store the result as `subcorpus_name`. */
  query(subcorpus_name, queryString) {
    return this._request('cqi.corpora.corpus.query', {
      corpus_name: this.name,
      subcorpus_name: subcorpus_name,
      query: queryString
    });
  }

  // nopaque specific CQi extension
  paginate(page=1, perPage=20) {
    return this._request('cqi.corpora.corpus.paginate', {
      corpus_name: this.name,
      page: page,
      per_page: perPage
    });
  }

  /** Fire-and-forget: emits the update_db event without an acknowledgement callback. */
  updateDb() {
    this.socket.emit('cqi.corpora.corpus.update_db', {corpus_name: this.name});
  }
}
/**
 * Access to the alignment attributes of one corpus.
 *
 * Both methods return a Promise that resolves on a response with code 200
 * and rejects with the whole response object otherwise.
 */
class CQiAlignmentAttributeCollection {
  constructor(socket, corpus) {
    this.corpus = corpus;
    this.socket = socket;
  }

  /** Resolves with a CQiAlignmentAttribute built from the response payload. */
  get(alignmentAttributeName) {
    return new Promise((resolve, reject) => {
      const args = {
        corpus_name: this.corpus.name,
        alignment_attribute_name: alignmentAttributeName
      };
      const onResponse = response => {
        if (response.code !== 200) {
          reject(response);
          return;
        }
        resolve(new CQiAlignmentAttribute(this.socket, this.corpus, response.payload));
      };
      this.socket.emit('cqi.corpora.corpus.alignment_attributes.get', args, onResponse);
    });
  }

  /** Resolves with one CQiAlignmentAttribute per payload entry. */
  list() {
    return new Promise((resolve, reject) => {
      const args = {corpus_name: this.corpus.name};
      const onResponse = response => {
        if (response.code !== 200) {
          reject(response);
          return;
        }
        const attributes = response.payload.map(
          attrs => new CQiAlignmentAttribute(this.socket, this.corpus, attrs)
        );
        resolve(attributes);
      };
      // NOTE(review): this event name starts with "cqi.corpus." while get()
      // uses "cqi.corpora.corpus." - confirm against the server handlers.
      this.socket.emit('cqi.corpus.alignment_attributes.list', args, onResponse);
    });
  }
}
/**
 * Plain description of one alignment attribute of a corpus.
 */
class CQiAlignmentAttribute {
  constructor(socket, corpus, attrs) {
    // Property order is kept: socket, corpus, name, size.
    Object.assign(this, {
      socket: socket,
      corpus: corpus,
      name: attrs.name,
      size: attrs.size
    });
  }
}
/**
 * Access to the positional attributes of one corpus.
 *
 * Both methods return a Promise that resolves on a response with code 200
 * and rejects with the whole response object otherwise.
 */
class CQiPositionalAttributeCollection {
  constructor(socket, corpus) {
    this.corpus = corpus;
    this.socket = socket;
  }

  /** Resolves with a CQiPositionalAttribute built from the response payload. */
  get(positionalAttributeName) {
    return new Promise((resolve, reject) => {
      const args = {
        corpus_name: this.corpus.name,
        positional_attribute_name: positionalAttributeName
      };
      const onResponse = response => {
        if (response.code !== 200) {
          reject(response);
          return;
        }
        resolve(new CQiPositionalAttribute(this.socket, this.corpus, response.payload));
      };
      this.socket.emit('cqi.corpora.corpus.positional_attributes.get', args, onResponse);
    });
  }

  /** Resolves with one CQiPositionalAttribute per payload entry. */
  list() {
    return new Promise((resolve, reject) => {
      const args = {corpus_name: this.corpus.name};
      const onResponse = response => {
        if (response.code !== 200) {
          reject(response);
          return;
        }
        const attributes = response.payload.map(
          attrs => new CQiPositionalAttribute(this.socket, this.corpus, attrs)
        );
        resolve(attributes);
      };
      // NOTE(review): this event name starts with "cqi.corpus." while get()
      // uses "cqi.corpora.corpus." - confirm against the server handlers.
      this.socket.emit('cqi.corpus.positional_attributes.list', args, onResponse);
    });
  }
}
/**
 * Plain description of one positional attribute of a corpus.
 */
class CQiPositionalAttribute {
  constructor(socket, corpus, attrs) {
    // The server sends snake_case `lexicon_size`; it is exposed as camelCase.
    // Property order is kept: socket, corpus, lexiconSize, name, size.
    Object.assign(this, {
      socket: socket,
      corpus: corpus,
      lexiconSize: attrs.lexicon_size,
      name: attrs.name,
      size: attrs.size
    });
  }
}
/**
 * Access to the structural attributes of one corpus.
 *
 * Both methods return a Promise that resolves on a response with code 200
 * and rejects with the whole response object otherwise.
 */
class CQiStructuralAttributeCollection {
  constructor(socket, corpus) {
    this.corpus = corpus;
    this.socket = socket;
  }

  /** Resolves with a CQiStructuralAttribute built from the response payload. */
  get(structuralAttributeName) {
    return new Promise((resolve, reject) => {
      const args = {
        corpus_name: this.corpus.name,
        structural_attribute_name: structuralAttributeName
      };
      const onResponse = response => {
        if (response.code !== 200) {
          reject(response);
          return;
        }
        resolve(new CQiStructuralAttribute(this.socket, this.corpus, response.payload));
      };
      this.socket.emit('cqi.corpora.corpus.structural_attributes.get', args, onResponse);
    });
  }

  /** Resolves with one CQiStructuralAttribute per payload entry. */
  list() {
    return new Promise((resolve, reject) => {
      const args = {corpus_name: this.corpus.name};
      const onResponse = response => {
        if (response.code !== 200) {
          reject(response);
          return;
        }
        const attributes = response.payload.map(
          attrs => new CQiStructuralAttribute(this.socket, this.corpus, attrs)
        );
        resolve(attributes);
      };
      // NOTE(review): this event name starts with "cqi.corpus." while get()
      // uses "cqi.corpora.corpus." - confirm against the server handlers.
      this.socket.emit('cqi.corpus.structural_attributes.list', args, onResponse);
    });
  }
}
/**
 * Plain description of one structural attribute of a corpus.
 */
class CQiStructuralAttribute {
  constructor(socket, corpus, attrs) {
    // The server sends snake_case `has_values`; it is exposed as camelCase.
    // Property order is kept: socket, corpus, hasValues, name, size.
    Object.assign(this, {
      socket: socket,
      corpus: corpus,
      hasValues: attrs.has_values,
      name: attrs.name,
      size: attrs.size
    });
  }
}
/**
 * Access to the subcorpora of one corpus.
 *
 * Both methods return a Promise that resolves on a response with code 200
 * and rejects with the whole response object otherwise.
 */
class CQiSubcorpusCollection {
  constructor(socket, corpus) {
    this.corpus = corpus;
    this.socket = socket;
  }

  /** Resolves with a CQiSubcorpus built from the response payload. */
  get(subcorpusName) {
    return new Promise((resolve, reject) => {
      const args = {
        corpus_name: this.corpus.name,
        subcorpus_name: subcorpusName
      };
      const onResponse = response => {
        if (response.code !== 200) {
          reject(response);
          return;
        }
        resolve(new CQiSubcorpus(this.socket, this.corpus, response.payload));
      };
      this.socket.emit('cqi.corpora.corpus.subcorpora.get', args, onResponse);
    });
  }

  /** Resolves with one CQiSubcorpus per payload entry. */
  list() {
    return new Promise((resolve, reject) => {
      const args = {corpus_name: this.corpus.name};
      const onResponse = response => {
        if (response.code !== 200) {
          reject(response);
          return;
        }
        const subcorpora = response.payload.map(
          attrs => new CQiSubcorpus(this.socket, this.corpus, attrs)
        );
        resolve(subcorpora);
      };
      this.socket.emit('cqi.corpora.corpus.subcorpora.list', args, onResponse);
    });
  }
}
/**
 * A subcorpus (query result) of a corpus.
 *
 * Every request method returns a Promise that resolves with the response
 * payload on code 200 and rejects with the whole response object otherwise.
 */
class CQiSubcorpus {
  constructor(socket, corpus, attrs) {
    this.socket = socket;
    this.corpus = corpus;
    this.fields = attrs.fields;
    this.name = attrs.name;
    this.size = attrs.size;
  }

  // Emit `event` and translate the acknowledgement into a Promise. The
  // arguments are built by `makeArgs` inside the Promise executor so that an
  // error while building them rejects the Promise instead of throwing.
  _request(event, makeArgs) {
    return new Promise((resolve, reject) => {
      this.socket.emit(event, makeArgs(), response => {
        if (response.code === 200) {
          resolve(response.payload);
        } else {
          reject(response);
        }
      });
    });
  }

  drop() {
    return this._request(
      'cqi.corpora.corpus.subcorpora.subcorpus.drop',
      () => ({corpus_name: this.corpus.name, subcorpus_name: this.name})
    );
  }

  dump(field, first, last) {
    return this._request(
      'cqi.corpora.corpus.subcorpora.subcorpus.dump',
      () => ({
        corpus_name: this.corpus.name,
        subcorpus_name: this.name,
        field: field,
        first: first,
        last: last
      })
    );
  }

  export(context=50) {
    return this._request(
      'cqi.corpora.corpus.subcorpora.subcorpus.export',
      () => ({
        corpus_name: this.corpus.name,
        subcorpus_name: this.name,
        context: context
      })
    );
  }

  fdst_1(cutoff, field, attribute) {
    return this._request(
      'cqi.corpora.corpus.subcorpora.subcorpus.fdist_1',
      () => ({
        corpus_name: this.corpus.name,
        subcorpus_name: this.name,
        cutoff: cutoff,
        field: field,
        attribute: attribute
      })
    );
  }

  fdst_2(cutoff, field1, attribute1, field2, attribute2) {
    // Must target the two-field event; it previously emitted "fdist_1" with
    // the fdist_2 argument set (copy-paste slip).
    return this._request(
      'cqi.corpora.corpus.subcorpora.subcorpus.fdist_2',
      () => ({
        corpus_name: this.corpus.name,
        subcorpus_name: this.name,
        cutoff: cutoff,
        field1: field1,
        attribute1: attribute1,
        field2: field2,
        attribute2: attribute2
      })
    );
  }

  // nopaque specific CQi extension
  paginate(page=1, perPage=20, context=50) {
    return this._request(
      'cqi.corpora.corpus.subcorpora.subcorpus.paginate',
      () => ({
        corpus_name: this.corpus.name,
        subcorpus_name: this.name,
        page: page,
        per_page: perPage,
        context: context
      })
    );
  }
}

View File

@ -0,0 +1,118 @@
/**
 * Front-end controller of the corpus analysis page.
 *
 * Holds the CQi client and the fetched corpus in `data`, looks up the page's
 * HTML/Materialize elements and initialises registered extensions once the
 * corpus is available.
 */
class CorpusAnalysisApp {
  // Colors keyed by entity label. The misspelled property name is kept
  // because other scripts may reference it.
  static entitiyColors = {
    PERSON: '#a6e22d',
    PER: '#a6e22d',
    NORP: '#ef60b4',
    FACILITY: '#43c6fc',
    ORG: '#43c6fc',
    GPE: '#fd9720',
    LOC: '#fd9720',
    PRODUCT: '#a99dfb',
    MISC: '#a99dfb',
    EVENT: '#fc0',
    WORK_OF_ART: '#fc0',
    LANGUAGE: '#fc0',
    DATE: '#2fbbab',
    TIME: '#2fbbab',
    PERCENT: '#bbb',
    MONEY: '#bbb',
    QUANTITY: '#bbb',
    ORDINAL: '#bbb',
    CARDINAL: '#bbb'
  };

  /**
   * @param corpusId Id of the corpus, handed to the CQiClient in init().
   */
  constructor(corpusId) {
    this.data = {};
    // HTML elements
    this.elements = {
      container: document.querySelector('#corpus-analysis-app-container'),
      extensionTabs: document.querySelector('#corpus-analysis-app-extension-tabs'),
      initModal: document.querySelector('#corpus-analysis-app-init-modal'),
      initError: document.querySelector('#corpus-analysis-app-init-error'),
      initProgress: document.querySelector('#corpus-analysis-app-init-progress'),
      overview: document.querySelector('#corpus-analysis-app-overview')
    };
    // Materialize elements
    this.elements.m = {
      extensionTabs: M.Tabs.init(this.elements.extensionTabs),
      initModal: M.Modal.init(this.elements.initModal, {dismissible: false})
    };
    this.extensions = {};
    this.settings = {
      corpusId: corpusId
    };
  }

  /**
   * Connect to the server, fetch the corpus named 'CORPUS', then initialise
   * all registered extensions. While this is pending the action elements are
   * disabled and the init modal is shown; on failure the error is displayed
   * in the modal, which stays open.
   */
  init() {
    this.disableActionElements();
    this.elements.m.initModal.open();
    // Init data
    this.data.cQiClient = new CQiClient(this.settings.corpusId);
    this.data.cQiClient.connect()
      .then(cQiStatus => {
        return this.data.cQiClient.corpora.get('CORPUS');
      })
      .then(
        cQiCorpus => {
          this.data.corpus = {o: cQiCorpus};
          // TODO: Don't do this here
          cQiCorpus.updateDb();
          this.enableActionElements();
          for (let extension of Object.values(this.extensions)) {extension.init();}
          this.elements.m.initModal.close();
        },
        cQiError => {
          this.elements.initError.innerText = JSON.stringify(cQiError);
          this.elements.initError.classList.remove('hide');
          this.elements.initProgress.classList.add('hide');
          // `in` throws on non-objects, so make sure the payload is one
          // before probing it.
          let payload = cQiError?.payload;
          if (payload !== null && typeof payload === 'object' && 'code' in payload && 'msg' in payload) {
            nopaque.appClient.flash(`${payload.code}: ${payload.msg}`, 'error');
          }
        }
      );
    // Add event listeners
    for (let extensionSelectorElement of this.elements.overview.querySelectorAll('.extension-selector')) {
      extensionSelectorElement.addEventListener('click', () => {
        this.elements.m.extensionTabs.select(extensionSelectorElement.dataset.target);
      });
    }
  }

  /**
   * Register an extension under `extension.name`. A second registration with
   * the same name is refused with a console error.
   */
  registerExtension(extension) {
    if (extension.name in this.extensions) {
      console.error(`Can't register extension ${extension.name}: Already registered`);
      return;
    }
    this.extensions[extension.name] = extension;
    // Only late registrations are initialised here. While the corpus is
    // still being fetched, init() will initialise the extension itself;
    // doing it here as well would run extension.init() twice, the first
    // time without a corpus.
    let isConnected = 'cQiClient' in this.data && this.data.cQiClient.connected;
    if (isConnected && this.data.corpus !== undefined) {extension.init();}
  }

  disableActionElements() {
    this._setActionElementsDisabled(true);
  }

  enableActionElements() {
    this._setActionElementsDisabled(false);
  }

  // Enable or disable every `.corpus-analysis-action` element in the container.
  _setActionElementsDisabled(disabled) {
    let actionElements = this.elements.container.querySelectorAll('.corpus-analysis-action');
    for (let actionElement of actionElements) {
      if (actionElement.nodeName === 'INPUT') {
        actionElement.disabled = disabled;
      } else if (actionElement.nodeName === 'SELECT') {
        // The select is toggled through its sibling dropdown input; skip it
        // when that input does not exist.
        let dropdownInput = actionElement.parentNode.querySelector('input.select-dropdown');
        if (dropdownInput !== null) {dropdownInput.disabled = disabled;}
      } else {
        actionElement.classList.toggle('disabled', disabled);
      }
    }
  }
}

View File

@ -0,0 +1,432 @@
class CorpusAnalysisConcordance {
name = 'Concordance';
constructor(app) {
this.app = app;
this.data = {};
this.elements = {
// TODO: Prefix elements with "corpus-analysis-app-"
container: document.querySelector('#concordance-extension-container'),
error: document.querySelector('#concordance-extension-error'),
form: document.querySelector('#concordance-extension-form'),
progress: document.querySelector('#concordance-extension-progress'),
subcorpusInfo: document.querySelector('#concordance-extension-subcorpus-info'),
subcorpusActions: document.querySelector('#concordance-extension-subcorpus-actions'),
subcorpusItems: document.querySelector('#concordance-extension-subcorpus-items'),
subcorpusList: document.querySelector('#concordance-extension-subcorpus-list'),
subcorpusPagination: document.querySelector('#concordance-extension-subcorpus-pagination')
};
this.settings = {
context: parseInt(this.elements.form['context'].value),
perPage: parseInt(this.elements.form['per-page'].value),
selectedSubcorpus: undefined,
textStyle: parseInt(this.elements.form['text-style'].value),
tokenRepresentation: this.elements.form['token-representation'].value
};
this.app.registerExtension(this);
}
init() {
// Init data
this.data.corpus = this.app.data.corpus;
this.data.subcorpora = {};
// Add event listeners
this.elements.form.addEventListener('submit', event => {
event.preventDefault();
this.app.disableActionElements();
let query = this.elements.form.query.value.trim();
let subcorpusName = this.elements.form['subcorpus-name'].value;
this.elements.error.innerText = '';
this.elements.error.classList.add('hide');
this.elements.progress.classList.remove('hide');
let subcorpus = {};
this.data.corpus.o.query(subcorpusName, query)
.then(cQiStatus => {
subcorpus.q = query;
return this.data.corpus.o.subcorpora.get(subcorpusName);
})
.then(cQiSubcorpus => {
subcorpus.o = cQiSubcorpus;
return cQiSubcorpus.paginate(1, this.settings.perPage, this.settings.context);
})
.then(
paginatedSubcorpus => {
subcorpus.p = paginatedSubcorpus;
if (subcorpus !== 'Last') {this.data.subcorpora.Last = subcorpus;}
this.data.subcorpora[subcorpusName] = subcorpus;
this.settings.selectedSubcorpus = subcorpusName;
this.renderSubcorpusList();
this.renderSubcorpusInfo();
this.renderSubcorpusActions();
this.renderSubcorpusItems();
this.renderSubcorpusPagination();
this.elements.progress.classList.add('hide');
this.app.enableActionElements();
},
cQiError => {
this.elements.error.innerText = JSON.stringify(cQiError);
this.elements.error.classList.remove('hide');
if ('payload' in cQiError && 'code' in cQiError.payload && 'msg' in cQiError.payload) {
nopaque.appClient.flash(`${cQiError.payload.code}: ${cQiError.payload.msg}`, 'error');
}
this.elements.progress.classList.add('hide');
this.app.enableActionElements();
}
);
});
this.elements.form.addEventListener('change', event => {
if (event.target === this.elements.form['context']) {
this.settings.context = parseInt(this.elements.form['context'].value);
this.elements.form.submit.click();
}
if (event.target === this.elements.form['per-page']) {
this.settings.perPage = parseInt(this.elements.form['per-page'].value);
this.elements.form.submit.click();
}
if (event.target === this.elements.form['text-style']) {
this.settings.textStyle = parseInt(this.elements.form['text-style'].value);
this.setTextStyle();
}
if (event.target === this.elements.form['token-representation']) {
this.settings.tokenRepresentation = this.elements.form['token-representation'].value;
this.setTokenRepresentation();
}
});
}
clearSubcorpusList() {
this.elements.subcorpusList.innerHTML = '';
this.elements.subcorpusList.classList.add('hide');
}
renderSubcorpusList() {
this.clearSubcorpusList();
for (let subcorpusName in this.data.subcorpora) {
this.elements.subcorpusList.innerHTML += `
<a class="btn waves-effect waves-light subcorpus-selector" data-target="${subcorpusName}"><i class="material-icons left">bookmark</i>${subcorpusName}</a>
`.trim();
}
for (let subcorpusSelectorElement of this.elements.subcorpusList.querySelectorAll('.subcorpus-selector')) {
let subcorpusName = subcorpusSelectorElement.dataset.target;
if (subcorpusName === this.settings.selectedSubcorpus) {
subcorpusSelectorElement.classList.add('disabled');
continue;
}
subcorpusSelectorElement.addEventListener('click', () => {
this.settings.selectedSubcorpus = subcorpusName;
this.elements.progress.classList.remove('hide');
this.renderSubcorpusList();
this.renderSubcorpusInfo();
this.renderSubcorpusActions();
this.renderSubcorpusActions();
this.renderSubcorpusItems();
this.renderSubcorpusPagination();
this.elements.progress.classList.add('hide');
});
}
this.elements.subcorpusList.classList.remove('hide');
}
clearSubcorpusInfo() {
this.elements.subcorpusInfo.innerHTML = '';
this.elements.subcorpusInfo.classList.add('hide');
}
renderSubcorpusInfo() {
let subcorpus = this.data.subcorpora[this.settings.selectedSubcorpus];
this.clearSubcorpusInfo();
this.elements.subcorpusInfo.innerHTML = `${subcorpus.p.total} matches found for <code>${subcorpus.q.replace(/</g, "&lt;").replace(/>/g, "&gt;")}</code>`;
this.elements.subcorpusInfo.classList.remove('hide');
}
clearSubcorpusActions() {
for (let tooltippedElement of this.elements.subcorpusActions.querySelectorAll('.tooltipped')) {
M.Tooltip.getInstance(tooltippedElement).destroy();
}
this.elements.subcorpusActions.innerHTML = '';
}
renderSubcorpusActions() {
this.clearSubcorpusActions();
this.elements.subcorpusActions.innerHTML += `
<a class="btn-floating btn-small tooltipped waves-effect waves-light corpus-analysis-action download-subcorpus-trigger" data-tooltip="Download subcorpus">
<i class="material-icons">file_download</i>
</a>
<a class="btn-floating btn-small red tooltipped waves-effect waves-light corpus-analysis-action delete-subcorpus-trigger" data-tooltip="Delete subcorpus">
<i class="material-icons">delete</i>
</a>
`.trim();
M.Tooltip.init(this.elements.subcorpusActions.querySelectorAll('.tooltipped'));
this.elements.subcorpusActions.querySelector('.delete-subcorpus-trigger').addEventListener('click', event => {
event.preventDefault();
let subcorpus = this.data.subcorpora[this.settings.selectedSubcorpus];
subcorpus.o.drop().then(
cQiStatus => {
nopaque.appClient.flash(`${subcorpus.o.name} deleted`, 'corpus');
delete this.data.subcorpora[subcorpus.o.name];
this.settings.selectedSubcorpus = undefined;
for (let subcorpusName in this.data.subcorpora) {
this.settings.selectedSubcorpus = subcorpusName;
break;
}
this.renderSubcorpusList();
if (this.settings.selectedSubcorpus) {
this.renderSubcorpusInfo();
this.renderSubcorpusActions();
this.renderSubcorpusItems();
this.renderSubcorpusPagination();
} else {
this.clearSubcorpusInfo();
this.clearSubcorpusActions();
this.clearSubcorpusItems();
this.clearSubcorpusPagination();
}
},
cQiError => {
nopaque.appClient.flash(`${cQiError.payload.code}: ${cQiError.payload.msg}`, 'error');
}
);
});
}
clearSubcorpusItems() {
// Destroy the Materialize tooltips associated with .p-attr elements
for (let pAttrElement of this.elements.subcorpusItems.querySelectorAll('.p-attr.tooltipped')) {
M.Tooltip.getInstance(pAttrElement)?.destroy();
}
this.elements.subcorpusItems.innerHTML = `
<tr class="show-if-only-child">
<td colspan="100%">
<p>
<span class="card-title"><i class="left material-icons" style="font-size: inherit;">search</i>Nothing here...</span><br>
No matches available.
</p>
</td>
</tr>
`.trim();
}
renderSubcorpusItems() {
let subcorpus = this.data.subcorpora[this.settings.selectedSubcorpus];
this.clearSubcorpusItems();
for (let item of subcorpus.p.items) {
this.elements.subcorpusItems.innerHTML += `
<tr class="item" data-id="${item.num}">
<td class="num">${item.num}</td>
<td class="text-title">${this.foo(...item.c)}</td>
<td class="left-context">${item.lc ? this.cposRange2HTML(...item.lc) : ''}</td>
<td class="kwic">${this.cposRange2HTML(...item.c)}</td>
<td class="right-context">${item.rc ? this.cposRange2HTML(...item.rc) : ''}</td>
<td class="actions right-align">
<a class="btn-floating btn-small waves-effect waves-light corpus-analysis-action goto-reader-trigger"><i class="material-icons prefix">search</i></a>
<a class="btn-floating btn-small waves-effect waves-light corpus-analysis-action export-trigger"><i class="material-icons prefix">add</i></a>
</td>
</tr>
`.trim();
}
this.setTextStyle();
this.setTokenRepresentation();
for (let gotoReaderTriggerElement of this.elements.subcorpusItems.querySelectorAll('.goto-reader-trigger')) {
gotoReaderTriggerElement.addEventListener('click', event => {
event.preventDefault();
let corpusAnalysisReader = this.app.extensions.Reader;
let itemId = parseInt(gotoReaderTriggerElement.closest('.item').dataset.id);
let item = undefined;
for (let x of subcorpus.p.items) {if (x.num === itemId) {item = x;}}
let page = Math.max(1, Math.ceil(item.c[0] / corpusAnalysisReader.settings.perPage));
corpusAnalysisReader.page(page, () => {
let range = new Range();
let leftCpos = corpusAnalysisReader.data.corpus.p.items[0].includes(item.c[0]) ? item.c[0] : corpusAnalysisReader.data.corpus.p.items[0][0];
let rightCpos = corpusAnalysisReader.data.corpus.p.items[0].includes(item.c[1]) ? item.c[1] : corpusAnalysisReader.data.corpus.p.items[0].at(-1);
let leftElement = corpusAnalysisReader.elements.corpus.querySelector(`.p-attr[data-cpos="${leftCpos}"]`);
let rightElement = corpusAnalysisReader.elements.corpus.querySelector(`.p-attr[data-cpos="${rightCpos}"]`);
range.setStartBefore(leftElement);
range.setEndAfter(rightElement);
document.getSelection().removeAllRanges();
document.getSelection().addRange(range);
});
this.app.elements.m.extensionTabs.select('reader-extension-container');
});
}
}
clearSubcorpusPagination() {
this.elements.subcorpusPagination.innerHTML = '';
this.elements.subcorpusPagination.classList.add('hide');
}
renderSubcorpusPagination() {
let subcorpus = this.data.subcorpora[this.settings.selectedSubcorpus];
this.clearSubcorpusPagination();
if (subcorpus.p.pages === 0) {return;}
this.elements.subcorpusPagination.innerHTML += `
<li class="${subcorpus.p.page === 1 ? 'disabled' : 'waves-effect'}">
<a class="corpus-analysis-action pagination-trigger" ${subcorpus.p.page === 1 ? '' : 'data-target="1"'}>
<i class="material-icons">first_page</i>
</a>
</li>
`.trim();
this.elements.subcorpusPagination.innerHTML += `
<li class="${subcorpus.p.has_prev ? 'waves-effect' : 'disabled'}">
<a class="corpus-analysis-action pagination-trigger" ${subcorpus.p.has_prev ? 'data-target="' + subcorpus.p.prev_num + '"' : ''}>
<i class="material-icons">chevron_left</i>
</a>
</li>
`.trim();
for (let i = 1; i <= subcorpus.p.pages; i++) {
this.elements.subcorpusPagination.innerHTML += `
<li class="${i === subcorpus.p.page ? 'active' : 'waves-effect'}">
<a class="corpus-analysis-action pagination-trigger" ${i === subcorpus.p.page ? '' : 'data-target="' + i + '"'}>${i}</a>
</li>
`.trim();
}
this.elements.subcorpusPagination.innerHTML += `
<li class="${subcorpus.p.has_next ? 'waves-effect' : 'disabled'}">
<a class="corpus-analysis-action pagination-trigger" ${subcorpus.p.has_next ? 'data-target="' + subcorpus.p.next_num + '"' : ''}>
<i class="material-icons">chevron_right</i>
</a>
</li>
`.trim();
this.elements.subcorpusPagination.innerHTML += `
<li class="${subcorpus.p.page === subcorpus.p.pages ? 'disabled' : 'waves-effect'}">
<a class="corpus-analysis-action pagination-trigger" ${subcorpus.p.page === subcorpus.p.pages ? '' : 'data-target="' + subcorpus.p.pages + '"'}>
<i class="material-icons">last_page</i>
</a>
</li>
`.trim();
for (let paginationTriggerElement of this.elements.subcorpusPagination.querySelectorAll('.pagination-trigger[data-target]')) {
paginationTriggerElement.addEventListener('click', event => {
event.preventDefault();
this.app.disableActionElements();
this.elements.progress.classList.remove('hide');
let page = parseInt(paginationTriggerElement.dataset.target);
subcorpus.o.paginate(page, this.settings.perPage, this.settings.context)
.then(
paginatedSubcorpus => {
subcorpus.p = paginatedSubcorpus;
this.renderSubcorpusItems();
this.renderSubcorpusPagination();
this.elements.progress.classList.add('hide');
this.app.enableActionElements();
}
)
});
}
this.elements.subcorpusPagination.classList.remove('hide');
}
foo(firstCpos, lastCpos) {
let subcorpus = this.data.subcorpora[this.settings.selectedSubcorpus];
/* Returns a list of texts occuring in this cpos range */
let textIds = new Set();
for (let cpos = firstCpos; cpos <= lastCpos; cpos++) {
textIds.add(subcorpus.p.lookups.cpos_lookup[cpos].text);
}
return [...textIds].map(x => subcorpus.p.lookups.text_lookup[x].title).join(', ');
}
cposRange2HTML(firstCpos, lastCpos) {
let subcorpus = this.data.subcorpora[this.settings.selectedSubcorpus];
let prevPAttr, pAttr, nextPAttr;
let isEntityStart, isEntityEnd;
let html = '';
for (let cpos = firstCpos; cpos <= lastCpos; cpos++) {
prevPAttr = cpos > firstCpos ? subcorpus.p.lookups.cpos_lookup[cpos - 1] : null;
pAttr = subcorpus.p.lookups.cpos_lookup[cpos];
nextPAttr = cpos < lastCpos ? subcorpus.p.lookups.cpos_lookup[cpos + 1] : null;
isEntityStart = 'ent' in pAttr && pAttr.ent !== prevPAttr?.ent;
isEntityEnd = 'ent' in pAttr && pAttr.ent !== nextPAttr?.ent;
// Add a space before pAttr
if (cpos !== firstCpos || pAttr.simple_pos !== 'PUNCT') {html += ' ';}
// Add entity start
if (isEntityStart) {
html += `<span class="s-attr" data-cpos="${cpos}" data-id="${pAttr.ent}" data-type="ent">`;
}
// Add pAttr
html += `<span class="p-attr" data-cpos="${cpos}"></span>`;
// Add entity end
if (isEntityEnd) {
html += ` <span class="badge black-text hide new s-attr white" cpos="${cpos}" data-type="ent_type" data-badge-caption="">${subcorpus.p.lookups.ent_lookup[pAttr.ent].type}</span>`;
html += '</span>';
}
}
return html;
}
setTextStyle() {
let subcorpus = this.data.subcorpora[this.settings.selectedSubcorpus];
if (this.settings.textStyle >= 0) {
// Destroy with .p-attr elements associated Materialize tooltips
for (let pAttrElement of this.elements.subcorpusItems.querySelectorAll('.p-attr.tooltipped')) {
M.Tooltip.getInstance(pAttrElement)?.destroy();
}
// Set basic styling on .p-attr elements
for (let pAttrElement of this.elements.subcorpusItems.querySelectorAll('.p-attr')) {
pAttrElement.setAttribute('class', 'p-attr');
}
// Set basic styling on .s-attr[data-type="ent"] elements
for (let entElement of this.elements.subcorpusItems.querySelectorAll('.s-attr[data-type="ent"]')) {
entElement.querySelector('.s-attr[data-type="ent_type"]').classList.add('hide');
entElement.removeAttribute('style');
entElement.setAttribute('class', 's-attr');
}
}
if (this.settings.textStyle >= 1) {
// Set advanced styling on .s-attr[data-type="ent"] elements
for (let entElement of this.elements.subcorpusItems.querySelectorAll('.s-attr[data-type="ent"]')) {
let ent = subcorpus.p.lookups.ent_lookup[entElement.dataset.id];
entElement.classList.add('chip');
entElement.style.backgroundColor = CorpusAnalysisApp.entitiyColors[ent.type];
entElement.querySelector('.s-attr[data-type="ent_type"]').classList.remove('hide');
}
}
if (this.settings.textStyle >= 2) {
// Set advanced styling on .p-attr elements
for (let pAttrElement of this.elements.subcorpusItems.querySelectorAll('.p-attr')) {
pAttrElement.classList.add('chip', 'hoverable', 'tooltipped');
let cpos = pAttrElement.dataset.cpos;
let pAttr = subcorpus.p.lookups.cpos_lookup[cpos];
let positionalPropertiesHTML = `
<p class="left-align">
<b>Positional properties</b><br>
<span>Token: ${cpos}</span>
`.trim();
let structuralPropertiesHTML = `
<p class="left-align">
<b>Structural properties</b>
`.trim();
for (let [property, propertyValue] of Object.entries(pAttr)) {
if (['lemma', 'ner', 'pos', 'simple_pos', 'word'].includes(property)) {
if (propertyValue === 'None') {continue;}
positionalPropertiesHTML += `<br><i class="material-icons" style="font-size: inherit;">subdirectory_arrow_right</i>${property}: ${propertyValue}`;
} else {
structuralPropertiesHTML += `<br><span>${property}: ${propertyValue}</span>`;
if (!(`${property}_lookup` in subcorpus.p.lookups)) {continue;}
for (let [subproperty, subpropertyValue] of Object.entries(subcorpus.p.lookups[`${property}_lookup`][propertyValue])) {
if (subpropertyValue === 'NULL') {continue;}
structuralPropertiesHTML += `<br><i class="material-icons" style="font-size: inherit;">subdirectory_arrow_right</i>${subproperty}: ${subpropertyValue}`
}
}
}
positionalPropertiesHTML += '</p>';
structuralPropertiesHTML += '</p>';
M.Tooltip.init(
pAttrElement,
{html: positionalPropertiesHTML + structuralPropertiesHTML}
);
}
}
}
setTokenRepresentation() {
let subcorpus = this.data.subcorpora[this.settings.selectedSubcorpus];
for (let pAttrElement of this.elements.subcorpusItems.querySelectorAll('.p-attr')) {
let pAttr = subcorpus.p.lookups.cpos_lookup[pAttrElement.dataset.cpos];
pAttrElement.innerText = pAttr[this.settings.tokenRepresentation];
}
}
}

View File

@ -0,0 +1,270 @@
/**
 * Reader extension of the corpus analysis app.
 *
 * Shows the corpus page by page, including entity and token annotations. It
 * registers itself at the app on construction and reads its settings
 * (tokens per page, text style, token representation) from the reader form.
 */
class CorpusAnalysisReader {
  name = 'Reader';

  /**
   * @param app The app this extension registers itself at. The methods below
   *   use `app.data.corpus`, `app.disableActionElements()` and
   *   `app.enableActionElements()`.
   */
  constructor(app) {
    this.app = app;
    this.data = {};
    this.elements = {
      // TODO: Prefix elements with "corpus-analysis-app-"
      container: document.querySelector('#reader-extension-container'),
      error: document.querySelector('#reader-extension-error'),
      form: document.querySelector('#reader-extension-form'),
      progress: document.querySelector('#reader-extension-progress'),
      corpus: document.querySelector('#reader-extension-corpus'),
      corpusPagination: document.querySelector('#reader-extension-corpus-pagination')
    };
    this.settings = {
      perPage: parseInt(this.elements.form['per-page'].value, 10),
      textStyle: parseInt(this.elements.form['text-style'].value, 10),
      tokenRepresentation: this.elements.form['token-representation'].value
    };
    this.app.registerExtension(this);
  }

  /**
   * Take over the corpus from the app, install the form event listeners and
   * load the first page.
   */
  init() {
    // Init data
    this.data.corpus = this.app.data.corpus;
    this.data.subcorpora = {};
    // Add event listeners
    this.elements.form.addEventListener('submit', (event) => {
      event.preventDefault();
      this._loadPage(1);
    });
    this.elements.form.addEventListener('change', event => {
      if (event.target === this.elements.form['per-page']) {
        this.settings.perPage = parseInt(this.elements.form['per-page'].value, 10);
        // A different page size requires the data to be fetched again
        this.elements.form.submit.click();
      }
      if (event.target === this.elements.form['text-style']) {
        this.settings.textStyle = parseInt(this.elements.form['text-style'].value, 10);
        this.setTextStyle();
      }
      if (event.target === this.elements.form['token-representation']) {
        this.settings.tokenRepresentation = this.elements.form['token-representation'].value;
        this.setTokenRepresentation();
      }
    });
    // Load initial data
    this.elements.form.submit.click();
  }

  /**
   * Request a page of the corpus and render it.
   *
   * While the request is running the action elements are disabled and the
   * progress bar is shown. Both are reverted on success and on failure; a
   * failure is additionally written to the error element.
   *
   * @param pageNum Number of the page to request.
   * @param callback Optional function called after the page got rendered.
   */
  _loadPage(pageNum, callback) {
    this.app.disableActionElements();
    this.elements.error.innerText = '';
    this.elements.error.classList.add('hide');
    this.elements.progress.classList.remove('hide');
    this.data.corpus.o.paginate(pageNum, this.settings.perPage)
      .then(
        paginatedCorpus => {
          this.data.corpus.p = paginatedCorpus;
          this.renderCorpus();
          this.renderCorpusPagination();
          this.elements.progress.classList.add('hide');
          this.app.enableActionElements();
          if (typeof callback === 'function') {callback();}
        },
        error => {
          this.elements.error.innerText = JSON.stringify(error);
          this.elements.error.classList.remove('hide');
          // The "in" operator throws on non-objects, so make sure there is a
          // payload object before inspecting it
          let payload = typeof error === 'object' && error !== null ? error.payload : undefined;
          if (typeof payload === 'object' && payload !== null && 'code' in payload && 'msg' in payload) {
            nopaque.appClient.flash(`${payload.code}: ${payload.msg}`, 'error');
          }
          this.elements.progress.classList.add('hide');
          this.app.enableActionElements();
        }
      );
  }

  /**
   * Destroy the tooltips of the rendered tokens and replace the corpus view
   * with the "nothing here" placeholder.
   */
  clearCorpus() {
    // Destroy with .p-attr elements associated Materialize tooltips
    for (let pAttrElement of this.elements.corpus.querySelectorAll('.p-attr.tooltipped')) {
      M.Tooltip.getInstance(pAttrElement)?.destroy();
    }
    this.elements.corpus.innerHTML = `
      <p class="show-if-only-child">
        <span class="card-title"><i class="left material-icons" style="font-size: inherit;">search</i>Nothing here...</span><br>
        No text available.
      </p>
    `.trim();
  }

  /**
   * Render the first item of the current page (a list of cpos) as one
   * paragraph and apply text style and token representation. If the page
   * holds no cpos only the placeholder is shown.
   */
  renderCorpus() {
    this.clearCorpus();
    let item = this.data.corpus.p.items[0];
    // An empty page would otherwise fail on item[0] or add an empty
    // paragraph that hides the placeholder
    if (item === undefined || item.length === 0) {return;}
    this.elements.corpus.innerHTML += `
      <p>${this.cposRange2HTML(item[0], item[item.length - 1])}</p>
    `.trim();
    this.setTextStyle();
    this.setTokenRepresentation();
  }

  /** Empty the pagination list and hide it. */
  clearCorpusPagination() {
    this.elements.corpusPagination.innerHTML = '';
    this.elements.corpusPagination.classList.add('hide');
  }

  /**
   * Render the pagination controls: "first page", "previous page", one entry
   * per page, "next page" and "last page". Only entries leading to another
   * page get a `data-target` attribute and a click handler. Nothing is
   * rendered if the corpus has no pages.
   */
  renderCorpusPagination() {
    this.clearCorpusPagination();
    let p = this.data.corpus.p;
    if (p.pages === 0) {return;}
    // Build the complete markup first and write it to the DOM once instead
    // of re-parsing the list after every appended entry
    let paginationHTML = '';
    paginationHTML += `
      <li class="${p.page === 1 ? 'disabled' : 'waves-effect'}">
        <a class="corpus-analysis-action pagination-trigger" ${p.page === 1 ? '' : 'data-target="1"'}>
          <i class="material-icons">first_page</i>
        </a>
      </li>
    `.trim();
    paginationHTML += `
      <li class="${p.has_prev ? 'waves-effect' : 'disabled'}">
        <a class="corpus-analysis-action pagination-trigger" ${p.has_prev ? 'data-target="' + p.prev_num + '"' : ''}>
          <i class="material-icons">chevron_left</i>
        </a>
      </li>
    `.trim();
    for (let i = 1; i <= p.pages; i++) {
      paginationHTML += `
        <li class="${i === p.page ? 'active' : 'waves-effect'}">
          <a class="corpus-analysis-action pagination-trigger" ${i === p.page ? '' : 'data-target="' + i + '"'}>${i}</a>
        </li>
      `.trim();
    }
    paginationHTML += `
      <li class="${p.has_next ? 'waves-effect' : 'disabled'}">
        <a class="corpus-analysis-action pagination-trigger" ${p.has_next ? 'data-target="' + p.next_num + '"' : ''}>
          <i class="material-icons">chevron_right</i>
        </a>
      </li>
    `.trim();
    paginationHTML += `
      <li class="${p.page === p.pages ? 'disabled' : 'waves-effect'}">
        <a class="corpus-analysis-action pagination-trigger" ${p.page === p.pages ? '' : 'data-target="' + p.pages + '"'}>
          <i class="material-icons">last_page</i>
        </a>
      </li>
    `.trim();
    this.elements.corpusPagination.innerHTML = paginationHTML;
    for (let paginateTriggerElement of this.elements.corpusPagination.querySelectorAll('.pagination-trigger[data-target]')) {
      paginateTriggerElement.addEventListener('click', event => {
        event.preventDefault();
        let page = parseInt(paginateTriggerElement.dataset.target, 10);
        this.page(page);
      });
    }
    this.elements.corpusPagination.classList.remove('hide');
  }

  /**
   * Build the markup for the tokens in the cpos range [firstCpos, lastCpos].
   *
   * Every token becomes an empty `.p-attr` span (its text is filled in by
   * setTokenRepresentation). Consecutive tokens sharing the same `ent` value
   * are wrapped in one `.s-attr[data-type="ent"]` span that ends with a
   * hidden badge carrying the entity type.
   *
   * @returns The markup as string, empty if firstCpos > lastCpos.
   */
  cposRange2HTML(firstCpos, lastCpos) {
    let prevPAttr, pAttr, nextPAttr;
    let isEntityStart, isEntityEnd;
    let html = '';
    for (let cpos = firstCpos; cpos <= lastCpos; cpos++) {
      // Neighbours outside of the requested range are treated as absent
      prevPAttr = cpos > firstCpos ? this.data.corpus.p.lookups.cpos_lookup[cpos - 1] : null;
      pAttr = this.data.corpus.p.lookups.cpos_lookup[cpos];
      nextPAttr = cpos < lastCpos ? this.data.corpus.p.lookups.cpos_lookup[cpos + 1] : null;
      isEntityStart = 'ent' in pAttr && pAttr.ent !== prevPAttr?.ent;
      isEntityEnd = 'ent' in pAttr && pAttr.ent !== nextPAttr?.ent;
      // Add a space before pAttr (only omitted for punctuation at the start)
      if (cpos !== firstCpos || pAttr.simple_pos !== 'PUNCT') {html += ' ';}
      // Add entity start
      if (isEntityStart) {
        html += `<span class="s-attr" data-cpos="${cpos}" data-id="${pAttr.ent}" data-type="ent">`;
      }
      // Add pAttr
      html += `<span class="p-attr" data-cpos="${cpos}"></span>`;
      // Add entity end
      if (isEntityEnd) {
        html += ` <span class="badge black-text hide new s-attr white" cpos="${cpos}" data-type="ent_type" data-badge-caption="">${this.data.corpus.p.lookups.ent_lookup[pAttr.ent].type}</span>`;
        html += '</span>';
      }
    }
    return html;
  }

  /**
   * Show the given page.
   *
   * If the page is already loaded and a callback is given, only the callback
   * is invoked. Otherwise the page is requested and rendered first.
   *
   * @param pageNum Number of the page to show.
   * @param callback Optional function called once the page is rendered.
   */
  page(pageNum, callback) {
    if (this.data.corpus.p?.page === pageNum && typeof callback === 'function') {
      callback();
      return;
    }
    this._loadPage(pageNum, callback);
  }

  /**
   * Apply the configured text style to the rendered corpus.
   *
   * The levels build on each other: level 0 and above resets tokens and
   * entities to their basic look, level 1 and above additionally renders
   * entities as colored chips with a visible type badge, and level 2 and
   * above additionally renders tokens as chips with a Materialize tooltip
   * listing their positional and structural properties.
   */
  setTextStyle() {
    if (this.settings.textStyle >= 0) {
      // Destroy with .p-attr elements associated Materialize tooltips
      for (let pAttrElement of this.elements.corpus.querySelectorAll('.p-attr.tooltipped')) {
        M.Tooltip.getInstance(pAttrElement)?.destroy();
      }
      // Set basic styling on .p-attr elements
      for (let pAttrElement of this.elements.corpus.querySelectorAll('.p-attr')) {
        pAttrElement.setAttribute('class', 'p-attr');
      }
      // Set basic styling on .s-attr[data-type="ent"] elements
      for (let entElement of this.elements.corpus.querySelectorAll('.s-attr[data-type="ent"]')) {
        entElement.querySelector('.s-attr[data-type="ent_type"]').classList.add('hide');
        entElement.removeAttribute('style');
        entElement.setAttribute('class', 's-attr');
      }
    }
    if (this.settings.textStyle >= 1) {
      // Set advanced styling on .s-attr[data-type="ent"] elements
      for (let entElement of this.elements.corpus.querySelectorAll('.s-attr[data-type="ent"]')) {
        let ent = this.data.corpus.p.lookups.ent_lookup[entElement.dataset.id];
        entElement.classList.add('chip');
        entElement.style.backgroundColor = CorpusAnalysisApp.entitiyColors[ent.type];
        entElement.querySelector('.s-attr[data-type="ent_type"]').classList.remove('hide');
      }
    }
    if (this.settings.textStyle >= 2) {
      // Set advanced styling on .p-attr elements
      for (let pAttrElement of this.elements.corpus.querySelectorAll('.p-attr')) {
        pAttrElement.classList.add('chip', 'hoverable', 'tooltipped');
        let cpos = pAttrElement.dataset.cpos;
        let pAttr = this.data.corpus.p.lookups.cpos_lookup[cpos];
        let positionalPropertiesHTML = `
          <p class="left-align">
            <b>Positional properties</b><br>
            <span>Token: ${cpos}</span>
        `.trim();
        let structuralPropertiesHTML = `
          <p class="left-align">
            <b>Structural properties</b>
        `.trim();
        for (let [property, propertyValue] of Object.entries(pAttr)) {
          if (['lemma', 'ner', 'pos', 'simple_pos', 'word'].includes(property)) {
            if (propertyValue === 'None') {continue;}
            positionalPropertiesHTML += `<br><i class="material-icons" style="font-size: inherit;">subdirectory_arrow_right</i>${property}: ${propertyValue}`;
          } else {
            // Everything else is listed as a structural property, followed
            // by the details stored in the matching lookup (if there is one)
            structuralPropertiesHTML += `<br><span>${property}: ${propertyValue}</span>`;
            if (!(`${property}_lookup` in this.data.corpus.p.lookups)) {continue;}
            for (let [subproperty, subpropertyValue] of Object.entries(this.data.corpus.p.lookups[`${property}_lookup`][propertyValue])) {
              if (subpropertyValue === 'NULL') {continue;}
              structuralPropertiesHTML += `<br><i class="material-icons" style="font-size: inherit;">subdirectory_arrow_right</i>${subproperty}: ${subpropertyValue}`;
            }
          }
        }
        positionalPropertiesHTML += '</p>';
        structuralPropertiesHTML += '</p>';
        M.Tooltip.init(
          pAttrElement,
          {html: positionalPropertiesHTML + structuralPropertiesHTML}
        );
      }
    }
  }

  /**
   * Fill every rendered token element with the value of the configured token
   * representation, looked up by the element's cpos.
   */
  setTokenRepresentation() {
    for (let pAttrElement of this.elements.corpus.querySelectorAll('.p-attr')) {
      let pAttr = this.data.corpus.p.lookups.cpos_lookup[pAttrElement.dataset.cpos];
      pAttrElement.innerText = pAttr[this.settings.tokenRepresentation];
    }
  }
}

View File

@ -13,7 +13,7 @@ class CorpusDisplay extends RessourceDisplay {
this.setLastEditedDate(this.user.data.corpora[this.corpusId].last_edited_date);
this.setStatus(this.user.data.corpora[this.corpusId].status);
this.setTitle(this.user.data.corpora[this.corpusId].title);
this.setTokenRatio(this.user.data.corpora[this.corpusId].current_nr_of_tokens, this.user.data.corpora[this.corpusId].max_nr_of_tokens);
this.setTokenRatio(this.user.data.corpora[this.corpusId].num_tokens, this.user.data.corpora[this.corpusId].max_num_tokens);
}
patch(patch) {
@ -53,9 +53,8 @@ class CorpusDisplay extends RessourceDisplay {
for (let element of this.displayElement.querySelectorAll('.corpus-title')) {this.setElement(element, title);}
}
setTokenRatio(currentNrOfTokens, maxNrOfTokens) {
let tokenRatio = `${currentNrOfTokens}/${maxNrOfTokens}`;
for (let element of this.displayElement.querySelectorAll('.corpus-token-ratio')) {this.setElement(element, tokenRatio);}
setTokenRatio(numTokens, maxNumTokens) {
for (let element of this.displayElement.querySelectorAll('.corpus-token-ratio')) {this.setElement(element, `${numTokens}/${maxNumTokens}`);}
}
setDescription(description) {

View File

@ -0,0 +1,102 @@
<!-- Concordance extension: query form on top, result table (KWIC view) below -->
<div class="row" id="concordance-extension-container">
  <div class="col s12">
    <div class="card">
      <div class="card-content">
        <!-- Query form. Note: input is a void element and must not have an end tag. -->
        <form id="concordance-extension-form">
          <div class="row">
            <div class="input-field col s12 m9">
              <i class="material-icons prefix">search</i>
              <input class="validate corpus-analysis-action" id="concordance-extension-form-query" name="query" type="text" required pattern=".*\S+.*">
              <label for="concordance-extension-form-query">Query</label>
              <span class="error-color-text helper-text hide" id="concordance-extension-error"></span>
            </div>
            <div class="input-field col s12 m3">
              <i class="material-icons prefix">arrow_forward</i>
              <input class="validate corpus-analysis-action" id="concordance-extension-form-subcorpus-name" name="subcorpus-name" type="text" required pattern="^[A-Z][a-z0-9\-]*" value="Last">
              <label for="concordance-extension-form-subcorpus-name">Subcorpus name</label>
            </div>
            <!-- Display settings -->
            <div class="col s12 m9 l9">
              <div class="row">
                <div class="input-field col s4 l3">
                  <i class="material-icons prefix">short_text</i>
                  <select class="corpus-analysis-action" name="context">
                    <option value="10" selected>10</option>
                    <option value="15">15</option>
                    <option value="20">20</option>
                    <option value="25">25</option>
                    <option value="30">30</option>
                  </select>
                  <label>Context</label>
                </div>
                <div class="input-field col s4 l3">
                  <i class="material-icons prefix">format_list_numbered</i>
                  <select class="corpus-analysis-action" name="per-page">
                    <option value="10" selected>10</option>
                    <option value="15">15</option>
                    <option value="20">20</option>
                    <option value="25">25</option>
                  </select>
                  <label>Matches per page</label>
                </div>
                <div class="input-field col s4 l3">
                  <i class="material-icons prefix">format_shapes</i>
                  <select name="text-style">
                    <option value="0">Plain text</option>
                    <option value="1" selected>Highlight entities</option>
                    <option value="2">Token text</option>
                  </select>
                  <label>Text style</label>
                </div>
                <div class="input-field col s4 l3">
                  <i class="material-icons prefix">format_quote</i>
                  <select name="token-representation">
                    <option value="lemma">lemma</option>
                    <option value="pos">pos</option>
                    <option value="simple_pos">simple_pos</option>
                    <option value="word" selected>word</option>
                  </select>
                  <label>Token representation</label>
                </div>
              </div>
            </div>
            <!-- Form actions (the query builder button is a disabled placeholder) -->
            <div class="col s12 m3 l3 right-align">
              <p class="hide-on-small-only">&nbsp;</p>
              <a class="btn disabled tooltipped waves-effect waves-light" data-tooltip="Coming soon..."><i class="material-icons left">build</i> Query builder</a>
              <button class="btn waves-effect waves-light corpus-analysis-action" id="concordance-extension-form-submit" type="submit" name="submit">Send <i class="material-icons right">send</i></button>
            </div>
          </div>
        </form>
      </div>
    </div>
  </div>
  <!-- Results: progress bar, subcorpus info/actions, match table and pagination -->
  <div class="col s12">
    <div id="concordance-extension-subcorpus-list"></div>
    <div class="card">
      <div class="card-content">
        <div class="progress hide" id="concordance-extension-progress">
          <div class="indeterminate"></div>
        </div>
        <div class="row">
          <div class="col s9"><p class="hide" id="concordance-extension-subcorpus-info"></p></div>
          <div class="col s3 right-align" id="concordance-extension-subcorpus-actions"></div>
        </div>
        <table class="highlight">
          <thead>
            <tr>
              <th style="width: 2%;"></th>
              <th style="width: 8%;">Source</th>
              <th class="right-align" style="width: 22.5%;">Left context</th>
              <th class="center-align" style="width: 40%;">KWIC</th>
              <th class="left-align" style="width: 22.5%;">Right Context</th>
              <th class="left-align" style="width: 5%;"></th>
            </tr>
          </thead>
          <!-- Rows are rendered by the concordance extension script -->
          <tbody id="concordance-extension-subcorpus-items"></tbody>
        </table>
        <ul class="pagination hide" id="concordance-extension-subcorpus-pagination"></ul>
      </div>
    </div>
  </div>
</div>

View File

@ -1,337 +1,73 @@
{% extends "base.html.j2" %}
{% from "corpora/_breadcrumbs.html.j2" import breadcrumbs with context %}
{% import "materialize/wtf.html.j2" as wtf %}
{% block main_attribs %} class="service-scheme" data-service="corpus-analysis"{% endblock main_attribs %}
{% block main_attribs %} class="service-scheme" data-service="corpus-analysis" id="corpus-analysis-app-container"{% endblock main_attribs %}
{% block page_content %}
<div class="row">
<ul class="row tabs no-autoinit" id="corpus-analysis-app-extension-tabs">
<li class="tab col s3"><a class="active" href="#corpus-analysis-app-overview"><i class="nopaque-icons service-icon left" data-service="corpus-analysis"></i>Corpus analysis</a></li>
<li class="tab col s3"><a href="#concordance-extension-container"><i class="material-icons left">list_alt</i>Concordance</a></li>
<li class="tab col s3"><a href="#reader-extension-container"><i class="material-icons left">chrome_reader_mode</i>Reader</a></li>
</ul>
<div class="row" id="corpus-analysis-app-overview">
<div class="col s12">
<div class="card">
<div class="card-content" style="padding-top: 5px;
padding-bottom: 0px;">
<!-- Query form -->
<div class="row">
<form class="col s12" id="query-form">
<div class="row">
<div class="input-field col s12 m10">
<i class="material-icons prefix">search</i>
{{ query_form.query() }}
{{ query_form.query.label }}
<span class="helper-text">
<a href="http://cwb.sourceforge.net/files/CQP_Tutorial/" target="_blank">
<i class="material-icons" style="font-size: inherit;">help
</i>
CQP query language tutorial
</a>
</span>
</div>
<div class="col s12 m2 right-align" style="margin-top: 1.75em;">
<a class="waves-effect waves-light btn modal-trigger" href="#query-builder-modal"><i class="material-icons left">build</i> Query builder</a>
{{ wtf.render_field(query_form.submit, material_icon='send') }}
</div>
</div>
</form>
</div>
<h1>{{ title }}</h1>
</div>
<div class="col s3">
<div class="card extension-selector hoverable" data-target="concordance-extension-container">
<div class="card-content">
<span class="card-title"><i class="material-icons left">list_alt</i>Concordance</span>
<p>Query your corpus with the CQP query language utilizing a KWIC view.</p>
</div>
</div>
</div>
<!-- entire results div/card -->
<div class="col s12" id="query-display">
<div class="card">
<div class="card-content" id="result-list" style="overflow: hidden;">
<div id="interactions-menu" class="row hide"
style="margin-bottom: 0px;">
{# Importing menus for query settings, export etc. #}
{% include 'corpora/interactions/infos.html.j2' %}
{% include 'corpora/interactions/export.html.j2' %}
{% include 'corpora/interactions/create.html.j2' %}
{% include 'corpora/interactions/display.html.j2' %}
</div>
{% include 'tables/query_results.html.j2' %}
<div class="col s3">
<div class="card extension-selector hoverable" data-target="reader-extension-container">
<div class="card-content">
<span class="card-title"><i class="material-icons left">chrome_reader_mode</i>Reader</span>
<p>Inspect your corpus in detail with a full text view, including annotations.</p>
</div>
</div>
</div>
</div>
<!-- Scroll to top element -->
{% include 'corpora/interactions/scroll_to_top.html.j2' %}
<!-- Modals -->
{% include 'modals/query_builder.html.j2' %}
{% include 'modals/show_metadata.html.j2' %}
{% include 'modals/analysis_init.html.j2' %}
{% include 'modals/export_query_results.html.j2' %}
{% include 'modals/context_modal.html.j2' %}
{% include 'modals/show_corpus_files.html.j2' %}
{% include "corpora/analyse_corpus.reader.html.j2" %}
{% include "corpora/analyse_corpus.concordance.html.j2" %}
{% endblock page_content %}
{% block modals %}
{{ super() }}
<div class="modal no-autoinit" id="corpus-analysis-app-init-modal">
<div class="modal-content">
<h4>Initializing session...</h4>
<p>If the loading takes too long or an error occurred,
<a onclick="window.location.reload()" href="#">click here</a>
to refresh your session or
<a href="{{ url_for('corpora.corpus', corpus_id=corpus.id) }}">go back</a>!
</p>
<div class="progress" id="corpus-analysis-app-init-progress">
<div class="indeterminate"></div>
</div>
<p class="error-color-text hide" id="corpus-analysis-app-init-error"></p>
</div>
</div>
{% endblock modals %}
{% block scripts %}
{{ super() }}
<!-- import modules -->
<script type="module">
/**
* First Phase:
* Document content is loaded and scripts are being imported and executed.
*/
// Import Client classes. Client handles the server client communication.
import {
Client,
ClientEventListener,
ListenerCallback,
} from '../../static/js/modules/corpus_analysis/client/Client.js';
/**
* Import Client listener functions which will listen for defined socket or
* javascript events.
*/
import {
recieveConnected,
recieveMetaData,
recieveQueryStatus,
recieveQueryData,
recieveViewNotification,
recieveResultsData,
} from '../../static/js/modules/corpus_analysis/client/listeners.js';
// Import client listener callbacks so they can be registered to the listeners.
import {
prepareQueryData,
saveQueryData,
saveMetaData,
getResultsData,
saveResultsData,
} from '../../static/js/modules/corpus_analysis/client/callbacks.js';
// Import Results class which will be used to save results data of a query etc.
import {
Results,
} from '../../static/js/modules/corpus_analysis/model/Results.js';
/**
* Import the ResultsList which can be understood as a View class that handles
* how the data from Results is represented to the user. The ViewEventListener
* is used to register listener functions which listen for events emitted by
* the Client.
*/
import {
ResultsList,
ViewEventListener,
} from '../../static/js/modules/corpus_analysis/view/ResultsView.js';
// Import listener which will be registered to the ViewEventListener class.
import {
// listener listening for client dispatched 'notify-view' custom event.
recieveClientNotification,
// vanilla javascript Event listeners which are listening for button clicks.
pageNavigation,
expertModeSwitch,
actionButtons,
displayOptions,
showMetaData,
showCorpusFiles,
exportFullContextSwitch,
createFullResults,
createSubResults,
exportFullResults,
exportSubResults,
exportSingleMatch,
} from '../../static/js/modules/corpus_analysis/view/listeners.js';
// Import script that implements the scroll to top button.
import {
scrollToTop,
} from '../../static/js/modules/corpus_analysis/view/scrollToTop.js';
// vanilla javascript Event listeners which are listening for button clicks etc
/**
* Second Phase:
* Asynchronous and event-driven code.
*/
document.addEventListener("DOMContentLoaded", () => {
// Initialize the client for server client communication in dynamic mode
let corpusId = {{ corpus.id }}
const client = new Client({'corpusId': corpusId,
'socket': nopaque.appClient.socket,
'logging': true,
'dynamicMode': true});
/**
* Initializing the results object as a model holding all the data of a
* query. Also holds the metadata of one query and results data.
* After that initialize the ResultsList object as the View handling the
* representation of the data for the user.
*/
let results = new Results();
let resultsList = new ResultsList('result-list', ResultsList.options);
/**
* Register listeners listening to socket.io events and their callbacks
* Afterwards load them. Also registers listeners listening for custom
* javascript events emitted by the View.
*/
const listenForConnected = new ClientEventListener('corpus_analysis_init',
recieveConnected);
const listenForMetaData = new ClientEventListener('corpus_analysis_meta_data',
recieveMetaData);
const metaDataCallback = new ListenerCallback('corpus_analysis_meta_data',
saveMetaData,
[client, results]);
listenForMetaData.setCallbacks([metaDataCallback]);
const listenForQueryStatus = new ClientEventListener('corpus_analysis_query',
recieveQueryStatus);
const queryStatusCallback = new ListenerCallback('corpus_analysis_query',
prepareQueryData,
[client, results]);
listenForQueryStatus.setCallbacks([queryStatusCallback]);
const listenForQueryData = new ClientEventListener('corpus_analysis_query_results',
recieveQueryData);
const queryDataCallback = new ListenerCallback('corpus_analysis_query_results',
saveQueryData,
[client, results]);
listenForQueryData.setCallbacks([queryDataCallback]);
const listenForResults = new ClientEventListener('corpus_analysis_get_match_with_full_context',
recieveResultsData);
const resultsDataCallback = new ListenerCallback('corpus_analysis_get_match_with_full_context',
saveResultsData,
[client, results]);
listenForResults.setCallbacks([resultsDataCallback]);
// Listen for javascript custom notifications emitted by the View.
const listenForViewNotification = new ClientEventListener('notify-client',
recieveViewNotification);
const getResultsCallback = new ListenerCallback('get-results',
getResultsData,
[client, results]);
listenForViewNotification.setCallbacks([getResultsCallback]);
client.setSocketEventListeners([listenForConnected,
listenForQueryStatus,
listenForQueryData,
listenForMetaData,
listenForViewNotification,
listenForResults]);
// Load the listeners so that they will be executed if triggered
client.loadSocketEventListeners();
/**
* Register resultsList listeners listening to notification events emitted by
* the Client class.
*/
const listenForClientNotification = new ViewEventListener('notify-view',
recieveClientNotification);
/**
* Register vanilla Javascript events to the resultList listening for button
* clicks etc. done by the user.
* Get all needed HTMLElements for those event listeners before.
*/
resultsList.getHTMLElements([
'.pagination',
'#display-options-form-expert_mode',
'#display-options-form-result_context',
'#display-options-form-results_per_page',
'#download-results-json',
'#full-results-create',
'#full-results-export',
'#inspect-results-export',
'#meta-data-modal-content',
['#meta-data-modal', {
'preventScrolling': false,
'opacity': 0.0,
'dismissible': false,
'onOpenEnd': (() => {document.querySelector(".modal-overlay").remove()})
}
],
['#query-results-download-modal', {}],
'#query-results-table',
'#show-meta-data',
'#show-corpus-files',
'#show-corpus-files-modal-content',
['#show-corpus-files-modal', {
'preventScrolling': false,
'opacity': 0.0,
'dismissible': false,
'onOpenEnd': (() => {document.querySelector(".modal-overlay").remove()})
}
],
'#sub-results-create',
'#sub-results-export',
'#export-full-inspect-context',
]);
let args = [resultsList, results, client];
const listenForPageNavigation = new ViewEventListener('page-navigation',
pageNavigation,
args);
const listenForExpertModeSwitch = new ViewEventListener('expert-mode',
expertModeSwitch,
args);
const listenForActionButtons = new ViewEventListener('action-buttons',
actionButtons,
args);
const listenForDisplayOptions = new ViewEventListener('display-otions',
displayOptions,
args);
const listenForShowMetaData = new ViewEventListener('show-meta-data',
showMetaData,
args);
const listenForShowCorpusFiles = new ViewEventListener('show-corpus-files',
showCorpusFiles,
args);
const listenForExportFullContextSwitch = new ViewEventListener('export-full-context-switch',
exportFullContextSwitch,
args);
const listenForCreateFullResults = new ViewEventListener('create-full-results',
createFullResults,
args);
const listenForCreateSubResults = new ViewEventListener('create-sub-results',
createSubResults,
args);
const listenForExportFullResults = new ViewEventListener('export-full-results',
exportFullResults,
args);
const listenForExportSubResults = new ViewEventListener('export-sub-results',
exportSubResults,
args);
const listenForExportSingleMatch = new ViewEventListener('export-single-match',
exportSingleMatch,
args);
// Set and load defined listeners
resultsList.setViewEventListeners([
listenForClientNotification,
listenForPageNavigation,
listenForExpertModeSwitch,
listenForActionButtons,
listenForDisplayOptions,
listenForShowMetaData,
listenForShowCorpusFiles,
listenForExportFullContextSwitch,
listenForCreateFullResults,
listenForCreateSubResults,
listenForExportFullResults,
listenForExportSubResults,
listenForExportSingleMatch,
]);
resultsList.loadViewEventListeners();
// Connect client to server.
client.notifyView('connecting');
client.connect();
// Send a query and receive its answer data.
let queryFormElement = document.querySelector('#query-form');
queryFormElement.addEventListener('submit', (event) => {
try {
/**
* Selects first page of result list if pagination is already available
* from a query submitted before.
* This avoids confusion for the user e.g.: The user was on page 24
* reviewing the results and issues a new query. He would not see any
* results until the new results reach page 24 or he clicks on another
* valid result page element from the new pagination.
*/
let firstPageElement = document.querySelector('a.page');
firstPageElement.click();
} catch (e) {
// No page element is present if first query is submitted.
}
// Prevent page from reloading on submit.
event.preventDefault();
// Get query string and send query to server.
results.data.getQueryStr(queryFormElement);
client.query(results.data.query);
});
// Enable scroll to Top functionality.
scrollToTop('header', '#menu-scroll-to-top-div');
});
{% assets output="js/nopaque/CorpusAnalysis.min.bundle.js",
"js/nopaque/CorpusAnalysis/CQiClient.js",
"js/nopaque/CorpusAnalysis/CorpusAnalysisApp.js",
"js/nopaque/CorpusAnalysis/CorpusAnalysisConcordance.js",
"js/nopaque/CorpusAnalysis/CorpusAnalysisReader.js" %}
<script src="{{ ASSET_URL }}"></script>
{% endassets %}
<script>
let corpusAnalysisApp = new CorpusAnalysisApp({{ corpus.id }});
let corpusAnalysisConcordance = new CorpusAnalysisConcordance(corpusAnalysisApp);
let corpusAnalysisReader = new CorpusAnalysisReader(corpusAnalysisApp);
corpusAnalysisApp.init();
</script>
{% endblock %}
{% endblock scripts %}

View File

@ -0,0 +1,67 @@
<!-- Reader extension: a settings form (tokens per page, text style, token
representation) followed by a card containing a progress bar, an empty table
cell (#reader-extension-corpus) and a pagination list.
NOTE(review): the empty cell and the pagination are presumably filled by the
client-side reader script - confirm. -->
<div class="row" id="reader-extension-container">
<div class="col s12">
<div class="card">
<div class="card-content">
<form id="reader-extension-form">
<div class="row">
<div class="col s12 m9 l10">
<div class="row">
<!-- Number of tokens per page; 500 is preselected. -->
<div class="input-field col s4 m3 l2">
<i class="material-icons prefix">format_list_numbered</i>
<select class="corpus-analysis-action" name="per-page">
<option value="500" selected>500</option>
<option value="1000">1000</option>
<option value="1500">1500</option>
</select>
<label>Tokens per page</label>
</div>
<!-- Text style; "Highlight entities" (value 1) is preselected. -->
<div class="input-field col s4 m3 l2">
<i class="material-icons prefix">format_shapes</i>
<select name="text-style">
<option value="0">Plain text</option>
<option value="1" selected>Highlight entities</option>
<option value="2">Token text</option>
</select>
<label>Text style</label>
</div>
<!-- Token attribute used to represent a token; "word" is preselected. -->
<div class="input-field col s4 m3 l2">
<i class="material-icons prefix">format_quote</i>
<select name="token-representation">
<option value="lemma">lemma</option>
<option value="pos">pos</option>
<option value="simple_pos">simple_pos</option>
<option value="word" selected>word</option>
</select>
<label>Token representation</label>
</div>
<!-- Placeholder for an error message; carries the "hide" class initially. -->
<div class="col s12">
<span class="error-color-text helper-text hide" id="reader-extension-error"></span>
</div>
</div>
</div>
<!-- Submit button; it carries the "hide" class in this markup. -->
<div class="col s12 m3 l2 right-align">
<p class="hide-on-small-only">&nbsp;</p>
<button class="btn hide waves-effect waves-light corpus-analysis-action" id="reader-extension-form-submit" type="submit" name="submit">Send <i class="material-icons right">send</i></button>
</div>
</div>
</form>
</div>
</div>
</div>
<div class="col s12">
<div class="card">
<div class="card-content">
<!-- Indeterminate progress bar; carries the "hide" class initially. -->
<div class="progress hide" id="reader-extension-progress">
<div class="indeterminate"></div>
</div>
<table>
<tbody>
<tr><td id="reader-extension-corpus"></td></tr>
</tbody>
</table>
<!-- Pagination list; empty and carrying the "hide" class initially. -->
<ul class="pagination hide" id="reader-extension-corpus-pagination"></ul>
</div>
</div>
</div>
</div>

View File

@ -1,30 +0,0 @@
<!-- WIP. The user should be able to call several analysis tools from here.-->
<div class="col s12 m3 l2" id="anlysis">
<h6 style="margin-top: 0px;">Analysis</h6>
<div class="divider" style="margin-bottom: 10px;"></div>
<div class="row">
<div class="col s12">
<button id="placeholder1"
class="waves-effect
waves-light
btn-flat
disabled
flat-interaction"
type="submit">Action One
<i class="material-icons left">cloud</i>
</button>
</div>
<div class="col s12">
<button id="placeholder2"
class="waves-effect
waves-light
btn-flat
disabled
flat-interaction"
type="submit">Action Two
<i class="material-icons left">add</i>
</button>
</div>
</div>
</div>

View File

@ -1,30 +0,0 @@
<!-- WIP. The user should be able to cite nopaque and the current service easily using these buttons.-->
<div class="col s12 m3 l2" id="cite">
<h6 style="margin-top: 0px;">Cite Nopaque</h6>
<div class="divider" style="margin-bottom: 10px;"></div>
<div class="row">
<div class="col s12">
<button id="placeholder1"
class="waves-effect
waves-light
btn-flat
disabled
flat-interaction"
type="submit">Action One
<i class="material-icons left">cloud</i>
</button>
</div>
<div class="col s12">
<button id="placeholder2"
class="waves-effect
waves-light
btn-flat
disabled
flat-interaction"
type="submit">Action Two
<i class="material-icons left">add</i>
</button>
</div>
</div>
</div>

View File

@ -1,24 +0,0 @@
<!-- HTML to allow the user to create sub results from the current query
results.-->
<div class="col s12 m3 l2" id="create">
<h6 style="margin-top: 0px;">Create</h6>
<div class="divider" style="margin-bottom: 10px;"></div>
<div class="row">
<div class="col s12">
<p>Add matches to Sub-Results with the
<i class="material-icons tiny">add</i>
button in the list or inspect view.
</p>
</div>
<div class="col s12">
<div class="input-field">
<p><span id="nr-marked-matches"></span> matches added for sub-results:</p>
<textarea id="sub-results-indexes-display"
class="materialize-textarea"
disabled>
</textarea>
</div>
</div>
</div>
</div>

View File

@ -1,30 +0,0 @@
{% import "materialize/wtf.html.j2" as wtf %}
<!-- HTML to allow the user to change how the results are being displayed.-->
<div class="col s12 m3 l2" id="display">
<h6 style="margin-top: 0px;">Display</h6>
<div class="divider" style="margin-bottom: 10px;"></div>
<div class="row">
<div class="col s12">
<form id="display-options-form">
{{ wtf.render_field(display_options_form.results_per_page,
material_icon='format_list_numbered') }}
{{ wtf.render_field(display_options_form.result_context,
material_icon='short_text') }}
<div class="col s12" style="line-height: 38px;">
<div class="col s8">
{{ display_options_form.expert_mode.label.text }}
</div>
<div class="class col s4 right-align">
<div class="switch">
<label style="margin-left: -20px;">
{{ display_options_form.expert_mode() }}
<span class="lever"></span>
</label>
</div>
</div>
</div>
</form>
</div>
</div>
</div>

View File

@ -1,79 +0,0 @@
<!-- HTML to allow the user to export the current query results in full or
the selected sub results.-->
<div class="col s12 m3 l2" id="export">
<h6 style="margin-top: 0px;">Export</h6>
<div class="divider" style="margin-bottom: 10px;"></div>
<div class="row">
<div class="col s12" style="line-height: 38px;">
<div class="col s8">
Full context
<a class="tooltipped black-text" data-tooltip="Check this switch to
create results for the download with full context. Creating
results like this will take much longer but you will be able to
inspect your matches in detail when you import them into the query
results viewer.">
<i class="material-icons tiny">info_outline</i>
</a>
</div>
<div class="class col s4 right-align">
<div class="switch">
<label style="margin-left: -20px;">
<input type="checkbox" id="export-full-inspect-context">
<span class="lever"></span>
</label>
</div>
</div>
</div>
<div class="col s12">
<button class="waves-effect
waves-light
btn-flat
disabled
flat-interaction"
type="submit"
id="full-results-create">Create Results
<i class="material-icons left">build</i>
</button>
<button id="full-results-export"
class="waves-effect
waves-light
btn-flat
hide
flat-interaction"
type="submit">Results
<i class="material-icons left">file_download</i>
</button>
</div>
<div class="col s12">
<div class="progress hide" id="full-results-progress-bar">
<div class="determinate"></div>
</div>
</div>
<div class="col s12">
<button class="waves-effect
waves-light
btn-flat
disabled
flat-interaction"
type="submit"
id="sub-results-create">Create Sub-Results
<i class="material-icons left">build</i>
</button>
<button id="sub-results-export"
class="waves-effect
waves-light
btn-flat
hide
flat-interaction"
type="submit">Sub-Results
<i class="material-icons left">file_download</i>
</button>
</div>
<div class="col s12">
<div class="progress hide" id="sub-results-progress-bar">
<div class="determinate"></div>
</div>
</div>
</div>
</div>

View File

@ -1,59 +0,0 @@
<!-- HTML for showing infos about the current query or result. Also gives
the user the ability to access the meta data for the current query or
result.-->
<div class="col s12 m3 l2" id="infos">
<h6 style="margin-top: 0px;">Infos</h6>
<div class="divider" style="margin-bottom: 10px;"></div>
<div class="row">
<div class="col s12"
style="height: 39px;
margin-top: 0px;
padding-top: 5px;
padding-left: 1.75rem;">
<span id="loading-matches"
class="black-text">
<i class="material-icons left">dvr</i>
<span id="recieved-match-count"></span>/
<span id="total-match-count"></span>
matches loaded
</span>
</div>
<div class="col s12">
<div class="progress hide" id="query-progress-bar">
<div class="determinate"></div>
</div>
</div>
<div class="col s12">
<button id="show-meta-data"
class="waves-effect
waves-light
btn-flat
flat-interaction"
type="submit">Corpus Meta Data
<i class="material-icons left">info_outline</i>
</button>
</div>
<div class="col s12">
<button id="show-corpus-files"
class="waves-effect
waves-light
btn-flat
flat-interaction"
type="submit">
<i class="material-icons left">info_outline</i>
Matches in
<span id="text-lookup-count"></span>
files
</button>
</div>
<div class="col s12">
<p class="hide" id="query-results-user-feedback">
<i class="material-icons tiny">help</i>
Server is sending your results.
Functions like "Export Results" and "Match Inspect" will be
available after all matches have been loaded.
</p>
</div>
</div>
</div>

View File

@ -1,5 +0,0 @@
<!-- Floating "scroll to top" action button, fixed near the bottom right corner.
It carries the "hide" class in this markup.
NOTE(review): presumably shown/hidden by the scrollToTop() helper that the
including pages call with '#menu-scroll-to-top-div' - confirm. -->
<div id="menu-scroll-to-top-div" class="fixed-action-btn direction-top active hide" style="bottom: 45px; right: 24px;">
<a id="menu-scroll-to-top" class="btn btn-floating btn-large corpus-analysis-color.lighten">
<i class="material-icons">arrow_upward</i>
</a>
</div>

View File

@ -1,12 +0,0 @@
{# Breadcrumb entries for the query result pages, captured into the variable
   "breadcrumbs". The first entry links to the "query-results" anchor of the
   dashboard. The second entry is chosen by comparing request.path with the
   URLs of the add_query_result, query_result and inspect_query_result
   endpoints; the matching entry is rendered as the active tab. No second entry
   is rendered when none of the three URLs matches. #}
{% set breadcrumbs %}
<li class="tab disabled"><i class="material-icons">navigate_next</i></li>
<li class="tab"><a href="{{ url_for('main.dashboard', _anchor='query-results') }}" target="_self">My query results</a></li>
<li class="tab disabled"><i class="material-icons">navigate_next</i></li>
{% if request.path == url_for('.add_query_result') %}
<li class="tab"><a class="active" href="{{ url_for('.add_query_result') }}" target="_self">{{ title }}</a></li>
{% elif request.path == url_for('.query_result', query_result_id=query_result.id) %}
<li class="tab"><a class="active" href="{{ url_for('.query_result', query_result_id=query_result.id) }}" target="_self">{{ query_result.title }}</a></li>
{% elif request.path == url_for('.inspect_query_result', query_result_id=query_result.id) %}
<li class="tab"><a class="active" href="{{ url_for('.inspect_query_result', query_result_id=query_result.id) }}" target="_self">{{ title }}</a></li>
{% endif %}
{% endset %}

View File

@ -1,56 +0,0 @@
{% extends "base.html.j2" %}
{% from "corpora/query_results/_breadcrumbs.html.j2" import breadcrumbs with context %}
{% import "materialize/wtf.html.j2" as wtf %}
{% block main_attribs %} class="service-scheme" data-service="corpus-analysis"{% endblock main_attribs %}
{% block page_content %}
<div class="container">
<div class="row">
<div class="col s12">
<h1 id="title">{{ title }}</h1>
</div>
<div class="col s12 m4">
<p>Fill out the following form to upload and view your exported query data from the corpus analysis.</p>
<a class="waves-effect waves-light btn" href="{{ url_for('main.dashboard') }}"><i class="material-icons left">arrow_back</i>Back to dashboard</a>
</div>
<div class="col s12 m8">
<form class="nopaque-submit-form" data-progress-modal="progress-modal">
<div class="card">
<div class="card-content">
{{ form.hidden_tag() }}
<div class="row">
<div class="col s12 m4">
{{ wtf.render_field(form.title, data_length='32', material_icon='title') }}
</div>
<div class="col s12 m8">
{{ wtf.render_field(form.description, data_length='255', material_icon='description') }}
</div>
<div class="col s12">
{{ wtf.render_field(form.file, accept='.json', placeholder='Choose your .json file') }}
</div>
</div>
</div>
<div class="card-action right-align">
{{ wtf.render_field(form.submit, material_icon='send') }}
</div>
</div>
</form>
</div>
</div>
</div>
<div id="progress-modal" class="modal">
<div class="modal-content">
<h4><i class="material-icons prefix">file_upload</i> Uploading file...</h4>
<div class="progress">
<div class="determinate" style="width: 0%"></div>
</div>
</div>
<div class="modal-footer">
<a href="#!" class="modal-close waves-effect waves-light btn red abort-request">Cancel</a>
</div>
</div>
{% endblock %}

View File

@ -1,241 +0,0 @@
{% extends "base.html.j2" %}
{% from "corpora/query_results/_breadcrumbs.html.j2" import breadcrumbs with context %}
{% block main_attribs %} class="service-scheme" data-service="corpus-analysis"{% endblock main_attribs %}
{% block page_content %}
<div class="row">
<div class="col s12">
<div class="card">
<div class="card-content" style="padding-top: 5px; padding-bottom: 0px;">
<!-- Query form -->
<div class="row">
<form id="query-form">
<div class="col s12 m10">
<div class="input-field">
<i class="material-icons prefix">search</i>
<input disabled value="{{ query_metadata.query|escape }}" id="disabled" type="text" class="validate">
<label for="disabled">Query</label>
</div>
</div>
<div class="col s12 m2 right-align">
<br class="hide-on-small-only">
</div>
</form>
</div>
</div>
</div>
</div>
<!-- entire results div/card -->
<div class="col s12" id="query-display">
<div class="card">
<div class="card-content" id="result-list" style="overflow: hidden;">
<div class="row" id="interactions-menu">
{% include 'corpora/interactions/infos.html.j2' %}
{% include 'corpora/interactions/display.html.j2' %}
{% include 'corpora/interactions/analysis.html.j2' %}
{% include 'corpora/interactions/cite.html.j2' %}
<div class="hide">
{# Hide those because they are not needed when inspecting results.
But some of their elements are being asked for by the client. #}
{% include 'corpora/interactions/export.html.j2' %}
{% include 'corpora/interactions/create.html.j2' %}
</div>
</div>
{% include 'tables/query_results.html.j2' %}
</div>
</div>
</div>
</div>
{# Import modals #}
{% include 'modals/show_metadata.html.j2' %}
{% include 'modals/show_corpus_files.html.j2' %}
{% include 'modals/context_modal.html.j2' %}
<!-- Scroll to top element -->
{% include 'corpora/interactions/scroll_to_top.html.j2' %}
{% endblock page_content %}
{% block scripts %}
{{ super() }}
<script type="module">
/**
* First Phase:
* Document content is loaded and scripts are being imported and executed.
*/
// Import Client classes. Client handles the server client communication.
import {
Client,
ClientEventListener,
ListenerCallback,
} from '../../../static/js/modules/corpus_analysis/client/Client.js';
/**
* Import Client listener functions which will listen for defined socket or
* javascript events.
*/
import {
recieveQueryStatus,
recieveQueryData,
} from '../../../static/js/modules/corpus_analysis/client/listeners.js';
// Import client listener callbacks so they can be registered to the listeners.
import {
prepareQueryData,
saveQueryData,
} from '../../../static/js/modules/corpus_analysis/client/callbacks.js';
// Import Results class which will be used to save results data of a query etc.
import {
Results,
} from '../../../static/js/modules/corpus_analysis/model/Results.js';
/**
* Import the ResultsList which can be understood as a View class that handles
* how the data from Results is represented to the user. The ViewEventListener
* is used to register listener functions which listen for events emitted by
* the Client.
*/
import {
ResultsList,
ViewEventListener,
} from '../../../static/js/modules/corpus_analysis/view/ResultsView.js';
// Import listener which will be registered to the ViewEventListener class.
import {
// listener listening for client dispatched 'notify-view' custom event.
recieveClientNotification,
// vanilla javascript Event listeners which are listening for button clicks.
pageNavigation,
expertModeSwitch,
actionButtons,
displayOptions,
showMetaData,
showCorpusFiles,
} from '../../../static/js/modules/corpus_analysis/view/listeners.js';
import {
scrollToTop,
} from '../../../static/js/modules/corpus_analysis/view/scrollToTop.js'
/**
* Second Phase:
* Asynchronous and event driven code.
*/
/**
* Initializing the results object as a model holding all the data of a
* query. Also holds the metadata of one query and results data.
* After that initialize the ResultsList object as the View handling the
* representation of the data for the user.
*/
let results = new Results();
let resultsList = new ResultsList('result-list', ResultsList.options);
// Import results data from json file.
const resultsJson = {{ query_result_file_content|tojson|safe }};
// Import metadata from DB passed to this view
const metaDataJson = {{ query_metadata|tojson|safe }};
// Initialize the client with dynamicMode set to false.
const client = new Client({'logging': true,
'dynamicMode': false,
'fullContext': metaDataJson.fullContext});
/**
* Register needed listeners and their callbacks. But we will
* just call the attached callbacks manually. Because dynamicMode is false.
*/
const listenForQueryStatus = new ClientEventListener('corpus_analysis_query',
recieveQueryStatus);
const queryStatusCallback = new ListenerCallback('corpus_analysis_query',
prepareQueryData,
[client, results]);
listenForQueryStatus.setCallbacks([queryStatusCallback]);
const listenForQueryData = new ClientEventListener('corpus_analysis_query_results',
recieveQueryData);
const queryDataCallback = new ListenerCallback('corpus_analysis_query_results',
saveQueryData,
[client, results]);
listenForQueryData.setCallbacks([queryDataCallback]);
// Set the event listeners
client.setSocketEventListeners([
listenForQueryStatus,
listenForQueryData,
]);
/**
* Register resultsList listeners listening to notification events emitted by
* the Client class.
*/
const listenForClientNotification = new ViewEventListener('notify-view',
recieveClientNotification);
/**
* Register vanilla Javascript events to the resultList listening for button
* clicks etc. done by the user.
* Get all needed HTMLElements for those event listeners before.
*/
resultsList.getHTMLElements([
'.add-btn',
'.pagination',
'#display-options-form-expert_mode',
'#display-options-form-result_context',
'#display-options-form-results_per_page',
'#full-results-create',
'#full-results-export',
'#inspect-results-export',
'#meta-data-modal-content',
['#meta-data-modal', {
'preventScrolling': false,
'opacity': 0.0,
'dismissible': false,
'onOpenEnd': (() => {document.querySelector(".modal-overlay").remove()})
}
],
'#query-results-table',
'#show-meta-data',
'#show-corpus-files',
'#show-corpus-files-modal-content',
['#show-corpus-files-modal', {
'preventScrolling': false,
'opacity': 0.0,
'dismissible': false,
'onOpenEnd': (() => {document.querySelector(".modal-overlay").remove()})
}
],
'#sub-results-create',
'#sub-results-export',
]);
let args = [resultsList, results, client];
const listenForPageNavigation = new ViewEventListener('page-navigation',
pageNavigation,
args);
const listenForExpertModeSwitch = new ViewEventListener('expert-mode',
expertModeSwitch,
args);
const listenForActionButtons = new ViewEventListener('action-buttons',
actionButtons,
args);
const listenForDisplayOptions = new ViewEventListener('display-otions',
displayOptions,
args);
const listenForShowMetaData = new ViewEventListener('show-meta-data',
showMetaData,
args);
const listenForShowCorpusFiles = new ViewEventListener('show-corpus-files',
showCorpusFiles,
args);
// Set and load defined listeners
resultsList.setViewEventListeners([
listenForClientNotification,
listenForPageNavigation,
listenForExpertModeSwitch,
listenForActionButtons,
listenForDisplayOptions,
listenForShowMetaData,
listenForShowCorpusFiles,
]);
resultsList.loadViewEventListeners();
// Hide buttons which are not needed when just inspecting results
resultsList.inspectResultsExport.classList.add('hide');
// Execute client event listener callbacks manually because dynamicMode is false
client.eventListeners['corpus_analysis_query'].executeCallbacks([resultsJson]);
// Save meta data to results after the init callback from line above
results.metaData = metaDataJson;
client.eventListeners['corpus_analysis_query_results'].executeCallbacks([resultsJson]);
// Enable scroll to Top functionality.
scrollToTop('#headline', '#menu-scroll-to-top-div');
</script>
{% endblock %}

View File

@ -1,131 +0,0 @@
{% extends "base.html.j2" %}
{% from "corpora/query_results/_breadcrumbs.html.j2" import breadcrumbs with context %}
{% block main_attribs %} class="service-scheme" data-service="corpus-analysis"{% endblock main_attribs %}
{% block page_content %}
<div class="container">
<div class="row">
<div class="col s12">
<h1 id="title">{{ title }}</h1>
</div>
<div class="col s12">
<p>Below the metadata for the results from the Corpus
<i>{{ query_result.query_metadata.corpus_name }}</i> generated with the query
<i>{{ query_result.query_metadata.query }}</i> are shown.
</p>
</div>
<div class="col s12">
<div class="card">
<div class="card-action right-align">
<a class="waves-effect waves-light btn left-align" href="{{ url_for('services.service', service='corpus_analysis') }}">Back To Overview<i class="material-icons right">arrow_back</i></a>
<a class="waves-effect waves-light btn" href="{{ url_for('corpora.inspect_query_result', query_result_id=query_result.id) }}">Inspect Results<i class="material-icons right">search</i></a>
</div>
<div class="card-content" id="results">
<table class="responsive-table highlight">
<thead>
<tr>
<th>Metadata Description</th>
<th>Value</th>
</tr>
</thead>
<tbody>
{% for pair in query_result.query_metadata|dictsort %}
<tr>
<td>{{ pair[0] }}</td>
{% if pair[0] == 'corpus_all_texts'
or pair[0] == 'text_lookup' %}
<td>
<table>
{% for key, value in pair[1].items() %}
<tr style="border-bottom: none;">
<td>
<i>{{ value['title'] }}</i> written
by <i>{{ value['author'] }}</i>
in <i>{{ value['publishing_year'] }}</i>
<a class="waves-effect
waves-light
btn
right
more-text-detials"
data-metadata-key="{{ pair[0] }}"
data-text-key="{{ key }}">More
<i class="material-icons right"
data-metadata-key="{{ pair[0] }}"
data-text-key="{{ key }}">
info_outline
</i>
</a>
</td>
</tr>
{% endfor %}
</table>
</td>
{% else %}
<td>{{ pair[1] }}</td>
{% endif %}
</tr>
{% endfor %}
</tbody>
</table>
</div>
<div class="card-action right-align">
<a class="waves-effect waves-light btn left-align" href="{{ url_for('services.service', service='corpus_analysis') }}">Back To Overview<i class="material-icons right">arrow_back</i></a>
<a class="waves-effect waves-light btn" href="{{ url_for('corpora.inspect_query_result', query_result_id=query_result.id) }}">Inspect Results<i class="material-icons right">search</i></a>
</div>
</div>
</div>
</div>
</div>
<div id="modal-text-details" class="modal modal-fixed-footer">
<div class="modal-content">
<h4>Bibliographic data</h4>
<p id="bibliographic-data"></p>
</div>
<div class="modal-footer">
<a href="#!" class="modal-close waves-effect waves-green red btn">Close</a>
</div>
</div>
{% endblock page_content %}
{% block scripts %}
{{ super() }}
<script>
var moreTextDetailsButtons;
moreTextDetailsButtons = document.getElementsByClassName("more-text-detials");
for (var btn of moreTextDetailsButtons) {
btn.onclick = () => {
let modal = document.getElementById("modal-text-details");
modal = M.Modal.init(modal, {"dismissible": true});
modal.open();
let metadataKey = event.target.dataset.metadataKey;
let textKey = event.target.dataset.textKey;
let textData = {{ query_result.query_metadata|tojson|safe }}[metadataKey][textKey];
console.log(textData);
let bibliographicData = document.getElementById("bibliographic-data");
bibliographicData.innerHTML = "";
let table = document.createElement("table");
for (let [key, value] of Object.entries(textData)) {
table.insertAdjacentHTML("afterbegin",
`
<tr>
<td>${key}</td>
<td>${value}</td>
</tr>
`);
}
table.insertAdjacentHTML("afterbegin",
`
<thead>
<th>Description</th>
<th>Value</th>
</thead>
`)
bibliographicData.appendChild(table);
}
}
</script>
{% endblock %}

View File

@ -82,7 +82,7 @@
<ul class="pagination"></ul>
</div>
<div class="card-action right-align">
<a class="waves-effect waves-light btn" href="{{ url_for('corpora.add_query_result') }}">Add query result<i class="material-icons right">file_upload</i></a>
<a class="waves-effect waves-light btn disabled">Add query result<i class="material-icons right">file_upload</i></a>
</div>
</div>
</div>

View File

@ -31,7 +31,7 @@
{% endif %}
</label>
{% for error in field.errors %}
<span class="helper-text red-text">{{ error }}</span>
<span class="helper-text error-color-text">{{ error }}</span>
{% endfor %}
</div>
{% endmacro %}
@ -47,7 +47,7 @@
<input class="file-path validate" type="text" placeholder="{{ placeholder }}">
</div>
{% for error in field.errors %}
<span class="helper-text red-text">{{ error }}</span>
<span class="helper-text error-color-text">{{ error }}</span>
{% endfor %}
</div>
{% endmacro %}
@ -68,7 +68,7 @@
{{ field.label }}
{% endif %}
{% for error in field.errors %}
<span class="helper-text red-text">{{ error }}</span>
<span class="helper-text error-color-text">{{ error }}</span>
{% endfor %}
</div>
{% endmacro %}

View File

@ -1,21 +0,0 @@
<!-- Table showing the query results of the current query or the imported
results. -->
<div class="col s12">
<ul class="pagination paginationTop"></ul>
<table class="responsive-table highlight">
<thead>
<tr>
<th style="width: 2%">Nr.</th>
<th style="width: 3%">Title</th>
<th style="width: 25%">Left context</th>
<th style="width: 35%">Match</th>
<th style="width: 10%">{# Actions #}</th>
<th style="width: 25%">Right Context</th>
</tr>
</thead>
<tbody class="list" id="query-results-table">
</tbody>
</table>
<ul class="pagination paginationBottom"></ul>
</div>

View File

@ -0,0 +1,30 @@
"""empty message
Revision ID: a4b3cf4ab098
Revises: c384d7b3268a
Create Date: 2021-09-23 13:14:16.227784
"""
from alembic import op
import sqlalchemy as sa
# revision identifiers, used by Alembic.
revision = 'a4b3cf4ab098'
down_revision = 'c384d7b3268a'
branch_labels = None
depends_on = None
def upgrade():
    """Rename ``corpora.current_nr_of_tokens`` to ``corpora.num_tokens``.

    Alembic's autogenerate cannot detect column renames and emitted an
    "add new column / drop old column" pair, which would throw away every
    stored token count. Renaming the column in place leads to the same column
    name and type while keeping the existing values.
    """
    op.alter_column(
        'corpora',
        'current_nr_of_tokens',
        new_column_name='num_tokens',
        # The existing_* arguments describe the unchanged parts of the column;
        # some backends need them to emit a rename statement.
        existing_type=sa.Integer(),
        existing_nullable=True,
    )
def downgrade():
    """Rename ``corpora.num_tokens`` back to ``corpora.current_nr_of_tokens``.

    The autogenerated "add old column / drop new column" pair would discard
    every stored token count. Renaming the column in place restores the old
    column name while keeping the existing values.
    """
    op.alter_column(
        'corpora',
        'num_tokens',
        new_column_name='current_nr_of_tokens',
        # The existing_* arguments describe the unchanged parts of the column;
        # some backends need them to emit a rename statement.
        existing_type=sa.Integer(),
        existing_nullable=True,
    )

View File

@ -0,0 +1,28 @@
"""empty message
Revision ID: be010d5d708d
Revises: a4b3cf4ab098
Create Date: 2021-09-24 09:34:54.173653
"""
from alembic import op
import sqlalchemy as sa
# revision identifiers, used by Alembic.
revision = 'be010d5d708d'
down_revision = 'a4b3cf4ab098'
branch_labels = None
depends_on = None
def upgrade():
    """Add the nullable integer column ``num_analysis_sessions`` to ``corpora``.

    NOTE(review): rows that already exist get NULL in the new column -
    confirm that the code reading the counter copes with NULL.
    """
    session_counter = sa.Column(
        'num_analysis_sessions',
        sa.Integer(),
        nullable=True,
    )
    op.add_column('corpora', session_counter)
def downgrade():
    """Remove the column ``num_analysis_sessions`` from ``corpora`` again."""
    table_name = 'corpora'
    column_name = 'num_analysis_sessions'
    op.drop_column(table_name, column_name)