diff --git a/app/__init__.py b/app/__init__.py index 3a03e00b..41b3eeb1 100644 --- a/app/__init__.py +++ b/app/__init__.py @@ -74,8 +74,10 @@ def create_app(config: Config = Config) -> Flask: app.register_blueprint(contributions_blueprint, url_prefix='/contributions') from .corpora import bp as corpora_blueprint + from .corpora.cqi_over_sio import CQiNamespace default_breadcrumb_root(corpora_blueprint, '.corpora') app.register_blueprint(corpora_blueprint, cli_group='corpus', url_prefix='/corpora') + socketio.on_namespace(CQiNamespace('/cqi_over_sio')) from .errors import bp as errors_bp app.register_blueprint(errors_bp) @@ -100,4 +102,7 @@ def create_app(config: Config = Config) -> Flask: default_breadcrumb_root(users_blueprint, '.users') app.register_blueprint(users_blueprint, url_prefix='/users') + from .workshops import bp as workshops_blueprint + app.register_blueprint(workshops_blueprint, url_prefix='/workshops') + return app diff --git a/app/corpora/__init__.py b/app/corpora/__init__.py index 34663b69..8365ce90 100644 --- a/app/corpora/__init__.py +++ b/app/corpora/__init__.py @@ -16,4 +16,4 @@ def before_request(): pass -from . import cli, cqi_over_socketio, files, followers, routes, json_routes +from . 
import cli, files, followers, routes, json_routes diff --git a/app/corpora/cli.py b/app/corpora/cli.py index 67658825..8c1a0970 100644 --- a/app/corpora/cli.py +++ b/app/corpora/cli.py @@ -19,6 +19,9 @@ def reset(): for corpus in [x for x in Corpus.query.all() if x.status in status]: print(f'Resetting corpus {corpus}') shutil.rmtree(os.path.join(corpus.path, 'cwb'), ignore_errors=True) + os.mkdir(os.path.join(corpus.path, 'cwb')) + os.mkdir(os.path.join(corpus.path, 'cwb', 'data')) + os.mkdir(os.path.join(corpus.path, 'cwb', 'registry')) corpus.status = CorpusStatus.UNPREPARED corpus.num_analysis_sessions = 0 db.session.commit() diff --git a/app/corpora/cqi_over_sio/__init__.py b/app/corpora/cqi_over_sio/__init__.py new file mode 100644 index 00000000..6c093a9e --- /dev/null +++ b/app/corpora/cqi_over_sio/__init__.py @@ -0,0 +1,199 @@ +from cqi import CQiClient +from cqi.errors import CQiException +from cqi.status import CQiStatus +from flask import session +from flask_login import current_user +from flask_socketio import Namespace +from inspect import signature +from threading import Lock +from typing import Callable, Dict, List +from app import db, hashids, socketio +from app.decorators import socketio_login_required +from app.models import Corpus, CorpusStatus +from . import extensions + + +''' +This package tunnels the Corpus Query interface (CQi) protocol through +Socket.IO (SIO) by tunneling CQi API calls through an event called "exec". + +Basic concept: +1. A client connects to the "/cqi_over_sio" namespace. +2. The client emits the "init" event and provides a corpus id for the corpus + that should be analysed in this session. + 1.1 The analysis session counter of the corpus is incremented. + 1.2 A CQiClient and a (Mutex) Lock belonging to it is created. + 1.3 Wait until the CQP server is running. + 1.4 Connect the CQiClient to the server. + 1.5 Save the CQiClient, the Lock and the corpus id in the session for + subsequential use. +2. 
The client emits the "exec" event provides the name of a CQi API function + arguments (optional). + - The event "exec" handler will execute the function, make sure that the + result is serializable and returns the result back to the client. +4. Wait for more events +5. The client disconnects from the "/cqi_over_sio" namespace + 1.1 The analysis session counter of the corpus is decremented. + 1.2 The CQiClient and (Mutex) Lock belonging to it are teared down. +''' + +CQI_API_FUNCTION_NAMES: List[str] = [ + 'ask_feature_cl_2_3', + 'ask_feature_cqi_1_0', + 'ask_feature_cqp_2_3', + 'cl_alg2cpos', + 'cl_attribute_size', + 'cl_cpos2alg', + 'cl_cpos2id', + 'cl_cpos2lbound', + 'cl_cpos2rbound', + 'cl_cpos2str', + 'cl_cpos2struc', + 'cl_drop_attribute', + 'cl_id2cpos', + 'cl_id2freq', + 'cl_id2str', + 'cl_idlist2cpos', + 'cl_lexicon_size', + 'cl_regex2id', + 'cl_str2id', + 'cl_struc2cpos', + 'cl_struc2str', + 'corpus_alignment_attributes', + 'corpus_charset', + 'corpus_drop_corpus', + 'corpus_full_name', + 'corpus_info', + 'corpus_list_corpora', + 'corpus_positional_attributes', + 'corpus_properties', + 'corpus_structural_attribute_has_values', + 'corpus_structural_attributes', + 'cqp_drop_subcorpus', + 'cqp_dump_subcorpus', + 'cqp_fdist_1', + 'cqp_fdist_2', + 'cqp_list_subcorpora', + 'cqp_query', + 'cqp_subcorpus_has_field', + 'cqp_subcorpus_size', + 'ctrl_bye', + 'ctrl_connect', + 'ctrl_last_general_error', + 'ctrl_ping', + 'ctrl_user_abort' +] + + +class CQiNamespace(Namespace): + @socketio_login_required + def on_connect(self): + pass + + @socketio_login_required + def on_init(self, db_corpus_hashid: str): + db_corpus_id = hashids.decode(db_corpus_hashid) + db_corpus = Corpus.query.get(db_corpus_id) + if db_corpus is None: + return {'code': 404, 'msg': 'Not Found'} + if not (db_corpus.user == current_user + or current_user.is_following_corpus(db_corpus) + or current_user.is_administrator()): + return {'code': 403, 'msg': 'Forbidden'} + if db_corpus.status not in [ + 
CorpusStatus.BUILT, + CorpusStatus.STARTING_ANALYSIS_SESSION, + CorpusStatus.RUNNING_ANALYSIS_SESSION, + CorpusStatus.CANCELING_ANALYSIS_SESSION + ]: + return {'code': 424, 'msg': 'Failed Dependency'} + if db_corpus.num_analysis_sessions is None: + db_corpus.num_analysis_sessions = 0 + db.session.commit() + db_corpus.num_analysis_sessions = Corpus.num_analysis_sessions + 1 + db.session.commit() + retry_counter = 20 + while db_corpus.status != CorpusStatus.RUNNING_ANALYSIS_SESSION: + if retry_counter == 0: + db_corpus.num_analysis_sessions = Corpus.num_analysis_sessions - 1 + db.session.commit() + return {'code': 408, 'msg': 'Request Timeout'} + socketio.sleep(3) + retry_counter -= 1 + db.session.refresh(db_corpus) + cqi_client = CQiClient(f'cqpserver_{db_corpus_id}', timeout=float('inf')) + session['cqi_over_sio'] = {} + session['cqi_over_sio']['cqi_client'] = cqi_client + session['cqi_over_sio']['cqi_client_lock'] = Lock() + session['cqi_over_sio']['db_corpus_id'] = db_corpus_id + return {'code': 200, 'msg': 'OK'} + + @socketio_login_required + def on_exec(self, fn_name: str, fn_args: Dict = {}): + try: + cqi_client: CQiClient = session['cqi_over_sio']['cqi_client'] + cqi_client_lock: Lock = session['cqi_over_sio']['cqi_client_lock'] + except KeyError: + return {'code': 424, 'msg': 'Failed Dependency'} + if fn_name in CQI_API_FUNCTION_NAMES: + fn: Callable = getattr(cqi_client.api, fn_name) + elif fn_name in extensions.CQI_EXTENSION_FUNCTION_NAMES: + fn: Callable = getattr(extensions, fn_name) + else: + return {'code': 400, 'msg': 'Bad Request'} + for param in signature(fn).parameters.values(): + if param.default is param.empty: + if param.name not in fn_args: + return {'code': 400, 'msg': 'Bad Request'} + else: + if param.name not in fn_args: + continue + if type(fn_args[param.name]) is not param.annotation: + return {'code': 400, 'msg': 'Bad Request'} + cqi_client_lock.acquire() + try: + fn_return_value = fn(**fn_args) + except BrokenPipeError as e: + return 
{'code': 500, 'msg': 'Internal Server Error'} + except CQiException as e: + return { + 'code': 502, + 'msg': 'Bad Gateway', + 'payload': { + 'code': e.code, + 'desc': e.description, + 'msg': e.__class__.__name__ + } + } + finally: + cqi_client_lock.release() + if isinstance(fn_return_value, CQiStatus): + payload = { + 'code': fn_return_value.code, + 'msg': fn_return_value.__class__.__name__ + } + else: + payload = fn_return_value + return {'code': 200, 'msg': 'OK', 'payload': payload} + + def on_disconnect(self): + try: + cqi_client: CQiClient = session['cqi_over_sio']['cqi_client'] + cqi_client_lock: Lock = session['cqi_over_sio']['cqi_client_lock'] + db_corpus_id: int = session['cqi_over_sio']['db_corpus_id'] + except KeyError: + return + cqi_client_lock.acquire() + try: + session.pop('cqi_over_sio') + except KeyError: + pass + try: + cqi_client.api.ctrl_bye() + except (BrokenPipeError, CQiException): + pass + cqi_client_lock.release() + db_corpus = Corpus.query.get(db_corpus_id) + if db_corpus is not None: + db_corpus.num_analysis_sessions = Corpus.num_analysis_sessions - 1 + db.session.commit() diff --git a/app/corpora/cqi_over_sio/extensions.py b/app/corpora/cqi_over_sio/extensions.py new file mode 100644 index 00000000..70ee5d97 --- /dev/null +++ b/app/corpora/cqi_over_sio/extensions.py @@ -0,0 +1,316 @@ +from collections import Counter +from cqi import CQiClient +from cqi.models.corpora import Corpus as CQiCorpus +from cqi.models.attributes import ( + PositionalAttribute as CQiPositionalAttribute, + StructuralAttribute as CQiStructuralAttribute +) +from cqi.status import StatusOk as CQiStatusOk +from flask import session +from typing import Dict, List +import gzip +import json +import math +import os +from app import db +from app.models import Corpus +from .utils import lookups_by_cpos, partial_export_subcorpus, export_subcorpus + + +CQI_EXTENSION_FUNCTION_NAMES: List[str] = [ + 'ext_corpus_update_db', + 'ext_corpus_static_data', + 
'ext_corpus_paginate_corpus', + 'ext_cqp_paginate_subcorpus', + 'ext_cqp_partial_export_subcorpus', + 'ext_cqp_export_subcorpus', +] + + +def ext_corpus_update_db(corpus: str) -> CQiStatusOk: + cqi_client: CQiClient = session['cqi_over_sio']['cqi_client'] + db_corpus_id: int = session['cqi_over_sio']['db_corpus_id'] + db_corpus: Corpus = Corpus.query.get(db_corpus_id) + cqi_corpus: CQiCorpus = cqi_client.corpora.get(corpus) + db_corpus.num_tokens = cqi_corpus.size + db.session.commit() + return CQiStatusOk() + + +def ext_corpus_static_data(corpus: str) -> Dict: + db_corpus_id: int = session['cqi_over_sio']['db_corpus_id'] + db_corpus: Corpus = Corpus.query.get(db_corpus_id) + cache_file_path: str = os.path.join(db_corpus.path, 'cwb', 'static.json.gz') + if os.path.exists(cache_file_path): + with open(cache_file_path, 'rb') as f: + return f.read() + cqi_client: CQiClient = session['cqi_over_sio']['cqi_client'] + cqi_corpus: CQiCorpus = cqi_client.corpora.get(corpus) + cqi_p_attrs: Dict[str, CQiPositionalAttribute] = { + p_attr.name: p_attr + for p_attr in cqi_corpus.positional_attributes.list() + } + cqi_s_attrs: Dict[str, CQiStructuralAttribute] = { + s_attr.name: s_attr + for s_attr in cqi_corpus.structural_attributes.list() + } + static_corpus_data = { + 'corpus': { + 'bounds': [0, cqi_corpus.size - 1], + 'counts': { + 'token': cqi_corpus.size + }, + 'freqs': {} + }, + 'p_attrs': {}, + 's_attrs': {}, + 'values': {'p_attrs': {}, 's_attrs': {}} + } + for p_attr in cqi_p_attrs.values(): + static_corpus_data['corpus']['freqs'][p_attr.name] = {} + chunk_size = 10000 + p_attr_id_list = list(range(p_attr.lexicon_size)) + chunks = [p_attr_id_list[i:i+chunk_size] for i in range(0, len(p_attr_id_list), chunk_size)] + del p_attr_id_list + for chunk in chunks: + # print(f'corpus.freqs.{p_attr.name}: {chunk[0]} - {chunk[-1]}') + static_corpus_data['corpus']['freqs'][p_attr.name].update( + dict(zip(chunk, p_attr.freqs_by_ids(chunk))) + ) + del chunks + 
static_corpus_data['p_attrs'][p_attr.name] = {} + cpos_list = list(range(cqi_corpus.size)) + chunks = [cpos_list[i:i+chunk_size] for i in range(0, len(cpos_list), chunk_size)] + del cpos_list + for chunk in chunks: + # print(f'p_attrs.{p_attr.name}: {chunk[0]} - {chunk[-1]}') + static_corpus_data['p_attrs'][p_attr.name].update( + dict(zip(chunk, p_attr.ids_by_cpos(chunk))) + ) + del chunks + static_corpus_data['values']['p_attrs'][p_attr.name] = {} + p_attr_id_list = list(range(p_attr.lexicon_size)) + chunks = [p_attr_id_list[i:i+chunk_size] for i in range(0, len(p_attr_id_list), chunk_size)] + del p_attr_id_list + for chunk in chunks: + # print(f'values.p_attrs.{p_attr.name}: {chunk[0]} - {chunk[-1]}') + static_corpus_data['values']['p_attrs'][p_attr.name].update( + dict(zip(chunk, p_attr.values_by_ids(chunk))) + ) + del chunks + for s_attr in cqi_s_attrs.values(): + if s_attr.has_values: + continue + static_corpus_data['corpus']['counts'][s_attr.name] = s_attr.size + static_corpus_data['s_attrs'][s_attr.name] = {'lexicon': {}, 'values': None} + static_corpus_data['values']['s_attrs'][s_attr.name] = {} + ########################################################################## + # A faster way to get cpos boundaries for smaller s_attrs # + ########################################################################## + # if s_attr.name in ['s', 'ent']: + # cqi_corpus.query('Last', f'<{s_attr.name}> []* ;') + # cqi_subcorpus = cqi_corpus.subcorpora.get('Last') + # first_match = 0 + # last_match = cqi_subcorpus.size - 1 + # match_boundaries = zip( + # range(first_match, last_match + 1), + # cqi_subcorpus.dump(cqi_subcorpus.fields['match'], first_match, last_match), + # cqi_subcorpus.dump(cqi_subcorpus.fields['matchend'], first_match, last_match) + # ) + # for id, lbound, rbound in match_boundaries: + # static_corpus_data['s_attrs'][s_attr.name]['lexicon'][id] = {} + # static_corpus_data['s_attrs'][s_attr.name]['lexicon'][id]['bounds'] = [lbound, rbound] + # 
static_corpus_data['s_attrs'][s_attr.name]['lexicon'][id]['counts'] = {} + # static_corpus_data['s_attrs'][s_attr.name]['lexicon'][id]['counts']['token'] = rbound - lbound + 1 + # cqi_subcorpus.drop() + for id in range(0, s_attr.size): + # print(f's_attrs.{s_attr.name}.lexicon.{id}') + static_corpus_data['s_attrs'][s_attr.name]['lexicon'][id] = { + 'bounds': None, + 'counts': None, + 'freqs': None + } + if s_attr.name != 'text': + continue + lbound, rbound = s_attr.cpos_by_id(id) + # print(f's_attrs.{s_attr.name}.lexicon.{id}.bounds') + static_corpus_data['s_attrs'][s_attr.name]['lexicon'][id]['bounds'] = [lbound, rbound] + # print(f's_attrs.{s_attr.name}.lexicon.{id}.counts') + static_corpus_data['s_attrs'][s_attr.name]['lexicon'][id]['counts'] = {} + static_corpus_data['s_attrs'][s_attr.name]['lexicon'][id]['counts']['token'] = rbound - lbound + 1 + cpos_list = list(range(lbound, rbound + 1)) + chunks = [cpos_list[i:i+chunk_size] for i in range(0, len(cpos_list), chunk_size)] + del cpos_list + ent_ids = set() + for chunk in chunks: + # print(f'Gather ent_ids from cpos: {chunk[0]} - {chunk[-1]}') + ent_ids.update({x for x in cqi_s_attrs['ent'].ids_by_cpos(chunk) if x != -1}) + static_corpus_data['s_attrs'][s_attr.name]['lexicon'][id]['counts']['ent'] = len(ent_ids) + del ent_ids + s_ids = set() + for chunk in chunks: + # print(f'Gather s_ids from cpos: {chunk[0]} - {chunk[-1]}') + s_ids.update({x for x in cqi_s_attrs['s'].ids_by_cpos(chunk) if x != -1}) + static_corpus_data['s_attrs'][s_attr.name]['lexicon'][id]['counts']['s'] = len(s_ids) + del s_ids + # print(f's_attrs.{s_attr.name}.lexicon.{id}.freqs') + static_corpus_data['s_attrs'][s_attr.name]['lexicon'][id]['freqs'] = {} + for p_attr in cqi_p_attrs.values(): + p_attr_ids = [] + for chunk in chunks: + # print(f'Gather p_attr_ids from cpos: {chunk[0]} - {chunk[-1]}') + p_attr_ids.extend(p_attr.ids_by_cpos(chunk)) + static_corpus_data['s_attrs'][s_attr.name]['lexicon'][id]['freqs'][p_attr.name] = 
dict(Counter(p_attr_ids)) + del p_attr_ids + del chunks + sub_s_attrs = cqi_corpus.structural_attributes.list(filters={'part_of': s_attr}) + s_attr_value_names: List[str] = [ + sub_s_attr.name[(len(s_attr.name) + 1):] + for sub_s_attr in sub_s_attrs + ] + s_attr_id_list = list(range(s_attr.size)) + chunks = [s_attr_id_list[i:i+chunk_size] for i in range(0, len(s_attr_id_list), chunk_size)] + del s_attr_id_list + sub_s_attr_values = [] + for sub_s_attr in sub_s_attrs: + tmp = [] + for chunk in chunks: + tmp.extend(sub_s_attr.values_by_ids(chunk)) + sub_s_attr_values.append(tmp) + del tmp + del chunks + # print(f's_attrs.{s_attr.name}.values') + static_corpus_data['s_attrs'][s_attr.name]['values'] = s_attr_value_names + # print(f'values.s_attrs.{s_attr.name}') + static_corpus_data['values']['s_attrs'][s_attr.name] = { + s_attr_id: { + s_attr_value_name: sub_s_attr_values[s_attr_value_name_idx][s_attr_id_idx] + for s_attr_value_name_idx, s_attr_value_name in enumerate( + static_corpus_data['s_attrs'][s_attr.name]['values'] + ) + } for s_attr_id_idx, s_attr_id in enumerate(range(0, s_attr.size)) + } + del sub_s_attr_values + with gzip.open(cache_file_path, 'wt') as f: + json.dump(static_corpus_data, f) + del static_corpus_data + with open(cache_file_path, 'rb') as f: + return f.read() + + +def ext_corpus_paginate_corpus( + corpus: str, + page: int = 1, + per_page: int = 20 +) -> Dict: + cqi_client: CQiClient = session['cqi_over_sio']['cqi_client'] + cqi_corpus = cqi_client.corpora.get(corpus) + # Sanity checks + if ( + per_page < 1 + or page < 1 + or ( + cqi_corpus.size > 0 + and page > math.ceil(cqi_corpus.size / per_page) + ) + ): + return {'code': 416, 'msg': 'Range Not Satisfiable'} + first_cpos = (page - 1) * per_page + last_cpos = min(cqi_corpus.size, first_cpos + per_page) + cpos_list = [*range(first_cpos, last_cpos)] + lookups = lookups_by_cpos(cqi_corpus, cpos_list) + payload = {} + # the items for the current page + payload['items'] = [cpos_list] + # the 
lookups for the items + payload['lookups'] = lookups + # the total number of items matching the query + payload['total'] = cqi_corpus.size + # the number of items to be displayed on a page. + payload['per_page'] = per_page + # The total number of pages + payload['pages'] = math.ceil(payload['total'] / payload['per_page']) + # the current page number (1 indexed) + payload['page'] = page if payload['pages'] > 0 else None + # True if a previous page exists + payload['has_prev'] = payload['page'] > 1 if payload['page'] else False + # True if a next page exists. + payload['has_next'] = payload['page'] < payload['pages'] if payload['page'] else False # noqa + # Number of the previous page. + payload['prev_num'] = payload['page'] - 1 if payload['has_prev'] else None + # Number of the next page + payload['next_num'] = payload['page'] + 1 if payload['has_next'] else None + return payload + + +def ext_cqp_paginate_subcorpus( + subcorpus: str, + context: int = 50, + page: int = 1, + per_page: int = 20 +) -> Dict: + corpus_name, subcorpus_name = subcorpus.split(':', 1) + cqi_client: CQiClient = session['cqi_over_sio']['cqi_client'] + cqi_corpus = cqi_client.corpora.get(corpus_name) + cqi_subcorpus = cqi_corpus.subcorpora.get(subcorpus_name) + # Sanity checks + if ( + per_page < 1 + or page < 1 + or ( + cqi_subcorpus.size > 0 + and page > math.ceil(cqi_subcorpus.size / per_page) + ) + ): + return {'code': 416, 'msg': 'Range Not Satisfiable'} + offset = (page - 1) * per_page + cutoff = per_page + cqi_results_export = export_subcorpus( + cqi_subcorpus, context=context, cutoff=cutoff, offset=offset) + payload = {} + # the items for the current page + payload['items'] = cqi_results_export.pop('matches') + # the lookups for the items + payload['lookups'] = cqi_results_export + # the total number of items matching the query + payload['total'] = cqi_subcorpus.size + # the number of items to be displayed on a page. 
+ payload['per_page'] = per_page + # The total number of pages + payload['pages'] = math.ceil(payload['total'] / payload['per_page']) + # the current page number (1 indexed) + payload['page'] = page if payload['pages'] > 0 else None + # True if a previous page exists + payload['has_prev'] = payload['page'] > 1 if payload['page'] else False + # True if a next page exists. + payload['has_next'] = payload['page'] < payload['pages'] if payload['page'] else False # noqa + # Number of the previous page. + payload['prev_num'] = payload['page'] - 1 if payload['has_prev'] else None + # Number of the next page + payload['next_num'] = payload['page'] + 1 if payload['has_next'] else None + return payload + + +def ext_cqp_partial_export_subcorpus( + subcorpus: str, + match_id_list: list, + context: int = 50 +) -> Dict: + corpus_name, subcorpus_name = subcorpus.split(':', 1) + cqi_client: CQiClient = session['cqi_over_sio']['cqi_client'] + cqi_corpus = cqi_client.corpora.get(corpus_name) + cqi_subcorpus = cqi_corpus.subcorpora.get(subcorpus_name) + cqi_subcorpus_partial_export = partial_export_subcorpus(cqi_subcorpus, match_id_list, context=context) + return cqi_subcorpus_partial_export + + +def ext_cqp_export_subcorpus( + subcorpus: str, + context: int = 50 +) -> Dict: + corpus_name, subcorpus_name = subcorpus.split(':', 1) + cqi_client: CQiClient = session['cqi_over_sio']['cqi_client'] + cqi_corpus = cqi_client.corpora.get(corpus_name) + cqi_subcorpus = cqi_corpus.subcorpora.get(subcorpus_name) + cqi_subcorpus_export = export_subcorpus(cqi_subcorpus, context=context) + return cqi_subcorpus_export diff --git a/app/corpora/cqi_over_socketio/utils.py b/app/corpora/cqi_over_sio/utils.py similarity index 55% rename from app/corpora/cqi_over_socketio/utils.py rename to app/corpora/cqi_over_sio/utils.py index bdab8b53..121c3233 100644 --- a/app/corpora/cqi_over_socketio/utils.py +++ b/app/corpora/cqi_over_sio/utils.py @@ -1,64 +1,10 @@ -from flask import session -from functools 
import wraps -from inspect import signature -import cqi +from cqi.models.corpora import Corpus +from cqi.models.subcorpora import Subcorpus +from typing import Dict, List +from app.models import Corpus -def cqi_over_socketio(f): - @wraps(f) - def wrapped(*args): - if 'd' not in session: - return {'code': 424, 'msg': 'Failed Dependency'} - f_args = {} - # Check for missing args and if all provided args are of the right type - for param in signature(f).parameters.values(): - if param.name == 'corpus_name': - f_args[param.name] = f'NOPAQUE_{session["d"]["corpus_id"]}' - continue - if param.name == 'cqi_client': - f_args[param.name] = session['d']['cqi_client'] - continue - if param.default is param.empty: - # args - if param.name not in args[0]: - return {'code': 400, 'msg': 'Bad Request'} - arg = args[0][param.name] - if type(arg) is not param.annotation: - return {'code': 400, 'msg': 'Bad Request'} - f_args[param.name] = arg - else: - # kwargs - if param.name not in args[0]: - continue - arg = args[0][param.name] - if type(arg) is not param.annotation: - return {'code': 400, 'msg': 'Bad Request'} - f_args[param.name] = arg - session['d']['cqi_client_lock'].acquire() - try: - return_value = f(**f_args) - except BrokenPipeError: - return_value = { - 'code': 500, - 'msg': 'Internal Server Error' - } - except cqi.errors.CQiException as e: - return_value = { - 'code': 500, - 'msg': 'Internal Server Error', - 'payload': { - 'code': e.code, - 'desc': e.description, - 'msg': e.name - } - } - finally: - session['d']['cqi_client_lock'].release() - return return_value - return wrapped - - -def lookups_by_cpos(corpus, cpos_list): +def lookups_by_cpos(corpus: Corpus, cpos_list: List[int]) -> Dict: lookups = {} lookups['cpos_lookup'] = {cpos: {} for cpos in cpos_list} for attr in corpus.positional_attributes.list(): @@ -93,18 +39,22 @@ def lookups_by_cpos(corpus, cpos_list): return lookups -def partial_export_subcorpus(subcorpus, match_id_list, context=25): - if 
subcorpus.attrs['size'] == 0: +def partial_export_subcorpus( + subcorpus: Subcorpus, + match_id_list: List[int], + context: int = 25 +) -> Dict: + if subcorpus.size == 0: return {"matches": []} match_boundaries = [] for match_id in match_id_list: - if match_id < 0 or match_id >= subcorpus.attrs['size']: + if match_id < 0 or match_id >= subcorpus.size: continue match_boundaries.append( ( match_id, - subcorpus.dump(subcorpus.attrs['fields']['match'], match_id, match_id)[0], - subcorpus.dump(subcorpus.attrs['fields']['matchend'], match_id, match_id)[0] + subcorpus.dump(subcorpus.fields['match'], match_id, match_id)[0], + subcorpus.dump(subcorpus.fields['matchend'], match_id, match_id)[0] ) ) cpos_set = set() @@ -120,14 +70,14 @@ def partial_export_subcorpus(subcorpus, match_id_list, context=25): lc_rbound = match_start - 1 lc = (lc_lbound, lc_rbound) cpos_list_lbound = lc_lbound - if match_end == (subcorpus.collection.corpus.attrs['size'] - 1) or context == 0: + if match_end == (subcorpus.collection.corpus.size - 1) or context == 0: rc = None cpos_list_rbound = match_end else: rc_lbound = match_end + 1 rc_rbound = min( (match_end + context), - (subcorpus.collection.corpus.attrs['size'] - 1) + (subcorpus.collection.corpus.size - 1) ) rc = (rc_lbound, rc_rbound) cpos_list_rbound = rc_rbound @@ -138,15 +88,20 @@ def partial_export_subcorpus(subcorpus, match_id_list, context=25): return {'matches': matches, **lookups} -def export_subcorpus(subcorpus, context=25, cutoff=float('inf'), offset=0): - if subcorpus.attrs['size'] == 0: +def export_subcorpus( + subcorpus: Subcorpus, + context: int = 25, + cutoff: float = float('inf'), + offset: int = 0 +) -> Dict: + if subcorpus.size == 0: return {"matches": []} first_match = max(0, offset) - last_match = min((offset + cutoff - 1), (subcorpus.attrs['size'] - 1)) + last_match = min((offset + cutoff - 1), (subcorpus.size - 1)) match_boundaries = zip( - list(range(first_match, last_match + 1)), - 
subcorpus.dump(subcorpus.attrs['fields']['match'], first_match, last_match), - subcorpus.dump(subcorpus.attrs['fields']['matchend'], first_match, last_match) + range(first_match, last_match + 1), + subcorpus.dump(subcorpus.fields['match'], first_match, last_match), + subcorpus.dump(subcorpus.fields['matchend'], first_match, last_match) ) cpos_set = set() matches = [] @@ -160,14 +115,14 @@ def export_subcorpus(subcorpus, context=25, cutoff=float('inf'), offset=0): lc_rbound = match_start - 1 lc = (lc_lbound, lc_rbound) cpos_list_lbound = lc_lbound - if match_end == (subcorpus.collection.corpus.attrs['size'] - 1) or context == 0: + if match_end == (subcorpus.collection.corpus.size - 1) or context == 0: rc = None cpos_list_rbound = match_end else: rc_lbound = match_end + 1 rc_rbound = min( (match_end + context), - (subcorpus.collection.corpus.attrs['size'] - 1) + (subcorpus.collection.corpus.size - 1) ) rc = (rc_lbound, rc_rbound) cpos_list_rbound = rc_rbound diff --git a/app/corpora/cqi_over_socketio/__init__.py b/app/corpora/cqi_over_socketio/__init__.py deleted file mode 100644 index cd02bedd..00000000 --- a/app/corpora/cqi_over_socketio/__init__.py +++ /dev/null @@ -1,115 +0,0 @@ -from flask import session -from flask_login import current_user -from flask_socketio import ConnectionRefusedError -from threading import Lock -import cqi -from app import db, hashids, socketio -from app.decorators import socketio_login_required -from app.models import Corpus, CorpusStatus - - -''' -This package tunnels the Corpus Query interface (CQi) protocol through -Socket.IO (SIO) by wrapping each CQi function in a seperate SIO event. - -This module only handles the SIO connect/disconnect, which handles the setup -and teardown of necessary ressources for later use. Each CQi function has a -corresponding SIO event. The event handlers are spread across the different -modules within this package. - -Basic concept: -1. 
A client connects to the SIO namespace and provides the id of a corpus to be - analysed. - 1.1 The analysis session counter of the corpus is incremented. - 1.2 A CQiClient and a (Mutex) Lock belonging to it is created. - 1.3 Wait until the CQP server is running. - 1.4 Connect the CQiClient to the server. - 1.5 Save the CQiClient and the Lock in the session for subsequential use. -2. A client emits an event and may provide a single json object with necessary - arguments for the targeted CQi function. -3. A SIO event handler (decorated with cqi_over_socketio) gets executed. - - The event handler function defines all arguments. Hence the client - is sent as a single json object, the decorator decomposes it to fit - the functions signature. This also includes type checking and proper - use of the lock (acquire/release) mechanism. -4. Wait for more events -5. The client disconnects from the SIO namespace - 1.1 The analysis session counter of the corpus is decremented. - 1.2 The CQiClient and (Mutex) Lock belonging to it are teared down. -''' - - -NAMESPACE = '/corpora/corpus/corpus_analysis' - - -# Import all CQi over Socket.IO event handlers -from .cqi_corpora_corpus_subcorpora import * # noqa -from .cqi_corpora_corpus_structural_attributes import * # noqa -from .cqi_corpora_corpus_positional_attributes import * # noqa -from .cqi_corpora_corpus_alignment_attributes import * # noqa -from .cqi_corpora_corpus import * # noqa -from .cqi_corpora import * # noqa -from .cqi import * # noqa - - -@socketio.on('connect', namespace=NAMESPACE) -@socketio_login_required -def connect(auth): - # the auth variable is used in a hacky way. It contains the corpus id for - # which a corpus analysis session should be started. 
- corpus_id = hashids.decode(auth['corpus_id']) - corpus = Corpus.query.get(corpus_id) - if corpus is None: - # return {'code': 404, 'msg': 'Not Found'} - raise ConnectionRefusedError('Not Found') - if not (corpus.user == current_user - or current_user.is_following_corpus(corpus) - or current_user.is_administrator()): - # return {'code': 403, 'msg': 'Forbidden'} - raise ConnectionRefusedError('Forbidden') - if corpus.status not in [ - CorpusStatus.BUILT, - CorpusStatus.STARTING_ANALYSIS_SESSION, - CorpusStatus.RUNNING_ANALYSIS_SESSION, - CorpusStatus.CANCELING_ANALYSIS_SESSION - ]: - # return {'code': 424, 'msg': 'Failed Dependency'} - raise ConnectionRefusedError('Failed Dependency') - if corpus.num_analysis_sessions is None: - corpus.num_analysis_sessions = 0 - db.session.commit() - corpus.num_analysis_sessions = Corpus.num_analysis_sessions + 1 - db.session.commit() - retry_counter = 20 - while corpus.status != CorpusStatus.RUNNING_ANALYSIS_SESSION: - if retry_counter == 0: - corpus.num_analysis_sessions = Corpus.num_analysis_sessions - 1 - db.session.commit() - return {'code': 408, 'msg': 'Request Timeout'} - socketio.sleep(3) - retry_counter -= 1 - db.session.refresh(corpus) - cqi_client = cqi.CQiClient(f'cqpserver_{corpus_id}') - session['d'] = { - 'corpus_id': corpus_id, - 'cqi_client': cqi_client, - 'cqi_client_lock': Lock(), - } - # return {'code': 200, 'msg': 'OK'} - - -@socketio.on('disconnect', namespace=NAMESPACE) -def disconnect(): - if 'd' not in session: - return - session['d']['cqi_client_lock'].acquire() - try: - session['d']['cqi_client'].disconnect() - except (BrokenPipeError, cqi.errors.CQiException): - pass - session['d']['cqi_client_lock'].release() - corpus = Corpus.query.get(session['d']['corpus_id']) - corpus.num_analysis_sessions = Corpus.num_analysis_sessions - 1 - db.session.commit() - session.pop('d') - # return {'code': 200, 'msg': 'OK'} diff --git a/app/corpora/cqi_over_socketio/cqi.py b/app/corpora/cqi_over_socketio/cqi.py deleted 
file mode 100644 index 9d0fbfd6..00000000 --- a/app/corpora/cqi_over_socketio/cqi.py +++ /dev/null @@ -1,43 +0,0 @@ -from socket import gaierror -import cqi -from app import socketio -from app.decorators import socketio_login_required -from . import NAMESPACE as ns -from .utils import cqi_over_socketio - - -@socketio.on('cqi.connect', namespace=ns) -@socketio_login_required -@cqi_over_socketio -def cqi_connect(cqi_client: cqi.CQiClient): - try: - cqi_status = cqi_client.connect() - except gaierror as e: - return { - 'code': 500, - 'msg': 'Internal Server Error', - 'payload': {'code': e.args[0], 'desc': e.args[1]} - } - payload = {'code': cqi_status, - 'msg': cqi.api.specification.lookup[cqi_status]} - return {'code': 200, 'msg': 'OK', 'payload': payload} - - -@socketio.on('cqi.disconnect', namespace=ns) -@socketio_login_required -@cqi_over_socketio -def cqi_disconnect(cqi_client: cqi.CQiClient): - cqi_status = cqi_client.disconnect() - payload = {'code': cqi_status, - 'msg': cqi.api.specification.lookup[cqi_status]} - return {'code': 200, 'msg': 'OK', 'payload': payload} - - -@socketio.on('cqi.ping', namespace=ns) -@socketio_login_required -@cqi_over_socketio -def cqi_ping(cqi_client: cqi.CQiClient): - cqi_status = cqi_client.ping() - payload = {'code': cqi_status, - 'msg': cqi.api.specification.lookup[cqi_status]} - return {'code': 200, 'msg': 'OK', 'payload': payload} diff --git a/app/corpora/cqi_over_socketio/cqi_corpora.py b/app/corpora/cqi_over_socketio/cqi_corpora.py deleted file mode 100644 index 7b73429a..00000000 --- a/app/corpora/cqi_over_socketio/cqi_corpora.py +++ /dev/null @@ -1,22 +0,0 @@ -import cqi -from app import socketio -from app.decorators import socketio_login_required -from . 
import NAMESPACE as ns -from .utils import cqi_over_socketio - - -@socketio.on('cqi.corpora.get', namespace=ns) -@socketio_login_required -@cqi_over_socketio -def cqi_corpora_get(cqi_client: cqi.CQiClient, corpus_name: str): - cqi_corpus = cqi_client.corpora.get(corpus_name) - payload = {**cqi_corpus.attrs} - return {'code': 200, 'msg': 'OK', 'payload': payload} - - -@socketio.on('cqi.corpora.list', namespace=ns) -@socketio_login_required -@cqi_over_socketio -def cqi_corpora_list(cqi_client: cqi.CQiClient): - payload = [{**x.attrs} for x in cqi_client.corpora.list()] - return {'code': 200, 'msg': 'OK', 'payload': payload} diff --git a/app/corpora/cqi_over_socketio/cqi_corpora_corpus.py b/app/corpora/cqi_over_socketio/cqi_corpora_corpus.py deleted file mode 100644 index 9a976dd7..00000000 --- a/app/corpora/cqi_over_socketio/cqi_corpora_corpus.py +++ /dev/null @@ -1,85 +0,0 @@ -from flask import session -import cqi -import math -from app import db, socketio -from app.decorators import socketio_login_required -from app.models import Corpus -from . 
import NAMESPACE as ns -from .utils import cqi_over_socketio, lookups_by_cpos - - -@socketio.on('cqi.corpora.corpus.drop', namespace=ns) -@socketio_login_required -@cqi_over_socketio -def cqi_corpora_corpus_drop(cqi_client: cqi.CQiClient, corpus_name: str): - cqi_corpus = cqi_client.corpora.get(corpus_name) - cqi_status = cqi_corpus.drop() - payload = {'code': cqi_status, - 'msg': cqi.api.specification.lookup[cqi_status]} - return {'code': 200, 'msg': 'OK', 'payload': payload} - - -@socketio.on('cqi.corpora.corpus.query', namespace=ns) -@socketio_login_required -@cqi_over_socketio -def cqi_corpora_corpus_query(cqi_client: cqi.CQiClient, corpus_name: str, subcorpus_name: str, query: str): # noqa - cqi_corpus = cqi_client.corpora.get(corpus_name) - cqi_status = cqi_corpus.query(subcorpus_name, query) - payload = {'code': cqi_status, - 'msg': cqi.api.specification.lookup[cqi_status]} - return {'code': 200, 'msg': 'OK', 'payload': payload} - - -############################################################################### -# nopaque specific CQi extensions # -############################################################################### -@socketio.on('cqi.corpora.corpus.update_db', namespace=ns) -@socketio_login_required -@cqi_over_socketio -def cqi_corpora_corpus_update_db(cqi_client: cqi.CQiClient, corpus_name: str): - corpus = Corpus.query.get(session['d']['corpus_id']) - corpus.num_tokens = cqi_client.corpora.get(corpus_name).attrs['size'] - db.session.commit() - - -@socketio.on('cqi.corpora.corpus.paginate', namespace=ns) -@socketio_login_required -@cqi_over_socketio -def cqi_corpora_corpus_paginate(cqi_client: cqi.CQiClient, corpus_name: str, page: int = 1, per_page: int = 20): # noqa - cqi_corpus = cqi_client.corpora.get(corpus_name) - # Sanity checks - if ( - per_page < 1 - or page < 1 - or ( - cqi_corpus.attrs['size'] > 0 - and page > math.ceil(cqi_corpus.attrs['size'] / per_page) - ) - ): - return {'code': 416, 'msg': 'Range Not Satisfiable'} - first_cpos = 
(page - 1) * per_page - last_cpos = min(cqi_corpus.attrs['size'], first_cpos + per_page) - cpos_list = [*range(first_cpos, last_cpos)] - lookups = lookups_by_cpos(cqi_corpus, cpos_list) - payload = {} - # the items for the current page - payload['items'] = [cpos_list] - # the lookups for the items - payload['lookups'] = lookups - # the total number of items matching the query - payload['total'] = cqi_corpus.attrs['size'] - # the number of items to be displayed on a page. - payload['per_page'] = per_page - # The total number of pages - payload['pages'] = math.ceil(payload['total'] / payload['per_page']) - # the current page number (1 indexed) - payload['page'] = page if payload['pages'] > 0 else None - # True if a previous page exists - payload['has_prev'] = payload['page'] > 1 if payload['page'] else False - # True if a next page exists. - payload['has_next'] = payload['page'] < payload['pages'] if payload['page'] else False # noqa - # Number of the previous page. - payload['prev_num'] = payload['page'] - 1 if payload['has_prev'] else None - # Number of the next page - payload['next_num'] = payload['page'] + 1 if payload['has_next'] else None - return {'code': 200, 'msg': 'OK', 'payload': payload} diff --git a/app/corpora/cqi_over_socketio/cqi_corpora_corpus_alignment_attributes.py b/app/corpora/cqi_over_socketio/cqi_corpora_corpus_alignment_attributes.py deleted file mode 100644 index d6382eaa..00000000 --- a/app/corpora/cqi_over_socketio/cqi_corpora_corpus_alignment_attributes.py +++ /dev/null @@ -1,24 +0,0 @@ -import cqi -from app import socketio -from app.decorators import socketio_login_required -from . 
import NAMESPACE as ns -from .utils import cqi_over_socketio - - -@socketio.on('cqi.corpora.corpus.alignment_attributes.get', namespace=ns) -@socketio_login_required -@cqi_over_socketio -def cqi_corpora_corpus_alignment_attributes_get(cqi_client: cqi.CQiClient, corpus_name: str, alignment_attribute_name: str): # noqa - cqi_corpus = cqi_client.corpora.get(corpus_name) - cqi_alignment_attribute = cqi_corpus.alignment_attributes.get(alignment_attribute_name) # noqa - payload = {**cqi_alignment_attribute.attrs} - return {'code': 200, 'msg': 'OK', 'payload': payload} - - -@socketio.on('cqi.corpora.corpus.alignment_attributes.list', namespace=ns) -@socketio_login_required -@cqi_over_socketio -def cqi_corpora_corpus_alignment_attributes_list(cqi_client: cqi.CQiClient, corpus_name: str): # noqa - cqi_corpus = cqi_client.corpora.get(corpus_name) - payload = [{**x.attrs} for x in cqi_corpus.alignment_attributes.list()] - return {'code': 200, 'msg': 'OK', 'payload': payload} diff --git a/app/corpora/cqi_over_socketio/cqi_corpora_corpus_positional_attributes.py b/app/corpora/cqi_over_socketio/cqi_corpora_corpus_positional_attributes.py deleted file mode 100644 index 41d55bb3..00000000 --- a/app/corpora/cqi_over_socketio/cqi_corpora_corpus_positional_attributes.py +++ /dev/null @@ -1,24 +0,0 @@ -import cqi -from app import socketio -from app.decorators import socketio_login_required -from . 
import NAMESPACE as ns -from .utils import cqi_over_socketio - - -@socketio.on('cqi.corpora.corpus.positional_attributes.get', namespace=ns) -@socketio_login_required -@cqi_over_socketio -def cqi_corpora_corpus_positional_attributes_get(cqi_client: cqi.CQiClient, corpus_name: str, positional_attribute_name: str): # noqa - cqi_corpus = cqi_client.corpora.get(corpus_name) - cqi_positional_attribute = cqi_corpus.positional_attributes.get(positional_attribute_name) # noqa - payload = {**cqi_positional_attribute.attrs} - return {'code': 200, 'msg': 'OK', 'payload': payload} - - -@socketio.on('cqi.corpora.corpus.positional_attributes.list', namespace=ns) -@socketio_login_required -@cqi_over_socketio -def cqi_corpora_corpus_positional_attributes_list(cqi_client: cqi.CQiClient, corpus_name: str): # noqa - cqi_corpus = cqi_client.corpora.get(corpus_name) - payload = [{**x.attrs} for x in cqi_corpus.positional_attributes.list()] - return {'code': 200, 'msg': 'OK', 'payload': payload} diff --git a/app/corpora/cqi_over_socketio/cqi_corpora_corpus_structural_attributes.py b/app/corpora/cqi_over_socketio/cqi_corpora_corpus_structural_attributes.py deleted file mode 100644 index 11178906..00000000 --- a/app/corpora/cqi_over_socketio/cqi_corpora_corpus_structural_attributes.py +++ /dev/null @@ -1,24 +0,0 @@ -import cqi -from app import socketio -from app.decorators import socketio_login_required -from . 
import NAMESPACE as ns -from .utils import cqi_over_socketio - - -@socketio.on('cqi.corpora.corpus.structural_attributes.get', namespace=ns) -@socketio_login_required -@cqi_over_socketio -def cqi_corpora_corpus_structural_attributes_get(cqi_client: cqi.CQiClient, corpus_name: str, structural_attribute_name: str): # noqa - cqi_corpus = cqi_client.corpora.get(corpus_name) - cqi_structural_attribute = cqi_corpus.structural_attributes.get(structural_attribute_name) # noqa - payload = {**cqi_structural_attribute.attrs} - return {'code': 200, 'msg': 'OK', 'payload': payload} - - -@socketio.on('cqi.corpora.corpus.structural_attributes.list', namespace=ns) -@socketio_login_required -@cqi_over_socketio -def cqi_corpora_corpus_structural_attributes_list(cqi_client: cqi.CQiClient, corpus_name: str): # noqa - cqi_corpus = cqi_client.corpora.get(corpus_name) - payload = [{**x.attrs} for x in cqi_corpus.structural_attributes.list()] - return {'code': 200, 'msg': 'OK', 'payload': payload} diff --git a/app/corpora/cqi_over_socketio/cqi_corpora_corpus_subcorpora.py b/app/corpora/cqi_over_socketio/cqi_corpora_corpus_subcorpora.py deleted file mode 100644 index 578968be..00000000 --- a/app/corpora/cqi_over_socketio/cqi_corpora_corpus_subcorpora.py +++ /dev/null @@ -1,125 +0,0 @@ -import cqi -import math -from app import socketio -from app.decorators import socketio_login_required -from . 
import NAMESPACE as ns -from .utils import cqi_over_socketio, export_subcorpus, partial_export_subcorpus - - -@socketio.on('cqi.corpora.corpus.subcorpora.get', namespace=ns) -@socketio_login_required -@cqi_over_socketio -def cqi_corpora_corpus_subcorpora_get(cqi_client: cqi.CQiClient, corpus_name: str, subcorpus_name: str): # noqa - cqi_corpus = cqi_client.corpora.get(corpus_name) - cqi_subcorpus = cqi_corpus.subcorpora.get(subcorpus_name) - payload = {**cqi_subcorpus.attrs} - return {'code': 200, 'msg': 'OK', 'payload': payload} - - -@socketio.on('cqi.corpora.corpus.subcorpora.list', namespace=ns) -@socketio_login_required -@cqi_over_socketio -def cqi_corpora_corpus_subcorpora_list(cqi_client: cqi.CQiClient, corpus_name: str): # noqa - cqi_corpus = cqi_client.corpora.get(corpus_name) - payload = [{**x.attrs} for x in cqi_corpus.subcorpora.list()] - return {'code': 200, 'msg': 'OK', 'payload': payload} - - -@socketio.on('cqi.corpora.corpus.subcorpora.subcorpus.drop', namespace=ns) -@socketio_login_required -@cqi_over_socketio -def cqi_corpora_corpus_subcorpora_subcorpus_drop(cqi_client: cqi.CQiClient, corpus_name: str, subcorpus_name: str): # noqa - cqi_corpus = cqi_client.corpora.get(corpus_name) - cqi_subcorpus = cqi_corpus.subcorpora.get(subcorpus_name) - cqi_status = cqi_subcorpus.drop() - payload = {'code': cqi_status, - 'msg': cqi.api.specification.lookup[cqi_status]} - return {'code': 200, 'msg': 'OK', 'payload': payload} - - -@socketio.on('cqi.corpora.corpus.subcorpora.subcorpus.dump', namespace=ns) -@socketio_login_required -@cqi_over_socketio -def cqi_corpora_corpus_subcorpora_subcorpus_dump(cqi_client: cqi.CQiClient): - return {'code': 501, 'msg': 'Not Implemented'} - - -@socketio.on('cqi.corpora.corpus.subcorpora.subcorpus.fdist_1', namespace=ns) -@socketio_login_required -@cqi_over_socketio -def cqi_corpora_corpus_subcorpora_subcorpus_fdist_1(cqi_client: cqi.CQiClient): - return {'code': 501, 'msg': 'Not Implemented'} - - 
-@socketio.on('cqi.corpora.corpus.subcorpora.subcorpus.fdist_2', namespace=ns) -@socketio_login_required -@cqi_over_socketio -def cqi_corpora_corpus_subcorpora_subcorpus_fdist_2(cqi_client: cqi.CQiClient): - return {'code': 501, 'msg': 'Not Implemented'} - - -############################################################################### -# nopaque specific CQi extensions # -############################################################################### -@socketio.on('cqi.corpora.corpus.subcorpora.subcorpus.paginate', namespace=ns) -@socketio_login_required -@cqi_over_socketio -def cqi_corpora_corpus_subcorpora_subcorpus_paginate(cqi_client: cqi.CQiClient, corpus_name: str, subcorpus_name: str, context: int = 50, page: int = 1, per_page: int = 20): # noqa - cqi_corpus = cqi_client.corpora.get(corpus_name) - cqi_subcorpus = cqi_corpus.subcorpora.get(subcorpus_name) - # Sanity checks - if ( - per_page < 1 - or page < 1 - or ( - cqi_subcorpus.attrs['size'] > 0 - and page > math.ceil(cqi_subcorpus.attrs['size'] / per_page) - ) - ): - return {'code': 416, 'msg': 'Range Not Satisfiable'} - offset = (page - 1) * per_page - cutoff = per_page - cqi_results_export = export_subcorpus( - cqi_subcorpus, context=context, cutoff=cutoff, offset=offset) - payload = {} - # the items for the current page - payload['items'] = cqi_results_export.pop('matches') - # the lookups for the items - payload['lookups'] = cqi_results_export - # the total number of items matching the query - payload['total'] = cqi_subcorpus.attrs['size'] - # the number of items to be displayed on a page. - payload['per_page'] = per_page - # The total number of pages - payload['pages'] = math.ceil(payload['total'] / payload['per_page']) - # the current page number (1 indexed) - payload['page'] = page if payload['pages'] > 0 else None - # True if a previous page exists - payload['has_prev'] = payload['page'] > 1 if payload['page'] else False - # True if a next page exists. 
- payload['has_next'] = payload['page'] < payload['pages'] if payload['page'] else False # noqa - # Number of the previous page. - payload['prev_num'] = payload['page'] - 1 if payload['has_prev'] else None - # Number of the next page - payload['next_num'] = payload['page'] + 1 if payload['has_next'] else None - return {'code': 200, 'msg': 'OK', 'payload': payload} - - -@socketio.on('cqi.corpora.corpus.subcorpora.subcorpus.partial_export', namespace=ns) -@socketio_login_required -@cqi_over_socketio -def cqi_corpora_corpus_subcorpora_subcorpus_partial_export(cqi_client: cqi.CQiClient, corpus_name: str, subcorpus_name: str, match_id_list: list, context: int = 50): # noqa - cqi_corpus = cqi_client.corpora.get(corpus_name) - cqi_subcorpus = cqi_corpus.subcorpora.get(subcorpus_name) - cqi_subcorpus_partial_export = partial_export_subcorpus(cqi_subcorpus, match_id_list, context=context) - return {'code': 200, 'msg': 'OK', 'payload': cqi_subcorpus_partial_export} - - -@socketio.on('cqi.corpora.corpus.subcorpora.subcorpus.export', namespace=ns) -@socketio_login_required -@cqi_over_socketio -def cqi_corpora_corpus_subcorpora_subcorpus_export(cqi_client: cqi.CQiClient, corpus_name: str, subcorpus_name: str, context: int = 50): # noqa - cqi_corpus = cqi_client.corpora.get(corpus_name) - cqi_subcorpus = cqi_corpus.subcorpora.get(subcorpus_name) - cqi_subcorpus_export = export_subcorpus(cqi_subcorpus, context=context) - return {'code': 200, 'msg': 'OK', 'payload': cqi_subcorpus_export} diff --git a/app/corpora/json_routes.py b/app/corpora/json_routes.py index 6005fc48..6a3b5f29 100644 --- a/app/corpora/json_routes.py +++ b/app/corpora/json_routes.py @@ -7,6 +7,8 @@ from app.decorators import content_negotiation from app.models import Corpus, CorpusFollowerRole from . 
import bp from .decorators import corpus_follower_permission_required, corpus_owner_or_admin_required +import nltk +from string import punctuation @bp.route('/', methods=['DELETE']) @@ -56,6 +58,18 @@ def build_corpus(corpus_id): } return response_data, 202 +@bp.route('/stopwords') +@content_negotiation(produces='application/json') +def get_stopwords(): + nltk.download('stopwords') + languages = ["german", "english", "catalan", "greek", "spanish", "french", "italian", "russian", "chinese"] + stopwords = {} + for language in languages: + stopwords[language] = nltk.corpus.stopwords.words(language) + stopwords['punctuation'] = list(punctuation) + ['—', '|', '–', '“', '„', '--'] + stopwords['user_stopwords'] = [] + response_data = stopwords + return response_data, 202 # @bp.route('//generate-share-link', methods=['POST']) # @corpus_follower_permission_required('MANAGE_FOLLOWERS') diff --git a/app/daemon/corpus_utils.py b/app/daemon/corpus_utils.py index 4d807c14..5b885db7 100644 --- a/app/daemon/corpus_utils.py +++ b/app/daemon/corpus_utils.py @@ -28,19 +28,19 @@ def _create_build_corpus_service(corpus): ''' ## Command ## ''' command = ['bash', '-c'] command.append( - f'mkdir /corpora/data/nopaque_{corpus.id}' + f'mkdir /corpora/data/nopaque-{corpus.hashid.lower()}' ' && ' 'cwb-encode' ' -c utf8' - f' -d /corpora/data/nopaque_{corpus.id}' + f' -d /corpora/data/nopaque-{corpus.hashid.lower()}' ' -f /root/files/corpus.vrt' - f' -R /usr/local/share/cwb/registry/nopaque_{corpus.id}' + f' -R /usr/local/share/cwb/registry/nopaque-{corpus.hashid.lower()}' ' -P pos -P lemma -P simple_pos' ' -S ent:0+type -S s:0' ' -S text:0+address+author+booktitle+chapter+editor+institution+journal+pages+publisher+publishing_year+school+title' ' -xsB -9' ' && ' - f'cwb-make -V NOPAQUE_{corpus.id}' + f'cwb-make -V NOPAQUE-{corpus.hashid.upper()}' ) ''' ## Constraints ## ''' constraints = ['node.role==worker'] @@ -149,11 +149,15 @@ def _create_cqpserver_container(corpus): ''' ### Corpus data 
volume ### ''' data_volume_source = os.path.join(corpus.path, 'cwb', 'data') data_volume_target = '/corpora/data' + # data_volume_source = os.path.join(corpus.path, 'cwb', 'data', f'nopaque_{corpus.id}') + # data_volume_target = f'/corpora/data/nopaque_{corpus.hashid.lower()}' data_volume = f'{data_volume_source}:{data_volume_target}:rw' volumes.append(data_volume) ''' ### Corpus registry volume ### ''' registry_volume_source = os.path.join(corpus.path, 'cwb', 'registry') registry_volume_target = '/usr/local/share/cwb/registry' + # registry_volume_source = os.path.join(corpus.path, 'cwb', 'registry', f'nopaque_{corpus.id}') + # registry_volume_target = f'/usr/local/share/cwb/registry/nopaque_{corpus.hashid.lower()}' registry_volume = f'{registry_volume_source}:{registry_volume_target}:rw' volumes.append(registry_volume) # Check if a cqpserver container already exists. If this is the case, diff --git a/app/jobs/json_routes.py b/app/jobs/json_routes.py index 7bedc726..9f1e1b2f 100644 --- a/app/jobs/json_routes.py +++ b/app/jobs/json_routes.py @@ -42,7 +42,6 @@ def job_log(job_id): with open(os.path.join(job.path, 'pipeline_data', 'logs', 'pyflow_log.txt')) as log_file: log = log_file.read() response_data = { - 'message': '', 'jobLog': log } return response_data, 200 diff --git a/app/models.py b/app/models.py index a7cc77e9..8121f7a9 100644 --- a/app/models.py +++ b/app/models.py @@ -1607,9 +1607,14 @@ class Corpus(HashidMixin, db.Model): return corpus def build(self): + build_dir = os.path.join(self.path, 'cwb') + shutil.rmtree(build_dir, ignore_errors=True) + os.mkdir(build_dir) + os.mkdir(os.path.join(build_dir, 'data')) + os.mkdir(os.path.join(build_dir, 'registry')) corpus_element = ET.fromstring('\n') for corpus_file in self.files: - normalized_vrt_path = os.path.join(self.path, 'cwb', f'{corpus_file.id}.norm.vrt') + normalized_vrt_path = os.path.join(build_dir, f'{corpus_file.id}.norm.vrt') try: normalize_vrt_file(corpus_file.path, normalized_vrt_path) except: 
@@ -1636,7 +1641,7 @@ class Corpus(HashidMixin, db.Model): # corpus_element.insert(1, text_element) corpus_element.append(text_element) ET.ElementTree(corpus_element).write( - os.path.join(self.path, 'cwb', 'corpus.vrt'), + os.path.join(build_dir, 'corpus.vrt'), encoding='utf-8' ) self.status = CorpusStatus.SUBMITTED diff --git a/app/static/images/workshops/fgho_sommerschule_2023/add_corpus_file.png b/app/static/images/workshops/fgho_sommerschule_2023/add_corpus_file.png new file mode 100644 index 00000000..e5518b07 Binary files /dev/null and b/app/static/images/workshops/fgho_sommerschule_2023/add_corpus_file.png differ diff --git a/app/static/images/workshops/fgho_sommerschule_2023/corpus.png b/app/static/images/workshops/fgho_sommerschule_2023/corpus.png new file mode 100644 index 00000000..3ebc41b3 Binary files /dev/null and b/app/static/images/workshops/fgho_sommerschule_2023/corpus.png differ diff --git a/app/static/images/workshops/fgho_sommerschule_2023/create_corpus.png b/app/static/images/workshops/fgho_sommerschule_2023/create_corpus.png new file mode 100644 index 00000000..246a28d7 Binary files /dev/null and b/app/static/images/workshops/fgho_sommerschule_2023/create_corpus.png differ diff --git a/app/static/images/workshops/fgho_sommerschule_2023/dashboard.png b/app/static/images/workshops/fgho_sommerschule_2023/dashboard.png new file mode 100644 index 00000000..be70f4d6 Binary files /dev/null and b/app/static/images/workshops/fgho_sommerschule_2023/dashboard.png differ diff --git a/app/static/images/workshops/fgho_sommerschule_2023/empty_corpus.png b/app/static/images/workshops/fgho_sommerschule_2023/empty_corpus.png new file mode 100644 index 00000000..d32cc70d Binary files /dev/null and b/app/static/images/workshops/fgho_sommerschule_2023/empty_corpus.png differ diff --git a/app/static/js/App.js b/app/static/js/App.js index 0741592d..cfcb3a05 100644 --- a/app/static/js/App.js +++ b/app/static/js/App.js @@ -8,19 +8,19 @@ class App { 
this.socket.on('PATCH', (patch) => {this.onPatch(patch);}); } - getUser(userId, backrefs=true, relationships=true) { + getUser(userId) { if (userId in this.data.promises.getUser) { return this.data.promises.getUser[userId]; } this.data.promises.getUser[userId] = new Promise((resolve, reject) => { - this.socket.emit('GET /users/', userId, backrefs, relationships, (response) => { - if (response.status !== 200) { - reject(response); - return; + this.socket.emit('GET /users/', userId, (response) => { + if (response.status === 200) { + this.data.users[userId] = response.body; + resolve(this.data.users[userId]); + } else { + reject(`[${response.status}] ${response.statusText}`); } - this.data.users[userId] = response.body; - resolve(this.data.users[userId]); }); }); diff --git a/app/static/js/CorpusAnalysis/CQiClient.js b/app/static/js/CorpusAnalysis/CQiClient.js deleted file mode 100644 index 4a3706e3..00000000 --- a/app/static/js/CorpusAnalysis/CQiClient.js +++ /dev/null @@ -1,485 +0,0 @@ -class CQiClient { - constructor(corpusId) { - this.socket = io( - '/corpora/corpus/corpus_analysis', - { - auth: {corpus_id: corpusId}, - transports: ['websocket'], - upgrade: false - } - ); - this.connected = false; - this.corpora = new CQiCorpusCollection(this.socket); - } - - connect() { - return new Promise((resolve, reject) => { - this.socket.emit('cqi.connect', response => { - if (response.code === 200) { - this.connected = true; - resolve(response.payload); - } else { - reject(response); - } - }); - }); - } - - disconnect() { - return new Promise((resolve, reject) => { - this.socket.emit('cqi.disconnect', response => { - if (response.code === 200) { - this.connected = false; - resolve(response.payload); - } else { - reject(response); - } - }); - }); - } - - ping() { - return new Promise((resolve, reject) => { - this.socket.emit('cqi.ping', response => { - if (response.code === 200) { - resolve(response.payload); - } else { - reject(response); - } - }); - }); - } -} - - -class 
CQiCorpusCollection { - constructor(socket) { - this.socket = socket; - } - - get(corpusName) { - return new Promise((resolve, reject) => { - const args = {corpus_name: corpusName}; - - this.socket.emit('cqi.corpora.get', args, response => { - if (response.code === 200) { - resolve(new CQiCorpus(this.socket, response.payload)); - } else { - reject(response); - } - }); - }); - } - - list() { - return new Promise((resolve, reject) => { - this.socket.emit('cqi.corpora.list', response => { - if (response.code === 200) { - resolve(response.payload.map(x => {return new CQiSubcorpus(this.socket, x);})); - } else { - reject(response); - } - }); - }); - } -} - - -class CQiCorpus { - constructor(socket, attrs) { - this.socket = socket; - this.charset = attrs.charset; - this.name = attrs.name; - this.properties = attrs.properties; - this.size = attrs.size; - this.alignmentAttributes = new CQiAlignmentAttributeCollection(this.socket, this); - this.positionalAttributes = new CQiPositionalAttributeCollection(this.socket, this); - this.structuralAttributes = new CQiStructuralAttributeCollection(this.socket, this); - this.subcorpora = new CQiSubcorpusCollection(this.socket, this); - } - - drop() { - return new Promise((resolve, reject) => { - const args = {corpus_name: this.name}; - - this.socket.emit('cqi.corpora.corpus.drop', args, response => { - if (response.code === 200) { - resolve(response.payload); - } else { - reject(response); - } - }); - }); - } - - query(subcorpus_name, queryString) { - return new Promise((resolve, reject) => { - const args = { - corpus_name: this.name, - subcorpus_name: subcorpus_name, - query: queryString - }; - - this.socket.emit('cqi.corpora.corpus.query', args, response => { - if (response.code === 200) { - resolve(response.payload); - } else { - reject(response); - } - }); - }); - } - - // nopaque specific CQi extension - paginate(page=1, perPage=20) { - return new Promise((resolve, reject) => { - const args = {corpus_name: this.name, page: page, 
per_page: perPage}; - - this.socket.emit('cqi.corpora.corpus.paginate', args, response => { - if (response.code === 200) { - resolve(response.payload); - } else { - reject(response); - } - }); - }); - } - - updateDb() { - const args = {corpus_name: this.name}; - - this.socket.emit('cqi.corpora.corpus.update_db', args); - } -} - - -class CQiAlignmentAttributeCollection { - constructor(socket, corpus) { - this.corpus = corpus; - this.socket = socket; - } - - get(alignmentAttributeName) { - return new Promise((resolve, reject) => { - const args = { - corpus_name: this.corpus.name, - alignment_attribute_name: alignmentAttributeName - }; - - this.socket.emit('cqi.corpora.corpus.alignment_attributes.get', args, response => { - if (response.code === 200) { - resolve(new CQiAlignmentAttribute(this.socket, this.corpus, response.payload)); - } else { - reject(response); - } - }); - }); - } - - list() { - return new Promise((resolve, reject) => { - const args = {corpus_name: this.corpus.name}; - - this.socket.emit('cqi.corpus.alignment_attributes.list', args, response => { - if (response.code === 200) { - resolve(response.payload.map(x => {return new CQiAlignmentAttribute(this.socket, this.corpus, x);})); - } else { - reject(response); - } - }); - }); - } -} - - -class CQiAlignmentAttribute { - constructor(socket, corpus, attrs) { - this.socket = socket; - this.corpus = corpus; - this.name = attrs.name; - this.size = attrs.size; - } -} - - -class CQiPositionalAttributeCollection { - constructor(socket, corpus) { - this.corpus = corpus; - this.socket = socket; - } - - get(positionalAttributeName) { - return new Promise((resolve, reject) => { - const args = { - corpus_name: this.corpus.name, - positional_attribute_name: positionalAttributeName - }; - - this.socket.emit('cqi.corpora.corpus.positional_attributes.get', args, response => { - if (response.code === 200) { - resolve(new CQiPositionalAttribute(this.socket, this.corpus, response.payload)); - } else { - reject(response); 
- } - }); - }); - } - - list() { - return new Promise((resolve, reject) => { - const args = {corpus_name: this.corpus.name}; - - this.socket.emit('cqi.corpus.positional_attributes.list', args, response => { - if (response.code === 200) { - resolve(response.payload.map(x => {return new CQiPositionalAttribute(this.socket, this.corpus, x);})); - } else { - reject(response); - } - }); - }); - } -} - - -class CQiPositionalAttribute { - constructor(socket, corpus, attrs) { - this.socket = socket; - this.corpus = corpus; - this.lexiconSize = attrs.lexicon_size; - this.name = attrs.name; - this.size = attrs.size; - } -} - - -class CQiStructuralAttributeCollection { - constructor(socket, corpus) { - this.corpus = corpus; - this.socket = socket; - } - - get(structuralAttributeName) { - return new Promise((resolve, reject) => { - const args = { - corpus_name: this.corpus.name, - structural_attribute_name: structuralAttributeName - }; - - this.socket.emit('cqi.corpora.corpus.structural_attributes.get', args, response => { - if (response.code === 200) { - resolve(new CQiStructuralAttribute(this.socket, this.corpus, response.payload)); - } else { - reject(response); - } - }); - }); - } - - list() { - return new Promise((resolve, reject) => { - const args = {corpus_name: this.corpus.name}; - - this.socket.emit('cqi.corpus.structural_attributes.list', args, response => { - if (response.code === 200) { - resolve(response.payload.map(x => {return new CQiStructuralAttribute(this.socket, this.corpus, x);})); - } else { - reject(response); - } - }); - }); - } -} - - -class CQiStructuralAttribute { - constructor(socket, corpus, attrs) { - this.socket = socket; - this.corpus = corpus; - this.hasValues = attrs.has_values; - this.name = attrs.name; - this.size = attrs.size; - } -} - - -class CQiSubcorpusCollection { - constructor(socket, corpus) { - this.corpus = corpus; - this.socket = socket; - } - - get(subcorpusName) { - return new Promise((resolve, reject) => { - const args = { - 
corpus_name: this.corpus.name, - subcorpus_name: subcorpusName - }; - this.socket.emit('cqi.corpora.corpus.subcorpora.get', args, response => { - if (response.code === 200) { - resolve(new CQiSubcorpus(this.socket, this.corpus, response.payload)); - } else { - reject(response); - } - }); - }); - } - - list() { - return new Promise((resolve, reject) => { - const args = {corpus_name: this.corpus.name}; - - this.socket.emit('cqi.corpora.corpus.subcorpora.list', args, response => { - if (response.code === 200) { - resolve(response.payload.map(x => {return new CQiSubcorpus(this.socket, this.corpus, x);})); - } else { - reject(response); - } - }); - }); - } -} - - -class CQiSubcorpus { - constructor(socket, corpus, attrs) { - this.socket = socket; - this.corpus = corpus; - this.fields = attrs.fields; - this.name = attrs.name; - this.size = attrs.size; - } - - drop() { - return new Promise((resolve, reject) => { - const args = {corpus_name: this.corpus.name, subcorpus_name: this.name}; - - this.socket.emit('cqi.corpora.corpus.subcorpora.subcorpus.drop', args, response => { - if (response.code === 200) { - resolve(response.payload); - } else { - reject(response); - } - }); - }); - } - - dump(field, first, last) { - return new Promise((resolve, reject) => { - const args = { - corpus_name: this.corpus.name, - subcorpus_name: this.name, - field: field, - first: first, - last: last - }; - - this.socket.emit('cqi.corpora.corpus.subcorpora.subcorpus.dump', args, response => { - if (response.code === 200) { - resolve(response.payload); - } else { - reject(response); - } - }); - }); - } - - export(context=50) { - return new Promise((resolve, reject) => { - const args = { - corpus_name: this.corpus.name, - subcorpus_name: this.name, - context: context - }; - - this.socket.emit('cqi.corpora.corpus.subcorpora.subcorpus.export', args, response => { - if (response.code === 200) { - resolve(response.payload); - } else { - reject(response); - } - }); - }); - } - - 
partial_export(matchIdList, context=50) { - return new Promise((resolve, reject) => { - const args = { - corpus_name: this.corpus.name, - subcorpus_name: this.name, - match_id_list: matchIdList, - context: context - }; - - this.socket.emit('cqi.corpora.corpus.subcorpora.subcorpus.partial_export', args, response => { - if (response.code === 200) { - resolve(response.payload); - } else { - reject(response); - } - }); - }); - } - - fdst_1(cutoff, field, attribute) { - return new Promise((resolve, reject) => { - const args = { - corpus_name: this.corpus.name, - subcorpus_name: this.name, - cutoff: cutoff, - field: field, - attribute: attribute - }; - - this.socket.emit('cqi.corpora.corpus.subcorpora.subcorpus.fdist_1', args, response => { - if (response.code === 200) { - resolve(response.payload); - } else { - reject(response); - } - }); - }); - } - - fdst_2(cutoff, field1, attribute1, field2, attribute2) { - return new Promise((resolve, reject) => { - const args = { - corpus_name: this.corpus.name, - subcorpus_name: this.name, - cutoff: cutoff, - field1: field1, - attribute1: attribute1, - field2: field2, - attribute2: attribute2 - }; - - this.socket.emit('cqi.corpora.corpus.subcorpora.subcorpus.fdist_1', args, response => { - if (response.code === 200) { - resolve(response.payload); - } else { - reject(response); - } - }); - }); - } - - // nopaque specific CQi extension - paginate(page=1, perPage=20, context=50) { - return new Promise((resolve, reject) => { - const args = { - corpus_name: this.corpus.name, - subcorpus_name: this.name, - page: page, - per_page: perPage, - context: context - }; - - this.socket.emit('cqi.corpora.corpus.subcorpora.subcorpus.paginate', args, response => { - if (response.code === 200) { - resolve(response.payload); - } else { - reject(response); - } - }); - }); - } -} diff --git a/app/static/js/CorpusAnalysis/CorpusAnalysisApp.js b/app/static/js/CorpusAnalysis/CorpusAnalysisApp.js index f5cb8712..d6274f32 100644 --- 
a/app/static/js/CorpusAnalysis/CorpusAnalysisApp.js +++ b/app/static/js/CorpusAnalysis/CorpusAnalysisApp.js @@ -1,13 +1,15 @@ class CorpusAnalysisApp { constructor(corpusId) { + this.corpusId = corpusId; + this.data = {}; // HTML elements this.elements = { container: document.querySelector('#corpus-analysis-app-container'), + extensionCards: document.querySelector('#corpus-analysis-app-extension-cards'), extensionTabs: document.querySelector('#corpus-analysis-app-extension-tabs'), - initModal: document.querySelector('#corpus-analysis-app-init-modal'), - overview: document.querySelector('#corpus-analysis-app-overview') + initModal: document.querySelector('#corpus-analysis-app-init-modal') }; // Materialize elements this.elements.m = { @@ -17,81 +19,100 @@ class CorpusAnalysisApp { this.extensions = {}; - this.settings = { - corpusId: corpusId - }; + this.settings = {}; } - init() { + async init() { this.disableActionElements(); this.elements.m.initModal.open(); - // Init data - this.data.cQiClient = new CQiClient(this.settings.corpusId); - this.data.cQiClient.connect() - .then(cQiStatus => { - return this.data.cQiClient.corpora.get(`NOPAQUE_${this.settings.corpusId}`); - }) - .then( - cQiCorpus => { - this.data.corpus = {o: cQiCorpus}; - // TODO: Don't do this here - cQiCorpus.updateDb(); - this.enableActionElements(); - for (let extension of Object.values(this.extensions)) {extension.init();} - this.elements.m.initModal.close(); - }, - cQiError => { - let errorsElement = this.elements.initModal.querySelector('.errors'); - let progressElement = this.elements.initModal.querySelector('.progress'); - errorsElement.innerText = JSON.stringify(cQiError); - errorsElement.classList.remove('hide'); - progressElement.classList.add('hide'); - if ('payload' in cQiError && 'code' in cQiError.payload && 'msg' in cQiError.payload) { - app.flash(`${cQiError.payload.code}: ${cQiError.payload.msg}`, 'error'); - } - } - ); - - // Add event listeners - for (let extensionSelectorElement 
of this.elements.overview.querySelectorAll('.extension-selector')) { + try { + // Setup CQi over SocketIO connection and gather data from the CQPServer + const statusTextElement = this.elements.initModal.querySelector('.status-text'); + statusTextElement.innerText = 'Creating CQi over SocketIO client...'; + const cqiClient = new cqi.CQiClient('/cqi_over_sio'); + statusTextElement.innerText += ' Done'; + statusTextElement.innerHTML = 'Waiting for the CQP server...'; + const response = await cqiClient.api.socket.emitWithAck('init', this.corpusId); + if (response.code !== 200) {throw new Error();} + statusTextElement.innerText += ' Done'; + statusTextElement.innerHTML = 'Connecting to the CQP server...'; + await cqiClient.connect('anonymous', ''); + statusTextElement.innerText += ' Done'; + statusTextElement.innerHTML = 'Building and receiving corpus data cache from the server (This may take a while)...'; + const cqiCorpus = await cqiClient.corpora.get(`NOPAQUE-${this.corpusId.toUpperCase()}`); + statusTextElement.innerText += ' Done'; + // TODO: Don't do this here + await cqiCorpus.updateDb(); + this.data.cqiClient = cqiClient; + this.data.cqiCorpus = cqiCorpus; + this.data.corpus = {o: cqiCorpus}; // legacy + // Initialize extensions + for (const extension of Object.values(this.extensions)) { + statusTextElement.innerHTML = `Initializing ${extension.name} extension...`; + await extension.init(); + statusTextElement.innerText += ' Done' + } + } catch (error) { + let errorString = ''; + if ('code' in error && error.code !== undefined && error.code !== null) { + errorString += `[${error.code}] `; + } + errorString += `${error.constructor.name}`; + if ('description' in error && error.description !== undefined && error.description !== null) { + errorString += `: ${error.description}`; + } + const errorsElement = this.elements.initModal.querySelector('.errors'); + const progressElement = this.elements.initModal.querySelector('.progress'); + errorsElement.innerText =
errorString; + errorsElement.classList.remove('hide'); + progressElement.classList.add('hide'); + return; + } + + for (const extensionSelectorElement of this.elements.extensionCards.querySelectorAll('.extension-selector')) { extensionSelectorElement.addEventListener('click', () => { this.elements.m.extensionTabs.select(extensionSelectorElement.dataset.target); }); } + + this.enableActionElements(); + this.elements.m.initModal.close(); } registerExtension(extension) { - if (extension.name in this.extensions) { - console.error(`Can't register extension ${extension.name}: Already registered`); - return; - } + if (extension.name in this.extensions) {return;} this.extensions[extension.name] = extension; - if ('cQiClient' in this.data && this.data.cQiClient.connected) {extension.init();} } disableActionElements() { - let actionElements = this.elements.container.querySelectorAll('.corpus-analysis-action'); - for (let actionElement of actionElements) { - if (actionElement.nodeName === 'INPUT') { - actionElement.disabled = true; - } else if (actionElement.nodeName === 'SELECT') { - actionElement.parentNode.querySelector('input.select-dropdown').disabled = true; - } else { - actionElement.classList.add('disabled'); + const actionElements = this.elements.container.querySelectorAll('.corpus-analysis-action'); + for (const actionElement of actionElements) { + switch(actionElement.nodeName) { + case 'INPUT': + actionElement.disabled = true; + break; + case 'SELECT': + actionElement.parentNode.querySelector('input.select-dropdown').disabled = true; + break; + default: + actionElement.classList.add('disabled'); } } } enableActionElements() { - let actionElements = this.elements.container.querySelectorAll('.corpus-analysis-action'); - for (let actionElement of actionElements) { - if (actionElement.nodeName === 'INPUT') { - actionElement.disabled = false; - } else if (actionElement.nodeName === 'SELECT') { - actionElement.parentNode.querySelector('input.select-dropdown').disabled = 
false; - } else { - actionElement.classList.remove('disabled'); + const actionElements = this.elements.container.querySelectorAll('.corpus-analysis-action'); + for (const actionElement of actionElements) { + switch(actionElement.nodeName) { + case 'INPUT': + actionElement.disabled = false; + break; + case 'SELECT': + actionElement.parentNode.querySelector('input.select-dropdown').disabled = false; + break; + default: + actionElement.classList.remove('disabled'); } } } diff --git a/app/static/js/CorpusAnalysis/CorpusAnalysisConcordance.js b/app/static/js/CorpusAnalysis/CorpusAnalysisConcordance.js index e434c545..6af78603 100644 --- a/app/static/js/CorpusAnalysis/CorpusAnalysisConcordance.js +++ b/app/static/js/CorpusAnalysis/CorpusAnalysisConcordance.js @@ -30,63 +30,60 @@ class CorpusAnalysisConcordance { this.app.registerExtension(this); } - init() { + async submitForm() { + this.app.disableActionElements(); + let query = this.elements.form.query.value.trim(); + let subcorpusName = this.elements.form['subcorpus-name'].value; + this.elements.error.innerText = ''; + this.elements.error.classList.add('hide'); + this.elements.progress.classList.remove('hide'); + try { + const subcorpus = {}; + subcorpus.q = query; + subcorpus.selectedItems = new Set(); + await this.data.corpus.o.query(subcorpusName, query); + if (subcorpusName !== 'Last') {this.data.subcorpora.Last = subcorpus;} + const cqiSubcorpus = await this.data.corpus.o.subcorpora.get(subcorpusName); + subcorpus.o = cqiSubcorpus; + const paginatedSubcorpus = await cqiSubcorpus.paginate(this.settings.context, 1, this.settings.perPage); + subcorpus.p = paginatedSubcorpus; + this.data.subcorpora[subcorpusName] = subcorpus; + this.settings.selectedSubcorpus = subcorpusName; + this.renderSubcorpusList(); + this.renderSubcorpusInfo(); + this.renderSubcorpusActions(); + this.renderSubcorpusItems(); + this.renderSubcorpusPagination(); + this.elements.progress.classList.add('hide'); + } catch (error) { + let errorString 
= ''; + if ('code' in error) {errorString += `[${error.code}] `;} + errorString += `${error.constructor.name}`; + this.elements.error.innerText = errorString; + this.elements.error.classList.remove('hide'); + app.flash(errorString, 'error'); + this.elements.progress.classList.add('hide'); + } + this.app.enableActionElements(); + } + + async init() { // Init data this.data.corpus = this.app.data.corpus; this.data.subcorpora = {}; // Add event listeners - this.elements.form.addEventListener('submit', event => { + this.elements.form.addEventListener('submit', (event) => { event.preventDefault(); - this.app.disableActionElements(); - let query = this.elements.form.query.value.trim(); - let subcorpusName = this.elements.form['subcorpus-name'].value; - this.elements.error.innerText = ''; - this.elements.error.classList.add('hide'); - this.elements.progress.classList.remove('hide'); - let subcorpus = {}; - this.data.corpus.o.query(subcorpusName, query) - .then(cQiStatus => { - subcorpus.q = query; - subcorpus.selectedItems = new Set(); - if (subcorpusName !== 'Last') {this.data.subcorpora.Last = subcorpus;} - return this.data.corpus.o.subcorpora.get(subcorpusName); - }) - .then(cQiSubcorpus => { - subcorpus.o = cQiSubcorpus; - return cQiSubcorpus.paginate(1, this.settings.perPage, this.settings.context); - }) - .then( - paginatedSubcorpus => { - subcorpus.p = paginatedSubcorpus; - this.data.subcorpora[subcorpusName] = subcorpus; - this.settings.selectedSubcorpus = subcorpusName; - this.renderSubcorpusList(); - this.renderSubcorpusInfo(); - this.renderSubcorpusActions(); - this.renderSubcorpusItems(); - this.renderSubcorpusPagination(); - this.elements.progress.classList.add('hide'); - this.app.enableActionElements(); - }, - cQiError => { - this.elements.error.innerText = JSON.stringify(cQiError); - this.elements.error.classList.remove('hide'); - if ('payload' in cQiError && 'code' in cQiError.payload && 'msg' in cQiError.payload) { - app.flash(`${cQiError.payload.code}: 
${cQiError.payload.msg}`, 'error'); - } - this.elements.progress.classList.add('hide'); - this.app.enableActionElements(); - } - ); + this.submitForm(); }); - this.elements.form.addEventListener('change', event => { + this.elements.form.addEventListener('change', (event) => { if (event.target === this.elements.form['context']) { this.settings.context = parseInt(this.elements.form['context'].value); - this.elements.form.submit.click(); + this.submitForm(); } if (event.target === this.elements.form['per-page']) { this.settings.perPage = parseInt(this.elements.form['per-page'].value); - this.elements.form.submit.click(); + this.submitForm(); } if (event.target === this.elements.form['text-style']) { this.settings.textStyle = parseInt(this.elements.form['text-style'].value); @@ -162,7 +159,7 @@ class CorpusAnalysisConcordance { `.trim(); M.Tooltip.init(this.elements.subcorpusActions.querySelectorAll('.tooltipped')); - this.elements.subcorpusActions.querySelector('.subcorpus-export-trigger').addEventListener('click', event => { + this.elements.subcorpusActions.querySelector('.subcorpus-export-trigger').addEventListener('click', (event) => { event.preventDefault(); let subcorpus = this.data.subcorpora[this.settings.selectedSubcorpus]; let modalElementId = Utils.generateElementId('export-subcorpus-modal-'); @@ -219,7 +216,7 @@ class CorpusAnalysisConcordance { } } ); - exportButton.addEventListener('click', event => { + exportButton.addEventListener('click', (event) => { event.preventDefault(); this.app.disableActionElements(); this.elements.progress.classList.remove('hide'); @@ -236,12 +233,12 @@ class CorpusAnalysisConcordance { app.flash('No matches selected', 'error'); return; } - promise = subcorpus.o.partial_export([...subcorpus.selectedItems], 50); + promise = subcorpus.o.partialExport([...subcorpus.selectedItems], 50); } else { promise = subcorpus.o.export(50); } promise.then( - data => { + (data) => { let blob; if (exportFormat === 'csv') { let csvContent = 
'sep=,\r\n'; @@ -287,11 +284,11 @@ class CorpusAnalysisConcordance { }); modal.open(); }); - this.elements.subcorpusActions.querySelector('.subcorpus-delete-trigger').addEventListener('click', event => { + this.elements.subcorpusActions.querySelector('.subcorpus-delete-trigger').addEventListener('click', (event) => { event.preventDefault(); let subcorpus = this.data.subcorpora[this.settings.selectedSubcorpus]; subcorpus.o.drop().then( - cQiStatus => { + (cQiStatus) => { app.flash(`${subcorpus.o.name} deleted`, 'corpus'); delete this.data.subcorpora[subcorpus.o.name]; this.settings.selectedSubcorpus = undefined; @@ -312,8 +309,9 @@ class CorpusAnalysisConcordance { this.clearSubcorpusPagination(); } }, - cQiError => { - app.flash(`${cQiError.payload.code}: ${cQiError.payload.msg}`, 'error'); + (cqiError) => { + let errorString = `${cqiError.code}: ${cqiError.constructor.name}`; + app.flash(errorString, 'error'); } ); }); @@ -362,7 +360,7 @@ class CorpusAnalysisConcordance { this.setTextStyle(); this.setTokenRepresentation(); for (let gotoReaderTriggerElement of this.elements.subcorpusItems.querySelectorAll('.goto-reader-trigger')) { - gotoReaderTriggerElement.addEventListener('click', event => { + gotoReaderTriggerElement.addEventListener('click', (event) => { event.preventDefault(); let corpusAnalysisReader = this.app.extensions.Reader; let itemId = parseInt(gotoReaderTriggerElement.closest('.item').dataset.id); @@ -384,7 +382,7 @@ class CorpusAnalysisConcordance { }); } for (let selectTriggerElement of this.elements.subcorpusItems.querySelectorAll('.select-trigger')) { - selectTriggerElement.addEventListener('click', event => { + selectTriggerElement.addEventListener('click', (event) => { event.preventDefault(); let itemElement = selectTriggerElement.closest('.item'); let itemId = parseInt(itemElement.dataset.id); @@ -446,14 +444,14 @@ class CorpusAnalysisConcordance { `.trim(); for (let paginationTriggerElement of 
this.elements.subcorpusPagination.querySelectorAll('.pagination-trigger[data-target]')) { - paginationTriggerElement.addEventListener('click', event => { + paginationTriggerElement.addEventListener('click', (event) => { event.preventDefault(); this.app.disableActionElements(); this.elements.progress.classList.remove('hide'); let page = parseInt(paginationTriggerElement.dataset.target); - subcorpus.o.paginate(page, this.settings.perPage, this.settings.context) + subcorpus.o.paginate(this.settings.context, page, this.settings.perPage) .then( - paginatedSubcorpus => { + (paginatedSubcorpus) => { subcorpus.p = paginatedSubcorpus; this.renderSubcorpusItems(); this.renderSubcorpusPagination(); diff --git a/app/static/js/CorpusAnalysis/CorpusAnalysisReader.js b/app/static/js/CorpusAnalysis/CorpusAnalysisReader.js index acb36fa1..eb63cae9 100644 --- a/app/static/js/CorpusAnalysis/CorpusAnalysisReader.js +++ b/app/static/js/CorpusAnalysis/CorpusAnalysisReader.js @@ -19,47 +19,52 @@ class CorpusAnalysisReader { this.settings = { perPage: parseInt(this.elements.form['per-page'].value), textStyle: parseInt(this.elements.form['text-style'].value), - tokenRepresentation: this.elements.form['token-representation'].value + tokenRepresentation: this.elements.form['token-representation'].value, + pagination: { + innerWindow: 5, + outerWindow: 1 + } } this.app.registerExtension(this); } - init() { + async submitForm() { + this.app.disableActionElements(); + this.elements.error.innerText = ''; + this.elements.error.classList.add('hide'); + this.elements.progress.classList.remove('hide'); + try { + const paginatedCorpus = await this.data.corpus.o.paginate(1, this.settings.perPage); + this.data.corpus.p = paginatedCorpus; + this.renderCorpus(); + this.renderCorpusPagination(); + this.elements.progress.classList.add('hide'); + } catch (error) { + let errorString = ''; + if ('code' in error) {errorString += `[${error.code}] `;} + errorString += `${error.constructor.name}`; + if 
('description' in error) {errorString += `: ${error.description}`;} + this.elements.error.innerText = errorString; + this.elements.error.classList.remove('hide'); + app.flash(errorString, 'error'); + this.elements.progress.classList.add('hide'); + } + this.app.enableActionElements(); + } + + async init() { // Init data this.data.corpus = this.app.data.corpus; - this.data.subcorpora = {}; // Add event listeners this.elements.form.addEventListener('submit', (event) => { event.preventDefault(); - this.app.disableActionElements(); - this.elements.error.innerText = ''; - this.elements.error.classList.add('hide'); - this.elements.progress.classList.remove('hide'); - this.data.corpus.o.paginate(1, this.settings.perPage) - .then( - paginatedCorpus => { - this.data.corpus.p = paginatedCorpus; - this.renderCorpus(); - this.renderCorpusPagination(); - this.elements.progress.classList.add('hide'); - this.app.enableActionElements(); - }, - error => { - this.elements.error.innerText = JSON.stringify(error); - this.elements.error.classList.remove('hide'); - if ('payload' in error && 'code' in error.payload && 'msg' in error.payload) { - app.flash(`${error.payload.code}: ${error.payload.msg}`, 'error'); - } - this.elements.progress.classList.add('hide'); - this.app.enableActionElements(); - } - ); + this.submitForm(); }); - this.elements.form.addEventListener('change', event => { + this.elements.form.addEventListener('change', (event) => { if (event.target === this.elements.form['per-page']) { this.settings.perPage = parseInt(this.elements.form['per-page'].value); - this.elements.form.submit.click(); + this.submitForm(); } if (event.target === this.elements.form['text-style']) { this.settings.textStyle = parseInt(this.elements.form['text-style'].value); @@ -71,7 +76,7 @@ class CorpusAnalysisReader { } }); // Load initial data - this.elements.form.submit.click(); + await this.submitForm(); } clearCorpus() { @@ -144,7 +149,7 @@ class CorpusAnalysisReader { } // render page buttons 
(5 before and 5 after current page) - for (let i = this.data.corpus.p.page -5; i <= this.data.corpus.p.page; i++) { + for (let i = this.data.corpus.p.page - this.settings.pagination.innerWindow; i <= this.data.corpus.p.page; i++) { if (i <= 0) {continue;} pageElement = Utils.HTMLToElement( ` @@ -155,7 +160,7 @@ class CorpusAnalysisReader { ); this.elements.corpusPagination.appendChild(pageElement); }; - for (let i = this.data.corpus.p.page +1; i <= this.data.corpus.p.page +5; i++) { + for (let i = this.data.corpus.p.page +1; i <= this.data.corpus.p.page + this.settings.pagination.innerWindow; i++) { if (i > this.data.corpus.p.pages) {break;} pageElement = Utils.HTMLToElement( ` @@ -203,7 +208,7 @@ class CorpusAnalysisReader { this.elements.corpusPagination.appendChild(pageElement); for (let paginateTriggerElement of this.elements.corpusPagination.querySelectorAll('.pagination-trigger[data-target]')) { - paginateTriggerElement.addEventListener('click', event => { + paginateTriggerElement.addEventListener('click', (event) => { event.preventDefault(); let page = parseInt(paginateTriggerElement.dataset.target); this.page(page); @@ -247,7 +252,7 @@ class CorpusAnalysisReader { this.elements.progress.classList.remove('hide'); this.data.corpus.o.paginate(pageNum, this.settings.perPage) .then( - paginatedCorpus => { + (paginatedCorpus) => { this.data.corpus.p = paginatedCorpus; this.renderCorpus(); this.renderCorpusPagination(); diff --git a/app/static/js/CorpusAnalysis/CorpusAnalysisStaticVisualization.js b/app/static/js/CorpusAnalysis/CorpusAnalysisStaticVisualization.js new file mode 100644 index 00000000..e51a3534 --- /dev/null +++ b/app/static/js/CorpusAnalysis/CorpusAnalysisStaticVisualization.js @@ -0,0 +1,443 @@ +class CorpusAnalysisStaticVisualization { + name = 'Static Visualization (beta)'; + + constructor(app) { + this.app = app; + this.data = { + stopwords: undefined, + originalStopwords: {}, + stopwordCache: {}, + promises: {getStopwords: undefined}, + 
tokenSet: new Set() + }; + + this.app.registerExtension(this); + } + + init() { + // Init data + this.data.corpus = this.app.data.corpus; + this.renderGeneralCorpusInfo(); + this.renderTextInfoList(); + this.renderTextProportionsGraphic(); + this.renderTokenList(); + // this.renderFrequenciesGraphic(); + + // Add event listeners + let frequenciesStopwordSettingModal = document.querySelector('#frequencies-stopwords-setting-modal'); + let frequenciesStopwordSettingModalButton = document.querySelector('#frequencies-stopwords-setting-modal-button'); + frequenciesStopwordSettingModalButton.addEventListener('click', () => { + this.data.stopwordCache = structuredClone(this.data.stopwords); + this.renderStopwordSettingsModal(this.data.stopwords); + M.Modal.init(frequenciesStopwordSettingModal, {dismissible: false}); + }); + + let textProportionsGraphModeButtons = document.querySelectorAll('.text-proportions-graph-mode-button'); + textProportionsGraphModeButtons.forEach(graphModeButton => { + graphModeButton.addEventListener('click', (event) => { + textProportionsGraphModeButtons.forEach(btn => { + btn.classList.remove('disabled'); + }); + event.target.closest('.text-proportions-graph-mode-button').classList.add('disabled'); + this.renderTextProportionsGraphic(); + }); + }); + + let frequenciesTokenCategoryDropdownElement = document.querySelector('[data-target="frequencies-token-category-dropdown"]'); + let frequenciesTokenCategoryDropdownListElement = document.querySelector("#frequencies-token-category-dropdown"); + frequenciesTokenCategoryDropdownListElement.addEventListener('click', (event) => { + frequenciesTokenCategoryDropdownElement.firstChild.textContent = event.target.innerHTML; + this.renderTokenList(); + }); + + let frequenciesGraphModeButtons = document.querySelectorAll('.frequencies-graph-mode-button'); + frequenciesGraphModeButtons.forEach(graphModeButton => { + graphModeButton.addEventListener('click', (event) => { + frequenciesGraphModeButtons.forEach(btn => 
{ + btn.classList.remove('disabled'); + }); + event.target.closest('.frequencies-graph-mode-button').classList.add('disabled'); + this.renderFrequenciesGraphic(this.data.tokenSet); + }); + }); + + for (let actionButton of document.querySelectorAll('.frequencies-stopword-setting-modal-action-buttons')) { + actionButton.addEventListener('click', (event) => { + let action = event.target.closest('.frequencies-stopword-setting-modal-action-buttons').dataset.action; + if (action === 'submit') { + this.renderTokenList(); + } else if (action === 'cancel') { + this.data.stopwords = structuredClone(this.data.stopwordCache); + } + }); + } + } + + getStopwords() { + this.data.promises.getStopwords = new Promise((resolve, reject) => { + Requests.corpora.entity.getStopwords() + .then((response) => { + response.json() + .then((json) => { + this.data.originalStopwords = structuredClone(json); + this.data.stopwords = structuredClone(json); + resolve(this.data.stopwords); + }) + .catch((error) => { + reject(error); + }); + }); + }); + return this.data.promises.getStopwords; + } + + renderGeneralCorpusInfo() { + let corpusData = this.data.corpus.o.staticData; + document.querySelector('.corpus-num-tokens').innerHTML = corpusData.corpus.counts.token; + document.querySelector('.corpus-num-s').innerHTML = corpusData.corpus.counts.s; + document.querySelector('.corpus-num-unique-words').innerHTML = Object.entries(corpusData.corpus.freqs.word).length; + document.querySelector('.corpus-num-unique-lemmas').innerHTML = Object.entries(corpusData.corpus.freqs.lemma).length; + document.querySelector('.corpus-num-unique-pos').innerHTML = Object.entries(corpusData.corpus.freqs.pos).length; + document.querySelector('.corpus-num-unique-simple-pos').innerHTML = Object.entries(corpusData.corpus.freqs.simple_pos).length; + } + + renderTextInfoList() { + let corpusData = this.data.corpus.o.staticData; + let corpusTextInfoListElement = document.querySelector('.corpus-text-info-list'); + let 
corpusTextInfoList = new CorpusTextInfoList(corpusTextInfoListElement); + let texts = corpusData.s_attrs.text.lexicon; + let textData = []; + for (let i = 0; i < Object.entries(texts).length; i++) { + let resource = { + title: corpusData.values.s_attrs.text[i].title, + publishing_year: corpusData.values.s_attrs.text[i].publishing_year, + num_tokens: corpusData.s_attrs.text.lexicon[i].counts.token, + num_sentences: corpusData.s_attrs.text.lexicon[i].counts.s, + num_unique_words: Object.entries(corpusData.s_attrs.text.lexicon[i].freqs.word).length, + num_unique_lemmas: Object.entries(corpusData.s_attrs.text.lexicon[i].freqs.lemma).length, + num_unique_pos: Object.entries(corpusData.s_attrs.text.lexicon[i].freqs.pos).length, + num_unique_simple_pos: Object.entries(corpusData.s_attrs.text.lexicon[i].freqs.simple_pos).length + }; + + textData.push(resource); + } + + corpusTextInfoList.add(textData); + + let textCountChipElement = document.querySelector('.text-count-chip'); + textCountChipElement.innerHTML = `Text count: ${corpusData.corpus.counts.text}`; + } + + renderTextProportionsGraphic() { + let corpusData = this.data.corpus.o.staticData; + let textProportionsGraphicElement = document.querySelector('#text-proportions-graphic'); + let texts = Object.entries(corpusData.s_attrs.text.lexicon); + let graphtype = document.querySelector('.text-proportions-graph-mode-button.disabled').dataset.graphType; + let textProportionsTitleElement = document.querySelector('#text-proportions-title-element'); + + if (graphtype === 'bar') { + textProportionsTitleElement.innerHTML = 'Bounds'; + } else if (graphtype === 'pie') { + textProportionsTitleElement.innerHTML = 'Proportions'; + } + + let graphData = this.createTextProportionsGraphData(texts, graphtype); + let graphLayout = { + barmode: graphtype === 'bar' ? 'relative' : '', + type: graphtype, + showgrid: false, + height: 447, + margin: { + l: 10, + r: 10, + b: graphtype === 'bar' ? 80 : 10, + t: graphtype === 'bar' ? 
80 : 10, + }, + legend: { + "orientation": "h", + font: { + size: 10 + } + }, + xaxis: { + rangemode: 'nonnegative', + autorange: true + }, + yaxis: { + autorange: true, + showticklabels: false + } + }; + let config = { + responsive: true, + modeBarButtonsToRemove: ['zoom2d', 'select2d', 'lasso2d', 'zoomIn2d', 'zoomOut2d', 'autoScale2d', 'resetScale2d'], + displaylogo: false + }; + + Plotly.newPlot(textProportionsGraphicElement, graphData, graphLayout, config); + } + + createTextProportionsGraphData(texts, graphtype) { + let corpusData = this.data.corpus.o.staticData; + let graphData = []; + switch (graphtype) { + case 'bar': + for (let text of texts) { + let textData = { + type: 'bar', + orientation: 'h', + x: [text[1].bounds[1] - text[1].bounds[0]], + y: [0.5], + text: [`${text[1].bounds[0]} - ${text[1].bounds[1]}`], + name: `${corpusData.values.s_attrs.text[text[0]].title} (${corpusData.values.s_attrs.text[text[0]].publishing_year})`, + hovertemplate: `${text[1].bounds[0]} - ${text[1].bounds[1]}`, + }; + graphData.push(textData); + } + break; + default: + graphData = [ + { + values: texts.map(text => text[1].counts.token), + labels: texts.map(text => `${corpusData.values.s_attrs.text[text[0]].title} (${corpusData.values.s_attrs.text[text[0]].publishing_year})`), + type: graphtype + } + ]; + break; + } + return graphData; + } + + async renderTokenList() { + let corpusTokenListElement = document.querySelector('.corpus-token-list'); + let corpusTokenList = new CorpusTokenList(corpusTokenListElement); + let filteredData = this.filterData(); + let stopwords = this.data.stopwords; + if (this.data.stopwords === undefined) { + stopwords = await this.getStopwords(); + } + stopwords = Object.values(stopwords).flat(); + let mostFrequent = Object.entries(filteredData) + .sort((a, b) => b[1].count - a[1].count) + .filter(item => !stopwords.includes(item[0].toLowerCase())) + .slice(0, 4) + .map(item => item[0]) + + let tokenData = []; + 
Object.entries(filteredData).forEach(item => { + let resource = { + term: item[0], + count: item[1].count, + mostFrequent: mostFrequent.includes(item[0]) + }; + if (!Object.values(stopwords).includes(resource.term)) { + tokenData.push(resource); + } + }); + corpusTokenList.add(tokenData); + } + + filterData() { + let frequenciesTokenCategoryDropdownElement = document.querySelector('[data-target="frequencies-token-category-dropdown"]'); + let tokenCategory = frequenciesTokenCategoryDropdownElement.firstChild.textContent.toLowerCase(); + let corpusData = this.data.corpus.o.staticData; + let filteredData = {}; + + for (let i = 0; i < Object.values(corpusData.corpus.freqs[tokenCategory]).length; i++) { + let term = corpusData.values.p_attrs[tokenCategory][i].toLowerCase(); + let count = corpusData.corpus.freqs[tokenCategory][i]; + + if (filteredData[term]) { + filteredData[term].count += count; + filteredData[term].originalIds.push(i); + } else { + filteredData[term] = { + count: count, + originalIds: [i] + }; + } + } + return filteredData; + } + + + renderFrequenciesGraphic(tokenSet) { + this.data.tokenSet = tokenSet; + let corpusData = this.data.corpus.o.staticData; + let frequenciesTokenCategoryDropdownElement = document.querySelector('[data-target="frequencies-token-category-dropdown"]'); + let frequenciesGraphicElement = document.querySelector('#frequencies-graphic'); + let texts = Object.entries(corpusData.s_attrs.text.lexicon); + let graphtype = document.querySelector('.frequencies-graph-mode-button.disabled').dataset.graphType; + let tokenCategory = frequenciesTokenCategoryDropdownElement.firstChild.textContent.toLowerCase(); + + let graphData = this.createFrequenciesGraphData(tokenCategory, texts, graphtype, tokenSet); + let graphLayout = { + barmode: graphtype === 'bar' ? 'stack' : '', + yaxis: { + showticklabels: graphtype === 'markers' ? 
false : true + }, + height: 627, + margin: { + l: 33 + } + }; + let config = { + responsive: true, + modeBarButtonsToRemove: ['zoom2d', 'select2d', 'lasso2d', 'zoomIn2d', 'zoomOut2d', 'autoScale2d', 'resetScale2d'], + displaylogo: false + }; + Plotly.newPlot(frequenciesGraphicElement, graphData, graphLayout, config); + } + + createFrequenciesGraphData(tokenCategory, texts, graphtype, tokenSet) { + let corpusData = this.data.corpus.o.staticData; + let graphData = []; + let filteredData = this.filterData(); + switch (graphtype) { + case 'markers': + for (let item of tokenSet) { + let textTitles = texts.map(text => `${corpusData.values.s_attrs.text[text[0]].title} (${corpusData.values.s_attrs.text[text[0]].publishing_year})`); + let tokenCountPerText = []; + for (let originalId of filteredData[item].originalIds) { + for (let i = 0; i < texts.length; i++) { + tokenCountPerText[i] = (tokenCountPerText[i] || 0) + (texts[i][1].freqs[tokenCategory][originalId] || 0); + } + } + let data = { + x: textTitles, + y: texts.map(text => item), + name: item, + text: texts.map(text => `${item}
${tokenCountPerText || 0}`), + mode: 'markers', + marker: { + size: tokenCountPerText, + sizeref: 0.4 + } + }; + graphData.push(data); + } + break; + default: + for (let item of tokenSet) { + let textTitles = texts.map(text => `${corpusData.values.s_attrs.text[text[0]].title} (${corpusData.values.s_attrs.text[text[0]].publishing_year})`); + let tokenCountPerText = []; + for (let originalId of filteredData[item].originalIds) { + for (let i = 0; i < texts.length; i++) { + tokenCountPerText[i] = (tokenCountPerText[i] || 0) + (texts[i][1].freqs[tokenCategory][originalId] || 0); + } + } + let data = { + x: textTitles, + y: tokenCountPerText, + name: item, + type: graphtype + }; + graphData.push(data); + } + break; + } + return graphData; + } + + renderStopwordSettingsModal(stopwords) { + let stopwordInputField = document.querySelector('#stopword-input-field'); + let userStopwordListContainer = document.querySelector('#user-stopword-list-container'); + let stopwordLanguageSelection = document.querySelector('#stopword-language-selection'); + let stopwordLanguageChipList = document.querySelector('#stopword-language-chip-list'); + let deleteLanguageStopwordListEntriesButton = document.querySelector('#delete-language-stopword-list-entries-button'); + let resetLanguageStopwordListEntriesButton = document.querySelector('#reset-language-stopword-list-entries-button'); + + stopwordLanguageChipList.innerHTML = ''; + userStopwordListContainer.innerHTML = ''; + stopwordInputField.value = ''; + + // Render stopword language selection. Set english as default language. Filter out user_stopwords. + if (stopwordLanguageSelection.children.length === 0) { + Object.keys(stopwords).forEach(language => { + if (language !== 'user_stopwords') { + let optionElement = Utils.HTMLToElement(``); + stopwordLanguageSelection.appendChild(optionElement); + } + }); + } + + // Render user stopwords over input field. 
+ if (this.data.stopwords['user_stopwords'].length > 0) { + for (let word of this.data.stopwords['user_stopwords']) { + let chipElement = Utils.HTMLToElement(`
${word}close
`); + chipElement.addEventListener('click', (event) => { + let removedListItem = event.target.closest('.chip').firstChild.textContent; + this.data.stopwords['user_stopwords'] = structuredClone(this.data.stopwords['user_stopwords'].filter(item => item !== removedListItem)); + }); + userStopwordListContainer.appendChild(chipElement); + } + } + + // Render english stopwords as default ... + let selectedLanguage = document.querySelector('#stopword-language-selection').value; + this.renderStopwordLanguageChipList(selectedLanguage, stopwords[selectedLanguage]); + + // ... or render selected language stopwords. + stopwordLanguageSelection.addEventListener('change', (event) => { + this.renderStopwordLanguageChipList(event.target.value, stopwords[event.target.value]); + }); + + // Eventlistener for deleting all stopwords of a language. + deleteLanguageStopwordListEntriesButton.addEventListener('click', (event) => { + let selectedLanguage = stopwordLanguageSelection.value; + this.data.stopwords[selectedLanguage] = []; + stopwordLanguageChipList.innerHTML = ''; + this.buttonRendering(); + }); + + // Eventlistener for resetting all stopwords of a language to the original stopwords. + resetLanguageStopwordListEntriesButton.addEventListener('click', () => { + let selectedLanguage = stopwordLanguageSelection.value; + this.data.stopwords[selectedLanguage] = structuredClone(this.data.originalStopwords[selectedLanguage]); + this.renderStopwordLanguageChipList(selectedLanguage, this.data.stopwords[selectedLanguage]); + }); + + // Initialize Materialize components. 
+ M.Chips.init( + stopwordInputField, + { + placeholder: 'Add stopwords', + onChipAdd: (event) => { + for (let word of event[0].M_Chips.chipsData) { + if (!this.data.stopwords['user_stopwords'].includes(word.tag.toLowerCase())) { + this.data.stopwords['user_stopwords'].push(word.tag.toLowerCase()); + } + } + } + } + ); + M.FormSelect.init(stopwordLanguageSelection); + + } + + buttonRendering() { + let deleteLanguageStopwordListEntriesButton = document.querySelector('#delete-language-stopword-list-entries-button'); + let resetLanguageStopwordListEntriesButton = document.querySelector('#reset-language-stopword-list-entries-button'); + let selectedLanguage = document.querySelector('#stopword-language-selection').value; + let stopwordLength = this.data.stopwords[selectedLanguage].length; + let originalStopwordListLength = this.data.originalStopwords[selectedLanguage].length; + + deleteLanguageStopwordListEntriesButton.classList.toggle('disabled', stopwordLength === 0); + resetLanguageStopwordListEntriesButton.classList.toggle('disabled', stopwordLength === originalStopwordListLength); + } + + renderStopwordLanguageChipList(language, stopwords) { + let stopwordLanguageChipList = document.querySelector('#stopword-language-chip-list'); + stopwordLanguageChipList.innerHTML = ''; + for (let word of stopwords) { + let chipElement = Utils.HTMLToElement(`
${word}close
`); + chipElement.addEventListener('click', (event) => { + let removedListItem = event.target.closest('.chip').firstChild.textContent; + this.data.stopwords[language] = structuredClone(this.data.stopwords[language].filter(item => item !== removedListItem)); + this.buttonRendering(); + }); + stopwordLanguageChipList.appendChild(chipElement); + } + this.buttonRendering(); + } +} diff --git a/app/static/js/CorpusAnalysis/QueryBuilder.js b/app/static/js/CorpusAnalysis/QueryBuilder.js index 6b6aa4c5..9163b4bc 100644 --- a/app/static/js/CorpusAnalysis/QueryBuilder.js +++ b/app/static/js/CorpusAnalysis/QueryBuilder.js @@ -162,9 +162,21 @@ class ConcordanceQueryBuilder { this.elements.or.addEventListener('click', () => {this.orHandler();}); this.elements.and.addEventListener('click', () => {this.andHandler();}); - //#endregion Token Attribute Event Listeners - + let selectInstances = this.elements.concordanceQueryBuilder.querySelectorAll('select'); + M.FormSelect.init( + selectInstances, + { + dropdownOptions: { + alignment: 'bottom', + coverTrigger: false + } + } + ) + let dropdownContents = this.elements.concordanceQueryBuilder.querySelectorAll('.dropdown-content'); + dropdownContents.forEach((dropdownContent) => { + dropdownContent.style.paddingBottom = '15px'; + }); } @@ -182,6 +194,7 @@ class ConcordanceQueryBuilder { showPositionalAttrArea() { this.elements.positionalAttrArea.classList.remove('hide'); + this.elements.structuralAttrArea.classList.add('hide'); this.wordBuilder(); this.elements.tokenQueryFilled = false; @@ -195,6 +208,7 @@ class ConcordanceQueryBuilder { } queryChipFactory(dataType, prettyQueryText, queryText) { + this.elements.counter++; window.location.href = '#query-container'; queryText = Utils.escape(queryText); prettyQueryText = Utils.escape(prettyQueryText); @@ -274,9 +288,9 @@ class ConcordanceQueryBuilder { queryPreviewBuilder() { this.elements.yourQueryContent = []; for (let element of this.elements.yourQuery.childNodes) { - let queryElement = 
decodeURI(element.dataset.query); - queryElement = Utils.escape(queryElement); - if (queryElement !== 'undefined') { + let queryElement = element.dataset.query; + if (queryElement !== undefined) { + queryElement = Utils.escape(queryElement); this.elements.yourQueryContent.push(queryElement); } } @@ -306,7 +320,7 @@ class ConcordanceQueryBuilder { this.validateValue(); if (this.elements.valueValidator) { for (let element of this.elements.yourQuery.childNodes) { - let queryElement = decodeURI(element.dataset.query); + let queryElement = element.dataset.query; if (queryElement !== 'undefined') { this.elements.yourQueryContent.push(queryElement); } @@ -632,8 +646,11 @@ class ConcordanceQueryBuilder { englishPosHandler() { this.hideEverything(); this.elements.englishPosBuilder.classList.remove('hide'); - // this.elements.incidenceModifiersButton.classList.remove('hide'); + this.elements.incidenceModifiersButton.classList.remove('hide'); this.elements.conditionContainer.classList.remove('hide'); + this.elements.incidenceModifiersButton.firstElementChild.classList.remove('disabled'); + this.elements.or.classList.remove('disabled'); + this.elements.and.classList.remove('disabled'); // Resets materialize select dropdown let selectInstance = M.FormSelect.getInstance(this.elements.englishPos); @@ -644,8 +661,11 @@ class ConcordanceQueryBuilder { germanPosHandler() { this.hideEverything(); this.elements.germanPosBuilder.classList.remove('hide'); - // this.elements.incidenceModifiersButton.classList.remove('hide'); + this.elements.incidenceModifiersButton.classList.remove('hide'); this.elements.conditionContainer.classList.remove('hide'); + this.elements.incidenceModifiersButton.firstElementChild.classList.remove('disabled'); + this.elements.or.classList.remove('disabled'); + this.elements.and.classList.remove('disabled'); // Resets materialize select dropdown let selectInstance = M.FormSelect.getInstance(this.elements.germanPos); @@ -656,14 +676,27 @@ class 
ConcordanceQueryBuilder { simplePosBuilder() { this.hideEverything(); this.elements.simplePosBuilder.classList.remove('hide'); - // this.elements.incidenceModifiersButton.classList.remove('hide'); + this.elements.incidenceModifiersButton.classList.remove('hide'); this.elements.conditionContainer.classList.remove('hide'); this.elements.simplePos.selectedIndex = 0; + this.elements.incidenceModifiersButton.firstElementChild.classList.remove('disabled'); + this.elements.or.classList.remove('disabled'); + this.elements.and.classList.remove('disabled'); // Resets materialize select dropdown let selectInstance = M.FormSelect.getInstance(this.elements.simplePos); selectInstance.input.value = 'simple_pos tagset'; this.elements.simplePos.value = 'default'; + M.FormSelect.init( + selectInstance, + { + dropdownOptions: { + direction: 'bottom', + coverTrigger: false + } + } + ) + } emptyTokenHandler() { @@ -671,6 +704,8 @@ class ConcordanceQueryBuilder { this.elements.tokenQueryFilled = true; this.hideEverything(); this.elements.incidenceModifiersButton.classList.remove('hide'); + this.elements.incidenceModifiersButton.firstElementChild.classList.remove('disabled'); + } //#endregion Dropdown Select Handler @@ -686,7 +721,7 @@ class ConcordanceQueryBuilder { } if (elem === this.elements.optionGroup) { - input.value += '( option1 | option2 )'; + input.value += '(option1|option2)'; let firstIndex = input.value.indexOf('option1'); let lastIndex = firstIndex + 'option1'.length; input.focus(); @@ -694,6 +729,7 @@ class ConcordanceQueryBuilder { } else if (elem === this.elements.wildcardChar) { input.value += '.'; } + this.inputFieldHandler(); } nSubmitHandler() { @@ -807,8 +843,7 @@ class ConcordanceQueryBuilder { } else { input = this.elements.lemmaInput; } - - input.value += ' ' + elem.dataset.token; + input.value += elem.dataset.token; } } diff --git a/app/static/js/Requests/Requests.js b/app/static/js/Requests/Requests.js index 0504d8a0..7282f0f7 100644 --- 
a/app/static/js/Requests/Requests.js +++ b/app/static/js/Requests/Requests.js @@ -22,9 +22,11 @@ Requests.JSONfetch = (input, init={}) => { response.json() .then( (json) => { - let message = json.message || json; + let message = json.message; let category = json.category || 'message'; - app.flash(message, category); + if (message) { + app.flash(message, category); + } }, (error) => { app.flash(`[${response.status}]: ${response.statusText}`, 'error'); diff --git a/app/static/js/Requests/corpora/corpora.js b/app/static/js/Requests/corpora/corpora.js index 55f6b899..3118a153 100644 --- a/app/static/js/Requests/corpora/corpora.js +++ b/app/static/js/Requests/corpora/corpora.js @@ -31,6 +31,14 @@ Requests.corpora.entity.generateShareLink = (corpusId, role, expiration) => { return Requests.JSONfetch(input, init); }; +Requests.corpora.entity.getStopwords = () => { + let input = `/corpora/stopwords`; + let init = { + method: 'GET' + }; + return Requests.JSONfetch(input, init); +}; + Requests.corpora.entity.isPublic = {}; Requests.corpora.entity.isPublic.update = (corpusId, isPublic) => { @@ -43,4 +51,3 @@ Requests.corpora.entity.isPublic.update = (corpusId, isPublic) => { }; - diff --git a/app/static/js/ResourceLists/AdminUserList.js b/app/static/js/ResourceLists/AdminUserList.js index 0307bbdc..0b8f0c16 100644 --- a/app/static/js/ResourceLists/AdminUserList.js +++ b/app/static/js/ResourceLists/AdminUserList.js @@ -91,8 +91,7 @@ class AdminUserList extends ResourceList { let listAction = listActionElement === null ? 
'view' : listActionElement.dataset.listAction; switch (listAction) { case 'delete': { - console.log('delete', itemId); - Utils.deleteUserRequest(itemId); + Requests.users.entity.delete(itemId); if (itemId === currentUserId) {window.location.href = '/';} break; } diff --git a/app/static/js/ResourceLists/CorpusTextInfoList.js b/app/static/js/ResourceLists/CorpusTextInfoList.js new file mode 100644 index 00000000..f1545d70 --- /dev/null +++ b/app/static/js/ResourceLists/CorpusTextInfoList.js @@ -0,0 +1,112 @@ +class CorpusTextInfoList extends ResourceList { + + static autoInit() { + for (let corpusTextInfoListElement of document.querySelectorAll('.corpus-text-info-list:not(.no-autoinit)')) { + new CorpusTextInfoList(corpusTextInfoListElement); + } + } + + static defaultOptions = { + page: 5 + }; + + constructor(listContainerElement, options = {}) { + let _options = Utils.mergeObjectsDeep( + CorpusTextInfoList.defaultOptions, + options + ); + super(listContainerElement, _options); + this.isInitialized = false; + let sortElements = this.listContainerElement.querySelectorAll('.sort'); + sortElements.forEach((sortElement) => { + sortElement.addEventListener('click', (event) => {this.renderSortElement(sortElement)}); + }); + } + + get item() { + return (values) => { + return ` + + () + + + + + + + + `.trim(); + } + } + + get valueNames() { + return [ + 'title', + 'publishing_year', + 'num_tokens', + 'num_sentences', + 'num_unique_words', + 'num_unique_lemmas', + 'num_unique_pos', + 'num_unique_simple_pos' + ]; + } + + initListContainerElement() { + if (!this.listContainerElement.hasAttribute('id')) { + this.listContainerElement.id = Utils.generateElementId('corpus-file-list-'); + } + let listSearchElementId = Utils.generateElementId(`${this.listContainerElement.id}-search-`); + this.listContainerElement.innerHTML = ` +
+ search + + +
+ + + + + + + + + + + + + +
Textarrow_drop_downTokensarrow_drop_downSentencesarrow_drop_downUnique wordsarrow_drop_downUnique lemmasarrow_drop_downUnique posarrow_drop_downUnique simple posarrow_drop_down
+
    + `.trim(); + } + + mapResourceToValue(corpusTextData) { + return { + title: corpusTextData.title, + publishing_year: corpusTextData.publishing_year, + num_tokens: corpusTextData.num_tokens, + num_sentences: corpusTextData.num_sentences, + num_unique_words: corpusTextData.num_unique_words, + num_unique_lemmas: corpusTextData.num_unique_lemmas, + num_unique_pos: corpusTextData.num_unique_pos, + num_unique_simple_pos: corpusTextData.num_unique_simple_pos + }; + } + + sort() { + this.listjs.sort('title'); + } + + renderSortElement(clickedSortElement) { + this.listContainerElement.querySelectorAll('.sort').forEach((sortElement) => { + if (sortElement !== clickedSortElement) { + sortElement.classList.remove('asc', 'desc'); + sortElement.style.color = 'black'; + sortElement.innerHTML = 'arrow_drop_down'; + }; + }); + clickedSortElement.style.color = '#aa9cc9'; + clickedSortElement.innerHTML = clickedSortElement.classList.contains('asc') ? 'arrow_drop_down' : 'arrow_drop_up'; + } +} diff --git a/app/static/js/ResourceLists/CorpusTokenList.js b/app/static/js/ResourceLists/CorpusTokenList.js new file mode 100644 index 00000000..cc16692b --- /dev/null +++ b/app/static/js/ResourceLists/CorpusTokenList.js @@ -0,0 +1,141 @@ +class CorpusTokenList extends ResourceList { + static autoInit() { + for (let corpusTokenListElement of document.querySelectorAll('.corpus-token-list:not(.no-autoinit)')) { + new CorpusTokenList(corpusTokenListElement); + } + } + + static defaultOptions = { + page: 7 + }; + + constructor(listContainerElement, options = {}) { + let _options = Utils.mergeObjectsDeep( + CorpusTokenList.defaultOptions, + options + ); + super(listContainerElement, _options); + this.listjs.list.addEventListener('click', (event) => {this.onClick(event)}); + this.selectedItemTerms = new Set(); + this.listjs.on('sortComplete', () => { + let listItems = Array.from(this.listjs.items).filter(item => item.elm); + for (let item of listItems) { + let termElement = 
item.elm.querySelector('.term'); + let mostFrequent = item.elm.dataset.mostfrequent === 'true'; + if (mostFrequent) { + this.selectedItemTerms.add(termElement.textContent); + } + } + corpusAnalysisApp.extensions['Static Visualization (beta)'].renderFrequenciesGraphic(this.selectedItemTerms); + }); + + let tokenListResetButtonElement = this.listContainerElement.querySelector('#token-list-reset-button'); + tokenListResetButtonElement.addEventListener('click', () => { + this.selectedItemTerms.clear(); + let listItems = Array.from(this.listjs.items).filter(item => item.elm); + for (let item of listItems) { + let termElement = item.elm.querySelector('.term'); + let mostFrequent = item.elm.dataset.mostfrequent === 'true'; + if (mostFrequent) { + item.elm.querySelector('.select-checkbox').checked = true; + this.selectedItemTerms.add(termElement.textContent); + } else { + item.elm.querySelector('.select-checkbox').checked = false; + } + } + corpusAnalysisApp.extensions['Static Visualization (beta)'].renderFrequenciesGraphic(this.selectedItemTerms); + }); + } + + get item() { + return (values) => { + return ` + + + + + + + + + `.trim(); + } + } + + get valueNames() { + return [ + 'term', + 'count', + {data: ['mostFrequent']}, + 'frequency' + ]; + } + + initListContainerElement() { + if (!this.listContainerElement.hasAttribute('id')) { + this.listContainerElement.id = Utils.generateElementId('corpus-token-list-'); + } + let listSearchElementId = Utils.generateElementId(`${this.listContainerElement.id}-search-`); + this.listContainerElement.innerHTML = ` +
    + search + + +
    + + + + + + + + + + +
    + refresh + TermCountFrequency
    +
      + `.trim(); + } + + mapResourceToValue(corpusTokenData) { + return { + term: corpusTokenData.term, + count: corpusTokenData.count, + mostFrequent: corpusTokenData.mostFrequent, + frequency: '-' + }; + } + + sort() { + this.listjs.sort('count', {order: 'desc'}); + } + + onClick(event) { + if (event.target.closest('.disable-on-click') !== null) {return;} + let listItemElement = event.target.closest('.list-item'); + if (listItemElement === null) {return;} + let item = listItemElement.querySelector('.term').textContent; + let listActionElement = event.target.closest('.list-action-trigger[data-list-action]'); + let listAction = listActionElement === null ? '' : listActionElement.dataset.listAction; + switch (listAction) { + case 'select': { + if (event.target.checked) { + this.selectedItemTerms.add(item); + } else { + this.selectedItemTerms.delete(item); + } + corpusAnalysisApp.extensions['Static Visualization (beta)'].renderFrequenciesGraphic(this.selectedItemTerms); + break; + } + default: { + break; + } + } + } + +} diff --git a/app/static/js/ResourceLists/ResourceList.js b/app/static/js/ResourceLists/ResourceList.js index 3251ef2b..6bc6ac1f 100644 --- a/app/static/js/ResourceLists/ResourceList.js +++ b/app/static/js/ResourceLists/ResourceList.js @@ -15,6 +15,8 @@ class ResourceList { UserList.autoInit(); AdminUserList.autoInit(); CorpusFollowerList.autoInit(); + CorpusTextInfoList.autoInit(); + CorpusTokenList.autoInit(); } static defaultOptions = { diff --git a/app/static/js/cqi/api/client.js b/app/static/js/cqi/api/client.js new file mode 100644 index 00000000..14bf3690 --- /dev/null +++ b/app/static/js/cqi/api/client.js @@ -0,0 +1,688 @@ +cqi.api.APIClient = class APIClient { + /** + * @param {string} host + * @param {number} [timeout=60] timeout + * @param {string} [version=0.1] version + */ + constructor(host, timeout = 60, version = '0.1') { + this.host = host; + this.timeout = timeout * 1000; // convert seconds to milliseconds + this.version = version; 
+ this.socket = io( + this.host, + { + transports: ['websocket'], + upgrade: false + } + ); + } + + /** + * @param {string} fn_name + * @param {object} [fn_args={}] + * @returns {Promise} + */ + async #request(fn_name, fn_args = {}) { + // TODO: implement timeout + let response = await this.socket.emitWithAck('exec', fn_name, fn_args); + if (response.code === 200) { + return response.payload; + } else if (response.code === 500) { + throw new Error(`[${response.code}] ${response.msg}`); + } else if (response.code === 502) { + if (response.payload.code in cqi.errors.lookup) { + throw new cqi.errors.lookup[response.payload.code](); + } else { + throw new cqi.errors.CQiError(); + } + } + } + + /** + * @param {string} username + * @param {string} password + * @returns {Promise} + */ + async ctrl_connect(username, password) { + const fn_name = 'ctrl_connect'; + const fn_args = {username: username, password: password}; + let payload = await this.#request(fn_name, fn_args); + return new cqi.status.lookup[payload.code](); + } + + /** + * @returns {Promise} + */ + async ctrl_bye() { + const fn_name = 'ctrl_bye'; + let payload = await this.#request(fn_name); + return new cqi.status.lookup[payload.code](); + } + + /** + * @returns {Promise} + */ + async ctrl_user_abort() { + const fn_name = 'ctrl_user_abort'; + return await this.#request(fn_name); + } + + /** + * @returns {Promise} + */ + async ctrl_ping() { + const fn_name = 'ctrl_ping'; + let payload = await this.#request(fn_name); + return new cqi.status.lookup[payload.code](); + } + + /** + * Full-text error message for the last general error reported + * by the CQi server + * + * @returns {Promise} + */ + async ctrl_last_general_error() { + const fn_name = 'ctrl_last_general_error'; + return await this.#request(fn_name); + } + + /** + * @returns {Promise} + */ + async ask_feature_cqi_1_0() { + const fn_name = 'ask_feature_cqi_1_0'; + return await this.#request(fn_name); + } + + /** + * @returns {Promise} + */ + async 
ask_feature_cl_2_3() { + const fn_name = 'ask_feature_cl_2_3'; + return await this.#request(fn_name); + } + + /** + * @returns {Promise} + */ + async ask_feature_cqp_2_3() { + const fn_name = 'ask_feature_cqp_2_3'; + return await this.#request(fn_name); + } + + /** + * @returns {Promise} + */ + async corpus_list_corpora() { + const fn_name = 'corpus_list_corpora'; + return await this.#request(fn_name); + } + + /** + * @param {string} corpus + * @returns {Promise} + */ + async corpus_charset(corpus) { + const fn_name = 'corpus_charset'; + const fn_args = {corpus: corpus}; + return await this.#request(fn_name, fn_args); + } + + /** + * @param {string} corpus + * @returns {Promise} + */ + async corpus_properties(corpus) { + const fn_name = 'corpus_properties'; + const fn_args = {corpus: corpus}; + return await this.#request(fn_name, fn_args); + } + + /** + * @param {string} corpus + * @returns {Promise} + */ + async corpus_positional_attributes(corpus) { + const fn_name = 'corpus_positional_attributes'; + const fn_args = {corpus: corpus}; + return await this.#request(fn_name, fn_args); + } + + /** + * @param {string} corpus + * @returns {Promise} + */ + async corpus_structural_attributes(corpus) { + const fn_name = 'corpus_structural_attributes'; + const fn_args = {corpus: corpus}; + return await this.#request(fn_name, fn_args); + } + + /** + * @param {string} corpus + * @param {string} attribute + * @returns {Promise} + */ + async corpus_structural_attribute_has_values(corpus, attribute) { + const fn_name = 'corpus_structural_attribute_has_values'; + const fn_args = {corpus: corpus, attribute: attribute}; + return await this.#request(fn_name, fn_args); + } + + /** + * @param {string} corpus + * @returns {Promise} + */ + async corpus_alignment_attributes(corpus) { + const fn_name = 'corpus_alignment_attributes'; + const fn_args = {corpus: corpus}; + return await this.#request(fn_name, fn_args); + } + + /** + * the full name of as specified in its registry entry + * + 
* @param {string} corpus + * @returns {Promise} + */ + async corpus_full_name(corpus) { + const fn_name = 'corpus_full_name'; + const fn_args = {corpus: corpus}; + return await this.#request(fn_name, fn_args); + } + + /** + * returns the contents of the .info file of as a list of lines + * + * @param {string} corpus + * @returns {Promise} + */ + async corpus_info(corpus) { + const fn_name = 'corpus_info'; + const fn_args = {corpus: corpus}; + return await this.#request(fn_name, fn_args); + } + + /** + * try to unload a corpus and all its attributes from memory + * + * @param {string} corpus + * @returns {Promise} + */ + async corpus_drop_corpus(corpus) { + const fn_name = 'corpus_drop_corpus'; + const fn_args = {corpus: corpus}; + let payload = await this.#request(fn_name, fn_args); + return new cqi.status.lookup[payload.code](); + } + + /** + * returns the size of : + * - number of tokens (positional) + * - number of regions (structural) + * - number of alignments (alignment) + * + * @param {string} attribute + * @returns {Promise} + */ + async cl_attribute_size(attribute) { + const fn_name = 'cl_attribute_size'; + const fn_args = {attribute: attribute}; + return await this.#request(fn_name, fn_args); + } + + /** + * returns the number of entries in the lexicon of a positional attribute; + * + * valid lexicon IDs range from 0 .. 
(lexicon_size - 1) + * + * @param {string} attribute + * @returns {Promise} + */ + async cl_lexicon_size(attribute) { + const fn_name = 'cl_lexicon_size'; + const fn_args = {attribute: attribute}; + return await this.#request(fn_name, fn_args); + } + + /** + * unload attribute from memory + * + * @param {string} attribute + * @returns {Promise} + */ + async cl_drop_attribute(attribute) { + const fn_name = 'cl_drop_attribute'; + const fn_args = {attribute: attribute}; + let payload = await this.#request(fn_name, fn_args); + return new cqi.status.lookup[payload.code](); + } + + /** + * NOTE: simple (scalar) mappings are applied to lists (the returned list + * has exactly the same length as the list passed as an argument) + */ + + /** + * returns -1 for every string in `strings` that is not found in the lexicon + * + * @param {string} attribute + * @param {string[]} strings + * @returns {Promise} + */ + async cl_str2id(attribute, strings) { + const fn_name = 'cl_str2id'; + const fn_args = {attribute: attribute, strings: strings}; + return await this.#request(fn_name, fn_args); + } + + /** + * returns "" for every ID in `id` that is out of range + * + * @param {string} attribute + * @param {number[]} id + * @returns {Promise} + */ + async cl_id2str(attribute, id) { + const fn_name = 'cl_id2str'; + const fn_args = {attribute: attribute, id: id}; + return await this.#request(fn_name, fn_args); + } + + /** + * returns 0 for every ID in `id` that is out of range + * + * @param {string} attribute + * @param {number[]} id + * @returns {Promise} + */ + async cl_id2freq(attribute, id) { + const fn_name = 'cl_id2freq'; + const fn_args = {attribute: attribute, id: id}; + return await this.#request(fn_name, fn_args); + } + + /** + * returns -1 for every corpus position in `cpos` that is out of range + * + * @param {string} attribute + * @param {number[]} cpos + * @returns {Promise} + */ + async cl_cpos2id(attribute, cpos) { + const fn_name = 'cl_cpos2id'; + const fn_args = {attribute: attribute, cpos: 
cpos}; + return await this.#request(fn_name, fn_args); + } + + /** + * returns "" for every corpus position in that is out of range + * + * @param {string} attribute + * @param {number[]} cpos + * @returns {Promise} + */ + async cl_cpos2str(attribute, cpos) { + const fn_name = 'cl_cpos2str'; + const fn_args = {attribute: attribute, cpos: cpos}; + return await this.#request(fn_name, fn_args); + } + + /** + * returns -1 for every corpus position not inside a structure region + * + * @param {string} attribute + * @param {number[]} cpos + * @returns {Promise} + */ + async cl_cpos2struc(attribute, cpos) { + const fn_name = 'cl_cpos2struc'; + const fn_args = {attribute: attribute, cpos: cpos}; + return await this.#request(fn_name, fn_args); + } + + /** + * NOTE: temporary addition for the Euralex2000 tutorial, but should + * probably be included in CQi specs + */ + + /** + * returns left boundary of s-attribute region enclosing cpos, + * -1 if not in region + * + * @param {string} attribute + * @param {number[]} cpos + * @returns {Promise} + */ + async cl_cpos2lbound(attribute, cpos) { + const fn_name = 'cl_cpos2lbound'; + const fn_args = {attribute: attribute, cpos: cpos}; + return await this.#request(fn_name, fn_args); + } + + /** + * returns right boundary of s-attribute region enclosing cpos, + * -1 if not in region + * + * @param {string} attribute + * @param {number[]} cpos + * @returns {Promise} + */ + async cl_cpos2rbound(attribute, cpos) { + const fn_name = 'cl_cpos2rbound'; + const fn_args = {attribute: attribute, cpos: cpos}; + return await this.#request(fn_name, fn_args); + } + + /** + * returns -1 for every corpus position not inside an alignment + * + * @param {string} attribute + * @param {number[]} cpos + * @returns {Promise} + */ + async cl_cpos2alg(attribute, cpos) { + const fn_name = 'cl_cpos2alg'; + const fn_args = {attribute: attribute, cpos: cpos}; + return await this.#request(fn_name, fn_args); + } + + /** + * returns annotated string values of 
structure regions in ; + * "" if out of range + * + * check corpus_structural_attribute_has_values() first + * + * @param {string} attribute + * @param {number[]} strucs + * @returns {Promise} + */ + async cl_struc2str(attribute, strucs) { + const fn_name = 'cl_struc2str'; + const fn_args = {attribute: attribute, strucs: strucs}; + return await this.#request(fn_name, fn_args); + } + + /** + * NOTE: the following mappings take a single argument and return multiple + * values, including lists of arbitrary size + */ + + /** + * returns all corpus positions where the given token occurs + * + * @param {string} attribute + * @param {number} id + * @returns {Promise} + */ + async cl_id2cpos(attribute, id) { + const fn_name = 'cl_id2cpos'; + const fn_args = {attribute: attribute, id: id}; + return await this.#request(fn_name, fn_args); + } + + /** + * returns all corpus positions where one of the tokens in occurs; + * the returned list is sorted as a whole, not per token id + * + * @param {string} attribute + * @param {number[]} id_list + * @returns {Promise} + */ + async cl_idlist2cpos(attribute, id_list) { + const fn_name = 'cl_idlist2cpos'; + const fn_args = {attribute: attribute, id_list: id_list}; + return await this.#request(fn_name, fn_args); + } + + /** + * returns lexicon IDs of all tokens that match ; + * the returned list may be empty (size 0); + * + * @param {string} attribute + * @param {string} regex + * @returns {Promise} + */ + async cl_regex2id(attribute, regex) { + const fn_name = 'cl_regex2id'; + const fn_args = {attribute: attribute, regex: regex}; + return await this.#request(fn_name, fn_args); + } + + /** + * returns start and end corpus positions of structure region + * + * @param {string} attribute + * @param {number} struc + * @returns {Promise<[number, number]>} + */ + async cl_struc2cpos(attribute, struc) { + const fn_name = 'cl_struc2cpos'; + const fn_args = {attribute: attribute, struc: struc}; + return await this.#request(fn_name, fn_args); + 
} + + /** + * returns (src_start, src_end, target_start, target_end) for alignment `alg` + * + * @param {string} attribute + * @param {number} alg + * @returns {Promise<[number, number, number, number]>} + */ + async cl_alg2cpos(attribute, alg) { + const fn_name = 'cl_alg2cpos'; + const fn_args = {attribute: attribute, alg: alg}; + return await this.#request(fn_name, fn_args); + } + + /** + * `query` must include the ';' character terminating the query. + * + * @param {string} mother_corpus + * @param {string} subcorpus_name + * @param {string} query + * @returns {Promise} + */ + async cqp_query(mother_corpus, subcorpus_name, query) { + const fn_name = 'cqp_query'; + const fn_args = {mother_corpus: mother_corpus, subcorpus_name: subcorpus_name, query: query}; + let payload = await this.#request(fn_name, fn_args); + return new cqi.status.lookup[payload.code](); + } + + /** + * @param {string} corpus + * @returns {Promise} + */ + async cqp_list_subcorpora(corpus) { + const fn_name = 'cqp_list_subcorpora'; + const fn_args = {corpus: corpus}; + return await this.#request(fn_name, fn_args); + } + + /** + * @param {string} subcorpus + * @returns {Promise} + */ + async cqp_subcorpus_size(subcorpus) { + const fn_name = 'cqp_subcorpus_size'; + const fn_args = {subcorpus: subcorpus}; + return await this.#request(fn_name, fn_args); + } + + /** + * @param {string} subcorpus + * @param {number} field + * @returns {Promise} + */ + async cqp_subcorpus_has_field(subcorpus, field) { + const fn_name = 'cqp_subcorpus_has_field'; + const fn_args = {subcorpus: subcorpus, field: field}; + return await this.#request(fn_name, fn_args); + } + + /** + * Dump the values of `field` for match ranges `first` .. `last` + * in `subcorpus`. `field` is one of the CQI_CONST_FIELD_* constants. 
+ * + * @param {string} subcorpus + * @param {number} field + * @param {number} first + * @param {number} last + * @returns {Promise} + */ + async cqp_dump_subcorpus(subcorpus, field, first, last) { + const fn_name = 'cqp_dump_subcorpus'; + const fn_args = {subcorpus: subcorpus, field: field, first: first, last: last}; + return await this.#request(fn_name, fn_args); + } + + /** + * delete a subcorpus from memory + * + * @param {string} subcorpus + * @returns {Promise} + */ + async cqp_drop_subcorpus(subcorpus) { + const fn_name = 'cqp_drop_subcorpus'; + const fn_args = {subcorpus: subcorpus}; + let payload = await this.#request(fn_name, fn_args); + return new cqi.status.lookup[payload.code](); + } + + /** + * NOTE: The following two functions are temporarily included for the + * Euralex 2000 tutorial demo + */ + + /** + * frequency distribution of single tokens + * + * returns (id, frequency) pairs flattened into a list of size 2* + * field is one of + * - CQI_CONST_FIELD_MATCH + * - CQI_CONST_FIELD_TARGET + * - CQI_CONST_FIELD_KEYWORD + * + * NB: pairs are sorted by frequency desc. + * + * @param {string} subcorpus + * @param {number} cutoff + * @param {number} field + * @param {string} attribute + * @returns {Promise} + */ + async cqp_fdist_1(subcorpus, cutoff, field, attribute) { + const fn_name = 'cqp_fdist_1'; + const fn_args = {subcorpus: subcorpus, cutoff: cutoff, field: field, attribute: attribute}; + return await this.#request(fn_name, fn_args); + } + + /** + * frequency distribution of pairs of tokens + * + * returns (id1, id2, frequency) pairs flattened into a list of + * size 3* + * + * NB: triples are sorted by frequency desc. 
+ * + * @param {string} subcorpus + * @param {number} cutoff + * @param {number} field1 + * @param {string} attribute1 + * @param {number} field2 + * @param {string} attribute2 + * @returns {Promise} + */ + async cqp_fdist_2(subcorpus, cutoff, field1, attribute1, field2, attribute2) { + const fn_name = 'cqp_fdist_2'; + const fn_args = {subcorpus: subcorpus, cutoff: cutoff, field1: field1, attribute1: attribute1, field2: field2, attribute2: attribute2}; + return await this.#request(fn_name, fn_args); + } + + /************************************************************************** + * NOTE: The following is not included in the CQi specification. * + **************************************************************************/ + /************************************************************************** + * Custom additions for nopaque * + **************************************************************************/ + + /** + * @param {string} corpus + * @returns {Promise} + */ + async ext_corpus_update_db(corpus) { + const fn_name = 'ext_corpus_update_db'; + const fn_args = {corpus: corpus}; + let payload = await this.#request(fn_name, fn_args); + return new cqi.status.lookup[payload.code](); + } + + /** + * @param {string} corpus + * @returns {Promise} + */ + async ext_corpus_static_data(corpus) { + const fn_name = 'ext_corpus_static_data'; + const fn_args = {corpus: corpus}; + let compressedEncodedData = await this.#request(fn_name, fn_args); + let data = pako.inflate(compressedEncodedData, {to: 'string'}); + return JSON.parse(data); + } + + /** + * @param {string} corpus + * @param {number=} page + * @param {number=} per_page + * @returns {Promise} + */ + async ext_corpus_paginate_corpus(corpus, page, per_page) { + const fn_name = 'ext_corpus_paginate_corpus'; + const fn_args = {corpus: corpus} + if (page !== undefined) {fn_args.page = page;} + if (per_page !== undefined) {fn_args.per_page = per_page;} + return await this.#request(fn_name, fn_args); + } + + /** + * 
@param {string} subcorpus + * @param {number=} context + * @param {number=} page + * @param {number=} per_page + * @returns {Promise} + */ + async ext_cqp_paginate_subcorpus(subcorpus, context, page, per_page) { + const fn_name = 'ext_cqp_paginate_subcorpus'; + const fn_args = {subcorpus: subcorpus} + if (context !== undefined) {fn_args.context = context;} + if (page !== undefined) {fn_args.page = page;} + if (per_page !== undefined) {fn_args.per_page = per_page;} + return await this.#request(fn_name, fn_args); + } + + /** + * @param {string} subcorpus + * @param {number[]} match_id_list + * @param {number=} context + * @returns {Promise} + */ + async ext_cqp_partial_export_subcorpus(subcorpus, match_id_list, context) { + const fn_name = 'ext_cqp_partial_export_subcorpus'; + const fn_args = {subcorpus: subcorpus, match_id_list: match_id_list}; + if (context !== undefined) {fn_args.context = context;} + return await this.#request(fn_name, fn_args); + } + + /** + * @param {string} subcorpus + * @param {number=} context + * @returns {Promise} + */ + async ext_cqp_export_subcorpus(subcorpus, context) { + const fn_name = 'ext_cqp_export_subcorpus'; + const fn_args = {subcorpus: subcorpus}; + if (context !== undefined) {fn_args.context = context;} + return await this.#request(fn_name, fn_args); + } +}; diff --git a/app/static/js/cqi/api/package.js b/app/static/js/cqi/api/package.js new file mode 100644 index 00000000..fb42389b --- /dev/null +++ b/app/static/js/cqi/api/package.js @@ -0,0 +1 @@ +cqi.api = {}; diff --git a/app/static/js/cqi/client.js b/app/static/js/cqi/client.js new file mode 100644 index 00000000..b1d29447 --- /dev/null +++ b/app/static/js/cqi/client.js @@ -0,0 +1,57 @@ +cqi.CQiClient = class CQiClient { + /** + * @param {string} host + * @param {number} [timeout=60] timeout + * @param {string} [version=0.1] version + */ + constructor(host, timeout = 60, version = '0.1') { + /** @type {cqi.api.APIClient} */ + this.api = new cqi.api.APIClient(host, 
timeout, version); + } + + /** + * @returns {cqi.models.corpora.CorpusCollection} + */ + get corpora() { + return new cqi.models.corpora.CorpusCollection(this); + } + + /** + * @returns {Promise} + */ + async bye() { + return await this.api.ctrl_bye(); + } + + /** + * @param {string} username + * @param {string} password + * @returns {Promise} + */ + async connect(username, password) { + return await this.api.ctrl_connect(username, password); + } + + /** + * @returns {Promise} + */ + async ping() { + return await this.api.ctrl_ping(); + } + + /** + * @returns {Promise} + */ + async userAbort() { + return await this.api.ctrl_user_abort(); + } + + /** + * Alias for "bye" method + * + * @returns {Promise} + */ + async disconnect() { + return await this.api.ctrl_bye(); + } +}; diff --git a/app/static/js/cqi/errors.js b/app/static/js/cqi/errors.js new file mode 100644 index 00000000..c7011eb7 --- /dev/null +++ b/app/static/js/cqi/errors.js @@ -0,0 +1,185 @@ +cqi.errors = {}; + + +/** + * A base class from which all other errors inherit. + * If you want to catch all errors that the CQi package might throw, + * catch this base error. 
+ */ +cqi.errors.CQiError = class CQiError extends Error { + constructor(message) { + super(message); + this.code = undefined; + this.description = undefined; + } +}; + + +cqi.errors.Error = class Error extends cqi.errors.CQiError { + constructor(message) { + super(message); + this.code = 2; + } +}; + + +cqi.errors.ErrorGeneralError = class ErrorGeneralError extends cqi.errors.Error { + constructor(message) { + super(message); + this.code = 513; + } +}; + + +cqi.errors.ErrorConnectRefused = class ErrorConnectRefused extends cqi.errors.Error { + constructor(message) { + super(message); + this.code = 514; + } +}; + + +cqi.errors.ErrorUserAbort = class ErrorUserAbort extends cqi.errors.Error { + constructor(message) { + super(message); + this.code = 515; + } +}; + + +cqi.errors.ErrorSyntaxError = class ErrorSyntaxError extends cqi.errors.Error { + constructor(message) { + super(message); + this.code = 516; + } +}; + + +cqi.errors.CLError = class CLError extends cqi.errors.CQiError { // named after its export so constructor.name is not 'Error' + constructor(message) { + super(message); + this.code = 4; + } +}; + + +cqi.errors.CLErrorNoSuchAttribute = class CLErrorNoSuchAttribute extends cqi.errors.CLError { + constructor(message) { + super(message); + this.code = 1025; + this.description = "CQi server couldn't open attribute"; + } +}; + + +cqi.errors.CLErrorWrongAttributeType = class CLErrorWrongAttributeType extends cqi.errors.CLError { + constructor(message) { + super(message); + this.code = 1026; + } +}; + + +cqi.errors.CLErrorOutOfRange = class CLErrorOutOfRange extends cqi.errors.CLError { + constructor(message) { + super(message); + this.code = 1027; + } +}; + + +cqi.errors.CLErrorRegex = class CLErrorRegex extends cqi.errors.CLError { + constructor(message) { + super(message); + this.code = 1028; + } +}; + + +cqi.errors.CLErrorCorpusAccess = class CLErrorCorpusAccess extends cqi.errors.CLError { + constructor(message) { + super(message); + this.code = 1029; + } +}; + + +cqi.errors.CLErrorOutOfMemory = class 
CLErrorOutOfMemory extends cqi.errors.CLError { + constructor(message) { + super(message); + this.code = 1030; + this.description = 'CQi server has run out of memory; try discarding some other corpora and/or subcorpora'; + } +}; + + +cqi.errors.CLErrorInternal = class CLErrorInternal extends cqi.errors.CLError { + constructor(message) { + super(message); + this.code = 1031; + this.description = "The classical 'please contact technical support' error"; + } +}; + + +cqi.errors.CQPError = class CQPError extends cqi.errors.CQiError { // named after its export so constructor.name is not 'Error' + constructor(message) { + super(message); + this.code = 5; + } +}; + + +cqi.errors.CQPErrorGeneral = class CQPErrorGeneral extends cqi.errors.CQPError { + constructor(message) { + super(message); + this.code = 1281; + } +}; + + +cqi.errors.CQPErrorNoSuchCorpus = class CQPErrorNoSuchCorpus extends cqi.errors.CQPError { + constructor(message) { + super(message); + this.code = 1282; + } +}; + + +cqi.errors.CQPErrorInvalidField = class CQPErrorInvalidField extends cqi.errors.CQPError { + constructor(message) { + super(message); + this.code = 1283; + } +}; + + +cqi.errors.CQPErrorOutOfRange = class CQPErrorOutOfRange extends cqi.errors.CQPError { + constructor(message) { + super(message); + this.code = 1284; + this.description = 'A number is out of range'; + } +}; + + +cqi.errors.lookup = { + 2: cqi.errors.Error, + 513: cqi.errors.ErrorGeneralError, + 514: cqi.errors.ErrorConnectRefused, + 515: cqi.errors.ErrorUserAbort, + 516: cqi.errors.ErrorSyntaxError, + 4: cqi.errors.CLError, + 1025: cqi.errors.CLErrorNoSuchAttribute, + 1026: cqi.errors.CLErrorWrongAttributeType, + 1027: cqi.errors.CLErrorOutOfRange, + 1028: cqi.errors.CLErrorRegex, + 1029: cqi.errors.CLErrorCorpusAccess, + 1030: cqi.errors.CLErrorOutOfMemory, + 1031: cqi.errors.CLErrorInternal, + 5: cqi.errors.CQPError, + 1281: cqi.errors.CQPErrorGeneral, + 1282: cqi.errors.CQPErrorNoSuchCorpus, + 1283: cqi.errors.CQPErrorInvalidField, + 1284: cqi.errors.CQPErrorOutOfRange +}; diff --git 
a/app/static/js/cqi/models/attributes.js b/app/static/js/cqi/models/attributes.js new file mode 100644 index 00000000..8a0b987c --- /dev/null +++ b/app/static/js/cqi/models/attributes.js @@ -0,0 +1,289 @@ +cqi.models.attributes = {}; + + +cqi.models.attributes.Attribute = class Attribute extends cqi.models.resource.Model { + /** + * @returns {string} + */ + get apiName() { + return this.attrs.api_name; + } + + /** + * @returns {string} + */ + get name() { + return this.attrs.name; + } + + /** + * @returns {number} + */ + get size() { + return this.attrs.size; + } + + /** + * @returns {Promise} + */ + async drop() { + return await this.client.api.cl_drop_attribute(this.apiName); + } +}; + + +cqi.models.attributes.AttributeCollection = class AttributeCollection extends cqi.models.resource.Collection { + /** @type{typeof cqi.models.attributes.Attribute} */ + static model = cqi.models.attributes.Attribute; + + /** + * @param {cqi.CQiClient} client + * @param {cqi.models.corpora.Corpus} corpus + */ + constructor(client, corpus) { + super(client); + /** @type {cqi.models.corpora.Corpus} */ + this.corpus = corpus; + } + + /** + * @param {string} attributeName + * @returns {Promise} + */ + async _get(attributeName) { + /** @type{string} */ + let apiName = `${this.corpus.apiName}.${attributeName}`; + return { + api_name: apiName, + name: attributeName, + size: await this.client.api.cl_attribute_size(apiName) + } + } + + /** + * @param {string} attributeName + * @returns {Promise} + */ + async get(attributeName) { + return this.prepareModel(await this._get(attributeName)); + } +}; + + +cqi.models.attributes.AlignmentAttribute = class AlignmentAttribute extends cqi.models.attributes.Attribute { + /** + * @param {number} id + * @returns {Promise<[number, number, number, number]>} + */ + async cposById(id) { + return await this.client.api.cl_alg2cpos(this.apiName, id); + } + + /** + * @param {number[]} cposList + * @returns {Promise} + */ + async idsByCpos(cposList) { + return 
await this.client.api.cl_cpos2alg(this.apiName, cposList); + } +}; + + +cqi.models.attributes.AlignmentAttributeCollection = class AlignmentAttributeCollection extends cqi.models.attributes.AttributeCollection { + /** @type{typeof cqi.models.attributes.AlignmentAttribute} */ + static model = cqi.models.attributes.AlignmentAttribute; + + /** + * @returns {Promise} + */ + async list() { + /** @type {string[]} */ + let alignmentAttributeNames = await this.client.api.corpus_alignment_attributes(this.corpus.apiName); + /** @type {cqi.models.attributes.AlignmentAttribute[]} */ + let alignmentAttributes = []; + for (let alignmentAttributeName of alignmentAttributeNames) { + alignmentAttributes.push(await this.get(alignmentAttributeName)); + } + return alignmentAttributes; + } +}; + + +cqi.models.attributes.PositionalAttribute = class PositionalAttribute extends cqi.models.attributes.Attribute { + /** + * @returns {number} + */ + get lexiconSize() { + return this.attrs.lexicon_size; + } + + /** + * @param {number} id + * @returns {Promise} + */ + async cposById(id) { + return await this.client.api.cl_id2cpos(this.apiName, id); + } + + /** + * @param {number[]} idList + * @returns {Promise} + */ + async cposByIds(idList) { + return await this.client.api.cl_idlist2cpos(this.apiName, idList); + } + + /** + * @param {number[]} idList + * @returns {Promise} + */ + async freqsByIds(idList) { + return await this.client.api.cl_id2freq(this.apiName, idList); + } + + /** + * @param {number[]} cposList + * @returns {Promise} + */ + async idsByCpos(cposList) { + return await this.client.api.cl_cpos2id(this.apiName, cposList); + } + + /** + * @param {string} regex + * @returns {Promise} + */ + async idsByRegex(regex) { + return await this.client.api.cl_regex2id(this.apiName, regex); + } + + /** + * @param {string[]} valueList + * @returns {Promise} + */ + async idsByValues(valueList) { + return await this.client.api.cl_str2id(this.apiName, valueList); + } + + /** + * @param {number[]} 
cposList + * @returns {Promise} + */ + async valuesByCpos(cposList) { + return await this.client.api.cl_cpos2str(this.apiName, cposList); + } + + /** + * @param {number[]} idList + * @returns {Promise} + */ + async valuesByIds(idList) { + return await this.client.api.cl_id2str(this.apiName, idList); + } +}; + + +cqi.models.attributes.PositionalAttributeCollection = class PositionalAttributeCollection extends cqi.models.attributes.AttributeCollection { + /** @type{typeof cqi.models.attributes.PositionalAttribute} */ + static model = cqi.models.attributes.PositionalAttribute; + + /** + * @param {string} positionalAttributeName + * @returns {Promise} + */ + async _get(positionalAttributeName) { + let positionalAttribute = await super._get(positionalAttributeName); + positionalAttribute.lexicon_size = await this.client.api.cl_lexicon_size(positionalAttribute.api_name); + return positionalAttribute; + } + + /** + * @returns {Promise} + */ + async list() { + let positionalAttributeNames = await this.client.api.corpus_positional_attributes(this.corpus.apiName); + let positionalAttributes = []; + for (let positionalAttributeName of positionalAttributeNames) { + positionalAttributes.push(await this.get(positionalAttributeName)); + } + return positionalAttributes; + } +}; + + +cqi.models.attributes.StructuralAttribute = class StructuralAttribute extends cqi.models.attributes.Attribute { + /** + * @returns {boolean} + */ + get hasValues() { + return this.attrs.has_values; + } + + /** + * @param {number} id + * @returns {Promise<[number, number]>} + */ + async cposById(id) { + return await this.client.api.cl_struc2cpos(this.apiName, id); + } + + /** + * @param {number[]} cposList + * @returns {Promise} + */ + async idsByCpos(cposList) { + return await this.client.api.cl_cpos2struc(this.apiName, cposList); + } + + /** + * @param {number[]} cposList + * @returns {Promise} + */ + async lboundByCpos(cposList) { + return await this.client.api.cl_cpos2lbound(this.apiName, cposList); 
+ } + + /** + * @param {number[]} cposList + * @returns {Promise} + */ + async rboundByCpos(cposList) { + return await this.client.api.cl_cpos2rbound(this.apiName, cposList); + } + + /** + * @param {number[]} idList + * @returns {Promise} + */ + async valuesByIds(idList) { + return await this.client.api.cl_struc2str(this.apiName, idList); + } +}; + + +cqi.models.attributes.StructuralAttributeCollection = class StructuralAttributeCollection extends cqi.models.attributes.AttributeCollection { + /** @type{typeof cqi.models.attributes.StructuralAttribute} */ + static model = cqi.models.attributes.StructuralAttribute; + + /** + * @param {string} structuralAttributeName + * @returns {Promise} resolves with the base attribute attrs plus a has_values entry + */ + async _get(structuralAttributeName) { + let structuralAttribute = await super._get(structuralAttributeName); + structuralAttribute.has_values = await this.client.api.corpus_structural_attribute_has_values(this.corpus.apiName, structuralAttribute.api_name); // NOTE(review): APIClient declares no cl_has_values; argument values follow its (corpus, attribute) signature - confirm against the server + return structuralAttribute; + } + + /** + * @returns {Promise} + */ + async list() { + let structuralAttributeNames = await this.client.api.corpus_structural_attributes(this.corpus.apiName); + let structuralAttributes = []; + for (let structuralAttributeName of structuralAttributeNames) { + structuralAttributes.push(await this.get(structuralAttributeName)); + } + return structuralAttributes; + } +}; diff --git a/app/static/js/cqi/models/corpora.js b/app/static/js/cqi/models/corpora.js new file mode 100644 index 00000000..8128c47f --- /dev/null +++ b/app/static/js/cqi/models/corpora.js @@ -0,0 +1,166 @@ +cqi.models.corpora = {}; + + +cqi.models.corpora.Corpus = class Corpus extends cqi.models.resource.Model { + /** + * @returns {string} + */ + get apiName() { + return this.attrs.api_name; + } + + /** + * @returns {string} + */ + get name() { + return this.attrs.name; + } + + /** + * @returns {number} + */ + get size() { + return this.attrs.size; + } + + /** + * @returns {string} + */ + get charset() { + return this.attrs.charset; + } + + /** + * @returns {string[]} + 
*/ + get properties() { + return this.attrs?.properties; + } + + /** + * @returns {cqi.models.attributes.AlignmentAttributeCollection} + */ + get alignmentAttributes() { + return new cqi.models.attributes.AlignmentAttributeCollection(this.client, this); + } + + /** + * @returns {cqi.models.attributes.PositionalAttributeCollection} + */ + get positionalAttributes() { + return new cqi.models.attributes.PositionalAttributeCollection(this.client, this); + } + + /** + * @returns {cqi.models.attributes.StructuralAttributeCollection} + */ + get structuralAttributes() { + return new cqi.models.attributes.StructuralAttributeCollection(this.client, this); + } + + /** + * @returns {cqi.models.subcorpora.SubcorpusCollection} + */ + get subcorpora() { + return new cqi.models.subcorpora.SubcorpusCollection(this.client, this); + } + + /** + * @returns {Promise} + */ + async drop() { + return await this.client.api.corpus_drop_corpus(this.apiName); + } + + /** + * @param {string} subcorpusName + * @param {string} query + * @returns {Promise} + */ + async query(subcorpusName, query) { + return await this.client.api.cqp_query(this.apiName, subcorpusName, query); + } + + /************************************************************************** + * NOTE: The following is not included in the CQi specification. 
* + **************************************************************************/ + /************************************************************************** + * Custom additions for nopaque * + **************************************************************************/ + + /** + * @returns {string} + */ + get staticData() { + return this.attrs.static_data; + } + + /** + * @returns {cqi.status.StatusOk} + */ + async updateDb() { + return await this.client.api.ext_corpus_update_db(this.apiName); + } + + /** + * @param {number=} page + * @param {number=} per_page + * @returns {Promise} + */ + async paginate(page, per_page) { + return await this.client.api.ext_corpus_paginate_corpus(this.apiName, page, per_page); + } +}; + + +cqi.models.corpora.CorpusCollection = class CorpusCollection extends cqi.models.resource.Collection { + /** @type {typeof cqi.models.corpora.Corpus} */ + static model = cqi.models.corpora.Corpus; + + /** + * @param {string} corpusName + * @returns {Promise} + */ + async _get(corpusName) { + const returnValue = { + api_name: corpusName, + charset: await this.client.api.corpus_charset(corpusName), + // full_name: await this.client.api.corpus_full_name(corpusName), + // info: await this.client.api.corpus_info(corpusName), + name: corpusName, + properties: await this.client.api.corpus_properties(corpusName), + size: await this.client.api.cl_attribute_size(`${corpusName}.word`) + }; + + /************************************************************************ + * NOTE: The following is not included in the CQi specification. 
*
+     ************************************************************************/
+    /************************************************************************
+     * Custom additions for nopaque *
+     ************************************************************************/
+    returnValue.static_data = await this.client.api.ext_corpus_static_data(corpusName);
+    return returnValue;
+  }
+
+  /**
+   * @param {string} corpusName Name handed to _get.
+   * @returns {Promise<cqi.models.corpora.Corpus>} Model built by prepareModel from the attributes fetched by _get.
+   */
+  async get(corpusName) {
+    return this.prepareModel(await this._get(corpusName));
+  }
+
+  /**
+   * @returns {Promise<cqi.models.corpora.Corpus[]>} One model per name returned by corpus_list_corpora, fetched one after another.
+   */
+  async list() {
+    /** @type {string[]} */
+    let corpusNames = await this.client.api.corpus_list_corpora();
+    /** @type {cqi.models.corpora.Corpus[]} */
+    let corpora = [];
+    for (let corpusName of corpusNames) {
+      corpora.push(await this.get(corpusName));
+    }
+    return corpora;
+  }
+};
diff --git a/app/static/js/cqi/models/package.js b/app/static/js/cqi/models/package.js
new file mode 100644
index 00000000..4973862f
--- /dev/null
+++ b/app/static/js/cqi/models/package.js
@@ -0,0 +1 @@
+cqi.models = {};
diff --git a/app/static/js/cqi/models/resource.js b/app/static/js/cqi/models/resource.js
new file mode 100644
index 00000000..9d3afde3
--- /dev/null
+++ b/app/static/js/cqi/models/resource.js
@@ -0,0 +1,90 @@
+cqi.models.resource = {};
+
+
+/**
+ * A base class for representing a single object on the server.
+ */
+cqi.models.resource.Model = class Model {
+  /**
+   * @param {object} attrs Raw representation of the object; stored as this.attrs.
+   * @param {cqi.CQiClient} client Client pointing at the server; stored as this.client.
+   * @param {cqi.models.resource.Collection} collection Collection this model belongs to; stored as this.collection.
+   */
+  constructor(attrs, client, collection) {
+    /**
+     * A client pointing at the server that this object is on.
+     *
+     * @type {cqi.CQiClient}
+     */
+    this.client = client;
+    /**
+     * The collection that this model is part of.
+     *
+     * @type {cqi.models.resource.Collection}
+     */
+    this.collection = collection;
+    /**
+     * The raw representation of this object from the API
+     *
+     * @type {object}
+     */
+    this.attrs = attrs;
+  }
+
+  /**
+   * @returns {string} API name of this object; the base getter always throws and must be overridden by subclasses.
+   */
+  get apiName() {
+    throw new Error('Not implemented');
+  }
+
+  /**
+   * @returns {Promise<void>} Resolves once this.attrs has been replaced with the attrs of a model freshly fetched from the collection.
+   */
+  async reload() {
+    this.attrs = (await this.collection.get(this.apiName)).attrs; // await the model first: .attrs read off the pending Promise is undefined
+  }
+};
+
+
+/**
+ * A base class for representing all objects of a particular type on the server.
+ */
+cqi.models.resource.Collection = class Collection {
+  /**
+   * The type of object this collection represents, set by subclasses
+   *
+   * @type {typeof cqi.models.resource.Model}
+   */
+  static model;
+
+  /**
+   * @param {cqi.CQiClient} client Client pointing at the server; stored as this.client.
+   */
+  constructor(client) {
+    /**
+     * A client pointing at the server that this object is on.
+     *
+     * @type {cqi.CQiClient}
+     */
+    this.client = client;
+  }
+
+  async list() {
+    throw new Error('Not implemented'); // abstract: the base implementation always rejects
+  }
+
+  async get() {
+    throw new Error('Not implemented'); // abstract: the base implementation always rejects
+  }
+
+  /**
+   * Create a model from a set of attributes.
+   *
+   * @param {object} attrs Raw attributes handed to the model constructor.
+   * @returns {cqi.models.resource.Model} Instance of the (sub)class' static model, bound to this collection and its client.
+   */
+  prepareModel(attrs) {
+    return new this.constructor.model(attrs, this.client, this);
+  }
+};
diff --git a/app/static/js/cqi/models/subcorpora.js b/app/static/js/cqi/models/subcorpora.js
new file mode 100644
index 00000000..86e6cf67
--- /dev/null
+++ b/app/static/js/cqi/models/subcorpora.js
@@ -0,0 +1,189 @@
+cqi.models.subcorpora = {};
+
+
+cqi.models.subcorpora.Subcorpus = class Subcorpus extends cqi.models.resource.Model {
+  /**
+   * @returns {string} The api_name attribute.
+   */
+  get apiName() {
+    return this.attrs.api_name;
+  }
+
+  /**
+   * @returns {object} The fields attribute.
+   */
+  get fields() {
+    return this.attrs.fields;
+  }
+
+  /**
+   * @returns {string} The name attribute.
+   */
+  get name() {
+    return this.attrs.name;
+  }
+
+  /**
+   * @returns {number} The size attribute.
+   */
+  get size() {
+    return this.attrs.size;
+  }
+
+  /**
+   * @returns {Promise} Resolves with the result of the cqp_drop_subcorpus API call for this subcorpus.
+   */
+  async drop() {
+    return await this.client.api.cqp_drop_subcorpus(this.apiName);
+  }
+
+  /**
+   * @param {number} field Forwarded unchanged to cqp_dump_subcorpus.
+   * @param {number} first Forwarded unchanged to cqp_dump_subcorpus.
+   * @param {number} last Forwarded unchanged to cqp_dump_subcorpus.
+   * @returns {Promise} Resolves with the result of the cqp_dump_subcorpus API call.
+   */
+  async dump(field, first, last) {
+    return await this.client.api.cqp_dump_subcorpus(
+      this.apiName,
+      field,
+      first,
+      last
+    );
+  }
+
+  /**
+   * @param {number} cutoff Forwarded unchanged to cqp_fdist_1.
+   * @param {number} field Forwarded unchanged to cqp_fdist_1.
+   * @param {cqi.models.attributes.PositionalAttribute} attribute Only its apiName is sent to the API.
+   * @returns {Promise} Resolves with the result of the cqp_fdist_1 API call.
+   */
+  async fdist1(cutoff, field, attribute) {
+    return await this.client.api.cqp_fdist_1(
+      this.apiName,
+      cutoff,
+      field,
+      attribute.apiName
+    );
+  }
+
+  /**
+   * @param {number} cutoff Forwarded unchanged to cqp_fdist_2.
+   * @param {number} field1 Forwarded unchanged to cqp_fdist_2.
+   * @param {cqi.models.attributes.PositionalAttribute} attribute1 Only its apiName is sent to the API.
+   * @param {number} field2 Forwarded unchanged to cqp_fdist_2.
+   * @param {cqi.models.attributes.PositionalAttribute} attribute2 Only its apiName is sent to the API.
+   * @returns {Promise} Resolves with the result of the cqp_fdist_2 API call.
+   */
+  async fdist2(cutoff, field1, attribute1, field2, attribute2) {
+    return await this.client.api.cqp_fdist_2(
+      this.apiName,
+      cutoff,
+      field1,
+      attribute1.apiName,
+      field2,
+      attribute2.apiName
+    );
+  }
+
+
/**************************************************************************
+   * NOTE: The following is not included in the CQi specification. *
+   **************************************************************************/
+  /**************************************************************************
+   * Custom additions for nopaque *
+   **************************************************************************/
+
+  /**
+   * @param {number=} context Forwarded unchanged to ext_cqp_paginate_subcorpus.
+   * @param {number=} page Forwarded unchanged to ext_cqp_paginate_subcorpus.
+   * @param {number=} perPage Forwarded unchanged to ext_cqp_paginate_subcorpus.
+   * @returns {Promise} Resolves with the result of the ext_cqp_paginate_subcorpus API call.
+   */
+  async paginate(context, page, perPage) {
+    return await this.client.api.ext_cqp_paginate_subcorpus(this.apiName, context, page, perPage);
+  }
+
+  /**
+   * @param {number[]} matchIdList Forwarded unchanged to ext_cqp_partial_export_subcorpus.
+   * @param {number=} context Forwarded unchanged to ext_cqp_partial_export_subcorpus.
+   * @returns {Promise} Resolves with the result of the ext_cqp_partial_export_subcorpus API call.
+   */
+  async partialExport(matchIdList, context) {
+    return await this.client.api.ext_cqp_partial_export_subcorpus(this.apiName, matchIdList, context);
+  }
+
+  /**
+   * @param {number=} context Forwarded unchanged to ext_cqp_export_subcorpus.
+   * @returns {Promise} Resolves with the result of the ext_cqp_export_subcorpus API call.
+   */
+  async export(context) {
+    return await this.client.api.ext_cqp_export_subcorpus(this.apiName, context);
+  }
+};
+
+
+cqi.models.subcorpora.SubcorpusCollection = class SubcorpusCollection extends cqi.models.resource.Collection {
+  /** @type {typeof cqi.models.subcorpora.Subcorpus} */
+  static model = cqi.models.subcorpora.Subcorpus;
+
+  /**
+   * @param {cqi.CQiClient} client Passed on to the base Collection constructor.
+   * @param {cqi.models.corpora.Corpus} corpus Parent corpus; its apiName prefixes every subcorpus API name.
+   */
+  constructor(client, corpus) {
+    super(client);
+    /** @type {cqi.models.corpora.Corpus} */
+    this.corpus = corpus;
+  }
+
+  /**
+   * @param {string} subcorpusName Combined with the corpus API name as `<corpus apiName>:<subcorpusName>`.
+   * @returns {Promise<object>} Raw attribute object for the subcorpus, assembled from several sequential API calls.
+   */
+  async _get(subcorpusName) {
+    /** @type {string} */
+    let apiName = `${this.corpus.apiName}:${subcorpusName}`;
+    /** @type {object} */
+    let fields = {};
+    if (await this.client.api.cqp_subcorpus_has_field(apiName, cqi.CONST_FIELD_MATCH)) {
+      fields.match = cqi.CONST_FIELD_MATCH;
+    }
+    if (await this.client.api.cqp_subcorpus_has_field(apiName, cqi.CONST_FIELD_MATCHEND)) {
+
fields.matchend = cqi.CONST_FIELD_MATCHEND;
+    }
+    if (await this.client.api.cqp_subcorpus_has_field(apiName, cqi.CONST_FIELD_TARGET)) {
+      fields.target = cqi.CONST_FIELD_TARGET;
+    }
+    if (await this.client.api.cqp_subcorpus_has_field(apiName, cqi.CONST_FIELD_KEYWORD)) {
+      fields.keyword = cqi.CONST_FIELD_KEYWORD;
+    }
+    return {
+      api_name: apiName,
+      fields: fields,
+      name: subcorpusName,
+      size: await this.client.api.cqp_subcorpus_size(apiName)
+    };
+  }
+
+  /**
+   * @param {string} subcorpusName Name handed to _get.
+   * @returns {Promise<cqi.models.subcorpora.Subcorpus>} Model built by prepareModel from the attributes fetched by _get.
+   */
+  async get(subcorpusName) {
+    return this.prepareModel(await this._get(subcorpusName));
+  }
+
+  /**
+   * @returns {Promise<cqi.models.subcorpora.Subcorpus[]>} One model per name returned by cqp_list_subcorpora, fetched one after another.
+   */
+  async list() {
+    /** @type {string[]} */
+    let subcorpusNames = await this.client.api.cqp_list_subcorpora(this.corpus.apiName);
+    /** @type {cqi.models.subcorpora.Subcorpus[]} */
+    let subcorpora = [];
+    for (let subcorpusName of subcorpusNames) {
+      subcorpora.push(await this.get(subcorpusName));
+    }
+    return subcorpora;
+  }
+};
diff --git a/app/static/js/cqi/package.js b/app/static/js/cqi/package.js
new file mode 100644
index 00000000..1558b308
--- /dev/null
+++ b/app/static/js/cqi/package.js
@@ -0,0 +1,6 @@
+var cqi = {};
+
+cqi.CONST_FIELD_KEYWORD = 9;
+cqi.CONST_FIELD_MATCH = 16;
+cqi.CONST_FIELD_MATCHEND = 17;
+cqi.CONST_FIELD_TARGET = 0;
diff --git a/app/static/js/cqi/status.js b/app/static/js/cqi/status.js
new file mode 100644
index 00000000..0782ee26
--- /dev/null
+++ b/app/static/js/cqi/status.js
@@ -0,0 +1,51 @@
+cqi.status = {};
+
+
+/**
+ * A base class from which all other status inherit.
+ */
+cqi.status.CQiStatus = class CQiStatus {
+  constructor() {
+    this.code = undefined; // placeholder; each subclass assigns its own numeric code after super()
+  }
+};
+
+
+cqi.status.StatusOk = class StatusOk extends cqi.status.CQiStatus {
+  constructor() {
+    super();
+    this.code = 257;
+  }
+};
+
+
+cqi.status.StatusConnectOk = class StatusConnectOk extends cqi.status.CQiStatus {
+  constructor() {
+    super();
+    this.code = 258;
+  }
+};
+
+
+cqi.status.StatusByeOk = class StatusByeOk extends cqi.status.CQiStatus {
+  constructor() {
+    super();
+    this.code = 259;
+  }
+};
+
+
+cqi.status.StatusPingOk = class StatusPingOk extends cqi.status.CQiStatus {
+  constructor() {
+    super();
+    this.code = 260;
+  }
+};
+
+
+cqi.status.lookup = { // numeric status code -> status class that carries the same code
+  257: cqi.status.StatusOk,
+  258: cqi.status.StatusConnectOk,
+  259: cqi.status.StatusByeOk,
+  260: cqi.status.StatusPingOk
+};
diff --git a/app/templates/_scripts.html.j2 b/app/templates/_scripts.html.j2
index 88167db7..c68b6146 100644
--- a/app/templates/_scripts.html.j2
+++ b/app/templates/_scripts.html.j2
@@ -1,15 +1,35 @@
-
+
+
+
+
+{%- assets
+  filters='rjsmin',
+  output='gen/cqi.%(version)s.js',
+  'js/cqi/package.js',
+  'js/cqi/errors.js',
+  'js/cqi/status.js',
+  'js/cqi/api/package.js',
+  'js/cqi/api/client.js',
+  'js/cqi/models/package.js',
+  'js/cqi/models/resource.js',
+  'js/cqi/models/attributes.js',
+  'js/cqi/models/subcorpora.js',
+  'js/cqi/models/corpora.js',
+  'js/cqi/client.js'
+%}
+
+{%- endassets %}
 {%- assets
   filters='rjsmin',
   output='gen/app.%(version)s.js',
   'js/App.js',
   'js/Utils.js',
-  'js/CorpusAnalysis/CQiClient.js',
   'js/CorpusAnalysis/CorpusAnalysisApp.js',
   'js/CorpusAnalysis/CorpusAnalysisConcordance.js',
   'js/CorpusAnalysis/CorpusAnalysisReader.js',
+  'js/CorpusAnalysis/CorpusAnalysisStaticVisualization.js',
   'js/CorpusAnalysis/QueryBuilder.js',
   'js/XMLtoObject.js'
 %}
@@ -49,7 +69,9 @@
   'js/ResourceLists/UserList.js',
   'js/ResourceLists/AdminUserList.js',
   'js/ResourceLists/CorpusFollowerList.js',
-  'js/ResourceLists/DetailledPublicCorpusList.js'
+
'js/ResourceLists/CorpusTextInfoList.js', + 'js/ResourceLists/DetailledPublicCorpusList.js', + 'js/ResourceLists/CorpusTokenList.js' %} {%- endassets %} diff --git a/app/templates/corpora/_analysis/concordance.html.j2 b/app/templates/corpora/_analysis/concordance.html.j2 index 5886a4b1..3faf20c0 100644 --- a/app/templates/corpora/_analysis/concordance.html.j2 +++ b/app/templates/corpora/_analysis/concordance.html.j2 @@ -154,6 +154,410 @@ Query your corpus with the CQP query language utilizing a KWIC view. + + {% endset %} {% set scripts %} diff --git a/app/templates/corpora/_analysis/static_visualization.html.j2 b/app/templates/corpora/_analysis/static_visualization.html.j2 new file mode 100644 index 00000000..c457cbf3 --- /dev/null +++ b/app/templates/corpora/_analysis/static_visualization.html.j2 @@ -0,0 +1,161 @@ +{% set name = 'Static Visualization (beta)' %} + +{% set description = '' %} + +{% set id_prefix = name.lower().replace(' ', '-') + '-extension' %} + +{% set tab_content = '' %} + +{% set container_content %} +
      +
      +

      query_stats{{ name }}

      +
      +
      +
      +
      +
      +
      +

      Tokens

      + +
      +
      +
      +
      +
      +
      +

      Sentences

      + +
      +
      +
      +
      +
      +
      +

      Unique words

      + +
      +
      +
      +
      +
      +
      +

      Unique lemmas

      + +
      +
      +
      +
      +
      +
      +

      Unique pos

      + +
      +
      +
      +
      +
      +
      +

      Unique simple_pos

      + +
      +
      +
      +
      +
      +
      +
      +
      + Proportions +

      of texts within the corpus

      +
      + incomplete_circle + sort +
      +
      +
      +
      +
      +
      + Text Information Overview +
      +
      +
      +
      +
      +
      +
      +
      +
      +
      + Frequencies + +
      +
      +
      +
      +
      + +
      +{% endset %} + +{% set modals %} +