diff --git a/app/corpora/__init__.py b/app/corpora/__init__.py index 65129bf6..9ad47224 100644 --- a/app/corpora/__init__.py +++ b/app/corpora/__init__.py @@ -16,5 +16,4 @@ def before_request(): pass -from . import cli, cqi_over_socketio, files, followers, routes, json_routes -from . import cqi_over_sio +from . import cli, cqi_over_sio, files, followers, routes, json_routes diff --git a/app/corpora/cqi_over_sio/extensions/__init__.py b/app/corpora/cqi_over_sio/extensions/__init__.py index 455105fb..09f9210a 100644 --- a/app/corpora/cqi_over_sio/extensions/__init__.py +++ b/app/corpora/cqi_over_sio/extensions/__init__.py @@ -167,7 +167,7 @@ def corpus_paginate_corpus( payload['prev_num'] = payload['page'] - 1 if payload['has_prev'] else None # Number of the next page payload['next_num'] = payload['page'] + 1 if payload['has_next'] else None - return {'code': 200, 'msg': 'OK', 'payload': payload} + return payload def cqp_paginate_subcorpus( @@ -215,7 +215,7 @@ def cqp_paginate_subcorpus( payload['prev_num'] = payload['page'] - 1 if payload['has_prev'] else None # Number of the next page payload['next_num'] = payload['page'] + 1 if payload['has_next'] else None - return {'code': 200, 'msg': 'OK', 'payload': payload} + return payload def cqp_partial_export_subcorpus( @@ -228,7 +228,7 @@ def cqp_partial_export_subcorpus( cqi_corpus = cqi_client.corpora.get(corpus_name) cqi_subcorpus = cqi_corpus.subcorpora.get(subcorpus_name) cqi_subcorpus_partial_export = partial_export_subcorpus(cqi_subcorpus, match_id_list, context=context) - return {'code': 200, 'msg': 'OK', 'payload': cqi_subcorpus_partial_export} + return cqi_subcorpus_partial_export def cqp_export_subcorpus( @@ -240,4 +240,4 @@ def cqp_export_subcorpus( cqi_corpus = cqi_client.corpora.get(corpus_name) cqi_subcorpus = cqi_corpus.subcorpora.get(subcorpus_name) cqi_subcorpus_export = export_subcorpus(cqi_subcorpus, context=context) - return {'code': 200, 'msg': 'OK', 'payload': cqi_subcorpus_export} + return cqi_subcorpus_export diff --git a/app/corpora/cqi_over_socketio/__init__.py b/app/corpora/cqi_over_socketio/__init__.py deleted file mode 100644 index cd02bedd..00000000 --- a/app/corpora/cqi_over_socketio/__init__.py +++ /dev/null @@ -1,115 +0,0 @@ -from flask import session -from flask_login import current_user -from flask_socketio import ConnectionRefusedError -from threading import Lock -import cqi -from app import db, hashids, socketio -from app.decorators import socketio_login_required -from app.models import Corpus, CorpusStatus - - -''' -This package tunnels the Corpus Query interface (CQi) protocol through -Socket.IO (SIO) by wrapping each CQi function in a seperate SIO event. - -This module only handles the SIO connect/disconnect, which handles the setup -and teardown of necessary ressources for later use. Each CQi function has a -corresponding SIO event. The event handlers are spread across the different -modules within this package. - -Basic concept: -1. A client connects to the SIO namespace and provides the id of a corpus to be - analysed. - 1.1 The analysis session counter of the corpus is incremented. - 1.2 A CQiClient and a (Mutex) Lock belonging to it is created. - 1.3 Wait until the CQP server is running. - 1.4 Connect the CQiClient to the server. - 1.5 Save the CQiClient and the Lock in the session for subsequential use. -2. A client emits an event and may provide a single json object with necessary - arguments for the targeted CQi function. -3. A SIO event handler (decorated with cqi_over_socketio) gets executed. - - The event handler function defines all arguments. Hence the client - is sent as a single json object, the decorator decomposes it to fit - the functions signature. This also includes type checking and proper - use of the lock (acquire/release) mechanism. -4. Wait for more events -5. The client disconnects from the SIO namespace - 1.1 The analysis session counter of the corpus is decremented. - 1.2 The CQiClient and (Mutex) Lock belonging to it are teared down. -''' - - -NAMESPACE = '/corpora/corpus/corpus_analysis' - - -# Import all CQi over Socket.IO event handlers -from .cqi_corpora_corpus_subcorpora import * # noqa -from .cqi_corpora_corpus_structural_attributes import * # noqa -from .cqi_corpora_corpus_positional_attributes import * # noqa -from .cqi_corpora_corpus_alignment_attributes import * # noqa -from .cqi_corpora_corpus import * # noqa -from .cqi_corpora import * # noqa -from .cqi import * # noqa - - -@socketio.on('connect', namespace=NAMESPACE) -@socketio_login_required -def connect(auth): - # the auth variable is used in a hacky way. It contains the corpus id for - # which a corpus analysis session should be started. - corpus_id = hashids.decode(auth['corpus_id']) - corpus = Corpus.query.get(corpus_id) - if corpus is None: - # return {'code': 404, 'msg': 'Not Found'} - raise ConnectionRefusedError('Not Found') - if not (corpus.user == current_user - or current_user.is_following_corpus(corpus) - or current_user.is_administrator()): - # return {'code': 403, 'msg': 'Forbidden'} - raise ConnectionRefusedError('Forbidden') - if corpus.status not in [ - CorpusStatus.BUILT, - CorpusStatus.STARTING_ANALYSIS_SESSION, - CorpusStatus.RUNNING_ANALYSIS_SESSION, - CorpusStatus.CANCELING_ANALYSIS_SESSION - ]: - # return {'code': 424, 'msg': 'Failed Dependency'} - raise ConnectionRefusedError('Failed Dependency') - if corpus.num_analysis_sessions is None: - corpus.num_analysis_sessions = 0 - db.session.commit() - corpus.num_analysis_sessions = Corpus.num_analysis_sessions + 1 - db.session.commit() - retry_counter = 20 - while corpus.status != CorpusStatus.RUNNING_ANALYSIS_SESSION: - if retry_counter == 0: - corpus.num_analysis_sessions = Corpus.num_analysis_sessions - 1 - db.session.commit() - return {'code': 408, 'msg': 'Request Timeout'} - socketio.sleep(3) - retry_counter -= 1 - db.session.refresh(corpus) - cqi_client = cqi.CQiClient(f'cqpserver_{corpus_id}') - session['d'] = { - 'corpus_id': corpus_id, - 'cqi_client': cqi_client, - 'cqi_client_lock': Lock(), - } - # return {'code': 200, 'msg': 'OK'} - - -@socketio.on('disconnect', namespace=NAMESPACE) -def disconnect(): - if 'd' not in session: - return - session['d']['cqi_client_lock'].acquire() - try: - session['d']['cqi_client'].disconnect() - except (BrokenPipeError, cqi.errors.CQiException): - pass - session['d']['cqi_client_lock'].release() - corpus = Corpus.query.get(session['d']['corpus_id']) - corpus.num_analysis_sessions = Corpus.num_analysis_sessions - 1 - db.session.commit() - session.pop('d') - # return {'code': 200, 'msg': 'OK'} diff --git a/app/corpora/cqi_over_socketio/cqi.py b/app/corpora/cqi_over_socketio/cqi.py deleted file mode 100644 index 402b6283..00000000 --- a/app/corpora/cqi_over_socketio/cqi.py +++ /dev/null @@ -1,43 +0,0 @@ -from socket import gaierror -import cqi -from app import socketio -from app.decorators import socketio_login_required -from . import NAMESPACE as ns -from .utils import cqi_over_socketio - - -@socketio.on('cqi.connect', namespace=ns) -@socketio_login_required -@cqi_over_socketio -def cqi_connect(cqi_client: cqi.CQiClient): - try: - cqi_status = cqi_client.connect() - except gaierror as e: - return { - 'code': 500, - 'msg': 'Internal Server Error', - 'payload': {'code': e.args[0], 'desc': e.args[1]} - } - payload = {'code': cqi_status.code, - 'msg': cqi_status.__class__.__name__} - return {'code': 200, 'msg': 'OK', 'payload': payload} - - -@socketio.on('cqi.disconnect', namespace=ns) -@socketio_login_required -@cqi_over_socketio -def cqi_disconnect(cqi_client: cqi.CQiClient): - cqi_status = cqi_client.disconnect() - payload = {'code': cqi_status.code, - 'msg': cqi_status.__class__.__name__} - return {'code': 200, 'msg': 'OK', 'payload': payload} - - -@socketio.on('cqi.ping', namespace=ns) -@socketio_login_required -@cqi_over_socketio -def cqi_ping(cqi_client: cqi.CQiClient): - cqi_status = cqi_client.ping() - payload = {'code': cqi_status.code, - 'msg': cqi_status.__class__.__name__} - return {'code': 200, 'msg': 'OK', 'payload': payload} diff --git a/app/corpora/cqi_over_socketio/cqi_corpora.py b/app/corpora/cqi_over_socketio/cqi_corpora.py deleted file mode 100644 index 7b73429a..00000000 --- a/app/corpora/cqi_over_socketio/cqi_corpora.py +++ /dev/null @@ -1,22 +0,0 @@ -import cqi -from app import socketio -from app.decorators import socketio_login_required -from . import NAMESPACE as ns -from .utils import cqi_over_socketio - - -@socketio.on('cqi.corpora.get', namespace=ns) -@socketio_login_required -@cqi_over_socketio -def cqi_corpora_get(cqi_client: cqi.CQiClient, corpus_name: str): - cqi_corpus = cqi_client.corpora.get(corpus_name) - payload = {**cqi_corpus.attrs} - return {'code': 200, 'msg': 'OK', 'payload': payload} - - -@socketio.on('cqi.corpora.list', namespace=ns) -@socketio_login_required -@cqi_over_socketio -def cqi_corpora_list(cqi_client: cqi.CQiClient): - payload = [{**x.attrs} for x in cqi_client.corpora.list()] - return {'code': 200, 'msg': 'OK', 'payload': payload} diff --git a/app/corpora/cqi_over_socketio/cqi_corpora_corpus.py b/app/corpora/cqi_over_socketio/cqi_corpora_corpus.py deleted file mode 100644 index 1c15e2f0..00000000 --- a/app/corpora/cqi_over_socketio/cqi_corpora_corpus.py +++ /dev/null @@ -1,199 +0,0 @@ -from collections import Counter -from flask import session -import cqi -import json -import math -import os -from app import db, socketio -from app.decorators import socketio_login_required -from app.models import Corpus -from . import NAMESPACE as ns -from .utils import cqi_over_socketio, lookups_by_cpos - - -@socketio.on('cqi.corpora.corpus.drop', namespace=ns) -@socketio_login_required -@cqi_over_socketio -def cqi_corpora_corpus_drop(cqi_client: cqi.CQiClient, corpus_name: str): - cqi_corpus = cqi_client.corpora.get(corpus_name) - cqi_status = cqi_corpus.drop() - payload = {'code': cqi_status.code, - 'msg': cqi_status.__class__.__name__} - return {'code': 200, 'msg': 'OK', 'payload': payload} - - -@socketio.on('cqi.corpora.corpus.query', namespace=ns) -@socketio_login_required -@cqi_over_socketio -def cqi_corpora_corpus_query(cqi_client: cqi.CQiClient, corpus_name: str, subcorpus_name: str, query: str): # noqa - cqi_corpus = cqi_client.corpora.get(corpus_name) - cqi_status = cqi_corpus.query(subcorpus_name, query) - payload = {'code': cqi_status.code, - 'msg': cqi_status.__class__.__name__} - return {'code': 200, 'msg': 'OK', 'payload': payload} - - -############################################################################### -# nopaque specific CQi extensions # -############################################################################### -@socketio.on('cqi.corpora.corpus.update_db', namespace=ns) -@socketio_login_required -@cqi_over_socketio -def cqi_corpora_corpus_update_db(cqi_client: cqi.CQiClient, corpus_name: str): - corpus = Corpus.query.get(session['d']['corpus_id']) - cqi_corpus = cqi_client.corpora.get(corpus_name) - corpus.num_tokens = cqi_corpus.size - db.session.commit() - - -@socketio.on('cqi.corpora.corpus.get_visualization_data', namespace=ns) -@socketio_login_required -@cqi_over_socketio -def cqi_corpora_corpus_get_visualization_data(cqi_client: cqi.CQiClient, corpus_name: str): - corpus = Corpus.query.get(session['d']['corpus_id']) - visualization_data_file_path = os.path.join(corpus.path, 'cwb', 'visualization_data.json') - if os.path.exists(visualization_data_file_path): - with open(visualization_data_file_path, 'r') as f: - payload = json.load(f) - return {'code': 200, 'msg': 'OK', 'payload': payload} - cqi_corpus = cqi_client.corpora.get(corpus_name) - ########################################################################## - # A faster way to get cpos boundaries for smaller s_attrs # - ########################################################################## - # cqi_corpus.query('Last', ' []* ;') - # cqi_subcorpus = cqi_corpus.subcorpora.get('Last') - # print(cqi_subcorpus.size) - # first_match = 0 - # last_match = cqi_subcorpus.attrs['size'] - 1 - # match_boundaries = zip( - # list(range(first_match, last_match + 1)), - # cqi_subcorpus.dump(cqi_subcorpus.attrs['fields']['match'], first_match, last_match), - # cqi_subcorpus.dump(cqi_subcorpus.attrs['fields']['matchend'], first_match, last_match) - # ) - # for x in match_boundaries: - # print(x) - cqi_p_attrs = { - p_attr.name: p_attr - for p_attr in cqi_corpus.positional_attributes.list() - } - cqi_s_attrs = { - s_attr.name: s_attr - for s_attr in cqi_corpus.structural_attributes.list() - } - payload = { - 'corpus': { - 'bounds': [0, cqi_corpus.size - 1], - 'counts': { - 'token': cqi_corpus.size - }, - 'freqs': {} - }, - 'p_attrs': {}, - 's_attrs': {}, - 'values': {'p_attrs': {}, 's_attrs': {}} - } - for p_attr in cqi_p_attrs.values(): - payload['corpus']['freqs'][p_attr.name] = dict( - zip( - range(0, p_attr.lexicon_size), - p_attr.freqs_by_ids(list(range(0, p_attr.lexicon_size))) - ) - ) - payload['p_attrs'][p_attr.name] = dict( - zip( - range(0, cqi_corpus.size), - p_attr.ids_by_cpos(list(range(0, cqi_corpus.size))) - ) - ) - payload['values']['p_attrs'][p_attr.name] = dict( - zip( - range(0, p_attr.lexicon_size), - p_attr.values_by_ids(list(range(0, p_attr.lexicon_size))) - ) - ) - for s_attr in cqi_s_attrs.values(): - if s_attr.has_values: - continue - payload['corpus']['counts'][s_attr.name] = s_attr.size - payload['s_attrs'][s_attr.name] = {'lexicon': {}, 'values': None} - payload['values']['s_attrs'][s_attr.name] = {} - for id in range(0, s_attr.size): - payload['s_attrs'][s_attr.name]['lexicon'][id] = {} - lbound, rbound = s_attr.cpos_by_id(id) - payload['s_attrs'][s_attr.name]['lexicon'][id]['bounds'] = [lbound, rbound] - payload['s_attrs'][s_attr.name]['lexicon'][id]['counts'] = {} - payload['s_attrs'][s_attr.name]['lexicon'][id]['counts']['token'] = rbound - lbound + 1 - if s_attr.name not in ['text', 's']: - continue - cpos_range = range(lbound, rbound + 1) - payload['s_attrs'][s_attr.name]['lexicon'][id]['counts']['ent'] = len({x for x in cqi_s_attrs['ent'].ids_by_cpos(list(cpos_range)) if x != -1}) - if s_attr.name != 'text': - continue - payload['s_attrs'][s_attr.name]['lexicon'][id]['counts']['s'] = len({x for x in cqi_s_attrs['s'].ids_by_cpos(list(cpos_range)) if x != -1}) - payload['s_attrs'][s_attr.name]['lexicon'][id]['freqs'] = {} - for p_attr in cqi_p_attrs.values(): - payload['s_attrs'][s_attr.name]['lexicon'][id]['freqs'][p_attr.name] = dict(Counter(p_attr.ids_by_cpos(list(cpos_range)))) - sub_s_attrs = cqi_corpus.structural_attributes.list(filters={'part_of': s_attr}) - s_attr_value_names = [ - sub_s_attr.name[(len(s_attr.name) + 1):] - for sub_s_attr in sub_s_attrs - ] - sub_s_attr_values = [ - sub_s_attr.values_by_ids(list(range(0, s_attr.size))) - for sub_s_attr in sub_s_attrs - ] - payload['s_attrs'][s_attr.name]['values'] = s_attr_value_names - payload['values']['s_attrs'][s_attr.name] = { - s_attr_id: { - s_attr_value_name: sub_s_attr_values[s_attr_value_name_idx][s_attr_id_idx] - for s_attr_value_name_idx, s_attr_value_name in enumerate( - payload['s_attrs'][s_attr.name]['values'] - ) - } for s_attr_id_idx, s_attr_id in enumerate(range(0, s_attr.size)) - } - with open(visualization_data_file_path, 'w') as f: - json.dump(payload, f) - return {'code': 200, 'msg': 'OK', 'payload': payload} - - -@socketio.on('cqi.corpora.corpus.paginate', namespace=ns) -@socketio_login_required -@cqi_over_socketio -def cqi_corpora_corpus_paginate(cqi_client: cqi.CQiClient, corpus_name: str, page: int = 1, per_page: int = 20): # noqa - cqi_corpus = cqi_client.corpora.get(corpus_name) - # Sanity checks - if ( - per_page < 1 - or page < 1 - or ( - cqi_corpus.size > 0 - and page > math.ceil(cqi_corpus.size / per_page) - ) - ): - return {'code': 416, 'msg': 'Range Not Satisfiable'} - first_cpos = (page - 1) * per_page - last_cpos = min(cqi_corpus.size, first_cpos + per_page) - cpos_list = [*range(first_cpos, last_cpos)] - lookups = lookups_by_cpos(cqi_corpus, cpos_list) - payload = {} - # the items for the current page - payload['items'] = [cpos_list] - # the lookups for the items - payload['lookups'] = lookups - # the total number of items matching the query - payload['total'] = cqi_corpus.size - # the number of items to be displayed on a page. - payload['per_page'] = per_page - # The total number of pages - payload['pages'] = math.ceil(payload['total'] / payload['per_page']) - # the current page number (1 indexed) - payload['page'] = page if payload['pages'] > 0 else None - # True if a previous page exists - payload['has_prev'] = payload['page'] > 1 if payload['page'] else False - # True if a next page exists. - payload['has_next'] = payload['page'] < payload['pages'] if payload['page'] else False # noqa - # Number of the previous page. - payload['prev_num'] = payload['page'] - 1 if payload['has_prev'] else None - # Number of the next page - payload['next_num'] = payload['page'] + 1 if payload['has_next'] else None - return {'code': 200, 'msg': 'OK', 'payload': payload} diff --git a/app/corpora/cqi_over_socketio/cqi_corpora_corpus_alignment_attributes.py b/app/corpora/cqi_over_socketio/cqi_corpora_corpus_alignment_attributes.py deleted file mode 100644 index d6382eaa..00000000 --- a/app/corpora/cqi_over_socketio/cqi_corpora_corpus_alignment_attributes.py +++ /dev/null @@ -1,24 +0,0 @@ -import cqi -from app import socketio -from app.decorators import socketio_login_required -from . import NAMESPACE as ns -from .utils import cqi_over_socketio - - -@socketio.on('cqi.corpora.corpus.alignment_attributes.get', namespace=ns) -@socketio_login_required -@cqi_over_socketio -def cqi_corpora_corpus_alignment_attributes_get(cqi_client: cqi.CQiClient, corpus_name: str, alignment_attribute_name: str): # noqa - cqi_corpus = cqi_client.corpora.get(corpus_name) - cqi_alignment_attribute = cqi_corpus.alignment_attributes.get(alignment_attribute_name) # noqa - payload = {**cqi_alignment_attribute.attrs} - return {'code': 200, 'msg': 'OK', 'payload': payload} - - -@socketio.on('cqi.corpora.corpus.alignment_attributes.list', namespace=ns) -@socketio_login_required -@cqi_over_socketio -def cqi_corpora_corpus_alignment_attributes_list(cqi_client: cqi.CQiClient, corpus_name: str): # noqa - cqi_corpus = cqi_client.corpora.get(corpus_name) - payload = [{**x.attrs} for x in cqi_corpus.alignment_attributes.list()] - return {'code': 200, 'msg': 'OK', 'payload': payload} diff --git a/app/corpora/cqi_over_socketio/cqi_corpora_corpus_positional_attributes.py b/app/corpora/cqi_over_socketio/cqi_corpora_corpus_positional_attributes.py deleted file mode 100644 index 41d55bb3..00000000 --- a/app/corpora/cqi_over_socketio/cqi_corpora_corpus_positional_attributes.py +++ /dev/null @@ -1,24 +0,0 @@ -import cqi -from app import socketio -from app.decorators import socketio_login_required -from . import NAMESPACE as ns -from .utils import cqi_over_socketio - - -@socketio.on('cqi.corpora.corpus.positional_attributes.get', namespace=ns) -@socketio_login_required -@cqi_over_socketio -def cqi_corpora_corpus_positional_attributes_get(cqi_client: cqi.CQiClient, corpus_name: str, positional_attribute_name: str): # noqa - cqi_corpus = cqi_client.corpora.get(corpus_name) - cqi_positional_attribute = cqi_corpus.positional_attributes.get(positional_attribute_name) # noqa - payload = {**cqi_positional_attribute.attrs} - return {'code': 200, 'msg': 'OK', 'payload': payload} - - -@socketio.on('cqi.corpora.corpus.positional_attributes.list', namespace=ns) -@socketio_login_required -@cqi_over_socketio -def cqi_corpora_corpus_positional_attributes_list(cqi_client: cqi.CQiClient, corpus_name: str): # noqa - cqi_corpus = cqi_client.corpora.get(corpus_name) - payload = [{**x.attrs} for x in cqi_corpus.positional_attributes.list()] - return {'code': 200, 'msg': 'OK', 'payload': payload} diff --git a/app/corpora/cqi_over_socketio/cqi_corpora_corpus_structural_attributes.py b/app/corpora/cqi_over_socketio/cqi_corpora_corpus_structural_attributes.py deleted file mode 100644 index 11178906..00000000 --- a/app/corpora/cqi_over_socketio/cqi_corpora_corpus_structural_attributes.py +++ /dev/null @@ -1,24 +0,0 @@ -import cqi -from app import socketio -from app.decorators import socketio_login_required -from . import NAMESPACE as ns -from .utils import cqi_over_socketio - - -@socketio.on('cqi.corpora.corpus.structural_attributes.get', namespace=ns) -@socketio_login_required -@cqi_over_socketio -def cqi_corpora_corpus_structural_attributes_get(cqi_client: cqi.CQiClient, corpus_name: str, structural_attribute_name: str): # noqa - cqi_corpus = cqi_client.corpora.get(corpus_name) - cqi_structural_attribute = cqi_corpus.structural_attributes.get(structural_attribute_name) # noqa - payload = {**cqi_structural_attribute.attrs} - return {'code': 200, 'msg': 'OK', 'payload': payload} - - -@socketio.on('cqi.corpora.corpus.structural_attributes.list', namespace=ns) -@socketio_login_required -@cqi_over_socketio -def cqi_corpora_corpus_structural_attributes_list(cqi_client: cqi.CQiClient, corpus_name: str): # noqa - cqi_corpus = cqi_client.corpora.get(corpus_name) - payload = [{**x.attrs} for x in cqi_corpus.structural_attributes.list()] - return {'code': 200, 'msg': 'OK', 'payload': payload} diff --git a/app/corpora/cqi_over_socketio/cqi_corpora_corpus_subcorpora.py b/app/corpora/cqi_over_socketio/cqi_corpora_corpus_subcorpora.py deleted file mode 100644 index 01c0b048..00000000 --- a/app/corpora/cqi_over_socketio/cqi_corpora_corpus_subcorpora.py +++ /dev/null @@ -1,140 +0,0 @@ -import cqi -import math -from app import socketio -from app.decorators import socketio_login_required -from . import NAMESPACE as ns -from .utils import cqi_over_socketio, export_subcorpus, partial_export_subcorpus - - -@socketio.on('cqi.corpora.corpus.subcorpora.get', namespace=ns) -@socketio_login_required -@cqi_over_socketio -def cqi_corpora_corpus_subcorpora_get(cqi_client: cqi.CQiClient, corpus_name: str, subcorpus_name: str): # noqa - cqi_corpus = cqi_client.corpora.get(corpus_name) - cqi_subcorpus = cqi_corpus.subcorpora.get(subcorpus_name) - payload = {**cqi_subcorpus.attrs} - return {'code': 200, 'msg': 'OK', 'payload': payload} - - -@socketio.on('cqi.corpora.corpus.subcorpora.list', namespace=ns) -@socketio_login_required -@cqi_over_socketio -def cqi_corpora_corpus_subcorpora_list(cqi_client: cqi.CQiClient, corpus_name: str): # noqa - cqi_corpus = cqi_client.corpora.get(corpus_name) - payload = [{**x.attrs} for x in cqi_corpus.subcorpora.list()] - return {'code': 200, 'msg': 'OK', 'payload': payload} - - -@socketio.on('cqi.corpora.corpus.subcorpora.subcorpus.drop', namespace=ns) -@socketio_login_required -@cqi_over_socketio -def cqi_corpora_corpus_subcorpora_subcorpus_drop(cqi_client: cqi.CQiClient, corpus_name: str, subcorpus_name: str): # noqa - cqi_corpus = cqi_client.corpora.get(corpus_name) - cqi_subcorpus = cqi_corpus.subcorpora.get(subcorpus_name) - cqi_status = cqi_subcorpus.drop() - payload = {'code': cqi_status.code, - 'msg': cqi_status.__class__.__name__} - return {'code': 200, 'msg': 'OK', 'payload': payload} - - -@socketio.on('cqi.corpora.corpus.subcorpora.subcorpus.dump', namespace=ns) -@socketio_login_required -@cqi_over_socketio -def cqi_corpora_corpus_subcorpora_subcorpus_dump(cqi_client: cqi.CQiClient, corpus_name: str, subcorpus_name: str, field: int, first: int, last: int): # noqa - cqi_corpus = cqi_client.corpora.get(corpus_name) - cqi_subcorpus = cqi_corpus.subcorpora.get(subcorpus_name) - payload = cqi_subcorpus.dump(field, first, last) - return {'code': 200, 'msg': 'OK', 'payload': payload} - - -@socketio.on('cqi.corpora.corpus.subcorpora.subcorpus.fdist_1', namespace=ns) -@socketio_login_required -@cqi_over_socketio -def cqi_corpora_corpus_subcorpora_subcorpus_fdist_1(cqi_client: cqi.CQiClient, corpus_name: str, subcorpus_name: str, cutoff: int, field_name: str, positional_attribute_name: str): # noqa - cqi_corpus = cqi_client.corpora.get(corpus_name) - cqi_subcorpus = cqi_corpus.subcorpora.get(subcorpus_name) - field = cqi_subcorpus.fields[field_name] - pos_attr = cqi_corpus.positional_attributes.get(positional_attribute_name) - payload = cqi_subcorpus.fdist_1(cutoff, field, pos_attr) - return {'code': 200, 'msg': 'OK', 'payload': payload} - - -@socketio.on('cqi.corpora.corpus.subcorpora.subcorpus.fdist_2', namespace=ns) -@socketio_login_required -@cqi_over_socketio -def cqi_corpora_corpus_subcorpora_subcorpus_fdist_2(cqi_client: cqi.CQiClient, corpus_name: str, subcorpus_name: str, cutoff: int, field_1_name: str, positional_attribute_1_name: str, field_2_name: str, positional_attribute_2_name: str): # noqa - cqi_corpus = cqi_client.corpora.get(corpus_name) - cqi_subcorpus = cqi_corpus.subcorpora.get(subcorpus_name) - field_1 = cqi_subcorpus.fields[field_1_name] - pos_attr_1 = cqi_corpus.positional_attributes.get(positional_attribute_1_name) - field_2 = cqi_subcorpus.fields[field_2_name] - pos_attr_2 = cqi_corpus.positional_attributes.get(positional_attribute_2_name) - payload = cqi_subcorpus.fdist_2(cutoff, field_1, pos_attr_1, field_2, pos_attr_2) - return {'code': 200, 'msg': 'OK', 'payload': payload} - - -############################################################################### -# nopaque specific CQi extensions # -############################################################################### -@socketio.on('cqi.corpora.corpus.subcorpora.subcorpus.paginate', namespace=ns) -@socketio_login_required -@cqi_over_socketio -def cqi_corpora_corpus_subcorpora_subcorpus_paginate(cqi_client: cqi.CQiClient, corpus_name: str, subcorpus_name: str, context: int = 50, page: int = 1, per_page: int = 20): # noqa - cqi_corpus = cqi_client.corpora.get(corpus_name) - cqi_subcorpus = cqi_corpus.subcorpora.get(subcorpus_name) - # Sanity checks - if ( - per_page < 1 - or page < 1 - or ( - cqi_subcorpus.attrs['size'] > 0 - and page > math.ceil(cqi_subcorpus.attrs['size'] / per_page) - ) - ): - return {'code': 416, 'msg': 'Range Not Satisfiable'} - offset = (page - 1) * per_page - cutoff = per_page - cqi_results_export = export_subcorpus( - cqi_subcorpus, context=context, cutoff=cutoff, offset=offset) - payload = {} - # the items for the current page - payload['items'] = cqi_results_export.pop('matches') - # the lookups for the items - payload['lookups'] = cqi_results_export - # the total number of items matching the query - payload['total'] = cqi_subcorpus.attrs['size'] - # the number of items to be displayed on a page. - payload['per_page'] = per_page - # The total number of pages - payload['pages'] = math.ceil(payload['total'] / payload['per_page']) - # the current page number (1 indexed) - payload['page'] = page if payload['pages'] > 0 else None - # True if a previous page exists - payload['has_prev'] = payload['page'] > 1 if payload['page'] else False - # True if a next page exists. - payload['has_next'] = payload['page'] < payload['pages'] if payload['page'] else False # noqa - # Number of the previous page. - payload['prev_num'] = payload['page'] - 1 if payload['has_prev'] else None - # Number of the next page - payload['next_num'] = payload['page'] + 1 if payload['has_next'] else None - return {'code': 200, 'msg': 'OK', 'payload': payload} - - -@socketio.on('cqi.corpora.corpus.subcorpora.subcorpus.partial_export', namespace=ns) -@socketio_login_required -@cqi_over_socketio -def cqi_corpora_corpus_subcorpora_subcorpus_partial_export(cqi_client: cqi.CQiClient, corpus_name: str, subcorpus_name: str, match_id_list: list, context: int = 50): # noqa - cqi_corpus = cqi_client.corpora.get(corpus_name) - cqi_subcorpus = cqi_corpus.subcorpora.get(subcorpus_name) - cqi_subcorpus_partial_export = partial_export_subcorpus(cqi_subcorpus, match_id_list, context=context) - return {'code': 200, 'msg': 'OK', 'payload': cqi_subcorpus_partial_export} - - -@socketio.on('cqi.corpora.corpus.subcorpora.subcorpus.export', namespace=ns) -@socketio_login_required -@cqi_over_socketio -def cqi_corpora_corpus_subcorpora_subcorpus_export(cqi_client: cqi.CQiClient, corpus_name: str, subcorpus_name: str, context: int = 50): # noqa - cqi_corpus = cqi_client.corpora.get(corpus_name) - cqi_subcorpus = cqi_corpus.subcorpora.get(subcorpus_name) - cqi_subcorpus_export = export_subcorpus(cqi_subcorpus, context=context) - return {'code': 200, 'msg': 'OK', 'payload': cqi_subcorpus_export} diff --git a/app/corpora/cqi_over_socketio/utils.py b/app/corpora/cqi_over_socketio/utils.py deleted file mode 100644 index 14e71e2b..00000000 --- a/app/corpora/cqi_over_socketio/utils.py +++ /dev/null @@ -1,178 +0,0 @@ -from flask import session -from functools import wraps -from inspect import signature -import cqi - - -def cqi_over_socketio(f): - @wraps(f) - def wrapped(*args): - if 'd' not in session: - return {'code': 424, 'msg': 'Failed Dependency'} - f_args = {} - # Check for missing args and if all provided args are of the right type - for param in signature(f).parameters.values(): - if param.name == 'corpus_name': - f_args[param.name] = f'NOPAQUE_{session["d"]["corpus_id"]}' - continue - if param.name == 'cqi_client': - f_args[param.name] = session['d']['cqi_client'] - continue - if param.default is param.empty: - # args - if param.name not in args[0]: - return {'code': 400, 'msg': 'Bad Request'} - arg = args[0][param.name] - if type(arg) is not param.annotation: - return {'code': 400, 'msg': 'Bad Request'} - f_args[param.name] = arg - else: - # kwargs - if param.name not in args[0]: - continue - arg = args[0][param.name] - if type(arg) is not param.annotation: - return {'code': 400, 'msg': 'Bad Request'} - f_args[param.name] = arg - session['d']['cqi_client_lock'].acquire() - try: - return_value = f(**f_args) - except BrokenPipeError: - return_value = { - 'code': 500, - 'msg': 'Internal Server Error' - } - except cqi.errors.CQiException as e: - return_value = { - 'code': 500, - 'msg': 'Internal Server Error', - 'payload': { - 'code': e.code, - 'desc': e.description, - 'msg': e.__class__.__name__ - } - } - finally: - session['d']['cqi_client_lock'].release() - return return_value - return wrapped - - -def lookups_by_cpos(corpus, cpos_list): - lookups = {} - lookups['cpos_lookup'] = {cpos: {} for cpos in cpos_list} - for attr in corpus.positional_attributes.list(): - cpos_attr_values = attr.values_by_cpos(cpos_list) - for i, cpos in enumerate(cpos_list): - lookups['cpos_lookup'][cpos][attr.attrs['name']] = \ - cpos_attr_values[i] - for attr in corpus.structural_attributes.list(): - # We only want to iterate over non subattributes, identifiable by - # attr.attrs['has_values'] == False - if attr.attrs['has_values']: - continue - cpos_attr_ids = attr.ids_by_cpos(cpos_list) - for i, cpos in enumerate(cpos_list): - if cpos_attr_ids[i] == -1: - continue - lookups['cpos_lookup'][cpos][attr.attrs['name']] = cpos_attr_ids[i] - occured_attr_ids = [x for x in set(cpos_attr_ids) if x != -1] - if not occured_attr_ids: - continue - subattrs = corpus.structural_attributes.list(filters={'part_of': attr}) - if not subattrs: - continue - lookup_name = f'{attr.attrs["name"]}_lookup' - lookups[lookup_name] = {} - for attr_id in occured_attr_ids: - lookups[lookup_name][attr_id] = {} - for subattr in subattrs: - subattr_name = subattr.attrs['name'][(len(attr.attrs['name']) + 1):] # noqa - for i, subattr_value in enumerate(subattr.values_by_ids(occured_attr_ids)): # noqa - lookups[lookup_name][occured_attr_ids[i]][subattr_name] = subattr_value # noqa - return lookups - - -def partial_export_subcorpus(subcorpus, match_id_list, context=25): - if subcorpus.attrs['size'] == 0: - return {"matches": []} - match_boundaries = [] - for match_id in match_id_list: - if match_id < 0 or match_id >= subcorpus.attrs['size']: - continue - match_boundaries.append( - ( - match_id, - subcorpus.dump(subcorpus.attrs['fields']['match'], match_id, match_id)[0], - subcorpus.dump(subcorpus.attrs['fields']['matchend'], match_id, match_id)[0] - ) - ) - cpos_set = set() - matches = [] - for match_boundary in match_boundaries: - match_num, match_start, match_end = match_boundary - c = (match_start, match_end) - if match_start == 0 or context == 0: - lc = None - cpos_list_lbound = match_start - else: - lc_lbound = max(0, (match_start - context)) - lc_rbound = match_start - 1 - lc = (lc_lbound, lc_rbound) - cpos_list_lbound = lc_lbound - if match_end == (subcorpus.collection.corpus.attrs['size'] - 1) or context == 0: - rc = None - cpos_list_rbound = match_end - else: - rc_lbound = match_end + 1 - rc_rbound = min( - (match_end + context), - (subcorpus.collection.corpus.attrs['size'] - 1) - ) - rc = (rc_lbound, rc_rbound) - cpos_list_rbound = rc_rbound - match = {'num': match_num, 'lc': lc, 'c': c, 'rc': rc} - matches.append(match) - cpos_set.update(range(cpos_list_lbound, cpos_list_rbound + 1)) - lookups = lookups_by_cpos(subcorpus.collection.corpus, list(cpos_set)) - return {'matches': matches, **lookups} - - -def export_subcorpus(subcorpus, context=25, cutoff=float('inf'), offset=0): - if subcorpus.attrs['size'] == 0: - return {"matches": []} - first_match = max(0, offset) - last_match = min((offset + cutoff - 1), (subcorpus.attrs['size'] - 1)) - match_boundaries = zip( - list(range(first_match, last_match + 1)), - subcorpus.dump(subcorpus.attrs['fields']['match'], first_match, last_match), - subcorpus.dump(subcorpus.attrs['fields']['matchend'], first_match, last_match) - ) - cpos_set = set() - matches = [] - for match_num, match_start, match_end in match_boundaries: - c = (match_start, match_end) - if match_start == 0 or context == 0: - lc = None - cpos_list_lbound = match_start - else: - lc_lbound = max(0, (match_start - context)) - lc_rbound = match_start - 1 - lc = (lc_lbound, lc_rbound) - cpos_list_lbound = lc_lbound - if match_end == (subcorpus.collection.corpus.attrs['size'] - 1) or context == 0: - rc = None - cpos_list_rbound = match_end - else: - rc_lbound = match_end + 1 - rc_rbound = min( - (match_end + context), - (subcorpus.collection.corpus.attrs['size'] - 1) - ) - rc = (rc_lbound, rc_rbound) - cpos_list_rbound = rc_rbound - match = {'num': match_num, 'lc': lc, 'c': c, 'rc': rc} - matches.append(match) - cpos_set.update(range(cpos_list_lbound, cpos_list_rbound + 1)) - lookups = lookups_by_cpos(subcorpus.collection.corpus, list(cpos_set)) - return {'matches': matches, **lookups} diff --git a/app/static/js/CorpusAnalysis/CorpusAnalysisConcordance.js b/app/static/js/CorpusAnalysis/CorpusAnalysisConcordance.js index bf6bc77c..7c9b57c1 100644 --- a/app/static/js/CorpusAnalysis/CorpusAnalysisConcordance.js +++ b/app/static/js/CorpusAnalysis/CorpusAnalysisConcordance.js @@ -237,7 +237,7 @@ class CorpusAnalysisConcordance { app.flash('No matches selected', 'error'); return; } - promise = subcorpus.o.partial_export([...subcorpus.selectedItems], 50); + promise = subcorpus.o.partialExport([...subcorpus.selectedItems], 50); } else { promise = subcorpus.o.export(50); }