mirror of
https://gitlab.ub.uni-bielefeld.de/sfb1288inf/nopaque.git
synced 2025-06-15 02:20:40 +00:00
Move Socket.IO Namespaces to dedicated directory
This commit is contained in:
223
app/namespaces/cqi_over_sio/__init__.py
Normal file
223
app/namespaces/cqi_over_sio/__init__.py
Normal file
@ -0,0 +1,223 @@
|
||||
from cqi import CQiClient
|
||||
from cqi.errors import CQiException
|
||||
from cqi.status import CQiStatus
|
||||
from flask import current_app
|
||||
from flask_login import current_user
|
||||
from flask_socketio import Namespace
|
||||
from inspect import signature
|
||||
from threading import Lock
|
||||
from app import db, docker_client, hashids, socketio
|
||||
from app.decorators import socketio_login_required
|
||||
from app.models import Corpus, CorpusStatus
|
||||
from . import extensions
|
||||
from .utils import CQiOverSocketIOSessionManager
|
||||
|
||||
|
||||
'''
|
||||
This package tunnels the Corpus Query interface (CQi) protocol through
|
||||
Socket.IO (SIO) by tunneling CQi API calls through an event called "exec".
|
||||
|
||||
Basic concept:
|
||||
1. A client connects to the namespace.
|
||||
2. The client emits the "init" event and provides a corpus id for the corpus
|
||||
that should be analysed in this session.
|
||||
1.1 The analysis session counter of the corpus is incremented.
|
||||
1.2 A CQiClient and a (Mutex) Lock belonging to it is created.
|
||||
1.3 Wait until the CQP server is running.
|
||||
1.4 Connect the CQiClient to the server.
|
||||
1.5 Save the CQiClient, the Lock and the corpus id in the session for
|
||||
subsequential use.
|
||||
3. The client emits "exec" events, within which it provides the name of a CQi
|
||||
API function and the corresponding arguments.
|
||||
3.1 The "exec" event handler will execute the function, make sure that
|
||||
the result is serializable and returns the result back to the client.
|
||||
4. The client disconnects from the namespace
|
||||
4.1 The analysis session counter of the corpus is decremented.
|
||||
4.2 The CQiClient and (Mutex) Lock belonging to it are teared down.
|
||||
'''
|
||||
|
||||
|
||||
CQI_API_FUNCTION_NAMES = [
|
||||
'ask_feature_cl_2_3',
|
||||
'ask_feature_cqi_1_0',
|
||||
'ask_feature_cqp_2_3',
|
||||
'cl_alg2cpos',
|
||||
'cl_attribute_size',
|
||||
'cl_cpos2alg',
|
||||
'cl_cpos2id',
|
||||
'cl_cpos2lbound',
|
||||
'cl_cpos2rbound',
|
||||
'cl_cpos2str',
|
||||
'cl_cpos2struc',
|
||||
'cl_drop_attribute',
|
||||
'cl_id2cpos',
|
||||
'cl_id2freq',
|
||||
'cl_id2str',
|
||||
'cl_idlist2cpos',
|
||||
'cl_lexicon_size',
|
||||
'cl_regex2id',
|
||||
'cl_str2id',
|
||||
'cl_struc2cpos',
|
||||
'cl_struc2str',
|
||||
'corpus_alignment_attributes',
|
||||
'corpus_charset',
|
||||
'corpus_drop_corpus',
|
||||
'corpus_full_name',
|
||||
'corpus_info',
|
||||
'corpus_list_corpora',
|
||||
'corpus_positional_attributes',
|
||||
'corpus_properties',
|
||||
'corpus_structural_attribute_has_values',
|
||||
'corpus_structural_attributes',
|
||||
'cqp_drop_subcorpus',
|
||||
'cqp_dump_subcorpus',
|
||||
'cqp_fdist_1',
|
||||
'cqp_fdist_2',
|
||||
'cqp_list_subcorpora',
|
||||
'cqp_query',
|
||||
'cqp_subcorpus_has_field',
|
||||
'cqp_subcorpus_size',
|
||||
'ctrl_bye',
|
||||
'ctrl_connect',
|
||||
'ctrl_last_general_error',
|
||||
'ctrl_ping',
|
||||
'ctrl_user_abort'
|
||||
]
|
||||
|
||||
|
||||
class CQiOverSocketIONamespace(Namespace):
|
||||
@socketio_login_required
|
||||
def on_connect(self):
|
||||
pass
|
||||
|
||||
@socketio_login_required
|
||||
def on_init(self, corpus_hashid: str) -> dict:
|
||||
corpus_id = hashids.decode(corpus_hashid)
|
||||
|
||||
if not isinstance(corpus_id, int):
|
||||
return {'code': 400, 'msg': 'Bad Request'}
|
||||
|
||||
corpus = Corpus.query.get(corpus_id)
|
||||
|
||||
if corpus is None:
|
||||
return {'code': 404, 'msg': 'Not Found'}
|
||||
|
||||
if not (
|
||||
corpus.user == current_user
|
||||
or current_user.is_following_corpus(corpus)
|
||||
or current_user.is_administrator
|
||||
):
|
||||
return {'code': 403, 'msg': 'Forbidden'}
|
||||
|
||||
if corpus.status not in [
|
||||
CorpusStatus.BUILT,
|
||||
CorpusStatus.STARTING_ANALYSIS_SESSION,
|
||||
CorpusStatus.RUNNING_ANALYSIS_SESSION,
|
||||
CorpusStatus.CANCELING_ANALYSIS_SESSION
|
||||
]:
|
||||
return {'code': 424, 'msg': 'Failed Dependency'}
|
||||
|
||||
corpus.num_analysis_sessions = Corpus.num_analysis_sessions + 1
|
||||
db.session.commit()
|
||||
retry_counter = 20
|
||||
while corpus.status != CorpusStatus.RUNNING_ANALYSIS_SESSION:
|
||||
if retry_counter == 0:
|
||||
corpus.num_analysis_sessions = Corpus.num_analysis_sessions - 1
|
||||
db.session.commit()
|
||||
return {'code': 408, 'msg': 'Request Timeout'}
|
||||
socketio.sleep(3)
|
||||
retry_counter -= 1
|
||||
db.session.refresh(corpus)
|
||||
|
||||
cqpserver_container_name = f'nopaque-cqpserver-{corpus_id}'
|
||||
cqpserver_container = docker_client.containers.get(cqpserver_container_name)
|
||||
cqpserver_ip_address = cqpserver_container.attrs['NetworkSettings']['Networks'][current_app.config['NOPAQUE_DOCKER_NETWORK_NAME']]['IPAddress']
|
||||
cqi_client = CQiClient(cqpserver_ip_address)
|
||||
cqi_client_lock = Lock()
|
||||
|
||||
CQiOverSocketIOSessionManager.setup()
|
||||
CQiOverSocketIOSessionManager.set_corpus_id(corpus_id)
|
||||
CQiOverSocketIOSessionManager.set_cqi_client(cqi_client)
|
||||
CQiOverSocketIOSessionManager.set_cqi_client_lock(cqi_client_lock)
|
||||
|
||||
return {'code': 200, 'msg': 'OK'}
|
||||
|
||||
@socketio_login_required
|
||||
def on_exec(self, fn_name: str, fn_args: dict = {}) -> dict:
|
||||
try:
|
||||
cqi_client = CQiOverSocketIOSessionManager.get_cqi_client()
|
||||
cqi_client_lock = CQiOverSocketIOSessionManager.get_cqi_client_lock()
|
||||
except KeyError:
|
||||
return {'code': 424, 'msg': 'Failed Dependency'}
|
||||
|
||||
if fn_name in CQI_API_FUNCTION_NAMES:
|
||||
fn = getattr(cqi_client.api, fn_name)
|
||||
elif fn_name in extensions.CQI_EXTENSION_FUNCTION_NAMES:
|
||||
fn = getattr(extensions, fn_name)
|
||||
else:
|
||||
return {'code': 400, 'msg': 'Bad Request'}
|
||||
|
||||
for param in signature(fn).parameters.values():
|
||||
# Check if the parameter is optional or required
|
||||
if param.default is param.empty:
|
||||
if param.name not in fn_args:
|
||||
return {'code': 400, 'msg': 'Bad Request'}
|
||||
else:
|
||||
if param.name not in fn_args:
|
||||
continue
|
||||
if type(fn_args[param.name]) is not param.annotation:
|
||||
return {'code': 400, 'msg': 'Bad Request'}
|
||||
|
||||
cqi_client_lock.acquire()
|
||||
try:
|
||||
fn_return_value = fn(**fn_args)
|
||||
except BrokenPipeError as e:
|
||||
return {'code': 500, 'msg': 'Internal Server Error'}
|
||||
except CQiException as e:
|
||||
return {
|
||||
'code': 502,
|
||||
'msg': 'Bad Gateway',
|
||||
'payload': {
|
||||
'code': e.code,
|
||||
'desc': e.description,
|
||||
'msg': e.__class__.__name__
|
||||
}
|
||||
}
|
||||
finally:
|
||||
cqi_client_lock.release()
|
||||
|
||||
if isinstance(fn_return_value, CQiStatus):
|
||||
payload = {
|
||||
'code': fn_return_value.code,
|
||||
'msg': fn_return_value.__class__.__name__
|
||||
}
|
||||
else:
|
||||
payload = fn_return_value
|
||||
|
||||
return {'code': 200, 'msg': 'OK', 'payload': payload}
|
||||
|
||||
def on_disconnect(self):
|
||||
try:
|
||||
corpus_id = CQiOverSocketIOSessionManager.get_corpus_id()
|
||||
cqi_client = CQiOverSocketIOSessionManager.get_cqi_client()
|
||||
cqi_client_lock = CQiOverSocketIOSessionManager.get_cqi_client_lock()
|
||||
CQiOverSocketIOSessionManager.teardown()
|
||||
except KeyError:
|
||||
return
|
||||
|
||||
cqi_client_lock.acquire()
|
||||
|
||||
try:
|
||||
cqi_client.api.ctrl_bye()
|
||||
except (BrokenPipeError, CQiException):
|
||||
pass
|
||||
|
||||
cqi_client_lock.release()
|
||||
|
||||
corpus = Corpus.query.get(corpus_id)
|
||||
|
||||
if corpus is None:
|
||||
return
|
||||
|
||||
corpus.num_analysis_sessions = Corpus.num_analysis_sessions - 1
|
||||
db.session.commit()
|
406
app/namespaces/cqi_over_sio/extensions.py
Normal file
406
app/namespaces/cqi_over_sio/extensions.py
Normal file
@ -0,0 +1,406 @@
|
||||
from collections import Counter
|
||||
from cqi.models.corpora import Corpus as CQiCorpus
|
||||
from cqi.models.subcorpora import Subcorpus as CQiSubcorpus
|
||||
from cqi.status import StatusOk as CQiStatusOk
|
||||
from flask import current_app
|
||||
import gzip
|
||||
import json
|
||||
import math
|
||||
from app import db
|
||||
from app.models import Corpus
|
||||
from .utils import CQiOverSocketIOSessionManager
|
||||
|
||||
|
||||
CQI_EXTENSION_FUNCTION_NAMES = [
|
||||
'ext_corpus_update_db',
|
||||
'ext_corpus_static_data',
|
||||
'ext_corpus_paginate_corpus',
|
||||
'ext_cqp_paginate_subcorpus',
|
||||
'ext_cqp_partial_export_subcorpus',
|
||||
'ext_cqp_export_subcorpus',
|
||||
]
|
||||
|
||||
|
||||
def ext_corpus_update_db(corpus: str) -> CQiStatusOk:
|
||||
corpus_id = CQiOverSocketIOSessionManager.get_corpus_id()
|
||||
cqi_client = CQiOverSocketIOSessionManager.get_cqi_client()
|
||||
db_corpus = Corpus.query.get(corpus_id)
|
||||
cqi_corpus = cqi_client.corpora.get(corpus)
|
||||
db_corpus.num_tokens = cqi_corpus.size
|
||||
db.session.commit()
|
||||
return CQiStatusOk()
|
||||
|
||||
|
||||
def ext_corpus_static_data(corpus: str) -> dict:
|
||||
corpus_id = CQiOverSocketIOSessionManager.get_corpus_id()
|
||||
db_corpus = Corpus.query.get(corpus_id)
|
||||
|
||||
static_data_file_path = db_corpus.path / 'cwb' / 'static.json.gz'
|
||||
if static_data_file_path.exists():
|
||||
with static_data_file_path.open('rb') as f:
|
||||
return f.read()
|
||||
|
||||
cqi_client = CQiOverSocketIOSessionManager.get_cqi_client()
|
||||
cqi_corpus = cqi_client.corpora.get(corpus)
|
||||
cqi_p_attrs = cqi_corpus.positional_attributes.list()
|
||||
cqi_s_attrs = cqi_corpus.structural_attributes.list()
|
||||
|
||||
static_data = {
|
||||
'corpus': {
|
||||
'bounds': [0, cqi_corpus.size - 1],
|
||||
'freqs': {}
|
||||
},
|
||||
'p_attrs': {},
|
||||
's_attrs': {},
|
||||
'values': {'p_attrs': {}, 's_attrs': {}}
|
||||
}
|
||||
|
||||
for p_attr in cqi_p_attrs:
|
||||
current_app.logger.info(f'corpus.freqs.{p_attr.name}')
|
||||
static_data['corpus']['freqs'][p_attr.name] = []
|
||||
p_attr_id_list = list(range(p_attr.lexicon_size))
|
||||
static_data['corpus']['freqs'][p_attr.name].extend(p_attr.freqs_by_ids(p_attr_id_list))
|
||||
del p_attr_id_list
|
||||
|
||||
current_app.logger.info(f'p_attrs.{p_attr.name}')
|
||||
static_data['p_attrs'][p_attr.name] = []
|
||||
cpos_list = list(range(cqi_corpus.size))
|
||||
static_data['p_attrs'][p_attr.name].extend(p_attr.ids_by_cpos(cpos_list))
|
||||
del cpos_list
|
||||
|
||||
current_app.logger.info(f'values.p_attrs.{p_attr.name}')
|
||||
static_data['values']['p_attrs'][p_attr.name] = []
|
||||
p_attr_id_list = list(range(p_attr.lexicon_size))
|
||||
static_data['values']['p_attrs'][p_attr.name].extend(p_attr.values_by_ids(p_attr_id_list))
|
||||
del p_attr_id_list
|
||||
|
||||
for s_attr in cqi_s_attrs:
|
||||
if s_attr.has_values:
|
||||
continue
|
||||
|
||||
static_data['s_attrs'][s_attr.name] = {'lexicon': [], 'values': None}
|
||||
|
||||
if s_attr.name in ['s', 'ent']:
|
||||
##############################################################
|
||||
# A faster way to get cpos boundaries for smaller s_attrs #
|
||||
# Note: Needs more testing, don't use it in production #
|
||||
##############################################################
|
||||
cqi_corpus.query('Last', f'<{s_attr.name}> []* </{s_attr.name}>;')
|
||||
cqi_subcorpus = cqi_corpus.subcorpora.get('Last')
|
||||
first_match = 0
|
||||
last_match = cqi_subcorpus.size - 1
|
||||
match_boundaries = zip(
|
||||
range(first_match, last_match + 1),
|
||||
cqi_subcorpus.dump(
|
||||
cqi_subcorpus.fields['match'],
|
||||
first_match,
|
||||
last_match
|
||||
),
|
||||
cqi_subcorpus.dump(
|
||||
cqi_subcorpus.fields['matchend'],
|
||||
first_match,
|
||||
last_match
|
||||
)
|
||||
)
|
||||
cqi_subcorpus.drop()
|
||||
del cqi_subcorpus, first_match, last_match
|
||||
for id, lbound, rbound in match_boundaries:
|
||||
static_data['s_attrs'][s_attr.name]['lexicon'].append({})
|
||||
current_app.logger.info(f's_attrs.{s_attr.name}.lexicon.{id}.bounds')
|
||||
static_data['s_attrs'][s_attr.name]['lexicon'][id]['bounds'] = [lbound, rbound]
|
||||
del match_boundaries
|
||||
|
||||
if s_attr.name != 'text':
|
||||
continue
|
||||
|
||||
for id in range(0, s_attr.size):
|
||||
static_data['s_attrs'][s_attr.name]['lexicon'].append({})
|
||||
# This is a very slow operation, thats why we only use it for
|
||||
# the text attribute
|
||||
lbound, rbound = s_attr.cpos_by_id(id)
|
||||
current_app.logger.info(f's_attrs.{s_attr.name}.lexicon.{id}.bounds')
|
||||
static_data['s_attrs'][s_attr.name]['lexicon'][id]['bounds'] = [lbound, rbound]
|
||||
static_data['s_attrs'][s_attr.name]['lexicon'][id]['freqs'] = {}
|
||||
cpos_list = list(range(lbound, rbound + 1))
|
||||
for p_attr in cqi_p_attrs:
|
||||
p_attr_ids = []
|
||||
p_attr_ids.extend(p_attr.ids_by_cpos(cpos_list))
|
||||
current_app.logger.info(f's_attrs.{s_attr.name}.lexicon.{id}.freqs.{p_attr.name}')
|
||||
static_data['s_attrs'][s_attr.name]['lexicon'][id]['freqs'][p_attr.name] = dict(Counter(p_attr_ids))
|
||||
del p_attr_ids
|
||||
del cpos_list
|
||||
|
||||
sub_s_attrs = cqi_corpus.structural_attributes.list(filters={'part_of': s_attr})
|
||||
current_app.logger.info(f's_attrs.{s_attr.name}.values')
|
||||
static_data['s_attrs'][s_attr.name]['values'] = [
|
||||
sub_s_attr.name[(len(s_attr.name) + 1):]
|
||||
for sub_s_attr in sub_s_attrs
|
||||
]
|
||||
s_attr_id_list = list(range(s_attr.size))
|
||||
sub_s_attr_values = []
|
||||
for sub_s_attr in sub_s_attrs:
|
||||
tmp = []
|
||||
tmp.extend(sub_s_attr.values_by_ids(s_attr_id_list))
|
||||
sub_s_attr_values.append(tmp)
|
||||
del tmp
|
||||
del s_attr_id_list
|
||||
current_app.logger.info(f'values.s_attrs.{s_attr.name}')
|
||||
static_data['values']['s_attrs'][s_attr.name] = [
|
||||
{
|
||||
s_attr_value_name: sub_s_attr_values[s_attr_value_name_idx][s_attr_id]
|
||||
for s_attr_value_name_idx, s_attr_value_name in enumerate(
|
||||
static_data['s_attrs'][s_attr.name]['values']
|
||||
)
|
||||
} for s_attr_id in range(0, s_attr.size)
|
||||
]
|
||||
del sub_s_attr_values
|
||||
current_app.logger.info('Saving static data to file')
|
||||
with gzip.open(static_data_file_path, 'wt') as f:
|
||||
json.dump(static_data, f)
|
||||
del static_data
|
||||
current_app.logger.info('Sending static data to client')
|
||||
with open(static_data_file_path, 'rb') as f:
|
||||
return f.read()
|
||||
|
||||
|
||||
def ext_corpus_paginate_corpus(
|
||||
corpus: str,
|
||||
page: int = 1,
|
||||
per_page: int = 20
|
||||
) -> dict:
|
||||
cqi_client = CQiOverSocketIOSessionManager.get_cqi_client()
|
||||
cqi_corpus = cqi_client.corpora.get(corpus)
|
||||
# Sanity checks
|
||||
if (
|
||||
per_page < 1
|
||||
or page < 1
|
||||
or (
|
||||
cqi_corpus.size > 0
|
||||
and page > math.ceil(cqi_corpus.size / per_page)
|
||||
)
|
||||
):
|
||||
return {'code': 416, 'msg': 'Range Not Satisfiable'}
|
||||
first_cpos = (page - 1) * per_page
|
||||
last_cpos = min(cqi_corpus.size, first_cpos + per_page)
|
||||
cpos_list = [*range(first_cpos, last_cpos)]
|
||||
lookups = _lookups_by_cpos(cqi_corpus, cpos_list)
|
||||
payload = {}
|
||||
# the items for the current page
|
||||
payload['items'] = [cpos_list]
|
||||
# the lookups for the items
|
||||
payload['lookups'] = lookups
|
||||
# the total number of items matching the query
|
||||
payload['total'] = cqi_corpus.size
|
||||
# the number of items to be displayed on a page.
|
||||
payload['per_page'] = per_page
|
||||
# The total number of pages
|
||||
payload['pages'] = math.ceil(payload['total'] / payload['per_page'])
|
||||
# the current page number (1 indexed)
|
||||
payload['page'] = page if payload['pages'] > 0 else None
|
||||
# True if a previous page exists
|
||||
payload['has_prev'] = payload['page'] > 1 if payload['page'] else False
|
||||
# True if a next page exists.
|
||||
payload['has_next'] = payload['page'] < payload['pages'] if payload['page'] else False # noqa
|
||||
# Number of the previous page.
|
||||
payload['prev_num'] = payload['page'] - 1 if payload['has_prev'] else None
|
||||
# Number of the next page
|
||||
payload['next_num'] = payload['page'] + 1 if payload['has_next'] else None
|
||||
return payload
|
||||
|
||||
|
||||
def ext_cqp_paginate_subcorpus(
|
||||
subcorpus: str,
|
||||
context: int = 50,
|
||||
page: int = 1,
|
||||
per_page: int = 20
|
||||
) -> dict:
|
||||
corpus_name, subcorpus_name = subcorpus.split(':', 1)
|
||||
cqi_client = CQiOverSocketIOSessionManager.get_cqi_client()
|
||||
cqi_corpus = cqi_client.corpora.get(corpus_name)
|
||||
cqi_subcorpus = cqi_corpus.subcorpora.get(subcorpus_name)
|
||||
# Sanity checks
|
||||
if (
|
||||
per_page < 1
|
||||
or page < 1
|
||||
or (
|
||||
cqi_subcorpus.size > 0
|
||||
and page > math.ceil(cqi_subcorpus.size / per_page)
|
||||
)
|
||||
):
|
||||
return {'code': 416, 'msg': 'Range Not Satisfiable'}
|
||||
offset = (page - 1) * per_page
|
||||
cutoff = per_page
|
||||
cqi_results_export = _export_subcorpus(
|
||||
cqi_subcorpus, context=context, cutoff=cutoff, offset=offset)
|
||||
payload = {}
|
||||
# the items for the current page
|
||||
payload['items'] = cqi_results_export.pop('matches')
|
||||
# the lookups for the items
|
||||
payload['lookups'] = cqi_results_export
|
||||
# the total number of items matching the query
|
||||
payload['total'] = cqi_subcorpus.size
|
||||
# the number of items to be displayed on a page.
|
||||
payload['per_page'] = per_page
|
||||
# The total number of pages
|
||||
payload['pages'] = math.ceil(payload['total'] / payload['per_page'])
|
||||
# the current page number (1 indexed)
|
||||
payload['page'] = page if payload['pages'] > 0 else None
|
||||
# True if a previous page exists
|
||||
payload['has_prev'] = payload['page'] > 1 if payload['page'] else False
|
||||
# True if a next page exists.
|
||||
payload['has_next'] = payload['page'] < payload['pages'] if payload['page'] else False # noqa
|
||||
# Number of the previous page.
|
||||
payload['prev_num'] = payload['page'] - 1 if payload['has_prev'] else None
|
||||
# Number of the next page
|
||||
payload['next_num'] = payload['page'] + 1 if payload['has_next'] else None
|
||||
return payload
|
||||
|
||||
|
||||
def ext_cqp_partial_export_subcorpus(
|
||||
subcorpus: str,
|
||||
match_id_list: list,
|
||||
context: int = 50
|
||||
) -> dict:
|
||||
corpus_name, subcorpus_name = subcorpus.split(':', 1)
|
||||
cqi_client = CQiOverSocketIOSessionManager.get_cqi_client()
|
||||
cqi_corpus = cqi_client.corpora.get(corpus_name)
|
||||
cqi_subcorpus = cqi_corpus.subcorpora.get(subcorpus_name)
|
||||
cqi_subcorpus_partial_export = _partial_export_subcorpus(cqi_subcorpus, match_id_list, context=context)
|
||||
return cqi_subcorpus_partial_export
|
||||
|
||||
|
||||
def ext_cqp_export_subcorpus(subcorpus: str, context: int = 50) -> dict:
|
||||
corpus_name, subcorpus_name = subcorpus.split(':', 1)
|
||||
cqi_client = CQiOverSocketIOSessionManager.get_cqi_client()
|
||||
cqi_corpus = cqi_client.corpora.get(corpus_name)
|
||||
cqi_subcorpus = cqi_corpus.subcorpora.get(subcorpus_name)
|
||||
cqi_subcorpus_export = _export_subcorpus(cqi_subcorpus, context=context)
|
||||
return cqi_subcorpus_export
|
||||
|
||||
|
||||
def _lookups_by_cpos(corpus: CQiCorpus, cpos_list: list[int]) -> dict:
|
||||
lookups = {}
|
||||
lookups['cpos_lookup'] = {cpos: {} for cpos in cpos_list}
|
||||
for attr in corpus.positional_attributes.list():
|
||||
cpos_attr_values = attr.values_by_cpos(cpos_list)
|
||||
for i, cpos in enumerate(cpos_list):
|
||||
lookups['cpos_lookup'][cpos][attr.name] = cpos_attr_values[i]
|
||||
for attr in corpus.structural_attributes.list():
|
||||
# We only want to iterate over non subattributes, identifiable by
|
||||
# attr.has_values == False
|
||||
if attr.has_values:
|
||||
continue
|
||||
cpos_attr_ids = attr.ids_by_cpos(cpos_list)
|
||||
for i, cpos in enumerate(cpos_list):
|
||||
if cpos_attr_ids[i] == -1:
|
||||
continue
|
||||
lookups['cpos_lookup'][cpos][attr.name] = cpos_attr_ids[i]
|
||||
occured_attr_ids = [x for x in set(cpos_attr_ids) if x != -1]
|
||||
if len(occured_attr_ids) == 0:
|
||||
continue
|
||||
subattrs = corpus.structural_attributes.list(filters={'part_of': attr})
|
||||
if len(subattrs) == 0:
|
||||
continue
|
||||
lookup_name = f'{attr.name}_lookup'
|
||||
lookups[lookup_name] = {}
|
||||
for attr_id in occured_attr_ids:
|
||||
lookups[lookup_name][attr_id] = {}
|
||||
for subattr in subattrs:
|
||||
subattr_name = subattr.name[(len(attr.name) + 1):] # noqa
|
||||
for i, subattr_value in enumerate(subattr.values_by_ids(occured_attr_ids)): # noqa
|
||||
lookups[lookup_name][occured_attr_ids[i]][subattr_name] = subattr_value # noqa
|
||||
return lookups
|
||||
|
||||
|
||||
def _partial_export_subcorpus(
|
||||
subcorpus: CQiSubcorpus,
|
||||
match_id_list: list[int],
|
||||
context: int = 25
|
||||
) -> dict:
|
||||
if subcorpus.size == 0:
|
||||
return {'matches': []}
|
||||
match_boundaries = []
|
||||
for match_id in match_id_list:
|
||||
if match_id < 0 or match_id >= subcorpus.size:
|
||||
continue
|
||||
match_boundaries.append(
|
||||
(
|
||||
match_id,
|
||||
subcorpus.dump(subcorpus.fields['match'], match_id, match_id)[0],
|
||||
subcorpus.dump(subcorpus.fields['matchend'], match_id, match_id)[0]
|
||||
)
|
||||
)
|
||||
cpos_set = set()
|
||||
matches = []
|
||||
for match_boundary in match_boundaries:
|
||||
match_num, match_start, match_end = match_boundary
|
||||
c = (match_start, match_end)
|
||||
if match_start == 0 or context == 0:
|
||||
lc = None
|
||||
cpos_list_lbound = match_start
|
||||
else:
|
||||
lc_lbound = max(0, (match_start - context))
|
||||
lc_rbound = match_start - 1
|
||||
lc = (lc_lbound, lc_rbound)
|
||||
cpos_list_lbound = lc_lbound
|
||||
if match_end == (subcorpus.collection.corpus.size - 1) or context == 0:
|
||||
rc = None
|
||||
cpos_list_rbound = match_end
|
||||
else:
|
||||
rc_lbound = match_end + 1
|
||||
rc_rbound = min(
|
||||
(match_end + context),
|
||||
(subcorpus.collection.corpus.size - 1)
|
||||
)
|
||||
rc = (rc_lbound, rc_rbound)
|
||||
cpos_list_rbound = rc_rbound
|
||||
match = {'num': match_num, 'lc': lc, 'c': c, 'rc': rc}
|
||||
matches.append(match)
|
||||
cpos_set.update(range(cpos_list_lbound, cpos_list_rbound + 1))
|
||||
lookups = _lookups_by_cpos(subcorpus.collection.corpus, list(cpos_set))
|
||||
return {'matches': matches, **lookups}
|
||||
|
||||
|
||||
def _export_subcorpus(
|
||||
subcorpus: CQiSubcorpus,
|
||||
context: int = 25,
|
||||
cutoff: float = float('inf'),
|
||||
offset: int = 0
|
||||
) -> dict:
|
||||
if subcorpus.size == 0:
|
||||
return {'matches': []}
|
||||
first_match = max(0, offset)
|
||||
last_match = min((offset + cutoff - 1), (subcorpus.size - 1))
|
||||
match_boundaries = zip(
|
||||
range(first_match, last_match + 1),
|
||||
subcorpus.dump(subcorpus.fields['match'], first_match, last_match),
|
||||
subcorpus.dump(subcorpus.fields['matchend'], first_match, last_match)
|
||||
)
|
||||
cpos_set = set()
|
||||
matches = []
|
||||
for match_num, match_start, match_end in match_boundaries:
|
||||
c = (match_start, match_end)
|
||||
if match_start == 0 or context == 0:
|
||||
lc = None
|
||||
cpos_list_lbound = match_start
|
||||
else:
|
||||
lc_lbound = max(0, (match_start - context))
|
||||
lc_rbound = match_start - 1
|
||||
lc = (lc_lbound, lc_rbound)
|
||||
cpos_list_lbound = lc_lbound
|
||||
if match_end == (subcorpus.collection.corpus.size - 1) or context == 0:
|
||||
rc = None
|
||||
cpos_list_rbound = match_end
|
||||
else:
|
||||
rc_lbound = match_end + 1
|
||||
rc_rbound = min(
|
||||
(match_end + context),
|
||||
(subcorpus.collection.corpus.size - 1)
|
||||
)
|
||||
rc = (rc_lbound, rc_rbound)
|
||||
cpos_list_rbound = rc_rbound
|
||||
match = {'num': match_num, 'lc': lc, 'c': c, 'rc': rc}
|
||||
matches.append(match)
|
||||
cpos_set.update(range(cpos_list_lbound, cpos_list_rbound + 1))
|
||||
lookups = _lookups_by_cpos(subcorpus.collection.corpus, list(cpos_set))
|
||||
return {'matches': matches, **lookups}
|
37
app/namespaces/cqi_over_sio/utils.py
Normal file
37
app/namespaces/cqi_over_sio/utils.py
Normal file
@ -0,0 +1,37 @@
|
||||
from cqi import CQiClient
|
||||
from threading import Lock
|
||||
from flask import session
|
||||
|
||||
|
||||
class CQiOverSocketIOSessionManager:
|
||||
@staticmethod
|
||||
def setup():
|
||||
session['cqi_over_sio'] = {}
|
||||
|
||||
@staticmethod
|
||||
def teardown():
|
||||
session.pop('cqi_over_sio')
|
||||
|
||||
@staticmethod
|
||||
def set_corpus_id(corpus_id: int):
|
||||
session['cqi_over_sio']['corpus_id'] = corpus_id
|
||||
|
||||
@staticmethod
|
||||
def get_corpus_id() -> int:
|
||||
return session['cqi_over_sio']['corpus_id']
|
||||
|
||||
@staticmethod
|
||||
def set_cqi_client(cqi_client: CQiClient):
|
||||
session['cqi_over_sio']['cqi_client'] = cqi_client
|
||||
|
||||
@staticmethod
|
||||
def get_cqi_client() -> CQiClient:
|
||||
return session['cqi_over_sio']['cqi_client']
|
||||
|
||||
@staticmethod
|
||||
def set_cqi_client_lock(cqi_client_lock: Lock):
|
||||
session['cqi_over_sio']['cqi_client_lock'] = cqi_client_lock
|
||||
|
||||
@staticmethod
|
||||
def get_cqi_client_lock() -> Lock:
|
||||
return session['cqi_over_sio']['cqi_client_lock']
|
Reference in New Issue
Block a user