Merge branch 'visualizations-update' of gitlab.ub.uni-bielefeld.de:sfb1288inf/nopaque into visualizations-update

This commit is contained in:
Inga Kirschnick
2023-07-03 11:06:59 +02:00
28 changed files with 2197 additions and 122941 deletions

View File

@ -16,4 +16,4 @@ def before_request():
pass
from . import cli, cqi_over_socketio, files, followers, routes, json_routes
from . import cli, cqi_over_sio, files, followers, routes, json_routes

View File

@ -1,8 +1,9 @@
from cqi import CQiClient
from cqi.errors import CQiException
from flask import session
from flask_login import current_user
from flask_socketio import ConnectionRefusedError
from threading import Lock
import cqi
from app import db, hashids, socketio
from app.decorators import socketio_login_required
from app.models import Corpus, CorpusStatus
@ -39,16 +40,9 @@ Basic concept:
'''
NAMESPACE = '/corpora/corpus/corpus_analysis'
NAMESPACE = '/cqi_over_sio'
# Import all CQi over Socket.IO event handlers
from .cqi_corpora_corpus_subcorpora import * # noqa
from .cqi_corpora_corpus_structural_attributes import * # noqa
from .cqi_corpora_corpus_positional_attributes import * # noqa
from .cqi_corpora_corpus_alignment_attributes import * # noqa
from .cqi_corpora_corpus import * # noqa
from .cqi_corpora import * # noqa
from .cqi import * # noqa
@ -89,8 +83,8 @@ def connect(auth):
socketio.sleep(3)
retry_counter -= 1
db.session.refresh(corpus)
cqi_client = cqi.CQiClient(f'cqpserver_{corpus_id}')
session['d'] = {
cqi_client = CQiClient(f'cqpserver_{corpus_id}')
session['cqi_over_sio'] = {
'corpus_id': corpus_id,
'cqi_client': cqi_client,
'cqi_client_lock': Lock(),
@ -100,16 +94,19 @@ def connect(auth):
@socketio.on('disconnect', namespace=NAMESPACE)
def disconnect():
if 'd' not in session:
return
session['d']['cqi_client_lock'].acquire()
try:
session['d']['cqi_client'].disconnect()
except (BrokenPipeError, cqi.errors.CQiException):
cqi_client: CQiClient = session['cqi_over_sio']['cqi_client']
cqi_client_lock: Lock = session['cqi_over_sio']['cqi_client_lock']
except KeyError:
return
cqi_client_lock.acquire()
try:
cqi_client.api.ctrl_bye()
except (BrokenPipeError, CQiException):
pass
session['d']['cqi_client_lock'].release()
corpus = Corpus.query.get(session['d']['corpus_id'])
cqi_client_lock.release()
corpus = Corpus.query.get(session['cqi_over_sio']['corpus_id'])
corpus.num_analysis_sessions = Corpus.num_analysis_sessions - 1
db.session.commit()
session.pop('d')
session.pop('cqi_over_sio')
# return {'code': 200, 'msg': 'OK'}

View File

@ -0,0 +1,120 @@
from cqi import CQiClient
from cqi.errors import CQiException
from cqi.status import CQiStatus
from flask import session
from inspect import signature
from threading import Lock
from typing import Callable, Dict, List
from app import socketio
from app.decorators import socketio_login_required
from . import NAMESPACE as ns
from .extensions import CQI_EXTENSION_FUNCTION_NAMES
from . import extensions as extensions_module
CQI_FUNCTION_NAMES: List[str] = [
'ask_feature_cl_2_3',
'ask_feature_cqi_1_0',
'ask_feature_cqp_2_3',
'cl_alg2cpos',
'cl_attribute_size',
'cl_cpos2alg',
'cl_cpos2id',
'cl_cpos2lbound',
'cl_cpos2rbound',
'cl_cpos2str',
'cl_cpos2struc',
'cl_drop_attribute',
'cl_id2cpos',
'cl_id2freq',
'cl_id2str',
'cl_idlist2cpos',
'cl_lexicon_size',
'cl_regex2id',
'cl_str2id',
'cl_struc2cpos',
'cl_struc2str',
'corpus_alignment_attributes',
'corpus_charset',
'corpus_drop_corpus',
'corpus_full_name',
'corpus_info',
'corpus_list_corpora',
'corpus_positional_attributes',
'corpus_properties',
'corpus_structural_attribute_has_values',
'corpus_structural_attributes',
'cqp_drop_subcorpus',
'cqp_dump_subcorpus',
'cqp_fdist_1',
'cqp_fdist_2',
'cqp_list_subcorpora',
'cqp_query',
'cqp_subcorpus_has_field',
'cqp_subcorpus_size',
'ctrl_bye',
'ctrl_connect',
'ctrl_last_general_error',
'ctrl_ping',
'ctrl_user_abort'
]
@socketio.on('cqi', namespace=ns)
@socketio_login_required
def cqi_over_sio(fn_data):
try:
fn_name: str = fn_data['fn_name']
except KeyError:
return {'code': 400, 'msg': 'Bad Request'}
fn_name: str = fn_data['fn_name']
fn_args: Dict = fn_data.get('fn_args', {})
try:
cqi_client: CQiClient = session['cqi_over_sio']['cqi_client']
cqi_client_lock: Lock = session['cqi_over_sio']['cqi_client_lock']
except KeyError:
return {'code': 424, 'msg': 'Failed Dependency'}
if fn_name in CQI_FUNCTION_NAMES:
fn: Callable = getattr(cqi_client.api, fn_name)
elif fn_name in CQI_EXTENSION_FUNCTION_NAMES:
fn_args['cqi_client'] = cqi_client
fn: Callable = getattr(extensions_module, fn_name)
else:
return {'code': 400, 'msg': 'Bad Request'}
for param in signature(fn).parameters.values():
if param.default is param.empty:
if param.name not in fn_args:
return {'code': 400, 'msg': 'Bad Request'}
else:
if param.name not in fn_args:
continue
if type(fn_args[param.name]) is not param.annotation:
return {'code': 400, 'msg': 'Bad Request'}
cqi_client_lock.acquire()
try:
return_value = fn(**fn_args)
except BrokenPipeError:
return_value = {
'code': 500,
'msg': 'Internal Server Error'
}
except CQiException as e:
return_value = {
'code': 502,
'msg': 'Bad Gateway',
'payload': {
'code': e.code,
'desc': e.description,
'msg': e.__class__.__name__
}
}
finally:
cqi_client_lock.release()
if isinstance(return_value, CQiStatus):
payload = {
'code': return_value.code,
'msg': return_value.__class__.__name__
}
else:
payload = return_value
return {'code': 200, 'msg': 'OK', 'payload': payload}

View File

@ -0,0 +1,254 @@
from collections import Counter
from cqi import CQiClient
from cqi.models.corpora import Corpus
from cqi.status import StatusOk
from flask import session
from typing import Dict, List
import json
import math
import os
from app import db
from app.models import Corpus
from .utils import lookups_by_cpos, partial_export_subcorpus, export_subcorpus
CQI_EXTENSION_FUNCTION_NAMES: List[str] = [
'ext_corpus_update_db',
'ext_corpus_static_data',
'ext_corpus_paginate_corpus',
'ext_cqp_paginate_subcorpus',
'ext_cqp_partial_export_subcorpus',
'ext_cqp_export_subcorpus',
]
def ext_corpus_update_db(cqi_client: CQiClient, corpus: str):
db_corpus = Corpus.query.get(session['cqi_over_sio']['corpus_id'])
cqi_corpus = cqi_client.corpora.get(corpus)
db_corpus.num_tokens = cqi_corpus.size
db.session.commit()
return StatusOk()
def ext_corpus_static_data(cqi_client: CQiClient, corpus: str) -> Dict:
db_corpus = Corpus.query.get(session['cqi_over_sio']['corpus_id'])
static_corpus_data_file = os.path.join(db_corpus.path, 'cwb', 'static.json')
if os.path.exists(static_corpus_data_file):
with open(static_corpus_data_file, 'r') as f:
return json.load(f)
cqi_corpus = cqi_client.corpora.get(corpus)
##########################################################################
# A faster way to get cpos boundaries for smaller s_attrs #
##########################################################################
# cqi_corpus.query('Last', '<s> []* </s>;')
# cqi_subcorpus = cqi_corpus.subcorpora.get('Last')
# print(cqi_subcorpus.size)
# first_match = 0
# last_match = cqi_subcorpus.attrs['size'] - 1
# match_boundaries = zip(
# list(range(first_match, last_match + 1)),
# cqi_subcorpus.dump(cqi_subcorpus.attrs['fields']['match'], first_match, last_match),
# cqi_subcorpus.dump(cqi_subcorpus.attrs['fields']['matchend'], first_match, last_match)
# )
# for x in match_boundaries:
# print(x)
cqi_p_attrs = {
p_attr.name: p_attr
for p_attr in cqi_corpus.positional_attributes.list()
}
cqi_s_attrs = {
s_attr.name: s_attr
for s_attr in cqi_corpus.structural_attributes.list()
}
static_corpus_data = {
'corpus': {
'bounds': [0, cqi_corpus.size - 1],
'counts': {
'token': cqi_corpus.size
},
'freqs': {}
},
'p_attrs': {},
's_attrs': {},
'values': {'p_attrs': {}, 's_attrs': {}}
}
for p_attr in cqi_p_attrs.values():
static_corpus_data['corpus']['freqs'][p_attr.name] = dict(
zip(
range(0, p_attr.lexicon_size),
p_attr.freqs_by_ids(list(range(0, p_attr.lexicon_size)))
)
)
static_corpus_data['p_attrs'][p_attr.name] = dict(
zip(
range(0, cqi_corpus.size),
p_attr.ids_by_cpos(list(range(0, cqi_corpus.size)))
)
)
static_corpus_data['values']['p_attrs'][p_attr.name] = dict(
zip(
range(0, p_attr.lexicon_size),
p_attr.values_by_ids(list(range(0, p_attr.lexicon_size)))
)
)
for s_attr in cqi_s_attrs.values():
if s_attr.has_values:
continue
static_corpus_data['corpus']['counts'][s_attr.name] = s_attr.size
static_corpus_data['s_attrs'][s_attr.name] = {'lexicon': {}, 'values': None}
static_corpus_data['values']['s_attrs'][s_attr.name] = {}
for id in range(0, s_attr.size):
static_corpus_data['s_attrs'][s_attr.name]['lexicon'][id] = {}
lbound, rbound = s_attr.cpos_by_id(id)
static_corpus_data['s_attrs'][s_attr.name]['lexicon'][id]['bounds'] = [lbound, rbound]
static_corpus_data['s_attrs'][s_attr.name]['lexicon'][id]['counts'] = {}
static_corpus_data['s_attrs'][s_attr.name]['lexicon'][id]['counts']['token'] = rbound - lbound + 1
if s_attr.name not in ['text', 's']:
continue
cpos_range = range(lbound, rbound + 1)
static_corpus_data['s_attrs'][s_attr.name]['lexicon'][id]['counts']['ent'] = len({x for x in cqi_s_attrs['ent'].ids_by_cpos(list(cpos_range)) if x != -1})
if s_attr.name != 'text':
continue
static_corpus_data['s_attrs'][s_attr.name]['lexicon'][id]['counts']['s'] = len({x for x in cqi_s_attrs['s'].ids_by_cpos(list(cpos_range)) if x != -1})
static_corpus_data['s_attrs'][s_attr.name]['lexicon'][id]['freqs'] = {}
for p_attr in cqi_p_attrs.values():
static_corpus_data['s_attrs'][s_attr.name]['lexicon'][id]['freqs'][p_attr.name] = dict(Counter(p_attr.ids_by_cpos(list(cpos_range))))
sub_s_attrs = cqi_corpus.structural_attributes.list(filters={'part_of': s_attr})
s_attr_value_names = [
sub_s_attr.name[(len(s_attr.name) + 1):]
for sub_s_attr in sub_s_attrs
]
sub_s_attr_values = [
sub_s_attr.values_by_ids(list(range(0, s_attr.size)))
for sub_s_attr in sub_s_attrs
]
static_corpus_data['s_attrs'][s_attr.name]['values'] = s_attr_value_names
static_corpus_data['values']['s_attrs'][s_attr.name] = {
s_attr_id: {
s_attr_value_name: sub_s_attr_values[s_attr_value_name_idx][s_attr_id_idx]
for s_attr_value_name_idx, s_attr_value_name in enumerate(
static_corpus_data['s_attrs'][s_attr.name]['values']
)
} for s_attr_id_idx, s_attr_id in enumerate(range(0, s_attr.size))
}
with open(static_corpus_data_file, 'w') as f:
json.dump(static_corpus_data, f)
return static_corpus_data
def ext_corpus_paginate_corpus(
cqi_client: CQiClient,
corpus: str,
page: int = 1,
per_page: int = 20
) -> Dict:
cqi_corpus = cqi_client.corpora.get(corpus)
# Sanity checks
if (
per_page < 1
or page < 1
or (
cqi_corpus.size > 0
and page > math.ceil(cqi_corpus.size / per_page)
)
):
return {'code': 416, 'msg': 'Range Not Satisfiable'}
first_cpos = (page - 1) * per_page
last_cpos = min(cqi_corpus.size, first_cpos + per_page)
cpos_list = [*range(first_cpos, last_cpos)]
lookups = lookups_by_cpos(cqi_corpus, cpos_list)
payload = {}
# the items for the current page
payload['items'] = [cpos_list]
# the lookups for the items
payload['lookups'] = lookups
# the total number of items matching the query
payload['total'] = cqi_corpus.size
# the number of items to be displayed on a page.
payload['per_page'] = per_page
# The total number of pages
payload['pages'] = math.ceil(payload['total'] / payload['per_page'])
# the current page number (1 indexed)
payload['page'] = page if payload['pages'] > 0 else None
# True if a previous page exists
payload['has_prev'] = payload['page'] > 1 if payload['page'] else False
# True if a next page exists.
payload['has_next'] = payload['page'] < payload['pages'] if payload['page'] else False # noqa
# Number of the previous page.
payload['prev_num'] = payload['page'] - 1 if payload['has_prev'] else None
# Number of the next page
payload['next_num'] = payload['page'] + 1 if payload['has_next'] else None
return payload
def ext_cqp_paginate_subcorpus(
cqi_client: CQiClient,
subcorpus: str,
context: int = 50,
page: int = 1,
per_page: int = 20
) -> Dict:
corpus_name, subcorpus_name = subcorpus.split(':', 1)
cqi_corpus = cqi_client.corpora.get(corpus_name)
cqi_subcorpus = cqi_corpus.subcorpora.get(subcorpus_name)
# Sanity checks
if (
per_page < 1
or page < 1
or (
cqi_subcorpus.size > 0
and page > math.ceil(cqi_subcorpus.size / per_page)
)
):
return {'code': 416, 'msg': 'Range Not Satisfiable'}
offset = (page - 1) * per_page
cutoff = per_page
cqi_results_export = export_subcorpus(
cqi_subcorpus, context=context, cutoff=cutoff, offset=offset)
payload = {}
# the items for the current page
payload['items'] = cqi_results_export.pop('matches')
# the lookups for the items
payload['lookups'] = cqi_results_export
# the total number of items matching the query
payload['total'] = cqi_subcorpus.size
# the number of items to be displayed on a page.
payload['per_page'] = per_page
# The total number of pages
payload['pages'] = math.ceil(payload['total'] / payload['per_page'])
# the current page number (1 indexed)
payload['page'] = page if payload['pages'] > 0 else None
# True if a previous page exists
payload['has_prev'] = payload['page'] > 1 if payload['page'] else False
# True if a next page exists.
payload['has_next'] = payload['page'] < payload['pages'] if payload['page'] else False # noqa
# Number of the previous page.
payload['prev_num'] = payload['page'] - 1 if payload['has_prev'] else None
# Number of the next page
payload['next_num'] = payload['page'] + 1 if payload['has_next'] else None
return payload
def ext_cqp_partial_export_subcorpus(
cqi_client: CQiClient,
subcorpus: str,
match_id_list: list,
context: int = 50
) -> Dict:
corpus_name, subcorpus_name = subcorpus.split(':', 1)
cqi_corpus = cqi_client.corpora.get(corpus_name)
cqi_subcorpus = cqi_corpus.subcorpora.get(subcorpus_name)
cqi_subcorpus_partial_export = partial_export_subcorpus(cqi_subcorpus, match_id_list, context=context)
return cqi_subcorpus_partial_export
def ext_cqp_export_subcorpus(
cqi_client: CQiClient,
subcorpus: str,
context: int = 50
) -> Dict:
corpus_name, subcorpus_name = subcorpus.split(':', 1)
cqi_corpus = cqi_client.corpora.get(corpus_name)
cqi_subcorpus = cqi_corpus.subcorpora.get(subcorpus_name)
cqi_subcorpus_export = export_subcorpus(cqi_subcorpus, context=context)
return cqi_subcorpus_export

View File

@ -1,64 +1,10 @@
from flask import session
from functools import wraps
from inspect import signature
import cqi
from cqi.models.corpora import Corpus
from cqi.models.subcorpora import Subcorpus
from typing import Dict, List
from app.models import Corpus
def cqi_over_socketio(f):
@wraps(f)
def wrapped(*args):
if 'd' not in session:
return {'code': 424, 'msg': 'Failed Dependency'}
f_args = {}
# Check for missing args and if all provided args are of the right type
for param in signature(f).parameters.values():
if param.name == 'corpus_name':
f_args[param.name] = f'NOPAQUE_{session["d"]["corpus_id"]}'
continue
if param.name == 'cqi_client':
f_args[param.name] = session['d']['cqi_client']
continue
if param.default is param.empty:
# args
if param.name not in args[0]:
return {'code': 400, 'msg': 'Bad Request'}
arg = args[0][param.name]
if type(arg) is not param.annotation:
return {'code': 400, 'msg': 'Bad Request'}
f_args[param.name] = arg
else:
# kwargs
if param.name not in args[0]:
continue
arg = args[0][param.name]
if type(arg) is not param.annotation:
return {'code': 400, 'msg': 'Bad Request'}
f_args[param.name] = arg
session['d']['cqi_client_lock'].acquire()
try:
return_value = f(**f_args)
except BrokenPipeError:
return_value = {
'code': 500,
'msg': 'Internal Server Error'
}
except cqi.errors.CQiException as e:
return_value = {
'code': 500,
'msg': 'Internal Server Error',
'payload': {
'code': e.code,
'desc': e.description,
'msg': e.name
}
}
finally:
session['d']['cqi_client_lock'].release()
return return_value
return wrapped
def lookups_by_cpos(corpus, cpos_list):
def lookups_by_cpos(corpus: Corpus, cpos_list: List[int]) -> Dict:
lookups = {}
lookups['cpos_lookup'] = {cpos: {} for cpos in cpos_list}
for attr in corpus.positional_attributes.list():
@ -93,18 +39,22 @@ def lookups_by_cpos(corpus, cpos_list):
return lookups
def partial_export_subcorpus(subcorpus, match_id_list, context=25):
if subcorpus.attrs['size'] == 0:
def partial_export_subcorpus(
subcorpus: Subcorpus,
match_id_list: List[int],
context: int = 25
) -> Dict:
if subcorpus.size == 0:
return {"matches": []}
match_boundaries = []
for match_id in match_id_list:
if match_id < 0 or match_id >= subcorpus.attrs['size']:
if match_id < 0 or match_id >= subcorpus.size:
continue
match_boundaries.append(
(
match_id,
subcorpus.dump(subcorpus.attrs['fields']['match'], match_id, match_id)[0],
subcorpus.dump(subcorpus.attrs['fields']['matchend'], match_id, match_id)[0]
subcorpus.dump(subcorpus.fields['match'], match_id, match_id)[0],
subcorpus.dump(subcorpus.fields['matchend'], match_id, match_id)[0]
)
)
cpos_set = set()
@ -120,14 +70,14 @@ def partial_export_subcorpus(subcorpus, match_id_list, context=25):
lc_rbound = match_start - 1
lc = (lc_lbound, lc_rbound)
cpos_list_lbound = lc_lbound
if match_end == (subcorpus.collection.corpus.attrs['size'] - 1) or context == 0:
if match_end == (subcorpus.collection.corpus.size - 1) or context == 0:
rc = None
cpos_list_rbound = match_end
else:
rc_lbound = match_end + 1
rc_rbound = min(
(match_end + context),
(subcorpus.collection.corpus.attrs['size'] - 1)
(subcorpus.collection.corpus.size - 1)
)
rc = (rc_lbound, rc_rbound)
cpos_list_rbound = rc_rbound
@ -138,15 +88,20 @@ def partial_export_subcorpus(subcorpus, match_id_list, context=25):
return {'matches': matches, **lookups}
def export_subcorpus(subcorpus, context=25, cutoff=float('inf'), offset=0):
if subcorpus.attrs['size'] == 0:
def export_subcorpus(
subcorpus: Subcorpus,
context: int = 25,
cutoff: float = float('inf'),
offset: int = 0
) -> Dict:
if subcorpus.size == 0:
return {"matches": []}
first_match = max(0, offset)
last_match = min((offset + cutoff - 1), (subcorpus.attrs['size'] - 1))
last_match = min((offset + cutoff - 1), (subcorpus.size - 1))
match_boundaries = zip(
list(range(first_match, last_match + 1)),
subcorpus.dump(subcorpus.attrs['fields']['match'], first_match, last_match),
subcorpus.dump(subcorpus.attrs['fields']['matchend'], first_match, last_match)
range(first_match, last_match + 1),
subcorpus.dump(subcorpus.fields['match'], first_match, last_match),
subcorpus.dump(subcorpus.fields['matchend'], first_match, last_match)
)
cpos_set = set()
matches = []
@ -160,14 +115,14 @@ def export_subcorpus(subcorpus, context=25, cutoff=float('inf'), offset=0):
lc_rbound = match_start - 1
lc = (lc_lbound, lc_rbound)
cpos_list_lbound = lc_lbound
if match_end == (subcorpus.collection.corpus.attrs['size'] - 1) or context == 0:
if match_end == (subcorpus.collection.corpus.size - 1) or context == 0:
rc = None
cpos_list_rbound = match_end
else:
rc_lbound = match_end + 1
rc_rbound = min(
(match_end + context),
(subcorpus.collection.corpus.attrs['size'] - 1)
(subcorpus.collection.corpus.size - 1)
)
rc = (rc_lbound, rc_rbound)
cpos_list_rbound = rc_rbound

View File

@ -1,43 +0,0 @@
from socket import gaierror
import cqi
from app import socketio
from app.decorators import socketio_login_required
from . import NAMESPACE as ns
from .utils import cqi_over_socketio
@socketio.on('cqi.connect', namespace=ns)
@socketio_login_required
@cqi_over_socketio
def cqi_connect(cqi_client: cqi.CQiClient):
try:
cqi_status = cqi_client.connect()
except gaierror as e:
return {
'code': 500,
'msg': 'Internal Server Error',
'payload': {'code': e.args[0], 'desc': e.args[1]}
}
payload = {'code': cqi_status.code,
'msg': cqi_status.__class__.__name__}
return {'code': 200, 'msg': 'OK', 'payload': payload}
@socketio.on('cqi.disconnect', namespace=ns)
@socketio_login_required
@cqi_over_socketio
def cqi_disconnect(cqi_client: cqi.CQiClient):
cqi_status = cqi_client.disconnect()
payload = {'code': cqi_status.code,
'msg': cqi_status.__class__.__name__}
return {'code': 200, 'msg': 'OK', 'payload': payload}
@socketio.on('cqi.ping', namespace=ns)
@socketio_login_required
@cqi_over_socketio
def cqi_ping(cqi_client: cqi.CQiClient):
cqi_status = cqi_client.ping()
payload = {'code': cqi_status.code,
'msg': cqi_status.__class__.__name__}
return {'code': 200, 'msg': 'OK', 'payload': payload}

View File

@ -1,22 +0,0 @@
import cqi
from app import socketio
from app.decorators import socketio_login_required
from . import NAMESPACE as ns
from .utils import cqi_over_socketio
@socketio.on('cqi.corpora.get', namespace=ns)
@socketio_login_required
@cqi_over_socketio
def cqi_corpora_get(cqi_client: cqi.CQiClient, corpus_name: str):
cqi_corpus = cqi_client.corpora.get(corpus_name)
payload = {**cqi_corpus.attrs}
return {'code': 200, 'msg': 'OK', 'payload': payload}
@socketio.on('cqi.corpora.list', namespace=ns)
@socketio_login_required
@cqi_over_socketio
def cqi_corpora_list(cqi_client: cqi.CQiClient):
payload = [{**x.attrs} for x in cqi_client.corpora.list()]
return {'code': 200, 'msg': 'OK', 'payload': payload}

View File

@ -1,199 +0,0 @@
from collections import Counter
from flask import session
import cqi
import json
import math
import os
from app import db, socketio
from app.decorators import socketio_login_required
from app.models import Corpus
from . import NAMESPACE as ns
from .utils import cqi_over_socketio, lookups_by_cpos
@socketio.on('cqi.corpora.corpus.drop', namespace=ns)
@socketio_login_required
@cqi_over_socketio
def cqi_corpora_corpus_drop(cqi_client: cqi.CQiClient, corpus_name: str):
cqi_corpus = cqi_client.corpora.get(corpus_name)
cqi_status = cqi_corpus.drop()
payload = {'code': cqi_status.code,
'msg': cqi_status.__class__.__name__}
return {'code': 200, 'msg': 'OK', 'payload': payload}
@socketio.on('cqi.corpora.corpus.query', namespace=ns)
@socketio_login_required
@cqi_over_socketio
def cqi_corpora_corpus_query(cqi_client: cqi.CQiClient, corpus_name: str, subcorpus_name: str, query: str): # noqa
cqi_corpus = cqi_client.corpora.get(corpus_name)
cqi_status = cqi_corpus.query(subcorpus_name, query)
payload = {'code': cqi_status.code,
'msg': cqi_status.__class__.__name__}
return {'code': 200, 'msg': 'OK', 'payload': payload}
###############################################################################
# nopaque specific CQi extensions #
###############################################################################
@socketio.on('cqi.corpora.corpus.update_db', namespace=ns)
@socketio_login_required
@cqi_over_socketio
def cqi_corpora_corpus_update_db(cqi_client: cqi.CQiClient, corpus_name: str):
corpus = Corpus.query.get(session['d']['corpus_id'])
cqi_corpus = cqi_client.corpora.get(corpus_name)
corpus.num_tokens = cqi_corpus.size
db.session.commit()
@socketio.on('cqi.corpora.corpus.get_visualization_data', namespace=ns)
@socketio_login_required
@cqi_over_socketio
def cqi_corpora_corpus_get_visualization_data(cqi_client: cqi.CQiClient, corpus_name: str):
corpus = Corpus.query.get(session['d']['corpus_id'])
visualization_data_file_path = os.path.join(corpus.path, 'cwb', 'visualization_data.json')
if os.path.exists(visualization_data_file_path):
with open(visualization_data_file_path, 'r') as f:
payload = json.load(f)
return {'code': 200, 'msg': 'OK', 'payload': payload}
cqi_corpus = cqi_client.corpora.get(corpus_name)
##########################################################################
# A faster way to get cpos boundaries for smaller s_attrs #
##########################################################################
# cqi_corpus.query('Last', '<s> []* </s>;')
# cqi_subcorpus = cqi_corpus.subcorpora.get('Last')
# print(cqi_subcorpus.size)
# first_match = 0
# last_match = cqi_subcorpus.attrs['size'] - 1
# match_boundaries = zip(
# list(range(first_match, last_match + 1)),
# cqi_subcorpus.dump(cqi_subcorpus.attrs['fields']['match'], first_match, last_match),
# cqi_subcorpus.dump(cqi_subcorpus.attrs['fields']['matchend'], first_match, last_match)
# )
# for x in match_boundaries:
# print(x)
cqi_p_attrs = {
p_attr.name: p_attr
for p_attr in cqi_corpus.positional_attributes.list()
}
cqi_s_attrs = {
s_attr.name: s_attr
for s_attr in cqi_corpus.structural_attributes.list()
}
payload = {
'corpus': {
'bounds': [0, cqi_corpus.size - 1],
'counts': {
'token': cqi_corpus.size
},
'freqs': {}
},
'p_attrs': {},
's_attrs': {},
'values': {'p_attrs': {}, 's_attrs': {}}
}
for p_attr in cqi_p_attrs.values():
payload['corpus']['freqs'][p_attr.name] = dict(
zip(
range(0, p_attr.lexicon_size),
p_attr.freqs_by_ids(list(range(0, p_attr.lexicon_size)))
)
)
payload['p_attrs'][p_attr.name] = dict(
zip(
range(0, cqi_corpus.size),
p_attr.ids_by_cpos(list(range(0, cqi_corpus.size)))
)
)
payload['values']['p_attrs'][p_attr.name] = dict(
zip(
range(0, p_attr.lexicon_size),
p_attr.values_by_ids(list(range(0, p_attr.lexicon_size)))
)
)
for s_attr in cqi_s_attrs.values():
if s_attr.has_values:
continue
payload['corpus']['counts'][s_attr.name] = s_attr.size
payload['s_attrs'][s_attr.name] = {'lexicon': {}, 'values': None}
payload['values']['s_attrs'][s_attr.name] = {}
for id in range(0, s_attr.size):
payload['s_attrs'][s_attr.name]['lexicon'][id] = {}
lbound, rbound = s_attr.cpos_by_id(id)
payload['s_attrs'][s_attr.name]['lexicon'][id]['bounds'] = [lbound, rbound]
payload['s_attrs'][s_attr.name]['lexicon'][id]['counts'] = {}
payload['s_attrs'][s_attr.name]['lexicon'][id]['counts']['token'] = rbound - lbound + 1
if s_attr.name not in ['text', 's']:
continue
cpos_range = range(lbound, rbound + 1)
payload['s_attrs'][s_attr.name]['lexicon'][id]['counts']['ent'] = len({x for x in cqi_s_attrs['ent'].ids_by_cpos(list(cpos_range)) if x != -1})
if s_attr.name != 'text':
continue
payload['s_attrs'][s_attr.name]['lexicon'][id]['counts']['s'] = len({x for x in cqi_s_attrs['s'].ids_by_cpos(list(cpos_range)) if x != -1})
payload['s_attrs'][s_attr.name]['lexicon'][id]['freqs'] = {}
for p_attr in cqi_p_attrs.values():
payload['s_attrs'][s_attr.name]['lexicon'][id]['freqs'][p_attr.name] = dict(Counter(p_attr.ids_by_cpos(list(cpos_range))))
sub_s_attrs = cqi_corpus.structural_attributes.list(filters={'part_of': s_attr})
s_attr_value_names = [
sub_s_attr.name[(len(s_attr.name) + 1):]
for sub_s_attr in sub_s_attrs
]
sub_s_attr_values = [
sub_s_attr.values_by_ids(list(range(0, s_attr.size)))
for sub_s_attr in sub_s_attrs
]
payload['s_attrs'][s_attr.name]['values'] = s_attr_value_names
payload['values']['s_attrs'][s_attr.name] = {
s_attr_id: {
s_attr_value_name: sub_s_attr_values[s_attr_value_name_idx][s_attr_id_idx]
for s_attr_value_name_idx, s_attr_value_name in enumerate(
payload['s_attrs'][s_attr.name]['values']
)
} for s_attr_id_idx, s_attr_id in enumerate(range(0, s_attr.size))
}
with open(visualization_data_file_path, 'w') as f:
json.dump(payload, f)
return {'code': 200, 'msg': 'OK', 'payload': payload}
@socketio.on('cqi.corpora.corpus.paginate', namespace=ns)
@socketio_login_required
@cqi_over_socketio
def cqi_corpora_corpus_paginate(cqi_client: cqi.CQiClient, corpus_name: str, page: int = 1, per_page: int = 20): # noqa
cqi_corpus = cqi_client.corpora.get(corpus_name)
# Sanity checks
if (
per_page < 1
or page < 1
or (
cqi_corpus.size > 0
and page > math.ceil(cqi_corpus.size / per_page)
)
):
return {'code': 416, 'msg': 'Range Not Satisfiable'}
first_cpos = (page - 1) * per_page
last_cpos = min(cqi_corpus.size, first_cpos + per_page)
cpos_list = [*range(first_cpos, last_cpos)]
lookups = lookups_by_cpos(cqi_corpus, cpos_list)
payload = {}
# the items for the current page
payload['items'] = [cpos_list]
# the lookups for the items
payload['lookups'] = lookups
# the total number of items matching the query
payload['total'] = cqi_corpus.size
# the number of items to be displayed on a page.
payload['per_page'] = per_page
# The total number of pages
payload['pages'] = math.ceil(payload['total'] / payload['per_page'])
# the current page number (1 indexed)
payload['page'] = page if payload['pages'] > 0 else None
# True if a previous page exists
payload['has_prev'] = payload['page'] > 1 if payload['page'] else False
# True if a next page exists.
payload['has_next'] = payload['page'] < payload['pages'] if payload['page'] else False # noqa
# Number of the previous page.
payload['prev_num'] = payload['page'] - 1 if payload['has_prev'] else None
# Number of the next page
payload['next_num'] = payload['page'] + 1 if payload['has_next'] else None
return {'code': 200, 'msg': 'OK', 'payload': payload}

View File

@ -1,24 +0,0 @@
import cqi
from app import socketio
from app.decorators import socketio_login_required
from . import NAMESPACE as ns
from .utils import cqi_over_socketio
@socketio.on('cqi.corpora.corpus.alignment_attributes.get', namespace=ns)
@socketio_login_required
@cqi_over_socketio
def cqi_corpora_corpus_alignment_attributes_get(cqi_client: cqi.CQiClient, corpus_name: str, alignment_attribute_name: str): # noqa
cqi_corpus = cqi_client.corpora.get(corpus_name)
cqi_alignment_attribute = cqi_corpus.alignment_attributes.get(alignment_attribute_name) # noqa
payload = {**cqi_alignment_attribute.attrs}
return {'code': 200, 'msg': 'OK', 'payload': payload}
@socketio.on('cqi.corpora.corpus.alignment_attributes.list', namespace=ns)
@socketio_login_required
@cqi_over_socketio
def cqi_corpora_corpus_alignment_attributes_list(cqi_client: cqi.CQiClient, corpus_name: str): # noqa
cqi_corpus = cqi_client.corpora.get(corpus_name)
payload = [{**x.attrs} for x in cqi_corpus.alignment_attributes.list()]
return {'code': 200, 'msg': 'OK', 'payload': payload}

View File

@ -1,24 +0,0 @@
import cqi
from app import socketio
from app.decorators import socketio_login_required
from . import NAMESPACE as ns
from .utils import cqi_over_socketio
@socketio.on('cqi.corpora.corpus.positional_attributes.get', namespace=ns)
@socketio_login_required
@cqi_over_socketio
def cqi_corpora_corpus_positional_attributes_get(cqi_client: cqi.CQiClient, corpus_name: str, positional_attribute_name: str): # noqa
cqi_corpus = cqi_client.corpora.get(corpus_name)
cqi_positional_attribute = cqi_corpus.positional_attributes.get(positional_attribute_name) # noqa
payload = {**cqi_positional_attribute.attrs}
return {'code': 200, 'msg': 'OK', 'payload': payload}
@socketio.on('cqi.corpora.corpus.positional_attributes.list', namespace=ns)
@socketio_login_required
@cqi_over_socketio
def cqi_corpora_corpus_positional_attributes_list(cqi_client: cqi.CQiClient, corpus_name: str): # noqa
cqi_corpus = cqi_client.corpora.get(corpus_name)
payload = [{**x.attrs} for x in cqi_corpus.positional_attributes.list()]
return {'code': 200, 'msg': 'OK', 'payload': payload}

View File

@ -1,24 +0,0 @@
import cqi
from app import socketio
from app.decorators import socketio_login_required
from . import NAMESPACE as ns
from .utils import cqi_over_socketio
@socketio.on('cqi.corpora.corpus.structural_attributes.get', namespace=ns)
@socketio_login_required
@cqi_over_socketio
def cqi_corpora_corpus_structural_attributes_get(cqi_client: cqi.CQiClient, corpus_name: str, structural_attribute_name: str): # noqa
cqi_corpus = cqi_client.corpora.get(corpus_name)
cqi_structural_attribute = cqi_corpus.structural_attributes.get(structural_attribute_name) # noqa
payload = {**cqi_structural_attribute.attrs}
return {'code': 200, 'msg': 'OK', 'payload': payload}
@socketio.on('cqi.corpora.corpus.structural_attributes.list', namespace=ns)
@socketio_login_required
@cqi_over_socketio
def cqi_corpora_corpus_structural_attributes_list(cqi_client: cqi.CQiClient, corpus_name: str): # noqa
cqi_corpus = cqi_client.corpora.get(corpus_name)
payload = [{**x.attrs} for x in cqi_corpus.structural_attributes.list()]
return {'code': 200, 'msg': 'OK', 'payload': payload}

View File

@ -1,125 +0,0 @@
import cqi
import math
from app import socketio
from app.decorators import socketio_login_required
from . import NAMESPACE as ns
from .utils import cqi_over_socketio, export_subcorpus, partial_export_subcorpus
@socketio.on('cqi.corpora.corpus.subcorpora.get', namespace=ns)
@socketio_login_required
@cqi_over_socketio
def cqi_corpora_corpus_subcorpora_get(cqi_client: cqi.CQiClient, corpus_name: str, subcorpus_name: str): # noqa
cqi_corpus = cqi_client.corpora.get(corpus_name)
cqi_subcorpus = cqi_corpus.subcorpora.get(subcorpus_name)
payload = {**cqi_subcorpus.attrs}
return {'code': 200, 'msg': 'OK', 'payload': payload}
@socketio.on('cqi.corpora.corpus.subcorpora.list', namespace=ns)
@socketio_login_required
@cqi_over_socketio
def cqi_corpora_corpus_subcorpora_list(cqi_client: cqi.CQiClient, corpus_name: str): # noqa
cqi_corpus = cqi_client.corpora.get(corpus_name)
payload = [{**x.attrs} for x in cqi_corpus.subcorpora.list()]
return {'code': 200, 'msg': 'OK', 'payload': payload}
@socketio.on('cqi.corpora.corpus.subcorpora.subcorpus.drop', namespace=ns)
@socketio_login_required
@cqi_over_socketio
def cqi_corpora_corpus_subcorpora_subcorpus_drop(cqi_client: cqi.CQiClient, corpus_name: str, subcorpus_name: str): # noqa
cqi_corpus = cqi_client.corpora.get(corpus_name)
cqi_subcorpus = cqi_corpus.subcorpora.get(subcorpus_name)
cqi_status = cqi_subcorpus.drop()
payload = {'code': cqi_status.code,
'msg': cqi_status.__class__.__name__}
return {'code': 200, 'msg': 'OK', 'payload': payload}
@socketio.on('cqi.corpora.corpus.subcorpora.subcorpus.dump', namespace=ns)
@socketio_login_required
@cqi_over_socketio
def cqi_corpora_corpus_subcorpora_subcorpus_dump(cqi_client: cqi.CQiClient):
return {'code': 501, 'msg': 'Not Implemented'}
@socketio.on('cqi.corpora.corpus.subcorpora.subcorpus.fdist_1', namespace=ns)
@socketio_login_required
@cqi_over_socketio
def cqi_corpora_corpus_subcorpora_subcorpus_fdist_1(cqi_client: cqi.CQiClient):
return {'code': 501, 'msg': 'Not Implemented'}
@socketio.on('cqi.corpora.corpus.subcorpora.subcorpus.fdist_2', namespace=ns)
@socketio_login_required
@cqi_over_socketio
def cqi_corpora_corpus_subcorpora_subcorpus_fdist_2(cqi_client: cqi.CQiClient):
return {'code': 501, 'msg': 'Not Implemented'}
###############################################################################
# nopaque specific CQi extensions #
###############################################################################
@socketio.on('cqi.corpora.corpus.subcorpora.subcorpus.paginate', namespace=ns)
@socketio_login_required
@cqi_over_socketio
def cqi_corpora_corpus_subcorpora_subcorpus_paginate(cqi_client: cqi.CQiClient, corpus_name: str, subcorpus_name: str, context: int = 50, page: int = 1, per_page: int = 20): # noqa
cqi_corpus = cqi_client.corpora.get(corpus_name)
cqi_subcorpus = cqi_corpus.subcorpora.get(subcorpus_name)
# Sanity checks
if (
per_page < 1
or page < 1
or (
cqi_subcorpus.attrs['size'] > 0
and page > math.ceil(cqi_subcorpus.attrs['size'] / per_page)
)
):
return {'code': 416, 'msg': 'Range Not Satisfiable'}
offset = (page - 1) * per_page
cutoff = per_page
cqi_results_export = export_subcorpus(
cqi_subcorpus, context=context, cutoff=cutoff, offset=offset)
payload = {}
# the items for the current page
payload['items'] = cqi_results_export.pop('matches')
# the lookups for the items
payload['lookups'] = cqi_results_export
# the total number of items matching the query
payload['total'] = cqi_subcorpus.attrs['size']
# the number of items to be displayed on a page.
payload['per_page'] = per_page
# The total number of pages
payload['pages'] = math.ceil(payload['total'] / payload['per_page'])
# the current page number (1 indexed)
payload['page'] = page if payload['pages'] > 0 else None
# True if a previous page exists
payload['has_prev'] = payload['page'] > 1 if payload['page'] else False
# True if a next page exists.
payload['has_next'] = payload['page'] < payload['pages'] if payload['page'] else False # noqa
# Number of the previous page.
payload['prev_num'] = payload['page'] - 1 if payload['has_prev'] else None
# Number of the next page
payload['next_num'] = payload['page'] + 1 if payload['has_next'] else None
return {'code': 200, 'msg': 'OK', 'payload': payload}
@socketio.on('cqi.corpora.corpus.subcorpora.subcorpus.partial_export', namespace=ns)
@socketio_login_required
@cqi_over_socketio
def cqi_corpora_corpus_subcorpora_subcorpus_partial_export(cqi_client: cqi.CQiClient, corpus_name: str, subcorpus_name: str, match_id_list: list, context: int = 50): # noqa
cqi_corpus = cqi_client.corpora.get(corpus_name)
cqi_subcorpus = cqi_corpus.subcorpora.get(subcorpus_name)
cqi_subcorpus_partial_export = partial_export_subcorpus(cqi_subcorpus, match_id_list, context=context)
return {'code': 200, 'msg': 'OK', 'payload': cqi_subcorpus_partial_export}
@socketio.on('cqi.corpora.corpus.subcorpora.subcorpus.export', namespace=ns)
@socketio_login_required
@cqi_over_socketio
def cqi_corpora_corpus_subcorpora_subcorpus_export(cqi_client: cqi.CQiClient, corpus_name: str, subcorpus_name: str, context: int = 50): # noqa
cqi_corpus = cqi_client.corpora.get(corpus_name)
cqi_subcorpus = cqi_corpus.subcorpora.get(subcorpus_name)
cqi_subcorpus_export = export_subcorpus(cqi_subcorpus, context=context)
return {'code': 200, 'msg': 'OK', 'payload': cqi_subcorpus_export}