mirror of
https://gitlab.ub.uni-bielefeld.de/sfb1288inf/nopaque.git
synced 2024-12-25 02:44:18 +00:00
Replace the old js CQiClient with fully featured new one
This commit is contained in:
parent
07103ee4e5
commit
315b538c30
@ -8,6 +8,14 @@ from typing import Callable, Dict, List
|
|||||||
from app import socketio
|
from app import socketio
|
||||||
from app.decorators import socketio_login_required
|
from app.decorators import socketio_login_required
|
||||||
from . import NAMESPACE as ns
|
from . import NAMESPACE as ns
|
||||||
|
from .extensions import (
|
||||||
|
corpus_update_db,
|
||||||
|
corpus_static_data,
|
||||||
|
corpus_paginate_corpus,
|
||||||
|
cqp_paginate_subcorpus,
|
||||||
|
cqp_partial_export_subcorpus,
|
||||||
|
cqp_export_subcorpus,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
CQI_API_FUNCTIONS: List[str] = [
|
CQI_API_FUNCTIONS: List[str] = [
|
||||||
@ -56,15 +64,21 @@ CQI_API_FUNCTIONS: List[str] = [
|
|||||||
'ctrl_ping',
|
'ctrl_ping',
|
||||||
'ctrl_user_abort'
|
'ctrl_user_abort'
|
||||||
]
|
]
|
||||||
|
# Dispatch table for the nopaque specific (non standard CQi) functions.
# Keys are the function names a client sends as ``fn_name``; values are the
# callables imported from ``.extensions``.
CQI_NOPAQUE_FUNCTIONS: Dict[str, Callable] = {
    'nopaque_corpus_update_db': corpus_update_db,
    'nopaque_corpus_static_data': corpus_static_data,
    'nopaque_corpus_paginate_corpus': corpus_paginate_corpus,
    'nopaque_cqp_paginate_subcorpus': cqp_paginate_subcorpus,
    'nopaque_cqp_partial_export_subcorpus': cqp_partial_export_subcorpus,
    'nopaque_cqp_export_subcorpus': cqp_export_subcorpus,
}
|
||||||
|
|
||||||
|
|
||||||
@socketio.on('cqi_client.api', namespace=ns)
|
@socketio.on('cqi', namespace=ns)
|
||||||
@socketio_login_required
|
@socketio_login_required
|
||||||
def cqi_over_sio(fn_data):
|
def cqi_over_sio(fn_data):
|
||||||
try:
|
try:
|
||||||
fn_name: str = fn_data['fn_name']
|
fn_name: str = fn_data['fn_name']
|
||||||
if fn_name not in CQI_API_FUNCTIONS:
|
|
||||||
raise KeyError
|
|
||||||
except KeyError:
|
except KeyError:
|
||||||
return {'code': 400, 'msg': 'Bad Request'}
|
return {'code': 400, 'msg': 'Bad Request'}
|
||||||
fn_name: str = fn_data['fn_name']
|
fn_name: str = fn_data['fn_name']
|
||||||
@ -74,7 +88,13 @@ def cqi_over_sio(fn_data):
|
|||||||
cqi_client_lock: Lock = session['cqi_over_sio']['cqi_client_lock']
|
cqi_client_lock: Lock = session['cqi_over_sio']['cqi_client_lock']
|
||||||
except KeyError:
|
except KeyError:
|
||||||
return {'code': 424, 'msg': 'Failed Dependency'}
|
return {'code': 424, 'msg': 'Failed Dependency'}
|
||||||
|
if fn_name in CQI_API_FUNCTIONS:
|
||||||
fn: Callable = getattr(cqi_client.api, fn_name)
|
fn: Callable = getattr(cqi_client.api, fn_name)
|
||||||
|
elif fn_name in CQI_NOPAQUE_FUNCTIONS:
|
||||||
|
fn_args['cqi_client'] = cqi_client
|
||||||
|
fn: Callable = CQI_NOPAQUE_FUNCTIONS[fn_name]
|
||||||
|
else:
|
||||||
|
return {'code': 400, 'msg': 'Bad Request'}
|
||||||
for param in signature(fn).parameters.values():
|
for param in signature(fn).parameters.values():
|
||||||
if param.default is param.empty:
|
if param.default is param.empty:
|
||||||
if param.name not in fn_args:
|
if param.name not in fn_args:
|
||||||
|
243
app/corpora/cqi_over_sio/extensions/__init__.py
Normal file
243
app/corpora/cqi_over_sio/extensions/__init__.py
Normal file
@ -0,0 +1,243 @@
|
|||||||
|
from collections import Counter
|
||||||
|
from cqi import CQiClient
|
||||||
|
from cqi.status import StatusOk
|
||||||
|
from flask import session
|
||||||
|
from typing import Dict
|
||||||
|
import json
|
||||||
|
import math
|
||||||
|
import os
|
||||||
|
from app import db
|
||||||
|
from app.models import Corpus
|
||||||
|
from .utils import lookups_by_cpos, export_subcorpus, partial_export_subcorpus
|
||||||
|
|
||||||
|
|
||||||
|
def corpus_update_db(cqi_client: CQiClient, corpus: str):
    """Copy the token count reported by CQi into the database corpus.

    Args:
        cqi_client: Client used to look up the CQi corpus.
        corpus: Name handed to ``cqi_client.corpora.get``.

    Returns:
        A ``StatusOk`` instance once the database session is committed.
    """
    # The database row is the one referenced by the current Flask session.
    corpus_id = session['cqi_over_sio']['corpus_id']
    db_corpus = Corpus.query.get(corpus_id)
    db_corpus.num_tokens = cqi_client.corpora.get(corpus).size
    db.session.commit()
    return StatusOk()
|
||||||
|
|
||||||
|
|
||||||
|
def corpus_static_data(cqi_client: CQiClient, corpus: str) -> Dict:
    """Return the static (query independent) data of a corpus.

    The result is read from ``<corpus path>/cwb/static.json`` when that file
    exists. Otherwise it is computed through the CQi client, written to that
    file and returned.

    Args:
        cqi_client: Client used to look up the corpus and its attributes.
        corpus: Name handed to ``cqi_client.corpora.get``.

    Returns:
        A dict with the keys ``corpus`` (bounds, counts, freqs),
        ``p_attrs``, ``s_attrs`` and ``values``.

    Raises:
        KeyError: If the corpus has no ``ent`` (or, for ``text`` regions, no
            ``s``) structural attribute while ``text``/``s`` regions exist.
    """
    db_corpus = Corpus.query.get(session['cqi_over_sio']['corpus_id'])
    static_corpus_data_file = os.path.join(db_corpus.path, 'cwb', 'static.json')
    if os.path.exists(static_corpus_data_file):
        # NOTE: JSON object keys are strings, so data served from this cache
        # has string keys where the freshly computed dict below has ints.
        with open(static_corpus_data_file, 'r') as f:
            return json.load(f)
    cqi_corpus = cqi_client.corpora.get(corpus)
    # TODO: The cpos boundaries of small s_attrs could probably be fetched
    # faster by running a CQP query such as '<s> []* </s>;' and dumping the
    # 'match'/'matchend' fields of the resulting subcorpus.
    cqi_p_attrs = {
        p_attr.name: p_attr
        for p_attr in cqi_corpus.positional_attributes.list()
    }
    cqi_s_attrs = {
        s_attr.name: s_attr
        for s_attr in cqi_corpus.structural_attributes.list()
    }
    corpus_size = cqi_corpus.size
    all_cpos = list(range(corpus_size))
    static_corpus_data = {
        'corpus': {
            'bounds': [0, corpus_size - 1],
            'counts': {
                'token': corpus_size
            },
            'freqs': {}
        },
        'p_attrs': {},
        's_attrs': {},
        'values': {'p_attrs': {}, 's_attrs': {}}
    }
    for p_attr in cqi_p_attrs.values():
        # Every lexicon id of the attribute, built once and reused below.
        lexicon_ids = list(range(p_attr.lexicon_size))
        static_corpus_data['corpus']['freqs'][p_attr.name] = dict(
            zip(lexicon_ids, p_attr.freqs_by_ids(lexicon_ids))
        )
        static_corpus_data['p_attrs'][p_attr.name] = dict(
            zip(all_cpos, p_attr.ids_by_cpos(all_cpos))
        )
        static_corpus_data['values']['p_attrs'][p_attr.name] = dict(
            zip(lexicon_ids, p_attr.values_by_ids(lexicon_ids))
        )
    for s_attr in cqi_s_attrs.values():
        # Attributes that carry values are handled as sub attributes of
        # their parent further down, not as regions of their own.
        if s_attr.has_values:
            continue
        static_corpus_data['corpus']['counts'][s_attr.name] = s_attr.size
        lexicon = {}
        static_corpus_data['s_attrs'][s_attr.name] = {
            'lexicon': lexicon,
            'values': None
        }
        static_corpus_data['values']['s_attrs'][s_attr.name] = {}
        for s_attr_id in range(s_attr.size):
            entry = {}
            lexicon[s_attr_id] = entry
            lbound, rbound = s_attr.cpos_by_id(s_attr_id)
            entry['bounds'] = [lbound, rbound]
            entry['counts'] = {'token': rbound - lbound + 1}
            # Only 'text' and 's' regions get an entity count ...
            if s_attr.name not in ['text', 's']:
                continue
            region_cpos = list(range(lbound, rbound + 1))
            entry['counts']['ent'] = len({
                x for x in cqi_s_attrs['ent'].ids_by_cpos(region_cpos)
                if x != -1
            })
            # ... and only 'text' regions additionally get a sentence count
            # and per positional attribute frequencies.
            if s_attr.name != 'text':
                continue
            entry['counts']['s'] = len({
                x for x in cqi_s_attrs['s'].ids_by_cpos(region_cpos)
                if x != -1
            })
            entry['freqs'] = {
                p_attr.name: dict(Counter(p_attr.ids_by_cpos(region_cpos)))
                for p_attr in cqi_p_attrs.values()
            }
        sub_s_attrs = cqi_corpus.structural_attributes.list(
            filters={'part_of': s_attr}
        )
        # Sub attribute names are '<parent>_<value name>'; strip the prefix.
        s_attr_value_names = [
            sub_s_attr.name[(len(s_attr.name) + 1):]
            for sub_s_attr in sub_s_attrs
        ]
        s_attr_ids = list(range(s_attr.size))
        sub_s_attr_values = [
            sub_s_attr.values_by_ids(s_attr_ids)
            for sub_s_attr in sub_s_attrs
        ]
        static_corpus_data['s_attrs'][s_attr.name]['values'] = s_attr_value_names
        static_corpus_data['values']['s_attrs'][s_attr.name] = {
            s_attr_id: {
                value_name: values[s_attr_id]
                for value_name, values in zip(
                    s_attr_value_names,
                    sub_s_attr_values
                )
            }
            for s_attr_id in s_attr_ids
        }
    with open(static_corpus_data_file, 'w') as f:
        json.dump(static_corpus_data, f)
    return static_corpus_data
|
||||||
|
|
||||||
|
|
||||||
|
def corpus_paginate_corpus(
    cqi_client: CQiClient,
    corpus: str,
    page: int = 1,
    per_page: int = 20
) -> Dict:
    """Return one page of corpus positions together with their lookups.

    Args:
        cqi_client: Client used to look up the corpus.
        corpus: Name handed to ``cqi_client.corpora.get``.
        page: 1-indexed page number.
        per_page: Number of corpus positions per page.

    Returns:
        ``{'code': 200, 'msg': 'OK', 'payload': ...}`` on success, or
        ``{'code': 416, 'msg': 'Range Not Satisfiable'}`` when ``page`` or
        ``per_page`` is out of range.
    """
    cqi_corpus = cqi_client.corpora.get(corpus)
    out_of_range = {'code': 416, 'msg': 'Range Not Satisfiable'}
    # Sanity checks
    if per_page < 1 or page < 1:
        return out_of_range
    total = cqi_corpus.size
    if total > 0 and page > math.ceil(total / per_page):
        return out_of_range
    first_cpos = (page - 1) * per_page
    last_cpos = min(total, first_cpos + per_page)
    cpos_list = list(range(first_cpos, last_cpos))
    lookups = lookups_by_cpos(cqi_corpus, cpos_list)
    pages = math.ceil(total / per_page)
    # An empty corpus has no pages and therefore no current page.
    current_page = page if pages > 0 else None
    has_prev = current_page > 1 if current_page else False
    has_next = current_page < pages if current_page else False
    payload = {
        # the items for the current page
        'items': [cpos_list],
        # the lookups for the items
        'lookups': lookups,
        # the total number of items
        'total': total,
        # the number of items to be displayed on a page
        'per_page': per_page,
        # the total number of pages
        'pages': pages,
        # the current page number (1 indexed)
        'page': current_page,
        # True if a previous page exists
        'has_prev': has_prev,
        # True if a next page exists
        'has_next': has_next,
        # number of the previous page
        'prev_num': current_page - 1 if has_prev else None,
        # number of the next page
        'next_num': current_page + 1 if has_next else None
    }
    return {'code': 200, 'msg': 'OK', 'payload': payload}
|
||||||
|
|
||||||
|
|
||||||
|
def cqp_paginate_subcorpus(
    cqi_client: CQiClient,
    subcorpus: str,
    context: int = 50,
    page: int = 1,
    per_page: int = 20
) -> Dict:
    """Return one page of subcorpus matches together with their lookups.

    Args:
        cqi_client: Client used to look up the corpus and subcorpus.
        subcorpus: ``'<corpus name>:<subcorpus name>'``.
        context: Context size forwarded to ``export_subcorpus``.
        page: 1-indexed page number.
        per_page: Number of matches per page.

    Returns:
        ``{'code': 200, 'msg': 'OK', 'payload': ...}`` on success, or
        ``{'code': 416, 'msg': 'Range Not Satisfiable'}`` when ``page`` or
        ``per_page`` is out of range.

    Raises:
        ValueError: If ``subcorpus`` contains no ``':'``.
    """
    corpus_name, subcorpus_name = subcorpus.split(':', 1)
    cqi_subcorpus = (
        cqi_client.corpora.get(corpus_name).subcorpora.get(subcorpus_name)
    )
    out_of_range = {'code': 416, 'msg': 'Range Not Satisfiable'}
    # Sanity checks
    if per_page < 1 or page < 1:
        return out_of_range
    if (
        cqi_subcorpus.size > 0
        and page > math.ceil(cqi_subcorpus.size / per_page)
    ):
        return out_of_range
    export = export_subcorpus(
        cqi_subcorpus,
        context=context,
        cutoff=per_page,
        offset=(page - 1) * per_page
    )
    # 'matches' become the page items; whatever is left are the lookups.
    items = export.pop('matches')
    total = cqi_subcorpus.size
    pages = math.ceil(total / per_page)
    # An empty subcorpus has no pages and therefore no current page.
    current_page = page if pages > 0 else None
    has_prev = current_page > 1 if current_page else False
    has_next = current_page < pages if current_page else False
    payload = {
        # the items for the current page
        'items': items,
        # the lookups for the items
        'lookups': export,
        # the total number of items matching the query
        'total': total,
        # the number of items to be displayed on a page
        'per_page': per_page,
        # the total number of pages
        'pages': pages,
        # the current page number (1 indexed)
        'page': current_page,
        # True if a previous page exists
        'has_prev': has_prev,
        # True if a next page exists
        'has_next': has_next,
        # number of the previous page
        'prev_num': current_page - 1 if has_prev else None,
        # number of the next page
        'next_num': current_page + 1 if has_next else None
    }
    return {'code': 200, 'msg': 'OK', 'payload': payload}
|
||||||
|
|
||||||
|
|
||||||
|
def cqp_partial_export_subcorpus(
    cqi_client: CQiClient,
    subcorpus: str,
    match_id_list: list,
    context: int = 50
) -> Dict:
    """Export the selected matches of a subcorpus.

    Args:
        cqi_client: Client used to look up the corpus and subcorpus.
        subcorpus: ``'<corpus name>:<subcorpus name>'``.
        match_id_list: Ids of the matches to export.
        context: Context size forwarded to ``partial_export_subcorpus``.

    Returns:
        ``{'code': 200, 'msg': 'OK', 'payload': <export>}``.

    Raises:
        ValueError: If ``subcorpus`` contains no ``':'``.
    """
    corpus_name, subcorpus_name = subcorpus.split(':', 1)
    cqi_subcorpus = (
        cqi_client.corpora.get(corpus_name).subcorpora.get(subcorpus_name)
    )
    payload = partial_export_subcorpus(
        cqi_subcorpus,
        match_id_list,
        context=context
    )
    return {'code': 200, 'msg': 'OK', 'payload': payload}
|
||||||
|
|
||||||
|
|
||||||
|
def cqp_export_subcorpus(
    cqi_client: CQiClient,
    subcorpus: str,
    context: int = 50
) -> Dict:
    """Export all matches of a subcorpus.

    Args:
        cqi_client: Client used to look up the corpus and subcorpus.
        subcorpus: ``'<corpus name>:<subcorpus name>'``.
        context: Context size forwarded to ``export_subcorpus``.

    Returns:
        ``{'code': 200, 'msg': 'OK', 'payload': <export>}``.

    Raises:
        ValueError: If ``subcorpus`` contains no ``':'``.
    """
    corpus_name, subcorpus_name = subcorpus.split(':', 1)
    cqi_subcorpus = (
        cqi_client.corpora.get(corpus_name).subcorpora.get(subcorpus_name)
    )
    payload = export_subcorpus(cqi_subcorpus, context=context)
    return {'code': 200, 'msg': 'OK', 'payload': payload}
|
132
app/corpora/cqi_over_sio/extensions/utils.py
Normal file
132
app/corpora/cqi_over_sio/extensions/utils.py
Normal file
@ -0,0 +1,132 @@
|
|||||||
|
from typing import Dict, List
|
||||||
|
from cqi.models.corpora import Corpus
|
||||||
|
from cqi.models.subcorpora import Subcorpus
|
||||||
|
|
||||||
|
|
||||||
|
def lookups_by_cpos(corpus: Corpus, cpos_list: List[int]) -> Dict:
    """Build lookup tables for the given corpus positions.

    Args:
        corpus: Corpus whose positional and structural attributes are read.
        cpos_list: Corpus positions to describe.

    Returns:
        A dict with a ``'cpos_lookup'`` entry mapping every position to its
        positional attribute values and structural attribute ids, plus one
        ``'<s_attr name>_lookup'`` entry per structural attribute that
        occurs at these positions and has sub attributes. Those entries map
        each occurring id to its sub attribute values.
    """
    cpos_lookup = {cpos: {} for cpos in cpos_list}
    lookups = {'cpos_lookup': cpos_lookup}
    for p_attr in corpus.positional_attributes.list():
        p_attr_name = p_attr.attrs['name']
        p_attr_values = p_attr.values_by_cpos(cpos_list)
        for idx, cpos in enumerate(cpos_list):
            cpos_lookup[cpos][p_attr_name] = p_attr_values[idx]
    for s_attr in corpus.structural_attributes.list():
        # Sub attributes (has_values is truthy) are resolved through their
        # parent attribute below, never on their own.
        if s_attr.attrs['has_values']:
            continue
        s_attr_name = s_attr.attrs['name']
        s_attr_ids = s_attr.ids_by_cpos(cpos_list)
        for idx, cpos in enumerate(cpos_list):
            # An id of -1 is not recorded (presumably "position is not
            # inside such a region" - confirm against the CQi spec).
            if s_attr_ids[idx] != -1:
                cpos_lookup[cpos][s_attr_name] = s_attr_ids[idx]
        seen_ids = [x for x in set(s_attr_ids) if x != -1]
        if not seen_ids:
            continue
        sub_attrs = corpus.structural_attributes.list(
            filters={'part_of': s_attr}
        )
        if not sub_attrs:
            continue
        id_lookup = {seen_id: {} for seen_id in seen_ids}
        lookups[f'{s_attr_name}_lookup'] = id_lookup
        # Sub attribute names are '<parent>_<value name>'; strip the prefix.
        prefix_length = len(s_attr_name) + 1
        for sub_attr in sub_attrs:
            value_name = sub_attr.attrs['name'][prefix_length:]
            sub_attr_values = sub_attr.values_by_ids(seen_ids)
            for idx, value in enumerate(sub_attr_values):
                id_lookup[seen_ids[idx]][value_name] = value
    return lookups
|
||||||
|
|
||||||
|
|
||||||
|
def partial_export_subcorpus(
    subcorpus: Subcorpus,
    match_id_list: List[int],
    context: int = 25
) -> Dict:
    """Export selected matches of a subcorpus with context and lookups.

    Args:
        subcorpus: Subcorpus to export from.
        match_id_list: Ids of the matches to export; ids outside
            ``0 .. subcorpus.size - 1`` are silently skipped.
        context: Number of corpus positions of left/right context.

    Returns:
        ``{'matches': [...]}`` merged with the result of
        ``lookups_by_cpos``; only ``{'matches': []}`` for an empty
        subcorpus.
    """
    if subcorpus.size == 0:
        return {"matches": []}
    # One (id, first cpos, last cpos) triple per valid match id.
    match_boundaries = [
        (
            match_id,
            subcorpus.dump(subcorpus.fields['match'], match_id, match_id)[0],
            subcorpus.dump(subcorpus.fields['matchend'], match_id, match_id)[0]
        )
        for match_id in match_id_list
        if not (match_id < 0 or match_id >= subcorpus.size)
    ]
    corpus = subcorpus.collection.corpus
    matches = []
    cpos_set = set()
    for match_num, match_start, match_end in match_boundaries:
        # Without context (or at a corpus edge) the respective side stays
        # None and the needed cpos range ends at the match itself.
        lc = None
        rc = None
        cpos_lbound = match_start
        cpos_rbound = match_end
        if context != 0 and match_start != 0:
            cpos_lbound = max(0, (match_start - context))
            lc = (cpos_lbound, match_start - 1)
        if context != 0 and match_end != (corpus.size - 1):
            cpos_rbound = min((match_end + context), (corpus.size - 1))
            rc = (match_end + 1, cpos_rbound)
        matches.append(
            {
                'num': match_num,
                'lc': lc,
                'c': (match_start, match_end),
                'rc': rc
            }
        )
        cpos_set.update(range(cpos_lbound, cpos_rbound + 1))
    lookups = lookups_by_cpos(corpus, list(cpos_set))
    return {'matches': matches, **lookups}
|
||||||
|
|
||||||
|
|
||||||
|
def export_subcorpus(
    subcorpus: Subcorpus,
    context: int = 25,
    cutoff: float = float('inf'),
    offset: int = 0
) -> Dict:
    """Export a window of subcorpus matches with context and lookups.

    Args:
        subcorpus: Subcorpus to export from.
        context: Number of corpus positions of left/right context.
        cutoff: Maximum number of matches to export.
        offset: Index of the first match to export.

    Returns:
        ``{'matches': [...]}`` merged with the result of
        ``lookups_by_cpos``; only ``{'matches': []}`` when the subcorpus is
        empty or the requested window contains no match.
    """
    if subcorpus.size == 0:
        return {"matches": []}
    first_match = max(0, offset)
    last_match = min((offset + cutoff - 1), (subcorpus.size - 1))
    # An offset behind the last match or a cutoff <= 0 selects nothing.
    # Return early instead of dumping an inverted range.
    if first_match > last_match:
        return {"matches": []}
    # cutoff is a float by annotation; a finite float would leave a float
    # here, which range() rejects.
    last_match = int(last_match)
    match_boundaries = zip(
        range(first_match, last_match + 1),
        subcorpus.dump(subcorpus.fields['match'], first_match, last_match),
        subcorpus.dump(subcorpus.fields['matchend'], first_match, last_match)
    )
    corpus = subcorpus.collection.corpus
    cpos_set = set()
    matches = []
    for match_num, match_start, match_end in match_boundaries:
        # Without context (or at a corpus edge) the respective side stays
        # None and the needed cpos range ends at the match itself.
        lc = None
        rc = None
        cpos_lbound = match_start
        cpos_rbound = match_end
        if context != 0 and match_start != 0:
            cpos_lbound = max(0, (match_start - context))
            lc = (cpos_lbound, match_start - 1)
        if context != 0 and match_end != (corpus.size - 1):
            cpos_rbound = min((match_end + context), (corpus.size - 1))
            rc = (match_end + 1, cpos_rbound)
        matches.append(
            {
                'num': match_num,
                'lc': lc,
                'c': (match_start, match_end),
                'rc': rc
            }
        )
        cpos_set.update(range(cpos_lbound, cpos_rbound + 1))
    lookups = lookups_by_cpos(corpus, list(cpos_set))
    return {'matches': matches, **lookups}
|
File diff suppressed because it is too large
Load Diff
@ -26,25 +26,19 @@ class CorpusAnalysisApp {
|
|||||||
this.disableActionElements();
|
this.disableActionElements();
|
||||||
this.elements.m.initModal.open();
|
this.elements.m.initModal.open();
|
||||||
// Init data
|
// Init data
|
||||||
this.data.cQiClient = new CQiClient(this.settings.corpusId);
|
this.data.cqiClient = new cqi.CQiClient('/cqi_over_sio', this.settings.corpusId);
|
||||||
this.data.cQiClient.connect()
|
this.data.cqiClient.connect('anonymous', '')
|
||||||
.then(cQiStatus => {
|
.then((cqiStatus) => {
|
||||||
return this.data.cQiClient.corpora.get(`NOPAQUE_${this.settings.corpusId}`);
|
return this.data.cqiClient.corpora.list();
|
||||||
})
|
})
|
||||||
.then(
|
.then((cqiCorpora) => {
|
||||||
cQiCorpus => {
|
this.data.corpus = {o: cqiCorpora[0]};
|
||||||
this.data.corpus = {o: cQiCorpus};
|
console.log(this.data.corpus.o.staticData);
|
||||||
this.data.corpus.o.getVisualizationData()
|
this.renderGeneralCorpusInfo(this.data.corpus.o.staticData);
|
||||||
.then(
|
this.renderTextInfoList(this.data.corpus.o.staticData);
|
||||||
(data) => {
|
this.renderTextProportionsGraphic(this.data.corpus.o.staticData);
|
||||||
console.log(data);
|
this.renderFrequenciesGraphic(this.data.corpus.o.staticData);
|
||||||
this.renderGeneralCorpusInfo(data);
|
this.renderBoundsGraphic(this.data.corpus.o.staticData);
|
||||||
this.renderTextInfoList(data);
|
|
||||||
this.renderTextProportionsGraphic(data);
|
|
||||||
this.renderFrequenciesGraphic(data);
|
|
||||||
this.renderBoundsGraphic(data);
|
|
||||||
}
|
|
||||||
);
|
|
||||||
// this.data.corpus.o.getCorpusData()
|
// this.data.corpus.o.getCorpusData()
|
||||||
// .then(corpusData => {
|
// .then(corpusData => {
|
||||||
// console.log(corpusData);
|
// console.log(corpusData);
|
||||||
@ -55,19 +49,20 @@ class CorpusAnalysisApp {
|
|||||||
// this.renderBoundsGraphic(corpusData);
|
// this.renderBoundsGraphic(corpusData);
|
||||||
// });
|
// });
|
||||||
// TODO: Don't do this here
|
// TODO: Don't do this here
|
||||||
cQiCorpus.updateDb();
|
this.data.corpus.o.updateDb();
|
||||||
this.enableActionElements();
|
this.enableActionElements();
|
||||||
for (let extension of Object.values(this.extensions)) {extension.init();}
|
for (let extension of Object.values(this.extensions)) {extension.init();}
|
||||||
this.elements.m.initModal.close();
|
this.elements.m.initModal.close();
|
||||||
},
|
},
|
||||||
cQiError => {
|
(cqiError) => {
|
||||||
|
// TODO: CHECK THIS!
|
||||||
let errorsElement = this.elements.initModal.querySelector('.errors');
|
let errorsElement = this.elements.initModal.querySelector('.errors');
|
||||||
let progressElement = this.elements.initModal.querySelector('.progress');
|
let progressElement = this.elements.initModal.querySelector('.progress');
|
||||||
errorsElement.innerText = JSON.stringify(cQiError);
|
errorsElement.innerText = JSON.stringify(cqiError);
|
||||||
errorsElement.classList.remove('hide');
|
errorsElement.classList.remove('hide');
|
||||||
progressElement.classList.add('hide');
|
progressElement.classList.add('hide');
|
||||||
if ('payload' in cQiError && 'code' in cQiError.payload && 'msg' in cQiError.payload) {
|
if ('payload' in cqiError && 'code' in cqiError.payload && 'msg' in cqiError.payload) {
|
||||||
app.flash(`${cQiError.payload.code}: ${cQiError.payload.msg}`, 'error');
|
app.flash(`${cqiError.payload.code}: ${cqiError.payload.msg}`, 'error');
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
);
|
);
|
||||||
|
@ -45,18 +45,18 @@ class CorpusAnalysisConcordance {
|
|||||||
this.elements.progress.classList.remove('hide');
|
this.elements.progress.classList.remove('hide');
|
||||||
let subcorpus = {};
|
let subcorpus = {};
|
||||||
this.data.corpus.o.query(subcorpusName, query)
|
this.data.corpus.o.query(subcorpusName, query)
|
||||||
.then(cQiStatus => {
|
.then((cqiStatus) => {
|
||||||
subcorpus.q = query;
|
subcorpus.q = query;
|
||||||
subcorpus.selectedItems = new Set();
|
subcorpus.selectedItems = new Set();
|
||||||
if (subcorpusName !== 'Last') {this.data.subcorpora.Last = subcorpus;}
|
if (subcorpusName !== 'Last') {this.data.subcorpora.Last = subcorpus;}
|
||||||
return this.data.corpus.o.subcorpora.get(subcorpusName);
|
return this.data.corpus.o.subcorpora.get(subcorpusName);
|
||||||
})
|
})
|
||||||
.then(cQiSubcorpus => {
|
.then((cqiSubcorpus) => {
|
||||||
subcorpus.o = cQiSubcorpus;
|
subcorpus.o = cqiSubcorpus;
|
||||||
return cQiSubcorpus.paginate(1, this.settings.perPage, this.settings.context);
|
return cqiSubcorpus.paginate(this.settings.context, 1, this.settings.perPage);
|
||||||
})
|
})
|
||||||
.then(
|
.then(
|
||||||
paginatedSubcorpus => {
|
(paginatedSubcorpus) => {
|
||||||
subcorpus.p = paginatedSubcorpus;
|
subcorpus.p = paginatedSubcorpus;
|
||||||
this.data.subcorpora[subcorpusName] = subcorpus;
|
this.data.subcorpora[subcorpusName] = subcorpus;
|
||||||
this.settings.selectedSubcorpus = subcorpusName;
|
this.settings.selectedSubcorpus = subcorpusName;
|
||||||
@ -68,11 +68,12 @@ class CorpusAnalysisConcordance {
|
|||||||
this.elements.progress.classList.add('hide');
|
this.elements.progress.classList.add('hide');
|
||||||
this.app.enableActionElements();
|
this.app.enableActionElements();
|
||||||
},
|
},
|
||||||
cQiError => {
|
(cqiStatus) => {
|
||||||
this.elements.error.innerText = JSON.stringify(cQiError);
|
// TODO: CHECK THIS!
|
||||||
|
this.elements.error.innerText = JSON.stringify(cqiStatus);
|
||||||
this.elements.error.classList.remove('hide');
|
this.elements.error.classList.remove('hide');
|
||||||
if ('payload' in cQiError && 'code' in cQiError.payload && 'msg' in cQiError.payload) {
|
if ('payload' in cqiStatus && 'code' in cqiStatus.payload && 'msg' in cqiStatus.payload) {
|
||||||
app.flash(`${cQiError.payload.code}: ${cQiError.payload.msg}`, 'error');
|
app.flash(`${cqiStatus.payload.code}: ${cqiStatus.payload.msg}`, 'error');
|
||||||
}
|
}
|
||||||
this.elements.progress.classList.add('hide');
|
this.elements.progress.classList.add('hide');
|
||||||
this.app.enableActionElements();
|
this.app.enableActionElements();
|
||||||
|
@ -28,7 +28,7 @@ class CorpusAnalysisReader {
|
|||||||
init() {
|
init() {
|
||||||
// Init data
|
// Init data
|
||||||
this.data.corpus = this.app.data.corpus;
|
this.data.corpus = this.app.data.corpus;
|
||||||
this.data.subcorpora = {};
|
this.data.subcorpora = {}; // TODO: DO WE NEED THIS HERE?
|
||||||
// Add event listeners
|
// Add event listeners
|
||||||
this.elements.form.addEventListener('submit', (event) => {
|
this.elements.form.addEventListener('submit', (event) => {
|
||||||
event.preventDefault();
|
event.preventDefault();
|
||||||
@ -38,14 +38,15 @@ class CorpusAnalysisReader {
|
|||||||
this.elements.progress.classList.remove('hide');
|
this.elements.progress.classList.remove('hide');
|
||||||
this.data.corpus.o.paginate(1, this.settings.perPage)
|
this.data.corpus.o.paginate(1, this.settings.perPage)
|
||||||
.then(
|
.then(
|
||||||
paginatedCorpus => {
|
(paginatedCorpus) => {
|
||||||
this.data.corpus.p = paginatedCorpus;
|
this.data.corpus.p = paginatedCorpus;
|
||||||
this.renderCorpus();
|
this.renderCorpus();
|
||||||
this.renderCorpusPagination();
|
this.renderCorpusPagination();
|
||||||
this.elements.progress.classList.add('hide');
|
this.elements.progress.classList.add('hide');
|
||||||
this.app.enableActionElements();
|
this.app.enableActionElements();
|
||||||
},
|
},
|
||||||
error => {
|
(cqiError) => {
|
||||||
|
// TODO: CHECK THIS!
|
||||||
this.elements.error.innerText = JSON.stringify(error);
|
this.elements.error.innerText = JSON.stringify(error);
|
||||||
this.elements.error.classList.remove('hide');
|
this.elements.error.classList.remove('hide');
|
||||||
if ('payload' in error && 'code' in error.payload && 'msg' in error.payload) {
|
if ('payload' in error && 'code' in error.payload && 'msg' in error.payload) {
|
||||||
@ -247,7 +248,7 @@ class CorpusAnalysisReader {
|
|||||||
this.elements.progress.classList.remove('hide');
|
this.elements.progress.classList.remove('hide');
|
||||||
this.data.corpus.o.paginate(pageNum, this.settings.perPage)
|
this.data.corpus.o.paginate(pageNum, this.settings.perPage)
|
||||||
.then(
|
.then(
|
||||||
paginatedCorpus => {
|
(paginatedCorpus) => {
|
||||||
this.data.corpus.p = paginatedCorpus;
|
this.data.corpus.p = paginatedCorpus;
|
||||||
this.renderCorpus();
|
this.renderCorpus();
|
||||||
this.renderCorpusPagination();
|
this.renderCorpusPagination();
|
||||||
|
@ -1,6 +1,13 @@
|
|||||||
cqi.api.APIClient = class APIClient {
|
cqi.api.APIClient = class APIClient {
|
||||||
constructor(host, corpus_id, version = '0.1') {
|
/**
|
||||||
|
* @param {string} host
|
||||||
|
* @param {string} corpusId
|
||||||
|
* @param {number} [timeout=Infinity] timeout
|
||||||
|
* @param {string} [version=0.1] version
|
||||||
|
*/
|
||||||
|
constructor(host, corpus_id, timeout = Infinity, version = '0.1') {
|
||||||
this.host = host;
|
this.host = host;
|
||||||
|
this.timeout = timeout;
|
||||||
this.version = version;
|
this.version = version;
|
||||||
this.socket = io(
|
this.socket = io(
|
||||||
this.host,
|
this.host,
|
||||||
@ -19,7 +26,8 @@ cqi.api.APIClient = class APIClient {
|
|||||||
*/
|
*/
|
||||||
#request(fn_name, fn_args = {}) {
|
#request(fn_name, fn_args = {}) {
|
||||||
return new Promise((resolve, reject) => {
|
return new Promise((resolve, reject) => {
|
||||||
this.socket.emit('cqi_client.api', {fn_name: fn_name, fn_args: fn_args}, (response) => {
|
// TODO: implement this.socket.timeout(this.timeout).emit(...)
|
||||||
|
this.socket.emit('cqi', {fn_name: fn_name, fn_args: fn_args}, (response) => {
|
||||||
if (response.code === 200) {
|
if (response.code === 200) {
|
||||||
resolve(response.payload);
|
resolve(response.payload);
|
||||||
}
|
}
|
||||||
@ -595,4 +603,84 @@ cqi.api.APIClient = class APIClient {
|
|||||||
const fn_args = {subcorpus: subcorpus, cutoff: cutoff, field1: field1, attribute1: attribute1, field2: field2, attribute2: attribute2};
|
const fn_args = {subcorpus: subcorpus, cutoff: cutoff, field1: field1, attribute1: attribute1, field2: field2, attribute2: attribute2};
|
||||||
return await this.#request(fn_name, fn_args);
|
return await this.#request(fn_name, fn_args);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* NOTE: The following functions are non standard implementations of nopaque
|
||||||
|
*/
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @param {string} corpus
|
||||||
|
* @returns {Promise<cqi.status.StatusOk>}
|
||||||
|
*/
|
||||||
|
async corpus_update_db(corpus) {
|
||||||
|
const fn_name = 'nopaque_corpus_update_db';
|
||||||
|
const fn_args = {corpus: corpus};
|
||||||
|
let payload = await this.#request(fn_name, fn_args);
|
||||||
|
return new cqi.status.lookup[payload.code]();
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @param {string} corpus
|
||||||
|
* @returns {Promise<object>}
|
||||||
|
*/
|
||||||
|
async corpus_static_data(corpus) {
|
||||||
|
const fn_name = 'nopaque_corpus_static_data';
|
||||||
|
const fn_args = {corpus: corpus};
|
||||||
|
return await this.#request(fn_name, fn_args);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @param {string} corpus
|
||||||
|
* @param {number=} page
|
||||||
|
* @param {number=} per_page
|
||||||
|
* @returns {Promise<object>}
|
||||||
|
*/
|
||||||
|
async corpus_paginate_corpus(corpus, page, per_page) {
|
||||||
|
const fn_name = 'nopaque_corpus_paginate_corpus';
|
||||||
|
const fn_args = {corpus: corpus}
|
||||||
|
if (page !== undefined) {fn_args.page = page;}
|
||||||
|
if (per_page !== undefined) {fn_args.per_page = per_page;}
|
||||||
|
return await this.#request(fn_name, fn_args);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @param {string} subcorpus
|
||||||
|
* @param {number=} context
|
||||||
|
* @param {number=} page
|
||||||
|
* @param {number=} per_page
|
||||||
|
* @returns {Promise<object>}
|
||||||
|
*/
|
||||||
|
async cqp_paginate_subcorpus(subcorpus, context, page, per_page) {
|
||||||
|
const fn_name = 'nopaque_cqp_paginate_subcorpus';
|
||||||
|
const fn_args = {subcorpus: subcorpus}
|
||||||
|
if (context !== undefined) {fn_args.context = context;}
|
||||||
|
if (page !== undefined) {fn_args.page = page;}
|
||||||
|
if (per_page !== undefined) {fn_args.per_page = per_page;}
|
||||||
|
return await this.#request(fn_name, fn_args);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @param {string} subcorpus
|
||||||
|
* @param {number[]} match_id_list
|
||||||
|
* @param {number=} context
|
||||||
|
* @returns {Promise<object>}
|
||||||
|
*/
|
||||||
|
async cqp_partial_export_subcorpus(subcorpus, match_id_list, context) {
|
||||||
|
const fn_name = 'nopaque_cqp_partial_export_subcorpus';
|
||||||
|
const fn_args = {subcorpus: subcorpus, match_id_list: match_id_list};
|
||||||
|
if (context !== undefined) {fn_args.context = context;}
|
||||||
|
return await this.#request(fn_name, fn_args);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @param {string} subcorpus
|
||||||
|
* @param {number=} context
|
||||||
|
* @returns {Promise<object>}
|
||||||
|
*/
|
||||||
|
async cqp_export_subcorpus(subcorpus, context) {
|
||||||
|
const fn_name = 'nopaque_cqp_export_subcorpus';
|
||||||
|
const fn_args = {subcorpus: subcorpus};
|
||||||
|
if (context !== undefined) {fn_args.context = context;}
|
||||||
|
return await this.#request(fn_name, fn_args);
|
||||||
|
}
|
||||||
};
|
};
|
||||||
|
@ -2,9 +2,10 @@ cqi.CQiClient = class CQiClient {
|
|||||||
/**
|
/**
|
||||||
* @param {string} host
|
* @param {string} host
|
||||||
* @param {string} corpusId
|
* @param {string} corpusId
|
||||||
|
* @param {number} [timeout=Infinity] timeout
|
||||||
* @param {string} [version=0.1] version
|
* @param {string} [version=0.1] version
|
||||||
*/
|
*/
|
||||||
constructor(host, corpusId, version = '0.1') {
|
constructor(host, corpusId, timeout = Infinity, version = '0.1') {
|
||||||
/** @type {cqi.api.APIClient} */
|
/** @type {cqi.api.APIClient} */
|
||||||
this.api = new cqi.api.APIClient(host, corpusId, version);
|
this.api = new cqi.api.APIClient(host, corpusId, version);
|
||||||
}
|
}
|
||||||
|
@ -80,6 +80,33 @@ cqi.models.corpora.Corpus = class Corpus extends cqi.models.resource.Model {
|
|||||||
async query(subcorpusName, query) {
|
async query(subcorpusName, query) {
|
||||||
return await this.client.api.cqp_query(this.apiName, subcorpusName, query);
|
return await this.client.api.cqp_query(this.apiName, subcorpusName, query);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**************************************************************************
|
||||||
|
* NOTE: The following are non-standard implementations of nopaque *
|
||||||
|
**************************************************************************/
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @returns {string}
|
||||||
|
*/
|
||||||
|
get staticData() {
|
||||||
|
return this.attrs.static_data;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @returns {cqi.status.StatusOk}
|
||||||
|
*/
|
||||||
|
async updateDb() {
|
||||||
|
return await this.client.api.corpus_update_db(this.apiName);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @param {number=} page
|
||||||
|
* @param {number=} per_page
|
||||||
|
* @returns {Promise<object>}
|
||||||
|
*/
|
||||||
|
async paginate(page, per_page) {
|
||||||
|
return await this.client.api.corpus_paginate_corpus(this.apiName, page, per_page);
|
||||||
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
@ -95,11 +122,12 @@ cqi.models.corpora.CorpusCollection = class CorpusCollection extends cqi.models.
|
|||||||
return {
|
return {
|
||||||
api_name: corpusName,
|
api_name: corpusName,
|
||||||
charset: await this.client.api.corpus_charset(corpusName),
|
charset: await this.client.api.corpus_charset(corpusName),
|
||||||
// full_name: await this.client.api.corpus_full_name(api_name),
|
// full_name: await this.client.api.corpus_full_name(corpusName),
|
||||||
// info: await this.client.api.corpus_info(api_name),
|
// info: await this.client.api.corpus_info(corpusName),
|
||||||
name: corpusName,
|
name: corpusName,
|
||||||
properties: await this.client.api.corpus_properties(corpusName),
|
properties: await this.client.api.corpus_properties(corpusName),
|
||||||
size: await this.client.api.cl_attribute_size(`${corpusName}.word`)
|
size: await this.client.api.cl_attribute_size(`${corpusName}.word`),
|
||||||
|
static_data: await this.client.api.corpus_static_data(corpusName),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -85,6 +85,37 @@ cqi.models.subcorpora.Subcorpus = class Subcorpus extends cqi.models.resource.Mo
|
|||||||
attribute2.apiName
|
attribute2.apiName
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**************************************************************************
|
||||||
|
* NOTE: The following are non-standard implementations of nopaque *
|
||||||
|
**************************************************************************/
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @param {number=} context
|
||||||
|
* @param {number=} page
|
||||||
|
* @param {number=} perPage
|
||||||
|
* @returns {Promise<object>}
|
||||||
|
*/
|
||||||
|
async paginate(context, page, perPage) {
|
||||||
|
return await this.client.api.cqp_paginate_subcorpus(this.apiName, context, page, perPage);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @param {number[]} matchIdList
|
||||||
|
* @param {number=} context
|
||||||
|
* @returns {Promise<object>}
|
||||||
|
*/
|
||||||
|
async partialExport(matchIdList, context) {
|
||||||
|
return await this.client.api.cqp_partial_export_subcorpus(this.apiName, matchIdList, context);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @param {number=} context
|
||||||
|
* @returns {Promise<object>}
|
||||||
|
*/
|
||||||
|
async export(context) {
|
||||||
|
return await this.client.api.cqp_export_subcorpus(this.apiName, context);
|
||||||
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user