Small fixes and remove old cqi_over_socketio interface

This commit is contained in:
Patrick Jentsch 2023-06-30 12:19:18 +02:00
parent 3a97b1a07a
commit a7a948908f
12 changed files with 6 additions and 776 deletions

View File

@ -16,5 +16,4 @@ def before_request():
pass
from . import cli, cqi_over_socketio, files, followers, routes, json_routes
from . import cqi_over_sio
from . import cli, cqi_over_sio, files, followers, routes, json_routes

View File

@ -167,7 +167,7 @@ def corpus_paginate_corpus(
payload['prev_num'] = payload['page'] - 1 if payload['has_prev'] else None
# Number of the next page
payload['next_num'] = payload['page'] + 1 if payload['has_next'] else None
return {'code': 200, 'msg': 'OK', 'payload': payload}
return payload
def cqp_paginate_subcorpus(
@ -215,7 +215,7 @@ def cqp_paginate_subcorpus(
payload['prev_num'] = payload['page'] - 1 if payload['has_prev'] else None
# Number of the next page
payload['next_num'] = payload['page'] + 1 if payload['has_next'] else None
return {'code': 200, 'msg': 'OK', 'payload': payload}
return payload
def cqp_partial_export_subcorpus(
@ -228,7 +228,7 @@ def cqp_partial_export_subcorpus(
cqi_corpus = cqi_client.corpora.get(corpus_name)
cqi_subcorpus = cqi_corpus.subcorpora.get(subcorpus_name)
cqi_subcorpus_partial_export = partial_export_subcorpus(cqi_subcorpus, match_id_list, context=context)
return {'code': 200, 'msg': 'OK', 'payload': cqi_subcorpus_partial_export}
return cqi_subcorpus_partial_export
def cqp_export_subcorpus(
@ -240,4 +240,4 @@ def cqp_export_subcorpus(
cqi_corpus = cqi_client.corpora.get(corpus_name)
cqi_subcorpus = cqi_corpus.subcorpora.get(subcorpus_name)
cqi_subcorpus_export = export_subcorpus(cqi_subcorpus, context=context)
return {'code': 200, 'msg': 'OK', 'payload': cqi_subcorpus_export}
return cqi_subcorpus_export

View File

@ -1,115 +0,0 @@
from flask import session
from flask_login import current_user
from flask_socketio import ConnectionRefusedError
from threading import Lock
import cqi
from app import db, hashids, socketio
from app.decorators import socketio_login_required
from app.models import Corpus, CorpusStatus
'''
This package tunnels the Corpus Query interface (CQi) protocol through
Socket.IO (SIO) by wrapping each CQi function in a separate SIO event.
This module only handles the SIO connect/disconnect events, which set up
and tear down the resources needed for later use. Each CQi function has a
corresponding SIO event. The event handlers are spread across the different
modules within this package.
Basic concept:
1. A client connects to the SIO namespace and provides the id of a corpus to be
analysed.
1.1 The analysis session counter of the corpus is incremented.
1.2 A CQiClient and a (Mutex) Lock belonging to it is created.
1.3 Wait until the CQP server is running.
1.4 Connect the CQiClient to the server.
1.5 Save the CQiClient and the Lock in the session for subsequent use.
2. A client emits an event and may provide a single json object with necessary
arguments for the targeted CQi function.
3. A SIO event handler (decorated with cqi_over_socketio) gets executed.
- The event handler function defines all arguments. Since the client
sends them as a single json object, the decorator decomposes it to fit
the function's signature. This also includes type checking and proper
use of the lock (acquire/release) mechanism.
4. Wait for more events
5. The client disconnects from the SIO namespace
5.1 The analysis session counter of the corpus is decremented.
5.2 The CQiClient and (Mutex) Lock belonging to it are torn down.
'''
# Socket.IO namespace used by the connect/disconnect handlers in this module.
# The handler modules imported below read it back via
# `from . import NAMESPACE`, so it has to be defined before those imports run.
NAMESPACE = '/corpora/corpus/corpus_analysis'
# Import all CQi over Socket.IO event handlers
# (imported for their side effect of registering handlers through the
# @socketio.on decorators; the star imports are therefore marked noqa).
from .cqi_corpora_corpus_subcorpora import * # noqa
from .cqi_corpora_corpus_structural_attributes import * # noqa
from .cqi_corpora_corpus_positional_attributes import * # noqa
from .cqi_corpora_corpus_alignment_attributes import * # noqa
from .cqi_corpora_corpus import * # noqa
from .cqi_corpora import * # noqa
from .cqi import * # noqa
@socketio.on('connect', namespace=NAMESPACE)
@socketio_login_required
def connect(auth):
    """Start a corpus analysis session for the connecting client.

    The corpus is looked up, access and status are checked, the corpus'
    analysis session counter is incremented, and the handler then waits for
    the corpus to reach ``RUNNING_ANALYSIS_SESSION``. On success a CQi client
    and its lock are stored in ``session['d']`` for the other event handlers.

    Args:
        auth: Connection payload. It is (ab)used to transport the hashed id
            of the corpus to analyse under the key ``'corpus_id'``.

    Raises:
        ConnectionRefusedError: If the corpus does not exist ('Not Found'),
            the user may not access it ('Forbidden'), the corpus is not in
            a usable state ('Failed Dependency') or it did not reach the
            running state in time ('Request Timeout').
    """
    # The auth variable is used in a hacky way. It contains the corpus id for
    # which a corpus analysis session should be started.
    corpus_id = hashids.decode(auth['corpus_id'])
    corpus = Corpus.query.get(corpus_id)
    if corpus is None:
        raise ConnectionRefusedError('Not Found')
    if not (
        corpus.user == current_user
        or current_user.is_following_corpus(corpus)
        or current_user.is_administrator()
    ):
        raise ConnectionRefusedError('Forbidden')
    if corpus.status not in [
        CorpusStatus.BUILT,
        CorpusStatus.STARTING_ANALYSIS_SESSION,
        CorpusStatus.RUNNING_ANALYSIS_SESSION,
        CorpusStatus.CANCELING_ANALYSIS_SESSION
    ]:
        raise ConnectionRefusedError('Failed Dependency')
    if corpus.num_analysis_sessions is None:
        corpus.num_analysis_sessions = 0
        db.session.commit()
    # Column expression, so the increment is evaluated by the database.
    corpus.num_analysis_sessions = Corpus.num_analysis_sessions + 1
    db.session.commit()
    retry_counter = 20
    while corpus.status != CorpusStatus.RUNNING_ANALYSIS_SESSION:
        if retry_counter == 0:
            # Undo the increment from above before giving up.
            corpus.num_analysis_sessions = Corpus.num_analysis_sessions - 1
            db.session.commit()
            # A returned dict is truthy and would ACCEPT the connection
            # without any session data; refuse it like the other error paths.
            raise ConnectionRefusedError('Request Timeout')
        socketio.sleep(3)
        retry_counter -= 1
        db.session.refresh(corpus)
    cqi_client = cqi.CQiClient(f'cqpserver_{corpus_id}')
    session['d'] = {
        'corpus_id': corpus_id,
        'cqi_client': cqi_client,
        'cqi_client_lock': Lock(),
    }
@socketio.on('disconnect', namespace=NAMESPACE)
def disconnect():
    """Tear down the corpus analysis session of the disconnecting client.

    Disconnects the CQi client stored in ``session['d']`` (best effort),
    decrements the corpus' analysis session counter and removes the session
    data. Does nothing if no analysis session was set up for this client.
    """
    if 'd' not in session:
        return
    session_data = session['d']
    # Hold the lock via a context manager so it is released even if the
    # disconnect raises something other than the errors ignored below.
    with session_data['cqi_client_lock']:
        try:
            session_data['cqi_client'].disconnect()
        except (BrokenPipeError, cqi.errors.CQiException):
            # Best effort: the CQP server may already be gone.
            pass
    corpus = Corpus.query.get(session_data['corpus_id'])
    # The corpus may have been deleted while the session was open.
    if corpus is not None:
        corpus.num_analysis_sessions = Corpus.num_analysis_sessions - 1
        db.session.commit()
    session.pop('d')

View File

@ -1,43 +0,0 @@
from socket import gaierror
import cqi
from app import socketio
from app.decorators import socketio_login_required
from . import NAMESPACE as ns
from .utils import cqi_over_socketio
@socketio.on('cqi.connect', namespace=ns)
@socketio_login_required
@cqi_over_socketio
def cqi_connect(cqi_client: cqi.CQiClient):
    """Connect the session's CQi client and report the resulting status.

    A failed host name resolution (``socket.gaierror``) is reported as a
    500 response carrying the error's code and description.
    """
    try:
        status = cqi_client.connect()
    except gaierror as error:
        error_info = {'code': error.args[0], 'desc': error.args[1]}
        return {
            'code': 500,
            'msg': 'Internal Server Error',
            'payload': error_info
        }
    return {
        'code': 200,
        'msg': 'OK',
        'payload': {
            'code': status.code,
            'msg': status.__class__.__name__
        }
    }
@socketio.on('cqi.disconnect', namespace=ns)
@socketio_login_required
@cqi_over_socketio
def cqi_disconnect(cqi_client: cqi.CQiClient):
    """Disconnect the session's CQi client and report the resulting status."""
    status = cqi_client.disconnect()
    return {
        'code': 200,
        'msg': 'OK',
        'payload': {
            'code': status.code,
            'msg': status.__class__.__name__
        }
    }
@socketio.on('cqi.ping', namespace=ns)
@socketio_login_required
@cqi_over_socketio
def cqi_ping(cqi_client: cqi.CQiClient):
    """Ping through the session's CQi client and report the status."""
    status = cqi_client.ping()
    return {
        'code': 200,
        'msg': 'OK',
        'payload': {
            'code': status.code,
            'msg': status.__class__.__name__
        }
    }

View File

@ -1,22 +0,0 @@
import cqi
from app import socketio
from app.decorators import socketio_login_required
from . import NAMESPACE as ns
from .utils import cqi_over_socketio
@socketio.on('cqi.corpora.get', namespace=ns)
@socketio_login_required
@cqi_over_socketio
def cqi_corpora_get(cqi_client: cqi.CQiClient, corpus_name: str):
    """Return a copy of the named corpus' attributes in an OK response."""
    corpus_attrs = dict(cqi_client.corpora.get(corpus_name).attrs)
    return {'code': 200, 'msg': 'OK', 'payload': corpus_attrs}
@socketio.on('cqi.corpora.list', namespace=ns)
@socketio_login_required
@cqi_over_socketio
def cqi_corpora_list(cqi_client: cqi.CQiClient):
    """Return the attributes of every listed corpus in an OK response."""
    attrs_list = []
    for cqi_corpus in cqi_client.corpora.list():
        attrs_list.append(dict(cqi_corpus.attrs))
    return {'code': 200, 'msg': 'OK', 'payload': attrs_list}

View File

@ -1,199 +0,0 @@
from collections import Counter
from flask import session
import cqi
import json
import math
import os
from app import db, socketio
from app.decorators import socketio_login_required
from app.models import Corpus
from . import NAMESPACE as ns
from .utils import cqi_over_socketio, lookups_by_cpos
@socketio.on('cqi.corpora.corpus.drop', namespace=ns)
@socketio_login_required
@cqi_over_socketio
def cqi_corpora_corpus_drop(cqi_client: cqi.CQiClient, corpus_name: str):
    """Drop the named corpus and report the CQi status of the operation."""
    status = cqi_client.corpora.get(corpus_name).drop()
    return {
        'code': 200,
        'msg': 'OK',
        'payload': {
            'code': status.code,
            'msg': status.__class__.__name__
        }
    }
@socketio.on('cqi.corpora.corpus.query', namespace=ns)
@socketio_login_required
@cqi_over_socketio
def cqi_corpora_corpus_query(
    cqi_client: cqi.CQiClient,
    corpus_name: str,
    subcorpus_name: str,
    query: str
):
    """Run ``query`` on the corpus, storing the result as ``subcorpus_name``.

    The response payload carries the code and class name of the CQi status
    returned by the query call.
    """
    status = cqi_client.corpora.get(corpus_name).query(subcorpus_name, query)
    return {
        'code': 200,
        'msg': 'OK',
        'payload': {
            'code': status.code,
            'msg': status.__class__.__name__
        }
    }
###############################################################################
# nopaque specific CQi extensions #
###############################################################################
@socketio.on('cqi.corpora.corpus.update_db', namespace=ns)
@socketio_login_required
@cqi_over_socketio
def cqi_corpora_corpus_update_db(cqi_client: cqi.CQiClient, corpus_name: str):
    """Store the CQi corpus' size as the token count of the database corpus.

    Returns:
        An OK response without payload, or a 404 response if the corpus of
        the current analysis session no longer exists in the database.
    """
    corpus = Corpus.query.get(session['d']['corpus_id'])
    if corpus is None:
        return {'code': 404, 'msg': 'Not Found'}
    cqi_corpus = cqi_client.corpora.get(corpus_name)
    corpus.num_tokens = cqi_corpus.size
    db.session.commit()
    # Acknowledge like every other handler so the client always receives a
    # response object with a status code.
    return {'code': 200, 'msg': 'OK'}
# Socket.IO handler for 'cqi.corpora.corpus.get_visualization_data'.
# Returns {'code': 200, 'msg': 'OK', 'payload': ...}. The payload is either
# loaded from the corpus' cached cwb/visualization_data.json file or built
# from the CQi corpus and then written to that file.
# NOTE(review): indentation is not visible in this view of the file, so the
# remarks about loop nesting below are inferred from data flow -- confirm
# against the real file.
@socketio.on('cqi.corpora.corpus.get_visualization_data', namespace=ns)
@socketio_login_required
@cqi_over_socketio
def cqi_corpora_corpus_get_visualization_data(cqi_client: cqi.CQiClient, corpus_name: str):
corpus = Corpus.query.get(session['d']['corpus_id'])
visualization_data_file_path = os.path.join(corpus.path, 'cwb', 'visualization_data.json')
# Serve the cached result if it has been computed before.
if os.path.exists(visualization_data_file_path):
with open(visualization_data_file_path, 'r') as f:
payload = json.load(f)
return {'code': 200, 'msg': 'OK', 'payload': payload}
cqi_corpus = cqi_client.corpora.get(corpus_name)
##########################################################################
# A faster way to get cpos boundaries for smaller s_attrs #
##########################################################################
# (disabled sketch: would read match/matchend boundaries from a query result)
# cqi_corpus.query('Last', '<s> []* </s>;')
# cqi_subcorpus = cqi_corpus.subcorpora.get('Last')
# print(cqi_subcorpus.size)
# first_match = 0
# last_match = cqi_subcorpus.attrs['size'] - 1
# match_boundaries = zip(
# list(range(first_match, last_match + 1)),
# cqi_subcorpus.dump(cqi_subcorpus.attrs['fields']['match'], first_match, last_match),
# cqi_subcorpus.dump(cqi_subcorpus.attrs['fields']['matchend'], first_match, last_match)
# )
# for x in match_boundaries:
# print(x)
# Index the positional and structural attributes by name.
cqi_p_attrs = {
p_attr.name: p_attr
for p_attr in cqi_corpus.positional_attributes.list()
}
cqi_s_attrs = {
s_attr.name: s_attr
for s_attr in cqi_corpus.structural_attributes.list()
}
# Skeleton of the result; the loops below fill in the empty dicts.
payload = {
'corpus': {
'bounds': [0, cqi_corpus.size - 1],
'counts': {
'token': cqi_corpus.size
},
'freqs': {}
},
'p_attrs': {},
's_attrs': {},
'values': {'p_attrs': {}, 's_attrs': {}}
}
# Per positional attribute: frequency per lexicon id, lexicon id per corpus
# position and value per lexicon id.
for p_attr in cqi_p_attrs.values():
payload['corpus']['freqs'][p_attr.name] = dict(
zip(
range(0, p_attr.lexicon_size),
p_attr.freqs_by_ids(list(range(0, p_attr.lexicon_size)))
)
)
payload['p_attrs'][p_attr.name] = dict(
zip(
range(0, cqi_corpus.size),
p_attr.ids_by_cpos(list(range(0, cqi_corpus.size)))
)
)
payload['values']['p_attrs'][p_attr.name] = dict(
zip(
range(0, p_attr.lexicon_size),
p_attr.values_by_ids(list(range(0, p_attr.lexicon_size)))
)
)
for s_attr in cqi_s_attrs.values():
# Attributes that carry values are skipped here; they are picked up further
# down through the 'part_of' filter of their parent attribute.
if s_attr.has_values:
continue
payload['corpus']['counts'][s_attr.name] = s_attr.size
payload['s_attrs'][s_attr.name] = {'lexicon': {}, 'values': None}
payload['values']['s_attrs'][s_attr.name] = {}
# NOTE: the loop variable `id` shadows the builtin of the same name.
for id in range(0, s_attr.size):
payload['s_attrs'][s_attr.name]['lexicon'][id] = {}
lbound, rbound = s_attr.cpos_by_id(id)
payload['s_attrs'][s_attr.name]['lexicon'][id]['bounds'] = [lbound, rbound]
payload['s_attrs'][s_attr.name]['lexicon'][id]['counts'] = {}
payload['s_attrs'][s_attr.name]['lexicon'][id]['counts']['token'] = rbound - lbound + 1
# Only 'text' and 's' regions get the additional 'ent' count.
if s_attr.name not in ['text', 's']:
continue
cpos_range = range(lbound, rbound + 1)
# Indexes cqi_s_attrs['ent'] directly, so a KeyError is raised if the corpus
# has no structural attribute named 'ent'.
payload['s_attrs'][s_attr.name]['lexicon'][id]['counts']['ent'] = len({x for x in cqi_s_attrs['ent'].ids_by_cpos(list(cpos_range)) if x != -1})
# Only 'text' regions get the 's' count and per-attribute frequencies.
if s_attr.name != 'text':
continue
payload['s_attrs'][s_attr.name]['lexicon'][id]['counts']['s'] = len({x for x in cqi_s_attrs['s'].ids_by_cpos(list(cpos_range)) if x != -1})
payload['s_attrs'][s_attr.name]['lexicon'][id]['freqs'] = {}
for p_attr in cqi_p_attrs.values():
payload['s_attrs'][s_attr.name]['lexicon'][id]['freqs'][p_attr.name] = dict(Counter(p_attr.ids_by_cpos(list(cpos_range))))
# presumably executed once per s_attr, after the loop over ids -- TODO confirm
sub_s_attrs = cqi_corpus.structural_attributes.list(filters={'part_of': s_attr})
# Strip the '<parent name>' prefix plus one separator character.
s_attr_value_names = [
sub_s_attr.name[(len(s_attr.name) + 1):]
for sub_s_attr in sub_s_attrs
]
sub_s_attr_values = [
sub_s_attr.values_by_ids(list(range(0, s_attr.size)))
for sub_s_attr in sub_s_attrs
]
payload['s_attrs'][s_attr.name]['values'] = s_attr_value_names
# Maps every region id to {value name: value of that sub attribute}.
payload['values']['s_attrs'][s_attr.name] = {
s_attr_id: {
s_attr_value_name: sub_s_attr_values[s_attr_value_name_idx][s_attr_id_idx]
for s_attr_value_name_idx, s_attr_value_name in enumerate(
payload['s_attrs'][s_attr.name]['values']
)
} for s_attr_id_idx, s_attr_id in enumerate(range(0, s_attr.size))
}
# Cache the result for subsequent requests.
with open(visualization_data_file_path, 'w') as f:
json.dump(payload, f)
return {'code': 200, 'msg': 'OK', 'payload': payload}
@socketio.on('cqi.corpora.corpus.paginate', namespace=ns)
@socketio_login_required
@cqi_over_socketio
def cqi_corpora_corpus_paginate(
    cqi_client: cqi.CQiClient,
    corpus_name: str,
    page: int = 1,
    per_page: int = 20
):
    """Return one page of corpus positions together with their lookups.

    Args:
        page: 1-indexed number of the requested page.
        per_page: Number of corpus positions per page.

    Returns:
        A 416 response if the requested page is out of range, otherwise an
        OK response whose payload describes the page (items, lookups and
        the usual pagination fields).
    """
    cqi_corpus = cqi_client.corpora.get(corpus_name)
    total = cqi_corpus.size
    # Sanity checks
    out_of_range = {'code': 416, 'msg': 'Range Not Satisfiable'}
    if per_page < 1 or page < 1:
        return out_of_range
    if total > 0 and page > math.ceil(total / per_page):
        return out_of_range
    first_cpos = (page - 1) * per_page
    last_cpos = min(total, first_cpos + per_page)
    cpos_list = list(range(first_cpos, last_cpos))
    lookups = lookups_by_cpos(cqi_corpus, cpos_list)
    pages = math.ceil(total / per_page)
    # An empty corpus has no pages and therefore no current page.
    current_page = page if pages > 0 else None
    has_prev = current_page > 1 if current_page else False
    has_next = current_page < pages if current_page else False
    payload = {
        # the items for the current page
        'items': [cpos_list],
        # the lookups for the items
        'lookups': lookups,
        # the total number of items
        'total': total,
        # the number of items to be displayed on a page
        'per_page': per_page,
        # the total number of pages
        'pages': pages,
        # the current page number (1 indexed)
        'page': current_page,
        # True if a previous page exists
        'has_prev': has_prev,
        # True if a next page exists
        'has_next': has_next,
        # number of the previous page
        'prev_num': current_page - 1 if has_prev else None,
        # number of the next page
        'next_num': current_page + 1 if has_next else None
    }
    return {'code': 200, 'msg': 'OK', 'payload': payload}

View File

@ -1,24 +0,0 @@
import cqi
from app import socketio
from app.decorators import socketio_login_required
from . import NAMESPACE as ns
from .utils import cqi_over_socketio
@socketio.on('cqi.corpora.corpus.alignment_attributes.get', namespace=ns)
@socketio_login_required
@cqi_over_socketio
def cqi_corpora_corpus_alignment_attributes_get(
    cqi_client: cqi.CQiClient,
    corpus_name: str,
    alignment_attribute_name: str
):
    """Return a copy of the named alignment attribute's attrs."""
    cqi_corpus = cqi_client.corpora.get(corpus_name)
    attribute = cqi_corpus.alignment_attributes.get(alignment_attribute_name)
    return {'code': 200, 'msg': 'OK', 'payload': dict(attribute.attrs)}
@socketio.on('cqi.corpora.corpus.alignment_attributes.list', namespace=ns)
@socketio_login_required
@cqi_over_socketio
def cqi_corpora_corpus_alignment_attributes_list(
    cqi_client: cqi.CQiClient,
    corpus_name: str
):
    """Return the attrs of all alignment attributes of the corpus."""
    cqi_corpus = cqi_client.corpora.get(corpus_name)
    attrs_list = []
    for attribute in cqi_corpus.alignment_attributes.list():
        attrs_list.append(dict(attribute.attrs))
    return {'code': 200, 'msg': 'OK', 'payload': attrs_list}

View File

@ -1,24 +0,0 @@
import cqi
from app import socketio
from app.decorators import socketio_login_required
from . import NAMESPACE as ns
from .utils import cqi_over_socketio
@socketio.on('cqi.corpora.corpus.positional_attributes.get', namespace=ns)
@socketio_login_required
@cqi_over_socketio
def cqi_corpora_corpus_positional_attributes_get(
    cqi_client: cqi.CQiClient,
    corpus_name: str,
    positional_attribute_name: str
):
    """Return a copy of the named positional attribute's attrs."""
    cqi_corpus = cqi_client.corpora.get(corpus_name)
    attribute = cqi_corpus.positional_attributes.get(positional_attribute_name)
    return {'code': 200, 'msg': 'OK', 'payload': dict(attribute.attrs)}
@socketio.on('cqi.corpora.corpus.positional_attributes.list', namespace=ns)
@socketio_login_required
@cqi_over_socketio
def cqi_corpora_corpus_positional_attributes_list(
    cqi_client: cqi.CQiClient,
    corpus_name: str
):
    """Return the attrs of all positional attributes of the corpus."""
    cqi_corpus = cqi_client.corpora.get(corpus_name)
    attrs_list = []
    for attribute in cqi_corpus.positional_attributes.list():
        attrs_list.append(dict(attribute.attrs))
    return {'code': 200, 'msg': 'OK', 'payload': attrs_list}

View File

@ -1,24 +0,0 @@
import cqi
from app import socketio
from app.decorators import socketio_login_required
from . import NAMESPACE as ns
from .utils import cqi_over_socketio
@socketio.on('cqi.corpora.corpus.structural_attributes.get', namespace=ns)
@socketio_login_required
@cqi_over_socketio
def cqi_corpora_corpus_structural_attributes_get(
    cqi_client: cqi.CQiClient,
    corpus_name: str,
    structural_attribute_name: str
):
    """Return a copy of the named structural attribute's attrs."""
    cqi_corpus = cqi_client.corpora.get(corpus_name)
    attribute = cqi_corpus.structural_attributes.get(structural_attribute_name)
    return {'code': 200, 'msg': 'OK', 'payload': dict(attribute.attrs)}
@socketio.on('cqi.corpora.corpus.structural_attributes.list', namespace=ns)
@socketio_login_required
@cqi_over_socketio
def cqi_corpora_corpus_structural_attributes_list(
    cqi_client: cqi.CQiClient,
    corpus_name: str
):
    """Return the attrs of all structural attributes of the corpus."""
    cqi_corpus = cqi_client.corpora.get(corpus_name)
    attrs_list = []
    for attribute in cqi_corpus.structural_attributes.list():
        attrs_list.append(dict(attribute.attrs))
    return {'code': 200, 'msg': 'OK', 'payload': attrs_list}

View File

@ -1,140 +0,0 @@
import cqi
import math
from app import socketio
from app.decorators import socketio_login_required
from . import NAMESPACE as ns
from .utils import cqi_over_socketio, export_subcorpus, partial_export_subcorpus
@socketio.on('cqi.corpora.corpus.subcorpora.get', namespace=ns)
@socketio_login_required
@cqi_over_socketio
def cqi_corpora_corpus_subcorpora_get(
    cqi_client: cqi.CQiClient,
    corpus_name: str,
    subcorpus_name: str
):
    """Return a copy of the named subcorpus' attrs in an OK response."""
    cqi_corpus = cqi_client.corpora.get(corpus_name)
    subcorpus = cqi_corpus.subcorpora.get(subcorpus_name)
    return {'code': 200, 'msg': 'OK', 'payload': dict(subcorpus.attrs)}
@socketio.on('cqi.corpora.corpus.subcorpora.list', namespace=ns)
@socketio_login_required
@cqi_over_socketio
def cqi_corpora_corpus_subcorpora_list(
    cqi_client: cqi.CQiClient,
    corpus_name: str
):
    """Return the attrs of all subcorpora of the corpus."""
    cqi_corpus = cqi_client.corpora.get(corpus_name)
    attrs_list = []
    for subcorpus in cqi_corpus.subcorpora.list():
        attrs_list.append(dict(subcorpus.attrs))
    return {'code': 200, 'msg': 'OK', 'payload': attrs_list}
@socketio.on('cqi.corpora.corpus.subcorpora.subcorpus.drop', namespace=ns)
@socketio_login_required
@cqi_over_socketio
def cqi_corpora_corpus_subcorpora_subcorpus_drop(
    cqi_client: cqi.CQiClient,
    corpus_name: str,
    subcorpus_name: str
):
    """Drop the named subcorpus and report the CQi status of the call."""
    cqi_corpus = cqi_client.corpora.get(corpus_name)
    status = cqi_corpus.subcorpora.get(subcorpus_name).drop()
    return {
        'code': 200,
        'msg': 'OK',
        'payload': {
            'code': status.code,
            'msg': status.__class__.__name__
        }
    }
@socketio.on('cqi.corpora.corpus.subcorpora.subcorpus.dump', namespace=ns)
@socketio_login_required
@cqi_over_socketio
def cqi_corpora_corpus_subcorpora_subcorpus_dump(
    cqi_client: cqi.CQiClient,
    corpus_name: str,
    subcorpus_name: str,
    field: int,
    first: int,
    last: int
):
    """Return the subcorpus dump of ``field`` for matches first..last."""
    cqi_corpus = cqi_client.corpora.get(corpus_name)
    subcorpus = cqi_corpus.subcorpora.get(subcorpus_name)
    dumped = subcorpus.dump(field, first, last)
    return {'code': 200, 'msg': 'OK', 'payload': dumped}
@socketio.on('cqi.corpora.corpus.subcorpora.subcorpus.fdist_1', namespace=ns)
@socketio_login_required
@cqi_over_socketio
def cqi_corpora_corpus_subcorpora_subcorpus_fdist_1(
    cqi_client: cqi.CQiClient,
    corpus_name: str,
    subcorpus_name: str,
    cutoff: int,
    field_name: str,
    positional_attribute_name: str
):
    """Return the result of the subcorpus' ``fdist_1`` call.

    The field is resolved by name through the subcorpus' ``fields`` mapping
    and the positional attribute by name through the corpus.
    """
    cqi_corpus = cqi_client.corpora.get(corpus_name)
    subcorpus = cqi_corpus.subcorpora.get(subcorpus_name)
    field = subcorpus.fields[field_name]
    p_attr = cqi_corpus.positional_attributes.get(positional_attribute_name)
    distribution = subcorpus.fdist_1(cutoff, field, p_attr)
    return {'code': 200, 'msg': 'OK', 'payload': distribution}
@socketio.on('cqi.corpora.corpus.subcorpora.subcorpus.fdist_2', namespace=ns)
@socketio_login_required
@cqi_over_socketio
def cqi_corpora_corpus_subcorpora_subcorpus_fdist_2(
    cqi_client: cqi.CQiClient,
    corpus_name: str,
    subcorpus_name: str,
    cutoff: int,
    field_1_name: str,
    positional_attribute_1_name: str,
    field_2_name: str,
    positional_attribute_2_name: str
):
    """Return the result of the subcorpus' ``fdist_2`` call.

    Both fields are resolved by name through the subcorpus' ``fields``
    mapping and both positional attributes by name through the corpus.
    """
    cqi_corpus = cqi_client.corpora.get(corpus_name)
    subcorpus = cqi_corpus.subcorpora.get(subcorpus_name)
    p_attrs = cqi_corpus.positional_attributes
    first_field = subcorpus.fields[field_1_name]
    first_p_attr = p_attrs.get(positional_attribute_1_name)
    second_field = subcorpus.fields[field_2_name]
    second_p_attr = p_attrs.get(positional_attribute_2_name)
    distribution = subcorpus.fdist_2(
        cutoff, first_field, first_p_attr, second_field, second_p_attr
    )
    return {'code': 200, 'msg': 'OK', 'payload': distribution}
###############################################################################
# nopaque specific CQi extensions #
###############################################################################
@socketio.on('cqi.corpora.corpus.subcorpora.subcorpus.paginate', namespace=ns)
@socketio_login_required
@cqi_over_socketio
def cqi_corpora_corpus_subcorpora_subcorpus_paginate(
    cqi_client: cqi.CQiClient,
    corpus_name: str,
    subcorpus_name: str,
    context: int = 50,
    page: int = 1,
    per_page: int = 20
):
    """Return one page of subcorpus matches together with their lookups.

    Args:
        context: Context size handed on to ``export_subcorpus``.
        page: 1-indexed number of the requested page.
        per_page: Number of matches per page.

    Returns:
        A 416 response if the requested page is out of range, otherwise an
        OK response whose payload describes the page (items, lookups and
        the usual pagination fields).
    """
    cqi_corpus = cqi_client.corpora.get(corpus_name)
    cqi_subcorpus = cqi_corpus.subcorpora.get(subcorpus_name)
    # Sanity checks
    out_of_range = {'code': 416, 'msg': 'Range Not Satisfiable'}
    if per_page < 1 or page < 1:
        return out_of_range
    if (
        cqi_subcorpus.attrs['size'] > 0
        and page > math.ceil(cqi_subcorpus.attrs['size'] / per_page)
    ):
        return out_of_range
    exported = export_subcorpus(
        cqi_subcorpus,
        context=context,
        cutoff=per_page,
        offset=(page - 1) * per_page
    )
    # Everything that remains after removing the matches is lookup data.
    matches = exported.pop('matches')
    total = cqi_subcorpus.attrs['size']
    pages = math.ceil(total / per_page)
    # Without matches there are no pages and therefore no current page.
    current_page = page if pages > 0 else None
    has_prev = current_page > 1 if current_page else False
    has_next = current_page < pages if current_page else False
    payload = {
        # the items for the current page
        'items': matches,
        # the lookups for the items
        'lookups': exported,
        # the total number of items matching the query
        'total': total,
        # the number of items to be displayed on a page
        'per_page': per_page,
        # the total number of pages
        'pages': pages,
        # the current page number (1 indexed)
        'page': current_page,
        # True if a previous page exists
        'has_prev': has_prev,
        # True if a next page exists
        'has_next': has_next,
        # number of the previous page
        'prev_num': current_page - 1 if has_prev else None,
        # number of the next page
        'next_num': current_page + 1 if has_next else None
    }
    return {'code': 200, 'msg': 'OK', 'payload': payload}
@socketio.on('cqi.corpora.corpus.subcorpora.subcorpus.partial_export', namespace=ns)
@socketio_login_required
@cqi_over_socketio
def cqi_corpora_corpus_subcorpora_subcorpus_partial_export(
    cqi_client: cqi.CQiClient,
    corpus_name: str,
    subcorpus_name: str,
    match_id_list: list,
    context: int = 50
):
    """Export only the matches listed in ``match_id_list`` with context."""
    cqi_corpus = cqi_client.corpora.get(corpus_name)
    subcorpus = cqi_corpus.subcorpora.get(subcorpus_name)
    exported = partial_export_subcorpus(
        subcorpus, match_id_list, context=context
    )
    return {'code': 200, 'msg': 'OK', 'payload': exported}
@socketio.on('cqi.corpora.corpus.subcorpora.subcorpus.export', namespace=ns)
@socketio_login_required
@cqi_over_socketio
def cqi_corpora_corpus_subcorpora_subcorpus_export(
    cqi_client: cqi.CQiClient,
    corpus_name: str,
    subcorpus_name: str,
    context: int = 50
):
    """Export all matches of the named subcorpus with context."""
    cqi_corpus = cqi_client.corpora.get(corpus_name)
    subcorpus = cqi_corpus.subcorpora.get(subcorpus_name)
    exported = export_subcorpus(subcorpus, context=context)
    return {'code': 200, 'msg': 'OK', 'payload': exported}

View File

@ -1,178 +0,0 @@
from flask import session
from functools import wraps
from inspect import signature
import cqi
def cqi_over_socketio(f):
    """Adapt a CQi function so it can be used as a Socket.IO event handler.

    The wrapped handler expects at most one positional argument: a dict
    sent by the client that holds the arguments for ``f``. For every
    parameter of ``f``:

    - ``corpus_name`` and ``cqi_client`` are injected from ``session['d']``
      and are never read from the client payload.
    - Parameters without a default must be present in the payload.
    - Parameters with a default are optional.
    - A provided value must be exactly of the annotated type.

    ``f`` is executed while holding the session's CQi client lock.

    Returns:
        The wrapped handler. It returns the return value of ``f`` or an
        error response: 424 if no analysis session exists, 400 for a
        malformed payload, 500 for a broken pipe or a CQi error.
    """
    @wraps(f)
    def wrapped(*args):
        if 'd' not in session:
            return {'code': 424, 'msg': 'Failed Dependency'}
        # Events may be emitted without any payload; treat that like an
        # empty argument object instead of failing on args[0].
        client_args = args[0] if args and args[0] is not None else {}
        f_args = {}
        # Check for missing args and if all provided args are of the right type
        for param in signature(f).parameters.values():
            if param.name == 'corpus_name':
                f_args[param.name] = f'NOPAQUE_{session["d"]["corpus_id"]}'
                continue
            if param.name == 'cqi_client':
                f_args[param.name] = session['d']['cqi_client']
                continue
            # Only reject a non-dict payload when a value actually has to
            # be looked up in it.
            if not isinstance(client_args, dict):
                return {'code': 400, 'msg': 'Bad Request'}
            if param.name not in client_args:
                if param.default is param.empty:
                    # Required argument is missing
                    return {'code': 400, 'msg': 'Bad Request'}
                # Optional argument: let f fall back to its default
                continue
            arg = client_args[param.name]
            # Exact type match on purpose (e.g. a bool is not accepted
            # where an int is annotated).
            if type(arg) is not param.annotation:
                return {'code': 400, 'msg': 'Bad Request'}
            f_args[param.name] = arg
        # The context manager releases the lock on every exit path.
        with session['d']['cqi_client_lock']:
            try:
                return f(**f_args)
            except BrokenPipeError:
                return {
                    'code': 500,
                    'msg': 'Internal Server Error'
                }
            except cqi.errors.CQiException as e:
                return {
                    'code': 500,
                    'msg': 'Internal Server Error',
                    'payload': {
                        'code': e.code,
                        'desc': e.description,
                        'msg': e.__class__.__name__
                    }
                }
    return wrapped
def lookups_by_cpos(corpus, cpos_list):
    """Collect attribute lookups for the given corpus positions.

    Returns:
        A dict with the key ``'cpos_lookup'`` that maps every corpus
        position to its positional attribute values and to the ids of the
        structural attributes (without values) covering it. For each such
        structural attribute that occurs and has sub attributes, an
        additional ``'<name>_lookup'`` entry maps the occurring ids to
        their sub attribute values.
    """
    cpos_lookup = {cpos: {} for cpos in cpos_list}
    lookups = {'cpos_lookup': cpos_lookup}
    for p_attr in corpus.positional_attributes.list():
        p_attr_values = p_attr.values_by_cpos(cpos_list)
        for idx, cpos in enumerate(cpos_list):
            cpos_lookup[cpos][p_attr.attrs['name']] = p_attr_values[idx]
    for s_attr in corpus.structural_attributes.list():
        # We only want to iterate over non subattributes, identifiable by
        # attrs['has_values'] == False
        if s_attr.attrs['has_values']:
            continue
        s_attr_ids = s_attr.ids_by_cpos(cpos_list)
        for idx, cpos in enumerate(cpos_list):
            # -1 marks positions not covered by this attribute
            if s_attr_ids[idx] != -1:
                cpos_lookup[cpos][s_attr.attrs['name']] = s_attr_ids[idx]
        seen_ids = [x for x in set(s_attr_ids) if x != -1]
        if not seen_ids:
            continue
        sub_attrs = corpus.structural_attributes.list(
            filters={'part_of': s_attr}
        )
        if not sub_attrs:
            continue
        id_lookup = {attr_id: {} for attr_id in seen_ids}
        lookups[f'{s_attr.attrs["name"]}_lookup'] = id_lookup
        # Sub attribute names are '<parent name>' + one separator + key
        prefix_length = len(s_attr.attrs['name']) + 1
        for sub_attr in sub_attrs:
            key = sub_attr.attrs['name'][prefix_length:]
            sub_attr_values = sub_attr.values_by_ids(seen_ids)
            for idx, value in enumerate(sub_attr_values):
                id_lookup[seen_ids[idx]][key] = value
    return lookups
def partial_export_subcorpus(subcorpus, match_id_list, context=25):
    """Export the selected matches of a subcorpus with context and lookups.

    Args:
        subcorpus: The CQi subcorpus to export from.
        match_id_list: Ids of the matches to export; ids outside of
            ``0..size-1`` are ignored.
        context: Maximum number of corpus positions of left and right
            context per match.

    Returns:
        ``{'matches': [...]}`` merged with the lookups for all exported
        corpus positions. Each match is a dict with its number ``num``, the
        match span ``c`` and the context spans ``lc``/``rc`` (``None`` if
        there is no context on that side).
    """
    size = subcorpus.attrs['size']
    if size == 0:
        return {"matches": []}
    fields = subcorpus.attrs['fields']
    boundaries = [
        (
            match_id,
            subcorpus.dump(fields['match'], match_id, match_id)[0],
            subcorpus.dump(fields['matchend'], match_id, match_id)[0]
        )
        for match_id in match_id_list
        if not (match_id < 0 or match_id >= size)
    ]
    corpus = subcorpus.collection.corpus
    used_cpos = set()
    matches = []
    for num, start, end in boundaries:
        last_cpos = corpus.attrs['size'] - 1
        lc = None
        lower = start
        if start != 0 and context != 0:
            lc = (max(0, (start - context)), start - 1)
            lower = lc[0]
        rc = None
        upper = end
        if end != last_cpos and context != 0:
            rc = (end + 1, min((end + context), last_cpos))
            upper = rc[1]
        matches.append({'num': num, 'lc': lc, 'c': (start, end), 'rc': rc})
        used_cpos.update(range(lower, upper + 1))
    lookups = lookups_by_cpos(corpus, list(used_cpos))
    return {'matches': matches, **lookups}
def export_subcorpus(subcorpus, context=25, cutoff=float('inf'), offset=0):
    """Export a window of subcorpus matches with context and lookups.

    Args:
        subcorpus: The CQi subcorpus to export from.
        context: Maximum number of corpus positions of left and right
            context per match.
        cutoff: Maximum number of matches to export.
        offset: Number of the first match to export.

    Returns:
        ``{'matches': [...]}`` merged with the lookups for all exported
        corpus positions. Each match is a dict with its number ``num``, the
        match span ``c`` and the context spans ``lc``/``rc`` (``None`` if
        there is no context on that side). If the subcorpus is empty or the
        requested window contains no match, only ``{'matches': []}`` is
        returned.
    """
    if subcorpus.attrs['size'] == 0:
        return {"matches": []}
    first_match = max(0, offset)
    last_match = min((offset + cutoff - 1), (subcorpus.attrs['size'] - 1))
    # An offset behind the last match (or a cutoff < 1) yields an empty
    # window; do not ask the server to dump an inverted range.
    if first_match > last_match:
        return {"matches": []}
    fields = subcorpus.attrs['fields']
    match_boundaries = zip(
        range(first_match, last_match + 1),
        subcorpus.dump(fields['match'], first_match, last_match),
        subcorpus.dump(fields['matchend'], first_match, last_match)
    )
    corpus = subcorpus.collection.corpus
    last_cpos = corpus.attrs['size'] - 1
    cpos_set = set()
    matches = []
    for match_num, match_start, match_end in match_boundaries:
        if match_start == 0 or context == 0:
            # No left context available or requested
            lc = None
            cpos_list_lbound = match_start
        else:
            lc = (max(0, (match_start - context)), match_start - 1)
            cpos_list_lbound = lc[0]
        if match_end == last_cpos or context == 0:
            # No right context available or requested
            rc = None
            cpos_list_rbound = match_end
        else:
            rc = (match_end + 1, min((match_end + context), last_cpos))
            cpos_list_rbound = rc[1]
        matches.append({
            'num': match_num,
            'lc': lc,
            'c': (match_start, match_end),
            'rc': rc
        })
        cpos_set.update(range(cpos_list_lbound, cpos_list_rbound + 1))
    lookups = lookups_by_cpos(corpus, list(cpos_set))
    return {'matches': matches, **lookups}

View File

@ -237,7 +237,7 @@ class CorpusAnalysisConcordance {
app.flash('No matches selected', 'error');
return;
}
promise = subcorpus.o.partial_export([...subcorpus.selectedItems], 50);
promise = subcorpus.o.partialExport([...subcorpus.selectedItems], 50);
} else {
promise = subcorpus.o.export(50);
}