mirror of
https://gitlab.ub.uni-bielefeld.de/sfb1288inf/nopaque.git
synced 2024-11-15 01:05:42 +00:00
Small fixes and remove old cqi_over_socketio interface
This commit is contained in:
parent
3a97b1a07a
commit
a7a948908f
@ -16,5 +16,4 @@ def before_request():
|
|||||||
pass
|
pass
|
||||||
|
|
||||||
|
|
||||||
from . import cli, cqi_over_socketio, files, followers, routes, json_routes
|
from . import cli, cqi_over_sio, files, followers, routes, json_routes
|
||||||
from . import cqi_over_sio
|
|
||||||
|
@ -167,7 +167,7 @@ def corpus_paginate_corpus(
|
|||||||
payload['prev_num'] = payload['page'] - 1 if payload['has_prev'] else None
|
payload['prev_num'] = payload['page'] - 1 if payload['has_prev'] else None
|
||||||
# Number of the next page
|
# Number of the next page
|
||||||
payload['next_num'] = payload['page'] + 1 if payload['has_next'] else None
|
payload['next_num'] = payload['page'] + 1 if payload['has_next'] else None
|
||||||
return {'code': 200, 'msg': 'OK', 'payload': payload}
|
return payload
|
||||||
|
|
||||||
|
|
||||||
def cqp_paginate_subcorpus(
|
def cqp_paginate_subcorpus(
|
||||||
@ -215,7 +215,7 @@ def cqp_paginate_subcorpus(
|
|||||||
payload['prev_num'] = payload['page'] - 1 if payload['has_prev'] else None
|
payload['prev_num'] = payload['page'] - 1 if payload['has_prev'] else None
|
||||||
# Number of the next page
|
# Number of the next page
|
||||||
payload['next_num'] = payload['page'] + 1 if payload['has_next'] else None
|
payload['next_num'] = payload['page'] + 1 if payload['has_next'] else None
|
||||||
return {'code': 200, 'msg': 'OK', 'payload': payload}
|
return payload
|
||||||
|
|
||||||
|
|
||||||
def cqp_partial_export_subcorpus(
|
def cqp_partial_export_subcorpus(
|
||||||
@ -228,7 +228,7 @@ def cqp_partial_export_subcorpus(
|
|||||||
cqi_corpus = cqi_client.corpora.get(corpus_name)
|
cqi_corpus = cqi_client.corpora.get(corpus_name)
|
||||||
cqi_subcorpus = cqi_corpus.subcorpora.get(subcorpus_name)
|
cqi_subcorpus = cqi_corpus.subcorpora.get(subcorpus_name)
|
||||||
cqi_subcorpus_partial_export = partial_export_subcorpus(cqi_subcorpus, match_id_list, context=context)
|
cqi_subcorpus_partial_export = partial_export_subcorpus(cqi_subcorpus, match_id_list, context=context)
|
||||||
return {'code': 200, 'msg': 'OK', 'payload': cqi_subcorpus_partial_export}
|
return cqi_subcorpus_partial_export
|
||||||
|
|
||||||
|
|
||||||
def cqp_export_subcorpus(
|
def cqp_export_subcorpus(
|
||||||
@ -240,4 +240,4 @@ def cqp_export_subcorpus(
|
|||||||
cqi_corpus = cqi_client.corpora.get(corpus_name)
|
cqi_corpus = cqi_client.corpora.get(corpus_name)
|
||||||
cqi_subcorpus = cqi_corpus.subcorpora.get(subcorpus_name)
|
cqi_subcorpus = cqi_corpus.subcorpora.get(subcorpus_name)
|
||||||
cqi_subcorpus_export = export_subcorpus(cqi_subcorpus, context=context)
|
cqi_subcorpus_export = export_subcorpus(cqi_subcorpus, context=context)
|
||||||
return {'code': 200, 'msg': 'OK', 'payload': cqi_subcorpus_export}
|
return cqi_subcorpus_export
|
||||||
|
@ -1,115 +0,0 @@
|
|||||||
from flask import session
|
|
||||||
from flask_login import current_user
|
|
||||||
from flask_socketio import ConnectionRefusedError
|
|
||||||
from threading import Lock
|
|
||||||
import cqi
|
|
||||||
from app import db, hashids, socketio
|
|
||||||
from app.decorators import socketio_login_required
|
|
||||||
from app.models import Corpus, CorpusStatus
|
|
||||||
|
|
||||||
|
|
||||||
'''
|
|
||||||
This package tunnels the Corpus Query interface (CQi) protocol through
|
|
||||||
Socket.IO (SIO) by wrapping each CQi function in a separate SIO event.
|
|
||||||
|
|
||||||
This module only handles the SIO connect/disconnect, which handles the setup
|
|
||||||
and teardown of necessary resources for later use. Each CQi function has a
|
|
||||||
corresponding SIO event. The event handlers are spread across the different
|
|
||||||
modules within this package.
|
|
||||||
|
|
||||||
Basic concept:
|
|
||||||
1. A client connects to the SIO namespace and provides the id of a corpus to be
|
|
||||||
analysed.
|
|
||||||
1.1 The analysis session counter of the corpus is incremented.
|
|
||||||
1.2 A CQiClient and a (Mutex) Lock belonging to it is created.
|
|
||||||
1.3 Wait until the CQP server is running.
|
|
||||||
1.4 Connect the CQiClient to the server.
|
|
||||||
1.5 Save the CQiClient and the Lock in the session for subsequent use.
|
|
||||||
2. A client emits an event and may provide a single json object with necessary
|
|
||||||
arguments for the targeted CQi function.
|
|
||||||
3. A SIO event handler (decorated with cqi_over_socketio) gets executed.
|
|
||||||
- The event handler function defines all arguments. Because the client
|
|
||||||
sends them as a single json object, the decorator decomposes it to fit
|
|
||||||
the functions signature. This also includes type checking and proper
|
|
||||||
use of the lock (acquire/release) mechanism.
|
|
||||||
4. Wait for more events
|
|
||||||
5. The client disconnects from the SIO namespace
|
|
||||||
5.1 The analysis session counter of the corpus is decremented.
|
|
||||||
5.2 The CQiClient and (Mutex) Lock belonging to it are torn down.
|
|
||||||
'''
|
|
||||||
|
|
||||||
|
|
||||||
# Socket.IO namespace on which the connect/disconnect handlers below and the
# CQi event handlers of this package are registered.
NAMESPACE = '/corpora/corpus/corpus_analysis'
|
|
||||||
|
|
||||||
|
|
||||||
# Import all CQi over Socket.IO event handlers
|
|
||||||
from .cqi_corpora_corpus_subcorpora import * # noqa
|
|
||||||
from .cqi_corpora_corpus_structural_attributes import * # noqa
|
|
||||||
from .cqi_corpora_corpus_positional_attributes import * # noqa
|
|
||||||
from .cqi_corpora_corpus_alignment_attributes import * # noqa
|
|
||||||
from .cqi_corpora_corpus import * # noqa
|
|
||||||
from .cqi_corpora import * # noqa
|
|
||||||
from .cqi import * # noqa
|
|
||||||
|
|
||||||
|
|
||||||
@socketio.on('connect', namespace=NAMESPACE)
@socketio_login_required
def connect(auth):
    '''
    Start a corpus analysis session for the connecting Socket.IO client.

    The analysis session counter of the corpus is incremented, then the
    handler polls (at most 20 times, 3 seconds apart) until the corpus status
    is RUNNING_ANALYSIS_SESSION. Afterwards a CQiClient and a Lock are stored
    in the session under the key 'd' for use by later event handlers.

    :param auth: Mapping sent by the client. It is used in a hacky way: it
        carries the hashid of the corpus to analyse under 'corpus_id'.
    :raises ConnectionRefusedError: If the corpus does not exist
        ('Not Found'), the user may not access it ('Forbidden'), the corpus
        status does not allow an analysis session ('Failed Dependency') or
        the session did not start in time ('Request Timeout').
    '''
    corpus_id = hashids.decode(auth['corpus_id'])
    corpus = Corpus.query.get(corpus_id)
    if corpus is None:
        raise ConnectionRefusedError('Not Found')
    if not (corpus.user == current_user
            or current_user.is_following_corpus(corpus)
            or current_user.is_administrator()):
        raise ConnectionRefusedError('Forbidden')
    if corpus.status not in [
        CorpusStatus.BUILT,
        CorpusStatus.STARTING_ANALYSIS_SESSION,
        CorpusStatus.RUNNING_ANALYSIS_SESSION,
        CorpusStatus.CANCELING_ANALYSIS_SESSION
    ]:
        raise ConnectionRefusedError('Failed Dependency')
    if corpus.num_analysis_sessions is None:
        corpus.num_analysis_sessions = 0
        db.session.commit()
    # The column expression (Corpus.…, not corpus.…) makes the database
    # compute the new value.
    corpus.num_analysis_sessions = Corpus.num_analysis_sessions + 1
    db.session.commit()
    retry_counter = 20
    while corpus.status != CorpusStatus.RUNNING_ANALYSIS_SESSION:
        if retry_counter == 0:
            # Undo the increment from above before giving up.
            corpus.num_analysis_sessions = Corpus.num_analysis_sessions - 1
            db.session.commit()
            # Returning a dict here would not refuse the connection; the
            # client would be connected without session['d']. Refuse it the
            # same way all other failure paths of this handler do.
            raise ConnectionRefusedError('Request Timeout')
        socketio.sleep(3)
        retry_counter -= 1
        db.session.refresh(corpus)
    cqi_client = cqi.CQiClient(f'cqpserver_{corpus_id}')
    session['d'] = {
        'corpus_id': corpus_id,
        'cqi_client': cqi_client,
        'cqi_client_lock': Lock(),
    }
|
|
||||||
|
|
||||||
|
|
||||||
@socketio.on('disconnect', namespace=NAMESPACE)
def disconnect():
    '''
    Tear down the corpus analysis session stored in the session.

    Disconnects the CQiClient while holding its lock, decrements the
    analysis session counter of the corpus and removes the 'd' entry from
    the session. Does nothing if no 'd' entry exists.
    '''
    if 'd' not in session:
        return
    session_data = session['d']
    # The context manager releases the lock even if disconnect() raises an
    # exception that is not handled below.
    with session_data['cqi_client_lock']:
        try:
            session_data['cqi_client'].disconnect()
        except (BrokenPipeError, cqi.errors.CQiException):
            # Best effort: a failing CQi disconnect must not prevent the
            # cleanup below.
            pass
    corpus = Corpus.query.get(session_data['corpus_id'])
    # The query returns None when the corpus no longer exists; there is no
    # counter to decrement in that case.
    if corpus is not None:
        corpus.num_analysis_sessions = Corpus.num_analysis_sessions - 1
        db.session.commit()
    session.pop('d')
|
|
@ -1,43 +0,0 @@
|
|||||||
from socket import gaierror
|
|
||||||
import cqi
|
|
||||||
from app import socketio
|
|
||||||
from app.decorators import socketio_login_required
|
|
||||||
from . import NAMESPACE as ns
|
|
||||||
from .utils import cqi_over_socketio
|
|
||||||
|
|
||||||
|
|
||||||
@socketio.on('cqi.connect', namespace=ns)
@socketio_login_required
@cqi_over_socketio
def cqi_connect(cqi_client: cqi.CQiClient):
    '''
    Connect the CQiClient and report the returned CQi status.

    A gaierror raised while connecting is answered with a 500 response whose
    payload carries the first two arguments of the error.
    '''
    try:
        status = cqi_client.connect()
    except gaierror as error:
        error_payload = {'code': error.args[0], 'desc': error.args[1]}
        return {
            'code': 500,
            'msg': 'Internal Server Error',
            'payload': error_payload
        }
    return {
        'code': 200,
        'msg': 'OK',
        'payload': {'code': status.code, 'msg': type(status).__name__}
    }
|
|
||||||
|
|
||||||
|
|
||||||
@socketio.on('cqi.disconnect', namespace=ns)
@socketio_login_required
@cqi_over_socketio
def cqi_disconnect(cqi_client: cqi.CQiClient):
    '''Disconnect the CQiClient and report the returned CQi status.'''
    status = cqi_client.disconnect()
    return {
        'code': 200,
        'msg': 'OK',
        'payload': {'code': status.code, 'msg': type(status).__name__}
    }
|
|
||||||
|
|
||||||
|
|
||||||
@socketio.on('cqi.ping', namespace=ns)
@socketio_login_required
@cqi_over_socketio
def cqi_ping(cqi_client: cqi.CQiClient):
    '''Ping via the CQiClient and report the returned CQi status.'''
    status = cqi_client.ping()
    return {
        'code': 200,
        'msg': 'OK',
        'payload': {'code': status.code, 'msg': type(status).__name__}
    }
|
|
@ -1,22 +0,0 @@
|
|||||||
import cqi
|
|
||||||
from app import socketio
|
|
||||||
from app.decorators import socketio_login_required
|
|
||||||
from . import NAMESPACE as ns
|
|
||||||
from .utils import cqi_over_socketio
|
|
||||||
|
|
||||||
|
|
||||||
@socketio.on('cqi.corpora.get', namespace=ns)
@socketio_login_required
@cqi_over_socketio
def cqi_corpora_get(cqi_client: cqi.CQiClient, corpus_name: str):
    '''Return a copy of the attrs of the CQi corpus named corpus_name.'''
    corpus_attrs = dict(cqi_client.corpora.get(corpus_name).attrs)
    return {'code': 200, 'msg': 'OK', 'payload': corpus_attrs}
|
|
||||||
|
|
||||||
|
|
||||||
@socketio.on('cqi.corpora.list', namespace=ns)
@socketio_login_required
@cqi_over_socketio
def cqi_corpora_list(cqi_client: cqi.CQiClient):
    '''Return copies of the attrs of all corpora listed by the CQiClient.'''
    all_attrs = []
    for listed_corpus in cqi_client.corpora.list():
        all_attrs.append(dict(listed_corpus.attrs))
    return {'code': 200, 'msg': 'OK', 'payload': all_attrs}
|
|
@ -1,199 +0,0 @@
|
|||||||
from collections import Counter
|
|
||||||
from flask import session
|
|
||||||
import cqi
|
|
||||||
import json
|
|
||||||
import math
|
|
||||||
import os
|
|
||||||
from app import db, socketio
|
|
||||||
from app.decorators import socketio_login_required
|
|
||||||
from app.models import Corpus
|
|
||||||
from . import NAMESPACE as ns
|
|
||||||
from .utils import cqi_over_socketio, lookups_by_cpos
|
|
||||||
|
|
||||||
|
|
||||||
@socketio.on('cqi.corpora.corpus.drop', namespace=ns)
@socketio_login_required
@cqi_over_socketio
def cqi_corpora_corpus_drop(cqi_client: cqi.CQiClient, corpus_name: str):
    '''Drop the CQi corpus and report the returned CQi status.'''
    status = cqi_client.corpora.get(corpus_name).drop()
    return {
        'code': 200,
        'msg': 'OK',
        'payload': {'code': status.code, 'msg': type(status).__name__}
    }
|
|
||||||
|
|
||||||
|
|
||||||
@socketio.on('cqi.corpora.corpus.query', namespace=ns)
@socketio_login_required
@cqi_over_socketio
def cqi_corpora_corpus_query(
    cqi_client: cqi.CQiClient,
    corpus_name: str,
    subcorpus_name: str,
    query: str
):
    '''
    Run query on the CQi corpus, passing subcorpus_name along, and report
    the returned CQi status.
    '''
    status = cqi_client.corpora.get(corpus_name).query(subcorpus_name, query)
    return {
        'code': 200,
        'msg': 'OK',
        'payload': {'code': status.code, 'msg': type(status).__name__}
    }
|
|
||||||
|
|
||||||
|
|
||||||
###############################################################################
|
|
||||||
# nopaque specific CQi extensions #
|
|
||||||
###############################################################################
|
|
||||||
@socketio.on('cqi.corpora.corpus.update_db', namespace=ns)
@socketio_login_required
@cqi_over_socketio
def cqi_corpora_corpus_update_db(cqi_client: cqi.CQiClient, corpus_name: str):
    '''
    Store the size of the CQi corpus as num_tokens of the database corpus.

    :returns: A status dict in the same format the other handlers of this
        module return: 200 on success, 404 if the corpus of the current
        session is not found in the database.
    '''
    corpus = Corpus.query.get(session['d']['corpus_id'])
    if corpus is None:
        return {'code': 404, 'msg': 'Not Found'}
    cqi_corpus = cqi_client.corpora.get(corpus_name)
    corpus.num_tokens = cqi_corpus.size
    db.session.commit()
    # Answer explicitly instead of implicitly returning None, like every
    # other handler in this module does.
    return {'code': 200, 'msg': 'OK'}
|
|
||||||
|
|
||||||
|
|
||||||
@socketio.on('cqi.corpora.corpus.get_visualization_data', namespace=ns)
@socketio_login_required
@cqi_over_socketio
def cqi_corpora_corpus_get_visualization_data(cqi_client: cqi.CQiClient, corpus_name: str):
    '''
    Return the visualization data of the corpus of the current session.

    The data is cached as JSON in <corpus.path>/cwb/visualization_data.json.
    If that file exists its content is returned as is. Otherwise the data is
    collected via CQi, written to that file and returned.

    The payload has the keys 'corpus' (bounds, counts, freqs), 'p_attrs',
    's_attrs' and 'values'.
    '''
    corpus = Corpus.query.get(session['d']['corpus_id'])
    visualization_data_file_path = os.path.join(corpus.path, 'cwb', 'visualization_data.json')
    if os.path.exists(visualization_data_file_path):
        with open(visualization_data_file_path, 'r') as f:
            payload = json.load(f)
        return {'code': 200, 'msg': 'OK', 'payload': payload}
    cqi_corpus = cqi_client.corpora.get(corpus_name)
    # NOTE: An earlier draft sketched a possibly faster way to get the cpos
    # boundaries of smaller s_attrs: query '<s> []* </s>;' into a subcorpus
    # and dump its 'match' and 'matchend' fields.
    cqi_p_attrs = {
        p_attr.name: p_attr
        for p_attr in cqi_corpus.positional_attributes.list()
    }
    cqi_s_attrs = {
        s_attr.name: s_attr
        for s_attr in cqi_corpus.structural_attributes.list()
    }
    corpus_size = cqi_corpus.size
    payload = {
        'corpus': {
            'bounds': [0, corpus_size - 1],
            'counts': {
                'token': corpus_size
            },
            'freqs': {}
        },
        'p_attrs': {},
        's_attrs': {},
        'values': {'p_attrs': {}, 's_attrs': {}}
    }
    # Every corpus position; identical for all positional attributes.
    all_cpos = list(range(corpus_size))
    for p_attr in cqi_p_attrs.values():
        lexicon_ids = list(range(p_attr.lexicon_size))
        payload['corpus']['freqs'][p_attr.name] = dict(
            zip(lexicon_ids, p_attr.freqs_by_ids(lexicon_ids))
        )
        payload['p_attrs'][p_attr.name] = dict(
            zip(all_cpos, p_attr.ids_by_cpos(all_cpos))
        )
        payload['values']['p_attrs'][p_attr.name] = dict(
            zip(lexicon_ids, p_attr.values_by_ids(lexicon_ids))
        )
    for s_attr in cqi_s_attrs.values():
        if s_attr.has_values:
            continue
        s_attr_ids = list(range(s_attr.size))
        payload['corpus']['counts'][s_attr.name] = len(s_attr_ids)
        lexicon = {}
        payload['s_attrs'][s_attr.name] = {'lexicon': lexicon, 'values': None}
        payload['values']['s_attrs'][s_attr.name] = {}
        for s_attr_id in s_attr_ids:
            lbound, rbound = s_attr.cpos_by_id(s_attr_id)
            counts = {'token': rbound - lbound + 1}
            entry = {'bounds': [lbound, rbound], 'counts': counts}
            lexicon[s_attr_id] = entry
            # Entity counts are only collected for 'text' and 's'.
            if s_attr.name not in ['text', 's']:
                continue
            cpos_list = list(range(lbound, rbound + 1))
            # ids equal to -1 are excluded from the count.
            # NOTE(review): this requires an 'ent' structural attribute to
            # exist in the corpus - confirm.
            counts['ent'] = len({
                x for x in cqi_s_attrs['ent'].ids_by_cpos(cpos_list)
                if x != -1
            })
            # Sentence counts and frequencies are only collected for 'text'.
            if s_attr.name != 'text':
                continue
            counts['s'] = len({
                x for x in cqi_s_attrs['s'].ids_by_cpos(cpos_list)
                if x != -1
            })
            entry['freqs'] = {
                p_attr.name: dict(Counter(p_attr.ids_by_cpos(cpos_list)))
                for p_attr in cqi_p_attrs.values()
            }
        sub_s_attrs = cqi_corpus.structural_attributes.list(filters={'part_of': s_attr})
        # Strip the '<s_attr.name>' prefix plus one separator character.
        s_attr_value_names = [
            sub_s_attr.name[(len(s_attr.name) + 1):]
            for sub_s_attr in sub_s_attrs
        ]
        sub_s_attr_values = [
            sub_s_attr.values_by_ids(s_attr_ids)
            for sub_s_attr in sub_s_attrs
        ]
        payload['s_attrs'][s_attr.name]['values'] = s_attr_value_names
        payload['values']['s_attrs'][s_attr.name] = {
            s_attr_id: {
                value_name: values[s_attr_id]
                for value_name, values in zip(s_attr_value_names, sub_s_attr_values)
            }
            for s_attr_id in s_attr_ids
        }
    with open(visualization_data_file_path, 'w') as f:
        json.dump(payload, f)
    return {'code': 200, 'msg': 'OK', 'payload': payload}
|
|
||||||
|
|
||||||
|
|
||||||
@socketio.on('cqi.corpora.corpus.paginate', namespace=ns)
@socketio_login_required
@cqi_over_socketio
def cqi_corpora_corpus_paginate(
    cqi_client: cqi.CQiClient,
    corpus_name: str,
    page: int = 1,
    per_page: int = 20
):
    '''
    Return one page of corpus positions together with their lookups.

    Answers 416 if per_page or page is smaller than 1, or if page lies
    behind the last page of a non-empty corpus. The payload describes the
    page (items, lookups) and the pagination state (total, per_page, pages,
    page, has_prev, has_next, prev_num, next_num).
    '''
    cqi_corpus = cqi_client.corpora.get(corpus_name)
    total = cqi_corpus.size
    # Sanity checks
    if per_page < 1 or page < 1:
        return {'code': 416, 'msg': 'Range Not Satisfiable'}
    if total > 0 and page > math.ceil(total / per_page):
        return {'code': 416, 'msg': 'Range Not Satisfiable'}
    start = (page - 1) * per_page
    stop = min(total, start + per_page)
    cpos_list = list(range(start, stop))
    lookups = lookups_by_cpos(cqi_corpus, cpos_list)
    pages = math.ceil(total / per_page)
    # There is no current page when there are no pages at all.
    current = page if pages > 0 else None
    has_prev = current is not None and current > 1
    has_next = current is not None and current < pages
    payload = {
        # the items for the current page
        'items': [cpos_list],
        # the lookups for the items
        'lookups': lookups,
        # the total number of items
        'total': total,
        # the number of items to be displayed on a page
        'per_page': per_page,
        # the total number of pages
        'pages': pages,
        # the current page number (1 indexed)
        'page': current,
        'has_prev': has_prev,
        'has_next': has_next,
        'prev_num': current - 1 if has_prev else None,
        'next_num': current + 1 if has_next else None
    }
    return {'code': 200, 'msg': 'OK', 'payload': payload}
|
|
@ -1,24 +0,0 @@
|
|||||||
import cqi
|
|
||||||
from app import socketio
|
|
||||||
from app.decorators import socketio_login_required
|
|
||||||
from . import NAMESPACE as ns
|
|
||||||
from .utils import cqi_over_socketio
|
|
||||||
|
|
||||||
|
|
||||||
@socketio.on('cqi.corpora.corpus.alignment_attributes.get', namespace=ns)
@socketio_login_required
@cqi_over_socketio
def cqi_corpora_corpus_alignment_attributes_get(
    cqi_client: cqi.CQiClient,
    corpus_name: str,
    alignment_attribute_name: str
):
    '''Return a copy of the attrs of one alignment attribute of the corpus.'''
    corpus = cqi_client.corpora.get(corpus_name)
    attribute = corpus.alignment_attributes.get(alignment_attribute_name)
    return {'code': 200, 'msg': 'OK', 'payload': dict(attribute.attrs)}
|
|
||||||
|
|
||||||
|
|
||||||
@socketio.on('cqi.corpora.corpus.alignment_attributes.list', namespace=ns)
@socketio_login_required
@cqi_over_socketio
def cqi_corpora_corpus_alignment_attributes_list(
    cqi_client: cqi.CQiClient,
    corpus_name: str
):
    '''Return copies of the attrs of all alignment attributes of the corpus.'''
    corpus = cqi_client.corpora.get(corpus_name)
    all_attrs = []
    for attribute in corpus.alignment_attributes.list():
        all_attrs.append(dict(attribute.attrs))
    return {'code': 200, 'msg': 'OK', 'payload': all_attrs}
|
|
@ -1,24 +0,0 @@
|
|||||||
import cqi
|
|
||||||
from app import socketio
|
|
||||||
from app.decorators import socketio_login_required
|
|
||||||
from . import NAMESPACE as ns
|
|
||||||
from .utils import cqi_over_socketio
|
|
||||||
|
|
||||||
|
|
||||||
@socketio.on('cqi.corpora.corpus.positional_attributes.get', namespace=ns)
@socketio_login_required
@cqi_over_socketio
def cqi_corpora_corpus_positional_attributes_get(
    cqi_client: cqi.CQiClient,
    corpus_name: str,
    positional_attribute_name: str
):
    '''Return a copy of the attrs of one positional attribute of the corpus.'''
    corpus = cqi_client.corpora.get(corpus_name)
    attribute = corpus.positional_attributes.get(positional_attribute_name)
    return {'code': 200, 'msg': 'OK', 'payload': dict(attribute.attrs)}
|
|
||||||
|
|
||||||
|
|
||||||
@socketio.on('cqi.corpora.corpus.positional_attributes.list', namespace=ns)
@socketio_login_required
@cqi_over_socketio
def cqi_corpora_corpus_positional_attributes_list(
    cqi_client: cqi.CQiClient,
    corpus_name: str
):
    '''Return copies of the attrs of all positional attributes of the corpus.'''
    corpus = cqi_client.corpora.get(corpus_name)
    all_attrs = []
    for attribute in corpus.positional_attributes.list():
        all_attrs.append(dict(attribute.attrs))
    return {'code': 200, 'msg': 'OK', 'payload': all_attrs}
|
|
@ -1,24 +0,0 @@
|
|||||||
import cqi
|
|
||||||
from app import socketio
|
|
||||||
from app.decorators import socketio_login_required
|
|
||||||
from . import NAMESPACE as ns
|
|
||||||
from .utils import cqi_over_socketio
|
|
||||||
|
|
||||||
|
|
||||||
@socketio.on('cqi.corpora.corpus.structural_attributes.get', namespace=ns)
@socketio_login_required
@cqi_over_socketio
def cqi_corpora_corpus_structural_attributes_get(
    cqi_client: cqi.CQiClient,
    corpus_name: str,
    structural_attribute_name: str
):
    '''Return a copy of the attrs of one structural attribute of the corpus.'''
    corpus = cqi_client.corpora.get(corpus_name)
    attribute = corpus.structural_attributes.get(structural_attribute_name)
    return {'code': 200, 'msg': 'OK', 'payload': dict(attribute.attrs)}
|
|
||||||
|
|
||||||
|
|
||||||
@socketio.on('cqi.corpora.corpus.structural_attributes.list', namespace=ns)
@socketio_login_required
@cqi_over_socketio
def cqi_corpora_corpus_structural_attributes_list(
    cqi_client: cqi.CQiClient,
    corpus_name: str
):
    '''Return copies of the attrs of all structural attributes of the corpus.'''
    corpus = cqi_client.corpora.get(corpus_name)
    all_attrs = []
    for attribute in corpus.structural_attributes.list():
        all_attrs.append(dict(attribute.attrs))
    return {'code': 200, 'msg': 'OK', 'payload': all_attrs}
|
|
@ -1,140 +0,0 @@
|
|||||||
import cqi
|
|
||||||
import math
|
|
||||||
from app import socketio
|
|
||||||
from app.decorators import socketio_login_required
|
|
||||||
from . import NAMESPACE as ns
|
|
||||||
from .utils import cqi_over_socketio, export_subcorpus, partial_export_subcorpus
|
|
||||||
|
|
||||||
|
|
||||||
@socketio.on('cqi.corpora.corpus.subcorpora.get', namespace=ns)
@socketio_login_required
@cqi_over_socketio
def cqi_corpora_corpus_subcorpora_get(
    cqi_client: cqi.CQiClient,
    corpus_name: str,
    subcorpus_name: str
):
    '''Return a copy of the attrs of one subcorpus of the corpus.'''
    corpus = cqi_client.corpora.get(corpus_name)
    subcorpus = corpus.subcorpora.get(subcorpus_name)
    return {'code': 200, 'msg': 'OK', 'payload': dict(subcorpus.attrs)}
|
|
||||||
|
|
||||||
|
|
||||||
@socketio.on('cqi.corpora.corpus.subcorpora.list', namespace=ns)
@socketio_login_required
@cqi_over_socketio
def cqi_corpora_corpus_subcorpora_list(
    cqi_client: cqi.CQiClient,
    corpus_name: str
):
    '''Return copies of the attrs of all subcorpora of the corpus.'''
    corpus = cqi_client.corpora.get(corpus_name)
    all_attrs = []
    for subcorpus in corpus.subcorpora.list():
        all_attrs.append(dict(subcorpus.attrs))
    return {'code': 200, 'msg': 'OK', 'payload': all_attrs}
|
|
||||||
|
|
||||||
|
|
||||||
@socketio.on('cqi.corpora.corpus.subcorpora.subcorpus.drop', namespace=ns)
@socketio_login_required
@cqi_over_socketio
def cqi_corpora_corpus_subcorpora_subcorpus_drop(
    cqi_client: cqi.CQiClient,
    corpus_name: str,
    subcorpus_name: str
):
    '''Drop the subcorpus and report the returned CQi status.'''
    corpus = cqi_client.corpora.get(corpus_name)
    status = corpus.subcorpora.get(subcorpus_name).drop()
    return {
        'code': 200,
        'msg': 'OK',
        'payload': {'code': status.code, 'msg': type(status).__name__}
    }
|
|
||||||
|
|
||||||
|
|
||||||
@socketio.on('cqi.corpora.corpus.subcorpora.subcorpus.dump', namespace=ns)
@socketio_login_required
@cqi_over_socketio
def cqi_corpora_corpus_subcorpora_subcorpus_dump(
    cqi_client: cqi.CQiClient,
    corpus_name: str,
    subcorpus_name: str,
    field: int,
    first: int,
    last: int
):
    '''Return the result of dumping field of the subcorpus from first to last.'''
    corpus = cqi_client.corpora.get(corpus_name)
    subcorpus = corpus.subcorpora.get(subcorpus_name)
    return {
        'code': 200,
        'msg': 'OK',
        'payload': subcorpus.dump(field, first, last)
    }
|
|
||||||
|
|
||||||
|
|
||||||
@socketio.on('cqi.corpora.corpus.subcorpora.subcorpus.fdist_1', namespace=ns)
@socketio_login_required
@cqi_over_socketio
def cqi_corpora_corpus_subcorpora_subcorpus_fdist_1(
    cqi_client: cqi.CQiClient,
    corpus_name: str,
    subcorpus_name: str,
    cutoff: int,
    field_name: str,
    positional_attribute_name: str
):
    '''
    Return the result of fdist_1 of the subcorpus.

    field_name is resolved through the fields of the subcorpus and
    positional_attribute_name through the positional attributes of the
    corpus before both are handed to fdist_1 together with cutoff.
    '''
    corpus = cqi_client.corpora.get(corpus_name)
    subcorpus = corpus.subcorpora.get(subcorpus_name)
    result = subcorpus.fdist_1(
        cutoff,
        subcorpus.fields[field_name],
        corpus.positional_attributes.get(positional_attribute_name)
    )
    return {'code': 200, 'msg': 'OK', 'payload': result}
|
|
||||||
|
|
||||||
|
|
||||||
@socketio.on('cqi.corpora.corpus.subcorpora.subcorpus.fdist_2', namespace=ns)
@socketio_login_required
@cqi_over_socketio
def cqi_corpora_corpus_subcorpora_subcorpus_fdist_2(
    cqi_client: cqi.CQiClient,
    corpus_name: str,
    subcorpus_name: str,
    cutoff: int,
    field_1_name: str,
    positional_attribute_1_name: str,
    field_2_name: str,
    positional_attribute_2_name: str
):
    '''
    Return the result of fdist_2 of the subcorpus.

    Both field names are resolved through the fields of the subcorpus and
    both positional attribute names through the positional attributes of
    the corpus before they are handed to fdist_2 together with cutoff.
    '''
    corpus = cqi_client.corpora.get(corpus_name)
    subcorpus = corpus.subcorpora.get(subcorpus_name)
    result = subcorpus.fdist_2(
        cutoff,
        subcorpus.fields[field_1_name],
        corpus.positional_attributes.get(positional_attribute_1_name),
        subcorpus.fields[field_2_name],
        corpus.positional_attributes.get(positional_attribute_2_name)
    )
    return {'code': 200, 'msg': 'OK', 'payload': result}
|
|
||||||
|
|
||||||
|
|
||||||
###############################################################################
|
|
||||||
# nopaque specific CQi extensions #
|
|
||||||
###############################################################################
|
|
||||||
@socketio.on('cqi.corpora.corpus.subcorpora.subcorpus.paginate', namespace=ns)
@socketio_login_required
@cqi_over_socketio
def cqi_corpora_corpus_subcorpora_subcorpus_paginate(
    cqi_client: cqi.CQiClient,
    corpus_name: str,
    subcorpus_name: str,
    context: int = 50,
    page: int = 1,
    per_page: int = 20
):
    '''
    Return one page of matches of the subcorpus together with their lookups.

    Answers 416 if per_page or page is smaller than 1, or if page lies
    behind the last page of a non-empty subcorpus. The page is produced by
    export_subcorpus; its 'matches' become the items, the rest of the export
    the lookups. The payload additionally describes the pagination state
    (total, per_page, pages, page, has_prev, has_next, prev_num, next_num).
    '''
    corpus = cqi_client.corpora.get(corpus_name)
    subcorpus = corpus.subcorpora.get(subcorpus_name)
    # Sanity checks
    if per_page < 1 or page < 1:
        return {'code': 416, 'msg': 'Range Not Satisfiable'}
    if (
        subcorpus.attrs['size'] > 0
        and page > math.ceil(subcorpus.attrs['size'] / per_page)
    ):
        return {'code': 416, 'msg': 'Range Not Satisfiable'}
    page_export = export_subcorpus(
        subcorpus,
        context=context,
        cutoff=per_page,
        offset=(page - 1) * per_page
    )
    # After removing the matches only the lookups remain in the export.
    matches = page_export.pop('matches')
    total = subcorpus.attrs['size']
    pages = math.ceil(total / per_page)
    # There is no current page when there are no pages at all.
    current = page if pages > 0 else None
    has_prev = current is not None and current > 1
    has_next = current is not None and current < pages
    payload = {
        # the items for the current page
        'items': matches,
        # the lookups for the items
        'lookups': page_export,
        # the total number of items matching the query
        'total': total,
        # the number of items to be displayed on a page
        'per_page': per_page,
        # the total number of pages
        'pages': pages,
        # the current page number (1 indexed)
        'page': current,
        'has_prev': has_prev,
        'has_next': has_next,
        'prev_num': current - 1 if has_prev else None,
        'next_num': current + 1 if has_next else None
    }
    return {'code': 200, 'msg': 'OK', 'payload': payload}
|
|
||||||
|
|
||||||
|
|
||||||
@socketio.on('cqi.corpora.corpus.subcorpora.subcorpus.partial_export', namespace=ns)
@socketio_login_required
@cqi_over_socketio
def cqi_corpora_corpus_subcorpora_subcorpus_partial_export(
    cqi_client: cqi.CQiClient,
    corpus_name: str,
    subcorpus_name: str,
    match_id_list: list,
    context: int = 50
):
    '''
    Return the result of partial_export_subcorpus for the subcorpus, called
    with match_id_list and the given context.
    '''
    corpus = cqi_client.corpora.get(corpus_name)
    subcorpus = corpus.subcorpora.get(subcorpus_name)
    return {
        'code': 200,
        'msg': 'OK',
        'payload': partial_export_subcorpus(
            subcorpus, match_id_list, context=context
        )
    }
|
|
||||||
|
|
||||||
|
|
||||||
@socketio.on(
    'cqi.corpora.corpus.subcorpora.subcorpus.export',
    namespace=ns
)
@socketio_login_required
@cqi_over_socketio
def cqi_corpora_corpus_subcorpora_subcorpus_export(
    cqi_client: cqi.CQiClient,
    corpus_name: str,
    subcorpus_name: str,
    context: int = 50
):
    """Export all matches of a subcorpus.

    The parameter names and annotations are part of the contract with the
    ``cqi_over_socketio`` decorator, which inspects the signature to build
    the call; they must not be renamed.

    Returns a response dict with ``code`` 200, ``msg`` 'OK' and the result
    of ``export_subcorpus`` as ``payload``.
    """
    # Resolve the corpus first, then the subcorpus stored on it.
    subcorpus = cqi_client.corpora.get(corpus_name).subcorpora.get(subcorpus_name)  # noqa
    response = {'code': 200, 'msg': 'OK'}
    response['payload'] = export_subcorpus(subcorpus, context=context)
    return response
|
|
@ -1,178 +0,0 @@
|
|||||||
from flask import session
|
|
||||||
from functools import wraps
|
|
||||||
from inspect import signature
|
|
||||||
import cqi
|
|
||||||
|
|
||||||
|
|
||||||
def cqi_over_socketio(f):
    """Wrap a Socket.IO event handler that talks to a CQi server.

    The wrapper builds the keyword arguments for ``f`` from the handler's
    signature:

    * ``corpus_name`` and ``cqi_client`` are taken from ``session['d']``.
    * Every other parameter is read from the event payload (the first
      positional argument of the event). Parameters without a default are
      required; parameters with a default are optional.
    * A provided value must have exactly the annotated type.

    The call to ``f`` is serialized with ``session['d']['cqi_client_lock']``.

    Returns a response dict: 424 when no CQi session data exists, 400 for
    a malformed payload, 500 when ``f`` raises ``BrokenPipeError`` or a
    ``cqi.errors.CQiException``, otherwise whatever ``f`` returns.
    """
    # The signature does not change between calls, so inspect it once.
    params = tuple(signature(f).parameters.values())

    @wraps(f)
    def wrapped(*args):
        if 'd' not in session:
            return {'code': 424, 'msg': 'Failed Dependency'}
        # An event emitted without data has no positional argument; treat
        # it as an empty payload instead of failing with an IndexError.
        payload = args[0] if args else {}
        f_args = {}
        # Check for missing args and if all provided args are of the right
        # type
        for param in params:
            if param.name == 'corpus_name':
                f_args[param.name] = f'NOPAQUE_{session["d"]["corpus_id"]}'
                continue
            if param.name == 'cqi_client':
                f_args[param.name] = session['d']['cqi_client']
                continue
            # Only handlers that actually read from the payload require it
            # to be a mapping.
            if not isinstance(payload, dict):
                return {'code': 400, 'msg': 'Bad Request'}
            if param.name not in payload:
                if param.default is param.empty:
                    # Required argument is missing
                    return {'code': 400, 'msg': 'Bad Request'}
                # Optional argument: let f fall back to its default
                continue
            arg = payload[param.name]
            # Exact type match on purpose (e.g. a bool is not accepted
            # where an int is annotated).
            if type(arg) is not param.annotation:
                return {'code': 400, 'msg': 'Bad Request'}
            f_args[param.name] = arg
        # Keep a reference to the lock so that exactly the acquired object
        # is released, even if f modifies the session data.
        cqi_client_lock = session['d']['cqi_client_lock']
        cqi_client_lock.acquire()
        try:
            return_value = f(**f_args)
        except BrokenPipeError:
            return_value = {
                'code': 500,
                'msg': 'Internal Server Error'
            }
        except cqi.errors.CQiException as e:
            return_value = {
                'code': 500,
                'msg': 'Internal Server Error',
                'payload': {
                    'code': e.code,
                    'desc': e.description,
                    'msg': e.__class__.__name__
                }
            }
        finally:
            cqi_client_lock.release()
        return return_value
    return wrapped
|
|
||||||
|
|
||||||
|
|
||||||
def lookups_by_cpos(corpus, cpos_list):
    """Build attribute lookup tables for the given corpus positions.

    The result always contains ``'cpos_lookup'``, mapping every position in
    ``cpos_list`` to a dict of its positional attribute values and the ids
    of the structural attributes it lies in (ids equal to -1 are left out).

    For every structural attribute that occurs at one of the positions and
    has sub-attributes, a ``'<name>_lookup'`` table maps each occurring id
    to the values of those sub-attributes.
    """
    cpos_lookup = {cpos: {} for cpos in cpos_list}
    result = {'cpos_lookup': cpos_lookup}

    for p_attr in corpus.positional_attributes.list():
        values = p_attr.values_by_cpos(cpos_list)
        for index, cpos in enumerate(cpos_list):
            cpos_lookup[cpos][p_attr.attrs['name']] = values[index]

    for s_attr in corpus.structural_attributes.list():
        # Attributes with has_values set are sub-attributes; they are
        # resolved through their parent attribute further down.
        if s_attr.attrs['has_values']:
            continue
        ids = s_attr.ids_by_cpos(cpos_list)
        for index, cpos in enumerate(cpos_list):
            struc_id = ids[index]
            if struc_id != -1:
                cpos_lookup[cpos][s_attr.attrs['name']] = struc_id
        seen_ids = [struc_id for struc_id in set(ids) if struc_id != -1]
        if not seen_ids:
            continue
        children = corpus.structural_attributes.list(
            filters={'part_of': s_attr}
        )
        if not children:
            continue
        table = {struc_id: {} for struc_id in seen_ids}
        result[f'{s_attr.attrs["name"]}_lookup'] = table
        for child in children:
            # Strip the "<parent name>_" prefix from the sub-attribute name
            key = child.attrs['name'][(len(s_attr.attrs['name']) + 1):]
            child_values = child.values_by_ids(seen_ids)
            for index, value in enumerate(child_values):
                table[seen_ids[index]][key] = value
    return result
|
|
||||||
|
|
||||||
|
|
||||||
def partial_export_subcorpus(subcorpus, match_id_list, context=25):
    """Export selected matches of a subcorpus together with their lookups.

    Parameters:
        subcorpus: Subcorpus object providing ``attrs`` (with ``size`` and
            ``fields``), ``dump`` and ``collection.corpus``.
        match_id_list: Match numbers to export. Entries that are not
            integers or that lie outside ``0 .. size - 1`` are skipped.
        context: Number of corpus positions of left and right context per
            match. Values <= 0 disable the context.

    Returns:
        ``{'matches': [...], **lookups}`` where every match is a dict with
        ``num``, ``lc``, ``c`` and ``rc`` (``lc``/``rc`` are ``None`` when
        there is no context on that side) and ``lookups`` is the result of
        ``lookups_by_cpos`` for all covered positions. An empty subcorpus
        yields ``{"matches": []}``.
    """
    subcorpus_size = subcorpus.attrs['size']
    if subcorpus_size == 0:
        return {"matches": []}
    # The ids come from the client, so do not trust their type: comparing
    # e.g. a str with an int would raise a TypeError.
    valid_match_ids = [
        match_id for match_id in match_id_list
        if isinstance(match_id, int) and 0 <= match_id < subcorpus_size
    ]
    if not valid_match_ids:
        # Nothing to export; skip the attribute round trips. This is the
        # shape lookups_by_cpos produces for an empty position list
        # (assuming the attribute queries return empty results for it).
        return {'matches': [], 'cpos_lookup': {}}
    fields = subcorpus.attrs['fields']
    corpus = subcorpus.collection.corpus
    last_cpos = corpus.attrs['size'] - 1
    cpos_set = set()
    matches = []
    for match_id in valid_match_ids:
        match_start = subcorpus.dump(fields['match'], match_id, match_id)[0]
        match_end = subcorpus.dump(fields['matchend'], match_id, match_id)[0]
        # A non-positive context would produce inverted context ranges and
        # drop the match itself from the lookup positions.
        if match_start == 0 or context <= 0:
            lc = None
            cpos_lbound = match_start
        else:
            cpos_lbound = max(0, match_start - context)
            lc = (cpos_lbound, match_start - 1)
        if match_end == last_cpos or context <= 0:
            rc = None
            cpos_rbound = match_end
        else:
            cpos_rbound = min(match_end + context, last_cpos)
            rc = (match_end + 1, cpos_rbound)
        matches.append(
            {'num': match_id, 'lc': lc, 'c': (match_start, match_end), 'rc': rc}  # noqa
        )
        cpos_set.update(range(cpos_lbound, cpos_rbound + 1))
    lookups = lookups_by_cpos(corpus, list(cpos_set))
    return {'matches': matches, **lookups}
|
|
||||||
|
|
||||||
|
|
||||||
def export_subcorpus(subcorpus, context=25, cutoff=float('inf'), offset=0):
    """Export a window of matches of a subcorpus together with lookups.

    Parameters:
        subcorpus: Subcorpus object providing ``attrs`` (with ``size`` and
            ``fields``), ``dump`` and ``collection.corpus``.
        context: Number of corpus positions of left and right context per
            match. Values <= 0 disable the context.
        cutoff: Maximum number of matches to export.
        offset: Number of the first match to export.

    Returns:
        ``{'matches': [...], **lookups}`` where every match is a dict with
        ``num``, ``lc``, ``c`` and ``rc`` (``lc``/``rc`` are ``None`` when
        there is no context on that side) and ``lookups`` is the result of
        ``lookups_by_cpos`` for all covered positions. ``{"matches": []}``
        is returned when the subcorpus is empty or the requested window
        contains no match.
    """
    subcorpus_size = subcorpus.attrs['size']
    if subcorpus_size == 0:
        return {"matches": []}
    first_match = max(0, offset)
    last_match = min((offset + cutoff - 1), (subcorpus_size - 1))
    if first_match > last_match:
        # Offset behind the last match or a non-positive cutoff: there is
        # nothing to dump, and an inverted range must not reach the server.
        return {"matches": []}
    fields = subcorpus.attrs['fields']
    match_boundaries = zip(
        range(first_match, last_match + 1),
        subcorpus.dump(fields['match'], first_match, last_match),
        subcorpus.dump(fields['matchend'], first_match, last_match)
    )
    corpus = subcorpus.collection.corpus
    last_cpos = corpus.attrs['size'] - 1
    cpos_set = set()
    matches = []
    for match_num, match_start, match_end in match_boundaries:
        # A non-positive context would produce inverted context ranges and
        # drop the match itself from the lookup positions.
        if match_start == 0 or context <= 0:
            lc = None
            cpos_lbound = match_start
        else:
            cpos_lbound = max(0, match_start - context)
            lc = (cpos_lbound, match_start - 1)
        if match_end == last_cpos or context <= 0:
            rc = None
            cpos_rbound = match_end
        else:
            cpos_rbound = min(match_end + context, last_cpos)
            rc = (match_end + 1, cpos_rbound)
        matches.append(
            {'num': match_num, 'lc': lc, 'c': (match_start, match_end), 'rc': rc}  # noqa
        )
        cpos_set.update(range(cpos_lbound, cpos_rbound + 1))
    lookups = lookups_by_cpos(corpus, list(cpos_set))
    return {'matches': matches, **lookups}
|
|
@ -237,7 +237,7 @@ class CorpusAnalysisConcordance {
|
|||||||
app.flash('No matches selected', 'error');
|
app.flash('No matches selected', 'error');
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
promise = subcorpus.o.partial_export([...subcorpus.selectedItems], 50);
|
promise = subcorpus.o.partialExport([...subcorpus.selectedItems], 50);
|
||||||
} else {
|
} else {
|
||||||
promise = subcorpus.o.export(50);
|
promise = subcorpus.o.export(50);
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user