mirror of
				https://gitlab.ub.uni-bielefeld.de/sfb1288inf/nopaque.git
				synced 2025-11-04 12:22:47 +00:00 
			
		
		
		
	Small fixes and remove old cqi_over_socketio interface
This commit is contained in:
		@@ -16,5 +16,4 @@ def before_request():
 | 
			
		||||
    pass
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
from . import cli, cqi_over_socketio, files, followers, routes, json_routes
 | 
			
		||||
from . import cqi_over_sio
 | 
			
		||||
from . import cli, cqi_over_sio, files, followers, routes, json_routes
 | 
			
		||||
 
 | 
			
		||||
@@ -167,7 +167,7 @@ def corpus_paginate_corpus(
 | 
			
		||||
    payload['prev_num'] = payload['page'] - 1 if payload['has_prev'] else None
 | 
			
		||||
    # Number of the next page
 | 
			
		||||
    payload['next_num'] = payload['page'] + 1 if payload['has_next'] else None
 | 
			
		||||
    return {'code': 200, 'msg': 'OK', 'payload': payload}
 | 
			
		||||
    return payload
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def cqp_paginate_subcorpus(
 | 
			
		||||
@@ -215,7 +215,7 @@ def cqp_paginate_subcorpus(
 | 
			
		||||
    payload['prev_num'] = payload['page'] - 1 if payload['has_prev'] else None
 | 
			
		||||
    # Number of the next page
 | 
			
		||||
    payload['next_num'] = payload['page'] + 1 if payload['has_next'] else None
 | 
			
		||||
    return {'code': 200, 'msg': 'OK', 'payload': payload}
 | 
			
		||||
    return payload
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def cqp_partial_export_subcorpus(
 | 
			
		||||
@@ -228,7 +228,7 @@ def cqp_partial_export_subcorpus(
 | 
			
		||||
    cqi_corpus = cqi_client.corpora.get(corpus_name)
 | 
			
		||||
    cqi_subcorpus = cqi_corpus.subcorpora.get(subcorpus_name)
 | 
			
		||||
    cqi_subcorpus_partial_export = partial_export_subcorpus(cqi_subcorpus, match_id_list, context=context)
 | 
			
		||||
    return {'code': 200, 'msg': 'OK', 'payload': cqi_subcorpus_partial_export}
 | 
			
		||||
    return cqi_subcorpus_partial_export
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def cqp_export_subcorpus(
 | 
			
		||||
@@ -240,4 +240,4 @@ def cqp_export_subcorpus(
 | 
			
		||||
    cqi_corpus = cqi_client.corpora.get(corpus_name)
 | 
			
		||||
    cqi_subcorpus = cqi_corpus.subcorpora.get(subcorpus_name)
 | 
			
		||||
    cqi_subcorpus_export = export_subcorpus(cqi_subcorpus, context=context)
 | 
			
		||||
    return {'code': 200, 'msg': 'OK', 'payload': cqi_subcorpus_export}
 | 
			
		||||
    return cqi_subcorpus_export
 | 
			
		||||
 
 | 
			
		||||
@@ -1,115 +0,0 @@
 | 
			
		||||
from flask import session
 | 
			
		||||
from flask_login import current_user
 | 
			
		||||
from flask_socketio import ConnectionRefusedError
 | 
			
		||||
from threading import Lock
 | 
			
		||||
import cqi
 | 
			
		||||
from app import db, hashids, socketio
 | 
			
		||||
from app.decorators import socketio_login_required
 | 
			
		||||
from app.models import Corpus, CorpusStatus
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
'''
 | 
			
		||||
This package tunnels the Corpus Query interface (CQi) protocol through
 | 
			
		||||
Socket.IO (SIO) by wrapping each CQi function in a separate SIO event.
 | 
			
		||||
 | 
			
		||||
This module only handles the SIO connect/disconnect, which handles the setup
 | 
			
		||||
and teardown of necessary resources for later use. Each CQi function has a
 | 
			
		||||
corresponding SIO event. The event handlers are spread across the different
 | 
			
		||||
modules within this package.
 | 
			
		||||
 | 
			
		||||
Basic concept:
 | 
			
		||||
1. A client connects to the SIO namespace and provides the id of a corpus to be
 | 
			
		||||
   analysed.
 | 
			
		||||
     1.1 The analysis session counter of the corpus is incremented.
 | 
			
		||||
     1.2 A CQiClient and a (Mutex) Lock belonging to it are created.
 | 
			
		||||
     1.3 Wait until the CQP server is running.
 | 
			
		||||
     1.4 Connect the CQiClient to the server.
 | 
			
		||||
     1.5 Save the CQiClient and the Lock in the session for subsequent use.
 | 
			
		||||
2. A client emits an event and may provide a single json object with necessary
 | 
			
		||||
   arguments for the targeted CQi function.
 | 
			
		||||
3. A SIO event handler (decorated with cqi_over_socketio) gets executed.
 | 
			
		||||
     - The event handler function defines all arguments. Since client data
 | 
			
		||||
       is sent as a single json object, the decorator decomposes it to fit
 | 
			
		||||
       the function's signature. This also includes type checking and proper
 | 
			
		||||
       use of the lock (acquire/release) mechanism.
 | 
			
		||||
4. Wait for more events
 | 
			
		||||
5. The client disconnects from the SIO namespace
 | 
			
		||||
     5.1 The analysis session counter of the corpus is decremented.
 | 
			
		||||
     5.2 The CQiClient and (Mutex) Lock belonging to it are torn down.
 | 
			
		||||
'''
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
NAMESPACE = '/corpora/corpus/corpus_analysis'
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
# Import all CQi over Socket.IO event handlers
 | 
			
		||||
from .cqi_corpora_corpus_subcorpora import *  # noqa
 | 
			
		||||
from .cqi_corpora_corpus_structural_attributes import *  # noqa
 | 
			
		||||
from .cqi_corpora_corpus_positional_attributes import *  # noqa
 | 
			
		||||
from .cqi_corpora_corpus_alignment_attributes import *  # noqa
 | 
			
		||||
from .cqi_corpora_corpus import *  # noqa
 | 
			
		||||
from .cqi_corpora import *  # noqa
 | 
			
		||||
from .cqi import *  # noqa
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@socketio.on('connect', namespace=NAMESPACE)
@socketio_login_required
def connect(auth):
    '''
    Set up a corpus analysis session for a connecting SIO client.

    The ``auth`` payload is (ab)used to transport the hashed id of the corpus
    for which the analysis session should be started.

    The handler
      1. resolves the corpus and checks that the current user may access it,
      2. increments the analysis session counter of the corpus,
      3. polls (at most 20 times, sleeping 3 seconds in between) until the
         corpus status is RUNNING_ANALYSIS_SESSION,
      4. stores a CQiClient and its Lock in the session under the key 'd'.

    Raises:
        ConnectionRefusedError: if the corpus does not exist ('Not Found'),
            the user has no access ('Forbidden'), the corpus is in a status
            that does not permit analysis ('Failed Dependency') or the corpus
            did not reach the running status in time ('Request Timeout').
    '''
    corpus_id = hashids.decode(auth['corpus_id'])
    corpus = Corpus.query.get(corpus_id)
    if corpus is None:
        raise ConnectionRefusedError('Not Found')
    if not (corpus.user == current_user
            or current_user.is_following_corpus(corpus)
            or current_user.is_administrator()):
        raise ConnectionRefusedError('Forbidden')
    if corpus.status not in [
        CorpusStatus.BUILT,
        CorpusStatus.STARTING_ANALYSIS_SESSION,
        CorpusStatus.RUNNING_ANALYSIS_SESSION,
        CorpusStatus.CANCELING_ANALYSIS_SESSION
    ]:
        raise ConnectionRefusedError('Failed Dependency')
    if corpus.num_analysis_sessions is None:
        # Initialise the counter first so that the column expression below
        # has a number to add to.
        corpus.num_analysis_sessions = 0
        db.session.commit()
    # Assigning the column expression (Corpus.…, not corpus.…) lets the
    # database compute the new value.
    corpus.num_analysis_sessions = Corpus.num_analysis_sessions + 1
    db.session.commit()
    retry_counter = 20
    while corpus.status != CorpusStatus.RUNNING_ANALYSIS_SESSION:
        if retry_counter == 0:
            # Undo the increment from above before giving up.
            corpus.num_analysis_sessions = Corpus.num_analysis_sessions - 1
            db.session.commit()
            # A connect handler only rejects the client by returning False or
            # by raising ConnectionRefusedError; returning a dict would accept
            # the connection without an analysis session being set up.
            raise ConnectionRefusedError('Request Timeout')
        socketio.sleep(3)
        retry_counter -= 1
        db.session.refresh(corpus)
    cqi_client = cqi.CQiClient(f'cqpserver_{corpus_id}')
    session['d'] = {
        'corpus_id': corpus_id,
        'cqi_client': cqi_client,
        'cqi_client_lock': Lock(),
    }
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@socketio.on('disconnect', namespace=NAMESPACE)
def disconnect():
    '''
    Tear down the corpus analysis session of a disconnecting SIO client.

    Does nothing if no analysis session data ('d') is stored in the session.
    Otherwise the CQiClient is disconnected while holding its lock, the
    analysis session counter of the corpus is decremented and the session
    data is removed.
    '''
    if 'd' not in session:
        return
    analysis_session = session['d']
    # Using the lock as a context manager guarantees its release even if
    # disconnect() raises something other than the errors ignored below.
    with analysis_session['cqi_client_lock']:
        try:
            analysis_session['cqi_client'].disconnect()
        except (BrokenPipeError, cqi.errors.CQiException):
            # Best effort: the connection to the server may already be gone.
            pass
    corpus = Corpus.query.get(analysis_session['corpus_id'])
    # The query yields None if the corpus no longer exists; there is no
    # counter left to decrement in that case.
    if corpus is not None:
        corpus.num_analysis_sessions = Corpus.num_analysis_sessions - 1
        db.session.commit()
    session.pop('d')
 | 
			
		||||
@@ -1,43 +0,0 @@
 | 
			
		||||
from socket import gaierror
 | 
			
		||||
import cqi
 | 
			
		||||
from app import socketio
 | 
			
		||||
from app.decorators import socketio_login_required
 | 
			
		||||
from . import NAMESPACE as ns
 | 
			
		||||
from .utils import cqi_over_socketio
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@socketio.on('cqi.connect', namespace=ns)
@socketio_login_required
@cqi_over_socketio
def cqi_connect(cqi_client: cqi.CQiClient):
    '''
    Connect the CQi client to its server and report the CQi status.

    A socket.gaierror raised while connecting is answered with a 500
    response carrying the error's first two arguments as code/description.
    '''
    try:
        status = cqi_client.connect()
    except gaierror as error:
        details = {'code': error.args[0], 'desc': error.args[1]}
        return {
            'code': 500,
            'msg': 'Internal Server Error',
            'payload': details
        }
    return {
        'code': 200,
        'msg': 'OK',
        'payload': {'code': status.code, 'msg': status.__class__.__name__}
    }
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@socketio.on('cqi.disconnect', namespace=ns)
@socketio_login_required
@cqi_over_socketio
def cqi_disconnect(cqi_client: cqi.CQiClient):
    '''Disconnect the CQi client and report the resulting CQi status.'''
    status = cqi_client.disconnect()
    return {
        'code': 200,
        'msg': 'OK',
        'payload': {'code': status.code, 'msg': status.__class__.__name__}
    }
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@socketio.on('cqi.ping', namespace=ns)
@socketio_login_required
@cqi_over_socketio
def cqi_ping(cqi_client: cqi.CQiClient):
    '''Ping the CQi server and report the resulting CQi status.'''
    status = cqi_client.ping()
    return {
        'code': 200,
        'msg': 'OK',
        'payload': {'code': status.code, 'msg': status.__class__.__name__}
    }
 | 
			
		||||
@@ -1,22 +0,0 @@
 | 
			
		||||
import cqi
 | 
			
		||||
from app import socketio
 | 
			
		||||
from app.decorators import socketio_login_required
 | 
			
		||||
from . import NAMESPACE as ns
 | 
			
		||||
from .utils import cqi_over_socketio
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@socketio.on('cqi.corpora.get', namespace=ns)
@socketio_login_required
@cqi_over_socketio
def cqi_corpora_get(cqi_client: cqi.CQiClient, corpus_name: str):
    '''Return a copy of the attributes of the corpus named corpus_name.'''
    corpus_attrs = cqi_client.corpora.get(corpus_name).attrs
    return {'code': 200, 'msg': 'OK', 'payload': {**corpus_attrs}}
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@socketio.on('cqi.corpora.list', namespace=ns)
@socketio_login_required
@cqi_over_socketio
def cqi_corpora_list(cqi_client: cqi.CQiClient):
    '''Return a copy of the attributes of every corpus the client lists.'''
    attrs_per_corpus = []
    for cqi_corpus in cqi_client.corpora.list():
        attrs_per_corpus.append({**cqi_corpus.attrs})
    return {'code': 200, 'msg': 'OK', 'payload': attrs_per_corpus}
 | 
			
		||||
@@ -1,199 +0,0 @@
 | 
			
		||||
from collections import Counter
 | 
			
		||||
from flask import session
 | 
			
		||||
import cqi
 | 
			
		||||
import json
 | 
			
		||||
import math
 | 
			
		||||
import os
 | 
			
		||||
from app import db, socketio
 | 
			
		||||
from app.decorators import socketio_login_required
 | 
			
		||||
from app.models import Corpus
 | 
			
		||||
from . import NAMESPACE as ns
 | 
			
		||||
from .utils import cqi_over_socketio, lookups_by_cpos
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@socketio.on('cqi.corpora.corpus.drop', namespace=ns)
@socketio_login_required
@cqi_over_socketio
def cqi_corpora_corpus_drop(cqi_client: cqi.CQiClient, corpus_name: str):
    '''Drop the corpus named corpus_name and report the CQi status.'''
    status = cqi_client.corpora.get(corpus_name).drop()
    return {
        'code': 200,
        'msg': 'OK',
        'payload': {'code': status.code, 'msg': status.__class__.__name__}
    }
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@socketio.on('cqi.corpora.corpus.query', namespace=ns)
@socketio_login_required
@cqi_over_socketio
def cqi_corpora_corpus_query(
    cqi_client: cqi.CQiClient,
    corpus_name: str,
    subcorpus_name: str,
    query: str
):
    '''
    Run query on the corpus named corpus_name, passing subcorpus_name along
    as the name for the query, and report the resulting CQi status.
    '''
    status = cqi_client.corpora.get(corpus_name).query(subcorpus_name, query)
    return {
        'code': 200,
        'msg': 'OK',
        'payload': {'code': status.code, 'msg': status.__class__.__name__}
    }
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
###############################################################################
 | 
			
		||||
# nopaque specific CQi extensions                                             #
 | 
			
		||||
###############################################################################
 | 
			
		||||
@socketio.on('cqi.corpora.corpus.update_db', namespace=ns)
@socketio_login_required
@cqi_over_socketio
def cqi_corpora_corpus_update_db(cqi_client: cqi.CQiClient, corpus_name: str):
    '''
    Copy the size of the CQi corpus named corpus_name into the num_tokens
    field of the database corpus of the current analysis session.
    '''
    analysis_session = session['d']
    db_corpus = Corpus.query.get(analysis_session['corpus_id'])
    token_count = cqi_client.corpora.get(corpus_name).size
    db_corpus.num_tokens = token_count
    db.session.commit()
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@socketio.on('cqi.corpora.corpus.get_visualization_data', namespace=ns)
@socketio_login_required
@cqi_over_socketio
def cqi_corpora_corpus_get_visualization_data(
    cqi_client: cqi.CQiClient,
    corpus_name: str
):
    '''
    Return the visualization data of a corpus, computing it on first use.

    The data is cached as <corpus.path>/cwb/visualization_data.json. If that
    file exists its content is returned as is, otherwise the data is
    collected via CQi, written to that file and returned.

    Payload layout:
      - 'corpus': cpos bounds, token/s_attr counts and p_attr frequencies
      - 'p_attrs': per p_attr a mapping cpos -> lexicon id
      - 's_attrs': per s_attr (without values) a lexicon with bounds and
        counts per region, plus the names of its value sub attributes
      - 'values': lexicon id -> value for p_attrs, region id -> values for
        s_attrs

    NOTE: freshly computed data uses int keys, data read back from the cache
    file has str keys because JSON object keys are always strings.
    '''
    corpus = Corpus.query.get(session['d']['corpus_id'])
    visualization_data_file_path = os.path.join(corpus.path, 'cwb', 'visualization_data.json')
    if os.path.exists(visualization_data_file_path):
        with open(visualization_data_file_path, 'r') as f:
            payload = json.load(f)
        return {'code': 200, 'msg': 'OK', 'payload': payload}
    cqi_corpus = cqi_client.corpora.get(corpus_name)
    corpus_size = cqi_corpus.size
    # Every corpus position, built once and shared by all p_attr lookups
    all_cpos = list(range(0, corpus_size))
    cqi_p_attrs = {
        p_attr.name: p_attr
        for p_attr in cqi_corpus.positional_attributes.list()
    }
    cqi_s_attrs = {
        s_attr.name: s_attr
        for s_attr in cqi_corpus.structural_attributes.list()
    }
    payload = {
        'corpus': {
            'bounds': [0, corpus_size - 1],
            'counts': {
                'token': corpus_size
            },
            'freqs': {}
        },
        'p_attrs': {},
        's_attrs': {},
        'values': {'p_attrs': {}, 's_attrs': {}}
    }
    for p_attr in cqi_p_attrs.values():
        lexicon_ids = list(range(0, p_attr.lexicon_size))
        payload['corpus']['freqs'][p_attr.name] = dict(
            zip(lexicon_ids, p_attr.freqs_by_ids(lexicon_ids))
        )
        payload['p_attrs'][p_attr.name] = dict(
            zip(all_cpos, p_attr.ids_by_cpos(all_cpos))
        )
        payload['values']['p_attrs'][p_attr.name] = dict(
            zip(lexicon_ids, p_attr.values_by_ids(lexicon_ids))
        )
    for s_attr in cqi_s_attrs.values():
        # s_attrs carrying values are not handled on their own; they are
        # picked up below as sub attributes ('part_of') of another s_attr.
        if s_attr.has_values:
            continue
        s_attr_size = s_attr.size
        s_attr_ids = list(range(0, s_attr_size))
        lexicon = {}
        payload['corpus']['counts'][s_attr.name] = s_attr_size
        payload['s_attrs'][s_attr.name] = {'lexicon': lexicon, 'values': None}
        payload['values']['s_attrs'][s_attr.name] = {}
        for s_attr_id in s_attr_ids:
            lbound, rbound = s_attr.cpos_by_id(s_attr_id)
            counts = {'token': rbound - lbound + 1}
            entry = {'bounds': [lbound, rbound], 'counts': counts}
            lexicon[s_attr_id] = entry
            # Nested counts are only collected for 'text' and 's' regions
            if s_attr.name not in ['text', 's']:
                continue
            region_cpos = list(range(lbound, rbound + 1))
            # Distinct ids within the region; -1 is filtered out, presumably
            # marking positions outside of any region (TODO confirm).
            counts['ent'] = len({x for x in cqi_s_attrs['ent'].ids_by_cpos(region_cpos) if x != -1})
            if s_attr.name != 'text':
                continue
            counts['s'] = len({x for x in cqi_s_attrs['s'].ids_by_cpos(region_cpos) if x != -1})
            entry['freqs'] = {
                region_p_attr.name: dict(Counter(region_p_attr.ids_by_cpos(region_cpos)))
                for region_p_attr in cqi_p_attrs.values()
            }
        sub_s_attrs = cqi_corpus.structural_attributes.list(filters={'part_of': s_attr})
        # Strip the '<s_attr name>' prefix plus one separator character
        s_attr_value_names = [
            sub_s_attr.name[(len(s_attr.name) + 1):]
            for sub_s_attr in sub_s_attrs
        ]
        sub_s_attr_values = [
            sub_s_attr.values_by_ids(s_attr_ids)
            for sub_s_attr in sub_s_attrs
        ]
        payload['s_attrs'][s_attr.name]['values'] = s_attr_value_names
        payload['values']['s_attrs'][s_attr.name] = {
            s_attr_id: {
                value_name: values[s_attr_id]
                for value_name, values in zip(s_attr_value_names, sub_s_attr_values)
            }
            for s_attr_id in s_attr_ids
        }
    with open(visualization_data_file_path, 'w') as f:
        json.dump(payload, f)
    return {'code': 200, 'msg': 'OK', 'payload': payload}
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@socketio.on('cqi.corpora.corpus.paginate', namespace=ns)
@socketio_login_required
@cqi_over_socketio
def cqi_corpora_corpus_paginate(
    cqi_client: cqi.CQiClient,
    corpus_name: str,
    page: int = 1,
    per_page: int = 20
):
    '''
    Return one page of corpus positions together with their lookups.

    Answers with 416 if per_page or page is smaller than 1 or if page lies
    beyond the last page of a non-empty corpus. The payload follows the usual
    pagination layout (items, lookups, total, per_page, pages, page,
    has_prev, has_next, prev_num, next_num); page is 1 indexed and None if
    there are no pages.
    '''
    cqi_corpus = cqi_client.corpora.get(corpus_name)
    # Sanity checks
    if per_page < 1 or page < 1:
        return {'code': 416, 'msg': 'Range Not Satisfiable'}
    if cqi_corpus.size > 0 and page > math.ceil(cqi_corpus.size / per_page):
        return {'code': 416, 'msg': 'Range Not Satisfiable'}
    start_cpos = (page - 1) * per_page
    stop_cpos = min(cqi_corpus.size, start_cpos + per_page)
    cpos_list = list(range(start_cpos, stop_cpos))
    lookups = lookups_by_cpos(cqi_corpus, cpos_list)
    total = cqi_corpus.size
    pages = math.ceil(total / per_page)
    current_page = page if pages > 0 else None
    if current_page:
        has_prev = current_page > 1
        has_next = current_page < pages
    else:
        has_prev = False
        has_next = False
    payload = {
        # the items for the current page
        'items': [cpos_list],
        # the lookups for the items
        'lookups': lookups,
        # the total number of items
        'total': total,
        # the number of items to be displayed on a page
        'per_page': per_page,
        # the total number of pages
        'pages': pages,
        # the current page number (1 indexed)
        'page': current_page,
        'has_prev': has_prev,
        'has_next': has_next,
        'prev_num': current_page - 1 if has_prev else None,
        'next_num': current_page + 1 if has_next else None
    }
    return {'code': 200, 'msg': 'OK', 'payload': payload}
 | 
			
		||||
@@ -1,24 +0,0 @@
 | 
			
		||||
import cqi
 | 
			
		||||
from app import socketio
 | 
			
		||||
from app.decorators import socketio_login_required
 | 
			
		||||
from . import NAMESPACE as ns
 | 
			
		||||
from .utils import cqi_over_socketio
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@socketio.on('cqi.corpora.corpus.alignment_attributes.get', namespace=ns)
@socketio_login_required
@cqi_over_socketio
def cqi_corpora_corpus_alignment_attributes_get(
    cqi_client: cqi.CQiClient,
    corpus_name: str,
    alignment_attribute_name: str
):
    '''Return a copy of the attributes of one alignment attribute.'''
    cqi_corpus = cqi_client.corpora.get(corpus_name)
    attribute = cqi_corpus.alignment_attributes.get(alignment_attribute_name)
    return {'code': 200, 'msg': 'OK', 'payload': {**attribute.attrs}}
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@socketio.on('cqi.corpora.corpus.alignment_attributes.list', namespace=ns)
@socketio_login_required
@cqi_over_socketio
def cqi_corpora_corpus_alignment_attributes_list(
    cqi_client: cqi.CQiClient,
    corpus_name: str
):
    '''Return copies of the attributes of all alignment attributes.'''
    cqi_corpus = cqi_client.corpora.get(corpus_name)
    attrs_per_attribute = []
    for attribute in cqi_corpus.alignment_attributes.list():
        attrs_per_attribute.append({**attribute.attrs})
    return {'code': 200, 'msg': 'OK', 'payload': attrs_per_attribute}
 | 
			
		||||
@@ -1,24 +0,0 @@
 | 
			
		||||
import cqi
 | 
			
		||||
from app import socketio
 | 
			
		||||
from app.decorators import socketio_login_required
 | 
			
		||||
from . import NAMESPACE as ns
 | 
			
		||||
from .utils import cqi_over_socketio
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@socketio.on('cqi.corpora.corpus.positional_attributes.get', namespace=ns)
@socketio_login_required
@cqi_over_socketio
def cqi_corpora_corpus_positional_attributes_get(
    cqi_client: cqi.CQiClient,
    corpus_name: str,
    positional_attribute_name: str
):
    '''Return a copy of the attributes of one positional attribute.'''
    cqi_corpus = cqi_client.corpora.get(corpus_name)
    attribute = cqi_corpus.positional_attributes.get(positional_attribute_name)
    return {'code': 200, 'msg': 'OK', 'payload': {**attribute.attrs}}
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@socketio.on('cqi.corpora.corpus.positional_attributes.list', namespace=ns)
@socketio_login_required
@cqi_over_socketio
def cqi_corpora_corpus_positional_attributes_list(
    cqi_client: cqi.CQiClient,
    corpus_name: str
):
    '''Return copies of the attributes of all positional attributes.'''
    cqi_corpus = cqi_client.corpora.get(corpus_name)
    attrs_per_attribute = []
    for attribute in cqi_corpus.positional_attributes.list():
        attrs_per_attribute.append({**attribute.attrs})
    return {'code': 200, 'msg': 'OK', 'payload': attrs_per_attribute}
 | 
			
		||||
@@ -1,24 +0,0 @@
 | 
			
		||||
import cqi
 | 
			
		||||
from app import socketio
 | 
			
		||||
from app.decorators import socketio_login_required
 | 
			
		||||
from . import NAMESPACE as ns
 | 
			
		||||
from .utils import cqi_over_socketio
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@socketio.on('cqi.corpora.corpus.structural_attributes.get', namespace=ns)
@socketio_login_required
@cqi_over_socketio
def cqi_corpora_corpus_structural_attributes_get(
    cqi_client: cqi.CQiClient,
    corpus_name: str,
    structural_attribute_name: str
):
    '''Return a copy of the attributes of one structural attribute.'''
    cqi_corpus = cqi_client.corpora.get(corpus_name)
    attribute = cqi_corpus.structural_attributes.get(structural_attribute_name)
    return {'code': 200, 'msg': 'OK', 'payload': {**attribute.attrs}}
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@socketio.on('cqi.corpora.corpus.structural_attributes.list', namespace=ns)
@socketio_login_required
@cqi_over_socketio
def cqi_corpora_corpus_structural_attributes_list(
    cqi_client: cqi.CQiClient,
    corpus_name: str
):
    '''Return copies of the attributes of all structural attributes.'''
    cqi_corpus = cqi_client.corpora.get(corpus_name)
    attrs_per_attribute = []
    for attribute in cqi_corpus.structural_attributes.list():
        attrs_per_attribute.append({**attribute.attrs})
    return {'code': 200, 'msg': 'OK', 'payload': attrs_per_attribute}
 | 
			
		||||
@@ -1,140 +0,0 @@
 | 
			
		||||
import cqi
 | 
			
		||||
import math
 | 
			
		||||
from app import socketio
 | 
			
		||||
from app.decorators import socketio_login_required
 | 
			
		||||
from . import NAMESPACE as ns
 | 
			
		||||
from .utils import cqi_over_socketio, export_subcorpus, partial_export_subcorpus
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@socketio.on('cqi.corpora.corpus.subcorpora.get', namespace=ns)
@socketio_login_required
@cqi_over_socketio
def cqi_corpora_corpus_subcorpora_get(
    cqi_client: cqi.CQiClient,
    corpus_name: str,
    subcorpus_name: str
):
    '''Return a copy of the attributes of one subcorpus.'''
    cqi_corpus = cqi_client.corpora.get(corpus_name)
    subcorpus = cqi_corpus.subcorpora.get(subcorpus_name)
    return {'code': 200, 'msg': 'OK', 'payload': {**subcorpus.attrs}}
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@socketio.on('cqi.corpora.corpus.subcorpora.list', namespace=ns)
@socketio_login_required
@cqi_over_socketio
def cqi_corpora_corpus_subcorpora_list(
    cqi_client: cqi.CQiClient,
    corpus_name: str
):
    '''Return copies of the attributes of all subcorpora of a corpus.'''
    cqi_corpus = cqi_client.corpora.get(corpus_name)
    attrs_per_subcorpus = []
    for subcorpus in cqi_corpus.subcorpora.list():
        attrs_per_subcorpus.append({**subcorpus.attrs})
    return {'code': 200, 'msg': 'OK', 'payload': attrs_per_subcorpus}
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@socketio.on('cqi.corpora.corpus.subcorpora.subcorpus.drop', namespace=ns)
@socketio_login_required
@cqi_over_socketio
def cqi_corpora_corpus_subcorpora_subcorpus_drop(
    cqi_client: cqi.CQiClient,
    corpus_name: str,
    subcorpus_name: str
):
    '''Drop one subcorpus and report the resulting CQi status.'''
    cqi_corpus = cqi_client.corpora.get(corpus_name)
    status = cqi_corpus.subcorpora.get(subcorpus_name).drop()
    return {
        'code': 200,
        'msg': 'OK',
        'payload': {'code': status.code, 'msg': status.__class__.__name__}
    }
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@socketio.on('cqi.corpora.corpus.subcorpora.subcorpus.dump', namespace=ns)
@socketio_login_required
@cqi_over_socketio
def cqi_corpora_corpus_subcorpora_subcorpus_dump(
    cqi_client: cqi.CQiClient,
    corpus_name: str,
    subcorpus_name: str,
    field: int,
    first: int,
    last: int
):
    '''Return the result of dumping field from first to last of a subcorpus.'''
    cqi_corpus = cqi_client.corpora.get(corpus_name)
    subcorpus = cqi_corpus.subcorpora.get(subcorpus_name)
    dumped = subcorpus.dump(field, first, last)
    return {'code': 200, 'msg': 'OK', 'payload': dumped}
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@socketio.on('cqi.corpora.corpus.subcorpora.subcorpus.fdist_1', namespace=ns)
@socketio_login_required
@cqi_over_socketio
def cqi_corpora_corpus_subcorpora_subcorpus_fdist_1(
    cqi_client: cqi.CQiClient,
    corpus_name: str,
    subcorpus_name: str,
    cutoff: int,
    field_name: str,
    positional_attribute_name: str
):
    '''
    Return the fdist_1 result of a subcorpus.

    field_name is resolved through the subcorpus' fields mapping and
    positional_attribute_name through the corpus' positional attributes
    before both are handed to fdist_1 together with cutoff.
    '''
    cqi_corpus = cqi_client.corpora.get(corpus_name)
    subcorpus = cqi_corpus.subcorpora.get(subcorpus_name)
    field = subcorpus.fields[field_name]
    p_attr = cqi_corpus.positional_attributes.get(positional_attribute_name)
    distribution = subcorpus.fdist_1(cutoff, field, p_attr)
    return {'code': 200, 'msg': 'OK', 'payload': distribution}
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@socketio.on('cqi.corpora.corpus.subcorpora.subcorpus.fdist_2', namespace=ns)
@socketio_login_required
@cqi_over_socketio
def cqi_corpora_corpus_subcorpora_subcorpus_fdist_2(
    cqi_client: cqi.CQiClient,
    corpus_name: str,
    subcorpus_name: str,
    cutoff: int,
    field_1_name: str,
    positional_attribute_1_name: str,
    field_2_name: str,
    positional_attribute_2_name: str
):
    '''
    Return the fdist_2 result of a subcorpus.

    Both field names are resolved through the subcorpus' fields mapping and
    both positional attribute names through the corpus' positional
    attributes before they are handed to fdist_2 together with cutoff.
    '''
    cqi_corpus = cqi_client.corpora.get(corpus_name)
    subcorpus = cqi_corpus.subcorpora.get(subcorpus_name)
    first_field = subcorpus.fields[field_1_name]
    first_p_attr = cqi_corpus.positional_attributes.get(positional_attribute_1_name)
    second_field = subcorpus.fields[field_2_name]
    second_p_attr = cqi_corpus.positional_attributes.get(positional_attribute_2_name)
    distribution = subcorpus.fdist_2(
        cutoff,
        first_field,
        first_p_attr,
        second_field,
        second_p_attr
    )
    return {'code': 200, 'msg': 'OK', 'payload': distribution}
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
###############################################################################
 | 
			
		||||
# nopaque specific CQi extensions                                             #
 | 
			
		||||
###############################################################################
 | 
			
		||||
@socketio.on('cqi.corpora.corpus.subcorpora.subcorpus.paginate', namespace=ns)
@socketio_login_required
@cqi_over_socketio
def cqi_corpora_corpus_subcorpora_subcorpus_paginate(
    cqi_client: cqi.CQiClient,
    corpus_name: str,
    subcorpus_name: str,
    context: int = 50,
    page: int = 1,
    per_page: int = 20
):
    """Handle the ``...subcorpus.paginate`` event.

    Exports one page of matches of a subcorpus and wraps it in pagination
    metadata.

    Returns ``{'code': 416, 'msg': 'Range Not Satisfiable'}`` when
    ``per_page`` or ``page`` is smaller than 1, or when the subcorpus is not
    empty and ``page`` lies beyond its last page. Otherwise returns a
    response dict with ``code`` 200, ``msg`` 'OK' and a ``payload`` holding
    ``items``, ``lookups``, ``total``, ``per_page``, ``pages``, ``page``,
    ``has_prev``, ``has_next``, ``prev_num`` and ``next_num``.
    """
    corpus = cqi_client.corpora.get(corpus_name)
    subcorpus = corpus.subcorpora.get(subcorpus_name)

    # Sanity checks
    if per_page < 1 or page < 1:
        return {'code': 416, 'msg': 'Range Not Satisfiable'}
    total = subcorpus.attrs['size']
    if total > 0 and page > math.ceil(total / per_page):
        return {'code': 416, 'msg': 'Range Not Satisfiable'}

    export = export_subcorpus(
        subcorpus,
        context=context,
        cutoff=per_page,
        offset=(page - 1) * per_page
    )
    # Whatever remains in the export after removing the matches are the
    # lookups belonging to those matches.
    items = export.pop('matches')

    page_count = math.ceil(total / per_page)
    # An empty subcorpus has no pages and therefore no current page.
    current = page if page_count > 0 else None
    has_prev = current > 1 if current else False
    has_next = current < page_count if current else False

    payload = {
        # the items for the current page
        'items': items,
        # the lookups for the items
        'lookups': export,
        # the total number of items matching the query
        'total': total,
        # the number of items to be displayed on a page
        'per_page': per_page,
        # the total number of pages
        'pages': page_count,
        # the current page number (1 indexed)
        'page': current,
        # True if a previous page exists
        'has_prev': has_prev,
        # True if a next page exists
        'has_next': has_next,
        # number of the previous page
        'prev_num': current - 1 if has_prev else None,
        # number of the next page
        'next_num': current + 1 if has_next else None
    }
    return {'code': 200, 'msg': 'OK', 'payload': payload}
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@socketio.on('cqi.corpora.corpus.subcorpora.subcorpus.partial_export', namespace=ns)  # noqa
@socketio_login_required
@cqi_over_socketio
def cqi_corpora_corpus_subcorpora_subcorpus_partial_export(
    cqi_client: cqi.CQiClient,
    corpus_name: str,
    subcorpus_name: str,
    match_id_list: list,
    context: int = 50
):
    """Handle the ``...subcorpus.partial_export`` event.

    Resolves the subcorpus and delegates to ``partial_export_subcorpus``
    with ``match_id_list`` and ``context``.

    Returns a response dict with ``code`` 200, ``msg`` 'OK' and the partial
    export as ``payload``.
    """
    corpus = cqi_client.corpora.get(corpus_name)
    subcorpus = corpus.subcorpora.get(subcorpus_name)
    return {
        'code': 200,
        'msg': 'OK',
        'payload': partial_export_subcorpus(
            subcorpus,
            match_id_list,
            context=context
        )
    }
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@socketio.on('cqi.corpora.corpus.subcorpora.subcorpus.export', namespace=ns)
@socketio_login_required
@cqi_over_socketio
def cqi_corpora_corpus_subcorpora_subcorpus_export(
    cqi_client: cqi.CQiClient,
    corpus_name: str,
    subcorpus_name: str,
    context: int = 50
):
    """Handle the ``...subcorpus.export`` event.

    Resolves the subcorpus and delegates to ``export_subcorpus`` with the
    requested ``context``.

    Returns a response dict with ``code`` 200, ``msg`` 'OK' and the export
    as ``payload``.
    """
    corpus = cqi_client.corpora.get(corpus_name)
    subcorpus = corpus.subcorpora.get(subcorpus_name)
    return {
        'code': 200,
        'msg': 'OK',
        'payload': export_subcorpus(subcorpus, context=context)
    }
 | 
			
		||||
@@ -1,178 +0,0 @@
 | 
			
		||||
from flask import session
 | 
			
		||||
from functools import wraps
 | 
			
		||||
from inspect import signature
 | 
			
		||||
import cqi
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def cqi_over_socketio(f):
    """Decorate an event handler so it is called with validated CQi arguments.

    The wrapper builds the keyword arguments for ``f`` from its signature:

    * ``cqi_client`` is taken from ``session['d']['cqi_client']``.
    * ``corpus_name`` is ``f'NOPAQUE_{session["d"]["corpus_id"]}'``.
    * Every other parameter is read from the payload dict sent by the client
      (the first positional argument of the event). Parameters without a
      default are required; parameters with a default may be omitted.
      The exact type of each provided value must equal the parameter's
      annotation.

    The call to ``f`` happens while ``session['d']['cqi_client_lock']`` is
    held.

    Returns (from the wrapper):
        * ``{'code': 424, 'msg': 'Failed Dependency'}`` if the session has
          no ``'d'`` entry.
        * ``{'code': 400, 'msg': 'Bad Request'}`` if a required argument is
          missing or an argument has the wrong type.
        * ``{'code': 500, 'msg': 'Internal Server Error'}`` if ``f`` raises
          ``BrokenPipeError``; for ``cqi.errors.CQiException`` the response
          additionally carries a ``payload`` describing the CQi error.
        * Otherwise whatever ``f`` returns.
    """
    # The signature never changes, so inspect it once instead of per event.
    params = tuple(signature(f).parameters.values())

    @wraps(f)
    def wrapped(*args):
        if 'd' not in session:
            return {'code': 424, 'msg': 'Failed Dependency'}
        # The payload is optional on the wire. An event emitted without one
        # (or with something that is not a dict) is treated as "no arguments
        # given" so that missing required arguments yield a 400 response
        # instead of an IndexError/TypeError inside the handler machinery.
        data = args[0] if args and isinstance(args[0], dict) else {}
        f_args = {}
        for param in params:
            # Server-side arguments are never taken from the client.
            if param.name == 'corpus_name':
                f_args[param.name] = f'NOPAQUE_{session["d"]["corpus_id"]}'
                continue
            if param.name == 'cqi_client':
                f_args[param.name] = session['d']['cqi_client']
                continue
            if param.name not in data:
                if param.default is param.empty:
                    # Required argument is missing
                    return {'code': 400, 'msg': 'Bad Request'}
                # Optional argument: let f fall back to its default
                continue
            arg = data[param.name]
            # Exact type match on purpose: isinstance() would let a bool
            # pass where an int is expected.
            if type(arg) is not param.annotation:
                return {'code': 400, 'msg': 'Bad Request'}
            f_args[param.name] = arg
        # Keep a reference so the lock that gets released is guaranteed to be
        # the one that was acquired.
        lock = session['d']['cqi_client_lock']
        lock.acquire()
        try:
            return f(**f_args)
        except BrokenPipeError:
            return {'code': 500, 'msg': 'Internal Server Error'}
        except cqi.errors.CQiException as e:
            return {
                'code': 500,
                'msg': 'Internal Server Error',
                'payload': {
                    'code': e.code,
                    'desc': e.description,
                    'msg': e.__class__.__name__
                }
            }
        finally:
            lock.release()
    return wrapped
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def lookups_by_cpos(corpus, cpos_list):
    """Collect attribute lookups for a list of corpus positions.

    The result always contains ``'cpos_lookup'``, mapping every corpus
    position to a dict with the value of each positional attribute and the
    id of each structural attribute (without values) covering that position;
    ids equal to -1 are left out.

    For every such structural attribute that occurs at least once and has
    sub-attributes (``part_of`` filter), an additional ``'<name>_lookup'``
    maps each occurring id to a dict of sub-attribute values. Sub-attribute
    names are stored with the ``'<name>_'`` prefix stripped.
    """
    by_cpos = {cpos: {} for cpos in cpos_list}
    result = {'cpos_lookup': by_cpos}

    for p_attr in corpus.positional_attributes.list():
        values = p_attr.values_by_cpos(cpos_list)
        for idx, cpos in enumerate(cpos_list):
            by_cpos[cpos][p_attr.attrs['name']] = values[idx]

    for s_attr in corpus.structural_attributes.list():
        # Sub-attributes (has_values == True) are handled via their parent.
        if s_attr.attrs['has_values']:
            continue
        ids = s_attr.ids_by_cpos(cpos_list)
        for idx, cpos in enumerate(cpos_list):
            if ids[idx] != -1:
                by_cpos[cpos][s_attr.attrs['name']] = ids[idx]
        seen_ids = [attr_id for attr_id in set(ids) if attr_id != -1]
        if not seen_ids:
            continue
        sub_attrs = corpus.structural_attributes.list(
            filters={'part_of': s_attr}
        )
        if not sub_attrs:
            continue
        by_id = {attr_id: {} for attr_id in seen_ids}
        result[f'{s_attr.attrs["name"]}_lookup'] = by_id
        # Length of the parent name plus the separating character.
        prefix_len = len(s_attr.attrs['name']) + 1
        for sub_attr in sub_attrs:
            short_name = sub_attr.attrs['name'][prefix_len:]
            sub_values = sub_attr.values_by_ids(seen_ids)
            for idx, sub_value in enumerate(sub_values):
                by_id[seen_ids[idx]][short_name] = sub_value
    return result
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def partial_export_subcorpus(subcorpus, match_id_list, context=25):
    """Export selected matches of a subcorpus together with their lookups.

    Match ids that are negative or not smaller than the subcorpus size are
    ignored. Each exported match is a dict with its number (``num``), the
    match span (``c``) and the left/right context spans (``lc``/``rc``) of
    up to ``context`` corpus positions. A context span is ``None`` when
    ``context`` is 0 or the match touches the respective corpus boundary.

    Returns ``{"matches": []}`` for an empty subcorpus, otherwise a dict
    with the ``matches`` list merged with the result of ``lookups_by_cpos``
    for every corpus position covered by a match or its context.
    """
    if subcorpus.attrs['size'] == 0:
        return {"matches": []}

    # First resolve the start/end corpus position of every valid match id.
    boundaries = []
    for match_id in match_id_list:
        if match_id < 0 or match_id >= subcorpus.attrs['size']:
            continue
        start = subcorpus.dump(
            subcorpus.attrs['fields']['match'], match_id, match_id
        )[0]
        end = subcorpus.dump(
            subcorpus.attrs['fields']['matchend'], match_id, match_id
        )[0]
        boundaries.append((match_id, start, end))

    covered_cpos = set()
    matches = []
    for match_num, match_start, match_end in boundaries:
        corpus_last_cpos = subcorpus.collection.corpus.attrs['size'] - 1

        # Left context: absent at the very start of the corpus.
        left = None
        span_start = match_start
        if match_start != 0 and context != 0:
            left = (max(0, match_start - context), match_start - 1)
            span_start = left[0]

        # Right context: absent at the very end of the corpus.
        right = None
        span_end = match_end
        if match_end != corpus_last_cpos and context != 0:
            right = (
                match_end + 1,
                min(match_end + context, corpus_last_cpos)
            )
            span_end = right[1]

        matches.append({
            'num': match_num,
            'lc': left,
            'c': (match_start, match_end),
            'rc': right
        })
        covered_cpos.update(range(span_start, span_end + 1))

    export = {'matches': matches}
    export.update(
        lookups_by_cpos(subcorpus.collection.corpus, list(covered_cpos))
    )
    return export
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def export_subcorpus(subcorpus, context=25, cutoff=float('inf'), offset=0):
    """Export a window of matches of a subcorpus together with their lookups.

    The window starts at match ``offset`` and holds at most ``cutoff``
    matches; by default all matches are exported. Each exported match is a
    dict with its number (``num``), the match span (``c``) and the
    left/right context spans (``lc``/``rc``) of up to ``context`` corpus
    positions. A context span is ``None`` when ``context`` is 0 or the match
    touches the respective corpus boundary.

    Returns ``{"matches": []}`` for an empty subcorpus, otherwise a dict
    with the ``matches`` list merged with the result of ``lookups_by_cpos``
    for every corpus position covered by a match or its context.
    """
    if subcorpus.attrs['size'] == 0:
        return {"matches": []}

    first_match = max(0, offset)
    last_match = min(offset + cutoff - 1, subcorpus.attrs['size'] - 1)
    match_nums = range(first_match, last_match + 1)
    match_starts = subcorpus.dump(
        subcorpus.attrs['fields']['match'], first_match, last_match
    )
    match_ends = subcorpus.dump(
        subcorpus.attrs['fields']['matchend'], first_match, last_match
    )

    covered_cpos = set()
    matches = []
    for match_num, match_start, match_end in zip(
        match_nums, match_starts, match_ends
    ):
        corpus_last_cpos = subcorpus.collection.corpus.attrs['size'] - 1

        # Left context: absent at the very start of the corpus.
        left = None
        span_start = match_start
        if match_start != 0 and context != 0:
            left = (max(0, match_start - context), match_start - 1)
            span_start = left[0]

        # Right context: absent at the very end of the corpus.
        right = None
        span_end = match_end
        if match_end != corpus_last_cpos and context != 0:
            right = (
                match_end + 1,
                min(match_end + context, corpus_last_cpos)
            )
            span_end = right[1]

        matches.append({
            'num': match_num,
            'lc': left,
            'c': (match_start, match_end),
            'rc': right
        })
        covered_cpos.update(range(span_start, span_end + 1))

    export = {'matches': matches}
    export.update(
        lookups_by_cpos(subcorpus.collection.corpus, list(covered_cpos))
    )
    return export
 | 
			
		||||
@@ -237,7 +237,7 @@ class CorpusAnalysisConcordance {
 | 
			
		||||
            app.flash('No matches selected', 'error');
 | 
			
		||||
            return;
 | 
			
		||||
          }
 | 
			
		||||
          promise = subcorpus.o.partial_export([...subcorpus.selectedItems], 50);
 | 
			
		||||
          promise = subcorpus.o.partialExport([...subcorpus.selectedItems], 50);
 | 
			
		||||
        } else {
 | 
			
		||||
          promise = subcorpus.o.export(50);
 | 
			
		||||
        }
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user