mirror of
				https://gitlab.ub.uni-bielefeld.de/sfb1288inf/nopaque.git
				synced 2025-11-03 20:02:47 +00:00 
			
		
		
		
	Merge branch 'visualizations-update' of gitlab.ub.uni-bielefeld.de:sfb1288inf/nopaque into visualizations-update
This commit is contained in:
		@@ -16,4 +16,4 @@ def before_request():
 | 
			
		||||
    pass
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
from . import cli, cqi_over_socketio, files, followers, routes, json_routes
 | 
			
		||||
from . import cli, cqi_over_sio, files, followers, routes, json_routes
 | 
			
		||||
 
 | 
			
		||||
@@ -1,8 +1,9 @@
 | 
			
		||||
from cqi import CQiClient
 | 
			
		||||
from cqi.errors import CQiException
 | 
			
		||||
from flask import session
 | 
			
		||||
from flask_login import current_user
 | 
			
		||||
from flask_socketio import ConnectionRefusedError
 | 
			
		||||
from threading import Lock
 | 
			
		||||
import cqi
 | 
			
		||||
from app import db, hashids, socketio
 | 
			
		||||
from app.decorators import socketio_login_required
 | 
			
		||||
from app.models import Corpus, CorpusStatus
 | 
			
		||||
@@ -39,16 +40,9 @@ Basic concept:
 | 
			
		||||
'''
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
NAMESPACE = '/corpora/corpus/corpus_analysis'
 | 
			
		||||
NAMESPACE = '/cqi_over_sio'
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
# Import all CQi over Socket.IO event handlers
 | 
			
		||||
from .cqi_corpora_corpus_subcorpora import *  # noqa
 | 
			
		||||
from .cqi_corpora_corpus_structural_attributes import *  # noqa
 | 
			
		||||
from .cqi_corpora_corpus_positional_attributes import *  # noqa
 | 
			
		||||
from .cqi_corpora_corpus_alignment_attributes import *  # noqa
 | 
			
		||||
from .cqi_corpora_corpus import *  # noqa
 | 
			
		||||
from .cqi_corpora import *  # noqa
 | 
			
		||||
from .cqi import *  # noqa
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@@ -89,8 +83,8 @@ def connect(auth):
 | 
			
		||||
        socketio.sleep(3)
 | 
			
		||||
        retry_counter -= 1
 | 
			
		||||
        db.session.refresh(corpus)
 | 
			
		||||
    cqi_client = cqi.CQiClient(f'cqpserver_{corpus_id}')
 | 
			
		||||
    session['d'] = {
 | 
			
		||||
    cqi_client = CQiClient(f'cqpserver_{corpus_id}')
 | 
			
		||||
    session['cqi_over_sio'] = {
 | 
			
		||||
        'corpus_id': corpus_id,
 | 
			
		||||
        'cqi_client': cqi_client,
 | 
			
		||||
        'cqi_client_lock': Lock(),
 | 
			
		||||
@@ -100,16 +94,19 @@ def connect(auth):
 | 
			
		||||
 | 
			
		||||
@socketio.on('disconnect', namespace=NAMESPACE)
 | 
			
		||||
def disconnect():
 | 
			
		||||
    if 'd' not in session:
 | 
			
		||||
        return
 | 
			
		||||
    session['d']['cqi_client_lock'].acquire()
 | 
			
		||||
    try:
 | 
			
		||||
        session['d']['cqi_client'].disconnect()
 | 
			
		||||
    except (BrokenPipeError, cqi.errors.CQiException):
 | 
			
		||||
        cqi_client: CQiClient = session['cqi_over_sio']['cqi_client']
 | 
			
		||||
        cqi_client_lock: Lock = session['cqi_over_sio']['cqi_client_lock']
 | 
			
		||||
    except KeyError:
 | 
			
		||||
        return
 | 
			
		||||
    cqi_client_lock.acquire()
 | 
			
		||||
    try:
 | 
			
		||||
        cqi_client.api.ctrl_bye()
 | 
			
		||||
    except (BrokenPipeError, CQiException):
 | 
			
		||||
        pass
 | 
			
		||||
    session['d']['cqi_client_lock'].release()
 | 
			
		||||
    corpus = Corpus.query.get(session['d']['corpus_id'])
 | 
			
		||||
    cqi_client_lock.release()
 | 
			
		||||
    corpus = Corpus.query.get(session['cqi_over_sio']['corpus_id'])
 | 
			
		||||
    corpus.num_analysis_sessions = Corpus.num_analysis_sessions - 1
 | 
			
		||||
    db.session.commit()
 | 
			
		||||
    session.pop('d')
 | 
			
		||||
    session.pop('cqi_over_sio')
 | 
			
		||||
    # return {'code': 200, 'msg': 'OK'}
 | 
			
		||||
							
								
								
									
										120
									
								
								app/corpora/cqi_over_sio/cqi.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										120
									
								
								app/corpora/cqi_over_sio/cqi.py
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,120 @@
 | 
			
		||||
from cqi import CQiClient
 | 
			
		||||
from cqi.errors import CQiException
 | 
			
		||||
from cqi.status import CQiStatus
 | 
			
		||||
from flask import session
 | 
			
		||||
from inspect import signature
 | 
			
		||||
from threading import Lock
 | 
			
		||||
from typing import Callable, Dict, List
 | 
			
		||||
from app import socketio
 | 
			
		||||
from app.decorators import socketio_login_required
 | 
			
		||||
from . import NAMESPACE as ns
 | 
			
		||||
from .extensions import CQI_EXTENSION_FUNCTION_NAMES
 | 
			
		||||
from . import extensions as extensions_module
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
# Whitelist of CQi API method names that the 'cqi' Socket.IO event handler
# resolves on ``cqi_client.api``. Entries are grouped by prefix and kept in
# alphabetical order.
CQI_FUNCTION_NAMES: List[str] = [
    # ask_feature_*
    'ask_feature_cl_2_3', 'ask_feature_cqi_1_0', 'ask_feature_cqp_2_3',
    # cl_*
    'cl_alg2cpos', 'cl_attribute_size', 'cl_cpos2alg', 'cl_cpos2id',
    'cl_cpos2lbound', 'cl_cpos2rbound', 'cl_cpos2str', 'cl_cpos2struc',
    'cl_drop_attribute', 'cl_id2cpos', 'cl_id2freq', 'cl_id2str',
    'cl_idlist2cpos', 'cl_lexicon_size', 'cl_regex2id', 'cl_str2id',
    'cl_struc2cpos', 'cl_struc2str',
    # corpus_*
    'corpus_alignment_attributes', 'corpus_charset', 'corpus_drop_corpus',
    'corpus_full_name', 'corpus_info', 'corpus_list_corpora',
    'corpus_positional_attributes', 'corpus_properties',
    'corpus_structural_attribute_has_values', 'corpus_structural_attributes',
    # cqp_*
    'cqp_drop_subcorpus', 'cqp_dump_subcorpus', 'cqp_fdist_1', 'cqp_fdist_2',
    'cqp_list_subcorpora', 'cqp_query', 'cqp_subcorpus_has_field',
    'cqp_subcorpus_size',
    # ctrl_*
    'ctrl_bye', 'ctrl_connect', 'ctrl_last_general_error', 'ctrl_ping',
    'ctrl_user_abort'
]
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@socketio.on('cqi', namespace=ns)
@socketio_login_required
def cqi_over_sio(fn_data):
    """Dispatch a 'cqi' Socket.IO request to a CQi API or extension function.

    ``fn_data`` must be a dict with the key ``fn_name`` and, optionally,
    ``fn_args`` (a dict of keyword arguments for the requested function).

    The function name is only resolved if it is listed in
    ``CQI_FUNCTION_NAMES`` (looked up on ``cqi_client.api``) or in
    ``CQI_EXTENSION_FUNCTION_NAMES`` (looked up on the extensions module, with
    the session's CQi client injected as ``cqi_client``).

    Returns a response dict with ``code`` and ``msg`` and, where applicable,
    ``payload``:

    * 400 - malformed request, unknown function, missing/unknown/mistyped
      argument
    * 424 - no CQi client is stored in the session
    * 500 - the connection to the CQP server broke (``BrokenPipeError``)
    * 502 - the CQP server answered with an error (``CQiException``)
    * 200 - success; ``payload`` carries the function's return value
    """
    # Client supplied data: make sure it has the expected shape before use.
    if not isinstance(fn_data, dict) or 'fn_name' not in fn_data:
        return {'code': 400, 'msg': 'Bad Request'}
    fn_name: str = fn_data['fn_name']
    fn_args = fn_data.get('fn_args', {})
    if not isinstance(fn_args, dict):
        return {'code': 400, 'msg': 'Bad Request'}
    # Work on a copy so that injecting the client below does not alter the
    # caller's data structure.
    fn_args: Dict = dict(fn_args)
    try:
        cqi_client: CQiClient = session['cqi_over_sio']['cqi_client']
        cqi_client_lock: Lock = session['cqi_over_sio']['cqi_client_lock']
    except KeyError:
        return {'code': 424, 'msg': 'Failed Dependency'}
    if fn_name in CQI_FUNCTION_NAMES:
        fn: Callable = getattr(cqi_client.api, fn_name)
    elif fn_name in CQI_EXTENSION_FUNCTION_NAMES:
        # Always overrides a client supplied 'cqi_client' value.
        fn_args['cqi_client'] = cqi_client
        fn: Callable = getattr(extensions_module, fn_name)
    else:
        return {'code': 400, 'msg': 'Bad Request'}
    fn_params = signature(fn).parameters
    # Unknown keyword arguments would make the call below raise a TypeError.
    if any(arg_name not in fn_params for arg_name in fn_args):
        return {'code': 400, 'msg': 'Bad Request'}
    for param in fn_params.values():
        if param.name not in fn_args:
            if param.default is param.empty:
                # A required argument is missing
                return {'code': 400, 'msg': 'Bad Request'}
            continue
        # Deliberately an exact type match against the annotation (no
        # subclasses), unchanged from the original check.
        if type(fn_args[param.name]) is not param.annotation:
            return {'code': 400, 'msg': 'Bad Request'}
    try:
        # The CQi client is shared by all handlers of this session, so
        # access to it is serialized.
        with cqi_client_lock:
            return_value = fn(**fn_args)
    except BrokenPipeError:
        # Return errors directly instead of wrapping them into a 200 response
        return {'code': 500, 'msg': 'Internal Server Error'}
    except CQiException as e:
        return {
            'code': 502,
            'msg': 'Bad Gateway',
            'payload': {
                'code': e.code,
                'desc': e.description,
                'msg': e.__class__.__name__
            }
        }
    if isinstance(return_value, CQiStatus):
        payload = {
            'code': return_value.code,
            'msg': return_value.__class__.__name__
        }
    else:
        # NOTE(review): extension functions may return their own
        # {'code': ..., 'msg': ...} dict (e.g. 416); it is still delivered as
        # the payload of a 200 response here - confirm this is intended.
        payload = return_value
    return {'code': 200, 'msg': 'OK', 'payload': payload}
 | 
			
		||||
							
								
								
									
										254
									
								
								app/corpora/cqi_over_sio/extensions.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										254
									
								
								app/corpora/cqi_over_sio/extensions.py
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,254 @@
 | 
			
		||||
from collections import Counter
 | 
			
		||||
from cqi import CQiClient
 | 
			
		||||
from cqi.models.corpora import Corpus
 | 
			
		||||
from cqi.status import StatusOk
 | 
			
		||||
from flask import session
 | 
			
		||||
from typing import Dict, List
 | 
			
		||||
import json
 | 
			
		||||
import math
 | 
			
		||||
import os
 | 
			
		||||
from app import db
 | 
			
		||||
from app.models import Corpus
 | 
			
		||||
from .utils import lookups_by_cpos, partial_export_subcorpus, export_subcorpus
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
# Names of the extension functions defined in this module that are exposed
# to clients in addition to the plain CQi API functions.
CQI_EXTENSION_FUNCTION_NAMES: List[str] = [
    # corpus level extensions
    'ext_corpus_update_db', 'ext_corpus_static_data',
    'ext_corpus_paginate_corpus',
    # subcorpus (query result) level extensions
    'ext_cqp_paginate_subcorpus', 'ext_cqp_partial_export_subcorpus',
    'ext_cqp_export_subcorpus'
]
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def ext_corpus_update_db(cqi_client: CQiClient, corpus: str):
    """Copy the size of the CQi corpus into the database corpus record.

    The database corpus is the one referenced by the ``corpus_id`` stored in
    the ``cqi_over_sio`` session entry; its ``num_tokens`` column is set to
    the size of the CQi corpus named ``corpus`` and the change is committed.
    Returns a ``StatusOk`` instance.
    """
    corpus_id = session['cqi_over_sio']['corpus_id']
    db_corpus = Corpus.query.get(corpus_id)
    db_corpus.num_tokens = cqi_client.corpora.get(corpus).size
    db.session.commit()
    return StatusOk()
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def ext_corpus_static_data(cqi_client: CQiClient, corpus: str) -> Dict:
    """Return the static data of a corpus, computing and caching it if needed.

    The result is cached as JSON in ``<corpus path>/cwb/static.json``. If that
    file exists it is loaded and returned as is, otherwise the data is
    collected from the CQP server, written to the file and returned.

    Layout of the returned dict:

    * ``corpus``: ``bounds``, ``counts`` (tokens and regions per structural
      attribute) and ``freqs`` (per positional attribute: lexicon id -> freq)
    * ``p_attrs``: per positional attribute: cpos -> lexicon id
    * ``s_attrs``: per structural attribute: ``lexicon`` (region id ->
      bounds/counts/freqs) and ``values`` (names of its value attributes)
    * ``values``: lexicon id -> value for positional attributes and
      region id -> {value name: value} for structural attributes

    Note that a freshly computed result uses ``int`` keys while a result
    loaded from the cache file uses ``str`` keys, because JSON object keys
    are always strings.
    """
    db_corpus = Corpus.query.get(session['cqi_over_sio']['corpus_id'])
    static_corpus_data_file = os.path.join(db_corpus.path, 'cwb', 'static.json')
    if os.path.exists(static_corpus_data_file):
        with open(static_corpus_data_file, 'r') as f:
            return json.load(f)
    cqi_corpus = cqi_client.corpora.get(corpus)
    corpus_size = cqi_corpus.size
    # Every corpus position, built once and reused for all p_attrs
    corpus_cpos_list = list(range(corpus_size))
    # NOTE: The region boundaries of small s_attrs could possibly be fetched
    # faster by running a CQP query like '<s> []* </s>;' and dumping the
    # 'match'/'matchend' fields of the resulting subcorpus instead of asking
    # for the boundaries of each region individually (idea not implemented).
    cqi_p_attrs = {
        p_attr.name: p_attr
        for p_attr in cqi_corpus.positional_attributes.list()
    }
    cqi_s_attrs = {
        s_attr.name: s_attr
        for s_attr in cqi_corpus.structural_attributes.list()
    }
    static_corpus_data = {
        'corpus': {
            'bounds': [0, corpus_size - 1],
            'counts': {
                'token': corpus_size
            },
            'freqs': {}
        },
        'p_attrs': {},
        's_attrs': {},
        'values': {'p_attrs': {}, 's_attrs': {}}
    }
    for p_attr in cqi_p_attrs.values():
        lexicon_ids = list(range(p_attr.lexicon_size))
        static_corpus_data['corpus']['freqs'][p_attr.name] = dict(
            zip(lexicon_ids, p_attr.freqs_by_ids(lexicon_ids))
        )
        static_corpus_data['p_attrs'][p_attr.name] = dict(
            zip(corpus_cpos_list, p_attr.ids_by_cpos(corpus_cpos_list))
        )
        static_corpus_data['values']['p_attrs'][p_attr.name] = dict(
            zip(lexicon_ids, p_attr.values_by_ids(lexicon_ids))
        )
    for s_attr in cqi_s_attrs.values():
        # s_attrs with values are handled below as value attributes of the
        # s_attr they are part of
        if s_attr.has_values:
            continue
        s_attr_ids = list(range(s_attr.size))
        s_attr_lexicon = {}
        static_corpus_data['corpus']['counts'][s_attr.name] = s_attr.size
        static_corpus_data['s_attrs'][s_attr.name] = {
            'lexicon': s_attr_lexicon,
            'values': None
        }
        static_corpus_data['values']['s_attrs'][s_attr.name] = {}
        for s_attr_id in s_attr_ids:
            lbound, rbound = s_attr.cpos_by_id(s_attr_id)
            lexicon_entry = {
                'bounds': [lbound, rbound],
                'counts': {'token': rbound - lbound + 1}
            }
            s_attr_lexicon[s_attr_id] = lexicon_entry
            # Further counts are only collected for 'text' and 's' regions
            if s_attr.name not in ['text', 's']:
                continue
            region_cpos_list = list(range(lbound, rbound + 1))
            # Requires the corpus to have an 'ent' s_attr (KeyError
            # otherwise); -1 marks positions outside of any region
            lexicon_entry['counts']['ent'] = len({
                x for x in cqi_s_attrs['ent'].ids_by_cpos(region_cpos_list)
                if x != -1
            })
            # Sentence counts and frequencies are only collected for 'text'
            if s_attr.name != 'text':
                continue
            lexicon_entry['counts']['s'] = len({
                x for x in cqi_s_attrs['s'].ids_by_cpos(region_cpos_list)
                if x != -1
            })
            lexicon_entry['freqs'] = {
                p_attr.name: dict(
                    Counter(p_attr.ids_by_cpos(region_cpos_list))
                )
                for p_attr in cqi_p_attrs.values()
            }
        sub_s_attrs = cqi_corpus.structural_attributes.list(
            filters={'part_of': s_attr}
        )
        # Strip the '<s_attr name>_' prefix from the value attribute names
        s_attr_value_names = [
            sub_s_attr.name[(len(s_attr.name) + 1):]
            for sub_s_attr in sub_s_attrs
        ]
        sub_s_attr_values = [
            sub_s_attr.values_by_ids(s_attr_ids)
            for sub_s_attr in sub_s_attrs
        ]
        static_corpus_data['s_attrs'][s_attr.name]['values'] = s_attr_value_names
        static_corpus_data['values']['s_attrs'][s_attr.name] = {
            s_attr_id: {
                value_name: values[s_attr_id]
                for value_name, values in zip(
                    s_attr_value_names,
                    sub_s_attr_values
                )
            }
            for s_attr_id in s_attr_ids
        }
    with open(static_corpus_data_file, 'w') as f:
        json.dump(static_corpus_data, f)
    return static_corpus_data
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def ext_corpus_paginate_corpus(
    cqi_client: CQiClient,
    corpus: str,
    page: int = 1,
    per_page: int = 20
) -> Dict:
    """Return one page of corpus positions of a corpus with their lookups.

    ``page`` is 1 indexed and ``per_page`` is the page size. If either is
    smaller than 1, or ``page`` lies behind the last page of a non-empty
    corpus, ``{'code': 416, 'msg': 'Range Not Satisfiable'}`` is returned.

    Otherwise a pagination dict is returned with the keys ``items``,
    ``lookups``, ``total``, ``per_page``, ``pages``, ``page``, ``has_prev``,
    ``has_next``, ``prev_num`` and ``next_num``.
    """
    cqi_corpus = cqi_client.corpora.get(corpus)
    # Sanity checks
    if per_page < 1 or page < 1:
        return {'code': 416, 'msg': 'Range Not Satisfiable'}
    if cqi_corpus.size > 0 and page > math.ceil(cqi_corpus.size / per_page):
        return {'code': 416, 'msg': 'Range Not Satisfiable'}
    first_cpos = (page - 1) * per_page
    last_cpos = min(cqi_corpus.size, first_cpos + per_page)
    cpos_list = list(range(first_cpos, last_cpos))
    lookups = lookups_by_cpos(cqi_corpus, cpos_list)
    total = cqi_corpus.size
    pages = math.ceil(total / per_page)
    # There is no current page if there are no pages at all
    current_page = page if pages > 0 else None
    if current_page:
        has_prev = current_page > 1
        has_next = current_page < pages
    else:
        has_prev = False
        has_next = False
    return {
        # the items for the current page
        'items': [cpos_list],
        # the lookups for the items
        'lookups': lookups,
        # the total number of items
        'total': total,
        # the number of items to be displayed on a page
        'per_page': per_page,
        # the total number of pages
        'pages': pages,
        # the current page number (1 indexed)
        'page': current_page,
        # True if a previous page exists
        'has_prev': has_prev,
        # True if a next page exists
        'has_next': has_next,
        # number of the previous page
        'prev_num': current_page - 1 if has_prev else None,
        # number of the next page
        'next_num': current_page + 1 if has_next else None
    }
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def ext_cqp_paginate_subcorpus(
    cqi_client: CQiClient,
    subcorpus: str,
    context: int = 50,
    page: int = 1,
    per_page: int = 20
) -> Dict:
    """Return one page of matches of a subcorpus with their lookups.

    ``subcorpus`` has the form ``'<corpus name>:<subcorpus name>'``.
    ``page`` is 1 indexed and ``per_page`` is the page size. If either is
    smaller than 1, or ``page`` lies behind the last page of a non-empty
    subcorpus, ``{'code': 416, 'msg': 'Range Not Satisfiable'}`` is returned.

    Otherwise a pagination dict is returned with the keys ``items``,
    ``lookups``, ``total``, ``per_page``, ``pages``, ``page``, ``has_prev``,
    ``has_next``, ``prev_num`` and ``next_num``.
    """
    corpus_name, subcorpus_name = subcorpus.split(':', 1)
    cqi_subcorpus = (
        cqi_client.corpora.get(corpus_name).subcorpora.get(subcorpus_name)
    )
    # Sanity checks
    if per_page < 1 or page < 1:
        return {'code': 416, 'msg': 'Range Not Satisfiable'}
    if (
        cqi_subcorpus.size > 0
        and page > math.ceil(cqi_subcorpus.size / per_page)
    ):
        return {'code': 416, 'msg': 'Range Not Satisfiable'}
    cqi_results_export = export_subcorpus(
        cqi_subcorpus,
        context=context,
        cutoff=per_page,
        offset=(page - 1) * per_page
    )
    # Everything that remains in the export after removing the matches is
    # lookup data for them
    items = cqi_results_export.pop('matches')
    total = cqi_subcorpus.size
    pages = math.ceil(total / per_page)
    # There is no current page if there are no pages at all
    current_page = page if pages > 0 else None
    if current_page:
        has_prev = current_page > 1
        has_next = current_page < pages
    else:
        has_prev = False
        has_next = False
    return {
        # the items for the current page
        'items': items,
        # the lookups for the items
        'lookups': cqi_results_export,
        # the total number of items matching the query
        'total': total,
        # the number of items to be displayed on a page
        'per_page': per_page,
        # the total number of pages
        'pages': pages,
        # the current page number (1 indexed)
        'page': current_page,
        # True if a previous page exists
        'has_prev': has_prev,
        # True if a next page exists
        'has_next': has_next,
        # number of the previous page
        'prev_num': current_page - 1 if has_prev else None,
        # number of the next page
        'next_num': current_page + 1 if has_next else None
    }
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def ext_cqp_partial_export_subcorpus(
    cqi_client: CQiClient,
    subcorpus: str,
    match_id_list: list,
    context: int = 50
) -> Dict:
    """Export the matches listed in ``match_id_list`` from a subcorpus.

    ``subcorpus`` has the form ``'<corpus name>:<subcorpus name>'`` and
    ``context`` is handed on to ``partial_export_subcorpus``, whose result is
    returned unchanged.
    """
    corpus_name, subcorpus_name = subcorpus.split(':', 1)
    cqi_subcorpus = (
        cqi_client.corpora.get(corpus_name).subcorpora.get(subcorpus_name)
    )
    return partial_export_subcorpus(
        cqi_subcorpus,
        match_id_list,
        context=context
    )
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def ext_cqp_export_subcorpus(
    cqi_client: CQiClient,
    subcorpus: str,
    context: int = 50
) -> Dict:
    """Export all matches of a subcorpus.

    ``subcorpus`` has the form ``'<corpus name>:<subcorpus name>'`` and
    ``context`` is handed on to ``export_subcorpus``, whose result is
    returned unchanged.
    """
    corpus_name, subcorpus_name = subcorpus.split(':', 1)
    cqi_subcorpus = (
        cqi_client.corpora.get(corpus_name).subcorpora.get(subcorpus_name)
    )
    return export_subcorpus(cqi_subcorpus, context=context)
 | 
			
		||||
@@ -1,64 +1,10 @@
 | 
			
		||||
from flask import session
 | 
			
		||||
from functools import wraps
 | 
			
		||||
from inspect import signature
 | 
			
		||||
import cqi
 | 
			
		||||
from cqi.models.corpora import Corpus
 | 
			
		||||
from cqi.models.subcorpora import Subcorpus
 | 
			
		||||
from typing import Dict, List
 | 
			
		||||
from app.models import Corpus
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def cqi_over_socketio(f):
    """Decorate a Socket.IO event handler that talks to the CQP server.

    The wrapped handler is called with keyword arguments that are assembled
    from its signature:

    * ``corpus_name`` is set to ``NOPAQUE_<corpus id of the session>``
    * ``cqi_client`` is set to the CQi client stored in the session
    * every other parameter is taken from the first positional argument
      (the data sent by the client) and must exactly match the annotated
      type; parameters with a default value may be omitted

    The call itself is made while holding the session's CQi client lock.
    Returns 424 if there is no session data, 400 for missing or mistyped
    arguments, 500 if the call raises ``BrokenPipeError`` or
    ``CQiException``, else the return value of the wrapped handler.
    """
    @wraps(f)
    def wrapped(*args):
        if 'd' not in session:
            return {'code': 424, 'msg': 'Failed Dependency'}
        f_args = {}
        # Check for missing args and if all provided args are of the right type
        for param in signature(f).parameters.values():
            name = param.name
            # Parameters that are provided by the server side session
            if name == 'corpus_name':
                f_args[name] = f'NOPAQUE_{session["d"]["corpus_id"]}'
                continue
            if name == 'cqi_client':
                f_args[name] = session['d']['cqi_client']
                continue
            is_required = param.default is param.empty
            if name not in args[0]:
                if is_required:
                    return {'code': 400, 'msg': 'Bad Request'}
                # Omitted optional argument, the default value is used
                continue
            arg = args[0][name]
            if type(arg) is not param.annotation:
                return {'code': 400, 'msg': 'Bad Request'}
            f_args[name] = arg
        session['d']['cqi_client_lock'].acquire()
        try:
            return_value = f(**f_args)
        except BrokenPipeError:
            return_value = {'code': 500, 'msg': 'Internal Server Error'}
        except cqi.errors.CQiException as e:
            error_payload = {
                'code': e.code,
                'desc': e.description,
                'msg': e.name
            }
            return_value = {
                'code': 500,
                'msg': 'Internal Server Error',
                'payload': error_payload
            }
        finally:
            session['d']['cqi_client_lock'].release()
        return return_value
    return wrapped
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def lookups_by_cpos(corpus, cpos_list):
 | 
			
		||||
def lookups_by_cpos(corpus: Corpus, cpos_list: List[int]) -> Dict:
 | 
			
		||||
    lookups = {}
 | 
			
		||||
    lookups['cpos_lookup'] = {cpos: {} for cpos in cpos_list}
 | 
			
		||||
    for attr in corpus.positional_attributes.list():
 | 
			
		||||
@@ -93,18 +39,22 @@ def lookups_by_cpos(corpus, cpos_list):
 | 
			
		||||
    return lookups
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def partial_export_subcorpus(subcorpus, match_id_list, context=25):
 | 
			
		||||
    if subcorpus.attrs['size'] == 0:
 | 
			
		||||
def partial_export_subcorpus(
 | 
			
		||||
    subcorpus: Subcorpus,
 | 
			
		||||
    match_id_list: List[int],
 | 
			
		||||
    context: int = 25
 | 
			
		||||
) -> Dict:
 | 
			
		||||
    if subcorpus.size == 0:
 | 
			
		||||
        return {"matches": []}
 | 
			
		||||
    match_boundaries = []
 | 
			
		||||
    for match_id in match_id_list:
 | 
			
		||||
        if match_id < 0 or match_id >= subcorpus.attrs['size']:
 | 
			
		||||
        if match_id < 0 or match_id >= subcorpus.size:
 | 
			
		||||
            continue
 | 
			
		||||
        match_boundaries.append(
 | 
			
		||||
            (
 | 
			
		||||
                match_id,
 | 
			
		||||
                subcorpus.dump(subcorpus.attrs['fields']['match'], match_id, match_id)[0],
 | 
			
		||||
                subcorpus.dump(subcorpus.attrs['fields']['matchend'], match_id, match_id)[0]
 | 
			
		||||
                subcorpus.dump(subcorpus.fields['match'], match_id, match_id)[0],
 | 
			
		||||
                subcorpus.dump(subcorpus.fields['matchend'], match_id, match_id)[0]
 | 
			
		||||
            )
 | 
			
		||||
        )
 | 
			
		||||
    cpos_set = set()
 | 
			
		||||
@@ -120,14 +70,14 @@ def partial_export_subcorpus(subcorpus, match_id_list, context=25):
 | 
			
		||||
            lc_rbound = match_start - 1
 | 
			
		||||
            lc = (lc_lbound, lc_rbound)
 | 
			
		||||
            cpos_list_lbound = lc_lbound
 | 
			
		||||
        if match_end == (subcorpus.collection.corpus.attrs['size'] - 1) or context == 0:
 | 
			
		||||
        if match_end == (subcorpus.collection.corpus.size - 1) or context == 0:
 | 
			
		||||
            rc = None
 | 
			
		||||
            cpos_list_rbound = match_end
 | 
			
		||||
        else:
 | 
			
		||||
            rc_lbound = match_end + 1
 | 
			
		||||
            rc_rbound = min(
 | 
			
		||||
                (match_end + context),
 | 
			
		||||
                (subcorpus.collection.corpus.attrs['size'] - 1)
 | 
			
		||||
                (subcorpus.collection.corpus.size - 1)
 | 
			
		||||
            )
 | 
			
		||||
            rc = (rc_lbound, rc_rbound)
 | 
			
		||||
            cpos_list_rbound = rc_rbound
 | 
			
		||||
@@ -138,15 +88,20 @@ def partial_export_subcorpus(subcorpus, match_id_list, context=25):
 | 
			
		||||
    return {'matches': matches, **lookups}
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def export_subcorpus(subcorpus, context=25, cutoff=float('inf'), offset=0):
 | 
			
		||||
    if subcorpus.attrs['size'] == 0:
 | 
			
		||||
def export_subcorpus(
 | 
			
		||||
    subcorpus: Subcorpus,
 | 
			
		||||
    context: int = 25,
 | 
			
		||||
    cutoff: float = float('inf'),
 | 
			
		||||
    offset: int = 0
 | 
			
		||||
) -> Dict:
 | 
			
		||||
    if subcorpus.size == 0:
 | 
			
		||||
        return {"matches": []}
 | 
			
		||||
    first_match = max(0, offset)
 | 
			
		||||
    last_match = min((offset + cutoff - 1), (subcorpus.attrs['size'] - 1))
 | 
			
		||||
    last_match = min((offset + cutoff - 1), (subcorpus.size - 1))
 | 
			
		||||
    match_boundaries = zip(
 | 
			
		||||
        list(range(first_match, last_match + 1)),
 | 
			
		||||
        subcorpus.dump(subcorpus.attrs['fields']['match'], first_match, last_match),
 | 
			
		||||
        subcorpus.dump(subcorpus.attrs['fields']['matchend'], first_match, last_match)
 | 
			
		||||
        range(first_match, last_match + 1),
 | 
			
		||||
        subcorpus.dump(subcorpus.fields['match'], first_match, last_match),
 | 
			
		||||
        subcorpus.dump(subcorpus.fields['matchend'], first_match, last_match)
 | 
			
		||||
    )
 | 
			
		||||
    cpos_set = set()
 | 
			
		||||
    matches = []
 | 
			
		||||
@@ -160,14 +115,14 @@ def export_subcorpus(subcorpus, context=25, cutoff=float('inf'), offset=0):
 | 
			
		||||
            lc_rbound = match_start - 1
 | 
			
		||||
            lc = (lc_lbound, lc_rbound)
 | 
			
		||||
            cpos_list_lbound = lc_lbound
 | 
			
		||||
        if match_end == (subcorpus.collection.corpus.attrs['size'] - 1) or context == 0:
 | 
			
		||||
        if match_end == (subcorpus.collection.corpus.size - 1) or context == 0:
 | 
			
		||||
            rc = None
 | 
			
		||||
            cpos_list_rbound = match_end
 | 
			
		||||
        else:
 | 
			
		||||
            rc_lbound = match_end + 1
 | 
			
		||||
            rc_rbound = min(
 | 
			
		||||
                (match_end + context),
 | 
			
		||||
                (subcorpus.collection.corpus.attrs['size'] - 1)
 | 
			
		||||
                (subcorpus.collection.corpus.size - 1)
 | 
			
		||||
            )
 | 
			
		||||
            rc = (rc_lbound, rc_rbound)
 | 
			
		||||
            cpos_list_rbound = rc_rbound
 | 
			
		||||
@@ -1,43 +0,0 @@
 | 
			
		||||
from socket import gaierror
 | 
			
		||||
import cqi
 | 
			
		||||
from app import socketio
 | 
			
		||||
from app.decorators import socketio_login_required
 | 
			
		||||
from . import NAMESPACE as ns
 | 
			
		||||
from .utils import cqi_over_socketio
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@socketio.on('cqi.connect', namespace=ns)
@socketio_login_required
@cqi_over_socketio
def cqi_connect(cqi_client: cqi.CQiClient):
    """Handle the 'cqi.connect' event by calling ``cqi_client.connect()``.

    Returns a 200 response whose payload holds the CQi status code and the
    name of the status class. If ``connect()`` raises ``socket.gaierror``,
    a 500 response is returned instead, carrying the first two entries of
    the exception's ``args`` as ``code`` and ``desc``.
    """
    try:
        status = cqi_client.connect()
    except gaierror as error:
        error_payload = {'code': error.args[0], 'desc': error.args[1]}
        return {
            'code': 500,
            'msg': 'Internal Server Error',
            'payload': error_payload
        }
    return {
        'code': 200,
        'msg': 'OK',
        'payload': {
            'code': status.code,
            'msg': status.__class__.__name__
        }
    }
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@socketio.on('cqi.disconnect', namespace=ns)
@socketio_login_required
@cqi_over_socketio
def cqi_disconnect(cqi_client: cqi.CQiClient):
    """Handle the 'cqi.disconnect' event via ``cqi_client.disconnect()``.

    Returns a 200 response whose payload holds the CQi status code and the
    name of the status class.
    """
    status = cqi_client.disconnect()
    return {
        'code': 200,
        'msg': 'OK',
        'payload': {
            'code': status.code,
            'msg': status.__class__.__name__
        }
    }
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@socketio.on('cqi.ping', namespace=ns)
@socketio_login_required
@cqi_over_socketio
def cqi_ping(cqi_client: cqi.CQiClient):
    """Handle the 'cqi.ping' event by calling ``cqi_client.ping()``.

    Returns a 200 response whose payload holds the CQi status code and the
    name of the status class.
    """
    status = cqi_client.ping()
    return {
        'code': 200,
        'msg': 'OK',
        'payload': {
            'code': status.code,
            'msg': status.__class__.__name__
        }
    }
 | 
			
		||||
@@ -1,22 +0,0 @@
 | 
			
		||||
import cqi
 | 
			
		||||
from app import socketio
 | 
			
		||||
from app.decorators import socketio_login_required
 | 
			
		||||
from . import NAMESPACE as ns
 | 
			
		||||
from .utils import cqi_over_socketio
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@socketio.on('cqi.corpora.get', namespace=ns)
@socketio_login_required
@cqi_over_socketio
def cqi_corpora_get(cqi_client: cqi.CQiClient, corpus_name: str):
    """Handle 'cqi.corpora.get': look up one corpus by name.

    Returns a 200 response whose payload is a copy of the corpus'
    ``attrs`` mapping.
    """
    corpus_attrs = cqi_client.corpora.get(corpus_name).attrs
    return {'code': 200, 'msg': 'OK', 'payload': dict(corpus_attrs)}
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@socketio.on('cqi.corpora.list', namespace=ns)
@socketio_login_required
@cqi_over_socketio
def cqi_corpora_list(cqi_client: cqi.CQiClient):
    """Handle 'cqi.corpora.list': enumerate the corpora of the client.

    Returns a 200 response whose payload is a list with one copy of the
    ``attrs`` mapping per corpus.
    """
    corpora = cqi_client.corpora.list()
    return {
        'code': 200,
        'msg': 'OK',
        'payload': [dict(corpus.attrs) for corpus in corpora]
    }
 | 
			
		||||
@@ -1,199 +0,0 @@
 | 
			
		||||
from collections import Counter
 | 
			
		||||
from flask import session
 | 
			
		||||
import cqi
 | 
			
		||||
import json
 | 
			
		||||
import math
 | 
			
		||||
import os
 | 
			
		||||
from app import db, socketio
 | 
			
		||||
from app.decorators import socketio_login_required
 | 
			
		||||
from app.models import Corpus
 | 
			
		||||
from . import NAMESPACE as ns
 | 
			
		||||
from .utils import cqi_over_socketio, lookups_by_cpos
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@socketio.on('cqi.corpora.corpus.drop', namespace=ns)
@socketio_login_required
@cqi_over_socketio
def cqi_corpora_corpus_drop(cqi_client: cqi.CQiClient, corpus_name: str):
    """Handle 'cqi.corpora.corpus.drop': call ``drop()`` on a corpus.

    Returns a 200 response whose payload holds the CQi status code and the
    name of the status class returned by ``drop()``.
    """
    status = cqi_client.corpora.get(corpus_name).drop()
    return {
        'code': 200,
        'msg': 'OK',
        'payload': {
            'code': status.code,
            'msg': status.__class__.__name__
        }
    }
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@socketio.on('cqi.corpora.corpus.query', namespace=ns)
@socketio_login_required
@cqi_over_socketio
def cqi_corpora_corpus_query(
    cqi_client: cqi.CQiClient,
    corpus_name: str,
    subcorpus_name: str,
    query: str
):
    """Handle 'cqi.corpora.corpus.query': run a query on a corpus.

    ``subcorpus_name`` and ``query`` are passed through, in that order, to
    the corpus' ``query()`` method. Returns a 200 response whose payload
    holds the resulting CQi status code and status class name.
    """
    target_corpus = cqi_client.corpora.get(corpus_name)
    status = target_corpus.query(subcorpus_name, query)
    return {
        'code': 200,
        'msg': 'OK',
        'payload': {
            'code': status.code,
            'msg': status.__class__.__name__
        }
    }
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
###############################################################################
 | 
			
		||||
# nopaque specific CQi extensions                                             #
 | 
			
		||||
###############################################################################
 | 
			
		||||
@socketio.on('cqi.corpora.corpus.update_db', namespace=ns)
@socketio_login_required
@cqi_over_socketio
def cqi_corpora_corpus_update_db(cqi_client: cqi.CQiClient, corpus_name: str):
    """Handle 'cqi.corpora.corpus.update_db': sync the token count.

    Loads the ``Corpus`` row whose id is stored in
    ``session['d']['corpus_id']``, sets its ``num_tokens`` to the size
    reported by the CQi corpus ``corpus_name`` and commits the session.

    Returns a 200 response without payload, matching the response
    envelope used by the other handlers of this namespace.
    """
    corpus = Corpus.query.get(session['d']['corpus_id'])
    cqi_corpus = cqi_client.corpora.get(corpus_name)
    corpus.num_tokens = cqi_corpus.size
    db.session.commit()
    # Previously the handler fell off the end and returned None, so the
    # caller received no status at all for a successful update.
    return {'code': 200, 'msg': 'OK'}
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@socketio.on('cqi.corpora.corpus.get_visualization_data', namespace=ns)
@socketio_login_required
@cqi_over_socketio
def cqi_corpora_corpus_get_visualization_data(
    cqi_client: cqi.CQiClient,
    corpus_name: str
):
    """Handle 'cqi.corpora.corpus.get_visualization_data'.

    The data is cached as ``<corpus.path>/cwb/visualization_data.json``.
    If that file exists its content is returned as is; otherwise the data
    is collected via CQi, written to that file and returned.

    The collected payload has four top-level keys:

    * ``corpus``: cpos ``bounds``, ``counts`` (tokens plus the size of
      every processed structural attribute) and per positional attribute
      ``freqs`` (lexicon id -> frequency).
    * ``p_attrs``: per positional attribute, cpos -> lexicon id.
    * ``s_attrs``: per structural attribute without values, a ``lexicon``
      (id -> bounds/counts, plus ``freqs`` for ``text``) and the list of
      value names taken from its sub-attributes.
    * ``values``: lexicon id -> value for positional attributes and
      id -> {value name: value} for structural attributes.

    Returns a 200 response carrying that payload.
    """
    corpus = Corpus.query.get(session['d']['corpus_id'])
    visualization_data_file_path = os.path.join(
        corpus.path,
        'cwb',
        'visualization_data.json'
    )
    if os.path.exists(visualization_data_file_path):
        # NOTE(review): JSON object keys are always strings, so ids in a
        # cached payload are strings whereas a freshly built payload uses
        # ints until it is serialized -- confirm the client copes.
        with open(visualization_data_file_path, 'r') as f:
            payload = json.load(f)
        return {'code': 200, 'msg': 'OK', 'payload': payload}
    cqi_corpus = cqi_client.corpora.get(corpus_name)
    # NOTE(review): a query-based approach ('<s> []* </s>;' followed by a
    # dump of the match/matchend fields) was sketched here as a faster way
    # to get cpos boundaries for smaller s_attrs; it was never enabled.
    cqi_p_attrs = {
        p_attr.name: p_attr
        for p_attr in cqi_corpus.positional_attributes.list()
    }
    cqi_s_attrs = {
        s_attr.name: s_attr
        for s_attr in cqi_corpus.structural_attributes.list()
    }
    corpus_size = cqi_corpus.size
    payload = {
        'corpus': {
            'bounds': [0, corpus_size - 1],
            'counts': {
                'token': corpus_size
            },
            'freqs': {}
        },
        'p_attrs': {},
        's_attrs': {},
        'values': {'p_attrs': {}, 's_attrs': {}}
    }
    all_cpos = list(range(corpus_size))
    for p_attr in cqi_p_attrs.values():
        lexicon_ids = list(range(p_attr.lexicon_size))
        payload['corpus']['freqs'][p_attr.name] = dict(
            zip(lexicon_ids, p_attr.freqs_by_ids(lexicon_ids))
        )
        payload['p_attrs'][p_attr.name] = dict(
            zip(all_cpos, p_attr.ids_by_cpos(all_cpos))
        )
        payload['values']['p_attrs'][p_attr.name] = dict(
            zip(lexicon_ids, p_attr.values_by_ids(lexicon_ids))
        )
    for s_attr in cqi_s_attrs.values():
        if s_attr.has_values:
            continue
        s_attr_name = s_attr.name
        s_attr_ids = list(range(s_attr.size))
        lexicon = {}
        payload['corpus']['counts'][s_attr_name] = s_attr.size
        payload['s_attrs'][s_attr_name] = {'lexicon': lexicon, 'values': None}
        payload['values']['s_attrs'][s_attr_name] = {}
        for s_attr_id in s_attr_ids:
            lbound, rbound = s_attr.cpos_by_id(s_attr_id)
            counts = {'token': rbound - lbound + 1}
            entry = {'bounds': [lbound, rbound], 'counts': counts}
            lexicon[s_attr_id] = entry
            # Only 'text' and 's' regions get nested counts.
            if s_attr_name not in ('text', 's'):
                continue
            region_cpos = list(range(lbound, rbound + 1))
            # -1 is filtered out so that it is not counted as a region id.
            counts['ent'] = len({
                x for x in cqi_s_attrs['ent'].ids_by_cpos(region_cpos)
                if x != -1
            })
            if s_attr_name != 'text':
                continue
            counts['s'] = len({
                x for x in cqi_s_attrs['s'].ids_by_cpos(region_cpos)
                if x != -1
            })
            entry['freqs'] = {
                p_attr.name: dict(Counter(p_attr.ids_by_cpos(region_cpos)))
                for p_attr in cqi_p_attrs.values()
            }
        sub_s_attrs = cqi_corpus.structural_attributes.list(
            filters={'part_of': s_attr}
        )
        # A sub-attribute name is '<s_attr name>' plus one separator
        # character plus the value name; keep only the value name.
        s_attr_value_names = [
            sub_s_attr.name[(len(s_attr_name) + 1):]
            for sub_s_attr in sub_s_attrs
        ]
        sub_s_attr_values = [
            sub_s_attr.values_by_ids(s_attr_ids)
            for sub_s_attr in sub_s_attrs
        ]
        payload['s_attrs'][s_attr_name]['values'] = s_attr_value_names
        payload['values']['s_attrs'][s_attr_name] = {
            s_attr_id: {
                value_name: values[s_attr_id]
                for value_name, values in zip(
                    s_attr_value_names,
                    sub_s_attr_values
                )
            } for s_attr_id in s_attr_ids
        }
    with open(visualization_data_file_path, 'w') as f:
        json.dump(payload, f)
    return {'code': 200, 'msg': 'OK', 'payload': payload}
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@socketio.on('cqi.corpora.corpus.paginate', namespace=ns)
@socketio_login_required
@cqi_over_socketio
def cqi_corpora_corpus_paginate(
    cqi_client: cqi.CQiClient,
    corpus_name: str,
    page: int = 1,
    per_page: int = 20
):
    """Handle 'cqi.corpora.corpus.paginate': page through corpus positions.

    Returns a 416 response if ``per_page`` or ``page`` is below 1, or if
    ``page`` lies beyond the last page of a non-empty corpus. Otherwise a
    200 response is returned whose payload holds the corpus positions of
    the requested page (``items``), their ``lookups`` and the pagination
    metadata (``total``, ``per_page``, ``pages``, ``page``, ``has_prev``,
    ``has_next``, ``prev_num``, ``next_num``).
    """
    cqi_corpus = cqi_client.corpora.get(corpus_name)
    total = cqi_corpus.size
    # Sanity checks
    if per_page < 1 or page < 1:
        return {'code': 416, 'msg': 'Range Not Satisfiable'}
    if total > 0 and page > math.ceil(total / per_page):
        return {'code': 416, 'msg': 'Range Not Satisfiable'}
    page_start = (page - 1) * per_page
    page_stop = min(total, page_start + per_page)
    cpos_list = list(range(page_start, page_stop))
    lookups = lookups_by_cpos(cqi_corpus, cpos_list)
    pages = math.ceil(total / per_page)
    if pages > 0:
        current_page = page
        has_prev = current_page > 1
        has_next = current_page < pages
    else:
        # An empty corpus has no pages, hence no current page either.
        current_page = None
        has_prev = False
        has_next = False
    return {
        'code': 200,
        'msg': 'OK',
        'payload': {
            # NOTE(review): the cpos list is wrapped in an outer list, so
            # 'items' always has exactly one element -- confirm that this
            # is what the client expects.
            'items': [cpos_list],
            'lookups': lookups,
            'total': total,
            'per_page': per_page,
            'pages': pages,
            # 1-indexed, None if there are no pages
            'page': current_page,
            'has_prev': has_prev,
            'has_next': has_next,
            'prev_num': current_page - 1 if has_prev else None,
            'next_num': current_page + 1 if has_next else None
        }
    }
 | 
			
		||||
@@ -1,24 +0,0 @@
 | 
			
		||||
import cqi
 | 
			
		||||
from app import socketio
 | 
			
		||||
from app.decorators import socketio_login_required
 | 
			
		||||
from . import NAMESPACE as ns
 | 
			
		||||
from .utils import cqi_over_socketio
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@socketio.on('cqi.corpora.corpus.alignment_attributes.get', namespace=ns)
@socketio_login_required
@cqi_over_socketio
def cqi_corpora_corpus_alignment_attributes_get(
    cqi_client: cqi.CQiClient,
    corpus_name: str,
    alignment_attribute_name: str
):
    """Handle 'cqi.corpora.corpus.alignment_attributes.get'.

    Looks up the named alignment attribute of the named corpus and returns
    a 200 response whose payload is a copy of its ``attrs`` mapping.
    """
    alignment_attributes = cqi_client.corpora.get(corpus_name).alignment_attributes  # noqa
    attribute = alignment_attributes.get(alignment_attribute_name)
    return {'code': 200, 'msg': 'OK', 'payload': dict(attribute.attrs)}
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@socketio.on('cqi.corpora.corpus.alignment_attributes.list', namespace=ns)
@socketio_login_required
@cqi_over_socketio
def cqi_corpora_corpus_alignment_attributes_list(
    cqi_client: cqi.CQiClient,
    corpus_name: str
):
    """Handle 'cqi.corpora.corpus.alignment_attributes.list'.

    Returns a 200 response whose payload is a list with one copy of the
    ``attrs`` mapping per alignment attribute of the named corpus.
    """
    attributes = cqi_client.corpora.get(corpus_name).alignment_attributes.list()  # noqa
    return {
        'code': 200,
        'msg': 'OK',
        'payload': [dict(attribute.attrs) for attribute in attributes]
    }
 | 
			
		||||
@@ -1,24 +0,0 @@
 | 
			
		||||
import cqi
 | 
			
		||||
from app import socketio
 | 
			
		||||
from app.decorators import socketio_login_required
 | 
			
		||||
from . import NAMESPACE as ns
 | 
			
		||||
from .utils import cqi_over_socketio
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@socketio.on('cqi.corpora.corpus.positional_attributes.get', namespace=ns)
@socketio_login_required
@cqi_over_socketio
def cqi_corpora_corpus_positional_attributes_get(
    cqi_client: cqi.CQiClient,
    corpus_name: str,
    positional_attribute_name: str
):
    """Handle 'cqi.corpora.corpus.positional_attributes.get'.

    Looks up the named positional attribute of the named corpus and
    returns a 200 response whose payload is a copy of its ``attrs``
    mapping.
    """
    positional_attributes = cqi_client.corpora.get(corpus_name).positional_attributes  # noqa
    attribute = positional_attributes.get(positional_attribute_name)
    return {'code': 200, 'msg': 'OK', 'payload': dict(attribute.attrs)}
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@socketio.on('cqi.corpora.corpus.positional_attributes.list', namespace=ns)
@socketio_login_required
@cqi_over_socketio
def cqi_corpora_corpus_positional_attributes_list(
    cqi_client: cqi.CQiClient,
    corpus_name: str
):
    """Handle 'cqi.corpora.corpus.positional_attributes.list'.

    Returns a 200 response whose payload is a list with one copy of the
    ``attrs`` mapping per positional attribute of the named corpus.
    """
    attributes = cqi_client.corpora.get(corpus_name).positional_attributes.list()  # noqa
    return {
        'code': 200,
        'msg': 'OK',
        'payload': [dict(attribute.attrs) for attribute in attributes]
    }
 | 
			
		||||
@@ -1,24 +0,0 @@
 | 
			
		||||
import cqi
 | 
			
		||||
from app import socketio
 | 
			
		||||
from app.decorators import socketio_login_required
 | 
			
		||||
from . import NAMESPACE as ns
 | 
			
		||||
from .utils import cqi_over_socketio
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@socketio.on('cqi.corpora.corpus.structural_attributes.get', namespace=ns)
@socketio_login_required
@cqi_over_socketio
def cqi_corpora_corpus_structural_attributes_get(
    cqi_client: cqi.CQiClient,
    corpus_name: str,
    structural_attribute_name: str
):
    """Handle 'cqi.corpora.corpus.structural_attributes.get'.

    Looks up the named structural attribute of the named corpus and
    returns a 200 response whose payload is a copy of its ``attrs``
    mapping.
    """
    structural_attributes = cqi_client.corpora.get(corpus_name).structural_attributes  # noqa
    attribute = structural_attributes.get(structural_attribute_name)
    return {'code': 200, 'msg': 'OK', 'payload': dict(attribute.attrs)}
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@socketio.on('cqi.corpora.corpus.structural_attributes.list', namespace=ns)
@socketio_login_required
@cqi_over_socketio
def cqi_corpora_corpus_structural_attributes_list(
    cqi_client: cqi.CQiClient,
    corpus_name: str
):
    """Handle 'cqi.corpora.corpus.structural_attributes.list'.

    Returns a 200 response whose payload is a list with one copy of the
    ``attrs`` mapping per structural attribute of the named corpus.
    """
    attributes = cqi_client.corpora.get(corpus_name).structural_attributes.list()  # noqa
    return {
        'code': 200,
        'msg': 'OK',
        'payload': [dict(attribute.attrs) for attribute in attributes]
    }
 | 
			
		||||
@@ -1,125 +0,0 @@
 | 
			
		||||
import cqi
 | 
			
		||||
import math
 | 
			
		||||
from app import socketio
 | 
			
		||||
from app.decorators import socketio_login_required
 | 
			
		||||
from . import NAMESPACE as ns
 | 
			
		||||
from .utils import cqi_over_socketio, export_subcorpus, partial_export_subcorpus
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@socketio.on('cqi.corpora.corpus.subcorpora.get', namespace=ns)
@socketio_login_required
@cqi_over_socketio
def cqi_corpora_corpus_subcorpora_get(
    cqi_client: cqi.CQiClient,
    corpus_name: str,
    subcorpus_name: str
):
    """Handle 'cqi.corpora.corpus.subcorpora.get'.

    Looks up the named subcorpus of the named corpus and returns a 200
    response whose payload is a copy of its ``attrs`` mapping.
    """
    subcorpora = cqi_client.corpora.get(corpus_name).subcorpora
    subcorpus = subcorpora.get(subcorpus_name)
    return {'code': 200, 'msg': 'OK', 'payload': dict(subcorpus.attrs)}
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@socketio.on('cqi.corpora.corpus.subcorpora.list', namespace=ns)
@socketio_login_required
@cqi_over_socketio
def cqi_corpora_corpus_subcorpora_list(
    cqi_client: cqi.CQiClient,
    corpus_name: str
):
    """Handle 'cqi.corpora.corpus.subcorpora.list'.

    Returns a 200 response whose payload is a list with one copy of the
    ``attrs`` mapping per subcorpus of the named corpus.
    """
    subcorpora = cqi_client.corpora.get(corpus_name).subcorpora.list()
    return {
        'code': 200,
        'msg': 'OK',
        'payload': [dict(subcorpus.attrs) for subcorpus in subcorpora]
    }
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@socketio.on('cqi.corpora.corpus.subcorpora.subcorpus.drop', namespace=ns)
@socketio_login_required
@cqi_over_socketio
def cqi_corpora_corpus_subcorpora_subcorpus_drop(
    cqi_client: cqi.CQiClient,
    corpus_name: str,
    subcorpus_name: str
):
    """Handle 'cqi.corpora.corpus.subcorpora.subcorpus.drop'.

    Calls ``drop()`` on the named subcorpus of the named corpus and
    returns a 200 response whose payload holds the resulting CQi status
    code and status class name.
    """
    subcorpora = cqi_client.corpora.get(corpus_name).subcorpora
    status = subcorpora.get(subcorpus_name).drop()
    return {
        'code': 200,
        'msg': 'OK',
        'payload': {
            'code': status.code,
            'msg': status.__class__.__name__
        }
    }
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@socketio.on('cqi.corpora.corpus.subcorpora.subcorpus.dump', namespace=ns)
@socketio_login_required
@cqi_over_socketio
def cqi_corpora_corpus_subcorpora_subcorpus_dump(cqi_client: cqi.CQiClient):
    """Handle 'cqi.corpora.corpus.subcorpora.subcorpus.dump'.

    Placeholder: always answers with a 501 response.
    """
    not_implemented = {'code': 501, 'msg': 'Not Implemented'}
    return not_implemented
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@socketio.on('cqi.corpora.corpus.subcorpora.subcorpus.fdist_1', namespace=ns)
@socketio_login_required
@cqi_over_socketio
def cqi_corpora_corpus_subcorpora_subcorpus_fdist_1(cqi_client: cqi.CQiClient):
    """Handle 'cqi.corpora.corpus.subcorpora.subcorpus.fdist_1'.

    Placeholder: always answers with a 501 response.
    """
    not_implemented = {'code': 501, 'msg': 'Not Implemented'}
    return not_implemented
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@socketio.on('cqi.corpora.corpus.subcorpora.subcorpus.fdist_2', namespace=ns)
@socketio_login_required
@cqi_over_socketio
def cqi_corpora_corpus_subcorpora_subcorpus_fdist_2(cqi_client: cqi.CQiClient):
    """Handle 'cqi.corpora.corpus.subcorpora.subcorpus.fdist_2'.

    Placeholder: always answers with a 501 response.
    """
    not_implemented = {'code': 501, 'msg': 'Not Implemented'}
    return not_implemented
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
###############################################################################
 | 
			
		||||
# nopaque specific CQi extensions                                             #
 | 
			
		||||
###############################################################################
 | 
			
		||||
@socketio.on('cqi.corpora.corpus.subcorpora.subcorpus.paginate', namespace=ns)
@socketio_login_required
@cqi_over_socketio
def cqi_corpora_corpus_subcorpora_subcorpus_paginate(
    cqi_client: cqi.CQiClient,
    corpus_name: str,
    subcorpus_name: str,
    context: int = 50,
    page: int = 1,
    per_page: int = 20
):
    """Handle 'cqi.corpora.corpus.subcorpora.subcorpus.paginate'.

    Returns a 416 response if ``per_page`` or ``page`` is below 1, or if
    ``page`` lies beyond the last page of a non-empty subcorpus. Otherwise
    the matches of the requested page are exported via
    ``export_subcorpus`` and a 200 response is returned whose payload
    holds those matches (``items``), the rest of the export (``lookups``)
    and the pagination metadata (``total``, ``per_page``, ``pages``,
    ``page``, ``has_prev``, ``has_next``, ``prev_num``, ``next_num``).
    """
    cqi_corpus = cqi_client.corpora.get(corpus_name)
    cqi_subcorpus = cqi_corpus.subcorpora.get(subcorpus_name)
    total = cqi_subcorpus.attrs['size']
    # Sanity checks
    if per_page < 1 or page < 1:
        return {'code': 416, 'msg': 'Range Not Satisfiable'}
    if total > 0 and page > math.ceil(total / per_page):
        return {'code': 416, 'msg': 'Range Not Satisfiable'}
    export = export_subcorpus(
        cqi_subcorpus,
        context=context,
        cutoff=per_page,
        offset=(page - 1) * per_page
    )
    # What is left in the export after removing the matches serves as
    # the lookups of the page.
    items = export.pop('matches')
    pages = math.ceil(total / per_page)
    if pages > 0:
        current_page = page
        has_prev = current_page > 1
        has_next = current_page < pages
    else:
        # An empty subcorpus has no pages, hence no current page either.
        current_page = None
        has_prev = False
        has_next = False
    return {
        'code': 200,
        'msg': 'OK',
        'payload': {
            'items': items,
            'lookups': export,
            'total': total,
            'per_page': per_page,
            'pages': pages,
            # 1-indexed, None if there are no pages
            'page': current_page,
            'has_prev': has_prev,
            'has_next': has_next,
            'prev_num': current_page - 1 if has_prev else None,
            'next_num': current_page + 1 if has_next else None
        }
    }
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@socketio.on('cqi.corpora.corpus.subcorpora.subcorpus.partial_export', namespace=ns)  # noqa
@socketio_login_required
@cqi_over_socketio
def cqi_corpora_corpus_subcorpora_subcorpus_partial_export(
    cqi_client: cqi.CQiClient,
    corpus_name: str,
    subcorpus_name: str,
    match_id_list: list,
    context: int = 50
):
    """Handle 'cqi.corpora.corpus.subcorpora.subcorpus.partial_export'.

    Passes the named subcorpus, ``match_id_list`` and ``context`` to
    ``partial_export_subcorpus`` and returns its result as the payload of
    a 200 response.
    """
    subcorpora = cqi_client.corpora.get(corpus_name).subcorpora
    export = partial_export_subcorpus(
        subcorpora.get(subcorpus_name),
        match_id_list,
        context=context
    )
    return {'code': 200, 'msg': 'OK', 'payload': export}
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@socketio.on('cqi.corpora.corpus.subcorpora.subcorpus.export', namespace=ns)
@socketio_login_required
@cqi_over_socketio
def cqi_corpora_corpus_subcorpora_subcorpus_export(
    cqi_client: cqi.CQiClient,
    corpus_name: str,
    subcorpus_name: str,
    context: int = 50
):
    """Handle 'cqi.corpora.corpus.subcorpora.subcorpus.export'.

    Passes the named subcorpus and ``context`` to ``export_subcorpus``
    and returns its result as the payload of a 200 response.
    """
    subcorpora = cqi_client.corpora.get(corpus_name).subcorpora
    export = export_subcorpus(subcorpora.get(subcorpus_name), context=context)
    return {'code': 200, 'msg': 'OK', 'payload': export}
 | 
			
		||||
		Reference in New Issue
	
	Block a user