mirror of
https://gitlab.ub.uni-bielefeld.de/sfb1288inf/nopaque.git
synced 2024-12-25 02:44:18 +00:00
Update cqi package
This commit is contained in:
parent
6b24184206
commit
0db99e33fe
@ -421,13 +421,8 @@ class APIClient:
|
|||||||
self.port = port
|
self.port = port
|
||||||
self.socket = socket.socket()
|
self.socket = socket.socket()
|
||||||
|
|
||||||
def setup(self):
|
|
||||||
self.socket.connect((self.host, self.port))
|
|
||||||
|
|
||||||
def teardown(self):
|
|
||||||
self.socket.close()
|
|
||||||
|
|
||||||
def ctrl_connect(self, username, password):
|
def ctrl_connect(self, username, password):
|
||||||
|
self.socket.connect((self.host, self.port))
|
||||||
# INPUT: (STRING username, STRING password)
|
# INPUT: (STRING username, STRING password)
|
||||||
# OUTPUT: CQI_STATUS_CONNECT_OK, CQI_ERROR_CONNECT_REFUSED
|
# OUTPUT: CQI_STATUS_CONNECT_OK, CQI_ERROR_CONNECT_REFUSED
|
||||||
self.__send_WORD(CTRL_CONNECT)
|
self.__send_WORD(CTRL_CONNECT)
|
||||||
@ -439,7 +434,9 @@ class APIClient:
|
|||||||
# INPUT: ()
|
# INPUT: ()
|
||||||
# OUTPUT: CQI_STATUS_BYE_OK
|
# OUTPUT: CQI_STATUS_BYE_OK
|
||||||
self.__send_WORD(CTRL_BYE)
|
self.__send_WORD(CTRL_BYE)
|
||||||
return self.__recv_response()
|
response = self.__recv_response()
|
||||||
|
self.socket.close()
|
||||||
|
return response
|
||||||
|
|
||||||
def ctrl_user_abort(self):
|
def ctrl_user_abort(self):
|
||||||
# INPUT: ()
|
# INPUT: ()
|
||||||
@ -575,7 +572,7 @@ class APIClient:
|
|||||||
# INPUT: (STRING attribute)
|
# INPUT: (STRING attribute)
|
||||||
# OUTPUT: CQI_STATUS_OK
|
# OUTPUT: CQI_STATUS_OK
|
||||||
# unload attribute from memory
|
# unload attribute from memory
|
||||||
self.__send_WORD(CL_LEXICON_SIZE)
|
self.__send_WORD(CL_DROP_ATTRIBUTE)
|
||||||
self.__send_STRING(attribute)
|
self.__send_STRING(attribute)
|
||||||
return self.__recv_response()
|
return self.__recv_response()
|
||||||
|
|
||||||
@ -589,7 +586,7 @@ class APIClient:
|
|||||||
# OUTPUT: CQI_DATA_INT_LIST
|
# OUTPUT: CQI_DATA_INT_LIST
|
||||||
# returns -1 for every string in <strings> that is not found in the
|
# returns -1 for every string in <strings> that is not found in the
|
||||||
# lexicon
|
# lexicon
|
||||||
self.__send_WORD(CL_LEXICON_SIZE)
|
self.__send_WORD(CL_STR2ID)
|
||||||
self.__send_STRING(attribute)
|
self.__send_STRING(attribute)
|
||||||
self.__send_STRING_LIST(strings)
|
self.__send_STRING_LIST(strings)
|
||||||
return self.__recv_response()
|
return self.__recv_response()
|
||||||
|
@ -1,31 +1,15 @@
|
|||||||
from .api import APIClient
|
from .api import APIClient
|
||||||
from .constants import MAJOR_VERSION, MINOR_VERSION
|
|
||||||
from .models.corpora import CorpusCollection
|
from .models.corpora import CorpusCollection
|
||||||
|
|
||||||
|
|
||||||
class CQiClient(APIClient):
|
class CQiClient:
|
||||||
def __init__(self, host, port=4877):
|
def __init__(self, host, port=4877):
|
||||||
super(CQiClient, self).__init__(host, port=port)
|
self.api = APIClient(host, port=port)
|
||||||
|
|
||||||
def connect(self, username='anonymous', password=''):
|
def connect(self, username='anonymous', password=''):
|
||||||
super(CQiClient, self).setup()
|
self.api.ctrl_connect(username, password)
|
||||||
self.ctrl_connect(username, password)
|
self.corpora = CorpusCollection(self)
|
||||||
self.__load()
|
|
||||||
|
|
||||||
def disconnect(self):
|
def disconnect(self):
|
||||||
self.ctrl_bye()
|
del self.corpora
|
||||||
super(CQiClient, self).teardown()
|
self.api.ctrl_bye()
|
||||||
|
|
||||||
def __load(self):
|
|
||||||
self.corpora = CorpusCollection(self)
|
|
||||||
self.info = {'version': '{}.{}'.format(MAJOR_VERSION, MINOR_VERSION)}
|
|
||||||
|
|
||||||
def features(self):
|
|
||||||
features = []
|
|
||||||
if self.ask_feature_cqi_1_0():
|
|
||||||
features.append('cqi_1_0')
|
|
||||||
if self.ask_feature_cl_2_3():
|
|
||||||
features.append('cl_2_3')
|
|
||||||
if self.ask_feature_cqp_2_3():
|
|
||||||
features.append('cqp_2_3')
|
|
||||||
return features
|
|
||||||
|
139
app/corpora/cqi/models/attributes.py
Normal file
139
app/corpora/cqi/models/attributes.py
Normal file
@ -0,0 +1,139 @@
|
|||||||
|
class AttributeCollection:
    """Entry point to the attribute families of a single corpus.

    One dedicated sub-collection per attribute kind is exposed through the
    ``alignment``, ``positional`` and ``structural`` members.
    """

    def __init__(self, client, corpus):
        """Store ``client``/``corpus`` and build the three sub-collections."""
        self.client, self.corpus = client, corpus
        # Every sub-collection is created with the same client/corpus pair.
        shared = (client, corpus)
        self.alignment = AlignmentAttributeCollection(*shared)
        self.positional = PositionalAttributeCollection(*shared)
        self.structural = StructuralAttributeCollection(*shared)
|
||||||
|
|
||||||
|
|
||||||
|
class Attribute:
    """Base class for one attribute of a corpus, accessed via ``client.api``.

    ``name`` holds the short attribute name, ``_name`` the qualified
    ``<corpus name>.<attribute name>`` form that is handed to the API client.
    """

    def __init__(self, client, corpus, name):
        """Remember the owning objects and fetch the attribute size."""
        qualified = '{0}.{1}'.format(corpus.name, name)
        self.client = client
        self.corpus = corpus
        self.name = name
        self._name = qualified
        # The size is requested from the API once, at construction time.
        self.size = client.api.cl_attribute_size(qualified)

    def drop(self):
        """Call ``cl_drop_attribute`` for this attribute; return its result."""
        api = self.client.api
        return api.cl_drop_attribute(self._name)
|
||||||
|
|
||||||
|
|
||||||
|
class AlignmentAttributeCollection:
    """Accessor for the alignment attributes of one corpus."""

    def __init__(self, client, corpus):
        """Keep references to the client and the owning corpus."""
        self.client, self.corpus = client, corpus

    def get(self, name):
        """Return an ``AlignmentAttribute`` wrapper for ``name``."""
        return AlignmentAttribute(self.client, self.corpus, name)

    def list(self):
        """Return a wrapper for every alignment attribute the API reports."""
        names = self.client.api.corpus_alignment_attributes(self.corpus.name)
        wrappers = []
        for attr_name in names:
            wrappers.append(
                AlignmentAttribute(self.client, self.corpus, attr_name))
        return wrappers
|
||||||
|
|
||||||
|
|
||||||
|
class AlignmentAttribute(Attribute):
    """Alignment attribute; thin wrappers around the ``cl_*alg*`` API calls."""

    def cpos_by_ids(self, id_list):
        """Forward ``id_list`` to ``cl_alg2cpos`` and return its result."""
        api = self.client.api
        return api.cl_alg2cpos(self._name, id_list)

    def ids_by_cpos(self, cpos_list):
        """Forward ``cpos_list`` to ``cl_cpos2alg`` and return its result."""
        api = self.client.api
        return api.cl_cpos2alg(self._name, cpos_list)
|
||||||
|
|
||||||
|
|
||||||
|
class PositionalAttributeCollection:
    """Accessor for the positional attributes of one corpus."""

    def __init__(self, client, corpus):
        """Keep references to the client and the owning corpus."""
        self.client, self.corpus = client, corpus

    def get(self, name):
        """Return a ``PositionalAttribute`` wrapper for ``name``."""
        return PositionalAttribute(self.client, self.corpus, name)

    def list(self):
        """Return a wrapper for every positional attribute the API reports."""
        names = self.client.api.corpus_positional_attributes(self.corpus.name)
        wrappers = []
        for attr_name in names:
            wrappers.append(
                PositionalAttribute(self.client, self.corpus, attr_name))
        return wrappers
|
||||||
|
|
||||||
|
|
||||||
|
class PositionalAttribute(Attribute):
    """Positional attribute; thin wrappers around the matching API calls.

    Besides the state set up by ``Attribute``, the lexicon size is fetched
    once at construction time and kept in ``lexicon_size``.
    """

    def __init__(self, client, corpus, name):
        """Initialise the base attribute, then fetch the lexicon size."""
        super().__init__(client, corpus, name)
        self.lexicon_size = client.api.cl_lexicon_size(self._name)

    def cpos_by_id(self, id):
        """Forward a single id to ``cl_id2cpos``."""
        api = self.client.api
        return api.cl_id2cpos(self._name, id)

    def cpos_by_ids(self, id_list):
        """Forward a list of ids to ``cl_idlist2cpos``."""
        api = self.client.api
        return api.cl_idlist2cpos(self._name, id_list)

    def freqs_by_ids(self, id_list):
        """Forward a list of ids to ``cl_id2freq``."""
        api = self.client.api
        return api.cl_id2freq(self._name, id_list)

    def ids_by_cpos(self, cpos_list):
        """Forward a list of corpus positions to ``cl_cpos2id``."""
        api = self.client.api
        return api.cl_cpos2id(self._name, cpos_list)

    def ids_by_regex(self, regex):
        """Forward a regular expression to ``cl_regex2id``."""
        api = self.client.api
        return api.cl_regex2id(self._name, regex)

    def ids_by_values(self, value_list):
        """Forward a list of values to ``cl_str2id``."""
        api = self.client.api
        return api.cl_str2id(self._name, value_list)

    def values_by_cpos(self, cpos_list):
        """Forward a list of corpus positions to ``cl_cpos2str``."""
        api = self.client.api
        return api.cl_cpos2str(self._name, cpos_list)

    def values_by_ids(self, id_list):
        """Forward a list of ids to ``cl_id2str``."""
        api = self.client.api
        return api.cl_id2str(self._name, id_list)
|
||||||
|
|
||||||
|
|
||||||
|
class StructuralAttributeCollection:
    """Accessor for the structural attributes of one corpus."""

    def __init__(self, client, corpus):
        """Keep references to the client and the owning corpus."""
        self.client = client
        self.corpus = corpus

    def get(self, name):
        """Return a ``StructuralAttribute`` wrapper for ``name``."""
        return StructuralAttribute(self.client, self.corpus, name)

    def list(self, filters=None):
        """Return wrappers for the structural attributes of the corpus.

        :param filters: optional mapping of filter name to filter value.
            Only ``'part_of'`` is understood: its value must expose a
            ``name`` attribute, and only attributes whose name starts with
            ``<value.name>_`` are kept. Unknown keys are ignored.
        :return: list of ``StructuralAttribute`` objects.
        """
        # ``None`` instead of ``{}`` as default: a dict default is created
        # once and shared between all calls.
        if filters is None:
            filters = {}
        names = self.client.api.corpus_structural_attributes(self.corpus.name)
        attrs = [StructuralAttribute(self.client, self.corpus, attr_name)
                 for attr_name in names]
        for key, value in filters.items():
            if key == 'part_of':
                prefix = value.name + '_'
                attrs = [attr for attr in attrs
                         if attr.name.startswith(prefix)]
        return attrs
|
||||||
|
|
||||||
|
|
||||||
|
class StructuralAttribute(Attribute):
    """Structural attribute; wrappers around the ``cl_*struc*`` API calls.

    Besides the state set up by ``Attribute``, ``has_values`` is fetched
    once at construction time.
    """

    def __init__(self, client, corpus, name):
        """Initialise the base attribute, then query ``has_values``."""
        super(StructuralAttribute, self).__init__(client, corpus, name)
        self.has_values = client.api.corpus_structural_attribute_has_values(
            self._name
        )

    def cpos_by_id(self, id):
        """Forward a single structure id to ``cl_struc2cpos``."""
        return self.client.api.cl_struc2cpos(self._name, id)

    def ids_by_cpos(self, cpos_list):
        """Forward a list of corpus positions to ``cl_cpos2struc``."""
        return self.client.api.cl_cpos2struc(self._name, cpos_list)

    def lbound_by_cpos(self, cpos_list):
        """Forward a list of corpus positions to ``cl_cpos2lbound``."""
        return self.client.api.cl_cpos2lbound(self._name, cpos_list)

    def rbound_by_cpos(self, cpos_list):
        """Forward a list of corpus positions to ``cl_cpos2rbound``."""
        return self.client.api.cl_cpos2rbound(self._name, cpos_list)

    def values_by_ids(self, id_list):
        """Forward a list of structure ids to ``cl_struc2str``."""
        return self.client.api.cl_struc2str(self._name, id_list)

    def lrcontext_by_cpos(self, cpos_first, cpos_last=None, context=3):
        """Collect the structures surrounding a match, plus their lookups.

        :param cpos_first: first corpus position of the match.
        :param cpos_last: last corpus position of the match (inclusive);
            defaults to ``cpos_first`` for a single-position match.
        :param context: number of structures to include on each side of
            the structures containing the match.
        :return: dict with ``'ids'`` (structure id -> list of its corpus
            positions), ``'match_cpos_list'`` and every entry produced by
            ``self.corpus.lookups_by_cpos`` for the collected positions.
        """
        if cpos_last is None:
            cpos_last = cpos_first
        # NOTE(review): the server presumably answers -1 for a position
        # outside any region (the lookup code treats -1 as "no id");
        # that case is not handled here - confirm with callers.
        id_l, id_r = self.ids_by_cpos([cpos_first, cpos_last])
        id_lc = max(0, id_l - context)
        # Clamped to the last valid index, i.e. an inclusive bound.
        id_rc = min(id_r + context, self.size - 1)
        ids = {}
        # ``range`` is half-open, so +1 keeps the inclusive upper bounds;
        # without it the right context is one structure short, every
        # region loses its final position and a single-position match
        # yields an empty ``match_cpos_list``.
        for struc_id in range(id_lc, id_rc + 1):
            # Assumes cl_struc2cpos yields an inclusive (start, end) pair,
            # as described by the CQi specification - TODO confirm.
            lbound, rbound = self.cpos_by_id(struc_id)
            ids[struc_id] = list(range(lbound, rbound + 1))
        cpos_list = [cpos for region in ids.values() for cpos in region]
        lookups = self.corpus.lookups_by_cpos(cpos_list)
        return {'ids': ids,
                'match_cpos_list': list(range(cpos_first, cpos_last + 1)),
                **lookups}
|
@ -1,3 +1,4 @@
|
|||||||
|
from .attributes import AttributeCollection
|
||||||
from .subcorpora import SubcorpusCollection
|
from .subcorpora import SubcorpusCollection
|
||||||
|
|
||||||
|
|
||||||
@ -9,94 +10,59 @@ class CorpusCollection:
|
|||||||
return Corpus(self.client, name)
|
return Corpus(self.client, name)
|
||||||
|
|
||||||
def list(self):
|
def list(self):
|
||||||
return [Corpus(self.client, corpus) for corpus
|
return [Corpus(self.client, corpus) for corpus in
|
||||||
in self.client.corpus_list_coprora()]
|
self.client.api.corpus_list_coprora()]
|
||||||
|
|
||||||
|
|
||||||
class Corpus:
|
class Corpus:
|
||||||
def __init__(self, client, name):
|
def __init__(self, client, name):
|
||||||
self.client = client
|
self.client = client
|
||||||
self.name = name
|
self.name = name
|
||||||
self.__load()
|
self.size = client.api.cl_attribute_size('{}.word'.format(name))
|
||||||
|
# self.info = client.api.corpus_info(name)
|
||||||
|
self.charset = client.api.corpus_charset(name)
|
||||||
|
# self.full_name = client.api.corpus_full_name(name)
|
||||||
|
self.properties = client.api.corpus_properties(name)
|
||||||
|
self.attributes = AttributeCollection(client, self)
|
||||||
|
self.subcorpora = SubcorpusCollection(client, self)
|
||||||
|
|
||||||
def __load(self):
|
def lookups_by_cpos(self, cpos_list):
|
||||||
self.size = self.client.cl_attribute_size('{}.word'.format(self.name))
|
cpos_list = list(set(cpos_list))
|
||||||
# self.info = client.corpus_info(self.name)
|
lookups = {}
|
||||||
self.charset = self.client.corpus_charset(self.name)
|
if cpos_list:
|
||||||
# self.full_name = self.client.corpus_full_name(self.name)
|
lookups['cpos_lookup'] = {}
|
||||||
self.properties = self.client.corpus_properties(self.name)
|
for cpos in cpos_list:
|
||||||
self.alignment_attributes = \
|
lookups['cpos_lookup'][cpos] = {}
|
||||||
self.client.corpus_alignment_attributes(self.name)
|
for attr in self.attributes.positional.list():
|
||||||
self.structural_attributes = \
|
cpos_attr_values = attr.values_by_cpos(cpos_list)
|
||||||
self.client.corpus_structural_attributes(self.name)
|
for i, cpos in enumerate(cpos_list):
|
||||||
self.positional_attributes = \
|
lookups['cpos_lookup'][cpos][attr.name] = cpos_attr_values[i]
|
||||||
self.client.corpus_positional_attributes(self.name)
|
for attr in self.attributes.structural.list():
|
||||||
self.subcorpora = SubcorpusCollection(self.client, self)
|
if attr.has_values:
|
||||||
|
continue
|
||||||
def alg2cpos(self, attribute, alg):
|
cpos_attr_ids = attr.ids_by_cpos(cpos_list)
|
||||||
__attribute = '{}.{}'.format(self.name, attribute)
|
for i, cpos in enumerate(cpos_list):
|
||||||
return self.client.cl_alg2cpos(__attribute, alg)
|
if cpos_attr_ids[i] != -1:
|
||||||
|
lookups['cpos_lookup'][cpos][attr.name] = cpos_attr_ids[i]
|
||||||
def cpos2alg(self, attribute, cpos_list):
|
occured_attr_ids = list(filter(lambda x: x != -1,
|
||||||
__attribute = '{}.{}'.format(self.name, attribute)
|
set(cpos_attr_ids)))
|
||||||
return self.client.cl_cpos2alg(__attribute, cpos_list)
|
if not occured_attr_ids:
|
||||||
|
continue
|
||||||
def cpos2id(self, attribute, cpos_list):
|
subattrs = self.attributes.structural.list(
|
||||||
__attribute = '{}.{}'.format(self.name, attribute)
|
filters={'part_of': attr})
|
||||||
return self.client.cl_cpos2id(__attribute, cpos_list)
|
if not subattrs:
|
||||||
|
continue
|
||||||
def cpos2lbound(self, attribute, cpos_list):
|
lookup_name = '{}_lookup'.format(attr.name)
|
||||||
__attribute = '{}.{}'.format(self.name, attribute)
|
lookups[lookup_name] = {}
|
||||||
return self.client.cl_cpos2lbound(__attribute, cpos_list)
|
for attr_id in occured_attr_ids:
|
||||||
|
lookups[lookup_name][attr_id] = {}
|
||||||
def cpos2rbound(self, attribute, cpos_list):
|
for subattr in subattrs:
|
||||||
__attribute = '{}.{}'.format(self.name, attribute)
|
subattr_values = subattr.values_by_ids(occured_attr_ids)
|
||||||
return self.client.cl_cpos2rbound(__attribute, cpos_list)
|
for i, subattr_value in enumerate(subattr_values):
|
||||||
|
lookups[lookup_name][occured_attr_ids[i]][subattr.name] = \
|
||||||
def cpos2str(self, attribute, cpos_list):
|
subattr_value
|
||||||
__attribute = '{}.{}'.format(self.name, attribute)
|
return lookups
|
||||||
return self.client.cl_cpos2str(__attribute, cpos_list)
|
|
||||||
|
|
||||||
def cpos2struc(self, attribute, cpos_list):
|
|
||||||
__attribute = '{}.{}'.format(self.name, attribute)
|
|
||||||
return self.client.cl_cpos2struc(__attribute, cpos_list)
|
|
||||||
|
|
||||||
def id2cpos(self, attribute, id):
|
|
||||||
__attribute = '{}.{}'.format(self.name, attribute)
|
|
||||||
return self.client.cl_id2cpos(__attribute, id)
|
|
||||||
|
|
||||||
def idlist2cpos(self, attribute, ids):
|
|
||||||
__attribute = '{}.{}'.format(self.name, attribute)
|
|
||||||
return self.client.cl_idlist2cpos(__attribute, ids)
|
|
||||||
|
|
||||||
def id2freq(self, attribute, ids):
|
|
||||||
__attribute = '{}.{}'.format(self.name, attribute)
|
|
||||||
return self.client.cl_id2freq(__attribute, ids)
|
|
||||||
|
|
||||||
def id2str(self, attribute, ids):
|
|
||||||
__attribute = '{}.{}'.format(self.name, attribute)
|
|
||||||
return self.client.cl_id2str(__attribute, ids)
|
|
||||||
|
|
||||||
def query(self, query, subcorpus_name='Results'):
|
def query(self, query, subcorpus_name='Results'):
|
||||||
self.client.cqp_query(self.name, subcorpus_name, query)
|
self.client.api.cqp_query(self.name, subcorpus_name, query)
|
||||||
return self.subcorpora.get('Results')
|
return self.subcorpora.get('Results')
|
||||||
|
|
||||||
def regex2id(self, attribute, regex):
|
|
||||||
__attribute = '{}.{}'.format(self.name, attribute)
|
|
||||||
return self.client.cl_regex2id(__attribute, regex)
|
|
||||||
|
|
||||||
def structural_attribute_has_values(self, attribute):
|
|
||||||
__attribute = '{}.{}'.format(self.name, attribute)
|
|
||||||
return self.client.corpus_structural_attribute_has_values(__attribute)
|
|
||||||
|
|
||||||
def str2id(self, attribute, strings):
|
|
||||||
__attribute = '{}.{}'.format(self.name, attribute)
|
|
||||||
return self.client.cl_str2id(__attribute, strings)
|
|
||||||
|
|
||||||
def struc2cpos(self, attribute, struc):
|
|
||||||
__attribute = '{}.{}'.format(self.name, attribute)
|
|
||||||
return self.client.cl_struc2cpos(__attribute, struc)
|
|
||||||
|
|
||||||
def struc2str(self, attribute, strucs):
|
|
||||||
__attribute = '{}.{}'.format(self.name, attribute)
|
|
||||||
return self.client.cl_struc2str(__attribute, strucs)
|
|
||||||
|
@ -3,47 +3,42 @@ from ..constants import (CONST_FIELD_KEYWORD, CONST_FIELD_MATCH,
|
|||||||
|
|
||||||
|
|
||||||
class SubcorpusCollection:
|
class SubcorpusCollection:
|
||||||
def __init__(self, client, parent_corpus):
|
def __init__(self, client, corpus):
|
||||||
self.client = client
|
self.client = client
|
||||||
self.parent_corpus = parent_corpus
|
self.corpus = corpus
|
||||||
|
|
||||||
def get(self, name):
|
def get(self, name):
|
||||||
return Subcorpus(self.client, self.parent_corpus, name)
|
return Subcorpus(self.client, self.corpus, name)
|
||||||
|
|
||||||
def list(self):
|
def list(self):
|
||||||
return [Subcorpus(self.client, self.parent_corpus, subcorpus)
|
return [Subcorpus(self.client, self.corpus, subcorpus) for subcorpus in
|
||||||
for subcorpus
|
self.client.api.cqp_list_subcorpora(self.corpus.name)]
|
||||||
in self.client.cqp_list_subcorpora(self.parent_corpus.name)]
|
|
||||||
|
|
||||||
|
|
||||||
class Subcorpus:
|
class Subcorpus:
|
||||||
def __init__(self, client, parent_corpus, name):
|
def __init__(self, client, corpus, name):
|
||||||
self.client = client
|
self.client = client
|
||||||
self.parent_corpus = parent_corpus
|
self.corpus = corpus
|
||||||
self.name = name
|
self.name = name
|
||||||
self.__name = '{}:{}'.format(parent_corpus.name, name)
|
self._name = '{}:{}'.format(corpus.name, name)
|
||||||
self.__load()
|
|
||||||
|
|
||||||
def __load(self):
|
|
||||||
self.fields = {}
|
self.fields = {}
|
||||||
if self.client.cqp_subcorpus_has_field(self.__name, CONST_FIELD_MATCH):
|
if client.api.cqp_subcorpus_has_field(self._name, CONST_FIELD_MATCH):
|
||||||
self.fields['match'] = CONST_FIELD_MATCH
|
self.fields['match'] = CONST_FIELD_MATCH
|
||||||
if self.client.cqp_subcorpus_has_field(self.__name,
|
if client.api.cqp_subcorpus_has_field(self._name,
|
||||||
CONST_FIELD_MATCHEND):
|
CONST_FIELD_MATCHEND):
|
||||||
self.fields['matchend'] = CONST_FIELD_MATCHEND
|
self.fields['matchend'] = CONST_FIELD_MATCHEND
|
||||||
if self.client.cqp_subcorpus_has_field(self.__name,
|
if client.api.cqp_subcorpus_has_field(self._name, CONST_FIELD_TARGET):
|
||||||
CONST_FIELD_TARGET):
|
|
||||||
self.fields['target'] = CONST_FIELD_TARGET
|
self.fields['target'] = CONST_FIELD_TARGET
|
||||||
if self.client.cqp_subcorpus_has_field(self.__name,
|
if client.api.cqp_subcorpus_has_field(self._name, CONST_FIELD_KEYWORD):
|
||||||
CONST_FIELD_KEYWORD):
|
|
||||||
self.fields['keyword'] = CONST_FIELD_KEYWORD
|
self.fields['keyword'] = CONST_FIELD_KEYWORD
|
||||||
self.size = self.client.cqp_subcorpus_size(self.__name)
|
self.size = client.api.cqp_subcorpus_size(self._name)
|
||||||
|
|
||||||
def drop(self):
|
def drop(self):
|
||||||
return self.client.cqp_drop_subcorpus(self.__name)
|
return self.client.api.cqp_drop_subcorpus(self._name)
|
||||||
|
|
||||||
def dump(self, field, first, last):
|
def dump(self, field, first, last):
|
||||||
return self.client.cqp_dump_subcorpus(self.__name, field, first, last)
|
return self.client.api.cqp_dump_subcorpus(self._name, field, first,
|
||||||
|
last)
|
||||||
|
|
||||||
def dump_values(self, context=25, first_result=0,
|
def dump_values(self, context=25, first_result=0,
|
||||||
num_results=float('inf')):
|
num_results=float('inf')):
|
||||||
@ -58,57 +53,21 @@ class Subcorpus:
|
|||||||
last_result))
|
last_result))
|
||||||
for match_start, match_end in match_boundaries:
|
for match_start, match_end in match_boundaries:
|
||||||
left_start = max(0, match_start - context)
|
left_start = max(0, match_start - context)
|
||||||
right_end = min(self.parent_corpus.size, (match_end + 1 + context))
|
right_end = min(self.corpus.size, (match_end + 1 + context))
|
||||||
matches.append({'lc': list(range(left_start, match_start)),
|
matches.append({'lc': list(range(left_start, match_start)),
|
||||||
'hit': list(range(match_start, match_end + 1)),
|
'hit': list(range(match_start, match_end + 1)),
|
||||||
'rc': list(range(match_end + 1, right_end))})
|
'rc': list(range(match_end + 1, right_end))})
|
||||||
cpos_list = []
|
cpos_list = []
|
||||||
for match in matches:
|
for match in matches:
|
||||||
cpos_list += match['lc'] + match['hit'] + match['rc']
|
cpos_list += match['lc'] + match['hit'] + match['rc']
|
||||||
cpos_list = list(set(cpos_list))
|
lookups = self.corpus.lookups_by_cpos(cpos_list)
|
||||||
lookups = {}
|
|
||||||
if len(cpos_list) > 0:
|
|
||||||
lookups['cpos_lookup'] = {}
|
|
||||||
for cpos in cpos_list:
|
|
||||||
lookups['cpos_lookup'][cpos] = {}
|
|
||||||
for attr in self.parent_corpus.positional_attributes:
|
|
||||||
cpos_attr_values = self.parent_corpus.cpos2str(attr, cpos_list)
|
|
||||||
for i, cpos in enumerate(cpos_list):
|
|
||||||
lookups['cpos_lookup'][cpos][attr] = cpos_attr_values[i]
|
|
||||||
for attr in self.parent_corpus.structural_attributes:
|
|
||||||
if self.parent_corpus.structural_attribute_has_values(attr):
|
|
||||||
continue
|
|
||||||
cpos_attr_ids = self.parent_corpus.cpos2struc(attr, cpos_list)
|
|
||||||
for i, cpos in enumerate(cpos_list):
|
|
||||||
if cpos_attr_ids[i] != -1:
|
|
||||||
lookups['cpos_lookup'][cpos][attr] = cpos_attr_ids[i]
|
|
||||||
occured_attr_ids = list(set(cpos_attr_ids))
|
|
||||||
occured_attr_ids = list(filter(lambda x: x != -1,
|
|
||||||
occured_attr_ids))
|
|
||||||
if len(occured_attr_ids) == 0:
|
|
||||||
continue
|
|
||||||
attr_subattrs = \
|
|
||||||
list(filter(lambda x: x.startswith(attr + '_'),
|
|
||||||
self.parent_corpus.structural_attributes))
|
|
||||||
attr_subattrs = list(map(lambda x: x.split('_', 1)[1],
|
|
||||||
attr_subattrs))
|
|
||||||
if len(attr_subattrs) == 0:
|
|
||||||
continue
|
|
||||||
lookups[attr + '_lookup'] = {}
|
|
||||||
for attr_id in occured_attr_ids:
|
|
||||||
lookups[attr + '_lookup'][attr_id] = {}
|
|
||||||
for subattr in attr_subattrs:
|
|
||||||
__subattr = attr + '_' + subattr
|
|
||||||
subattr_values = \
|
|
||||||
self.parent_corpus.struc2str(__subattr, occured_attr_ids)
|
|
||||||
for i, value in enumerate(subattr_values):
|
|
||||||
lookups[attr + '_lookup'][occured_attr_ids[i]][subattr] = \
|
|
||||||
value
|
|
||||||
return {'matches': matches, **lookups}
|
return {'matches': matches, **lookups}
|
||||||
|
|
||||||
def fdist_1(self, cutoff, field, attribute):
|
def fdist_1(self, cutoff, field, attribute):
|
||||||
return self.client.cqp_fdist_1(self.__name, cutoff, field, attribute)
|
return self.client.api.cqp_fdist_1(self._name, cutoff, field,
|
||||||
|
attribute._name)
|
||||||
|
|
||||||
def fdist_2(self, cutoff, field_1, attribute_1, field_2, attribute_2):
|
def fdist_2(self, cutoff, field_1, attribute_1, field_2, attribute_2):
|
||||||
return self.client.cqp_fdist_2(self.__name, cutoff, field_1,
|
return self.client.api.cqp_fdist_2(self._name, cutoff,
|
||||||
attribute_1, field_2, attribute_2)
|
field_1, attribute_1._name,
|
||||||
|
field_2, attribute_2._name)
|
||||||
|
@ -32,7 +32,6 @@ class CQiWrapper(APIClient):
|
|||||||
Connects via socket to the CQP server using the given username and
|
Connects via socket to the CQP server using the given username and
|
||||||
password from class initiation.
|
password from class initiation.
|
||||||
'''
|
'''
|
||||||
super(CQiWrapper, self).setup()
|
|
||||||
self.ctrl_connect(self.username, self.password)
|
self.ctrl_connect(self.username, self.password)
|
||||||
|
|
||||||
def __create_attribute_strings(self):
|
def __create_attribute_strings(self):
|
||||||
@ -79,7 +78,6 @@ class CQiWrapper(APIClient):
|
|||||||
Disconnects from the CQP server. Closes used socket after disconnect.
|
Disconnects from the CQP server. Closes used socket after disconnect.
|
||||||
'''
|
'''
|
||||||
self.ctrl_bye()
|
self.ctrl_bye()
|
||||||
super(CQiWrapper, self).teardown()
|
|
||||||
print('Disconnected from cqp server.')
|
print('Disconnected from cqp server.')
|
||||||
|
|
||||||
def query_subcorpus(self, query, result_subcorpus_name='Query-results'):
|
def query_subcorpus(self, query, result_subcorpus_name='Query-results'):
|
||||||
|
Loading…
Reference in New Issue
Block a user