mirror of
				https://gitlab.ub.uni-bielefeld.de/sfb1288inf/nopaque.git
				synced 2025-11-03 20:02:47 +00:00 
			
		
		
		
	Update cqi package
This commit is contained in:
		@@ -421,13 +421,8 @@ class APIClient:
 | 
			
		||||
        self.port = port
 | 
			
		||||
        self.socket = socket.socket()
 | 
			
		||||
 | 
			
		||||
    def setup(self):
 | 
			
		||||
        self.socket.connect((self.host, self.port))
 | 
			
		||||
 | 
			
		||||
    def teardown(self):
 | 
			
		||||
        self.socket.close()
 | 
			
		||||
 | 
			
		||||
    def ctrl_connect(self, username, password):
 | 
			
		||||
        self.socket.connect((self.host, self.port))
 | 
			
		||||
        # INPUT: (STRING username, STRING password)
 | 
			
		||||
        # OUTPUT: CQI_STATUS_CONNECT_OK, CQI_ERROR_CONNECT_REFUSED
 | 
			
		||||
        self.__send_WORD(CTRL_CONNECT)
 | 
			
		||||
@@ -439,7 +434,9 @@ class APIClient:
 | 
			
		||||
        # INPUT: ()
 | 
			
		||||
        # OUTPUT: CQI_STATUS_BYE_OK
 | 
			
		||||
        self.__send_WORD(CTRL_BYE)
 | 
			
		||||
        return self.__recv_response()
 | 
			
		||||
        response = self.__recv_response()
 | 
			
		||||
        self.socket.close()
 | 
			
		||||
        return response
 | 
			
		||||
 | 
			
		||||
    def ctrl_user_abort(self):
 | 
			
		||||
        # INPUT: ()
 | 
			
		||||
@@ -575,7 +572,7 @@ class APIClient:
 | 
			
		||||
        # INPUT: (STRING attribute)
 | 
			
		||||
        # OUTPUT: CQI_STATUS_OK
 | 
			
		||||
        # unload attribute from memory
 | 
			
		||||
        self.__send_WORD(CL_LEXICON_SIZE)
 | 
			
		||||
        self.__send_WORD(CL_DROP_ATTRIBUTE)
 | 
			
		||||
        self.__send_STRING(attribute)
 | 
			
		||||
        return self.__recv_response()
 | 
			
		||||
 | 
			
		||||
@@ -589,7 +586,7 @@ class APIClient:
 | 
			
		||||
        # OUTPUT: CQI_DATA_INT_LIST
 | 
			
		||||
        # returns -1 for every string in <strings> that is not found in the
 | 
			
		||||
        # lexicon
 | 
			
		||||
        self.__send_WORD(CL_LEXICON_SIZE)
 | 
			
		||||
        self.__send_WORD(CL_STR2ID)
 | 
			
		||||
        self.__send_STRING(attribute)
 | 
			
		||||
        self.__send_STRING_LIST(strings)
 | 
			
		||||
        return self.__recv_response()
 | 
			
		||||
 
 | 
			
		||||
@@ -1,31 +1,15 @@
 | 
			
		||||
from .api import APIClient
 | 
			
		||||
from .constants import MAJOR_VERSION, MINOR_VERSION
 | 
			
		||||
from .models.corpora import CorpusCollection
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class CQiClient(APIClient):
 | 
			
		||||
class CQiClient:
 | 
			
		||||
    def __init__(self, host, port=4877):
 | 
			
		||||
        super(CQiClient, self).__init__(host, port=port)
 | 
			
		||||
        self.api = APIClient(host, port=port)
 | 
			
		||||
 | 
			
		||||
    def connect(self, username='anonymous', password=''):
 | 
			
		||||
        super(CQiClient, self).setup()
 | 
			
		||||
        self.ctrl_connect(username, password)
 | 
			
		||||
        self.__load()
 | 
			
		||||
        self.api.ctrl_connect(username, password)
 | 
			
		||||
        self.corpora = CorpusCollection(self)
 | 
			
		||||
 | 
			
		||||
    def disconnect(self):
 | 
			
		||||
        self.ctrl_bye()
 | 
			
		||||
        super(CQiClient, self).teardown()
 | 
			
		||||
 | 
			
		||||
    def __load(self):
 | 
			
		||||
        self.corpora = CorpusCollection(self)
 | 
			
		||||
        self.info = {'version': '{}.{}'.format(MAJOR_VERSION, MINOR_VERSION)}
 | 
			
		||||
 | 
			
		||||
    def features(self):
 | 
			
		||||
        features = []
 | 
			
		||||
        if self.ask_feature_cqi_1_0():
 | 
			
		||||
            features.append('cqi_1_0')
 | 
			
		||||
        if self.ask_feature_cl_2_3():
 | 
			
		||||
            features.append('cl_2_3')
 | 
			
		||||
        if self.ask_feature_cqp_2_3():
 | 
			
		||||
            features.append('cqp_2_3')
 | 
			
		||||
        return features
 | 
			
		||||
        del self.corpora
 | 
			
		||||
        self.api.ctrl_bye()
 | 
			
		||||
 
 | 
			
		||||
							
								
								
									
										139
									
								
								app/corpora/cqi/models/attributes.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										139
									
								
								app/corpora/cqi/models/attributes.py
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,139 @@
 | 
			
		||||
class AttributeCollection:
    """Bundle the attribute collections of a single corpus.

    The alignment, positional and structural attribute collections are
    created eagerly and stored under the attributes of the same names.
    """

    def __init__(self, client, corpus):
        self.client, self.corpus = client, corpus
        # Every sub-collection receives the same client/corpus pair.
        shared = (client, corpus)
        self.alignment = AlignmentAttributeCollection(*shared)
        self.positional = PositionalAttributeCollection(*shared)
        self.structural = StructuralAttributeCollection(*shared)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class Attribute:
    """Base class for one attribute of a corpus.

    ``name`` is the short name passed by the caller, ``_name`` the name
    qualified with the corpus name (``<corpus>.<attribute>``) that is sent
    to the API. The size is requested once, at construction time.
    """

    def __init__(self, client, corpus, name):
        self.client = client
        self.corpus = corpus
        self.name = name
        qualified_name = '{}.{}'.format(corpus.name, name)
        self._name = qualified_name
        self.size = client.api.cl_attribute_size(qualified_name)

    def drop(self):
        """Forward to ``cl_drop_attribute`` and return its result."""
        api = self.client.api
        return api.cl_drop_attribute(self._name)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class AlignmentAttributeCollection:
    """Access to the alignment attributes of one corpus."""

    def __init__(self, client, corpus):
        self.client, self.corpus = client, corpus

    def get(self, name):
        """Return an ``AlignmentAttribute`` for the attribute ``name``."""
        return AlignmentAttribute(self.client, self.corpus, name)

    def list(self):
        """Return one ``AlignmentAttribute`` per name reported by the API."""
        names = self.client.api.corpus_alignment_attributes(self.corpus.name)
        attributes = []
        for attr_name in names:
            attributes.append(
                AlignmentAttribute(self.client, self.corpus, attr_name))
        return attributes
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class AlignmentAttribute(Attribute):
    """Alignment attribute; thin wrappers around two ``cl_*`` API calls."""

    def cpos_by_ids(self, id_list):
        """Forward ``id_list`` to ``cl_alg2cpos`` for this attribute."""
        api = self.client.api
        return api.cl_alg2cpos(self._name, id_list)

    def ids_by_cpos(self, cpos_list):
        """Forward ``cpos_list`` to ``cl_cpos2alg`` for this attribute."""
        api = self.client.api
        return api.cl_cpos2alg(self._name, cpos_list)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class PositionalAttributeCollection:
    """Access to the positional attributes of one corpus."""

    def __init__(self, client, corpus):
        self.client, self.corpus = client, corpus

    def get(self, name):
        """Return a ``PositionalAttribute`` for the attribute ``name``."""
        return PositionalAttribute(self.client, self.corpus, name)

    def list(self):
        """Return one ``PositionalAttribute`` per name reported by the API."""
        names = self.client.api.corpus_positional_attributes(self.corpus.name)
        attributes = []
        for attr_name in names:
            attributes.append(
                PositionalAttribute(self.client, self.corpus, attr_name))
        return attributes
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class PositionalAttribute(Attribute):
    """Positional attribute; additionally stores its lexicon size.

    Each public method forwards to a single ``cl_*`` API call and always
    passes the corpus-qualified attribute name as the first argument.
    """

    def __init__(self, client, corpus, name):
        super().__init__(client, corpus, name)
        # Requested once at construction, like ``size`` in the base class.
        self.lexicon_size = client.api.cl_lexicon_size(self._name)

    def cpos_by_id(self, id):
        """Forward a single ``id`` to ``cl_id2cpos``."""
        api = self.client.api
        return api.cl_id2cpos(self._name, id)

    def cpos_by_ids(self, id_list):
        """Forward ``id_list`` to ``cl_idlist2cpos``."""
        api = self.client.api
        return api.cl_idlist2cpos(self._name, id_list)

    def freqs_by_ids(self, id_list):
        """Forward ``id_list`` to ``cl_id2freq``."""
        api = self.client.api
        return api.cl_id2freq(self._name, id_list)

    def ids_by_cpos(self, cpos_list):
        """Forward ``cpos_list`` to ``cl_cpos2id``."""
        api = self.client.api
        return api.cl_cpos2id(self._name, cpos_list)

    def ids_by_regex(self, regex):
        """Forward ``regex`` to ``cl_regex2id``."""
        api = self.client.api
        return api.cl_regex2id(self._name, regex)

    def ids_by_values(self, value_list):
        """Forward ``value_list`` to ``cl_str2id``."""
        api = self.client.api
        return api.cl_str2id(self._name, value_list)

    def values_by_cpos(self, cpos_list):
        """Forward ``cpos_list`` to ``cl_cpos2str``."""
        api = self.client.api
        return api.cl_cpos2str(self._name, cpos_list)

    def values_by_ids(self, id_list):
        """Forward ``id_list`` to ``cl_id2str``."""
        api = self.client.api
        return api.cl_id2str(self._name, id_list)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class StructuralAttributeCollection:
    """Access to the structural attributes of one corpus."""

    def __init__(self, client, corpus):
        self.client = client
        self.corpus = corpus

    def get(self, name):
        """Return a ``StructuralAttribute`` for the attribute ``name``."""
        return StructuralAttribute(self.client, self.corpus, name)

    def list(self, filters=None):
        """Return the structural attributes of the corpus, optionally filtered.

        :param filters: optional mapping of filter name to filter value.
            Only the key ``'part_of'`` is evaluated; its value must expose a
            ``name`` attribute, and only attributes whose name starts with
            ``<name>_`` are kept. Any other key is ignored.
        :return: list of ``StructuralAttribute`` objects.
        """
        attrs = [StructuralAttribute(self.client, self.corpus, attr_name)
                 for attr_name
                 in self.client.api.corpus_structural_attributes(
                     self.corpus.name)]
        # ``None`` replaces the former mutable ``{}`` default, which was a
        # single dict object shared between all calls.
        for key, value in (filters or {}).items():
            if key == 'part_of':
                prefix = value.name + '_'
                attrs = [attr for attr in attrs
                         if attr.name.startswith(prefix)]
        return attrs
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class StructuralAttribute(Attribute):
    """Structural attribute; additionally stores whether it carries values.

    Apart from ``lrcontext_by_cpos`` every public method forwards to a
    single API call, passing the corpus-qualified attribute name first.
    """

    def __init__(self, client, corpus, name):
        super().__init__(client, corpus, name)
        self.has_values = client.api.corpus_structural_attribute_has_values(
            self._name
        )

    def cpos_by_id(self, id):
        """Forward a single structure ``id`` to ``cl_struc2cpos``."""
        return self.client.api.cl_struc2cpos(self._name, id)

    def ids_by_cpos(self, cpos_list):
        """Forward ``cpos_list`` to ``cl_cpos2struc``."""
        return self.client.api.cl_cpos2struc(self._name, cpos_list)

    def lbound_by_cpos(self, cpos_list):
        """Forward ``cpos_list`` to ``cl_cpos2lbound``."""
        return self.client.api.cl_cpos2lbound(self._name, cpos_list)

    def rbound_by_cpos(self, cpos_list):
        """Forward ``cpos_list`` to ``cl_cpos2rbound``."""
        return self.client.api.cl_cpos2rbound(self._name, cpos_list)

    def values_by_ids(self, id_list):
        """Forward ``id_list`` to ``cl_struc2str``."""
        return self.client.api.cl_struc2str(self._name, id_list)

    def lrcontext_by_cpos(self, cpos_first, cpos_last=None, context=3):
        """Return a match together with its surrounding structures.

        :param cpos_first: first corpus position of the match.
        :param cpos_last: last corpus position of the match (inclusive);
            defaults to ``cpos_first``.
        :param context: number of structures to include on each side of
            the structures containing the match.
        :return: dict with ``'ids'`` (structure id -> list of its corpus
            positions), ``'match_cpos_list'`` and every lookup table
            produced by ``corpus.lookups_by_cpos`` for those positions.
        """
        if cpos_last is None:
            cpos_last = cpos_first
        # NOTE(review): ids_by_cpos results of -1 are not handled here;
        # presumably callers only pass positions inside a region - confirm.
        id_l, id_r = self.ids_by_cpos([cpos_first, cpos_last])
        id_lc = max(0, id_l - context)
        # Clamped to the last valid structure id, i.e. an inclusive bound.
        id_rc = min(id_r + context, self.size - 1)
        ids = {}
        for struc_id in range(id_lc, id_rc + 1):
            # Region bounds are treated as inclusive (start, end) pairs,
            # as CQi specifies for cl_struc2cpos.
            lbound, rbound = self.cpos_by_id(struc_id)
            ids[struc_id] = list(range(lbound, rbound + 1))
        cpos_list = [cpos for region in ids.values() for cpos in region]
        lookups = self.corpus.lookups_by_cpos(cpos_list)
        return {'ids': ids,
                'match_cpos_list': list(range(cpos_first, cpos_last + 1)),
                **lookups}
 | 
			
		||||
@@ -1,3 +1,4 @@
 | 
			
		||||
from .attributes import AttributeCollection
 | 
			
		||||
from .subcorpora import SubcorpusCollection
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@@ -9,94 +10,59 @@ class CorpusCollection:
 | 
			
		||||
        return Corpus(self.client, name)
 | 
			
		||||
 | 
			
		||||
    def list(self):
 | 
			
		||||
        return [Corpus(self.client, corpus) for corpus
 | 
			
		||||
                in self.client.corpus_list_coprora()]
 | 
			
		||||
        return [Corpus(self.client, corpus) for corpus in
 | 
			
		||||
                self.client.api.corpus_list_coprora()]
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class Corpus:
 | 
			
		||||
    def __init__(self, client, name):
 | 
			
		||||
        self.client = client
 | 
			
		||||
        self.name = name
 | 
			
		||||
        self.__load()
 | 
			
		||||
        self.size = client.api.cl_attribute_size('{}.word'.format(name))
 | 
			
		||||
        # self.info = client.api.corpus_info(name)
 | 
			
		||||
        self.charset = client.api.corpus_charset(name)
 | 
			
		||||
        # self.full_name = client.api.corpus_full_name(name)
 | 
			
		||||
        self.properties = client.api.corpus_properties(name)
 | 
			
		||||
        self.attributes = AttributeCollection(client, self)
 | 
			
		||||
        self.subcorpora = SubcorpusCollection(client, self)
 | 
			
		||||
 | 
			
		||||
    def __load(self):
 | 
			
		||||
        self.size = self.client.cl_attribute_size('{}.word'.format(self.name))
 | 
			
		||||
        # self.info = client.corpus_info(self.name)
 | 
			
		||||
        self.charset = self.client.corpus_charset(self.name)
 | 
			
		||||
        # self.full_name = self.client.corpus_full_name(self.name)
 | 
			
		||||
        self.properties = self.client.corpus_properties(self.name)
 | 
			
		||||
        self.alignment_attributes = \
 | 
			
		||||
            self.client.corpus_alignment_attributes(self.name)
 | 
			
		||||
        self.structural_attributes = \
 | 
			
		||||
            self.client.corpus_structural_attributes(self.name)
 | 
			
		||||
        self.positional_attributes = \
 | 
			
		||||
            self.client.corpus_positional_attributes(self.name)
 | 
			
		||||
        self.subcorpora = SubcorpusCollection(self.client, self)
 | 
			
		||||
 | 
			
		||||
    def alg2cpos(self, attribute, alg):
 | 
			
		||||
        __attribute = '{}.{}'.format(self.name, attribute)
 | 
			
		||||
        return self.client.cl_alg2cpos(__attribute, alg)
 | 
			
		||||
 | 
			
		||||
    def cpos2alg(self, attribute, cpos_list):
 | 
			
		||||
        __attribute = '{}.{}'.format(self.name, attribute)
 | 
			
		||||
        return self.client.cl_cpos2alg(__attribute, cpos_list)
 | 
			
		||||
 | 
			
		||||
    def cpos2id(self, attribute, cpos_list):
 | 
			
		||||
        __attribute = '{}.{}'.format(self.name, attribute)
 | 
			
		||||
        return self.client.cl_cpos2id(__attribute, cpos_list)
 | 
			
		||||
 | 
			
		||||
    def cpos2lbound(self, attribute, cpos_list):
 | 
			
		||||
        __attribute = '{}.{}'.format(self.name, attribute)
 | 
			
		||||
        return self.client.cl_cpos2lbound(__attribute, cpos_list)
 | 
			
		||||
 | 
			
		||||
    def cpos2rbound(self, attribute, cpos_list):
 | 
			
		||||
        __attribute = '{}.{}'.format(self.name, attribute)
 | 
			
		||||
        return self.client.cl_cpos2rbound(__attribute, cpos_list)
 | 
			
		||||
 | 
			
		||||
    def cpos2str(self, attribute, cpos_list):
 | 
			
		||||
        __attribute = '{}.{}'.format(self.name, attribute)
 | 
			
		||||
        return self.client.cl_cpos2str(__attribute, cpos_list)
 | 
			
		||||
 | 
			
		||||
    def cpos2struc(self, attribute, cpos_list):
 | 
			
		||||
        __attribute = '{}.{}'.format(self.name, attribute)
 | 
			
		||||
        return self.client.cl_cpos2struc(__attribute, cpos_list)
 | 
			
		||||
 | 
			
		||||
    def id2cpos(self, attribute, id):
 | 
			
		||||
        __attribute = '{}.{}'.format(self.name, attribute)
 | 
			
		||||
        return self.client.cl_id2cpos(__attribute, id)
 | 
			
		||||
 | 
			
		||||
    def idlist2cpos(self, attribute, ids):
 | 
			
		||||
        __attribute = '{}.{}'.format(self.name, attribute)
 | 
			
		||||
        return self.client.cl_idlist2cpos(__attribute, ids)
 | 
			
		||||
 | 
			
		||||
    def id2freq(self, attribute, ids):
 | 
			
		||||
        __attribute = '{}.{}'.format(self.name, attribute)
 | 
			
		||||
        return self.client.cl_id2freq(__attribute, ids)
 | 
			
		||||
 | 
			
		||||
    def id2str(self, attribute, ids):
 | 
			
		||||
        __attribute = '{}.{}'.format(self.name, attribute)
 | 
			
		||||
        return self.client.cl_id2str(__attribute, ids)
 | 
			
		||||
    def lookups_by_cpos(self, cpos_list):
 | 
			
		||||
        cpos_list = list(set(cpos_list))
 | 
			
		||||
        lookups = {}
 | 
			
		||||
        if cpos_list:
 | 
			
		||||
            lookups['cpos_lookup'] = {}
 | 
			
		||||
        for cpos in cpos_list:
 | 
			
		||||
            lookups['cpos_lookup'][cpos] = {}
 | 
			
		||||
        for attr in self.attributes.positional.list():
 | 
			
		||||
            cpos_attr_values = attr.values_by_cpos(cpos_list)
 | 
			
		||||
            for i, cpos in enumerate(cpos_list):
 | 
			
		||||
                lookups['cpos_lookup'][cpos][attr.name] = cpos_attr_values[i]
 | 
			
		||||
        for attr in self.attributes.structural.list():
 | 
			
		||||
            if attr.has_values:
 | 
			
		||||
                continue
 | 
			
		||||
            cpos_attr_ids = attr.ids_by_cpos(cpos_list)
 | 
			
		||||
            for i, cpos in enumerate(cpos_list):
 | 
			
		||||
                if cpos_attr_ids[i] != -1:
 | 
			
		||||
                    lookups['cpos_lookup'][cpos][attr.name] = cpos_attr_ids[i]
 | 
			
		||||
            occured_attr_ids = list(filter(lambda x: x != -1,
 | 
			
		||||
                                           set(cpos_attr_ids)))
 | 
			
		||||
            if not occured_attr_ids:
 | 
			
		||||
                continue
 | 
			
		||||
            subattrs = self.attributes.structural.list(
 | 
			
		||||
                filters={'part_of': attr})
 | 
			
		||||
            if not subattrs:
 | 
			
		||||
                continue
 | 
			
		||||
            lookup_name = '{}_lookup'.format(attr.name)
 | 
			
		||||
            lookups[lookup_name] = {}
 | 
			
		||||
            for attr_id in occured_attr_ids:
 | 
			
		||||
                lookups[lookup_name][attr_id] = {}
 | 
			
		||||
            for subattr in subattrs:
 | 
			
		||||
                subattr_values = subattr.values_by_ids(occured_attr_ids)
 | 
			
		||||
                for i, subattr_value in enumerate(subattr_values):
 | 
			
		||||
                    lookups[lookup_name][occured_attr_ids[i]][subattr.name] = \
 | 
			
		||||
                        subattr_value
 | 
			
		||||
        return lookups
 | 
			
		||||
 | 
			
		||||
    def query(self, query, subcorpus_name='Results'):
 | 
			
		||||
        self.client.cqp_query(self.name, subcorpus_name, query)
 | 
			
		||||
        self.client.api.cqp_query(self.name, subcorpus_name, query)
 | 
			
		||||
        return self.subcorpora.get('Results')
 | 
			
		||||
 | 
			
		||||
    def regex2id(self, attribute, regex):
 | 
			
		||||
        __attribute = '{}.{}'.format(self.name, attribute)
 | 
			
		||||
        return self.client.cl_regex2id(__attribute, regex)
 | 
			
		||||
 | 
			
		||||
    def structural_attribute_has_values(self, attribute):
 | 
			
		||||
        __attribute = '{}.{}'.format(self.name, attribute)
 | 
			
		||||
        return self.client.corpus_structural_attribute_has_values(__attribute)
 | 
			
		||||
 | 
			
		||||
    def str2id(self, attribute, strings):
 | 
			
		||||
        __attribute = '{}.{}'.format(self.name, attribute)
 | 
			
		||||
        return self.client.cl_str2id(__attribute, strings)
 | 
			
		||||
 | 
			
		||||
    def struc2cpos(self, attribute, struc):
 | 
			
		||||
        __attribute = '{}.{}'.format(self.name, attribute)
 | 
			
		||||
        return self.client.cl_struc2cpos(__attribute, struc)
 | 
			
		||||
 | 
			
		||||
    def struc2str(self, attribute, strucs):
 | 
			
		||||
        __attribute = '{}.{}'.format(self.name, attribute)
 | 
			
		||||
        return self.client.cl_struc2str(__attribute, strucs)
 | 
			
		||||
 
 | 
			
		||||
@@ -3,47 +3,42 @@ from ..constants import (CONST_FIELD_KEYWORD, CONST_FIELD_MATCH,
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class SubcorpusCollection:
 | 
			
		||||
    def __init__(self, client, parent_corpus):
 | 
			
		||||
    def __init__(self, client, corpus):
 | 
			
		||||
        self.client = client
 | 
			
		||||
        self.parent_corpus = parent_corpus
 | 
			
		||||
        self.corpus = corpus
 | 
			
		||||
 | 
			
		||||
    def get(self, name):
 | 
			
		||||
        return Subcorpus(self.client, self.parent_corpus, name)
 | 
			
		||||
        return Subcorpus(self.client, self.corpus, name)
 | 
			
		||||
 | 
			
		||||
    def list(self):
 | 
			
		||||
        return [Subcorpus(self.client, self.parent_corpus, subcorpus)
 | 
			
		||||
                for subcorpus
 | 
			
		||||
                in self.client.cqp_list_subcorpora(self.parent_corpus.name)]
 | 
			
		||||
        return [Subcorpus(self.client, self.corpus, subcorpus) for subcorpus in
 | 
			
		||||
                self.client.api.cqp_list_subcorpora(self.corpus.name)]
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class Subcorpus:
 | 
			
		||||
    def __init__(self, client, parent_corpus, name):
 | 
			
		||||
    def __init__(self, client, corpus, name):
 | 
			
		||||
        self.client = client
 | 
			
		||||
        self.parent_corpus = parent_corpus
 | 
			
		||||
        self.corpus = corpus
 | 
			
		||||
        self.name = name
 | 
			
		||||
        self.__name = '{}:{}'.format(parent_corpus.name, name)
 | 
			
		||||
        self.__load()
 | 
			
		||||
 | 
			
		||||
    def __load(self):
 | 
			
		||||
        self._name = '{}:{}'.format(corpus.name, name)
 | 
			
		||||
        self.fields = {}
 | 
			
		||||
        if self.client.cqp_subcorpus_has_field(self.__name, CONST_FIELD_MATCH):
 | 
			
		||||
        if client.api.cqp_subcorpus_has_field(self._name, CONST_FIELD_MATCH):
 | 
			
		||||
            self.fields['match'] = CONST_FIELD_MATCH
 | 
			
		||||
        if self.client.cqp_subcorpus_has_field(self.__name,
 | 
			
		||||
                                               CONST_FIELD_MATCHEND):
 | 
			
		||||
        if client.api.cqp_subcorpus_has_field(self._name,
 | 
			
		||||
                                              CONST_FIELD_MATCHEND):
 | 
			
		||||
            self.fields['matchend'] = CONST_FIELD_MATCHEND
 | 
			
		||||
        if self.client.cqp_subcorpus_has_field(self.__name,
 | 
			
		||||
                                               CONST_FIELD_TARGET):
 | 
			
		||||
        if client.api.cqp_subcorpus_has_field(self._name, CONST_FIELD_TARGET):
 | 
			
		||||
            self.fields['target'] = CONST_FIELD_TARGET
 | 
			
		||||
        if self.client.cqp_subcorpus_has_field(self.__name,
 | 
			
		||||
                                               CONST_FIELD_KEYWORD):
 | 
			
		||||
        if client.api.cqp_subcorpus_has_field(self._name, CONST_FIELD_KEYWORD):
 | 
			
		||||
            self.fields['keyword'] = CONST_FIELD_KEYWORD
 | 
			
		||||
        self.size = self.client.cqp_subcorpus_size(self.__name)
 | 
			
		||||
        self.size = client.api.cqp_subcorpus_size(self._name)
 | 
			
		||||
 | 
			
		||||
    def drop(self):
 | 
			
		||||
        return self.client.cqp_drop_subcorpus(self.__name)
 | 
			
		||||
        return self.client.api.cqp_drop_subcorpus(self._name)
 | 
			
		||||
 | 
			
		||||
    def dump(self, field, first, last):
 | 
			
		||||
        return self.client.cqp_dump_subcorpus(self.__name, field, first, last)
 | 
			
		||||
        return self.client.api.cqp_dump_subcorpus(self._name, field, first,
 | 
			
		||||
                                                  last)
 | 
			
		||||
 | 
			
		||||
    def dump_values(self, context=25, first_result=0,
 | 
			
		||||
                    num_results=float('inf')):
 | 
			
		||||
@@ -58,57 +53,21 @@ class Subcorpus:
 | 
			
		||||
                                         last_result))
 | 
			
		||||
        for match_start, match_end in match_boundaries:
 | 
			
		||||
            left_start = max(0, match_start - context)
 | 
			
		||||
            right_end = min(self.parent_corpus.size, (match_end + 1 + context))
 | 
			
		||||
            right_end = min(self.corpus.size, (match_end + 1 + context))
 | 
			
		||||
            matches.append({'lc': list(range(left_start, match_start)),
 | 
			
		||||
                            'hit': list(range(match_start, match_end + 1)),
 | 
			
		||||
                            'rc': list(range(match_end + 1, right_end))})
 | 
			
		||||
        cpos_list = []
 | 
			
		||||
        for match in matches:
 | 
			
		||||
            cpos_list += match['lc'] + match['hit'] + match['rc']
 | 
			
		||||
        cpos_list = list(set(cpos_list))
 | 
			
		||||
        lookups = {}
 | 
			
		||||
        if len(cpos_list) > 0:
 | 
			
		||||
            lookups['cpos_lookup'] = {}
 | 
			
		||||
        for cpos in cpos_list:
 | 
			
		||||
            lookups['cpos_lookup'][cpos] = {}
 | 
			
		||||
        for attr in self.parent_corpus.positional_attributes:
 | 
			
		||||
            cpos_attr_values = self.parent_corpus.cpos2str(attr, cpos_list)
 | 
			
		||||
            for i, cpos in enumerate(cpos_list):
 | 
			
		||||
                lookups['cpos_lookup'][cpos][attr] = cpos_attr_values[i]
 | 
			
		||||
        for attr in self.parent_corpus.structural_attributes:
 | 
			
		||||
            if self.parent_corpus.structural_attribute_has_values(attr):
 | 
			
		||||
                continue
 | 
			
		||||
            cpos_attr_ids = self.parent_corpus.cpos2struc(attr, cpos_list)
 | 
			
		||||
            for i, cpos in enumerate(cpos_list):
 | 
			
		||||
                if cpos_attr_ids[i] != -1:
 | 
			
		||||
                    lookups['cpos_lookup'][cpos][attr] = cpos_attr_ids[i]
 | 
			
		||||
            occured_attr_ids = list(set(cpos_attr_ids))
 | 
			
		||||
            occured_attr_ids = list(filter(lambda x: x != -1,
 | 
			
		||||
                                           occured_attr_ids))
 | 
			
		||||
            if len(occured_attr_ids) == 0:
 | 
			
		||||
                continue
 | 
			
		||||
            attr_subattrs = \
 | 
			
		||||
                list(filter(lambda x: x.startswith(attr + '_'),
 | 
			
		||||
                            self.parent_corpus.structural_attributes))
 | 
			
		||||
            attr_subattrs = list(map(lambda x: x.split('_', 1)[1],
 | 
			
		||||
                                     attr_subattrs))
 | 
			
		||||
            if len(attr_subattrs) == 0:
 | 
			
		||||
                continue
 | 
			
		||||
            lookups[attr + '_lookup'] = {}
 | 
			
		||||
            for attr_id in occured_attr_ids:
 | 
			
		||||
                lookups[attr + '_lookup'][attr_id] = {}
 | 
			
		||||
            for subattr in attr_subattrs:
 | 
			
		||||
                __subattr = attr + '_' + subattr
 | 
			
		||||
                subattr_values = \
 | 
			
		||||
                    self.parent_corpus.struc2str(__subattr, occured_attr_ids)
 | 
			
		||||
                for i, value in enumerate(subattr_values):
 | 
			
		||||
                    lookups[attr + '_lookup'][occured_attr_ids[i]][subattr] = \
 | 
			
		||||
                        value
 | 
			
		||||
        lookups = self.corpus.lookups_by_cpos(cpos_list)
 | 
			
		||||
        return {'matches': matches, **lookups}
 | 
			
		||||
 | 
			
		||||
    def fdist_1(self, cutoff, field, attribute):
 | 
			
		||||
        return self.client.cqp_fdist_1(self.__name, cutoff, field, attribute)
 | 
			
		||||
        return self.client.api.cqp_fdist_1(self._name, cutoff, field,
 | 
			
		||||
                                           attribute._name)
 | 
			
		||||
 | 
			
		||||
    def fdist_2(self, cutoff, field_1, attribute_1, field_2, attribute_2):
 | 
			
		||||
        return self.client.cqp_fdist_2(self.__name, cutoff, field_1,
 | 
			
		||||
                                       attribute_1, field_2, attribute_2)
 | 
			
		||||
        return self.client.api.cqp_fdist_2(self._name, cutoff,
 | 
			
		||||
                                           field_1, attribute_1._name,
 | 
			
		||||
                                           field_2, attribute_2._name)
 | 
			
		||||
 
 | 
			
		||||
@@ -32,7 +32,6 @@ class CQiWrapper(APIClient):
 | 
			
		||||
        Connects via socket to the CQP server using the given username and
 | 
			
		||||
        password from class initiation.
 | 
			
		||||
        '''
 | 
			
		||||
        super(CQiWrapper, self).setup()
 | 
			
		||||
        self.ctrl_connect(self.username, self.password)
 | 
			
		||||
 | 
			
		||||
    def __create_attribute_strings(self):
 | 
			
		||||
@@ -79,7 +78,6 @@ class CQiWrapper(APIClient):
 | 
			
		||||
        Disconnects from the CQP server. Closes used socket after disconnect.
 | 
			
		||||
        '''
 | 
			
		||||
        self.ctrl_bye()
 | 
			
		||||
        super(CQiWrapper, self).teardown()
 | 
			
		||||
        print('Disconnected from cqp server.')
 | 
			
		||||
 | 
			
		||||
    def query_subcorpus(self, query, result_subcorpus_name='Query-results'):
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user