mirror of
				https://gitlab.ub.uni-bielefeld.de/sfb1288inf/nopaque.git
				synced 2025-11-04 12:22:47 +00:00 
			
		
		
		
	Merge branch 'development' of gitlab.ub.uni-bielefeld.de:sfb1288inf/opaque into development
This commit is contained in:
		@@ -1,7 +1,6 @@
 | 
				
			|||||||
# flake8: noqa
 | 
					# flake8: noqa
 | 
				
			||||||
from .api import APIClient
 | 
					from .api import APIClient
 | 
				
			||||||
from .client import CQiClient
 | 
					from .client import CQiClient
 | 
				
			||||||
from .wrapper import CQiWrapper
 | 
					 | 
				
			||||||
from .version import version, version_info
 | 
					from .version import version, version_info
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -20,10 +20,11 @@ class APIClient:
 | 
				
			|||||||
    >>> client.ctrl_bye()
 | 
					    >>> client.ctrl_bye()
 | 
				
			||||||
    {'code': 259, 'msg': 'CQI_STATUS_BYE_OK'}
 | 
					    {'code': 259, 'msg': 'CQI_STATUS_BYE_OK'}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    Args:
 | 
					    Attributes:
 | 
				
			||||||
    host (str): URL to the CQP server. For example,
 | 
					    host (str): URL to the CQP server. For example,
 | 
				
			||||||
        ``cqpserver.localhost`` or ``127.0.0.1``.
 | 
					        ``cqpserver.localhost`` or ``127.0.0.1``.
 | 
				
			||||||
    port (int): Port the CQP server listens on. Default: ``4877``
 | 
					    port (int): Port the CQP server listens on. Default: ``4877``
 | 
				
			||||||
 | 
					    socket (socket.socket): Socket for communicating with a CQP server.
 | 
				
			||||||
    """
 | 
					    """
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def __init__(self, host, port=4877):
 | 
					    def __init__(self, host, port=4877):
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -15,8 +15,9 @@ class CQiClient:
 | 
				
			|||||||
    {'code': 260, 'msg': 'CQI_STATUS_PING_OK'}
 | 
					    {'code': 260, 'msg': 'CQI_STATUS_PING_OK'}
 | 
				
			||||||
    >>> client.disconnect()
 | 
					    >>> client.disconnect()
 | 
				
			||||||
    {'code': 259, 'msg': 'CQI_STATUS_BYE_OK'}
 | 
					    {'code': 259, 'msg': 'CQI_STATUS_BYE_OK'}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    Attributes:
 | 
					    Attributes:
 | 
				
			||||||
    api (APIClient): A client pointing to the specified to the CQP server.
 | 
					    api (APIClient): An API client pointing to the specified CQP server.
 | 
				
			||||||
    """
 | 
					    """
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def __init__(self, host, port=4877):
 | 
					    def __init__(self, host, port=4877):
 | 
				
			||||||
@@ -32,12 +33,14 @@ class CQiClient:
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
    def connect(self, username='anonymous', password=''):
 | 
					    def connect(self, username='anonymous', password=''):
 | 
				
			||||||
        status = self.api.ctrl_connect(username, password)
 | 
					        status = self.api.ctrl_connect(username, password)
 | 
				
			||||||
        self.corpora = CorpusCollection(self)
 | 
					 | 
				
			||||||
        return status
 | 
					        return status
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def disconnect(self):
 | 
					    def disconnect(self):
 | 
				
			||||||
        del self.corpora
 | 
					 | 
				
			||||||
        return self.api.ctrl_bye()
 | 
					        return self.api.ctrl_bye()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def ping(self):
 | 
					    def ping(self):
 | 
				
			||||||
        return self.api.ctrl_ping()
 | 
					        return self.api.ctrl_ping()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    @property
 | 
				
			||||||
 | 
					    def corpora(self):
 | 
				
			||||||
 | 
					        return CorpusCollection(client=self)
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -1,139 +1,119 @@
 | 
				
			|||||||
class AttributeCollection:
 | 
					from .ressource import Collection, Model
 | 
				
			||||||
    def __init__(self, client, corpus):
 | 
					 | 
				
			||||||
        self.client = client
 | 
					 | 
				
			||||||
        self.corpus = corpus
 | 
					 | 
				
			||||||
        self.alignment = AlignmentAttributeCollection(client, corpus)
 | 
					 | 
				
			||||||
        self.positional = PositionalAttributeCollection(client, corpus)
 | 
					 | 
				
			||||||
        self.structural = StructuralAttributeCollection(client, corpus)
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
class Attribute:
 | 
					class Attribute(Model):
 | 
				
			||||||
    """
 | 
					    """
 | 
				
			||||||
    This is a class representing an attribute. Attributes denote the general
 | 
					    This is a class representing an attribute. Attributes denote the general
 | 
				
			||||||
    category of information. A specific occurence is identified by an Id.
 | 
					    category of information. A specific occurence is identified by an Id.
 | 
				
			||||||
 | 
					 | 
				
			||||||
    Attributes:
 | 
					 | 
				
			||||||
    client (CQiClient): A connected client pointing at the server that this
 | 
					 | 
				
			||||||
        object is on.
 | 
					 | 
				
			||||||
    corpus (Corpus): The corpus, this attribute belongs to.
 | 
					 | 
				
			||||||
    name (str): The name of the Attribute.
 | 
					 | 
				
			||||||
    size (int): The number of occurences of this attribute within the corpus.
 | 
					 | 
				
			||||||
    """
 | 
					    """
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def __init__(self, client, corpus, name):
 | 
					    id_attribute = 'api_name'
 | 
				
			||||||
        self.client = client
 | 
					
 | 
				
			||||||
        self.corpus = corpus
 | 
					    @staticmethod
 | 
				
			||||||
        self.name = name
 | 
					    def _attrs(client, corpus, name):
 | 
				
			||||||
        self._name = '{}.{}'.format(corpus.name, name)
 | 
					        api_name = '{}.{}'.format(corpus.attrs['api_name'], name)
 | 
				
			||||||
        self.size = client.api.cl_attribute_size(self._name)
 | 
					        return {'api_name': api_name,
 | 
				
			||||||
 | 
					                'name': name,
 | 
				
			||||||
 | 
					                'size': client.api.cl_attribute_size(api_name)}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def drop(self):
 | 
					    def drop(self):
 | 
				
			||||||
        return self.client.api.cl_drop_attribute(self._name)
 | 
					        return self.client.api.cl_drop_attribute(self.attrs['api_name'])
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
class AlignmentAttributeCollection:
 | 
					class AttributeCollection(Collection):
 | 
				
			||||||
    def __init__(self, client, corpus):
 | 
					    model = Attribute
 | 
				
			||||||
        self.client = client
 | 
					
 | 
				
			||||||
 | 
					    def __init__(self, client=None, corpus=None):
 | 
				
			||||||
 | 
					        super(AttributeCollection, self).__init__(client=client)
 | 
				
			||||||
        self.corpus = corpus
 | 
					        self.corpus = corpus
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def get(self, name):
 | 
					    def get(self, attribute_name):
 | 
				
			||||||
        return AlignmentAttribute(self.client, self.corpus, name)
 | 
					        return self.prepare_model(self.model._attrs(self.client, self.corpus,
 | 
				
			||||||
 | 
					                                                    attribute_name))
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def list(self):
 | 
					    def list(self):
 | 
				
			||||||
        return [AlignmentAttribute(self.client, self.corpus, attr) for attr in
 | 
					        raise NotImplementedError
 | 
				
			||||||
                self.client.api.corpus_alignment_attributes(self.corpus.name)]
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
class AlignmentAttribute(Attribute):
 | 
					class AlignmentAttribute(Attribute):
 | 
				
			||||||
    def cpos_by_ids(self, id_list):
 | 
					    def cpos_by_ids(self, id_list):
 | 
				
			||||||
        return self.client.api.cl_alg2cpos(self._name, id_list)
 | 
					        return self.client.api.cl_alg2cpos(self.attrs['api_name'], id_list)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def ids_by_cpos(self, cpos_list):
 | 
					    def ids_by_cpos(self, cpos_list):
 | 
				
			||||||
        return self.client.api.cl_cpos2alg(self._name, cpos_list)
 | 
					        return self.client.api.cl_cpos2alg(self.attrs['api_name'], cpos_list)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
class PositionalAttributeCollection:
 | 
					class AlignmentAttributeCollection(AttributeCollection):
 | 
				
			||||||
    def __init__(self, client, corpus):
 | 
					    model = AlignmentAttribute
 | 
				
			||||||
        self.client = client
 | 
					 | 
				
			||||||
        self.corpus = corpus
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    def get(self, name):
 | 
					 | 
				
			||||||
        return PositionalAttribute(self.client, self.corpus, name)
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def list(self):
 | 
					    def list(self):
 | 
				
			||||||
        return [PositionalAttribute(self.client, self.corpus, attr) for attr in
 | 
					        return [self.prepare_model(self.model._attrs(self.client, self.corpus, attr))  # noqa
 | 
				
			||||||
                self.client.api.corpus_positional_attributes(self.corpus.name)]
 | 
					                for attr in self.client.api.corpus_alignment_attributes(self.corpus.attrs['api_name'])]  # noqa
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
class PositionalAttribute(Attribute):
 | 
					class PositionalAttribute(Attribute):
 | 
				
			||||||
    def __init__(self, client, corpus, name):
 | 
					    @staticmethod
 | 
				
			||||||
        super(PositionalAttribute, self).__init__(client, corpus, name)
 | 
					    def _attrs(client, corpus, name):
 | 
				
			||||||
        self.lexicon_size = client.api.cl_lexicon_size(self._name)
 | 
					        attrs = super(PositionalAttribute, PositionalAttribute)._attrs(client, corpus, name)  # noqa
 | 
				
			||||||
 | 
					        attrs['lexicon_size'] = client.api.cl_lexicon_size(attrs['api_name'])
 | 
				
			||||||
 | 
					        return attrs
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def cpos_by_id(self, id):
 | 
					    def cpos_by_id(self, id):
 | 
				
			||||||
        return self.client.api.cl_id2cpos(self._name, id)
 | 
					        return self.client.api.cl_id2cpos(self.attrs['api_name'], id)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def cpos_by_ids(self, id_list):
 | 
					    def cpos_by_ids(self, id_list):
 | 
				
			||||||
        return self.client.api.cl_idlist2cpos(self._name, id_list)
 | 
					        return self.client.api.cl_idlist2cpos(self.attrs['api_name'], id_list)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def freqs_by_ids(self, id_list):
 | 
					    def freqs_by_ids(self, id_list):
 | 
				
			||||||
        return self.client.api.cl_id2freq(self._name, id_list)
 | 
					        return self.client.api.cl_id2freq(self.attrs['api_name'], id_list)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def ids_by_cpos(self, cpos_list):
 | 
					    def ids_by_cpos(self, cpos_list):
 | 
				
			||||||
        return self.client.api.cl_cpos2id(self._name, cpos_list)
 | 
					        return self.client.api.cl_cpos2id(self.attrs['api_name'], cpos_list)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def ids_by_regex(self, regex):
 | 
					    def ids_by_regex(self, regex):
 | 
				
			||||||
        return self.client.api.cl_regex2id(self._name, regex)
 | 
					        return self.client.api.cl_regex2id(self.attrs['api_name'], regex)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def ids_by_values(self, value_list):
 | 
					    def ids_by_values(self, value_list):
 | 
				
			||||||
        return self.client.api.cl_str2id(self._name, value_list)
 | 
					        return self.client.api.cl_str2id(self.attrs['api_name'], value_list)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def values_by_cpos(self, cpos_list):
 | 
					    def values_by_cpos(self, cpos_list):
 | 
				
			||||||
        return self.client.api.cl_cpos2str(self._name, cpos_list)
 | 
					        return self.client.api.cl_cpos2str(self.attrs['api_name'], cpos_list)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def values_by_ids(self, id_list):
 | 
					    def values_by_ids(self, id_list):
 | 
				
			||||||
        return self.client.api.cl_id2str(self._name, id_list)
 | 
					        return self.client.api.cl_id2str(self.attrs['api_name'], id_list)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
class StructuralAttributeCollection:
 | 
					class PositionalAttributeCollection(AttributeCollection):
 | 
				
			||||||
    def __init__(self, client, corpus):
 | 
					    model = PositionalAttribute
 | 
				
			||||||
        self.client = client
 | 
					 | 
				
			||||||
        self.corpus = corpus
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def get(self, name):
 | 
					    def list(self):
 | 
				
			||||||
        return StructuralAttribute(self.client, self.corpus, name)
 | 
					        return [self.prepare_model(self.model._attrs(self.client, self.corpus, attr))  # noqa
 | 
				
			||||||
 | 
					                for attr in self.client.api.corpus_positional_attributes(self.corpus.attrs['api_name'])]  # noqa
 | 
				
			||||||
    def list(self, filters={}):
 | 
					 | 
				
			||||||
        attrs = [StructuralAttribute(self.client, self.corpus, attr) for attr
 | 
					 | 
				
			||||||
                 in self.client.api.corpus_structural_attributes(
 | 
					 | 
				
			||||||
                    self.corpus.name)]
 | 
					 | 
				
			||||||
        for k, v in filters.items():
 | 
					 | 
				
			||||||
            if k == 'part_of':
 | 
					 | 
				
			||||||
                attrs = list(filter(lambda x: x.name.startswith(v.name + '_'),
 | 
					 | 
				
			||||||
                                    attrs))
 | 
					 | 
				
			||||||
        return attrs
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
class StructuralAttribute(Attribute):
 | 
					class StructuralAttribute(Attribute):
 | 
				
			||||||
    def __init__(self, client, corpus, name):
 | 
					    @staticmethod
 | 
				
			||||||
        super(StructuralAttribute, self).__init__(client, corpus, name)
 | 
					    def _attrs(client, corpus, name):
 | 
				
			||||||
        self.has_values = \
 | 
					        attrs = super(StructuralAttribute, StructuralAttribute)._attrs(client, corpus, name)  # noqa
 | 
				
			||||||
            client.api.corpus_structural_attribute_has_values(self._name)
 | 
					        attrs['has_values'] = client.api.corpus_structural_attribute_has_values(attrs['api_name'])  # noqa
 | 
				
			||||||
 | 
					        return attrs
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def cpos_by_id(self, id):
 | 
					    def cpos_by_id(self, id):
 | 
				
			||||||
        return self.client.api.cl_struc2cpos(self._name, id)
 | 
					        return self.client.api.cl_struc2cpos(self.attrs['api_name'], id)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def ids_by_cpos(self, cpos_list):
 | 
					    def ids_by_cpos(self, cpos_list):
 | 
				
			||||||
        return self.client.api.cl_cpos2struc(self._name, cpos_list)
 | 
					        return self.client.api.cl_cpos2struc(self.attrs['api_name'], cpos_list)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def lbound_by_cpos(self, cpos_list):
 | 
					    def lbound_by_cpos(self, cpos_list):
 | 
				
			||||||
        return self.client.api.cl_cpos2lbound(self._name, cpos_list)
 | 
					        return self.client.api.cl_cpos2lbound(self.attrs['api_name'],
 | 
				
			||||||
 | 
					                                              cpos_list)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def rbound_by_cpos(self, cpos_list):
 | 
					    def rbound_by_cpos(self, cpos_list):
 | 
				
			||||||
        return self.client.api.cl_cpos2rbound(self._name, cpos_list)
 | 
					        return self.client.api.cl_cpos2rbound(self.attrs['api_name'],
 | 
				
			||||||
 | 
					                                              cpos_list)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def values_by_ids(self, id_list):
 | 
					    def values_by_ids(self, id_list):
 | 
				
			||||||
        return self.client.api.cl_struc2str(self._name, id_list)
 | 
					        return self.client.api.cl_struc2str(self.attrs['api_name'], id_list)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def export(self, first_cpos, last_cpos, context=0, expand_lists=False):
 | 
					    def export(self, first_cpos, last_cpos, context=0, expand_lists=False):
 | 
				
			||||||
        first_id, last_id = self.ids_by_cpos([first_cpos, last_cpos])
 | 
					        first_id, last_id = self.ids_by_cpos([first_cpos, last_cpos])
 | 
				
			||||||
@@ -150,9 +130,10 @@ class StructuralAttribute(Attribute):
 | 
				
			|||||||
            else:
 | 
					            else:
 | 
				
			||||||
                cpos_list_lbound = first_cpos
 | 
					                cpos_list_lbound = first_cpos
 | 
				
			||||||
            rc_rbound = \
 | 
					            rc_rbound = \
 | 
				
			||||||
                self.cpos_by_id(min((last_id + context), (self.size - 1)))[1]
 | 
					                self.cpos_by_id(min((last_id + context), (self.attrs['size'] - 1)))[1]
 | 
				
			||||||
            if rc_rbound != last_cpos:
 | 
					            if rc_rbound != last_cpos:
 | 
				
			||||||
                rc_lbound = min((last_cpos + 1), (self.corpus.size - 1))
 | 
					                rc_lbound = min((last_cpos + 1),
 | 
				
			||||||
 | 
					                                (self.collection.corpus.attrs['size'] - 1))
 | 
				
			||||||
                rc = (rc_lbound, rc_rbound)
 | 
					                rc = (rc_lbound, rc_rbound)
 | 
				
			||||||
                cpos_list_rbound = rc_rbound
 | 
					                cpos_list_rbound = rc_rbound
 | 
				
			||||||
            else:
 | 
					            else:
 | 
				
			||||||
@@ -164,5 +145,17 @@ class StructuralAttribute(Attribute):
 | 
				
			|||||||
                     'rc': list(range(rc[0], (rc[1] + 1)))}
 | 
					                     'rc': list(range(rc[0], (rc[1] + 1)))}
 | 
				
			||||||
        else:
 | 
					        else:
 | 
				
			||||||
            match = {'lc': lc, 'c': c, 'rc': rc}
 | 
					            match = {'lc': lc, 'c': c, 'rc': rc}
 | 
				
			||||||
        lookups = self.corpus.lookups_by_cpos(cpos_list)
 | 
					        lookups = self.collection.corpus.lookups_by_cpos(cpos_list)
 | 
				
			||||||
        return {'match': match, **lookups}
 | 
					        return {'match': match, **lookups}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					class StructuralAttributeCollection(AttributeCollection):
 | 
				
			||||||
 | 
					    model = StructuralAttribute
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    def list(self, filters={}):
 | 
				
			||||||
 | 
					        attrs = [self.prepare_model(self.model._attrs(self.client, self.corpus, attr))  # noqa
 | 
				
			||||||
 | 
					                 for attr in self.client.api.corpus_structural_attributes(self.corpus.attrs['api_name'])]  # noqa
 | 
				
			||||||
 | 
					        for k, v in filters.items():
 | 
				
			||||||
 | 
					            if k == 'part_of':
 | 
				
			||||||
 | 
					                attrs = list(filter(lambda x: x.attrs['name'].startswith(v.attrs['name'] + '_'), attrs))  # noqa
 | 
				
			||||||
 | 
					        return attrs
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -1,30 +1,24 @@
 | 
				
			|||||||
from .attributes import AttributeCollection
 | 
					from .attributes import (AlignmentAttributeCollection,
 | 
				
			||||||
 | 
					                         PositionalAttributeCollection,
 | 
				
			||||||
 | 
					                         StructuralAttributeCollection)
 | 
				
			||||||
 | 
					from .ressource import Collection, Model
 | 
				
			||||||
from .subcorpora import SubcorpusCollection
 | 
					from .subcorpora import SubcorpusCollection
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
class CorpusCollection:
 | 
					class Corpus(Model):
 | 
				
			||||||
    def __init__(self, client):
 | 
					    id_attribute = 'api_name'
 | 
				
			||||||
        self.client = client
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def get(self, name):
 | 
					    @staticmethod
 | 
				
			||||||
        return Corpus(self.client, name)
 | 
					    def _attrs(client, name):
 | 
				
			||||||
 | 
					        api_name = name
 | 
				
			||||||
    def list(self):
 | 
					        return {'api_name': api_name,
 | 
				
			||||||
        return [Corpus(self.client, corpus) for corpus in
 | 
					                'name': name,
 | 
				
			||||||
                self.client.api.corpus_list_coprora()]
 | 
					                'size': client.api.cl_attribute_size(
 | 
				
			||||||
 | 
					                    '{}.word'.format(api_name)),
 | 
				
			||||||
 | 
					                # 'info': client.api.corpus_info(name),
 | 
				
			||||||
class Corpus:
 | 
					                'charset': client.api.corpus_charset(api_name),
 | 
				
			||||||
    def __init__(self, client, name):
 | 
					                # 'full_name' = client.api.corpus_full_name(name),
 | 
				
			||||||
        self.client = client
 | 
					                'properties': client.api.corpus_properties(api_name)}
 | 
				
			||||||
        self.name = name
 | 
					 | 
				
			||||||
        self.size = client.api.cl_attribute_size('{}.word'.format(name))
 | 
					 | 
				
			||||||
        # self.info = client.api.corpus_info(name)
 | 
					 | 
				
			||||||
        self.charset = client.api.corpus_charset(name)
 | 
					 | 
				
			||||||
        # self.full_name = client.api.corpus_full_name(name)
 | 
					 | 
				
			||||||
        self.properties = client.api.corpus_properties(name)
 | 
					 | 
				
			||||||
        self.attributes = AttributeCollection(client, self)
 | 
					 | 
				
			||||||
        self.subcorpora = SubcorpusCollection(client, self)
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def lookups_by_cpos(self, cpos_list):
 | 
					    def lookups_by_cpos(self, cpos_list):
 | 
				
			||||||
        cpos_list = list(set(cpos_list))
 | 
					        cpos_list = list(set(cpos_list))
 | 
				
			||||||
@@ -33,35 +27,64 @@ class Corpus:
 | 
				
			|||||||
            lookups['cpos_lookup'] = {}
 | 
					            lookups['cpos_lookup'] = {}
 | 
				
			||||||
        for cpos in cpos_list:
 | 
					        for cpos in cpos_list:
 | 
				
			||||||
            lookups['cpos_lookup'][cpos] = {}
 | 
					            lookups['cpos_lookup'][cpos] = {}
 | 
				
			||||||
        for attr in self.attributes.positional.list():
 | 
					        for attr in self.positional_attributes.list():
 | 
				
			||||||
            cpos_attr_values = attr.values_by_cpos(cpos_list)
 | 
					            cpos_attr_values = attr.values_by_cpos(cpos_list)
 | 
				
			||||||
            for i, cpos in enumerate(cpos_list):
 | 
					            for i, cpos in enumerate(cpos_list):
 | 
				
			||||||
                lookups['cpos_lookup'][cpos][attr.name] = cpos_attr_values[i]
 | 
					                lookups['cpos_lookup'][cpos][attr.attrs['name']] = cpos_attr_values[i]
 | 
				
			||||||
        for attr in self.attributes.structural.list():
 | 
					        for attr in self.structural_attributes.list():
 | 
				
			||||||
            if attr.has_values:
 | 
					            if attr.attrs['has_values']:
 | 
				
			||||||
                continue
 | 
					                continue
 | 
				
			||||||
            cpos_attr_ids = attr.ids_by_cpos(cpos_list)
 | 
					            cpos_attr_ids = attr.ids_by_cpos(cpos_list)
 | 
				
			||||||
            for i, cpos in enumerate(cpos_list):
 | 
					            for i, cpos in enumerate(cpos_list):
 | 
				
			||||||
                if cpos_attr_ids[i] != -1:
 | 
					                if cpos_attr_ids[i] != -1:
 | 
				
			||||||
                    lookups['cpos_lookup'][cpos][attr.name] = cpos_attr_ids[i]
 | 
					                    lookups['cpos_lookup'][cpos][attr.attrs['name']] = cpos_attr_ids[i]
 | 
				
			||||||
            occured_attr_ids = list(filter(lambda x: x != -1,
 | 
					            occured_attr_ids = list(filter(lambda x: x != -1,
 | 
				
			||||||
                                           set(cpos_attr_ids)))
 | 
					                                           set(cpos_attr_ids)))
 | 
				
			||||||
            if not occured_attr_ids:
 | 
					            if not occured_attr_ids:
 | 
				
			||||||
                continue
 | 
					                continue
 | 
				
			||||||
            subattrs = \
 | 
					            subattrs = \
 | 
				
			||||||
                self.attributes.structural.list(filters={'part_of': attr})
 | 
					                self.structural_attributes.list(filters={'part_of': attr})
 | 
				
			||||||
            if not subattrs:
 | 
					            if not subattrs:
 | 
				
			||||||
                continue
 | 
					                continue
 | 
				
			||||||
            lookup_name = '{}_lookup'.format(attr.name)
 | 
					            lookup_name = '{}_lookup'.format(attr.attrs['name'])
 | 
				
			||||||
            lookups[lookup_name] = {}
 | 
					            lookups[lookup_name] = {}
 | 
				
			||||||
            for attr_id in occured_attr_ids:
 | 
					            for attr_id in occured_attr_ids:
 | 
				
			||||||
                lookups[lookup_name][attr_id] = {}
 | 
					                lookups[lookup_name][attr_id] = {}
 | 
				
			||||||
            for subattr in subattrs:
 | 
					            for subattr in subattrs:
 | 
				
			||||||
                subattr_values = subattr.values_by_ids(occured_attr_ids)
 | 
					                subattr_values = subattr.values_by_ids(occured_attr_ids)
 | 
				
			||||||
                for i, subattr_value in enumerate(subattr_values):
 | 
					                for i, subattr_value in enumerate(subattr_values):
 | 
				
			||||||
                    lookups[lookup_name][occured_attr_ids[i]][subattr.name] = \
 | 
					                    subattr_name = subattr.attrs['name'][(len(attr.attrs['name']) + 1):]
 | 
				
			||||||
 | 
					                    lookups[lookup_name][occured_attr_ids[i]][subattr_name] = \
 | 
				
			||||||
                        subattr_value
 | 
					                        subattr_value
 | 
				
			||||||
        return lookups
 | 
					        return lookups
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def query(self, query, subcorpus_name='Results'):
 | 
					    def query(self, query, subcorpus_name='Results'):
 | 
				
			||||||
        return self.client.api.cqp_query(self.name, subcorpus_name, query)
 | 
					        return self.client.api.cqp_query(self.attrs['api_name'],
 | 
				
			||||||
 | 
					                                         subcorpus_name, query)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    @property
 | 
				
			||||||
 | 
					    def alignment_attributes(self):
 | 
				
			||||||
 | 
					        return AlignmentAttributeCollection(client=self.client, corpus=self)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    @property
 | 
				
			||||||
 | 
					    def positional_attributes(self):
 | 
				
			||||||
 | 
					        return PositionalAttributeCollection(client=self.client, corpus=self)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    @property
 | 
				
			||||||
 | 
					    def structural_attributes(self):
 | 
				
			||||||
 | 
					        return StructuralAttributeCollection(client=self.client, corpus=self)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    @property
 | 
				
			||||||
 | 
					    def subcorpora(self):
 | 
				
			||||||
 | 
					        return SubcorpusCollection(client=self.client, corpus=self)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					class CorpusCollection(Collection):
 | 
				
			||||||
 | 
					    model = Corpus
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    def get(self, corpus_name):
 | 
				
			||||||
 | 
					        return self.prepare_model(self.model._attrs(self.client, corpus_name))
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    def list(self):
 | 
				
			||||||
 | 
					        return [self.prepare_model(self.model._attrs(self.client, corpus))
 | 
				
			||||||
 | 
					                for corpus in self.client.api.corpus_list_coprora()]
 | 
				
			||||||
 
 | 
				
			|||||||
							
								
								
									
										78
									
								
								app/corpora/cqi/models/ressource.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										78
									
								
								app/corpora/cqi/models/ressource.py
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,78 @@
 | 
				
			|||||||
 | 
					class Model:
 | 
				
			||||||
 | 
					    """
 | 
				
			||||||
 | 
					    A base class for representing a single object on the server.
 | 
				
			||||||
 | 
					    """
 | 
				
			||||||
 | 
					    id_attribute = 'Id'
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    def __init__(self, attrs=None, client=None, collection=None):
 | 
				
			||||||
 | 
					        #: A client pointing at the server that this object is on.
 | 
				
			||||||
 | 
					        self.client = client
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        #: The collection that this model is part of.
 | 
				
			||||||
 | 
					        self.collection = collection
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        #: The raw representation of this object from the API
 | 
				
			||||||
 | 
					        self.attrs = attrs or {}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    @staticmethod
 | 
				
			||||||
 | 
					    def _attrs(client, key):
 | 
				
			||||||
 | 
					        raise NotImplementedError
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    def __repr__(self):
 | 
				
			||||||
 | 
					        return "<{}: {}>".format(self.__class__.__name__, self.id)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    def __eq__(self, other):
 | 
				
			||||||
 | 
					        return isinstance(other, self.__class__) and self.id == other.id
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    def __hash__(self):
 | 
				
			||||||
 | 
					        return hash("{}:{}".format(self.__class__.__name__, self.id))
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    @property
 | 
				
			||||||
 | 
					    def id(self):
 | 
				
			||||||
 | 
					        """
 | 
				
			||||||
 | 
					        The ID of the object.
 | 
				
			||||||
 | 
					        """
 | 
				
			||||||
 | 
					        return self.attrs.get(self.id_attribute)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    def reload(self):
 | 
				
			||||||
 | 
					        """
 | 
				
			||||||
 | 
					        Load this object from the server again and update ``attrs`` with the
 | 
				
			||||||
 | 
					        new data.
 | 
				
			||||||
 | 
					        """
 | 
				
			||||||
 | 
					        new_model = self.collection.get(self.id)
 | 
				
			||||||
 | 
					        self.attrs = new_model.attrs
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					class Collection:
 | 
				
			||||||
 | 
					    """
 | 
				
			||||||
 | 
					    A base class for representing all objects of a particular type on the
 | 
				
			||||||
 | 
					    server.
 | 
				
			||||||
 | 
					    """
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    #: The type of object this collection represents, set by subclasses
 | 
				
			||||||
 | 
					    model = None
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    def __init__(self, client=None):
 | 
				
			||||||
 | 
					        #: The client pointing at the server that this collection of objects
 | 
				
			||||||
 | 
					        #: is on.
 | 
				
			||||||
 | 
					        self.client = client
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    def list(self):
 | 
				
			||||||
 | 
					        raise NotImplementedError
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    def get(self, key):
 | 
				
			||||||
 | 
					        raise NotImplementedError
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    def prepare_model(self, attrs):
 | 
				
			||||||
 | 
					        """
 | 
				
			||||||
 | 
					        Create a model from a set of attributes.
 | 
				
			||||||
 | 
					        """
 | 
				
			||||||
 | 
					        if isinstance(attrs, Model):
 | 
				
			||||||
 | 
					            attrs.client = self.client
 | 
				
			||||||
 | 
					            attrs.collection = self
 | 
				
			||||||
 | 
					            return attrs
 | 
				
			||||||
 | 
					        elif isinstance(attrs, dict):
 | 
				
			||||||
 | 
					            return self.model(attrs=attrs, client=self.client, collection=self)
 | 
				
			||||||
 | 
					        else:
 | 
				
			||||||
 | 
					            raise Exception("Can't create {} from {}".format(
 | 
				
			||||||
 | 
					                self.model.__name__, attrs))
 | 
				
			||||||
@@ -1,54 +1,45 @@
 | 
				
			|||||||
 | 
					from .ressource import Collection, Model
 | 
				
			||||||
from ..api.specification import (CONST_FIELD_KEYWORD, CONST_FIELD_MATCH,
 | 
					from ..api.specification import (CONST_FIELD_KEYWORD, CONST_FIELD_MATCH,
 | 
				
			||||||
                                 CONST_FIELD_MATCHEND, CONST_FIELD_TARGET)
 | 
					                                 CONST_FIELD_MATCHEND, CONST_FIELD_TARGET)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
class SubcorpusCollection:
 | 
					class Subcorpus(Model):
 | 
				
			||||||
    def __init__(self, client, corpus):
 | 
					    id_attribute = 'api_name'
 | 
				
			||||||
        self.client = client
 | 
					 | 
				
			||||||
        self.corpus = corpus
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def get(self, name):
 | 
					    @staticmethod
 | 
				
			||||||
        return Subcorpus(self.client, self.corpus, name)
 | 
					    def _attrs(client, corpus, name):
 | 
				
			||||||
 | 
					        api_name = '{}:{}'.format(corpus.attrs['api_name'], name)
 | 
				
			||||||
    def list(self):
 | 
					        fields = {}
 | 
				
			||||||
        return [Subcorpus(self.client, self.corpus, subcorpus) for subcorpus in
 | 
					        if client.api.cqp_subcorpus_has_field(api_name, CONST_FIELD_MATCH):
 | 
				
			||||||
                self.client.api.cqp_list_subcorpora(self.corpus.name)]
 | 
					            fields['match'] = CONST_FIELD_MATCH
 | 
				
			||||||
 | 
					        if client.api.cqp_subcorpus_has_field(api_name,
 | 
				
			||||||
 | 
					 | 
				
			||||||
class Subcorpus:
 | 
					 | 
				
			||||||
    def __init__(self, client, corpus, name):
 | 
					 | 
				
			||||||
        self.client = client
 | 
					 | 
				
			||||||
        self.corpus = corpus
 | 
					 | 
				
			||||||
        self.name = name
 | 
					 | 
				
			||||||
        self._name = '{}:{}'.format(corpus.name, name)
 | 
					 | 
				
			||||||
        self.fields = {}
 | 
					 | 
				
			||||||
        if client.api.cqp_subcorpus_has_field(self._name, CONST_FIELD_MATCH):
 | 
					 | 
				
			||||||
            self.fields['match'] = CONST_FIELD_MATCH
 | 
					 | 
				
			||||||
        if client.api.cqp_subcorpus_has_field(self._name,
 | 
					 | 
				
			||||||
                                              CONST_FIELD_MATCHEND):
 | 
					                                              CONST_FIELD_MATCHEND):
 | 
				
			||||||
            self.fields['matchend'] = CONST_FIELD_MATCHEND
 | 
					            fields['matchend'] = CONST_FIELD_MATCHEND
 | 
				
			||||||
        if client.api.cqp_subcorpus_has_field(self._name, CONST_FIELD_TARGET):
 | 
					        if client.api.cqp_subcorpus_has_field(api_name, CONST_FIELD_TARGET):
 | 
				
			||||||
            self.fields['target'] = CONST_FIELD_TARGET
 | 
					            fields['target'] = CONST_FIELD_TARGET
 | 
				
			||||||
        if client.api.cqp_subcorpus_has_field(self._name, CONST_FIELD_KEYWORD):
 | 
					        if client.api.cqp_subcorpus_has_field(api_name, CONST_FIELD_KEYWORD):
 | 
				
			||||||
            self.fields['keyword'] = CONST_FIELD_KEYWORD
 | 
					            fields['keyword'] = CONST_FIELD_KEYWORD
 | 
				
			||||||
        self.size = client.api.cqp_subcorpus_size(self._name)
 | 
					        return {'api_name': api_name,
 | 
				
			||||||
 | 
					                'name': name,
 | 
				
			||||||
 | 
					                'fields': fields,
 | 
				
			||||||
 | 
					                'size': client.api.cqp_subcorpus_size(api_name)}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def drop(self):
 | 
					    def drop(self):
 | 
				
			||||||
        return self.client.api.cqp_drop_subcorpus(self._name)
 | 
					        return self.client.api.cqp_drop_subcorpus(self.attrs['api_name'])
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def dump(self, field, first, last):
 | 
					    def dump(self, field, first, last):
 | 
				
			||||||
        return self.client.api.cqp_dump_subcorpus(self._name, field, first,
 | 
					        return self.client.api.cqp_dump_subcorpus(self.attrs['api_name'],
 | 
				
			||||||
                                                  last)
 | 
					                                                  field, first, last)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def export(self, context=25, cutoff=float('inf'), expand_lists=False,
 | 
					    def export(self, context=25, cutoff=float('inf'), expand_lists=False,
 | 
				
			||||||
               offset=0):
 | 
					               offset=0):
 | 
				
			||||||
        if self.size == 0:
 | 
					        if self.attrs['size'] == 0:
 | 
				
			||||||
            return {"matches": []}
 | 
					            return {"matches": []}
 | 
				
			||||||
        first_match = max(0, offset)
 | 
					        first_match = max(0, offset)
 | 
				
			||||||
        last_match = min((offset + cutoff - 1), (self.size - 1))
 | 
					        last_match = min((offset + cutoff - 1), (self.attrs['size'] - 1))
 | 
				
			||||||
        match_boundaries = \
 | 
					        match_boundaries = \
 | 
				
			||||||
            zip(self.dump(self.fields['match'], first_match, last_match),
 | 
					            zip(self.dump(self.attrs['fields']['match'], first_match, last_match),  # noqa
 | 
				
			||||||
                self.dump(self.fields['matchend'], first_match, last_match))
 | 
					                self.dump(self.attrs['fields']['matchend'], first_match, last_match))  # noqa
 | 
				
			||||||
        cpos_list = []
 | 
					        cpos_list = []
 | 
				
			||||||
        matches = []
 | 
					        matches = []
 | 
				
			||||||
        for match_start, match_end in match_boundaries:
 | 
					        for match_start, match_end in match_boundaries:
 | 
				
			||||||
@@ -64,10 +55,11 @@ class Subcorpus:
 | 
				
			|||||||
                    cpos_list_lbound = lc_lbound
 | 
					                    cpos_list_lbound = lc_lbound
 | 
				
			||||||
                else:
 | 
					                else:
 | 
				
			||||||
                    cpos_list_lbound = match_start
 | 
					                    cpos_list_lbound = match_start
 | 
				
			||||||
                rc_lbound = min((match_end + 1), (self.corpus.size - 1))
 | 
					                rc_lbound = min((match_end + 1),
 | 
				
			||||||
 | 
					                                (self.collection.corpus.attrs['size'] - 1))
 | 
				
			||||||
                if rc_lbound != match_end:
 | 
					                if rc_lbound != match_end:
 | 
				
			||||||
                    rc_rbound = min((match_end + 1 + context),
 | 
					                    rc_rbound = min((match_end + 1 + context),
 | 
				
			||||||
                                    (self.corpus.size - 1))
 | 
					                                    (self.collection.corpus.attrs['size'] - 1))
 | 
				
			||||||
                    rc = (rc_lbound, rc_rbound)
 | 
					                    rc = (rc_lbound, rc_rbound)
 | 
				
			||||||
                    cpos_list_rbound = rc_rbound
 | 
					                    cpos_list_rbound = rc_rbound
 | 
				
			||||||
                else:
 | 
					                else:
 | 
				
			||||||
@@ -81,14 +73,30 @@ class Subcorpus:
 | 
				
			|||||||
            else:
 | 
					            else:
 | 
				
			||||||
                match = {'lc': lc, 'c': c, 'rc': rc}
 | 
					                match = {'lc': lc, 'c': c, 'rc': rc}
 | 
				
			||||||
            matches.append(match)
 | 
					            matches.append(match)
 | 
				
			||||||
        lookups = self.corpus.lookups_by_cpos(cpos_list)
 | 
					        lookups = self.collection.corpus.lookups_by_cpos(cpos_list)
 | 
				
			||||||
        return {'matches': matches, **lookups}
 | 
					        return {'matches': matches, **lookups}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def fdist_1(self, cutoff, field, attribute):
 | 
					    def fdist_1(self, cutoff, field, attribute):
 | 
				
			||||||
        return self.client.api.cqp_fdist_1(self._name, cutoff,
 | 
					        return self.client.api.cqp_fdist_1(self.attrs['api_name'], cutoff,
 | 
				
			||||||
                                           field, attribute._name)
 | 
					                                           field, attribute._name)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def fdist_2(self, cutoff, field_1, attribute_1, field_2, attribute_2):
 | 
					    def fdist_2(self, cutoff, field_1, attribute_1, field_2, attribute_2):
 | 
				
			||||||
        return self.client.api.cqp_fdist_2(self._name, cutoff,
 | 
					        return self.client.api.cqp_fdist_2(self.attrs['api_name'], cutoff,
 | 
				
			||||||
                                           field_1, attribute_1._name,
 | 
					                                           field_1, attribute_1._name,
 | 
				
			||||||
                                           field_2, attribute_2._name)
 | 
					                                           field_2, attribute_2._name)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					class SubcorpusCollection(Collection):
					    """
					    Collection of the subcorpora that belong to one corpus.
					    """

					    model = Subcorpus

					    def __init__(self, client=None, corpus=None):
					        super().__init__(client=client)
					        #: Corpus whose subcorpora this collection exposes.
					        self.corpus = corpus

					    def get(self, subcorpus_name):
					        """
					        Build the model for the subcorpus called ``subcorpus_name``.
					        """
					        attrs = self.model._attrs(self.client, self.corpus, subcorpus_name)
					        return self.prepare_model(attrs)

					    def list(self):
					        """
					        Build one model per subcorpus name returned by
					        ``cqp_list_subcorpora`` for this collection's corpus.
					        """
					        models = []
					        for name in self.client.api.cqp_list_subcorpora(self.corpus.attrs['api_name']):  # noqa
					            attrs = self.model._attrs(self.client, self.corpus, name)
					            models.append(self.prepare_model(attrs))
					        return models
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -1,321 +0,0 @@
 | 
				
			|||||||
from .api import APIClient
 | 
					 | 
				
			||||||
from .api.specification import CONST_FIELD_MATCH, CONST_FIELD_MATCHEND
 | 
					 | 
				
			||||||
import time
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
class CQiWrapper(APIClient):
    '''
    High level wrapper around the low level API client.

    Groups and renames some of the client functions for ease of use. Also
    structures received data into Python dictionaries.

    Keyword arguments:
    host -- host IP address or hostname where the CQP server is running
    port -- port of the CQP server
    username -- username used to connect to the CQP server
    password -- password of the user to connect to the CQP server
    '''

    # Kept for backward compatibility with code reading the class attribute.
    # Every instance gets its own list in __init__ so that subcorpus names
    # created through one wrapper do not leak into all other wrappers.
    SUBCORPUS_NAMES = []

    def __init__(self, host='127.0.0.1', port=4877, username='anonymous',
                 password=''):
        super(CQiWrapper, self).__init__(host, port=port)
        self.username = username
        self.password = password
        self.SUBCORPUS_NAMES = []

    def connect(self):
        '''
        Connect with CQP server

        Calls ctrl_connect with the username and password given at class
        initiation.
        '''
        self.ctrl_connect(self.username, self.password)

    def __create_attribute_strings(self):
        '''
        Creates the attribute strings needed to query for word, lemma etc. in
        the selected corpus.
        For example: CORPUS_NAME.word to query words
        Strings are created for every positional and structural attribute the
        server reports for the corpus and stored in self.attr_strings.
        '''
        p_attrs = self.corpus_positional_attributes(self.corpus_name)
        struct_attrs = self.corpus_structural_attributes(self.corpus_name)
        self.attr_strings = {'positional_attrs': {}, 'struct_attrs': {}}
        for p_attr in p_attrs:
            self.attr_strings['positional_attrs'][p_attr] = (self.corpus_name
                                                             + '.'
                                                             + p_attr)
        for struct_attr in struct_attrs:
            self.attr_strings['struct_attrs'][struct_attr] = (self.corpus_name
                                                              + '.'
                                                              + struct_attr)
        print(('All positional and '
               'structural attributes: {}').format(self.attr_strings))

    def select_corpus(self, corpus_name):
        '''
        Checks if the given corpus name exists. If it exists it is set as the
        main corpus name used to create the needed query attribute strings
        like CORPUS_NAME.word.

        Raises Exception if the corpus name is not in the corpora list.
        '''
        if corpus_name in self.corpus_list_coprora():
            self.corpus_name = corpus_name
            self.__create_attribute_strings()
            print('{} does exist.'.format(corpus_name))
        else:
            print('{} does not exist.'.format(corpus_name))
            raise Exception('Given Corpus Name is not in corpora list.')

    def disconnect(self):
        '''
        Disconnect from CQP server

        Sends ctrl_bye to the CQP server.
        '''
        self.ctrl_bye()
        print('Disconnected from cqp server.')

    def query_subcorpus(self, query, result_subcorpus_name='Query-results'):
        '''
        Create subcorpus

        The query is used to create a subcorpus holding all cpos match
        positions for that query. The qualified subcorpus name and its size
        are stored in self.result_subcorpus and self.match_count.

        Keyword arguments:
        result_subcorpus_name -- set name of the subcorpus which holds all
        cpos match positions, produced by the query
        query -- query written in cqp query language
        '''
        self.query = query
        self.cqp_query(self.corpus_name, result_subcorpus_name, query)
        self.result_subcorpus = (self.corpus_name
                                 + ':'
                                 + result_subcorpus_name)
        self.SUBCORPUS_NAMES.append(self.result_subcorpus)
        self.match_count = self.cqp_subcorpus_size(self.result_subcorpus)
        print('Nr of all matches is: {}'.format(self.match_count))

    def show_subcorpora(self):
        '''
        Return the result of cqp_list_subcorpora for the selected corpus.
        '''
        return self.cqp_list_subcorpora(self.corpus_name)

    def show_query_results(self,
                           context_len=10,
                           result_len=1000,
                           result_offset=0):
        '''
        Show query results

        Uses the cpos match boundaries of the result subcorpus to build, for
        every match, the cpos lists of the left context, the hit and the right
        context. Attribute information for all collected cpos is requested in
        one go via get_cpos_infos. The result dictionary is stored in
        self.results and returned.

        Keyword arguments:
        context_len -- defines how many words before and after a match will be
        shown (default 10)
        result_len -- defines for how many matches all information like lemma
        and POS is being grabbed
        result_offset -- defines the offset of the matches being requested. If
        the offset is 100, information for matches starting at match 100 is
        being grabbed
        '''
        t0 = time.time()
        self.context_len = context_len
        self.corpus_max_len = self.cl_attribute_size(
            self.attr_strings['positional_attrs']['word']
        )
        self.nr_matches = min(result_len, self.match_count)
        if self.match_count == 0:
            print('Query resulted in 0 matches.')
            self.results = {'code': 0,
                            'result': {'matches': [],
                                       'match_count': self.match_count,
                                       'cpos_lookup': {},
                                       'text_lookup': {}}
                            }
            return self.results

        # Get match cpos boundaries.
        # match_boundaries holds the start and end cpos of one match as a
        # pair of cpositions, e.g. [(1355, 1357), (1477, 1479)].
        offset_start = result_offset
        print('Offset start is: {}'.format(offset_start))
        offset_end = min((self.nr_matches + result_offset - 1),
                         self.match_count - 1)
        print('Offset end is: {}'.format(offset_end))
        match_boundaries = zip(self.cqp_dump_subcorpus(self.result_subcorpus,
                                                       CONST_FIELD_MATCH,
                                                       offset_start,
                                                       offset_end),
                               self.cqp_dump_subcorpus(self.result_subcorpus,
                                                       CONST_FIELD_MATCHEND,
                                                       offset_start,
                                                       offset_end))

        # Generate all cpos between match boundaries including start and end
        # boundaries, plus the cpos of the left and right context. They are
        # stored per match under the keys 'lc', 'hit' and 'rc' and also
        # collected in one list for the final request of all cpos information.
        all_matches = []
        all_cpos = []
        for start, end in match_boundaries:
            end += 1  # make the end boundary exclusive for range()
            lc_cpos = list(range(max(0, start - self.context_len), start))
            match_cpos = list(range(start, end))
            rc_cpos = list(range(end, min(self.corpus_max_len,
                                          end + self.context_len)))
            all_cpos.extend(lc_cpos + match_cpos + rc_cpos)
            all_matches.append({'lc': lc_cpos,
                                'hit': match_cpos,
                                'rc': rc_cpos})

        all_cpos = list(set(all_cpos))  # get rid of cpos duplicates
        len_all_cpos = len(all_cpos)
        t1 = time.time()
        t_total = t1 - t0
        print('Time to create all CPOS for query: {}'.format(t_total))
        print('Requesting {} CPOS with one query.'.format(len_all_cpos))

        # Get cpos information like CORPUS_NAME.word or CORPUS_NAME.lemma for
        # all entries in all_cpos and store it in the self.results dict.
        t2 = time.time()
        all_cpos_infos, text_lookup = self.get_cpos_infos(all_cpos)
        t3 = time.time()
        t_final = t3 - t2
        print('Got infos for {} CPOS in {} seconds:'.format(len_all_cpos,
                                                            t_final))
        self.results = {'code': 0,
                        'result': {'matches': all_matches,
                                   'match_count': self.match_count,
                                   'cpos_lookup': all_cpos_infos,
                                   'text_lookup': text_lookup}
                        }
        return self.results

    def get_cpos_infos(self, all_cpos):
        '''
        Get cpos information like CORPUS_NAME.word or CORPUS_NAME.lemma for
        all cpos entries specified in the parameter all_cpos.

        Returns a tuple (dict_cpos_infos, text_lookup): the first maps every
        cpos to a dict of attribute values, the second maps structure ids to
        the values of the structural attributes that carry values.
        '''
        # Get all positional attribute information
        cpos_infos = {}
        for p_attr_key, p_attr in self.attr_strings['positional_attrs'].items():  # noqa
            cpos_infos[p_attr_key] = self.cl_cpos2str(p_attr, all_cpos)

        # Get all structural attribute information
        tmp_info = {}
        structs_to_check = []
        for struct_attr_key, key in self.attr_strings['struct_attrs'].items():
            has_value = self.corpus_structural_attribute_has_values(key)
            if has_value is False:
                # Structural elements without values: collect the ids of the
                # structures each cpos lies in. The ids are only requested
                # here because the other branch never used them.
                tmp_info[struct_attr_key] = list(
                    self.cl_cpos2struc(key, all_cpos)
                )
            else:
                structs_to_check.append({key: struct_attr_key})
        print('Structs to check: {}'.format(structs_to_check))
        struct_attr_values = list(tmp_info.values())
        struct_attr_keys = list(tmp_info.keys())

        # Build text lookup dictionary.
        # The ids of the first value-less structural attribute are used as
        # lookup ids; a set is built so every unique id is looked up once.
        # NOTE(review): presumably that first attribute is the text
        # structure -- confirm against the corpus layout.
        # Guarded so that a corpus without such an attribute no longer raises
        # IndexError.
        if struct_attr_values:
            text_lookup_ids = list(set(struct_attr_values[0]))
        else:
            text_lookup_ids = []
        text_lookup = {}  # all information of one structure keyed by its id
        for d in structs_to_check:
            s_key, s_value = zip(*d.items())
            print('dict entries: {}: {}'.format(s_key, s_value))
            s_value = s_value[0].split('_', 1)[-1]
            print('S_VALUE: {}'.format(s_value))
            struct_values = self.cl_struc2str(s_key[0], text_lookup_ids)
            print('Extracted Value with key {}: {}'.format(s_key[0],
                                                           struct_values))
            zipped = dict(zip(text_lookup_ids, struct_values))
            for zip_key, zip_value in zipped.items():
                print('Text id as key is: {}'.format(zip_key))
                print('Value of this text is: {}'.format(zip_value))
                check = text_lookup.get(zip_key)
                print('check: {}'.format(check))
                if check is None:
                    text_lookup[zip_key] = {s_value: zip_value}
                else:
                    text_lookup[zip_key].update({s_value: zip_value})

        # zip keys and values together
        attr_keys_list = list(cpos_infos.keys()) + struct_attr_keys
        attr_values_list = list(cpos_infos.values()) + struct_attr_values
        dict_cpos_infos = {}
        for info in zip(all_cpos, *attr_values_list):
            dict_cpos_infos[info[0]] = dict(zip(attr_keys_list, info[1:]))
        return dict_cpos_infos, text_lookup

    def get_sentences(self,
                      match_cpos_list,
                      get_surrounding_s=False,
                      l_r_s_context_additional_len=1):
        '''
        Get sentence information for one match. Optionally also grabs
        l_r_s_context_additional_len sentences to the left and right of the
        sentences containing the first and last cpos of the match.

        Returns a dict with the cpos of every collected sentence, the cpos and
        text lookups from get_cpos_infos and the given match_cpos_list.
        '''
        t0 = time.time()
        s_attr = self.corpus_name + '.s'
        first_cpos, last_cpos = match_cpos_list[0], match_cpos_list[-1]
        context_sentences = {}
        s_ids = self.cl_cpos2struc(s_attr, [first_cpos, last_cpos])
        print('s id match: {}'.format(s_ids))
        for s_id in s_ids:
            s_start, s_end = self.cl_struc2cpos(s_attr, s_id)
            context_sentences[s_id] = list(range(s_start, s_end + 1))
        if get_surrounding_s:
            max_s_id = self.cl_attribute_size(s_attr) - 1
            print('max sid: {}'.format(max_s_id))
            # Clamp the widened sentence id range to the valid ids.
            additional_s_ids = list(range(max(s_ids[0]
                                              - l_r_s_context_additional_len,
                                              0),
                                          min(s_ids[-1]
                                              + l_r_s_context_additional_len,
                                              max_s_id) + 1))
            for s_id in additional_s_ids:
                print('s id additional: {}'.format(s_id))
                s_start, s_end = self.cl_struc2cpos(s_attr, s_id)
                context_sentences[s_id] = list(range(s_start, s_end + 1))
        all_cpos = []
        for s_cpos in context_sentences.values():
            all_cpos.extend(s_cpos)
        all_cpos = list(set(all_cpos))
        all_cpos_infos, text_lookup = self.get_cpos_infos(all_cpos)
        t1 = time.time()
        t_total = t1 - t0
        print('Got all sentences informations in {} seconds'. format(t_total))
        match_context = {'context_s_cpos': context_sentences,
                         'cpos_lookup': all_cpos_infos,
                         'text_lookup': text_lookup,
                         'match_cpos_list': match_cpos_list}
        return match_context
 | 
					 | 
				
			||||||
@@ -67,23 +67,23 @@ def corpus_analysis_query(query):
 | 
				
			|||||||
        socketio.emit('corpus_analysis_query', response, room=request.sid)
 | 
					        socketio.emit('corpus_analysis_query', response, room=request.sid)
 | 
				
			||||||
        return
 | 
					        return
 | 
				
			||||||
    response = {'code': 200, 'desc': None, 'msg': 'OK',
 | 
					    response = {'code': 200, 'desc': None, 'msg': 'OK',
 | 
				
			||||||
                'payload': {**query_status, 'match_count': results.size}}
 | 
					                'payload': {**query_status, 'match_count': results.attrs['size']}}
 | 
				
			||||||
    socketio.emit('corpus_analysis_query', response, room=request.sid)
 | 
					    socketio.emit('corpus_analysis_query', response, room=request.sid)
 | 
				
			||||||
    chunk_size = 100
 | 
					    chunk_size = 100
 | 
				
			||||||
    chunk_start = 0
 | 
					    chunk_start = 0
 | 
				
			||||||
    context = 100
 | 
					    context = 100
 | 
				
			||||||
    progress = 0
 | 
					    progress = 0
 | 
				
			||||||
    client.status = 'running'
 | 
					    client.status = 'running'
 | 
				
			||||||
    while chunk_start <= results.size:
 | 
					    while chunk_start <= results.attrs['size']:
 | 
				
			||||||
        if client.status == 'abort':
 | 
					        if client.status == 'abort':
 | 
				
			||||||
            break
 | 
					            break
 | 
				
			||||||
        chunk = results.export(context=context, cutoff=chunk_size,
 | 
					        chunk = results.export(context=context, cutoff=chunk_size,
 | 
				
			||||||
                               expand_lists=False, offset=chunk_start)
 | 
					                               expand_lists=False, offset=chunk_start)
 | 
				
			||||||
        chunk['cpos_ranges'] = True
 | 
					        chunk['cpos_ranges'] = True
 | 
				
			||||||
        if (results.size == 0):
 | 
					        if (results.attrs['size'] == 0):
 | 
				
			||||||
            progress = 100
 | 
					            progress = 100
 | 
				
			||||||
        else:
 | 
					        else:
 | 
				
			||||||
            progress = ((chunk_start + chunk_size) / results.size) * 100
 | 
					            progress = ((chunk_start + chunk_size) / results.attrs['size']) * 100
 | 
				
			||||||
            progress = min(100, int(math.ceil(progress)))
 | 
					            progress = min(100, int(math.ceil(progress)))
 | 
				
			||||||
        response = {'code': 200, 'desc': None, 'msg': 'OK',
 | 
					        response = {'code': 200, 'desc': None, 'msg': 'OK',
 | 
				
			||||||
                    'payload': {'chunk': chunk, 'progress': progress}}
 | 
					                    'payload': {'chunk': chunk, 'progress': progress}}
 | 
				
			||||||
@@ -104,12 +104,12 @@ def corpus_analysis_inspect_match(payload):
 | 
				
			|||||||
        return
 | 
					        return
 | 
				
			||||||
    # Get more context for given match CPOS
 | 
					    # Get more context for given match CPOS
 | 
				
			||||||
    corpus = client.corpora.get('CORPUS')
 | 
					    corpus = client.corpora.get('CORPUS')
 | 
				
			||||||
    s = corpus.attributes.structural.get('s')
 | 
					    s = corpus.structural_attributes.get('s')
 | 
				
			||||||
    match_context = s.export(payload['first_cpos'], payload['last_cpos'],
 | 
					    match_context = s.export(payload['first_cpos'], payload['last_cpos'],
 | 
				
			||||||
                             context=3, expand_lists=False)
 | 
					                             context=3, expand_lists=False)
 | 
				
			||||||
    match_context['cpos_ranges'] = True
 | 
					    match_context['cpos_ranges'] = True
 | 
				
			||||||
    socketio.emit('match_context',
 | 
					    socketio.emit('match_context', {'payload': match_context},
 | 
				
			||||||
                  {'payload': match_context}, room=request.sid)
 | 
					                  room=request.sid)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
def corpus_analysis_session_handler(app, corpus_id, user_id, session_id):
 | 
					def corpus_analysis_session_handler(app, corpus_id, user_id, session_id):
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -381,7 +381,7 @@ class ResultsList extends List {
 | 
				
			|||||||
      token = chunk["cpos_lookup"][cpos];
 | 
					      token = chunk["cpos_lookup"][cpos];
 | 
				
			||||||
      hitCellElement.insertAdjacentHTML("beforeend", `<span class="token" data-cpos="${cpos}">${token["word"]} </span>`);
 | 
					      hitCellElement.insertAdjacentHTML("beforeend", `<span class="token" data-cpos="${cpos}">${token["word"]} </span>`);
 | 
				
			||||||
      // get text titles of every hit cpos token
 | 
					      // get text titles of every hit cpos token
 | 
				
			||||||
      textTitles.add(chunk["text_lookup"][token["text"]]["text_title"]);
 | 
					      textTitles.add(chunk["text_lookup"][token["text"]]["title"]);
 | 
				
			||||||
      // add button to trigger more context to every match td
 | 
					      // add button to trigger more context to every match td
 | 
				
			||||||
      var inspectBtn = document.createElement("a");
 | 
					      var inspectBtn = document.createElement("a");
 | 
				
			||||||
      inspectBtn.setAttribute("class", "btn-floating btn-flat waves-effect waves-light grey right inspect disabled");
 | 
					      inspectBtn.setAttribute("class", "btn-floating btn-flat waves-effect waves-light grey right inspect disabled");
 | 
				
			||||||
 
 | 
				
			|||||||
		Reference in New Issue
	
	Block a user