mirror of
				https://gitlab.ub.uni-bielefeld.de/sfb1288inf/nopaque.git
				synced 2025-11-03 20:02:47 +00:00 
			
		
		
		
	use cqi from pypi
This commit is contained in:
		@@ -1,8 +0,0 @@
 | 
			
		||||
# flake8: noqa
 | 
			
		||||
from .api import APIClient
 | 
			
		||||
from .client import CQiClient
 | 
			
		||||
from .version import version, version_info
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
__title__ = 'CQi'
 | 
			
		||||
__version__ = version
 | 
			
		||||
@@ -1,2 +0,0 @@
 | 
			
		||||
# flake8: noqa
 | 
			
		||||
from .client import APIClient
 | 
			
		||||
@@ -1,605 +0,0 @@
 | 
			
		||||
from time import sleep
 | 
			
		||||
from . import specification
 | 
			
		||||
from ..errors import cl_error_lookup, error_lookup, cqp_error_lookup
 | 
			
		||||
import socket
 | 
			
		||||
import struct
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class APIClient:
    """
    A low-level client for the IMS Open Corpus Workbench (CWB) corpus query
    interface (CQi) API.

    Each public method sends exactly one CQi command (a 16 bit command word
    followed by its arguments) and returns the decoded response. Error
    responses are raised as the exception classes found in the
    ``cl_error_lookup``, ``cqp_error_lookup`` and ``error_lookup`` tables.

    Example:
    >>> import cqi
    >>> client = cqi.APIClient('127.0.0.1')
    >>> client.ctrl_connect('user', 'password')
    {'code': 258, 'msg': 'CQI_STATUS_CONNECT_OK'}
    >>> client.ctrl_ping()
    {'code': 260, 'msg': 'CQI_STATUS_PING_OK'}
    >>> client.ctrl_bye()
    {'code': 259, 'msg': 'CQI_STATUS_BYE_OK'}

    Attributes:
    host (str): Host name or IP address of the CQP server. For example,
        ``cqpserver.localhost`` or ``127.0.0.1``.
    port (int): Port the CQP server listens on. Default: ``4877``
    socket (socket.socket): Socket for communicating with a CQP server.
    """

    def __init__(self, host, port=4877):
        self.host = host
        self.port = port
        self.socket = socket.socket()

    # ------------------------------------------------------------------ #
    # CTRL_* commands                                                     #
    # ------------------------------------------------------------------ #

    def ctrl_connect(self, username, password):
        """Open the TCP connection and send CTRL_CONNECT."""
        self.socket.connect((self.host, self.port))
        # INPUT: (STRING username, STRING password)
        # OUTPUT: CQI_STATUS_CONNECT_OK, CQI_ERROR_CONNECT_REFUSED
        self.__send_WORD(specification.CTRL_CONNECT)
        self.__send_STRING(username)
        self.__send_STRING(password)
        return self.__recv_response()

    def ctrl_bye(self):
        """Send CTRL_BYE and close the socket.

        The socket is closed even if sending fails or the server answers
        with an error, so the file descriptor is never leaked.
        """
        # INPUT: ()
        # OUTPUT: CQI_STATUS_BYE_OK
        try:
            self.__send_WORD(specification.CTRL_BYE)
            return self.__recv_response()
        finally:
            self.socket.close()

    def ctrl_user_abort(self):
        """Send CTRL_USER_ABORT; no response is read for this command."""
        # INPUT: ()
        # OUTPUT:
        self.__send_WORD(specification.CTRL_USER_ABORT)

    def ctrl_ping(self):
        """Send CTRL_PING and return the status response."""
        # INPUT: ()
        # OUTPUT: CQI_STATUS_PING_OK
        self.__send_WORD(specification.CTRL_PING)
        return self.__recv_response()

    def ctrl_last_general_error(self):
        """Send CTRL_LAST_GENERAL_ERROR and return the response."""
        # INPUT: ()
        # OUTPUT: CQI_DATA_STRING
        # full-text error message for the last general error reported by the
        # CQi server
        self.__send_WORD(specification.CTRL_LAST_GENERAL_ERROR)
        return self.__recv_response()

    # ------------------------------------------------------------------ #
    # ASK_FEATURE_* commands                                              #
    # ------------------------------------------------------------------ #

    def ask_feature_cqi_1_0(self):
        """Send ASK_FEATURE_CQI_1_0 and return the response."""
        # INPUT: ()
        # OUTPUT: CQI_DATA_BOOL
        self.__send_WORD(specification.ASK_FEATURE_CQI_1_0)
        return self.__recv_response()

    def ask_feature_cl_2_3(self):
        """Send ASK_FEATURE_CL_2_3 and return the response."""
        # INPUT: ()
        # OUTPUT: CQI_DATA_BOOL
        self.__send_WORD(specification.ASK_FEATURE_CL_2_3)
        return self.__recv_response()

    def ask_feature_cqp_2_3(self):
        """Send ASK_FEATURE_CQP_2_3 and return the response."""
        # INPUT: ()
        # OUTPUT: CQI_DATA_BOOL
        # NOTE: this used to send ASK_FEATURE_CL_2_3 (copy-paste error), so
        # the answer described the CL feature instead of the CQP feature.
        self.__send_WORD(specification.ASK_FEATURE_CQP_2_3)
        return self.__recv_response()

    # ------------------------------------------------------------------ #
    # CORPUS_* commands                                                   #
    # ------------------------------------------------------------------ #

    def corpus_list_corpora(self):
        """Send CORPUS_LIST_CORPORA and return the response."""
        # INPUT: ()
        # OUTPUT: CQI_DATA_STRING_LIST
        self.__send_WORD(specification.CORPUS_LIST_CORPORA)
        return self.__recv_response()

    def corpus_list_coprora(self):
        """Misspelled alias of :meth:`corpus_list_corpora`.

        Kept so that existing callers of the original name keep working.
        """
        return self.corpus_list_corpora()

    def corpus_charset(self, corpus):
        """Send CORPUS_CHARSET for <corpus> and return the response."""
        # INPUT: (STRING corpus)
        # OUTPUT: CQI_DATA_STRING
        self.__send_WORD(specification.CORPUS_CHARSET)
        self.__send_STRING(corpus)
        return self.__recv_response()

    def corpus_properties(self, corpus):
        """Send CORPUS_PROPERTIES for <corpus> and return the response."""
        # INPUT: (STRING corpus)
        # OUTPUT: CQI_DATA_STRING_LIST
        self.__send_WORD(specification.CORPUS_PROPERTIES)
        self.__send_STRING(corpus)
        return self.__recv_response()

    def corpus_positional_attributes(self, corpus):
        """Send CORPUS_POSITIONAL_ATTRIBUTES for <corpus>."""
        # INPUT: (STRING corpus)
        # OUTPUT: CQI_DATA_STRING_LIST
        self.__send_WORD(specification.CORPUS_POSITIONAL_ATTRIBUTES)
        self.__send_STRING(corpus)
        return self.__recv_response()

    def corpus_structural_attributes(self, corpus):
        """Send CORPUS_STRUCTURAL_ATTRIBUTES for <corpus>."""
        # INPUT: (STRING corpus)
        # OUTPUT: CQI_DATA_STRING_LIST
        self.__send_WORD(specification.CORPUS_STRUCTURAL_ATTRIBUTES)
        self.__send_STRING(corpus)
        return self.__recv_response()

    def corpus_structural_attribute_has_values(self, attribute):
        """Send CORPUS_STRUCTURAL_ATTRIBUTE_HAS_VALUES for <attribute>."""
        # INPUT: (STRING attribute)
        # OUTPUT: CQI_DATA_BOOL
        self.__send_WORD(specification.CORPUS_STRUCTURAL_ATTRIBUTE_HAS_VALUES)
        self.__send_STRING(attribute)
        return self.__recv_response()

    def corpus_alignment_attributes(self, corpus):
        """Send CORPUS_ALIGNMENT_ATTRIBUTES for <corpus>."""
        # INPUT: (STRING corpus)
        # OUTPUT: CQI_DATA_STRING_LIST
        self.__send_WORD(specification.CORPUS_ALIGNMENT_ATTRIBUTES)
        self.__send_STRING(corpus)
        return self.__recv_response()

    def corpus_full_name(self, corpus):
        """Send CORPUS_FULL_NAME for <corpus> and return the response."""
        # INPUT: (STRING corpus)
        # OUTPUT: CQI_DATA_STRING
        # the full name of <corpus> as specified in its registry entry
        self.__send_WORD(specification.CORPUS_FULL_NAME)
        self.__send_STRING(corpus)
        return self.__recv_response()

    def corpus_info(self, corpus):
        """Send CORPUS_INFO for <corpus> and return the response."""
        # INPUT: (STRING corpus)
        # OUTPUT: CQI_DATA_STRING_LIST
        # returns the contents of the .info file of <corpus> as a list of lines
        self.__send_WORD(specification.CORPUS_INFO)
        self.__send_STRING(corpus)
        return self.__recv_response()

    def corpus_drop_corpus(self, corpus):
        """Send CORPUS_DROP_CORPUS for <corpus> and return the response."""
        # INPUT: (STRING corpus)
        # OUTPUT: CQI_STATUS_OK
        # try to unload a corpus and all its attributes from memory
        self.__send_WORD(specification.CORPUS_DROP_CORPUS)
        self.__send_STRING(corpus)
        return self.__recv_response()

    # ------------------------------------------------------------------ #
    # CL_* commands                                                       #
    # ------------------------------------------------------------------ #

    def cl_attribute_size(self, attribute):
        """Send CL_ATTRIBUTE_SIZE for <attribute> and return the response."""
        # INPUT: (STRING attribute)
        # OUTPUT: CQI_DATA_INT
        # returns the size of <attribute>:
        #     number of tokens        (positional)
        #     number of regions       (structural)
        #     number of alignments    (alignment)
        self.__send_WORD(specification.CL_ATTRIBUTE_SIZE)
        self.__send_STRING(attribute)
        return self.__recv_response()

    def cl_lexicon_size(self, attribute):
        """Send CL_LEXICON_SIZE for <attribute> and return the response."""
        # INPUT: (STRING attribute)
        # OUTPUT: CQI_DATA_INT
        # returns the number of entries in the lexicon of a positional
        # attribute;
        # valid lexicon IDs range from 0 .. (lexicon_size - 1)
        self.__send_WORD(specification.CL_LEXICON_SIZE)
        self.__send_STRING(attribute)
        return self.__recv_response()

    def cl_drop_attribute(self, attribute):
        """Send CL_DROP_ATTRIBUTE for <attribute> and return the response."""
        # INPUT: (STRING attribute)
        # OUTPUT: CQI_STATUS_OK
        # unload attribute from memory
        self.__send_WORD(specification.CL_DROP_ATTRIBUTE)
        self.__send_STRING(attribute)
        return self.__recv_response()

    # NOTE: simple (scalar) mappings are applied to lists (the returned list
    #       has exactly the same length as the list passed as an argument)

    def cl_str2id(self, attribute, strings):
        """Send CL_STR2ID for <attribute> with <strings>."""
        # INPUT: (STRING attribute, STRING_LIST strings)
        # OUTPUT: CQI_DATA_INT_LIST
        # returns -1 for every string in <strings> that is not found in the
        # lexicon
        self.__send_WORD(specification.CL_STR2ID)
        self.__send_STRING(attribute)
        self.__send_STRING_LIST(strings)
        return self.__recv_response()

    def cl_id2str(self, attribute, id):
        """Send CL_ID2STR for <attribute> with the ID list <id>."""
        # INPUT: (STRING attribute, INT_LIST id)
        # OUTPUT: CQI_DATA_STRING_LIST
        # returns "" for every ID in <id> that is out of range
        self.__send_WORD(specification.CL_ID2STR)
        self.__send_STRING(attribute)
        self.__send_INT_LIST(id)
        return self.__recv_response()

    def cl_id2freq(self, attribute, id):
        """Send CL_ID2FREQ for <attribute> with the ID list <id>."""
        # INPUT: (STRING attribute, INT_LIST id)
        # OUTPUT: CQI_DATA_INT_LIST
        # returns 0 for every ID in <id> that is out of range
        self.__send_WORD(specification.CL_ID2FREQ)
        self.__send_STRING(attribute)
        self.__send_INT_LIST(id)
        return self.__recv_response()

    def cl_cpos2id(self, attribute, cpos):
        """Send CL_CPOS2ID for <attribute> with the position list <cpos>."""
        # INPUT: (STRING attribute, INT_LIST cpos)
        # OUTPUT: CQI_DATA_INT_LIST
        # returns -1 for every corpus position in <cpos> that is out of range
        # NOTE: this used to send CL_ID2FREQ (copy-paste error), which made
        # the server treat <cpos> as lexicon IDs and return frequencies.
        self.__send_WORD(specification.CL_CPOS2ID)
        self.__send_STRING(attribute)
        self.__send_INT_LIST(cpos)
        return self.__recv_response()

    def cl_cpos2str(self, attribute, cpos):
        """Send CL_CPOS2STR for <attribute> with the position list <cpos>."""
        # INPUT: (STRING attribute, INT_LIST cpos)
        # OUTPUT: CQI_DATA_STRING_LIST
        # returns "" for every corpus position in <cpos> that is out of range
        self.__send_WORD(specification.CL_CPOS2STR)
        self.__send_STRING(attribute)
        self.__send_INT_LIST(cpos)
        return self.__recv_response()

    def cl_cpos2struc(self, attribute, cpos):
        """Send CL_CPOS2STRUC for <attribute> with the list <cpos>."""
        # INPUT: (STRING attribute, INT_LIST cpos)
        # OUTPUT: CQI_DATA_INT_LIST
        # returns -1 for every corpus position not inside a structure region
        self.__send_WORD(specification.CL_CPOS2STRUC)
        self.__send_STRING(attribute)
        self.__send_INT_LIST(cpos)
        return self.__recv_response()

    # NOTE: temporary addition for the Euralex2000 tutorial, but should
    #       probably be included in CQi specs

    def cl_cpos2lbound(self, attribute, cpos):
        """Send CL_CPOS2LBOUND for <attribute> with the list <cpos>."""
        # INPUT: (STRING attribute, INT_LIST cpos)
        # OUTPUT: CQI_DATA_INT_LIST
        # returns left boundary of s-attribute region enclosing cpos, -1 if not
        # in region
        self.__send_WORD(specification.CL_CPOS2LBOUND)
        self.__send_STRING(attribute)
        self.__send_INT_LIST(cpos)
        return self.__recv_response()

    def cl_cpos2rbound(self, attribute, cpos):
        """Send CL_CPOS2RBOUND for <attribute> with the list <cpos>."""
        # INPUT: (STRING attribute, INT_LIST cpos)
        # OUTPUT: CQI_DATA_INT_LIST
        # returns right boundary of s-attribute region enclosing cpos, -1 if
        # not in region
        self.__send_WORD(specification.CL_CPOS2RBOUND)
        self.__send_STRING(attribute)
        self.__send_INT_LIST(cpos)
        return self.__recv_response()

    def cl_cpos2alg(self, attribute, cpos):
        """Send CL_CPOS2ALG for <attribute> with the list <cpos>."""
        # INPUT: (STRING attribute, INT_LIST cpos)
        # OUTPUT: CQI_DATA_INT_LIST
        # returns -1 for every corpus position not inside an alignment
        self.__send_WORD(specification.CL_CPOS2ALG)
        self.__send_STRING(attribute)
        self.__send_INT_LIST(cpos)
        return self.__recv_response()

    def cl_struc2str(self, attribute, strucs):
        """Send CL_STRUC2STR for <attribute> with the list <strucs>."""
        # INPUT: (STRING attribute, INT_LIST strucs)
        # OUTPUT: CQI_DATA_STRING_LIST
        # returns annotated string values of structure regions in <strucs>; ""
        # if out of range
        # check CQI_CORPUS_STRUCTURAL_ATTRIBUTE_HAS_VALUES(<attribute>) first
        self.__send_WORD(specification.CL_STRUC2STR)
        self.__send_STRING(attribute)
        self.__send_INT_LIST(strucs)
        return self.__recv_response()

    # NOTE: the following mappings take a single argument and return multiple
    #       values, including lists of arbitrary size

    def cl_id2cpos(self, attribute, id):
        """Send CL_ID2CPOS for <attribute> with the single ID <id>."""
        # INPUT: (STRING attribute, INT id)
        # OUTPUT: CQI_DATA_INT_LIST
        # returns all corpus positions where the given token occurs
        self.__send_WORD(specification.CL_ID2CPOS)
        self.__send_STRING(attribute)
        self.__send_INT(id)
        return self.__recv_response()

    def cl_idlist2cpos(self, attribute, id_list):
        """Send CL_IDLIST2CPOS for <attribute> with the list <id_list>."""
        # INPUT: (STRING attribute, INT_LIST id_list)
        # OUTPUT: CQI_DATA_INT_LIST
        # returns all corpus positions where one of the tokens in <id_list>
        # occurs; the returned list is sorted as a whole, not per token id
        self.__send_WORD(specification.CL_IDLIST2CPOS)
        self.__send_STRING(attribute)
        self.__send_INT_LIST(id_list)
        return self.__recv_response()

    def cl_regex2id(self, attribute, regex):
        """Send CL_REGEX2ID for <attribute> with the pattern <regex>."""
        # INPUT: (STRING attribute, STRING regex)
        # OUTPUT: CQI_DATA_INT_LIST
        # returns lexicon IDs of all tokens that match <regex>; the returned
        # list may be empty (size 0);
        self.__send_WORD(specification.CL_REGEX2ID)
        self.__send_STRING(attribute)
        self.__send_STRING(regex)
        return self.__recv_response()

    def cl_struc2cpos(self, attribute, struc):
        """Send CL_STRUC2CPOS for <attribute> with the region <struc>."""
        # INPUT: (STRING attribute, INT struc)
        # OUTPUT: CQI_DATA_INT_INT
        # returns start and end corpus positions of structure region <struc>
        self.__send_WORD(specification.CL_STRUC2CPOS)
        self.__send_STRING(attribute)
        self.__send_INT(struc)
        return self.__recv_response()

    def cl_alg2cpos(self, attribute, alg):
        """Send CL_ALG2CPOS for <attribute> with the alignment <alg>."""
        # INPUT: (STRING attribute, INT alg)
        # OUTPUT: CQI_DATA_INT_INT_INT_INT
        # returns (src_start, src_end, target_start, target_end)
        self.__send_WORD(specification.CL_ALG2CPOS)
        self.__send_STRING(attribute)
        self.__send_INT(alg)
        return self.__recv_response()

    # ------------------------------------------------------------------ #
    # CQP_* commands                                                      #
    # ------------------------------------------------------------------ #

    def cqp_query(self, mother_corpus, subcorpus_name, query):
        """Send CQP_QUERY and return the response."""
        # INPUT: (STRING mother_corpus, STRING subcorpus_name, STRING query)
        # OUTPUT: CQI_STATUS_OK
        # <query> must include the ';' character terminating the query.
        self.__send_WORD(specification.CQP_QUERY)
        self.__send_STRING(mother_corpus)
        self.__send_STRING(subcorpus_name)
        self.__send_STRING(query)
        return self.__recv_response()

    def cqp_list_subcorpora(self, corpus):
        """Send CQP_LIST_SUBCORPORA for <corpus> and return the response."""
        # INPUT: (STRING corpus)
        # OUTPUT: CQI_DATA_STRING_LIST
        self.__send_WORD(specification.CQP_LIST_SUBCORPORA)
        self.__send_STRING(corpus)
        return self.__recv_response()

    def cqp_subcorpus_size(self, subcorpus):
        """Send CQP_SUBCORPUS_SIZE for <subcorpus>."""
        # INPUT: (STRING subcorpus)
        # OUTPUT: CQI_DATA_INT
        self.__send_WORD(specification.CQP_SUBCORPUS_SIZE)
        self.__send_STRING(subcorpus)
        return self.__recv_response()

    def cqp_subcorpus_has_field(self, subcorpus, field):
        """Send CQP_SUBCORPUS_HAS_FIELD for <subcorpus> and <field>."""
        # INPUT: (STRING subcorpus, BYTE field)
        # OUTPUT: CQI_DATA_BOOL
        self.__send_WORD(specification.CQP_SUBCORPUS_HAS_FIELD)
        self.__send_STRING(subcorpus)
        self.__send_BYTE(field)
        return self.__recv_response()

    def cqp_dump_subcorpus(self, subcorpus, field, first, last):
        """Send CQP_DUMP_SUBCORPUS and return the response."""
        # INPUT: (STRING subcorpus, BYTE field, INT first, INT last)
        # OUTPUT: CQI_DATA_INT_LIST
        # Dump the values of <field> for match ranges <first> .. <last>
        # in <subcorpus>. <field> is one of the CQI_CONST_FIELD_* constants.
        self.__send_WORD(specification.CQP_DUMP_SUBCORPUS)
        self.__send_STRING(subcorpus)
        self.__send_BYTE(field)
        self.__send_INT(first)
        self.__send_INT(last)
        return self.__recv_response()

    def cqp_drop_subcorpus(self, subcorpus):
        """Send CQP_DROP_SUBCORPUS for <subcorpus>."""
        # INPUT: (STRING subcorpus)
        # OUTPUT: CQI_STATUS_OK
        # delete a subcorpus from memory
        self.__send_WORD(specification.CQP_DROP_SUBCORPUS)
        self.__send_STRING(subcorpus)
        return self.__recv_response()

    # NOTE: The following two functions are temporarily included for the
    #       Euralex 2000 tutorial demo

    def cqp_fdist_1(self, subcorpus, cutoff, field, attribute):
        """Send CQP_FDIST_1 (frequency distribution of single tokens)."""
        # INPUT: (STRING subcorpus, INT cutoff, BYTE field, STRING attribute)
        # OUTPUT: CQI_DATA_INT_LIST
        # returns <n> (id, frequency) pairs flattened into a list of size 2*<n>
        # field is one of CQI_CONST_FIELD_MATCH, CQI_CONST_FIELD_TARGET,
        #                 CQI_CONST_FIELD_KEYWORD
        # NB: pairs are sorted by frequency desc.
        self.__send_WORD(specification.CQP_FDIST_1)
        self.__send_STRING(subcorpus)
        self.__send_INT(cutoff)
        self.__send_BYTE(field)
        self.__send_STRING(attribute)
        return self.__recv_response()

    def cqp_fdist_2(self, subcorpus, cutoff, field1, attribute1, field2,
                    attribute2):
        """Send CQP_FDIST_2 (frequency distribution of pairs of tokens)."""
        # INPUT: (STRING subcorpus, INT cutoff, BYTE field1, STRING attribute1,
        #         BYTE field2, STRING attribute2)
        # OUTPUT: CQI_DATA_INT_LIST
        # returns <n> (id1, id2, frequency) triples flattened into a list of
        # size 3*<n>
        # NB: triples are sorted by frequency desc.
        self.__send_WORD(specification.CQP_FDIST_2)
        self.__send_STRING(subcorpus)
        self.__send_INT(cutoff)
        self.__send_BYTE(field1)
        self.__send_STRING(attribute1)
        self.__send_BYTE(field2)
        self.__send_STRING(attribute2)
        return self.__recv_response()

    # ------------------------------------------------------------------ #
    # Response decoding                                                   #
    # ------------------------------------------------------------------ #

    def __recv_response(self):
        """Read one response word and decode what follows it.

        The high byte of the word selects the response family. Data
        responses are decoded by ``__recv_DATA``; status responses are
        returned as ``{'code': ..., 'msg': ...}``.

        Raises:
            The class mapped to the response word in ``cl_error_lookup``,
            ``cqp_error_lookup`` or ``error_lookup`` for error responses.
            Exception: if the response family is not recognised.
        """
        response = self.__recv_WORD()
        response_type = response >> 8
        if response_type == specification.CL_ERROR:
            raise cl_error_lookup[response]()
        elif response_type == specification.CQP_ERROR:
            raise cqp_error_lookup[response]()
        elif response_type == specification.DATA:
            return self.__recv_DATA(response)
        elif response_type == specification.ERROR:
            raise error_lookup[response]()
        elif response_type == specification.STATUS:
            return {'code': response, 'msg': specification.lookup[response]}
        else:
            raise Exception('Unknown response type: {}'.format(response_type))

    def __recv_DATA(self, data_type):
        """Decode the payload announced by the data response word."""
        if data_type == specification.DATA_BYTE:
            data = self.__recv_DATA_BYTE()
        elif data_type == specification.DATA_BOOL:
            data = self.__recv_DATA_BOOL()
        elif data_type == specification.DATA_INT:
            data = self.__recv_DATA_INT()
        elif data_type == specification.DATA_STRING:
            data = self.__recv_DATA_STRING()
        elif data_type == specification.DATA_BYTE_LIST:
            data = self.__recv_DATA_BYTE_LIST()
        elif data_type == specification.DATA_BOOL_LIST:
            data = self.__recv_DATA_BOOL_LIST()
        elif data_type == specification.DATA_INT_LIST:
            data = self.__recv_DATA_INT_LIST()
        elif data_type == specification.DATA_STRING_LIST:
            data = self.__recv_DATA_STRING_LIST()
        elif data_type == specification.DATA_INT_INT:
            data = self.__recv_DATA_INT_INT()
        elif data_type == specification.DATA_INT_INT_INT_INT:
            data = self.__recv_DATA_INT_INT_INT_INT()
        elif data_type == specification.DATA_INT_TABLE:
            data = self.__recv_DATA_INT_TABLE()
        else:
            raise Exception('Unknown data type: {}'.format(data_type))
        return data

    # ------------------------------------------------------------------ #
    # Wire primitives: receiving                                          #
    # ------------------------------------------------------------------ #

    def __recv_exact(self, n):
        """Read exactly ``n`` bytes from the socket.

        ``recv`` may return fewer bytes than requested, so keep reading
        until the buffer is complete. An empty read means the peer closed
        the connection; raising here replaces the previous MSG_PEEK polling
        loop, which would spin forever in that situation.

        Raises:
            ConnectionError: if the connection is closed before ``n`` bytes
                were received.
        """
        buffer = bytearray()
        while len(buffer) < n:
            chunk = self.socket.recv(n - len(buffer))
            if not chunk:
                raise ConnectionError(
                    'Connection closed by CQP server after {} of {} '
                    'expected bytes'.format(len(buffer), n)
                )
            buffer += chunk
        return bytes(buffer)

    def __recv_ints(self, count):
        """Read ``count`` 32 bit signed ints in one go; [] if count <= 0."""
        count = max(count, 0)
        payload = self.__recv_exact(4 * count)
        return list(struct.unpack('!{}i'.format(count), payload))

    def __recv_DATA_BYTE(self):
        """Read one unsigned byte."""
        return struct.unpack('!B', self.__recv_exact(1))[0]

    def __recv_DATA_BOOL(self):
        """Read one byte and interpret it as a bool."""
        return struct.unpack('!?', self.__recv_exact(1))[0]

    def __recv_DATA_INT(self):
        """Read one 32 bit signed int (network byte order)."""
        return struct.unpack('!i', self.__recv_exact(4))[0]

    def __recv_DATA_STRING(self):
        """Read a WORD length prefix followed by that many bytes of text."""
        n = self.__recv_WORD()
        return self.__recv_exact(n).decode()

    def __recv_DATA_BYTE_LIST(self):
        """Read an INT count followed by that many unsigned bytes."""
        n = max(self.__recv_DATA_INT(), 0)
        return list(struct.unpack('!{}B'.format(n), self.__recv_exact(n)))

    def __recv_DATA_BOOL_LIST(self):
        """Read an INT count followed by that many bools."""
        n = max(self.__recv_DATA_INT(), 0)
        return list(struct.unpack('!{}?'.format(n), self.__recv_exact(n)))

    def __recv_DATA_INT_LIST(self):
        """Read an INT count followed by that many ints."""
        return self.__recv_ints(self.__recv_DATA_INT())

    def __recv_DATA_STRING_LIST(self):
        """Read an INT count followed by that many strings."""
        n = self.__recv_DATA_INT()
        return [self.__recv_DATA_STRING() for _ in range(n)]

    def __recv_DATA_INT_INT(self):
        """Read two ints and return them as a tuple."""
        return tuple(self.__recv_ints(2))

    def __recv_DATA_INT_INT_INT_INT(self):
        """Read four ints and return them as a tuple."""
        return tuple(self.__recv_ints(4))

    def __recv_DATA_INT_TABLE(self):
        """Read INT rows, INT columns, then the table row by row."""
        rows = self.__recv_DATA_INT()
        columns = self.__recv_DATA_INT()
        return [self.__recv_ints(columns) for _ in range(rows)]

    def __recv_WORD(self):
        """Read one 16 bit unsigned word (network byte order)."""
        return struct.unpack('!H', self.__recv_exact(2))[0]

    # ------------------------------------------------------------------ #
    # Wire primitives: sending                                            #
    # ------------------------------------------------------------------ #

    def __send_BYTE(self, byte_data):
        """Send one unsigned byte."""
        self.socket.sendall(struct.pack('!B', byte_data))

    def __send_BOOL(self, bool_data):
        """Send one bool as a single byte."""
        self.socket.sendall(struct.pack('!?', bool_data))

    def __send_INT(self, int_data):
        """Send one 32 bit signed int (network byte order)."""
        self.socket.sendall(struct.pack('!i', int_data))

    def __send_STRING(self, string_data):
        """Send a string as WORD byte length + UTF-8 encoded bytes."""
        encoded_string_data = string_data.encode('utf-8')
        n = len(encoded_string_data)
        data = struct.pack('!H{}s'.format(n), n, encoded_string_data)
        self.socket.sendall(data)

    def __send_INT_LIST(self, int_list_data):
        """Send an INT count followed by the ints, as a single write."""
        n = len(int_list_data)
        data = struct.pack('!i{}i'.format(n), n, *int_list_data)
        self.socket.sendall(data)

    def __send_STRING_LIST(self, string_list_data):
        """Send an INT count followed by each string."""
        n = len(string_list_data)
        self.__send_INT(n)
        for string_data in string_list_data:
            self.__send_STRING(string_data)

    def __send_WORD(self, word_data):
        """Send one 16 bit unsigned word (network byte order)."""
        self.socket.sendall(struct.pack('!H', word_data))
 | 
			
		||||
@@ -1,404 +0,0 @@
 | 
			
		||||
# ########################################################################### #
# IMS CQi specification                                                       #
#                                                                             #
# Version: 0.1a ;o)                                                           #
# Author: Stefan Evert (evert@ims.uni-stuttgart.de)                           #
# Modified by (codestyle): Patrick Jentsch (p.jentsch@uni-bielefeld.de)       #
# Modified date: Thurs Oct 10                                                 #
# ########################################################################### #
"""
Numeric constants of the IMS CQi specification.

Defines the padding byte, the response codes (status, error, data, CL error
and CQP error), the command codes (CTRL, ASK_FEATURE, CORPUS, CL and CQP),
boolean/field/version constants and ``lookup``, a dictionary mapping every
two byte response and command code to its ``CQI_*`` name.
"""

# --------------------------------------------------------------------------- #
# 1. padding                                                                  #
# --------------------------------------------------------------------------- #
PAD = 0x00


# --------------------------------------------------------------------------- #
# 2. CQi responses                                                            #
# --------------------------------------------------------------------------- #
# 2.1 CQI_STATUS_*
STATUS = 0x01
STATUS_OK = 0x0101
STATUS_CONNECT_OK = 0x0102
STATUS_BYE_OK = 0x0103
STATUS_PING_OK = 0x0104

# 2.2 CQI_ERROR_*
ERROR = 0x02
ERROR_GENERAL_ERROR = 0x0201
ERROR_CONNECT_REFUSED = 0x0202
ERROR_USER_ABORT = 0x0203
ERROR_SYNTAX_ERROR = 0x0204
# includes corpus/attribute/subcorpus specifier syntax

# 2.3 CQI_DATA_*
DATA = 0x03
DATA_BYTE = 0x0301
DATA_BOOL = 0x0302
DATA_INT = 0x0303
DATA_STRING = 0x0304
DATA_BYTE_LIST = 0x0305
DATA_BOOL_LIST = 0x0306
DATA_INT_LIST = 0x0307
DATA_STRING_LIST = 0x0308
DATA_INT_INT = 0x0309
DATA_INT_INT_INT_INT = 0x030A
DATA_INT_TABLE = 0x030B

# 2.4 CQI_CL_ERROR_*
#
# NOTE: some CL error codes are not represented in the CQi specs
#       - usually because they're not used in the CL any more
#       - CDA_ENOSTRING is not considered an error (returns -1)
#       - CDA_EARGS: dynamic attribute calls not yet supported
CL_ERROR = 0x04
CL_ERROR_NO_SUCH_ATTRIBUTE = 0x0401
# returned if CQi server couldn't open attribute
CL_ERROR_WRONG_ATTRIBUTE_TYPE = 0x0402
# CDA_EATTTYPE
CL_ERROR_OUT_OF_RANGE = 0x0403
# CDA_EIDORNG, CDA_EIDXORNG, CDA_EPOSORNG
CL_ERROR_REGEX = 0x0404
# CDA_EPATTERN (not used), CDA_EBADREGEX
CL_ERROR_CORPUS_ACCESS = 0x0405
# CDA_ENODATA
CL_ERROR_OUT_OF_MEMORY = 0x0406
# CDA_ENOMEM
# this means the CQi server has run out of memory;
# try discarding some other corpora and/or subcorpora
CL_ERROR_INTERNAL = 0x0407
# CDA_EOTHER, CDA_ENYI
# this is the classical 'please contact technical support' error

# 2.5 CQI_CQP_ERROR_*
CQP_ERROR = 0x05
# CQP error messages yet to be defined
CQP_ERROR_GENERAL = 0x0501
CQP_ERROR_NO_SUCH_CORPUS = 0x0502
CQP_ERROR_INVALID_FIELD = 0x0503
CQP_ERROR_OUT_OF_RANGE = 0x0504
# various cases where a number is out of range


# --------------------------------------------------------------------------- #
# 3. CQi commands                                                             #
# --------------------------------------------------------------------------- #
# 3.1 CQI_CTRL_*
CTRL = 0x11
CTRL_CONNECT = 0x1101
# INPUT: (STRING username, STRING password)
# OUTPUT: CQI_STATUS_CONNECT_OK, CQI_ERROR_CONNECT_REFUSED
CTRL_BYE = 0x1102
# INPUT: ()
# OUTPUT: CQI_STATUS_BYE_OK
CTRL_USER_ABORT = 0x1103
# INPUT: ()
# OUTPUT:
CTRL_PING = 0x1104
# INPUT: ()
# OUTPUT: CQI_STATUS_PING_OK
CTRL_LAST_GENERAL_ERROR = 0x1105
# INPUT: ()
# OUTPUT: CQI_DATA_STRING
# full-text error message for the last general error reported by the CQi server

# 3.2 CQI_ASK_FEATURE_*
ASK_FEATURE = 0x12
ASK_FEATURE_CQI_1_0 = 0x1201
# INPUT: ()
# OUTPUT: CQI_DATA_BOOL
ASK_FEATURE_CL_2_3 = 0x1202
# INPUT: ()
# OUTPUT: CQI_DATA_BOOL
ASK_FEATURE_CQP_2_3 = 0x1203
# INPUT: ()
# OUTPUT: CQI_DATA_BOOL

# 3.3 CQI_CORPUS_*
CORPUS = 0x13
CORPUS_LIST_CORPORA = 0x1301
# INPUT: ()
# OUTPUT: CQI_DATA_STRING_LIST
CORPUS_CHARSET = 0x1303
# INPUT: (STRING corpus)
# OUTPUT: CQI_DATA_STRING
CORPUS_PROPERTIES = 0x1304
# INPUT: (STRING corpus)
# OUTPUT: CQI_DATA_STRING_LIST
CORPUS_POSITIONAL_ATTRIBUTES = 0x1305
# INPUT: (STRING corpus)
# OUTPUT: CQI_DATA_STRING_LIST
CORPUS_STRUCTURAL_ATTRIBUTES = 0x1306
# INPUT: (STRING corpus)
# OUTPUT: CQI_DATA_STRING_LIST
CORPUS_STRUCTURAL_ATTRIBUTE_HAS_VALUES = 0x1307
# INPUT: (STRING attribute)
# OUTPUT: CQI_DATA_BOOL
CORPUS_ALIGNMENT_ATTRIBUTES = 0x1308
# INPUT: (STRING corpus)
# OUTPUT: CQI_DATA_STRING_LIST
CORPUS_FULL_NAME = 0x1309
# INPUT: (STRING corpus)
# OUTPUT: CQI_DATA_STRING
# the full name of <corpus> as specified in its registry entry
CORPUS_INFO = 0x130A
# INPUT: (STRING corpus)
# OUTPUT: CQI_DATA_STRING_LIST
# returns the contents of the .info file of <corpus> as a list of lines
CORPUS_DROP_CORPUS = 0x130B
# INPUT: (STRING corpus)
# OUTPUT: CQI_STATUS_OK
# try to unload a corpus and all its attributes from memory

# 3.4 CQI_CL_*
CL = 0x14
# low-level corpus access (CL functions)
CL_ATTRIBUTE_SIZE = 0x1401
# INPUT: (STRING attribute)
# OUTPUT: CQI_DATA_INT
# returns the size of <attribute>:
# - number of tokens (positional)
# - number of regions (structural)
# - number of alignments (alignment)
CL_LEXICON_SIZE = 0x1402
# INPUT: (STRING attribute)
# OUTPUT: CQI_DATA_INT
# returns the number of entries in the lexicon of a positional attribute;
# valid lexicon IDs range from 0 .. (lexicon_size - 1)
CL_DROP_ATTRIBUTE = 0x1403
# INPUT: (STRING attribute)
# OUTPUT: CQI_STATUS_OK
# unload attribute from memory

# NOTE: simple (scalar) mappings are applied to lists (the returned list has
#       exactly the same length as the list passed as an argument)
CL_STR2ID = 0x1404
# INPUT: (STRING attribute, STRING_LIST strings)
# OUTPUT: CQI_DATA_INT_LIST
# returns -1 for every string in <strings> that is not found in the lexicon
CL_ID2STR = 0x1405
# INPUT: (STRING attribute, INT_LIST id)
# OUTPUT: CQI_DATA_STRING_LIST
# returns "" for every ID in <id> that is out of range
CL_ID2FREQ = 0x1406
# INPUT: (STRING attribute, INT_LIST id)
# OUTPUT: CQI_DATA_INT_LIST
# returns 0 for every ID in <id> that is out of range
CL_CPOS2ID = 0x1407
# INPUT: (STRING attribute, INT_LIST cpos)
# OUTPUT: CQI_DATA_INT_LIST
# returns -1 for every corpus position in <cpos> that is out of range
CL_CPOS2STR = 0x1408
# INPUT: (STRING attribute, INT_LIST cpos)
# OUTPUT: CQI_DATA_STRING_LIST
# returns "" for every corpus position in <cpos> that is out of range
CL_CPOS2STRUC = 0x1409
# INPUT: (STRING attribute, INT_LIST cpos)
# OUTPUT: CQI_DATA_INT_LIST
# returns -1 for every corpus position not inside a structure region

# NOTE: temporary addition for the Euralex2000 tutorial, but should probably be
#       included in CQi specs
CL_CPOS2LBOUND = 0x1420
# INPUT: (STRING attribute, INT_LIST cpos)
# OUTPUT: CQI_DATA_INT_LIST
# returns left boundary of s-attribute region enclosing cpos, -1 if not in
# region
CL_CPOS2RBOUND = 0x1421
# INPUT: (STRING attribute, INT_LIST cpos)
# OUTPUT: CQI_DATA_INT_LIST
# returns right boundary of s-attribute region enclosing cpos, -1 if not in
# region
CL_CPOS2ALG = 0x140A
# INPUT: (STRING attribute, INT_LIST cpos)
# OUTPUT: CQI_DATA_INT_LIST
# returns -1 for every corpus position not inside an alignment
CL_STRUC2STR = 0x140B
# INPUT: (STRING attribute, INT_LIST strucs)
# OUTPUT: CQI_DATA_STRING_LIST
# returns annotated string values of structure regions in <strucs>; "" if out
# of range
# check CQI_CORPUS_STRUCTURAL_ATTRIBUTE_HAS_VALUES(<attribute>) first

# NOTE: the following mappings take a single argument and return multiple
#       values, including lists of arbitrary size
CL_ID2CPOS = 0x140C
# INPUT: (STRING attribute, INT id)
# OUTPUT: CQI_DATA_INT_LIST
# returns all corpus positions where the given token occurs
CL_IDLIST2CPOS = 0x140D
# INPUT: (STRING attribute, INT_LIST id_list)
# OUTPUT: CQI_DATA_INT_LIST
# returns all corpus positions where one of the tokens in <id_list>
# occurs; the returned list is sorted as a whole, not per token id
CL_REGEX2ID = 0x140E
# INPUT: (STRING attribute, STRING regex)
# OUTPUT: CQI_DATA_INT_LIST
# returns lexicon IDs of all tokens that match <regex>; the returned
# list may be empty (size 0);
CL_STRUC2CPOS = 0x140F
# INPUT: (STRING attribute, INT struc)
# OUTPUT: CQI_DATA_INT_INT
# returns start and end corpus positions of structure region <struc>
CL_ALG2CPOS = 0x1410
# INPUT: (STRING attribute, INT alg)
# OUTPUT: CQI_DATA_INT_INT_INT_INT
# returns (src_start, src_end, target_start, target_end)

# 3.5 CQI_CQP_*
CQP = 0x15
CQP_QUERY = 0x1501
# INPUT: (STRING mother_corpus, STRING subcorpus_name, STRING query)
# OUTPUT: CQI_STATUS_OK
# <query> must include the ';' character terminating the query.
CQP_LIST_SUBCORPORA = 0x1502
# INPUT: (STRING corpus)
# OUTPUT: CQI_DATA_STRING_LIST
CQP_SUBCORPUS_SIZE = 0x1503
# INPUT: (STRING subcorpus)
# OUTPUT: CQI_DATA_INT
CQP_SUBCORPUS_HAS_FIELD = 0x1504
# INPUT: (STRING subcorpus, BYTE field)
# OUTPUT: CQI_DATA_BOOL
CQP_DUMP_SUBCORPUS = 0x1505
# INPUT: (STRING subcorpus, BYTE field, INT first, INT last)
# OUTPUT: CQI_DATA_INT_LIST
# Dump the values of <field> for match ranges <first> .. <last> in <subcorpus>.
# <field> is one of the CQI_CONST_FIELD_* constants.
CQP_DROP_SUBCORPUS = 0x1509
# INPUT: (STRING subcorpus)
# OUTPUT: CQI_STATUS_OK
# delete a subcorpus from memory

# NOTE: The following two functions are temporarily included for the Euralex
#       2000 tutorial demo

# NOTE: frequency distribution of single tokens
CQP_FDIST_1 = 0x1510
# INPUT: (STRING subcorpus, INT cutoff, BYTE field, STRING attribute)
# OUTPUT: CQI_DATA_INT_LIST
# returns <n> (id, frequency) pairs flattened into a list of size 2*<n>
# field is one of CQI_CONST_FIELD_MATCH, CQI_CONST_FIELD_TARGET,
# CQI_CONST_FIELD_KEYWORD
# NB: pairs are sorted by frequency desc.

# NOTE: frequency distribution of pairs of tokens
CQP_FDIST_2 = 0x1511
# INPUT: (STRING subcorpus, INT cutoff, BYTE field1, STRING attribute1,
#         BYTE field2, STRING attribute2)
# OUTPUT: CQI_DATA_INT_LIST
# returns <n> (id1, id2, frequency) pairs flattened into a list of size 3*<n>
# NB: triples are sorted by frequency desc.


# --------------------------------------------------------------------------- #
# 4. Constant Definitions                                                     #
# --------------------------------------------------------------------------- #
CONST_FALSE = 0x00
CONST_NO = 0x00
CONST_TRUE = 0x01
CONST_YES = 0x01

# NOTE: The following constants specify which field will be returned by
#       CQI_CQP_DUMP_SUBCORPUS and some other subcorpus commands.
CONST_FIELD_MATCH = 0x10
CONST_FIELD_MATCHEND = 0x11

# NOTE: The constants specifying target0 .. target9 are guaranteed to have the
#       numerical values 0 .. 9, so clients do not need to look up the constant
#       values if they're handling arbitrary targets.
CONST_FIELD_TARGET_0 = 0x00
CONST_FIELD_TARGET_1 = 0x01
CONST_FIELD_TARGET_2 = 0x02
CONST_FIELD_TARGET_3 = 0x03
CONST_FIELD_TARGET_4 = 0x04
CONST_FIELD_TARGET_5 = 0x05
CONST_FIELD_TARGET_6 = 0x06
CONST_FIELD_TARGET_7 = 0x07
CONST_FIELD_TARGET_8 = 0x08
CONST_FIELD_TARGET_9 = 0x09

# NOTE: The following constants are provided for backward compatibility with
#       traditional CQP field names & while the generalised target concept
#       isn't yet implemented in the CQPserver.
CONST_FIELD_TARGET = 0x00
CONST_FIELD_KEYWORD = 0x09

# NOTE: CQi version is CQI_MAJOR_VERSION.CQI_MINOR_VERSION
MAJOR_VERSION = 0x00
MINOR_VERSION = 0x01


# --------------------------------------------------------------------------- #
# 5. CQi lookup dictionary                                                    #
# --------------------------------------------------------------------------- #
# Maps every two byte response/command code to its CQI_* name. The keys are
# the constants defined above, so the table cannot drift from their values.
lookup = {
    STATUS_OK: 'CQI_STATUS_OK',
    STATUS_CONNECT_OK: 'CQI_STATUS_CONNECT_OK',
    STATUS_BYE_OK: 'CQI_STATUS_BYE_OK',
    STATUS_PING_OK: 'CQI_STATUS_PING_OK',
    ERROR_GENERAL_ERROR: 'CQI_ERROR_GENERAL_ERROR',
    ERROR_CONNECT_REFUSED: 'CQI_ERROR_CONNECT_REFUSED',
    ERROR_USER_ABORT: 'CQI_ERROR_USER_ABORT',
    ERROR_SYNTAX_ERROR: 'CQI_ERROR_SYNTAX_ERROR',
    DATA_BYTE: 'CQI_DATA_BYTE',
    DATA_BOOL: 'CQI_DATA_BOOL',
    DATA_INT: 'CQI_DATA_INT',
    DATA_STRING: 'CQI_DATA_STRING',
    DATA_BYTE_LIST: 'CQI_DATA_BYTE_LIST',
    DATA_BOOL_LIST: 'CQI_DATA_BOOL_LIST',
    DATA_INT_LIST: 'CQI_DATA_INT_LIST',
    DATA_STRING_LIST: 'CQI_DATA_STRING_LIST',
    DATA_INT_INT: 'CQI_DATA_INT_INT',
    DATA_INT_INT_INT_INT: 'CQI_DATA_INT_INT_INT_INT',
    DATA_INT_TABLE: 'CQI_DATA_INT_TABLE',
    CL_ERROR_NO_SUCH_ATTRIBUTE: 'CQI_CL_ERROR_NO_SUCH_ATTRIBUTE',
    CL_ERROR_WRONG_ATTRIBUTE_TYPE: 'CQI_CL_ERROR_WRONG_ATTRIBUTE_TYPE',
    CL_ERROR_OUT_OF_RANGE: 'CQI_CL_ERROR_OUT_OF_RANGE',
    CL_ERROR_REGEX: 'CQI_CL_ERROR_REGEX',
    CL_ERROR_CORPUS_ACCESS: 'CQI_CL_ERROR_CORPUS_ACCESS',
    CL_ERROR_OUT_OF_MEMORY: 'CQI_CL_ERROR_OUT_OF_MEMORY',
    CL_ERROR_INTERNAL: 'CQI_CL_ERROR_INTERNAL',
    CQP_ERROR_GENERAL: 'CQI_CQP_ERROR_GENERAL',
    CQP_ERROR_NO_SUCH_CORPUS: 'CQI_CQP_ERROR_NO_SUCH_CORPUS',
    CQP_ERROR_INVALID_FIELD: 'CQI_CQP_ERROR_INVALID_FIELD',
    CQP_ERROR_OUT_OF_RANGE: 'CQI_CQP_ERROR_OUT_OF_RANGE',
    CTRL_CONNECT: 'CQI_CTRL_CONNECT',
    CTRL_BYE: 'CQI_CTRL_BYE',
    CTRL_USER_ABORT: 'CQI_CTRL_USER_ABORT',
    CTRL_PING: 'CQI_CTRL_PING',
    CTRL_LAST_GENERAL_ERROR: 'CQI_CTRL_LAST_GENERAL_ERROR',
    ASK_FEATURE_CQI_1_0: 'CQI_ASK_FEATURE_CQI_1_0',
    ASK_FEATURE_CL_2_3: 'CQI_ASK_FEATURE_CL_2_3',
    ASK_FEATURE_CQP_2_3: 'CQI_ASK_FEATURE_CQP_2_3',
    CORPUS_LIST_CORPORA: 'CQI_CORPUS_LIST_CORPORA',
    CORPUS_CHARSET: 'CQI_CORPUS_CHARSET',
    CORPUS_PROPERTIES: 'CQI_CORPUS_PROPERTIES',
    CORPUS_POSITIONAL_ATTRIBUTES: 'CQI_CORPUS_POSITIONAL_ATTRIBUTES',
    CORPUS_STRUCTURAL_ATTRIBUTES: 'CQI_CORPUS_STRUCTURAL_ATTRIBUTES',
    CORPUS_STRUCTURAL_ATTRIBUTE_HAS_VALUES: 'CQI_CORPUS_STRUCTURAL_ATTRIBUTE_HAS_VALUES',  # noqa
    CORPUS_ALIGNMENT_ATTRIBUTES: 'CQI_CORPUS_ALIGNMENT_ATTRIBUTES',
    CORPUS_FULL_NAME: 'CQI_CORPUS_FULL_NAME',
    CORPUS_INFO: 'CQI_CORPUS_INFO',
    CORPUS_DROP_CORPUS: 'CQI_CORPUS_DROP_CORPUS',
    CL_ATTRIBUTE_SIZE: 'CQI_CL_ATTRIBUTE_SIZE',
    CL_LEXICON_SIZE: 'CQI_CL_LEXICON_SIZE',
    CL_DROP_ATTRIBUTE: 'CQI_CL_DROP_ATTRIBUTE',
    CL_STR2ID: 'CQI_CL_STR2ID',
    CL_ID2STR: 'CQI_CL_ID2STR',
    CL_ID2FREQ: 'CQI_CL_ID2FREQ',
    CL_CPOS2ID: 'CQI_CL_CPOS2ID',
    CL_CPOS2STR: 'CQI_CL_CPOS2STR',
    CL_CPOS2STRUC: 'CQI_CL_CPOS2STRUC',
    CL_CPOS2ALG: 'CQI_CL_CPOS2ALG',
    CL_STRUC2STR: 'CQI_CL_STRUC2STR',
    CL_ID2CPOS: 'CQI_CL_ID2CPOS',
    CL_IDLIST2CPOS: 'CQI_CL_IDLIST2CPOS',
    CL_REGEX2ID: 'CQI_CL_REGEX2ID',
    CL_STRUC2CPOS: 'CQI_CL_STRUC2CPOS',
    CL_ALG2CPOS: 'CQI_CL_ALG2CPOS',
    CL_CPOS2LBOUND: 'CQI_CL_CPOS2LBOUND',
    CL_CPOS2RBOUND: 'CQI_CL_CPOS2RBOUND',
    CQP_QUERY: 'CQI_CQP_QUERY',
    CQP_LIST_SUBCORPORA: 'CQI_CQP_LIST_SUBCORPORA',
    CQP_SUBCORPUS_SIZE: 'CQI_CQP_SUBCORPUS_SIZE',
    CQP_SUBCORPUS_HAS_FIELD: 'CQI_CQP_SUBCORPUS_HAS_FIELD',
    CQP_DUMP_SUBCORPUS: 'CQI_CQP_DUMP_SUBCORPUS',
    CQP_DROP_SUBCORPUS: 'CQI_CQP_DROP_SUBCORPUS',
    CQP_FDIST_1: 'CQI_CQP_FDIST_1',
    CQP_FDIST_2: 'CQI_CQP_FDIST_2',
}
 | 
			
		||||
@@ -1,46 +0,0 @@
 | 
			
		||||
from .api import APIClient
 | 
			
		||||
from .models.corpora import CorpusCollection
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class CQiClient:
    """
    High-level client for talking to a CQi server.

    Example:
    >>> import cqi
    >>> client = cqi.CQiClient('127.0.0.1')
    >>> client.connect()
    {'code': 258, 'msg': 'CQI_STATUS_CONNECT_OK'}
    >>> client.ping()
    {'code': 260, 'msg': 'CQI_STATUS_PING_OK'}
    >>> client.disconnect()
    {'code': 259, 'msg': 'CQI_STATUS_BYE_OK'}

    Attributes:
    api (APIClient): Low-level API client bound to the given CQP server.
    """

    def __init__(self, host, port=4877):
        """
        Create a client for the CQP server at ``host``:``port``.

        Args:
        host (str): URL to the CQP server. For example,
            ``cqpserver.localhost`` or ``127.0.0.1``.
        port (int): Port the CQP server listens on. Default: ``4877``
        """
        self.api = APIClient(host, port=port)

    def connect(self, username='anonymous', password=''):
        """Forward the credentials to ``api.ctrl_connect`` and return its result."""
        return self.api.ctrl_connect(username, password)

    def disconnect(self):
        """Return the result of ``api.ctrl_bye``."""
        return self.api.ctrl_bye()

    def ping(self):
        """Return the result of ``api.ctrl_ping``."""
        return self.api.ctrl_ping()

    @property
    def corpora(self):
        """A new ``CorpusCollection`` bound to this client (built per access)."""
        return CorpusCollection(client=self)
 | 
			
		||||
@@ -1,180 +0,0 @@
 | 
			
		||||
from .api import specification
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class CQiException(Exception):
    """
    Root of the CQi exception hierarchy.

    Catch this class to handle every error the CQi package might raise.
    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # Generic defaults; the concrete subclasses overwrite them.
        self.code = None
        self.name = None
        self.description = None
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class Error(CQiException):
    """Base class of the ``CQI_ERROR_*`` exceptions; carries the group code."""

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.code = specification.ERROR
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class ErrorGeneralError(Error):
    """Exception for the ``ERROR_GENERAL_ERROR`` response code."""

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.code = code = specification.ERROR_GENERAL_ERROR
        self.name = specification.lookup[code]
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class ErrorConnectRefused(Error):
    """Exception for the ``ERROR_CONNECT_REFUSED`` response code."""

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.code = code = specification.ERROR_CONNECT_REFUSED
        self.name = specification.lookup[code]
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class ErrorUserAbort(Error):
    """Exception for the ``ERROR_USER_ABORT`` response code."""

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.code = code = specification.ERROR_USER_ABORT
        self.name = specification.lookup[code]
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class ErrorSyntaxError(Error):
    """Exception for the ``ERROR_SYNTAX_ERROR`` response code."""

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.code = code = specification.ERROR_SYNTAX_ERROR
        self.name = specification.lookup[code]
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class CLError(CQiException):
    """Base class of the ``CQI_CL_ERROR_*`` exceptions; carries the group code."""

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.code = specification.CL_ERROR
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class CLErrorNoSuchAttribute(CLError):
    """Exception for the ``CL_ERROR_NO_SUCH_ATTRIBUTE`` response code."""

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.code = code = specification.CL_ERROR_NO_SUCH_ATTRIBUTE
        self.name = specification.lookup[code]
        self.description = "CQi server couldn't open attribute"
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class CLErrorWrongAttributeType(CLError):
    """Exception for the ``CL_ERROR_WRONG_ATTRIBUTE_TYPE`` response code."""

    # CDA_EATTTYPE
    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.code = code = specification.CL_ERROR_WRONG_ATTRIBUTE_TYPE
        self.name = specification.lookup[code]
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class CLErrorOutOfRange(CLError):
    """Exception for the ``CL_ERROR_OUT_OF_RANGE`` response code."""

    # CDA_EIDORNG, CDA_EIDXORNG, CDA_EPOSORNG
    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.code = code = specification.CL_ERROR_OUT_OF_RANGE
        self.name = specification.lookup[code]
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class CLErrorRegex(CLError):
    """Exception for the ``CL_ERROR_REGEX`` response code."""

    # CDA_EPATTERN (not used), CDA_EBADREGEX
    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.code = code = specification.CL_ERROR_REGEX
        self.name = specification.lookup[code]
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class CLErrorCorpusAccess(CLError):
    """Exception for the ``CL_ERROR_CORPUS_ACCESS`` response code."""

    # CDA_ENODATA
    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.code = code = specification.CL_ERROR_CORPUS_ACCESS
        self.name = specification.lookup[code]
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class CLErrorOutOfMemory(CLError):
    """Exception for the ``CL_ERROR_OUT_OF_MEMORY`` response code."""

    # CDA_ENOMEM
    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.code = code = specification.CL_ERROR_OUT_OF_MEMORY
        self.name = specification.lookup[code]
        self.description = (
            'CQi server has run out of memory; try discarding '
            'some other corpora and/or subcorpora'
        )
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class CLErrorInternal(CLError):
    """Exception for the ``CL_ERROR_INTERNAL`` response code."""

    # CDA_EOTHER, CDA_ENYI
    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.code = code = specification.CL_ERROR_INTERNAL
        self.name = specification.lookup[code]
        self.description = "Classical 'please contact technical support' error"
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class CQPError(CQiException):
    """Base class of the ``CQI_CQP_ERROR_*`` exceptions; carries the group code."""

    # CQP error messages yet to be defined
    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.code = specification.CQP_ERROR
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class CQPErrorGeneral(CQPError):
    """Exception for the ``CQP_ERROR_GENERAL`` response code."""

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.code = code = specification.CQP_ERROR_GENERAL
        self.name = specification.lookup[code]
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class CQPErrorNoSuchCorpus(CQPError):
    """Exception for the ``CQP_ERROR_NO_SUCH_CORPUS`` response code."""

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.code = code = specification.CQP_ERROR_NO_SUCH_CORPUS
        self.name = specification.lookup[code]
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class CQPErrorInvalidField(CQPError):
    """Exception for the ``CQP_ERROR_INVALID_FIELD`` response code."""

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.code = code = specification.CQP_ERROR_INVALID_FIELD
        self.name = specification.lookup[code]
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class CQPErrorOutOfRange(CQPError):
    """Exception for the ``CQP_ERROR_OUT_OF_RANGE`` response code."""

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.code = code = specification.CQP_ERROR_OUT_OF_RANGE
        self.name = specification.lookup[code]
        self.description = 'A number is out of range'
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
# Response code -> exception class tables, one per error group. Each class
# sets its own ``code`` in ``__init__``, so the key is read from a throwaway
# instance instead of being repeated here.
error_lookup = {
    exc_class().code: exc_class
    for exc_class in (
        Error,
        ErrorGeneralError,
        ErrorConnectRefused,
        ErrorUserAbort,
        ErrorSyntaxError,
    )
}


cl_error_lookup = {
    exc_class().code: exc_class
    for exc_class in (
        CLError,
        CLErrorNoSuchAttribute,
        CLErrorWrongAttributeType,
        CLErrorOutOfRange,
        CLErrorRegex,
        CLErrorCorpusAccess,
        CLErrorOutOfMemory,
        CLErrorInternal,
    )
}


cqp_error_lookup = {
    exc_class().code: exc_class
    for exc_class in (
        CQPError,
        CQPErrorGeneral,
        CQPErrorNoSuchCorpus,
        CQPErrorInvalidField,
        CQPErrorOutOfRange,
    )
}
 | 
			
		||||
@@ -1,162 +0,0 @@
 | 
			
		||||
from .ressource import Collection, Model
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class Attribute(Model):
    """
    Model of a single corpus attribute.

    Attributes denote the general category of information. A specific
    occurrence is identified by an Id.
    """

    id_attribute = 'api_name'

    @staticmethod
    def _attrs(client, corpus, name):
        """
        Build the attrs dict for attribute ``name`` of ``corpus``.

        The qualified name is ``<corpus api_name>.<name>``; the size is
        fetched through ``client.api.cl_attribute_size``.
        """
        qualified_name = '{}.{}'.format(corpus.attrs['api_name'], name)
        attrs = {'api_name': qualified_name, 'name': name}
        attrs['size'] = client.api.cl_attribute_size(qualified_name)
        return attrs

    def drop(self):
        """Call ``cl_drop_attribute`` for this attribute and return its result."""
        api = self.client.api
        return api.cl_drop_attribute(self.attrs['api_name'])
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class AttributeCollection(Collection):
    """Collection of the attributes that belong to one corpus."""

    model = Attribute

    def __init__(self, client=None, corpus=None):
        super().__init__(client=client)
        self.corpus = corpus

    def get(self, attribute_name):
        """Return the prepared model for the attribute ``attribute_name``."""
        prepare = self.prepare_model
        attrs = self.model._attrs(self.client, self.corpus, attribute_name)
        return prepare(attrs)

    def list(self):
        """Not available on the generic collection; subclasses override it."""
        raise NotImplementedError
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class AlignmentAttribute(Attribute):
    """
    Attribute whose lookups are served by the ``cl_alg2cpos`` and
    ``cl_cpos2alg`` API calls.
    """

    def cpos_by_ids(self, id_list):
        """
        Forward ``id_list`` to ``cl_alg2cpos`` and return its result.
        """
        api = self.client.api
        return api.cl_alg2cpos(self.attrs['api_name'], id_list)

    def ids_by_cpos(self, cpos_list):
        """
        Forward ``cpos_list`` to ``cl_cpos2alg`` and return its result.
        """
        api = self.client.api
        return api.cl_cpos2alg(self.attrs['api_name'], cpos_list)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class AlignmentAttributeCollection(AttributeCollection):
    """
    Collection of the alignment attributes of one corpus.
    """

    model = AlignmentAttribute

    def list(self):
        """
        Return one model per name reported by
        ``corpus_alignment_attributes`` for this collection's corpus.
        """
        api = self.client.api
        names = api.corpus_alignment_attributes(self.corpus.attrs['api_name'])
        models = []
        for name in names:
            raw_attrs = self.model._attrs(self.client, self.corpus, name)
            models.append(self.prepare_model(raw_attrs))
        return models
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class PositionalAttribute(Attribute):
    """
    Attribute that additionally carries a ``lexicon_size`` and exposes the
    positional ``cl_*`` lookups of the API.
    """

    @staticmethod
    def _attrs(client, corpus, name):
        """
        Extend the base attribute dictionary with ``lexicon_size``.
        """
        attrs = Attribute._attrs(client, corpus, name)
        attrs['lexicon_size'] = client.api.cl_lexicon_size(attrs['api_name'])
        return attrs

    def cpos_by_id(self, id):
        """Return the result of ``cl_id2cpos`` for a single ``id``."""
        api = self.client.api
        return api.cl_id2cpos(self.attrs['api_name'], id)

    def cpos_by_ids(self, id_list):
        """Return the result of ``cl_idlist2cpos`` for ``id_list``."""
        api = self.client.api
        return api.cl_idlist2cpos(self.attrs['api_name'], id_list)

    def freqs_by_ids(self, id_list):
        """Return the result of ``cl_id2freq`` for ``id_list``."""
        api = self.client.api
        return api.cl_id2freq(self.attrs['api_name'], id_list)

    def ids_by_cpos(self, cpos_list):
        """Return the result of ``cl_cpos2id`` for ``cpos_list``."""
        api = self.client.api
        return api.cl_cpos2id(self.attrs['api_name'], cpos_list)

    def ids_by_regex(self, regex):
        """Return the result of ``cl_regex2id`` for ``regex``."""
        api = self.client.api
        return api.cl_regex2id(self.attrs['api_name'], regex)

    def ids_by_values(self, value_list):
        """Return the result of ``cl_str2id`` for ``value_list``."""
        api = self.client.api
        return api.cl_str2id(self.attrs['api_name'], value_list)

    def values_by_cpos(self, cpos_list):
        """Return the result of ``cl_cpos2str`` for ``cpos_list``."""
        api = self.client.api
        return api.cl_cpos2str(self.attrs['api_name'], cpos_list)

    def values_by_ids(self, id_list):
        """Return the result of ``cl_id2str`` for ``id_list``."""
        api = self.client.api
        return api.cl_id2str(self.attrs['api_name'], id_list)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class PositionalAttributeCollection(AttributeCollection):
    """
    Collection of the positional attributes of one corpus.
    """

    model = PositionalAttribute

    def list(self):
        """
        Return one model per name reported by
        ``corpus_positional_attributes`` for this collection's corpus.
        """
        api = self.client.api
        names = api.corpus_positional_attributes(self.corpus.attrs['api_name'])
        models = []
        for name in names:
            raw_attrs = self.model._attrs(self.client, self.corpus, name)
            models.append(self.prepare_model(raw_attrs))
        return models
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class StructuralAttribute(Attribute):
    """
    Attribute that additionally carries a ``has_values`` flag and exposes
    the structural ``cl_*`` lookups of the API.
    """

    @staticmethod
    def _attrs(client, corpus, name):
        """
        Extend the base attribute dictionary with ``has_values``.
        """
        attrs = Attribute._attrs(client, corpus, name)
        attrs['has_values'] = \
            client.api.corpus_structural_attribute_has_values(
                attrs['api_name'])
        return attrs

    @staticmethod
    def _expand_bounds(bounds):
        """
        Expand an inclusive ``(first, last)`` pair into a list of positions.

        ``None`` (no context available) expands to an empty list.
        """
        if bounds is None:
            return []
        return list(range(bounds[0], bounds[1] + 1))

    def cpos_by_id(self, id):
        """Return the result of ``cl_struc2cpos`` for a single ``id``."""
        return self.client.api.cl_struc2cpos(self.attrs['api_name'], id)

    def ids_by_cpos(self, cpos_list):
        """Return the result of ``cl_cpos2struc`` for ``cpos_list``."""
        return self.client.api.cl_cpos2struc(self.attrs['api_name'], cpos_list)

    def lbound_by_cpos(self, cpos_list):
        """Return the result of ``cl_cpos2lbound`` for ``cpos_list``."""
        return self.client.api.cl_cpos2lbound(self.attrs['api_name'],
                                              cpos_list)

    def rbound_by_cpos(self, cpos_list):
        """Return the result of ``cl_cpos2rbound`` for ``cpos_list``."""
        return self.client.api.cl_cpos2rbound(self.attrs['api_name'],
                                              cpos_list)

    def values_by_ids(self, id_list):
        """Return the result of ``cl_struc2str`` for ``id_list``."""
        return self.client.api.cl_struc2str(self.attrs['api_name'], id_list)

    def export(self, first_cpos, last_cpos, context=0, expand_lists=False):
        """
        Export the span ``first_cpos``..``last_cpos`` with optional context.

        Args:
            first_cpos: First corpus position of the span (inclusive).
            last_cpos: Last corpus position of the span (inclusive).
            context: How many ids of this attribute to include before and
                after the span; ``0`` disables context.
            expand_lists: When true, ``lc``/``c``/``rc`` are returned as
                lists of every position instead of ``(first, last)`` pairs.
                Missing context is then an empty list rather than ``None``.

        Returns:
            A dict with a ``'match'`` entry (keys ``'lc'``, ``'c'``,
            ``'rc'``) merged with the result of the corpus'
            ``lookups_by_cpos`` for all covered positions.
        """
        first_id, last_id = self.ids_by_cpos([first_cpos, last_cpos])
        c = (first_cpos, last_cpos)
        lc = rc = None
        # Without context the looked-up range is exactly the span itself.
        cpos_list_lbound = first_cpos
        cpos_list_rbound = last_cpos
        if context != 0:
            lc_lbound = self.cpos_by_id(max(0, (first_id - context)))[0]
            if lc_lbound != first_cpos:
                lc = (lc_lbound, max(0, (first_cpos - 1)))
                cpos_list_lbound = lc_lbound
            last_context_id = min((last_id + context),
                                  (self.attrs['size'] - 1))
            rc_rbound = self.cpos_by_id(last_context_id)[1]
            if rc_rbound != last_cpos:
                rc_lbound = min((last_cpos + 1),
                                (self.collection.corpus.attrs['size'] - 1))
                rc = (rc_lbound, rc_rbound)
                cpos_list_rbound = rc_rbound
        cpos_list = list(range(cpos_list_lbound, (cpos_list_rbound + 1)))
        if expand_lists:
            # lc/rc stay None when no context exists (always for context=0),
            # so the expansion must tolerate None instead of indexing it.
            match = {'lc': self._expand_bounds(lc),
                     'c': self._expand_bounds(c),
                     'rc': self._expand_bounds(rc)}
        else:
            match = {'lc': lc, 'c': c, 'rc': rc}
        lookups = self.collection.corpus.lookups_by_cpos(cpos_list)
        return {'match': match, **lookups}
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class StructuralAttributeCollection(AttributeCollection):
    """
    Collection of the structural attributes of one corpus.
    """

    model = StructuralAttribute

    def list(self, filters=None):
        """
        Return the structural attributes of this collection's corpus.

        Args:
            filters: Optional dict of filters. The only key acted upon is
                ``'part_of'``, whose value is an attribute model; only
                attributes whose name starts with ``<that name>_`` are kept.
                Other keys are ignored.
        """
        api = self.client.api
        names = api.corpus_structural_attributes(self.corpus.attrs['api_name'])
        attrs = [
            self.prepare_model(self.model._attrs(self.client, self.corpus,
                                                 name))
            for name in names
        ]
        # ``None`` default avoids sharing one mutable dict across calls.
        for key, value in (filters or {}).items():
            if key == 'part_of':
                prefix = value.attrs['name'] + '_'
                attrs = [attr for attr in attrs
                         if attr.attrs['name'].startswith(prefix)]
        return attrs
 | 
			
		||||
@@ -1,96 +0,0 @@
 | 
			
		||||
from .attributes import (AlignmentAttributeCollection,
 | 
			
		||||
                         PositionalAttributeCollection,
 | 
			
		||||
                         StructuralAttributeCollection)
 | 
			
		||||
from .ressource import Collection, Model
 | 
			
		||||
from .subcorpora import SubcorpusCollection
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class Corpus(Model):
    """
    Representation of a single corpus on the server.
    """

    id_attribute = 'api_name'

    @staticmethod
    def _attrs(client, name):
        """
        Assemble the raw attribute dictionary for the corpus ``name``.

        The size is taken from the ``<name>.word`` attribute via
        ``cl_attribute_size``.
        """
        api_name = name
        # NOTE: corpus_info / corpus_full_name are not queried here.
        return {'api_name': api_name,
                'name': name,
                'size': client.api.cl_attribute_size(
                    '{}.word'.format(api_name)),
                'charset': client.api.corpus_charset(api_name),
                'properties': client.api.corpus_properties(api_name)}

    def lookups_by_cpos(self, cpos_list):
        """
        Build lookup tables for the given corpus positions.

        Args:
            cpos_list: Corpus positions; duplicates are collapsed.

        Returns:
            A dict that is empty for empty input. Otherwise it contains
            ``'cpos_lookup'``, mapping each position to its positional
            attribute values and to the ids of the value-less structural
            attributes covering it. For each such structural attribute that
            has sub-attributes (names prefixed ``<name>_``), it also contains
            ``'<name>_lookup'``, mapping each occurring id to the values of
            those sub-attributes.
        """
        cpos_list = list(set(cpos_list))
        lookups = {}
        if not cpos_list:
            # Nothing to look up; skip the server round trips entirely.
            return lookups
        cpos_lookup = lookups['cpos_lookup'] = {cpos: {} for cpos in cpos_list}
        for attr in self.positional_attributes.list():
            attr_name = attr.attrs['name']
            cpos_attr_values = attr.values_by_cpos(cpos_list)
            for i, cpos in enumerate(cpos_list):
                cpos_lookup[cpos][attr_name] = cpos_attr_values[i]
        # Fetch the structural attributes once; sub-attributes are selected
        # from this list instead of re-listing them for every attribute.
        structural_attrs = self.structural_attributes.list()
        for attr in structural_attrs:
            if attr.attrs['has_values']:
                continue
            attr_name = attr.attrs['name']
            cpos_attr_ids = attr.ids_by_cpos(cpos_list)
            for i, cpos in enumerate(cpos_list):
                # -1 marks a position without an id for this attribute.
                if cpos_attr_ids[i] != -1:
                    cpos_lookup[cpos][attr_name] = cpos_attr_ids[i]
            occured_attr_ids = [attr_id for attr_id in set(cpos_attr_ids)
                                if attr_id != -1]
            if not occured_attr_ids:
                continue
            prefix = attr_name + '_'
            subattrs = [candidate for candidate in structural_attrs
                        if candidate.attrs['name'].startswith(prefix)]
            if not subattrs:
                continue
            attr_lookup = {attr_id: {} for attr_id in occured_attr_ids}
            lookups['{}_lookup'.format(attr_name)] = attr_lookup
            for subattr in subattrs:
                # Strip the '<parent>_' prefix to get the short name.
                subattr_name = subattr.attrs['name'][len(prefix):]
                subattr_values = subattr.values_by_ids(occured_attr_ids)
                for i, subattr_value in enumerate(subattr_values):
                    attr_lookup[occured_attr_ids[i]][subattr_name] = \
                        subattr_value
        return lookups

    def drop(self):
        """
        Issue ``corpus_drop_corpus`` for this corpus and return the result.
        """
        return self.client.api.corpus_drop_corpus(self.attrs['api_name'])

    def query(self, query, subcorpus_name='Results'):
        """
        Run ``query`` via ``cqp_query``, naming the result ``subcorpus_name``.
        """
        return self.client.api.cqp_query(self.attrs['api_name'],
                                         subcorpus_name, query)

    @property
    def alignment_attributes(self):
        """A new collection of this corpus' alignment attributes."""
        return AlignmentAttributeCollection(client=self.client, corpus=self)

    @property
    def positional_attributes(self):
        """A new collection of this corpus' positional attributes."""
        return PositionalAttributeCollection(client=self.client, corpus=self)

    @property
    def structural_attributes(self):
        """A new collection of this corpus' structural attributes."""
        return StructuralAttributeCollection(client=self.client, corpus=self)

    @property
    def subcorpora(self):
        """A new collection of this corpus' subcorpora."""
        return SubcorpusCollection(client=self.client, corpus=self)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class CorpusCollection(Collection):
    """
    Collection of the corpora available through the client.
    """

    model = Corpus

    def get(self, corpus_name):
        """
        Look up the corpus ``corpus_name`` and wrap it in a model.
        """
        raw_attrs = self.model._attrs(self.client, corpus_name)
        return self.prepare_model(raw_attrs)

    def list(self):
        """
        Return one model per name reported by the API's corpus listing.
        """
        # NOTE(review): 'coprora' is the spelling used by the API client
        # call here; confirm against the client before renaming.
        corpora = []
        for corpus_name in self.client.api.corpus_list_coprora():
            raw_attrs = self.model._attrs(self.client, corpus_name)
            corpora.append(self.prepare_model(raw_attrs))
        return corpora
 | 
			
		||||
@@ -1,78 +0,0 @@
 | 
			
		||||
class Model:
    """
    Base class for a single object living on the server.
    """
    id_attribute = 'Id'

    def __init__(self, attrs=None, client=None, collection=None):
        #: Client connected to the server that holds this object.
        self.client = client

        #: Collection this model belongs to.
        self.collection = collection

        #: Raw representation of the object as delivered by the API.
        self.attrs = attrs if attrs else {}

    @staticmethod
    def _attrs(client, key):
        raise NotImplementedError

    def __repr__(self):
        class_name = self.__class__.__name__
        return "<{}: {}>".format(class_name, self.id)

    def __eq__(self, other):
        # Only instances of this model's class can compare equal.
        if not isinstance(other, self.__class__):
            return False
        return self.id == other.id

    def __hash__(self):
        hash_key = "{}:{}".format(self.__class__.__name__, self.id)
        return hash(hash_key)

    @property
    def id(self):
        """
        Identifier of the object, read from ``attrs`` under the key named
        by ``id_attribute`` (``None`` when absent).
        """
        return self.attrs.get(self.id_attribute)

    def reload(self):
        """
        Fetch this object again through its collection and replace
        ``attrs`` with the freshly loaded data.
        """
        refreshed = self.collection.get(self.id)
        self.attrs = refreshed.attrs
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class Collection:
    """
    Base class for the set of all objects of one type on the server.
    """

    #: Model class this collection produces; subclasses assign it.
    model = None

    def __init__(self, client=None):
        #: Client connected to the server that holds this collection's
        #: objects.
        self.client = client

    def list(self):
        raise NotImplementedError

    def get(self, key):
        raise NotImplementedError

    def prepare_model(self, attrs):
        """
        Turn ``attrs`` into a model bound to this collection.

        An existing model is re-bound to this collection and its client; a
        dict is wrapped in a new ``model`` instance; anything else raises.
        """
        if isinstance(attrs, Model):
            attrs.client = self.client
            attrs.collection = self
            return attrs
        if isinstance(attrs, dict):
            return self.model(attrs=attrs, client=self.client, collection=self)
        model_name = self.model.__name__
        raise Exception("Can't create {} from {}".format(model_name, attrs))
 | 
			
		||||
@@ -1,103 +0,0 @@
 | 
			
		||||
from .ressource import Collection, Model
 | 
			
		||||
from ..api.specification import (CONST_FIELD_KEYWORD, CONST_FIELD_MATCH,
 | 
			
		||||
                                 CONST_FIELD_MATCHEND, CONST_FIELD_TARGET)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class Subcorpus(Model):
    """
    Representation of a single subcorpus (query result) of a corpus.
    """

    id_attribute = 'api_name'

    @staticmethod
    def _attrs(client, corpus, name):
        """
        Assemble the raw attribute dictionary for the subcorpus ``name``.

        The qualified API name has the form ``<corpus api_name>:<name>``.
        ``fields`` holds the field constants for which the server reports
        that the subcorpus has the field.
        """
        api_name = '{}:{}'.format(corpus.attrs['api_name'], name)
        candidate_fields = (('match', CONST_FIELD_MATCH),
                            ('matchend', CONST_FIELD_MATCHEND),
                            ('target', CONST_FIELD_TARGET),
                            ('keyword', CONST_FIELD_KEYWORD))
        fields = {}
        for field_name, field_const in candidate_fields:
            if client.api.cqp_subcorpus_has_field(api_name, field_const):
                fields[field_name] = field_const
        return {'api_name': api_name,
                'name': name,
                'fields': fields,
                'size': client.api.cqp_subcorpus_size(api_name)}

    @staticmethod
    def _expand_bounds(bounds):
        """
        Expand an inclusive ``(first, last)`` pair into a list of positions.

        ``None`` (no context available) expands to an empty list.
        """
        if bounds is None:
            return []
        return list(range(bounds[0], bounds[1] + 1))

    def drop(self):
        """
        Issue ``cqp_drop_subcorpus`` for this subcorpus and return the result.
        """
        return self.client.api.cqp_drop_subcorpus(self.attrs['api_name'])

    def dump(self, field, first, last):
        """
        Return the result of ``cqp_dump_subcorpus`` for ``field`` over the
        match range ``first``..``last``.
        """
        return self.client.api.cqp_dump_subcorpus(self.attrs['api_name'],
                                                  field, first, last)

    def export(self, context=25, cutoff=float('inf'), expand_lists=False,
               offset=0):
        """
        Export matches of this subcorpus together with lookup tables.

        Args:
            context: Number of corpus positions to include on each side of
                a match; ``0`` disables context.
            cutoff: Maximum number of matches to export.
            expand_lists: When true, ``lc``/``c``/``rc`` are returned as
                lists of every position instead of ``(first, last)`` pairs.
                Missing context is then an empty list rather than ``None``.
            offset: Index of the first match to export.

        Returns:
            A dict with a ``'matches'`` list (one dict with keys ``'lc'``,
            ``'c'``, ``'rc'`` per match) merged with the result of the
            corpus' ``lookups_by_cpos`` for all covered positions.
        """
        if self.attrs['size'] == 0:
            return {"matches": []}
        first_match = max(0, offset)
        last_match = min((offset + cutoff - 1), (self.attrs['size'] - 1))
        match_boundaries = zip(self.dump(self.attrs['fields']['match'],
                                         first_match, last_match),
                               self.dump(self.attrs['fields']['matchend'],
                                         first_match, last_match))
        cpos_list = []
        matches = []
        for match_start, match_end in match_boundaries:
            c = (match_start, match_end)
            lc = rc = None
            # Without context the looked-up range is the match itself.
            cpos_list_lbound = match_start
            cpos_list_rbound = match_end
            if context != 0:
                corpus_last_cpos = self.collection.corpus.attrs['size'] - 1
                lc_rbound = max(0, (match_start - 1))
                # Equal only when the match starts at position 0.
                if lc_rbound != match_start:
                    lc_lbound = max(0, (match_start - context))
                    lc = (lc_lbound, lc_rbound)
                    cpos_list_lbound = lc_lbound
                rc_lbound = min((match_end + 1), corpus_last_cpos)
                # Equal only when the match ends at the last position.
                if rc_lbound != match_end:
                    rc_rbound = min((match_end + context), corpus_last_cpos)
                    rc = (rc_lbound, rc_rbound)
                    cpos_list_rbound = rc_rbound
            cpos_list.extend(range(cpos_list_lbound, (cpos_list_rbound + 1)))
            if expand_lists:
                # lc/rc stay None when no context exists (always for
                # context=0), so the expansion must tolerate None.
                match = {'lc': self._expand_bounds(lc),
                         'c': self._expand_bounds(c),
                         'rc': self._expand_bounds(rc)}
            else:
                match = {'lc': lc, 'c': c, 'rc': rc}
            matches.append(match)
        lookups = self.collection.corpus.lookups_by_cpos(cpos_list)
        return {'matches': matches, **lookups}

    def fdist_1(self, cutoff, field, attribute):
        """
        Return the result of ``cqp_fdist_1`` for ``field`` and ``attribute``.

        ``attribute`` is an attribute model; its name is read from ``attrs``
        because models in this package do not define ``_name``.
        """
        # NOTE(review): passes the bare attribute name, not the qualified
        # api_name; confirm against the API client's cqp_fdist_1.
        return self.client.api.cqp_fdist_1(self.attrs['api_name'], cutoff,
                                           field, attribute.attrs['name'])

    def fdist_2(self, cutoff, field_1, attribute_1, field_2, attribute_2):
        """
        Return the result of ``cqp_fdist_2`` for two field/attribute pairs.

        Both attributes are attribute models; their names are read from
        ``attrs`` because models in this package do not define ``_name``.
        """
        # NOTE(review): passes bare attribute names, not the qualified
        # api_name; confirm against the API client's cqp_fdist_2.
        return self.client.api.cqp_fdist_2(self.attrs['api_name'], cutoff,
                                           field_1, attribute_1.attrs['name'],
                                           field_2, attribute_2.attrs['name'])
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class SubcorpusCollection(Collection):
    """
    Collection of the subcorpora that belong to one corpus.
    """

    model = Subcorpus

    def __init__(self, client=None, corpus=None):
        super().__init__(client=client)
        #: The corpus whose subcorpora this collection gives access to.
        self.corpus = corpus

    def get(self, subcorpus_name):
        """
        Look up ``subcorpus_name`` on the corpus and wrap it in a model.
        """
        raw_attrs = self.model._attrs(self.client, self.corpus,
                                      subcorpus_name)
        return self.prepare_model(raw_attrs)

    def list(self):
        """
        Return one model per name reported by ``cqp_list_subcorpora`` for
        this collection's corpus.
        """
        api = self.client.api
        names = api.cqp_list_subcorpora(self.corpus.attrs['api_name'])
        models = []
        for name in names:
            raw_attrs = self.model._attrs(self.client, self.corpus, name)
            models.append(self.prepare_model(raw_attrs))
        return models
 | 
			
		||||
@@ -1,5 +0,0 @@
 | 
			
		||||
from .api.specification import MAJOR_VERSION, MINOR_VERSION
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
# Package version as a (major, minor) tuple and as a 'major.minor' string,
# both derived from the specification constants.
version_info = (MAJOR_VERSION, MINOR_VERSION)
version = '{}.{}'.format(*version_info)
 | 
			
		||||
@@ -1,11 +1,11 @@
 | 
			
		||||
from flask import current_app, request
 | 
			
		||||
from flask_login import current_user
 | 
			
		||||
from socket import gaierror
 | 
			
		||||
from . import cqi
 | 
			
		||||
from .. import db, socketio
 | 
			
		||||
from ..decorators import socketio_login_required
 | 
			
		||||
from ..events import connected_sessions
 | 
			
		||||
from ..models import Corpus, User
 | 
			
		||||
import cqi
 | 
			
		||||
import math
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user