# ########################################################################### #
# IMS CQi specification                                                       #
#                                                                             #
# Version:       0.1a ;o)                                                     #
# Author:        Stefan Evert (evert@ims.uni-stuttgart.de)                    #
# Modified by:   Patrick Jentsch <p.jentsch@uni-bielefeld.de>                 #
# Modified date: Thurs Oct 10 <time>                                          #
# ########################################################################### #
from time import sleep
import socket
import struct


# CQi protocol constants.
#
# Descriptive comments follow the constant they describe. Every 16-bit
# response/command code carries the identifier of its group (STATUS, ERROR,
# DATA, CTRL, ...) in its high byte.

# 1. padding
PAD = 0x00


# 2. CQi responses
# 2.1 CQI_STATUS_*
STATUS = 0x01
STATUS_OK = 0x0101
STATUS_CONNECT_OK = 0x0102
STATUS_BYE_OK = 0x0103
STATUS_PING_OK = 0x0104

# 2.2 CQI_ERROR_*
ERROR = 0x02
ERROR_GENERAL_ERROR = 0x0201
ERROR_CONNECT_REFUSED = 0x0202
ERROR_USER_ABORT = 0x0203
ERROR_SYNTAX_ERROR = 0x0204
# includes corpus/attribute/subcorpus specifier syntax

# 2.3 CQI_DATA_*
DATA = 0x03
DATA_BYTE = 0x0301
DATA_BOOL = 0x0302
DATA_INT = 0x0303
DATA_STRING = 0x0304
DATA_BYTE_LIST = 0x0305
DATA_BOOL_LIST = 0x0306
DATA_INT_LIST = 0x0307
DATA_STRING_LIST = 0x0308
DATA_INT_INT = 0x0309
DATA_INT_INT_INT_INT = 0x030A
DATA_INT_TABLE = 0x030B

# 2.4 CQI_CL_ERROR_*
#
# NOTE: some CL error codes are not represented in the CQi specs
#       - usually because they're not used in the CL any more
#       - CDA_ENOSTRING is not considered an error (returns -1)
#       - CDA_EARGS: dynamic attribute calls not yet supported
CL_ERROR = 0x04
CL_ERROR_NO_SUCH_ATTRIBUTE = 0x0401
# returned if CQi server couldn't open attribute
CL_ERROR_WRONG_ATTRIBUTE_TYPE = 0x0402
# CDA_EATTTYPE
CL_ERROR_OUT_OF_RANGE = 0x0403
# CDA_EIDORNG, CDA_EIDXORNG, CDA_EPOSORNG
CL_ERROR_REGEX = 0x0404
# CDA_EPATTERN (not used), CDA_EBADREGEX
CL_ERROR_CORPUS_ACCESS = 0x0405
# CDA_ENODATA
CL_ERROR_OUT_OF_MEMORY = 0x0406
# CDA_ENOMEM
# this means the CQi server has run out of memory;
# try discarding some other corpora and/or subcorpora
CL_ERROR_INTERNAL = 0x0407
# CDA_EOTHER, CDA_ENYI
# this is the classical 'please contact technical support' error

# 2.5 CQI_CQP_ERROR_*
CQP_ERROR = 0x05
# CQP error messages yet to be defined
CQP_ERROR_GENERAL = 0x0501
CQP_ERROR_NO_SUCH_CORPUS = 0x0502
CQP_ERROR_INVALID_FIELD = 0x0503
CQP_ERROR_OUT_OF_RANGE = 0x0504
# various cases where a number is out of range


# 3. CQi commands
# 3.1 CQI_CTRL_*
CTRL = 0x11
CTRL_CONNECT = 0x1101
# INPUT: (STRING username, STRING password)
# OUTPUT: CQI_STATUS_CONNECT_OK, CQI_ERROR_CONNECT_REFUSED
CTRL_BYE = 0x1102
# INPUT: ()
# OUTPUT: CQI_STATUS_BYE_OK
CTRL_USER_ABORT = 0x1103
# INPUT: ()
# OUTPUT:
CTRL_PING = 0x1104
# INPUT: ()
# OUTPUT: CQI_STATUS_PING_OK
CTRL_LAST_GENERAL_ERROR = 0x1105
# INPUT: ()
# OUTPUT: CQI_DATA_STRING
# full-text error message for the last general error reported by the CQi server

# 3.2 CQI_ASK_FEATURE_*
ASK_FEATURE = 0x12
ASK_FEATURE_CQI_1_0 = 0x1201
# INPUT: ()
# OUTPUT: CQI_DATA_BOOL
ASK_FEATURE_CL_2_3 = 0x1202
# INPUT: ()
# OUTPUT: CQI_DATA_BOOL
ASK_FEATURE_CQP_2_3 = 0x1203
# INPUT: ()
# OUTPUT: CQI_DATA_BOOL

# 3.3 CQI_CORPUS_*
CORPUS = 0x13
CORPUS_LIST_CORPORA = 0x1301
# INPUT: ()
# OUTPUT: CQI_DATA_STRING_LIST
CORPUS_CHARSET = 0x1303
# INPUT: (STRING corpus)
# OUTPUT: CQI_DATA_STRING
CORPUS_PROPERTIES = 0x1304
# INPUT: (STRING corpus)
# OUTPUT: CQI_DATA_STRING_LIST
CORPUS_POSITIONAL_ATTRIBUTES = 0x1305
# INPUT: (STRING corpus)
# OUTPUT: CQI_DATA_STRING_LIST
CORPUS_STRUCTURAL_ATTRIBUTES = 0x1306
# INPUT: (STRING corpus)
# OUTPUT: CQI_DATA_STRING_LIST
CORPUS_STRUCTURAL_ATTRIBUTE_HAS_VALUES = 0x1307
# INPUT: (STRING attribute)
# OUTPUT: CQI_DATA_BOOL
CORPUS_ALIGNMENT_ATTRIBUTES = 0x1308
# INPUT: (STRING corpus)
# OUTPUT: CQI_DATA_STRING_LIST
CORPUS_FULL_NAME = 0x1309
# INPUT: (STRING corpus)
# OUTPUT: CQI_DATA_STRING
# the full name of <corpus> as specified in its registry entry
CORPUS_INFO = 0x130A
# INPUT: (STRING corpus)
# OUTPUT: CQI_DATA_STRING_LIST
# returns the contents of the .info file of <corpus> as a list of lines
CORPUS_DROP_CORPUS = 0x130B
# INPUT: (STRING corpus)
# OUTPUT: CQI_STATUS_OK
# try to unload a corpus and all its attributes from memory

# 3.4 CQI_CL_*
CL = 0x14
# low-level corpus access (CL functions)
CL_ATTRIBUTE_SIZE = 0x1401
# INPUT: (STRING attribute)
# OUTPUT: CQI_DATA_INT
# returns the size of <attribute>:
# - number of tokens (positional)
# - number of regions (structural)
# - number of alignments (alignment)
CL_LEXICON_SIZE = 0x1402
# INPUT: (STRING attribute)
# OUTPUT: CQI_DATA_INT
# returns the number of entries in the lexicon of a positional attribute;
# valid lexicon IDs range from 0 .. (lexicon_size - 1)
CL_DROP_ATTRIBUTE = 0x1403
# INPUT: (STRING attribute)
# OUTPUT: CQI_STATUS_OK
# unload attribute from memory

# NOTE: simple (scalar) mappings are applied to lists (the returned list has
#       exactly the same length as the list passed as an argument)
CL_STR2ID = 0x1404
# INPUT: (STRING attribute, STRING_LIST strings)
# OUTPUT: CQI_DATA_INT_LIST
# returns -1 for every string in <strings> that is not found in the lexicon
CL_ID2STR = 0x1405
# INPUT: (STRING attribute, INT_LIST id)
# OUTPUT: CQI_DATA_STRING_LIST
# returns "" for every ID in <id> that is out of range
CL_ID2FREQ = 0x1406
# INPUT: (STRING attribute, INT_LIST id)
# OUTPUT: CQI_DATA_INT_LIST
# returns 0 for every ID in <id> that is out of range
CL_CPOS2ID = 0x1407
# INPUT: (STRING attribute, INT_LIST cpos)
# OUTPUT: CQI_DATA_INT_LIST
# returns -1 for every corpus position in <cpos> that is out of range
CL_CPOS2STR = 0x1408
# INPUT: (STRING attribute, INT_LIST cpos)
# OUTPUT: CQI_DATA_STRING_LIST
# returns "" for every corpus position in <cpos> that is out of range
CL_CPOS2STRUC = 0x1409
# INPUT: (STRING attribute, INT_LIST cpos)
# OUTPUT: CQI_DATA_INT_LIST
# returns -1 for every corpus position not inside a structure region

# NOTE: temporary addition for the Euralex2000 tutorial, but should probably be
#       included in CQi specs
CL_CPOS2LBOUND = 0x1420
# INPUT: (STRING attribute, INT_LIST cpos)
# OUTPUT: CQI_DATA_INT_LIST
# returns left boundary of s-attribute region enclosing cpos, -1 if not in
# region
CL_CPOS2RBOUND = 0x1421
# INPUT: (STRING attribute, INT_LIST cpos)
# OUTPUT: CQI_DATA_INT_LIST
# returns right boundary of s-attribute region enclosing cpos, -1 if not in
# region
CL_CPOS2ALG = 0x140A
# INPUT: (STRING attribute, INT_LIST cpos)
# OUTPUT: CQI_DATA_INT_LIST
# returns -1 for every corpus position not inside an alignment
CL_STRUC2STR = 0x140B
# INPUT: (STRING attribute, INT_LIST strucs)
# OUTPUT: CQI_DATA_STRING_LIST
# returns annotated string values of structure regions in <strucs>; "" if out
# of range
# check CQI_CORPUS_STRUCTURAL_ATTRIBUTE_HAS_VALUES(<attribute>) first

# NOTE: the following mappings take a single argument and return multiple
#       values, including lists of arbitrary size
CL_ID2CPOS = 0x140C
# INPUT: (STRING attribute, INT id)
# OUTPUT: CQI_DATA_INT_LIST
# returns all corpus positions where the given token occurs
CL_IDLIST2CPOS = 0x140D
# INPUT: (STRING attribute, INT_LIST id_list)
# OUTPUT: CQI_DATA_INT_LIST
# returns all corpus positions where one of the tokens in <id_list>
# occurs; the returned list is sorted as a whole, not per token id
CL_REGEX2ID = 0x140E
# INPUT: (STRING attribute, STRING regex)
# OUTPUT: CQI_DATA_INT_LIST
# returns lexicon IDs of all tokens that match <regex>; the returned
# list may be empty (size 0);
CL_STRUC2CPOS = 0x140F
# INPUT: (STRING attribute, INT struc)
# OUTPUT: CQI_DATA_INT_INT
# returns start and end corpus positions of structure region <struc>
CL_ALG2CPOS = 0x1410
# INPUT: (STRING attribute, INT alg)
# OUTPUT: CQI_DATA_INT_INT_INT_INT
# returns (src_start, src_end, target_start, target_end)

# 3.5 CQI_CQP_*
CQP = 0x15
CQP_QUERY = 0x1501
# INPUT: (STRING mother_corpus, STRING subcorpus_name, STRING query)
# OUTPUT: CQI_STATUS_OK
# <query> must include the ';' character terminating the query.
CQP_LIST_SUBCORPORA = 0x1502
# INPUT: (STRING corpus)
# OUTPUT: CQI_DATA_STRING_LIST
CQP_SUBCORPUS_SIZE = 0x1503
# INPUT: (STRING subcorpus)
# OUTPUT: CQI_DATA_INT
CQP_SUBCORPUS_HAS_FIELD = 0x1504
# INPUT: (STRING subcorpus, BYTE field)
# OUTPUT: CQI_DATA_BOOL
CQP_DUMP_SUBCORPUS = 0x1505
# INPUT: (STRING subcorpus, BYTE field, INT first, INT last)
# OUTPUT: CQI_DATA_INT_LIST
# Dump the values of <field> for match ranges <first> .. <last> in <subcorpus>.
# <field> is one of the CQI_CONST_FIELD_* constants.
CQP_DROP_SUBCORPUS = 0x1509
# INPUT: (STRING subcorpus)
# OUTPUT: CQI_STATUS_OK
# delete a subcorpus from memory

# NOTE: The following two functions are temporarily included for the Euralex
#       2000 tutorial demo

# NOTE: frequency distribution of single tokens
CQP_FDIST_1 = 0x1510
# INPUT: (STRING subcorpus, INT cutoff, BYTE field, STRING attribute)
# OUTPUT: CQI_DATA_INT_LIST
# returns <n> (id, frequency) pairs flattened into a list of size 2*<n>
# field is one of CQI_CONST_FIELD_MATCH, CQI_CONST_FIELD_TARGET,
# CQI_CONST_FIELD_KEYWORD
# NB: pairs are sorted by frequency desc.

# NOTE: frequency distribution of pairs of tokens
CQP_FDIST_2 = 0x1511
# INPUT: (STRING subcorpus, INT cutoff, BYTE field1, STRING attribute1,
#         BYTE field2, STRING attribute2)
# OUTPUT: CQI_DATA_INT_LIST
# returns <n> (id1, id2, frequency) pairs flattened into a list of size 3*<n>
# NB: triples are sorted by frequency desc.


# 4. Constant Definitions
CONST_FALSE = 0x00
CONST_NO = 0x00
CONST_TRUE = 0x01
CONST_YES = 0x01

# NOTE: The following constants specify which field will be returned by
#       CQI_CQP_DUMP_SUBCORPUS and some other subcorpus commands.
CONST_FIELD_MATCH = 0x10
CONST_FIELD_MATCHEND = 0x11

# NOTE: The constants specifying target0 .. target9 are guaranteed to have the
#       numerical values 0 .. 9, so clients do not need to look up the constant
#       values if they're handling arbitrary targets.
CONST_FIELD_TARGET_0 = 0x00
CONST_FIELD_TARGET_1 = 0x01
CONST_FIELD_TARGET_2 = 0x02
CONST_FIELD_TARGET_3 = 0x03
CONST_FIELD_TARGET_4 = 0x04
CONST_FIELD_TARGET_5 = 0x05
CONST_FIELD_TARGET_6 = 0x06
CONST_FIELD_TARGET_7 = 0x07
CONST_FIELD_TARGET_8 = 0x08
CONST_FIELD_TARGET_9 = 0x09

# NOTE: The following constants are provided for backward compatibility with
#       traditional CQP field names & while the generalised target concept
#       isn't yet implemented in the CQPserver.
CONST_FIELD_TARGET = 0x00
CONST_FIELD_KEYWORD = 0x09

# NOTE: CQi version is CQI_MAJOR_VERSION.CQI_MINOR_VERSION
MAJOR_VERSION = 0x00
MINOR_VERSION = 0x01


# 5. CQi lookup dictionary.
#
# Maps every 16-bit response/command code to its symbolic CQI_* name. The keys
# reference the constants defined above so code and name cannot drift apart.
lookup = {
    STATUS_OK: 'CQI_STATUS_OK',
    STATUS_CONNECT_OK: 'CQI_STATUS_CONNECT_OK',
    STATUS_BYE_OK: 'CQI_STATUS_BYE_OK',
    STATUS_PING_OK: 'CQI_STATUS_PING_OK',
    ERROR_GENERAL_ERROR: 'CQI_ERROR_GENERAL_ERROR',
    ERROR_CONNECT_REFUSED: 'CQI_ERROR_CONNECT_REFUSED',
    ERROR_USER_ABORT: 'CQI_ERROR_USER_ABORT',
    ERROR_SYNTAX_ERROR: 'CQI_ERROR_SYNTAX_ERROR',
    DATA_BYTE: 'CQI_DATA_BYTE',
    DATA_BOOL: 'CQI_DATA_BOOL',
    DATA_INT: 'CQI_DATA_INT',
    DATA_STRING: 'CQI_DATA_STRING',
    DATA_BYTE_LIST: 'CQI_DATA_BYTE_LIST',
    DATA_BOOL_LIST: 'CQI_DATA_BOOL_LIST',
    DATA_INT_LIST: 'CQI_DATA_INT_LIST',
    DATA_STRING_LIST: 'CQI_DATA_STRING_LIST',
    DATA_INT_INT: 'CQI_DATA_INT_INT',
    DATA_INT_INT_INT_INT: 'CQI_DATA_INT_INT_INT_INT',
    DATA_INT_TABLE: 'CQI_DATA_INT_TABLE',
    CL_ERROR_NO_SUCH_ATTRIBUTE: 'CQI_CL_ERROR_NO_SUCH_ATTRIBUTE',
    CL_ERROR_WRONG_ATTRIBUTE_TYPE: 'CQI_CL_ERROR_WRONG_ATTRIBUTE_TYPE',
    CL_ERROR_OUT_OF_RANGE: 'CQI_CL_ERROR_OUT_OF_RANGE',
    CL_ERROR_REGEX: 'CQI_CL_ERROR_REGEX',
    CL_ERROR_CORPUS_ACCESS: 'CQI_CL_ERROR_CORPUS_ACCESS',
    CL_ERROR_OUT_OF_MEMORY: 'CQI_CL_ERROR_OUT_OF_MEMORY',
    CL_ERROR_INTERNAL: 'CQI_CL_ERROR_INTERNAL',
    CQP_ERROR_GENERAL: 'CQI_CQP_ERROR_GENERAL',
    CQP_ERROR_NO_SUCH_CORPUS: 'CQI_CQP_ERROR_NO_SUCH_CORPUS',
    CQP_ERROR_INVALID_FIELD: 'CQI_CQP_ERROR_INVALID_FIELD',
    CQP_ERROR_OUT_OF_RANGE: 'CQI_CQP_ERROR_OUT_OF_RANGE',
    CTRL_CONNECT: 'CQI_CTRL_CONNECT',
    CTRL_BYE: 'CQI_CTRL_BYE',
    CTRL_USER_ABORT: 'CQI_CTRL_USER_ABORT',
    CTRL_PING: 'CQI_CTRL_PING',
    CTRL_LAST_GENERAL_ERROR: 'CQI_CTRL_LAST_GENERAL_ERROR',
    ASK_FEATURE_CQI_1_0: 'CQI_ASK_FEATURE_CQI_1_0',
    ASK_FEATURE_CL_2_3: 'CQI_ASK_FEATURE_CL_2_3',
    ASK_FEATURE_CQP_2_3: 'CQI_ASK_FEATURE_CQP_2_3',
    CORPUS_LIST_CORPORA: 'CQI_CORPUS_LIST_CORPORA',
    CORPUS_CHARSET: 'CQI_CORPUS_CHARSET',
    CORPUS_PROPERTIES: 'CQI_CORPUS_PROPERTIES',
    CORPUS_POSITIONAL_ATTRIBUTES: 'CQI_CORPUS_POSITIONAL_ATTRIBUTES',
    CORPUS_STRUCTURAL_ATTRIBUTES: 'CQI_CORPUS_STRUCTURAL_ATTRIBUTES',
    CORPUS_STRUCTURAL_ATTRIBUTE_HAS_VALUES:
        'CQI_CORPUS_STRUCTURAL_ATTRIBUTE_HAS_VALUES',
    CORPUS_ALIGNMENT_ATTRIBUTES: 'CQI_CORPUS_ALIGNMENT_ATTRIBUTES',
    CORPUS_FULL_NAME: 'CQI_CORPUS_FULL_NAME',
    CORPUS_INFO: 'CQI_CORPUS_INFO',
    CORPUS_DROP_CORPUS: 'CQI_CORPUS_DROP_CORPUS',
    CL_ATTRIBUTE_SIZE: 'CQI_CL_ATTRIBUTE_SIZE',
    CL_LEXICON_SIZE: 'CQI_CL_LEXICON_SIZE',
    CL_DROP_ATTRIBUTE: 'CQI_CL_DROP_ATTRIBUTE',
    CL_STR2ID: 'CQI_CL_STR2ID',
    CL_ID2STR: 'CQI_CL_ID2STR',
    CL_ID2FREQ: 'CQI_CL_ID2FREQ',
    CL_CPOS2ID: 'CQI_CL_CPOS2ID',
    CL_CPOS2STR: 'CQI_CL_CPOS2STR',
    CL_CPOS2STRUC: 'CQI_CL_CPOS2STRUC',
    CL_CPOS2ALG: 'CQI_CL_CPOS2ALG',
    CL_STRUC2STR: 'CQI_CL_STRUC2STR',
    CL_ID2CPOS: 'CQI_CL_ID2CPOS',
    CL_IDLIST2CPOS: 'CQI_CL_IDLIST2CPOS',
    CL_REGEX2ID: 'CQI_CL_REGEX2ID',
    CL_STRUC2CPOS: 'CQI_CL_STRUC2CPOS',
    CL_ALG2CPOS: 'CQI_CL_ALG2CPOS',
    CL_CPOS2LBOUND: 'CQI_CL_CPOS2LBOUND',
    CL_CPOS2RBOUND: 'CQI_CL_CPOS2RBOUND',
    CQP_QUERY: 'CQI_CQP_QUERY',
    CQP_LIST_SUBCORPORA: 'CQI_CQP_LIST_SUBCORPORA',
    CQP_SUBCORPUS_SIZE: 'CQI_CQP_SUBCORPUS_SIZE',
    CQP_SUBCORPUS_HAS_FIELD: 'CQI_CQP_SUBCORPUS_HAS_FIELD',
    CQP_DUMP_SUBCORPUS: 'CQI_CQP_DUMP_SUBCORPUS',
    CQP_DROP_SUBCORPUS: 'CQI_CQP_DROP_SUBCORPUS',
    CQP_FDIST_1: 'CQI_CQP_FDIST_1',
    CQP_FDIST_2: 'CQI_CQP_FDIST_2',
}


class Client:
    """Blocking client for the CQi protocol over a stream socket.

    Every public method sends one CQi command (a 16-bit command word followed
    by its arguments) and, except for ``ctrl_user_abort``, reads and decodes
    the server's response:

    * CQI_STATUS_* responses are returned as their 16-bit status code,
    * CQI_DATA_* responses are returned as the decoded Python value,
    * CQI_ERROR_*, CQI_CL_ERROR_* and CQI_CQP_ERROR_* responses are raised
      as ``Exception`` carrying the symbolic name found in ``lookup``.

    A ``ConnectionError`` is raised when the server closes the connection
    while a response is being read.
    """

    def __init__(self, host='127.0.0.1', port=4877):
        """Create a socket and connect it to ``(host, port)`` immediately.

        Raises OSError if the connection cannot be established.
        """
        self.host = host
        self.port = port
        self.socket = socket.socket()
        try:
            self.socket.connect((self.host, self.port))
        except OSError:
            # Do not leak the descriptor when the server is unreachable.
            self.socket.close()
            raise

    # 3.1 CQI_CTRL_*

    def ctrl_connect(self, username, password):
        """Send CQI_CTRL_CONNECT with the given credentials.

        INPUT: (STRING username, STRING password)
        OUTPUT: CQI_STATUS_CONNECT_OK, CQI_ERROR_CONNECT_REFUSED
        """
        self.__send_WORD(CTRL_CONNECT)
        self.__send_STRING(username)
        self.__send_STRING(password)
        return self.__recv_response()

    def ctrl_bye(self):
        """Send CQI_CTRL_BYE. The socket itself is left open.

        INPUT: ()
        OUTPUT: CQI_STATUS_BYE_OK
        """
        self.__send_WORD(CTRL_BYE)
        return self.__recv_response()

    def ctrl_user_abort(self):
        """Send CQI_CTRL_USER_ABORT; no response is read.

        INPUT: ()
        OUTPUT:
        """
        self.__send_WORD(CTRL_USER_ABORT)

    def ctrl_ping(self):
        """Send CQI_CTRL_PING.

        INPUT: ()
        OUTPUT: CQI_STATUS_PING_OK
        """
        self.__send_WORD(CTRL_PING)
        return self.__recv_response()

    def ctrl_last_general_error(self):
        """Return the full-text message of the last general server error.

        INPUT: ()
        OUTPUT: CQI_DATA_STRING
        """
        self.__send_WORD(CTRL_LAST_GENERAL_ERROR)
        return self.__recv_response()

    # 3.2 CQI_ASK_FEATURE_*

    def ask_feature_cqi_1_0(self):
        """Send CQI_ASK_FEATURE_CQI_1_0.

        INPUT: ()
        OUTPUT: CQI_DATA_BOOL
        """
        self.__send_WORD(ASK_FEATURE_CQI_1_0)
        return self.__recv_response()

    def ask_feature_cl_2_3(self):
        """Send CQI_ASK_FEATURE_CL_2_3.

        INPUT: ()
        OUTPUT: CQI_DATA_BOOL
        """
        self.__send_WORD(ASK_FEATURE_CL_2_3)
        return self.__recv_response()

    def ask_feature_cqp_2_3(self):
        """Send CQI_ASK_FEATURE_CQP_2_3.

        INPUT: ()
        OUTPUT: CQI_DATA_BOOL
        """
        self.__send_WORD(ASK_FEATURE_CQP_2_3)
        return self.__recv_response()

    # 3.3 CQI_CORPUS_*

    def corpus_list_corpora(self):
        """Send CQI_CORPUS_LIST_CORPORA.

        INPUT: ()
        OUTPUT: CQI_DATA_STRING_LIST
        """
        self.__send_WORD(CORPUS_LIST_CORPORA)
        return self.__recv_response()

    # Misspelled historical name, kept so existing callers keep working.
    corpus_list_coprora = corpus_list_corpora

    def corpus_charset(self, corpus):
        """Send CQI_CORPUS_CHARSET for <corpus>.

        INPUT: (STRING corpus)
        OUTPUT: CQI_DATA_STRING
        """
        self.__send_WORD(CORPUS_CHARSET)
        self.__send_STRING(corpus)
        return self.__recv_response()

    def corpus_properties(self, corpus):
        """Send CQI_CORPUS_PROPERTIES for <corpus>.

        INPUT: (STRING corpus)
        OUTPUT: CQI_DATA_STRING_LIST
        """
        self.__send_WORD(CORPUS_PROPERTIES)
        self.__send_STRING(corpus)
        return self.__recv_response()

    def corpus_positional_attributes(self, corpus):
        """Send CQI_CORPUS_POSITIONAL_ATTRIBUTES for <corpus>.

        INPUT: (STRING corpus)
        OUTPUT: CQI_DATA_STRING_LIST
        """
        self.__send_WORD(CORPUS_POSITIONAL_ATTRIBUTES)
        self.__send_STRING(corpus)
        return self.__recv_response()

    def corpus_structural_attributes(self, corpus):
        """Send CQI_CORPUS_STRUCTURAL_ATTRIBUTES for <corpus>.

        INPUT: (STRING corpus)
        OUTPUT: CQI_DATA_STRING_LIST
        """
        self.__send_WORD(CORPUS_STRUCTURAL_ATTRIBUTES)
        self.__send_STRING(corpus)
        return self.__recv_response()

    def corpus_structural_attribute_has_values(self, attribute):
        """Send CQI_CORPUS_STRUCTURAL_ATTRIBUTE_HAS_VALUES for <attribute>.

        INPUT: (STRING attribute)
        OUTPUT: CQI_DATA_BOOL
        """
        self.__send_WORD(CORPUS_STRUCTURAL_ATTRIBUTE_HAS_VALUES)
        self.__send_STRING(attribute)
        return self.__recv_response()

    def corpus_alignment_attributes(self, corpus):
        """Send CQI_CORPUS_ALIGNMENT_ATTRIBUTES for <corpus>.

        INPUT: (STRING corpus)
        OUTPUT: CQI_DATA_STRING_LIST
        """
        self.__send_WORD(CORPUS_ALIGNMENT_ATTRIBUTES)
        self.__send_STRING(corpus)
        return self.__recv_response()

    def corpus_full_name(self, corpus):
        """Return the full name of <corpus> as given in its registry entry.

        INPUT: (STRING corpus)
        OUTPUT: CQI_DATA_STRING
        """
        self.__send_WORD(CORPUS_FULL_NAME)
        self.__send_STRING(corpus)
        return self.__recv_response()

    def corpus_info(self, corpus):
        """Return the .info file of <corpus> as a list of lines.

        INPUT: (STRING corpus)
        OUTPUT: CQI_DATA_STRING_LIST
        """
        self.__send_WORD(CORPUS_INFO)
        self.__send_STRING(corpus)
        return self.__recv_response()

    def corpus_drop_corpus(self, corpus):
        """Try to unload a corpus and all its attributes from memory.

        INPUT: (STRING corpus)
        OUTPUT: CQI_STATUS_OK
        """
        self.__send_WORD(CORPUS_DROP_CORPUS)
        self.__send_STRING(corpus)
        return self.__recv_response()

    # 3.4 CQI_CL_*

    def cl_attribute_size(self, attribute):
        """Return the size of <attribute>.

        The size is the number of tokens (positional), regions (structural)
        or alignments (alignment attribute).

        INPUT: (STRING attribute)
        OUTPUT: CQI_DATA_INT
        """
        self.__send_WORD(CL_ATTRIBUTE_SIZE)
        self.__send_STRING(attribute)
        return self.__recv_response()

    def cl_lexicon_size(self, attribute):
        """Return the number of lexicon entries of a positional attribute.

        Valid lexicon IDs range from 0 .. (lexicon_size - 1).

        INPUT: (STRING attribute)
        OUTPUT: CQI_DATA_INT
        """
        self.__send_WORD(CL_LEXICON_SIZE)
        self.__send_STRING(attribute)
        return self.__recv_response()

    def cl_drop_attribute(self, attribute):
        """Unload <attribute> from memory.

        INPUT: (STRING attribute)
        OUTPUT: CQI_STATUS_OK
        """
        self.__send_WORD(CL_DROP_ATTRIBUTE)
        self.__send_STRING(attribute)
        return self.__recv_response()

    # NOTE: simple (scalar) mappings are applied to lists (the returned list
    #       has exactly the same length as the list passed as an argument)

    def cl_str2id(self, attribute, strings):
        """Map strings to lexicon IDs; -1 for strings not in the lexicon.

        INPUT: (STRING attribute, STRING_LIST strings)
        OUTPUT: CQI_DATA_INT_LIST
        """
        self.__send_WORD(CL_STR2ID)
        self.__send_STRING(attribute)
        self.__send_STRING_LIST(strings)
        return self.__recv_response()

    def cl_id2str(self, attribute, id):
        """Map lexicon IDs to strings; "" for IDs that are out of range.

        INPUT: (STRING attribute, INT_LIST id)
        OUTPUT: CQI_DATA_STRING_LIST
        """
        self.__send_WORD(CL_ID2STR)
        self.__send_STRING(attribute)
        self.__send_INT_LIST(id)
        return self.__recv_response()

    def cl_id2freq(self, attribute, id):
        """Map lexicon IDs to frequencies; 0 for IDs that are out of range.

        INPUT: (STRING attribute, INT_LIST id)
        OUTPUT: CQI_DATA_INT_LIST
        """
        self.__send_WORD(CL_ID2FREQ)
        self.__send_STRING(attribute)
        self.__send_INT_LIST(id)
        return self.__recv_response()

    def cl_cpos2id(self, attribute, cpos):
        """Map corpus positions to lexicon IDs; -1 if out of range.

        INPUT: (STRING attribute, INT_LIST cpos)
        OUTPUT: CQI_DATA_INT_LIST
        """
        self.__send_WORD(CL_CPOS2ID)
        self.__send_STRING(attribute)
        self.__send_INT_LIST(cpos)
        return self.__recv_response()

    def cl_cpos2str(self, attribute, cpos):
        """Map corpus positions to strings; "" if out of range.

        INPUT: (STRING attribute, INT_LIST cpos)
        OUTPUT: CQI_DATA_STRING_LIST
        """
        self.__send_WORD(CL_CPOS2STR)
        self.__send_STRING(attribute)
        self.__send_INT_LIST(cpos)
        return self.__recv_response()

    def cl_cpos2struc(self, attribute, cpos):
        """Map corpus positions to structure numbers.

        Returns -1 for every corpus position not inside a structure region.

        INPUT: (STRING attribute, INT_LIST cpos)
        OUTPUT: CQI_DATA_INT_LIST
        """
        self.__send_WORD(CL_CPOS2STRUC)
        self.__send_STRING(attribute)
        self.__send_INT_LIST(cpos)
        return self.__recv_response()

    # NOTE: temporary addition for the Euralex2000 tutorial, but should
    #       probably be included in CQi specs

    def cl_cpos2lbound(self, attribute, cpos):
        """Return the left boundary of the s-attribute region around cpos.

        Returns -1 for positions that are not in a region.

        INPUT: (STRING attribute, INT_LIST cpos)
        OUTPUT: CQI_DATA_INT_LIST
        """
        self.__send_WORD(CL_CPOS2LBOUND)
        self.__send_STRING(attribute)
        self.__send_INT_LIST(cpos)
        return self.__recv_response()

    def cl_cpos2rbound(self, attribute, cpos):
        """Return the right boundary of the s-attribute region around cpos.

        Returns -1 for positions that are not in a region.

        INPUT: (STRING attribute, INT_LIST cpos)
        OUTPUT: CQI_DATA_INT_LIST
        """
        self.__send_WORD(CL_CPOS2RBOUND)
        self.__send_STRING(attribute)
        self.__send_INT_LIST(cpos)
        return self.__recv_response()

    def cl_cpos2alg(self, attribute, cpos):
        """Map corpus positions to alignments; -1 if not inside one.

        INPUT: (STRING attribute, INT_LIST cpos)
        OUTPUT: CQI_DATA_INT_LIST
        """
        self.__send_WORD(CL_CPOS2ALG)
        self.__send_STRING(attribute)
        self.__send_INT_LIST(cpos)
        return self.__recv_response()

    def cl_struc2str(self, attribute, strucs):
        """Return annotated string values of the regions in <strucs>.

        Returns "" for entries that are out of range. Check
        CQI_CORPUS_STRUCTURAL_ATTRIBUTE_HAS_VALUES(<attribute>) first.

        INPUT: (STRING attribute, INT_LIST strucs)
        OUTPUT: CQI_DATA_STRING_LIST
        """
        self.__send_WORD(CL_STRUC2STR)
        self.__send_STRING(attribute)
        self.__send_INT_LIST(strucs)
        return self.__recv_response()

    # NOTE: the following mappings take a single argument and return multiple
    #       values, including lists of arbitrary size

    def cl_id2cpos(self, attribute, id):
        """Return all corpus positions where the given token occurs.

        INPUT: (STRING attribute, INT id)
        OUTPUT: CQI_DATA_INT_LIST
        """
        self.__send_WORD(CL_ID2CPOS)
        self.__send_STRING(attribute)
        self.__send_INT(id)
        return self.__recv_response()

    def cl_idlist2cpos(self, attribute, id_list):
        """Return all corpus positions of any token in <id_list>.

        The returned list is sorted as a whole, not per token id.

        INPUT: (STRING attribute, INT_LIST id_list)
        OUTPUT: CQI_DATA_INT_LIST
        """
        self.__send_WORD(CL_IDLIST2CPOS)
        self.__send_STRING(attribute)
        self.__send_INT_LIST(id_list)
        return self.__recv_response()

    def cl_regex2id(self, attribute, regex):
        """Return lexicon IDs of all tokens matching <regex> (may be empty).

        INPUT: (STRING attribute, STRING regex)
        OUTPUT: CQI_DATA_INT_LIST
        """
        self.__send_WORD(CL_REGEX2ID)
        self.__send_STRING(attribute)
        self.__send_STRING(regex)
        return self.__recv_response()

    def cl_struc2cpos(self, attribute, struc):
        """Return start and end corpus positions of region <struc>.

        INPUT: (STRING attribute, INT struc)
        OUTPUT: CQI_DATA_INT_INT
        """
        self.__send_WORD(CL_STRUC2CPOS)
        self.__send_STRING(attribute)
        self.__send_INT(struc)
        return self.__recv_response()

    def cl_alg2cpos(self, attribute, alg):
        """Return (src_start, src_end, target_start, target_end) of <alg>.

        INPUT: (STRING attribute, INT alg)
        OUTPUT: CQI_DATA_INT_INT_INT_INT
        """
        self.__send_WORD(CL_ALG2CPOS)
        self.__send_STRING(attribute)
        self.__send_INT(alg)
        return self.__recv_response()

    # 3.5 CQI_CQP_*

    def cqp_query(self, mother_corpus, subcorpus_name, query):
        """Send CQI_CQP_QUERY.

        <query> must include the ';' character terminating the query.

        INPUT: (STRING mother_corpus, STRING subcorpus_name, STRING query)
        OUTPUT: CQI_STATUS_OK
        """
        self.__send_WORD(CQP_QUERY)
        self.__send_STRING(mother_corpus)
        self.__send_STRING(subcorpus_name)
        self.__send_STRING(query)
        return self.__recv_response()

    def cqp_list_subcorpora(self, corpus):
        """Send CQI_CQP_LIST_SUBCORPORA for <corpus>.

        INPUT: (STRING corpus)
        OUTPUT: CQI_DATA_STRING_LIST
        """
        self.__send_WORD(CQP_LIST_SUBCORPORA)
        self.__send_STRING(corpus)
        return self.__recv_response()

    def cqp_subcorpus_size(self, subcorpus):
        """Send CQI_CQP_SUBCORPUS_SIZE for <subcorpus>.

        INPUT: (STRING subcorpus)
        OUTPUT: CQI_DATA_INT
        """
        self.__send_WORD(CQP_SUBCORPUS_SIZE)
        self.__send_STRING(subcorpus)
        return self.__recv_response()

    def cqp_subcorpus_has_field(self, subcorpus, field):
        """Send CQI_CQP_SUBCORPUS_HAS_FIELD for <subcorpus> and <field>.

        INPUT: (STRING subcorpus, BYTE field)
        OUTPUT: CQI_DATA_BOOL
        """
        self.__send_WORD(CQP_SUBCORPUS_HAS_FIELD)
        self.__send_STRING(subcorpus)
        self.__send_BYTE(field)
        return self.__recv_response()

    def cqp_dump_subcorpus(self, subcorpus, field, first, last):
        """Dump <field> for match ranges <first> .. <last> in <subcorpus>.

        <field> is one of the CQI_CONST_FIELD_* constants.

        INPUT: (STRING subcorpus, BYTE field, INT first, INT last)
        OUTPUT: CQI_DATA_INT_LIST
        """
        self.__send_WORD(CQP_DUMP_SUBCORPUS)
        self.__send_STRING(subcorpus)
        self.__send_BYTE(field)
        self.__send_INT(first)
        self.__send_INT(last)
        return self.__recv_response()

    def cqp_drop_subcorpus(self, subcorpus):
        """Delete a subcorpus from memory.

        INPUT: (STRING subcorpus)
        OUTPUT: CQI_STATUS_OK
        """
        self.__send_WORD(CQP_DROP_SUBCORPUS)
        self.__send_STRING(subcorpus)
        return self.__recv_response()

    # NOTE: The following two functions are temporarily included for the
    #       Euralex 2000 tutorial demo

    def cqp_fdist_1(self, subcorpus, cutoff, field, attribute):
        """Return the frequency distribution of single tokens.

        Returns <n> (id, frequency) pairs flattened into a list of size
        2*<n>, sorted by frequency descending. <field> is one of
        CQI_CONST_FIELD_MATCH, CQI_CONST_FIELD_TARGET,
        CQI_CONST_FIELD_KEYWORD.

        INPUT: (STRING subcorpus, INT cutoff, BYTE field, STRING attribute)
        OUTPUT: CQI_DATA_INT_LIST
        """
        self.__send_WORD(CQP_FDIST_1)
        self.__send_STRING(subcorpus)
        self.__send_INT(cutoff)
        self.__send_BYTE(field)
        self.__send_STRING(attribute)
        return self.__recv_response()

    def cqp_fdist_2(self, subcorpus, cutoff, field1, attribute1, field2,
                    attribute2):
        """Return the frequency distribution of pairs of tokens.

        Returns <n> (id1, id2, frequency) triples flattened into a list of
        size 3*<n>, sorted by frequency descending.

        INPUT: (STRING subcorpus, INT cutoff, BYTE field1,
                STRING attribute1, BYTE field2, STRING attribute2)
        OUTPUT: CQI_DATA_INT_LIST
        """
        self.__send_WORD(CQP_FDIST_2)
        self.__send_STRING(subcorpus)
        self.__send_INT(cutoff)
        self.__send_BYTE(field1)
        self.__send_STRING(attribute1)
        self.__send_BYTE(field2)
        self.__send_STRING(attribute2)
        return self.__recv_response()

    # Response decoding

    def __recv_response(self):
        """Read one response and return its value or raise its error.

        The high byte of the 16-bit response code selects the group: DATA
        responses are decoded, STATUS codes are returned unchanged and the
        three error groups are raised as Exception.
        """
        response_code = self.__recv_WORD()
        response_type = response_code >> 8
        if response_type == DATA:
            return self.__recv_DATA(response_code)
        if response_type == STATUS:
            return response_code
        if response_type in (ERROR, CL_ERROR, CQP_ERROR):
            # Fall back to the numeric code so an error the table does not
            # know is still reported as an error, not as a KeyError.
            raise Exception(lookup.get(
                response_code,
                'Unknown error code: {}'.format(hex(response_code))
            ))
        raise Exception(
            'Unknown response type: {}'.format(hex(response_type))
        )

    def __recv_DATA(self, data_type):
        """Decode the payload that follows the CQI_DATA_* code data_type."""
        readers = {
            DATA_BYTE: self.__recv_DATA_BYTE,
            DATA_BOOL: self.__recv_DATA_BOOL,
            DATA_INT: self.__recv_DATA_INT,
            DATA_STRING: self.__recv_DATA_STRING,
            DATA_BYTE_LIST: self.__recv_DATA_BYTE_LIST,
            DATA_BOOL_LIST: self.__recv_DATA_BOOL_LIST,
            DATA_INT_LIST: self.__recv_DATA_INT_LIST,
            DATA_STRING_LIST: self.__recv_DATA_STRING_LIST,
            DATA_INT_INT: self.__recv_DATA_INT_INT,
            DATA_INT_INT_INT_INT: self.__recv_DATA_INT_INT_INT_INT,
            DATA_INT_TABLE: self.__recv_DATA_INT_TABLE,
        }
        reader = readers.get(data_type)
        if reader is None:
            raise Exception('Unknown data type: {}'.format(hex(data_type)))
        return reader()

    def __recv_exact(self, n):
        """Read exactly n bytes from the socket.

        recv() may return fewer bytes than requested, so the pieces are
        collected until n bytes have arrived. An empty read means the peer
        closed the connection; that raises ConnectionError instead of
        waiting forever.
        """
        chunks = []
        remaining = n
        while remaining > 0:
            chunk = self.socket.recv(remaining)
            if not chunk:
                raise ConnectionError(
                    'Connection closed with {} byte(s) still expected'
                    .format(remaining)
                )
            chunks.append(chunk)
            remaining -= len(chunk)
        return b''.join(chunks)

    def __recv_ints(self, count):
        """Read count consecutive INTs; a non-positive count yields []."""
        if count <= 0:
            return []
        raw = self.__recv_exact(4 * count)
        return list(struct.unpack('!{}i'.format(count), raw))

    def __recv_DATA_BYTE(self):
        """Read one unsigned byte."""
        return struct.unpack('!B', self.__recv_exact(1))[0]

    def __recv_DATA_BOOL(self):
        """Read one byte as a bool (0 is False, anything else True)."""
        return struct.unpack('!?', self.__recv_exact(1))[0]

    def __recv_DATA_INT(self):
        """Read one signed 32-bit big-endian integer."""
        return struct.unpack('!i', self.__recv_exact(4))[0]

    def __recv_DATA_STRING(self):
        """Read a 16-bit length followed by that many UTF-8 bytes."""
        n = self.__recv_WORD()
        return self.__recv_exact(n).decode('utf-8')

    def __recv_DATA_BYTE_LIST(self):
        """Read an INT length followed by that many bytes."""
        n = self.__recv_DATA_INT()
        return [self.__recv_DATA_BYTE() for _ in range(n)]

    def __recv_DATA_BOOL_LIST(self):
        """Read an INT length followed by that many bools."""
        n = self.__recv_DATA_INT()
        return [self.__recv_DATA_BOOL() for _ in range(n)]

    def __recv_DATA_INT_LIST(self):
        """Read an INT length followed by that many INTs."""
        return self.__recv_ints(self.__recv_DATA_INT())

    def __recv_DATA_STRING_LIST(self):
        """Read an INT length followed by that many STRINGs."""
        n = self.__recv_DATA_INT()
        return [self.__recv_DATA_STRING() for _ in range(n)]

    def __recv_DATA_INT_INT(self):
        """Read two INTs and return them as a tuple."""
        return tuple(self.__recv_ints(2))

    def __recv_DATA_INT_INT_INT_INT(self):
        """Read four INTs and return them as a tuple."""
        return tuple(self.__recv_ints(4))

    def __recv_DATA_INT_TABLE(self):
        """Read row and column counts, then the table as a list of rows."""
        rows = self.__recv_DATA_INT()
        columns = self.__recv_DATA_INT()
        return [self.__recv_ints(columns) for _ in range(rows)]

    def __recv_WORD(self):
        """Read one unsigned 16-bit big-endian word."""
        return struct.unpack('!H', self.__recv_exact(2))[0]

    # Request encoding

    def __send_BYTE(self, byte_data):
        """Send one unsigned byte."""
        self.socket.sendall(struct.pack('!B', byte_data))

    def __send_BOOL(self, bool_data):
        """Send one bool as a single byte."""
        self.socket.sendall(struct.pack('!?', bool_data))

    def __send_INT(self, int_data):
        """Send one signed 32-bit big-endian integer."""
        self.socket.sendall(struct.pack('!i', int_data))

    @staticmethod
    def __pack_STRING(string_data):
        """Encode a str as a 16-bit byte length plus its UTF-8 bytes."""
        encoded = string_data.encode('utf-8')
        return struct.pack('!H', len(encoded)) + encoded

    def __send_STRING(self, string_data):
        """Send one length-prefixed UTF-8 string."""
        self.socket.sendall(self.__pack_STRING(string_data))

    def __send_INT_LIST(self, int_list_data):
        """Send an INT count followed by the INTs, as one buffer."""
        values = list(int_list_data)
        n = len(values)
        self.socket.sendall(struct.pack('!i{}i'.format(n), n, *values))

    def __send_STRING_LIST(self, string_list_data):
        """Send an INT count followed by the STRINGs, as one buffer."""
        strings = list(string_list_data)
        payload = struct.pack('!i', len(strings)) + b''.join(
            self.__pack_STRING(string_data) for string_data in strings
        )
        self.socket.sendall(payload)

    def __send_WORD(self, word_data):
        """Send one unsigned 16-bit big-endian word."""
        self.socket.sendall(struct.pack('!H', word_data))