diff --git a/app/corpora/CQiWrapper/CQi.py b/app/corpora/CQiWrapper/CQi.py
index 5d39395a..62eebdb7 100644
--- a/app/corpora/CQiWrapper/CQi.py
+++ b/app/corpora/CQiWrapper/CQi.py
@@ -6,6 +6,8 @@
# Modified by: Patrick Jentsch
#
# ########################################################################### #
+import socket
+import struct
""" 1. padding """
@@ -404,3 +406,610 @@ lookup = {
5392: 'CQI_CQP_FDIST_1',
5393: 'CQI_CQP_FDIST_2'
}
+
+
+class Client:
+ def __init__(self, host='127.0.0.1', port=4877):
+ self.host = host
+ self.port = port
+ self.connection = socket.socket()
+ self.connection.connect((self.host, self.port))
+
+ def ctrl_connect(self, username, password):
+ # INPUT: (STRING username, STRING password)
+ # OUTPUT: CQI_STATUS_CONNECT_OK, CQI_ERROR_CONNECT_REFUSED
+ # print('CTRL_CONNECT')
+ self.__send_WORD(CTRL_CONNECT)
+ self.__send_STRING(username)
+ self.__send_STRING(password)
+ return self.__recv_response()
+
+ def ctrl_bye(self):
+ # INPUT: ()
+ # OUTPUT: CQI_STATUS_BYE_OK
+ # print('CTRL_BYE')
+ self.__send_WORD(CTRL_BYE)
+ return self.__recv_response()
+
+ def ctrl_user_abort(self):
+ # INPUT: ()
+ # OUTPUT:
+ # print('CTRL_USER_ABORT')
+ self.__send_WORD(CTRL_USER_ABORT)
+
+ def ctrl_ping(self):
+ # INPUT: ()
+ # OUTPUT: CQI_STATUS_PING_OK
+ # print('CTRL_PING')
+ self.__send_WORD(CTRL_PING)
+ return self.__recv_response()
+
+ def ctrl_last_general_error(self):
+ # INPUT: ()
+ # OUTPUT: CQI_DATA_STRING
+ # full-text error message for the last general error reported by the
+ # CQi server
+ # print('CTRL_LAST_GENERAL_ERROR')
+ self.__send_WORD(CTRL_LAST_GENERAL_ERROR)
+ return self.__recv_response()
+
+ def ask_feature_cqi_1_0(self):
+ # INPUT: ()
+ # OUTPUT: CQI_DATA_BOOL
+ # print('ASK_FEATURE_CQI_1_0')
+ self.__send_WORD(ASK_FEATURE_CQI_1_0)
+ return self.__recv_response()
+
+ def ask_feature_cl_2_3(self):
+ # INPUT: ()
+ # OUTPUT: CQI_DATA_BOOL
+ # print('ASK_FEATURE_CL_2_3')
+ self.__send_WORD(ASK_FEATURE_CL_2_3)
+ return self.__recv_response()
+
+    def ask_feature_cqp_2_3(self):
+        # INPUT: ()
+        # OUTPUT: CQI_DATA_BOOL
+        # print('ASK_FEATURE_CQP_2_3')
+        self.__send_WORD(ASK_FEATURE_CQP_2_3)
+        return self.__recv_response()
+
+ def corpus_list_coprora(self):
+ # INPUT: ()
+ # OUTPUT: CQI_DATA_STRING_LIST
+ # print('CORPUS_LIST_CORPORA')
+ self.__send_WORD(CORPUS_LIST_CORPORA)
+ return self.__recv_response()
+
+ def corpus_charset(self, corpus):
+ # INPUT: (STRING corpus)
+ # OUTPUT: CQI_DATA_STRING
+ # print('CORPUS_CHARSET')
+ self.__send_WORD(CORPUS_CHARSET)
+ self.__send_STRING(corpus)
+ return self.__recv_response()
+
+ def corpus_properties(self, corpus):
+ # INPUT: (STRING corpus)
+ # OUTPUT: CQI_DATA_STRING_LIST
+ # print('CORPUS_PROPERTIES')
+ self.__send_WORD(CORPUS_PROPERTIES)
+ self.__send_STRING(corpus)
+ return self.__recv_response()
+
+ def corpus_positional_attributes(self, corpus):
+ # INPUT: (STRING corpus)
+ # OUTPUT: CQI_DATA_STRING_LIST
+ # print('CORPUS_POSITIONAL_ATTRIBUTES')
+ self.__send_WORD(CORPUS_POSITIONAL_ATTRIBUTES)
+ self.__send_STRING(corpus)
+ return self.__recv_response()
+
+ def corpus_structural_attributes(self, corpus):
+ # INPUT: (STRING corpus)
+ # OUTPUT: CQI_DATA_STRING_LIST
+ # print('CORPUS_STRUCTURAL_ATTRIBUTES')
+ self.__send_WORD(CORPUS_STRUCTURAL_ATTRIBUTES)
+ self.__send_STRING(corpus)
+ return self.__recv_response()
+
+ def corpus_structural_attribute_has_values(self, attribute):
+ # INPUT: (STRING attribute)
+ # OUTPUT: CQI_DATA_BOOL
+ # print('CORPUS_STRUCTURAL_ATTRIBUTE_HAS_VALUES')
+ self.__send_WORD(CORPUS_STRUCTURAL_ATTRIBUTE_HAS_VALUES)
+ self.__send_STRING(attribute)
+ return self.__recv_response()
+
+ def corpus_alignment_attributes(self, corpus):
+ # INPUT: (STRING corpus)
+ # OUTPUT: CQI_DATA_STRING_LIST
+ # print('CORPUS_ALIGNMENT_ATTRIBUTES')
+ self.__send_WORD(CORPUS_ALIGNMENT_ATTRIBUTES)
+ self.__send_STRING(corpus)
+ return self.__recv_response()
+
+ def corpus_full_name(self, corpus):
+ # INPUT: (STRING corpus)
+ # OUTPUT: CQI_DATA_STRING
+ # the full name of <corpus> as specified in its registry entry
+ # print('CORPUS_FULL_NAME')
+ self.__send_WORD(CORPUS_FULL_NAME)
+ self.__send_STRING(corpus)
+ return self.__recv_response()
+
+ def corpus_info(self, corpus):
+ # INPUT: (STRING corpus)
+ # OUTPUT: CQI_DATA_STRING_LIST
+ # returns the contents of the .info file of <corpus> as a list of lines
+ # print('CORPUS_INFO')
+ self.__send_WORD(CORPUS_INFO)
+ self.__send_STRING(corpus)
+ return self.__recv_response()
+
+ def corpus_drop_corpus(self, corpus):
+ '''
+ ' Broken
+ ' TODO: Check what type of return value is provided by the server.
+ '''
+ # INPUT: (STRING corpus)
+ # OUTPUT: CQI_STATUS_OK
+ # try to unload a corpus and all its attributes from memory
+ # print('CORPUS_DROP_CORPUS')
+ self.__send_WORD(CORPUS_DROP_CORPUS)
+ self.__send_STRING(corpus)
+ return self.__recv_response()
+
+ def cl_attribute_size(self, attribute):
+ # INPUT: (STRING attribute)
+ # OUTPUT: CQI_DATA_INT
+ # returns the size of <attribute>:
+ # number of tokens (positional)
+ # number of regions (structural)
+ # number of alignments (alignment)
+ # print('CL_ATTRIBUTE_SIZE')
+ self.__send_WORD(CL_ATTRIBUTE_SIZE)
+ self.__send_STRING(attribute)
+ return self.__recv_response()
+
+ def cl_lexicon_size(self, attribute):
+ # INPUT: (STRING attribute)
+ # OUTPUT: CQI_DATA_INT
+ # returns the number of entries in the lexicon of a positional
+ # attribute;
+ # valid lexicon IDs range from 0 .. (lexicon_size - 1)
+ # print('CL_LEXICON_SIZE')
+ self.__send_WORD(CL_LEXICON_SIZE)
+ self.__send_STRING(attribute)
+ return self.__recv_response()
+
+    def cl_drop_attribute(self, attribute):
+        # INPUT: (STRING attribute)
+        # OUTPUT: CQI_STATUS_OK
+        # unload attribute from memory
+        # print('CL_DROP_ATTRIBUTE')
+        self.__send_WORD(CL_DROP_ATTRIBUTE)
+        self.__send_STRING(attribute)
+        return self.__recv_response()
+
+ """
+ " NOTE: simple (scalar) mappings are applied to lists (the returned list
+ " has exactly the same length as the list passed as an argument)
+ """
+
+    def cl_str2id(self, attribute, strings):
+        # INPUT: (STRING attribute, STRING_LIST strings)
+        # OUTPUT: CQI_DATA_INT_LIST
+        # returns -1 for every string in <strings> that is not found in the
+        # lexicon
+        # print('CL_STR2ID')
+        self.__send_WORD(CL_STR2ID)
+        self.__send_STRING(attribute)
+        self.__send_STRING_LIST(strings)
+        return self.__recv_response()
+
+ def cl_id2str(self, attribute, id):
+ # INPUT: (STRING attribute, INT_LIST id)
+ # OUTPUT: CQI_DATA_STRING_LIST
+ # returns "" for every ID in <id> that is out of range
+ # print('CL_ID2STR')
+ self.__send_WORD(CL_ID2STR)
+ self.__send_STRING(attribute)
+ self.__send_INT_LIST(id)
+ return self.__recv_response()
+
+ def cl_id2freq(self, attribute, id):
+ # INPUT: (STRING attribute, INT_LIST id)
+ # OUTPUT: CQI_DATA_INT_LIST
+ # returns 0 for every ID in <id> that is out of range
+ # print('CL_ID2FREQ')
+ self.__send_WORD(CL_ID2FREQ)
+ self.__send_STRING(attribute)
+ self.__send_INT_LIST(id)
+ return self.__recv_response()
+
+    def cl_cpos2id(self, attribute, cpos):
+        # INPUT: (STRING attribute, INT_LIST cpos)
+        # OUTPUT: CQI_DATA_INT_LIST
+        # returns -1 for every corpus position in <cpos> that is out of range
+        # print('CL_CPOS2ID')
+        self.__send_WORD(CL_CPOS2ID)
+        self.__send_STRING(attribute)
+        self.__send_INT_LIST(cpos)
+        return self.__recv_response()
+
+ def cl_cpos2str(self, attribute, cpos):
+ # INPUT: (STRING attribute, INT_LIST cpos)
+ # OUTPUT: CQI_DATA_STRING_LIST
+ # returns "" for every corpus position in <cpos> that is out of range
+ # print('CL_CPOS2STR')
+ self.__send_WORD(CL_CPOS2STR)
+ self.__send_STRING(attribute)
+ self.__send_INT_LIST(cpos)
+ return self.__recv_response()
+
+ def cl_cpos2struc(self, attribute, cpos):
+ # INPUT: (STRING attribute, INT_LIST cpos)
+ # OUTPUT: CQI_DATA_INT_LIST
+ # returns -1 for every corpus position not inside a structure region
+ # print('CL_CPOS2STRUC')
+ self.__send_WORD(CL_CPOS2STRUC)
+ self.__send_STRING(attribute)
+ self.__send_INT_LIST(cpos)
+ return self.__recv_response()
+
+ """
+ " NOTE: temporary addition for the Euralex2000 tutorial, but should
+ " probably be included in CQi specs
+ """
+
+ def cl_cpos2lbound(self, attribute, cpos):
+ # INPUT: (STRING attribute, INT_LIST cpos)
+ # OUTPUT: CQI_DATA_INT_LIST
+ # returns left boundary of s-attribute region enclosing cpos, -1 if not
+ # in region
+ # print('CL_CPOS2LBOUND')
+ self.__send_WORD(CL_CPOS2LBOUND)
+ self.__send_STRING(attribute)
+ self.__send_INT_LIST(cpos)
+ return self.__recv_response()
+
+ def cl_cpos2rbound(self, attribute, cpos):
+ # INPUT: (STRING attribute, INT_LIST cpos)
+ # OUTPUT: CQI_DATA_INT_LIST
+ # returns right boundary of s-attribute region enclosing cpos, -1 if
+ # not in region
+ # print('CL_CPOS2RBOUND')
+ self.__send_WORD(CL_CPOS2RBOUND)
+ self.__send_STRING(attribute)
+ self.__send_INT_LIST(cpos)
+ return self.__recv_response()
+
+ def cl_cpos2alg(self, attribute, cpos):
+ # INPUT: (STRING attribute, INT_LIST cpos)
+ # OUTPUT: CQI_DATA_INT_LIST
+ # returns -1 for every corpus position not inside an alignment
+ # print('CL_CPOS2ALG')
+ self.__send_WORD(CL_CPOS2ALG)
+ self.__send_STRING(attribute)
+ self.__send_INT_LIST(cpos)
+ return self.__recv_response()
+
+ def cl_struc2str(self, attribute, strucs):
+ # INPUT: (STRING attribute, INT_LIST strucs)
+ # OUTPUT: CQI_DATA_STRING_LIST
+ # returns annotated string values of structure regions in <strucs>; ""
+ # if out of range
+ # check CQI_CORPUS_STRUCTURAL_ATTRIBUTE_HAS_VALUES() first
+ # print('CL_STRUC2STR')
+ self.__send_WORD(CL_STRUC2STR)
+ self.__send_STRING(attribute)
+ self.__send_INT_LIST(strucs)
+ return self.__recv_response()
+
+ """
+ " NOTE: the following mappings take a single argument and return multiple
+ " values, including lists of arbitrary size
+ """
+
+ def cl_id2cpos(self, attribute, id):
+ # INPUT: (STRING attribute, INT id)
+ # OUTPUT: CQI_DATA_INT_LIST
+ # returns all corpus positions where the given token occurs
+ # print('CL_ID2CPOS')
+ self.__send_WORD(CL_ID2CPOS)
+ self.__send_STRING(attribute)
+ self.__send_INT(id)
+ return self.__recv_response()
+
+ def cl_idlist2cpos(self, attribute, id_list):
+ # INPUT: (STRING attribute, INT_LIST id_list)
+ # OUTPUT: CQI_DATA_INT_LIST
+ # returns all corpus positions where one of the tokens in <id_list>
+ # occurs; the returned list is sorted as a whole, not per token id
+ # print('CL_IDLIST2CPOS')
+ self.__send_WORD(CL_IDLIST2CPOS)
+ self.__send_STRING(attribute)
+ self.__send_INT_LIST(id_list)
+ return self.__recv_response()
+
+ def cl_regex2id(self, attribute, regex):
+ # INPUT: (STRING attribute, STRING regex)
+ # OUTPUT: CQI_DATA_INT_LIST
+ # returns lexicon IDs of all tokens that match <regex>; the returned
+ # list may be empty (size 0);
+ # print('CL_REGEX2ID')
+ self.__send_WORD(CL_REGEX2ID)
+ self.__send_STRING(attribute)
+ self.__send_STRING(regex)
+ return self.__recv_response()
+
+ def cl_struc2cpos(self, attribute, struc):
+ # INPUT: (STRING attribute, INT struc)
+ # OUTPUT: CQI_DATA_INT_INT
+ # returns start and end corpus positions of structure region <struc>
+ # print('CL_STRUC2CPOS')
+ self.__send_WORD(CL_STRUC2CPOS)
+ self.__send_STRING(attribute)
+ self.__send_INT(struc)
+ return self.__recv_response()
+
+ def cl_alg2cpos(self, attribute, alg):
+ # INPUT: (STRING attribute, INT alg)
+ # OUTPUT: CQI_DATA_INT_INT_INT_INT
+ # returns (src_start, src_end, target_start, target_end)
+ # print('CL_ALG2CPOS')
+ self.__send_WORD(CL_ALG2CPOS)
+ self.__send_STRING(attribute)
+ self.__send_INT(alg)
+ return self.__recv_response()
+
+ def cqp_query(self, mother_corpus, subcorpus_name, query):
+ # INPUT: (STRING mother_corpus, STRING subcorpus_name, STRING query)
+ # OUTPUT: CQI_STATUS_OK
+ # <query> must include the ';' character terminating the query.
+ # print('CQP_QUERY')
+ self.__send_WORD(CQP_QUERY)
+ self.__send_STRING(mother_corpus)
+ self.__send_STRING(subcorpus_name)
+ self.__send_STRING(query)
+ return self.__recv_response()
+
+ def cqp_list_subcorpora(self, corpus):
+ # INPUT: (STRING corpus)
+ # OUTPUT: CQI_DATA_STRING_LIST
+ # print('CQP_LIST_SUBCORPORA')
+ self.__send_WORD(CQP_LIST_SUBCORPORA)
+ self.__send_STRING(corpus)
+ return self.__recv_response()
+
+ def cqp_subcorpus_size(self, subcorpus):
+ # INPUT: (STRING subcorpus)
+ # OUTPUT: CQI_DATA_INT
+ # print('CQP_SUBCORPUS_SIZE')
+ self.__send_WORD(CQP_SUBCORPUS_SIZE)
+ self.__send_STRING(subcorpus)
+ return self.__recv_response()
+
+ def cqp_subcorpus_has_field(self, subcorpus, field):
+ # INPUT: (STRING subcorpus, BYTE field)
+ # OUTPUT: CQI_DATA_BOOL
+ # print('CQP_SUBCORPUS_HAS_FIELD')
+ self.__send_WORD(CQP_SUBCORPUS_HAS_FIELD)
+ self.__send_STRING(subcorpus)
+ self.__send_BYTE(field)
+ return self.__recv_response()
+
+ def cqp_dump_subcorpus(self, subcorpus, field, first, last):
+ # INPUT: (STRING subcorpus, BYTE field, INT first, INT last)
+ # OUTPUT: CQI_DATA_INT_LIST
+ # Dump the values of <field> for match ranges <first> .. <last>
+ # in <subcorpus>. <field> is one of the CQI_CONST_FIELD_* constants.
+ # print('CQP_DUMP_SUBCORPUS')
+ self.__send_WORD(CQP_DUMP_SUBCORPUS)
+ self.__send_STRING(subcorpus)
+ self.__send_BYTE(field)
+ self.__send_INT(first)
+ self.__send_INT(last)
+ return self.__recv_response()
+
+ def cqp_drop_subcorpus(self, subcorpus):
+ # INPUT: (STRING subcorpus)
+ # OUTPUT: CQI_STATUS_OK
+ # delete a subcorpus from memory
+ # print('CQP_DROP_SUBCORPUS')
+ self.__send_WORD(CQP_DROP_SUBCORPUS)
+ self.__send_STRING(subcorpus)
+ return self.__recv_response()
+
+ """
+ " NOTE: The following two functions are temporarily included for the
+ " Euralex 2000 tutorial demo
+ """
+
+ def cqp_fdist_1(self, subcorpus, cutoff, field, attribute):
+ """ NOTE: frequency distribution of single tokens """
+ # INPUT: (STRING subcorpus, INT cutoff, BYTE field, STRING attribute)
+ # OUTPUT: CQI_DATA_INT_LIST
+ # returns <n> (id, frequency) pairs flattened into a list of size 2*<n>
+ # field is one of CQI_CONST_FIELD_MATCH, CQI_CONST_FIELD_TARGET,
+ # CQI_CONST_FIELD_KEYWORD
+ # NB: pairs are sorted by frequency desc.
+ # print('CQP_FDIST_1')
+ self.__send_WORD(CQP_FDIST_1)
+ self.__send_STRING(subcorpus)
+ self.__send_INT(cutoff)
+ self.__send_BYTE(field)
+ self.__send_STRING(attribute)
+ return self.__recv_response()
+
+ def cqp_fdist_2(self, subcorpus, cutoff, field1, attribute1, field2,
+ attribute2):
+ """ NOTE: frequency distribution of pairs of tokens """
+ # INPUT: (STRING subcorpus, INT cutoff, BYTE field1, STRING attribute1,
+ # BYTE field2, STRING attribute2)
+ # OUTPUT: CQI_DATA_INT_LIST
+ # returns <n> (id1, id2, frequency) triples flattened into a list of
+ # size 3*<n>
+ # NB: triples are sorted by frequency desc.
+ # print('CQP_FDIST_2')
+ self.__send_WORD(CQP_FDIST_2)
+ self.__send_STRING(subcorpus)
+ self.__send_INT(cutoff)
+ self.__send_BYTE(field1)
+ self.__send_STRING(attribute1)
+ self.__send_BYTE(field2)
+ self.__send_STRING(attribute2)
+ return self.__recv_response()
+
+ def __recv_response(self):
+ byte_data = self.__recv_WORD()
+ response_type = byte_data >> 8
+ if response_type == CL_ERROR:
+ raise Exception(lookup[byte_data])
+ elif response_type == CQP_ERROR:
+ raise Exception(lookup[byte_data])
+ elif response_type == DATA:
+ return self.__recv_DATA(byte_data)
+ elif response_type == ERROR:
+ raise Exception(lookup[byte_data])
+ elif response_type == STATUS:
+ return byte_data
+ else:
+ raise Exception(
+ 'Unknown response type: {}'.format(hex(response_type))
+ )
+
+ def __recv_DATA(self, data_type):
+ if data_type == DATA_BYTE:
+ data = self.__recv_DATA_BYTE()
+ elif data_type == DATA_BOOL:
+ data = self.__recv_DATA_BOOL()
+ elif data_type == DATA_INT:
+ data = self.__recv_DATA_INT()
+ elif data_type == DATA_STRING:
+ data = self.__recv_DATA_STRING()
+ elif data_type == DATA_BYTE_LIST:
+ data = self.__recv_DATA_BYTE_LIST()
+ elif data_type == DATA_BOOL_LIST:
+ data = self.__recv_DATA_BOOL_LIST()
+ elif data_type == DATA_INT_LIST:
+ data = self.__recv_DATA_INT_LIST()
+ elif data_type == DATA_STRING_LIST:
+ data = self.__recv_DATA_STRING_LIST()
+ elif data_type == DATA_INT_INT:
+ data = self.__recv_DATA_INT_INT()
+ elif data_type == DATA_INT_INT_INT_INT:
+ data = self.__recv_DATA_INT_INT_INT_INT()
+ elif data_type == DATA_INT_TABLE:
+ data = self.__recv_DATA_INT_TABLE()
+ else:
+ raise Exception('Unknown data type: {}'.format(hex(data_type)))
+ return data
+
+ def __recv_DATA_BYTE(self):
+ byte_data = self.connection.recv(1, socket.MSG_WAITALL)
+ return struct.unpack('!B', byte_data)[0]
+
+ def __recv_DATA_BOOL(self):
+ byte_data = self.connection.recv(1, socket.MSG_WAITALL)
+ return struct.unpack('!?', byte_data)[0]
+
+ def __recv_DATA_INT(self):
+ byte_data = self.connection.recv(4, socket.MSG_WAITALL)
+ return struct.unpack('!i', byte_data)[0]
+
+ def __recv_DATA_STRING(self):
+ n = self.__recv_WORD()
+ byte_data = self.connection.recv(n, socket.MSG_WAITALL)
+ return struct.unpack('!{}s'.format(n), byte_data)[0].decode()
+
+ def __recv_DATA_BYTE_LIST(self):
+ data = []
+ n = self.__recv_DATA_INT()
+ while n > 0:
+ data.append(self.__recv_DATA_BYTE())
+ n -= 1
+ return data
+
+ def __recv_DATA_BOOL_LIST(self):
+ data = []
+ n = self.__recv_DATA_INT()
+ while n > 0:
+ data.append(self.__recv_DATA_BOOL())
+ n -= 1
+ return data
+
+ def __recv_DATA_INT_LIST(self):
+ data = []
+ n = self.__recv_DATA_INT()
+ while n > 0:
+ data.append(self.__recv_DATA_INT())
+ n -= 1
+ return data
+
+ def __recv_DATA_STRING_LIST(self):
+ data = []
+ n = self.__recv_DATA_INT()
+ while n > 0:
+ data.append(self.__recv_DATA_STRING())
+ n -= 1
+ return data
+
+ def __recv_DATA_INT_INT(self):
+ return (self.__recv_DATA_INT(), self.__recv_DATA_INT())
+
+ def __recv_DATA_INT_INT_INT_INT(self):
+ return (self.__recv_DATA_INT(),
+ self.__recv_DATA_INT(),
+ self.__recv_DATA_INT(),
+ self.__recv_DATA_INT())
+
+ def __recv_DATA_INT_TABLE(self):
+ rows = self.__recv_DATA_INT()
+ columns = self.__recv_DATA_INT()
+ data = []
+ for i in range(0, rows):
+ row = []
+ for j in range(0, columns):
+ row.append(self.__recv_DATA_INT())
+ data.append(row)
+ return data
+
+ def __recv_WORD(self):
+ byte_data = self.connection.recv(2, socket.MSG_WAITALL)
+ return struct.unpack('!H', byte_data)[0]
+
+ def __send_BYTE(self, byte_data):
+ data = struct.pack('!B', byte_data)
+ self.connection.sendall(data)
+
+ def __send_BOOL(self, bool_data):
+ data = struct.pack('!?', bool_data)
+ self.connection.sendall(data)
+
+ def __send_INT(self, int_data):
+ data = struct.pack('!i', int_data)
+ self.connection.sendall(data)
+
+ def __send_STRING(self, string_data):
+ encoded_string_data = string_data.encode('utf-8')
+ n = len(encoded_string_data)
+ data = struct.pack('!H{}s'.format(n), n, encoded_string_data)
+ self.connection.sendall(data)
+
+ def __send_INT_LIST(self, int_list_data):
+ n = len(int_list_data)
+ self.__send_INT(n)
+ for int_data in int_list_data:
+ self.__send_INT(int_data)
+
+ def __send_STRING_LIST(self, string_list_data):
+ n = len(string_list_data)
+ self.__send_INT(n)
+ for string_data in string_list_data:
+ self.__send_STRING(string_data)
+
+ def __send_WORD(self, word_data):
+ data = struct.pack('!H', word_data)
+ self.connection.sendall(data)
diff --git a/app/corpora/CQiWrapper/CQiClient.py b/app/corpora/CQiWrapper/CQiClient.py
deleted file mode 100644
index fc620eca..00000000
--- a/app/corpora/CQiWrapper/CQiClient.py
+++ /dev/null
@@ -1,620 +0,0 @@
-from . import CQi
-import socket
-import struct
-
-
-class CQiClient:
- def __init__(self, host='127.0.0.1', port=4877):
- self.host = host
- self.port = port
- self.connection = socket.socket()
- self.connection.connect((self.host, self.port))
-
- def ctrl_connect(self, username, password):
- # INPUT: (STRING username, STRING password)
- # OUTPUT: CQI_STATUS_CONNECT_OK, CQI_ERROR_CONNECT_REFUSED
- # print('CTRL_CONNECT')
- self.__send_WORD(CQi.CTRL_CONNECT)
- self.__send_STRING(username)
- self.__send_STRING(password)
- return self.__recv_response(CQi.STATUS_CONNECT_OK)
-
- def ctrl_bye(self):
- # INPUT: ()
- # OUTPUT: CQI_STATUS_BYE_OK
- # print('CTRL_BYE')
- self.__send_WORD(CQi.CTRL_BYE)
- return self.__recv_response(CQi.STATUS_BYE_OK)
-
- def ctrl_user_abort(self):
- # INPUT: ()
- # OUTPUT:
- # print('CTRL_USER_ABORT')
- self.__send_WORD(CQi.CTRL_USER_ABORT)
-
- def ctrl_ping(self):
- # INPUT: ()
- # OUTPUT: CQI_STATUS_PING_OK
- # print('CTRL_PING')
- self.__send_WORD(CQi.CTRL_PING)
- return self.__recv_response(CQi.STATUS_PING_OK)
-
- def ctrl_last_general_error(self):
- # INPUT: ()
- # OUTPUT: CQI_DATA_STRING
- # full-text error message for the last general error reported by the
- # CQi server
- # print('CTRL_LAST_GENERAL_ERROR')
- self.__send_WORD(CQi.CTRL_LAST_GENERAL_ERROR)
- return self.__recv_response(CQi.DATA_STRING)
-
- def ask_feature_cqi_1_0(self):
- # INPUT: ()
- # OUTPUT: CQI_DATA_BOOL
- # print('ASK_FEATURE_CQI_1_0')
- self.__send_WORD(CQi.ASK_FEATURE_CQI_1_0)
- return self.__recv_response(CQi.DATA_BOOL)
-
- def ask_feature_cl_2_3(self):
- # INPUT: ()
- # OUTPUT: CQI_DATA_BOOL
- # print('ASK_FEATURE_CL_2_3')
- self.__send_WORD(CQi.ASK_FEATURE_CL_2_3)
- return self.__recv_response(CQi.DATA_BOOL)
-
- def ask_feature_cqp_2_3(self):
- # INPUT: ()
- # OUTPUT: CQI_DATA_BOOL
- # print('ASK_FEATURE_CL_2_3')
- self.__send_WORD(CQi.ASK_FEATURE_CL_2_3)
- return self.__recv_response(CQi.DATA_BOOL)
-
- def corpus_list_coprora(self):
- # INPUT: ()
- # OUTPUT: CQI_DATA_STRING_LIST
- # print('CORPUS_LIST_CORPORA')
- self.__send_WORD(CQi.CORPUS_LIST_CORPORA)
- return self.__recv_response(CQi.DATA_STRING_LIST)
-
- def corpus_charset(self, corpus):
- # INPUT: (STRING corpus)
- # OUTPUT: CQI_DATA_STRING
- # print('CORPUS_CHARSET')
- self.__send_WORD(CQi.CORPUS_CHARSET)
- self.__send_STRING(corpus)
- return self.__recv_response(CQi.DATA_STRING)
-
- def corpus_properties(self, corpus):
- # INPUT: (STRING corpus)
- # OUTPUT: CQI_DATA_STRING_LIST
- # print('CORPUS_PROPERTIES')
- self.__send_WORD(CQi.CORPUS_PROPERTIES)
- self.__send_STRING(corpus)
- return self.__recv_response(CQi.DATA_STRING_LIST)
-
- def corpus_positional_attributes(self, corpus):
- # INPUT: (STRING corpus)
- # OUTPUT: CQI_DATA_STRING_LIST
- # print('CORPUS_POSITIONAL_ATTRIBUTES')
- self.__send_WORD(CQi.CORPUS_POSITIONAL_ATTRIBUTES)
- self.__send_STRING(corpus)
- return self.__recv_response(CQi.DATA_STRING_LIST)
-
- def corpus_structural_attributes(self, corpus):
- # INPUT: (STRING corpus)
- # OUTPUT: CQI_DATA_STRING_LIST
- # print('CORPUS_STRUCTURAL_ATTRIBUTES')
- self.__send_WORD(CQi.CORPUS_STRUCTURAL_ATTRIBUTES)
- self.__send_STRING(corpus)
- return self.__recv_response(CQi.DATA_STRING_LIST)
-
- def corpus_structural_attribute_has_values(self, attribute):
- # INPUT: (STRING attribute)
- # OUTPUT: CQI_DATA_BOOL
- # print('CORPUS_STRUCTURAL_ATTRIBUTE_HAS_VALUES')
- self.__send_WORD(CQi.CORPUS_STRUCTURAL_ATTRIBUTE_HAS_VALUES)
- self.__send_STRING(attribute)
- return self.__recv_response(CQi.DATA_BOOL)
-
- def corpus_alignment_attributes(self, corpus):
- # INPUT: (STRING corpus)
- # OUTPUT: CQI_DATA_STRING_LIST
- # print('CORPUS_ALIGNMENT_ATTRIBUTES')
- self.__send_WORD(CQi.CORPUS_ALIGNMENT_ATTRIBUTES)
- self.__send_STRING(corpus)
- return self.__recv_response(CQi.DATA_STRING_LIST)
-
- def corpus_full_name(self, corpus):
- # INPUT: (STRING corpus)
- # OUTPUT: CQI_DATA_STRING
- # the full name of as specified in its registry entry
- # print('CORPUS_FULL_NAME')
- self.__send_WORD(CQi.CORPUS_FULL_NAME)
- self.__send_STRING(corpus)
- return self.__recv_response(CQi.DATA_STRING)
-
- def corpus_info(self, corpus):
- # INPUT: (STRING corpus)
- # OUTPUT: CQI_DATA_STRING_LIST
- # returns the contents of the .info file of as a list of lines
- # print('CORPUS_INFO')
- self.__send_WORD(CQi.CORPUS_INFO)
- self.__send_STRING(corpus)
- return self.__recv_response(CQi.DATA_STRING_LIST)
-
- def corpus_drop_corpus(self, corpus):
- '''
- ' Broken
- ' TODO: Check what type of return value is provided by the server.
- '''
- # INPUT: (STRING corpus)
- # OUTPUT: CQI_STATUS_OK
- # try to unload a corpus and all its attributes from memory
- # print('CORPUS_DROP_CORPUS')
- self.__send_WORD(CQi.CORPUS_DROP_CORPUS)
- self.__send_STRING(corpus)
- return self.__recv_response(CQi.STATUS_OK)
-
- def cl_attribute_size(self, attribute):
- # INPUT: (STRING attribute)
- # OUTPUT: CQI_DATA_INT
- # returns the size of :
- # number of tokens (positional)
- # number of regions (structural)
- # number of alignments (alignment)
- # print('CL_ATTRIBUTE_SIZE')
- self.__send_WORD(CQi.CL_ATTRIBUTE_SIZE)
- self.__send_STRING(attribute)
- return self.__recv_response(CQi.DATA_INT)
-
- def cl_lexicon_size(self, attribute):
- # INPUT: (STRING attribute)
- # OUTPUT: CQI_DATA_INT
- # returns the number of entries in the lexicon of a positional
- # attribute;
- # valid lexicon IDs range from 0 .. (lexicon_size - 1)
- # print('CL_LEXICON_SIZE')
- self.__send_WORD(CQi.CL_LEXICON_SIZE)
- self.__send_STRING(attribute)
- return self.__recv_response(CQi.DATA_INT)
-
- def cl_drop_attribute(self, attribute):
- # INPUT: (STRING attribute)
- # OUTPUT: CQI_STATUS_OK
- # unload attribute from memory
- # print('CL_DROP_ATTRIBUTE')
- self.__send_WORD(CQi.CL_LEXICON_SIZE)
- self.__send_STRING(attribute)
- return self.__recv_response(CQi.STATUS_OK)
-
- """
- " NOTE: simple (scalar) mappings are applied to lists (the returned list
- " has exactly the same length as the list passed as an argument)
- """
-
- def cl_str2id(self, attribute, strings):
- # INPUT: (STRING attribute, STRING_LIST strings)
- # OUTPUT: CQI_DATA_INT_LIST
- # returns -1 for every string in that is not found in the
- # lexicon
- # print('CL_STR2ID')
- self.__send_WORD(CQi.CL_LEXICON_SIZE)
- self.__send_STRING(attribute)
- self.__send_STRING_LIST(strings)
- return self.__recv_response(CQi.DATA_INT_LIST)
-
- def cl_id2str(self, attribute, id):
- # INPUT: (STRING attribute, INT_LIST id)
- # OUTPUT: CQI_DATA_STRING_LIST
- # returns "" for every ID in that is out of range
- # print('CL_ID2STR')
- self.__send_WORD(CQi.CL_ID2STR)
- self.__send_STRING(attribute)
- self.__send_INT_LIST(id)
- return self.__recv_response(CQi.DATA_STRING_LIST)
-
- def cl_id2freq(self, attribute, id):
- # INPUT: (STRING attribute, INT_LIST id)
- # OUTPUT: CQI_DATA_INT_LIST
- # returns 0 for every ID in that is out of range
- # print('CL_ID2FREQ')
- self.__send_WORD(CQi.CL_ID2FREQ)
- self.__send_STRING(attribute)
- self.__send_INT_LIST(id)
- return self.__recv_response(CQi.DATA_INT_LIST)
-
- def cl_cpos2id(self, attribute, cpos):
- # INPUT: (STRING attribute, INT_LIST cpos)
- # OUTPUT: CQI_DATA_INT_LIST
- # returns -1 for every corpus position in that is out of range
- # print('CL_CPOS2ID')
- self.__send_WORD(CQi.CL_ID2FREQ)
- self.__send_STRING(attribute)
- self.__send_INT_LIST(cpos)
- return self.__recv_response(CQi.DATA_INT_LIST)
-
- def cl_cpos2str(self, attribute, cpos):
- # INPUT: (STRING attribute, INT_LIST cpos)
- # OUTPUT: CQI_DATA_STRING_LIST
- # returns "" for every corpus position in that is out of range
- # print('CL_CPOS2STR')
- self.__send_WORD(CQi.CL_CPOS2STR)
- self.__send_STRING(attribute)
- self.__send_INT_LIST(cpos)
- return self.__recv_response(CQi.DATA_STRING_LIST)
-
- def cl_cpos2struc(self, attribute, cpos):
- # INPUT: (STRING attribute, INT_LIST cpos)
- # OUTPUT: CQI_DATA_INT_LIST
- # returns -1 for every corpus position not inside a structure region
- # print('CL_CPOS2STRUC')
- self.__send_WORD(CQi.CL_CPOS2STRUC)
- self.__send_STRING(attribute)
- self.__send_INT_LIST(cpos)
- return self.__recv_response(CQi.DATA_INT_LIST)
-
- """
- " NOTE: temporary addition for the Euralex2000 tutorial, but should
- " probably be included in CQi specs
- """
-
- def cl_cpos2lbound(self, attribute, cpos):
- # INPUT: (STRING attribute, INT_LIST cpos)
- # OUTPUT: CQI_DATA_INT_LIST
- # returns left boundary of s-attribute region enclosing cpos, -1 if not
- # in region
- # print('CL_CPOS2LBOUND')
- self.__send_WORD(CQi.CL_CPOS2LBOUND)
- self.__send_STRING(attribute)
- self.__send_INT_LIST(cpos)
- return self.__recv_response(CQi.DATA_INT_LIST)
-
- def cl_cpos2rbound(self, attribute, cpos):
- # INPUT: (STRING attribute, INT_LIST cpos)
- # OUTPUT: CQI_DATA_INT_LIST
- # returns right boundary of s-attribute region enclosing cpos, -1 if
- # not in region
- # print('CL_CPOS2RBOUND')
- self.__send_WORD(CQi.CL_CPOS2RBOUND)
- self.__send_STRING(attribute)
- self.__send_INT_LIST(cpos)
- return self.__recv_response(CQi.DATA_INT_LIST)
-
- def cl_cpos2alg(self, attribute, cpos):
- # INPUT: (STRING attribute, INT_LIST cpos)
- # OUTPUT: CQI_DATA_INT_LIST
- # returns -1 for every corpus position not inside an alignment
- # print('CL_CPOS2ALG')
- self.__send_WORD(CQi.CL_CPOS2ALG)
- self.__send_STRING(attribute)
- self.__send_INT_LIST(cpos)
- return self.__recv_response(CQi.DATA_INT_LIST)
-
- def cl_struc2str(self, attribute, strucs):
- # INPUT: (STRING attribute, INT_LIST strucs)
- # OUTPUT: CQI_DATA_STRING_LIST
- # returns annotated string values of structure regions in ; ""
- # if out of range
- # check CQI_CORPUS_STRUCTURAL_ATTRIBUTE_HAS_VALUES() first
- # print('CL_STRUC2STR')
- self.__send_WORD(CQi.CL_STRUC2STR)
- self.__send_STRING(attribute)
- self.__send_INT_LIST(strucs)
- return self.__recv_response(CQi.DATA_STRING_LIST)
-
- """
- " NOTE: the following mappings take a single argument and return multiple
- " values, including lists of arbitrary size
- """
-
- def cl_id2cpos(self, attribute, id):
- # INPUT: (STRING attribute, INT id)
- # OUTPUT: CQI_DATA_INT_LIST
- # returns all corpus positions where the given token occurs
- # print('CL_ID2CPOS')
- self.__send_WORD(CQi.CL_ID2CPOS)
- self.__send_STRING(attribute)
- self.__send_INT(id)
- return self.__recv_response(CQi.DATA_INT_LIST)
-
- def cl_idlist2cpos(self, attribute, id_list):
- # INPUT: (STRING attribute, INT_LIST id_list)
- # OUTPUT: CQI_DATA_INT_LIST
- # returns all corpus positions where one of the tokens in
- # occurs; the returned list is sorted as a whole, not per token id
- # print('CL_IDLIST2CPOS')
- self.__send_WORD(CQi.CL_IDLIST2CPOS)
- self.__send_STRING(attribute)
- self.__send_INT_LIST(id_list)
- return self.__recv_response(CQi.DATA_INT_LIST)
-
- def cl_regex2id(self, attribute, regex):
- # INPUT: (STRING attribute, STRING regex)
- # OUTPUT: CQI_DATA_INT_LIST
- # returns lexicon IDs of all tokens that match ; the returned
- # list may be empty (size 0);
- # print('CL_REGEX2ID')
- self.__send_WORD(CQi.CL_REGEX2ID)
- self.__send_STRING(attribute)
- self.__send_STRING(regex)
- return self.__recv_response(CQi.DATA_INT_LIST)
-
- def cl_struc2cpos(self, attribute, struc):
- # INPUT: (STRING attribute, INT struc)
- # OUTPUT: CQI_DATA_INT_INT
- # returns start and end corpus positions of structure region
- # print('CL_STRUC2CPOS')
- self.__send_WORD(CQi.CL_STRUC2CPOS)
- self.__send_STRING(attribute)
- self.__send_INT(struc)
- return self.__recv_response(CQi.DATA_INT_LIST)
-
- def cl_alg2cpos(self, attribute, alg):
- # INPUT: (STRING attribute, INT alg)
- # OUTPUT: CQI_DATA_INT_INT_INT_INT
- # returns (src_start, src_end, target_start, target_end)
- # print('CL_ALG2CPOS')
- self.__send_WORD(CQi.CL_ALG2CPOS)
- self.__send_STRING(attribute)
- self.__send_INT(alg)
- return self.__recv_response(CQi.DATA_INT_INT_INT_INT)
-
- def cqp_query(self, mother_corpus, subcorpus_name, query):
- # INPUT: (STRING mother_corpus, STRING subcorpus_name, STRING query)
- # OUTPUT: CQI_STATUS_OK
- # must include the ';' character terminating the query.
- # print('CQP_QUERY')
- self.__send_WORD(CQi.CQP_QUERY)
- self.__send_STRING(mother_corpus)
- self.__send_STRING(subcorpus_name)
- self.__send_STRING(query)
- return self.__recv_response(CQi.STATUS_OK)
-
- def cqp_list_subcorpora(self, corpus):
- # INPUT: (STRING corpus)
- # OUTPUT: CQI_DATA_STRING_LIST
- # print('CQP_LIST_SUBCORPORA')
- self.__send_WORD(CQi.CQP_LIST_SUBCORPORA)
- self.__send_STRING(corpus)
- return self.__recv_response(CQi.DATA_STRING_LIST)
-
- def cqp_subcorpus_size(self, subcorpus):
- # INPUT: (STRING subcorpus)
- # OUTPUT: CQI_DATA_INT
- # print('CQP_SUBCORPUS_SIZE')
- self.__send_WORD(CQi.CQP_SUBCORPUS_SIZE)
- self.__send_STRING(subcorpus)
- return self.__recv_response(CQi.DATA_INT)
-
- def cqp_subcorpus_has_field(self, subcorpus, field):
- # INPUT: (STRING subcorpus, BYTE field)
- # OUTPUT: CQI_DATA_BOOL
- # print('CQP_SUBCORPUS_HAS_FIELD')
- self.__send_WORD(CQi.CQP_SUBCORPUS_HAS_FIELD)
- self.__send_STRING(subcorpus)
- self.__send_BYTE(field)
- return self.__recv_response(CQi.DATA_BOOL)
-
- def cqp_dump_subcorpus(self, subcorpus, field, first, last):
- # INPUT: (STRING subcorpus, BYTE field, INT first, INT last)
- # OUTPUT: CQI_DATA_INT_LIST
- # Dump the values of for match ranges ..
- # in . is one of the CQI_CONST_FIELD_* constants.
- # print('CQP_DUMP_SUBCORPUS')
- self.__send_WORD(CQi.CQP_DUMP_SUBCORPUS)
- self.__send_STRING(subcorpus)
- self.__send_BYTE(field)
- self.__send_INT(first)
- self.__send_INT(last)
- return self.__recv_response(CQi.DATA_INT_LIST)
-
- def cqp_drop_subcorpus(self, subcorpus):
- # INPUT: (STRING subcorpus)
- # OUTPUT: CQI_STATUS_OK
- # delete a subcorpus from memory
- # print('CQP_DROP_SUBCORPUS')
- self.__send_WORD(CQi.CQP_DROP_SUBCORPUS)
- self.__send_STRING(subcorpus)
- return self.__recv_response(CQi.STATUS_OK)
-
- """
- " NOTE: The following two functions are temporarily included for the
- " Euralex 2000 tutorial demo
- """
-
- def cqp_fdist_1(self, subcorpus, cutoff, field, attribute):
- """ NOTE: frequency distribution of single tokens """
- # INPUT: (STRING subcorpus, INT cutoff, BYTE field, STRING attribute)
- # OUTPUT: CQI_DATA_INT_LIST
- # returns (id, frequency) pairs flattened into a list of size 2*
- # field is one of CQI_CONST_FIELD_MATCH, CQI_CONST_FIELD_TARGET,
- # CQI_CONST_FIELD_KEYWORD
- # NB: pairs are sorted by frequency desc.
- # print('CQP_FDIST_1')
- self.__send_WORD(CQi.CQP_FDIST_1)
- self.__send_STRING(subcorpus)
- self.__send_INT(cutoff)
- self.__send_BYTE(field)
- self.__send_STRING(attribute)
- return self.__recv_response(CQi.DATA_INT_LIST)
-
- def cqp_fdist_2(self, subcorpus, cutoff, field1, attribute1, field2,
- attribute2):
- """ NOTE: frequency distribution of pairs of tokens """
- # INPUT: (STRING subcorpus, INT cutoff, BYTE field1, STRING attribute1,
- # BYTE field2, STRING attribute2)
- # OUTPUT: CQI_DATA_INT_LIST
- # returns (id1, id2, frequency) pairs flattened into a list of size
- # 3*
- # NB: triples are sorted by frequency desc.
- # print('CQP_FDIST_2')
- self.__send_WORD(CQi.CQP_FDIST_2)
- self.__send_STRING(subcorpus)
- self.__send_INT(cutoff)
- self.__send_BYTE(field1)
- self.__send_STRING(attribute1)
- self.__send_BYTE(field2)
- self.__send_STRING(attribute2)
- return self.__recv_response(CQi.DATA_INT_LIST)
-
- def __recv_response(self, expected_response_type=None):
- byte_data = self.__recv_WORD()
- response_type = byte_data >> 8
- if response_type == CQi.CL_ERROR:
- raise Exception(CQi.lookup[byte_data])
- elif response_type == CQi.CQP_ERROR:
- raise Exception(CQi.lookup[byte_data])
- elif response_type == CQi.DATA:
- if (expected_response_type is not None
- and byte_data != expected_response_type):
- raise Exception("Expected different response type."
- + "Received: {}, ".format(byte_data)
- + "Expected {}".format(expected_response_type))
- return self.__recv_DATA(byte_data)
- elif response_type == CQi.ERROR:
- raise Exception(CQi.lookup[byte_data])
- elif response_type == CQi.STATUS:
- if (expected_response_type is not None
- and byte_data != expected_response_type):
- raise Exception("Expected different response type."
- + "Received: {}, ".format(byte_data)
- + "Expected {}".format(expected_response_type))
- return byte_data
- else:
- raise Exception(
- 'Unknown response type: {}'.format(hex(response_type))
- )
-
- def __recv_DATA(self, data_type):
- if data_type == CQi.DATA_BYTE:
- data = self.__recv_DATA_BYTE()
- elif data_type == CQi.DATA_BOOL:
- data = self.__recv_DATA_BOOL()
- elif data_type == CQi.DATA_INT:
- data = self.__recv_DATA_INT()
- elif data_type == CQi.DATA_STRING:
- data = self.__recv_DATA_STRING()
- elif data_type == CQi.DATA_BYTE_LIST:
- data = self.__recv_DATA_BYTE_LIST()
- elif data_type == CQi.DATA_BOOL_LIST:
- data = self.__recv_DATA_BOOL_LIST()
- elif data_type == CQi.DATA_INT_LIST:
- data = self.__recv_DATA_INT_LIST()
- elif data_type == CQi.DATA_STRING_LIST:
- data = self.__recv_DATA_STRING_LIST()
- elif data_type == CQi.DATA_INT_INT:
- data = self.__recv_DATA_INT_INT()
- elif data_type == CQi.DATA_INT_INT_INT_INT:
- data = self.__recv_DATA_INT_INT_INT_INT()
- elif data_type == CQi.DATA_INT_TABLE:
- data = self.__recv_DATA_INT_TABLE()
- else:
- raise Exception('Unknown data type: {}'.format(hex(data_type)))
- return data
-
- def __recv_DATA_BYTE(self):
- byte_data = self.connection.recv(1)
- return struct.unpack('!B', byte_data)[0]
-
- def __recv_DATA_BOOL(self):
- byte_data = self.connection.recv(1)
- return struct.unpack('!?', byte_data)[0]
-
- def __recv_DATA_INT(self):
- byte_data = self.connection.recv(4)
- return struct.unpack('!i', byte_data)[0]
-
- def __recv_DATA_STRING(self):
- n = self.__recv_WORD()
- byte_data = self.connection.recv(n)
- return struct.unpack('!{}s'.format(n), byte_data)[0].decode()
-
- def __recv_DATA_BYTE_LIST(self):
- data = []
- n = self.__recv_DATA_INT()
- while n > 0:
- data.append(self.__recv_DATA_BYTE())
- n -= 1
- return data
-
- def __recv_DATA_BOOL_LIST(self):
- data = []
- n = self.__recv_DATA_INT()
- while n > 0:
- data.append(self.__recv_DATA_BOOL())
- n -= 1
- return data
-
- def __recv_DATA_INT_LIST(self):
- data = []
- n = self.__recv_DATA_INT()
- while n > 0:
- data.append(self.__recv_DATA_INT())
- n -= 1
- return data
-
- def __recv_DATA_STRING_LIST(self):
- data = []
- n = self.__recv_DATA_INT()
- while n > 0:
- data.append(self.__recv_DATA_STRING())
- n -= 1
- return data
-
- def __recv_DATA_INT_INT(self):
- return (self.__recv_DATA_INT(), self.__recv_DATA_INT())
-
- def __recv_DATA_INT_INT_INT_INT(self):
- return (self.__recv_DATA_INT(),
- self.__recv_DATA_INT(),
- self.__recv_DATA_INT(),
- self.__recv_DATA_INT())
-
- def __recv_DATA_INT_TABLE(self):
- rows = self.__recv_DATA_INT()
- columns = self.__recv_DATA_INT()
- data = []
- for i in range(0, rows):
- row = []
- for j in range(0, columns):
- row.append(self.__recv_DATA_INT())
- data.append(row)
- return data
-
- def __recv_WORD(self):
- byte_data = self.connection.recv(2)
- return struct.unpack('!H', byte_data)[0]
-
- def __send_BYTE(self, byte_data):
- data = struct.pack('!B', byte_data)
- self.connection.sendall(data)
-
- def __send_BOOL(self, bool_data):
- data = struct.pack('!?', bool_data)
- self.connection.sendall(data)
-
- def __send_INT(self, int_data):
- data = struct.pack('!i', int_data)
- self.connection.sendall(data)
-
- def __send_STRING(self, string_data):
- encoded_string_data = string_data.encode('utf-8')
- n = len(encoded_string_data)
- data = struct.pack('!H{}s'.format(n), n, encoded_string_data)
- self.connection.sendall(data)
-
- def __send_INT_LIST(self, int_list_data):
- n = len(int_list_data)
- self.__send_INT(n)
- for int_data in int_list_data:
- self.__send_INT(int_data)
-
- def __send_STRING_LIST(self, string_list_data):
- n = len(string_list_data)
- self.__send_INT(n)
- for string_data in string_list_data:
- self.__send_STRING(string_data)
-
- def __send_WORD(self, word_data):
- data = struct.pack('!H', word_data)
- self.connection.sendall(data)
diff --git a/app/corpora/CQiWrapper/CQiWrapper.py b/app/corpora/CQiWrapper/CQiWrapper.py
index dd0d29bc..dcd5d8f0 100644
--- a/app/corpora/CQiWrapper/CQiWrapper.py
+++ b/app/corpora/CQiWrapper/CQiWrapper.py
@@ -1,10 +1,9 @@
-from .CQiClient import CQiClient
-from .CQi import CONST_FIELD_MATCH, CONST_FIELD_MATCHEND
-import time
from app import logger # only works if imported into opaque web app
+from . import CQi
+import time
-class CQiWrapper(CQiClient):
+class CQiWrapper(CQi.Client):
'''
CQIiWrapper object
@@ -155,11 +154,11 @@ class CQiWrapper(CQiClient):
offset_end = min((self.nr_matches + result_offset - 1), self.match_count - 1)
logger.warning('Offset end is: {}'.format(offset_end))
match_boundaries = zip(self.cqp_dump_subcorpus(self.result_subcorpus,
- CONST_FIELD_MATCH,
+ CQi.CONST_FIELD_MATCH,
offset_start,
offset_end),
self.cqp_dump_subcorpus(self.result_subcorpus,
- CONST_FIELD_MATCHEND,
+ CQi.CONST_FIELD_MATCHEND,
offset_start,
offset_end))