Add CQiWrapper

2025-07-27 06:11:34 +00:00 · 2019-11-07 15:48:47 +01:00
parent 13421a9e7f
commit 8e5e8408bd
3 changed files with 1257 additions and 0 deletions
--- a/app/corpora/CQiWrapper/CQi.py
+++ b/app/corpora/CQiWrapper/CQi.py
@@ -0,0 +1,406 @@
 # ########################################################################### #
 # IMS CQi specification                                                       #
 #                                                                             #
 # Version:       0.1a ;o)                                                     #
 # Author:        Stefan Evert (evert@ims.uni-stuttgart.de)                    #
 # Modified by:   Patrick Jentsch <p.jentsch@uni-bielefeld.de>                 #
 # Modified date: Thurs Oct 10 <time>                                          #
 # ########################################################################### #
 """ 1. padding """
 # Padding byte value.
 PAD = 0x00
 """ 2. CQi responses """
 """ 2.1 CQI_STATUS_* """
 # Response codes are 16-bit words: the high byte is the response group
 # (STATUS, ERROR, DATA, CL_ERROR, CQP_ERROR) and the low byte identifies the
 # concrete response within that group, e.g. STATUS_OK = 0x0101 is entry 0x01
 # of group STATUS = 0x01.
 STATUS = 0x01
 STATUS_OK = 0x0101
 STATUS_CONNECT_OK = 0x0102
 STATUS_BYE_OK = 0x0103
 STATUS_PING_OK = 0x0104
 """ 2.2 CQI_ERROR_*  """
 ERROR = 0x02
 ERROR_GENERAL_ERROR = 0x0201
 ERROR_CONNECT_REFUSED = 0x0202
 ERROR_USER_ABORT = 0x0203
 ERROR_SYNTAX_ERROR = 0x0204
 # includes corpus/attribute/subcorpus specifier syntax
 """ 2.3 CQI_DATA_* """
 # A DATA_* word announces the type of the payload that follows it.
 DATA = 0x03
 DATA_BYTE = 0x0301
 DATA_BOOL = 0x0302
 DATA_INT = 0x0303
 DATA_STRING = 0x0304
 DATA_BYTE_LIST = 0x0305
 DATA_BOOL_LIST = 0x0306
 DATA_INT_LIST = 0x0307
 DATA_STRING_LIST = 0x0308
 DATA_INT_INT = 0x0309
 DATA_INT_INT_INT_INT = 0x030A
 DATA_INT_TABLE = 0x030B
 """ 2.4 CQI_CL_ERROR_* """
 """
 " NOTE: some CL error codes are not represented in the CQi specs
 "       - usually because they're not used in the CL any more
 "       - CDA_ENOSTRING is not considered an error (returns -1)
 "       - CDA_EARGS: dynamic attribute calls not yet supported
 """
 CL_ERROR = 0x04
 CL_ERROR_NO_SUCH_ATTRIBUTE = 0x0401
 # returned if CQi server couldn't open attribute
 CL_ERROR_WRONG_ATTRIBUTE_TYPE = 0x0402
 # CDA_EATTTYPE
 CL_ERROR_OUT_OF_RANGE = 0x0403
 # CDA_EIDORNG, CDA_EIDXORNG, CDA_EPOSORNG
 CL_ERROR_REGEX = 0x0404
 # CDA_EPATTERN (not used), CDA_EBADREGEX
 CL_ERROR_CORPUS_ACCESS = 0x0405
 # CDA_ENODATA
 CL_ERROR_OUT_OF_MEMORY = 0x0406
 # CDA_ENOMEM
 # this means the CQi server has run out of memory;
 # try discarding some other corpora and/or subcorpora
 CL_ERROR_INTERNAL = 0x0407
 # CDA_EOTHER, CDA_ENYI
 # this is the classical 'please contact technical support' error
 """ 2.5 CQI_CQP_ERROR_* """
 CQP_ERROR = 0x05
 # CQP error messages yet to be defined
 CQP_ERROR_GENERAL = 0x0501
 CQP_ERROR_NO_SUCH_CORPUS = 0x0502
 CQP_ERROR_INVALID_FIELD = 0x0503
 CQP_ERROR_OUT_OF_RANGE = 0x0504
 # various cases where a number is out of range
 """ 3. CQi commands """
 """ 3.1 CQI_CTRL_* """
 # Command codes are 16-bit words laid out like the response codes: the high
 # byte is the command group (CTRL, ASK_FEATURE, CORPUS, CL, CQP) and the low
 # byte selects the command. The comments below each constant give the
 # arguments sent after the command word (INPUT) and the expected response
 # (OUTPUT).
 CTRL = 0x11
 CTRL_CONNECT = 0x1101
 # INPUT: (STRING username, STRING password)
 # OUTPUT: CQI_STATUS_CONNECT_OK, CQI_ERROR_CONNECT_REFUSED
 CTRL_BYE = 0x1102
 # INPUT: ()
 # OUTPUT: CQI_STATUS_BYE_OK
 CTRL_USER_ABORT = 0x1103
 # INPUT: ()
 # OUTPUT:
 CTRL_PING = 0x1104
 # INPUT: ()
 # OUTPUT: CQI_STATUS_PING_OK
 CTRL_LAST_GENERAL_ERROR = 0x1105
 # INPUT: ()
 # OUTPUT: CQI_DATA_STRING
 # full-text error message for the last general error reported by the CQi server
 """ 3.2 CQI_ASK_FEATURE_* """
 ASK_FEATURE = 0x12
 ASK_FEATURE_CQI_1_0 = 0x1201
 # INPUT: ()
 # OUTPUT: CQI_DATA_BOOL
 ASK_FEATURE_CL_2_3 = 0x1202
 # INPUT: ()
 # OUTPUT: CQI_DATA_BOOL
 ASK_FEATURE_CQP_2_3 = 0x1203
 # INPUT: ()
 # OUTPUT: CQI_DATA_BOOL
 """ 3.3 CQI_CORPUS_* """
 # NOTE: no constant is defined for 0x1302; the numbering jumps from 0x1301
 # to 0x1303.
 CORPUS = 0x13
 CORPUS_LIST_CORPORA = 0x1301
 # INPUT: ()
 # OUTPUT: CQI_DATA_STRING_LIST
 CORPUS_CHARSET = 0x1303
 # INPUT: (STRING corpus)
 # OUTPUT: CQI_DATA_STRING
 CORPUS_PROPERTIES = 0x1304
 # INPUT: (STRING corpus)
 # OUTPUT: CQI_DATA_STRING_LIST
 CORPUS_POSITIONAL_ATTRIBUTES = 0x1305
 # INPUT: (STRING corpus)
 # OUTPUT: CQI_DATA_STRING_LIST
 CORPUS_STRUCTURAL_ATTRIBUTES = 0x1306
 # INPUT: (STRING corpus)
 # OUTPUT: CQI_DATA_STRING_LIST
 CORPUS_STRUCTURAL_ATTRIBUTE_HAS_VALUES = 0x1307
 # INPUT: (STRING attribute)
 # OUTPUT: CQI_DATA_BOOL
 CORPUS_ALIGNMENT_ATTRIBUTES = 0x1308
 # INPUT: (STRING corpus)
 # OUTPUT: CQI_DATA_STRING_LIST
 CORPUS_FULL_NAME = 0x1309
 # INPUT: (STRING corpus)
 # OUTPUT: CQI_DATA_STRING
 # the full name of <corpus> as specified in its registry entry
 CORPUS_INFO = 0x130A
 # INPUT: (STRING corpus)
 # OUTPUT: CQI_DATA_STRING_LIST
 # returns the contents of the .info file of <corpus> as a list of lines
 CORPUS_DROP_CORPUS = 0x130B
 # INPUT: (STRING corpus)
 # OUTPUT: CQI_STATUS_OK
 # try to unload a corpus and all its attributes from memory
 """ 3.4 CQI_CL_* """
 CL = 0x14
 # low-level corpus access (CL functions)
 CL_ATTRIBUTE_SIZE = 0x1401
 # INPUT: (STRING attribute)
 # OUTPUT: CQI_DATA_INT
 # returns the size of <attribute>:
 # - number of tokens (positional)
 # - number of regions (structural)
 # - number of alignments (alignment)
 CL_LEXICON_SIZE = 0x1402
 # INPUT: (STRING attribute)
 # OUTPUT: CQI_DATA_INT
 # returns the number of entries in the lexicon of a positional attribute;
 # valid lexicon IDs range from 0 .. (lexicon_size - 1)
 CL_DROP_ATTRIBUTE = 0x1403
 # INPUT: (STRING attribute)
 # OUTPUT: CQI_STATUS_OK
 # unload attribute from memory
 """
 " NOTE: simple (scalar) mappings are applied to lists (the returned list has
 "       exactly the same length as the list passed as an argument)
 """
 CL_STR2ID = 0x1404
 # INPUT: (STRING attribute, STRING_LIST strings)
 # OUTPUT: CQI_DATA_INT_LIST
 # returns -1 for every string in <strings> that is not found in the lexicon
 CL_ID2STR = 0x1405
 # INPUT: (STRING attribute, INT_LIST id)
 # OUTPUT: CQI_DATA_STRING_LIST
 # returns "" for every ID in <id> that is out of range
 CL_ID2FREQ = 0x1406
 # INPUT: (STRING attribute, INT_LIST id)
 # OUTPUT: CQI_DATA_INT_LIST
 # returns 0 for every ID in <id> that is out of range
 CL_CPOS2ID = 0x1407
 # INPUT: (STRING attribute, INT_LIST cpos)
 # OUTPUT: CQI_DATA_INT_LIST
 # returns -1 for every corpus position in <cpos> that is out of range
 CL_CPOS2STR = 0x1408
 # INPUT: (STRING attribute, INT_LIST cpos)
 # OUTPUT: CQI_DATA_STRING_LIST
 # returns "" for every corpus position in <cpos> that is out of range
 CL_CPOS2STRUC = 0x1409
 # INPUT: (STRING attribute, INT_LIST cpos)
 # OUTPUT: CQI_DATA_INT_LIST
 # returns -1 for every corpus position not inside a structure region
 """
 " NOTE: temporary addition for the Euralex2000 tutorial, but should probably be
 "       included in CQi specs
 """
 # These two codes (0x1420, 0x1421) sit outside the contiguous
 # 0x1401 .. 0x1410 range used by the other CL commands.
 CL_CPOS2LBOUND = 0x1420
 # INPUT: (STRING attribute, INT_LIST cpos)
 # OUTPUT: CQI_DATA_INT_LIST
 # returns left boundary of s-attribute region enclosing cpos, -1 if not in
 # region
 CL_CPOS2RBOUND = 0x1421
 # INPUT: (STRING attribute, INT_LIST cpos)
 # OUTPUT: CQI_DATA_INT_LIST
 # returns right boundary of s-attribute region enclosing cpos, -1 if not in
 # region
 CL_CPOS2ALG = 0x140A
 # INPUT: (STRING attribute, INT_LIST cpos)
 # OUTPUT: CQI_DATA_INT_LIST
 # returns -1 for every corpus position not inside an alignment
 CL_STRUC2STR = 0x140B
 # INPUT: (STRING attribute, INT_LIST strucs)
 # OUTPUT: CQI_DATA_STRING_LIST
 # returns annotated string values of structure regions in <strucs>; "" if out
 # of range
 # check CQI_CORPUS_STRUCTURAL_ATTRIBUTE_HAS_VALUES(<attribute>) first
 """
 " NOTE: the following mappings take a single argument and return multiple
 "       values, including lists of arbitrary size
 """
 CL_ID2CPOS = 0x140C
 # INPUT: (STRING attribute, INT id)
 # OUTPUT: CQI_DATA_INT_LIST
 # returns all corpus positions where the given token occurs
 CL_IDLIST2CPOS = 0x140D
 # INPUT: (STRING attribute, INT_LIST id_list)
 # OUTPUT: CQI_DATA_INT_LIST
 # returns all corpus positions where one of the tokens in <id_list>
 # occurs; the returned list is sorted as a whole, not per token id
 CL_REGEX2ID = 0x140E
 # INPUT: (STRING attribute, STRING regex)
 # OUTPUT: CQI_DATA_INT_LIST
 # returns lexicon IDs of all tokens that match <regex>; the returned
 # list may be empty (size 0);
 CL_STRUC2CPOS = 0x140F
 # INPUT: (STRING attribute, INT struc)
 # OUTPUT: CQI_DATA_INT_INT
 # returns start and end corpus positions of structure region <struc>
 CL_ALG2CPOS = 0x1410
 # INPUT: (STRING attribute, INT alg)
 # OUTPUT: CQI_DATA_INT_INT_INT_INT
 # returns (src_start, src_end, target_start, target_end)
 """ 3.5 CQI_CQP_* """
 # NOTE: codes 0x1506 .. 0x1508 are not defined here; DROP_SUBCORPUS is
 # 0x1509 and the FDIST commands start at 0x1510.
 CQP = 0x15
 CQP_QUERY = 0x1501
 # INPUT: (STRING mother_corpus, STRING subcorpus_name, STRING query)
 # OUTPUT: CQI_STATUS_OK
 # <query> must include the ';' character terminating the query.
 CQP_LIST_SUBCORPORA = 0x1502
 # INPUT: (STRING corpus)
 # OUTPUT: CQI_DATA_STRING_LIST
 CQP_SUBCORPUS_SIZE = 0x1503
 # INPUT: (STRING subcorpus)
 # OUTPUT: CQI_DATA_INT
 CQP_SUBCORPUS_HAS_FIELD = 0x1504
 # INPUT: (STRING subcorpus, BYTE field)
 # OUTPUT: CQI_DATA_BOOL
 CQP_DUMP_SUBCORPUS = 0x1505
 # INPUT: (STRING subcorpus, BYTE field, INT first, INT last)
 # OUTPUT: CQI_DATA_INT_LIST
 # Dump the values of <field> for match ranges <first> .. <last> in <subcorpus>.
 # <field> is one of the CQI_CONST_FIELD_* constants.
 CQP_DROP_SUBCORPUS = 0x1509
 # INPUT: (STRING subcorpus)
 # OUTPUT: CQI_STATUS_OK
 # delete a subcorpus from memory
 """
 " NOTE: The following two functions are temporarily included for the Euralex
 "       2000 tutorial demo
 """
 """ NOTE: frequency distribution of single tokens """
 CQP_FDIST_1 = 0x1510
 # INPUT: (STRING subcorpus, INT cutoff, BYTE field, STRING attribute)
 # OUTPUT: CQI_DATA_INT_LIST
 # returns <n> (id, frequency) pairs flattened into a list of size 2*<n>
 # field is one of CQI_CONST_FIELD_MATCH, CQI_CONST_FIELD_TARGET,
 # CQI_CONST_FIELD_KEYWORD
 # NB: pairs are sorted by frequency desc.
 """ NOTE: frequency distribution of pairs of tokens """
 CQP_FDIST_2 = 0x1511
 # INPUT: (STRING subcorpus, INT cutoff, BYTE field1, STRING attribute1,
 #         BYTE field2, STRING attribute2)
 # OUTPUT: CQI_DATA_INT_LIST
 # returns <n> (id1, id2, frequency) pairs flattened into a list of size 3*<n>
 # NB: triples are sorted by frequency desc.
 """ 4. Constant Definitions """
 # Boolean values; NO/YES are aliases with the same numeric values as
 # FALSE/TRUE.
 CONST_FALSE = 0x00
 CONST_NO = 0x00
 CONST_TRUE = 0x01
 CONST_YES = 0x01
 """
 " NOTE: The following constants specify which field will be returned by
 "       CQI_CQP_DUMP_SUBCORPUS and some other subcorpus commands.
 """
 CONST_FIELD_MATCH = 0x10
 CONST_FIELD_MATCHEND = 0x11
 """
 " NOTE: The constants specifiying target0 .. target9 are guaranteed to have the
 " numerical values 0 .. 9, so clients do not need to look up the constant
 " values if they're handling arbitrary targets.
 """
 CONST_FIELD_TARGET_0 = 0x00
 CONST_FIELD_TARGET_1 = 0x01
 CONST_FIELD_TARGET_2 = 0x02
 CONST_FIELD_TARGET_3 = 0x03
 CONST_FIELD_TARGET_4 = 0x04
 CONST_FIELD_TARGET_5 = 0x05
 CONST_FIELD_TARGET_6 = 0x06
 CONST_FIELD_TARGET_7 = 0x07
 CONST_FIELD_TARGET_8 = 0x08
 CONST_FIELD_TARGET_9 = 0x09
 """
 " NOTE: The following constants are provided for backward compatibility with
 "       traditional CQP field names & while the generalised target concept
 "       isn't yet implemented in the CQPserver.
 """
 # By value, TARGET equals TARGET_0 and KEYWORD equals TARGET_9.
 CONST_FIELD_TARGET = 0x00
 CONST_FIELD_KEYWORD = 0x09
 """ NOTE: CQi version is CQI_MAJOR_VERSION.CQI_MINOR_VERSION """
 # With the values below the version reads 0.1.
 MAJOR_VERSION = 0x00
 MINOR_VERSION = 0x01
 """ 5. CQi lookup dictionary. """
 # Maps every response and command word to its symbolic CQi name. Keys are
 # written in hex so they can be compared directly with the constants above.
 lookup = {
    # CQI_STATUS_*
    0x0101: 'CQI_STATUS_OK',
    0x0102: 'CQI_STATUS_CONNECT_OK',
    0x0103: 'CQI_STATUS_BYE_OK',
    0x0104: 'CQI_STATUS_PING_OK',
    # CQI_ERROR_*
    0x0201: 'CQI_ERROR_GENERAL_ERROR',
    0x0202: 'CQI_ERROR_CONNECT_REFUSED',
    0x0203: 'CQI_ERROR_USER_ABORT',
    0x0204: 'CQI_ERROR_SYNTAX_ERROR',
    # CQI_DATA_*
    0x0301: 'CQI_DATA_BYTE',
    0x0302: 'CQI_DATA_BOOL',
    0x0303: 'CQI_DATA_INT',
    0x0304: 'CQI_DATA_STRING',
    0x0305: 'CQI_DATA_BYTE_LIST',
    0x0306: 'CQI_DATA_BOOL_LIST',
    0x0307: 'CQI_DATA_INT_LIST',
    0x0308: 'CQI_DATA_STRING_LIST',
    0x0309: 'CQI_DATA_INT_INT',
    0x030A: 'CQI_DATA_INT_INT_INT_INT',
    0x030B: 'CQI_DATA_INT_TABLE',
    # CQI_CL_ERROR_*
    0x0401: 'CQI_CL_ERROR_NO_SUCH_ATTRIBUTE',
    0x0402: 'CQI_CL_ERROR_WRONG_ATTRIBUTE_TYPE',
    0x0403: 'CQI_CL_ERROR_OUT_OF_RANGE',
    0x0404: 'CQI_CL_ERROR_REGEX',
    0x0405: 'CQI_CL_ERROR_CORPUS_ACCESS',
    0x0406: 'CQI_CL_ERROR_OUT_OF_MEMORY',
    0x0407: 'CQI_CL_ERROR_INTERNAL',
    # CQI_CQP_ERROR_*
    0x0501: 'CQI_CQP_ERROR_GENERAL',
    0x0502: 'CQI_CQP_ERROR_NO_SUCH_CORPUS',
    0x0503: 'CQI_CQP_ERROR_INVALID_FIELD',
    0x0504: 'CQI_CQP_ERROR_OUT_OF_RANGE',
    # CQI_CTRL_*
    0x1101: 'CQI_CTRL_CONNECT',
    0x1102: 'CQI_CTRL_BYE',
    0x1103: 'CQI_CTRL_USER_ABORT',
    0x1104: 'CQI_CTRL_PING',
    0x1105: 'CQI_CTRL_LAST_GENERAL_ERROR',
    # CQI_ASK_FEATURE_*
    0x1201: 'CQI_ASK_FEATURE_CQI_1_0',
    0x1202: 'CQI_ASK_FEATURE_CL_2_3',
    0x1203: 'CQI_ASK_FEATURE_CQP_2_3',
    # CQI_CORPUS_*
    0x1301: 'CQI_CORPUS_LIST_CORPORA',
    0x1303: 'CQI_CORPUS_CHARSET',
    0x1304: 'CQI_CORPUS_PROPERTIES',
    0x1305: 'CQI_CORPUS_POSITIONAL_ATTRIBUTES',
    0x1306: 'CQI_CORPUS_STRUCTURAL_ATTRIBUTES',
    0x1307: 'CQI_CORPUS_STRUCTURAL_ATTRIBUTE_HAS_VALUES',
    0x1308: 'CQI_CORPUS_ALIGNMENT_ATTRIBUTES',
    0x1309: 'CQI_CORPUS_FULL_NAME',
    0x130A: 'CQI_CORPUS_INFO',
    0x130B: 'CQI_CORPUS_DROP_CORPUS',
    # CQI_CL_*
    0x1401: 'CQI_CL_ATTRIBUTE_SIZE',
    0x1402: 'CQI_CL_LEXICON_SIZE',
    0x1403: 'CQI_CL_DROP_ATTRIBUTE',
    0x1404: 'CQI_CL_STR2ID',
    0x1405: 'CQI_CL_ID2STR',
    0x1406: 'CQI_CL_ID2FREQ',
    0x1407: 'CQI_CL_CPOS2ID',
    0x1408: 'CQI_CL_CPOS2STR',
    0x1409: 'CQI_CL_CPOS2STRUC',
    0x140A: 'CQI_CL_CPOS2ALG',
    0x140B: 'CQI_CL_STRUC2STR',
    0x140C: 'CQI_CL_ID2CPOS',
    0x140D: 'CQI_CL_IDLIST2CPOS',
    0x140E: 'CQI_CL_REGEX2ID',
    0x140F: 'CQI_CL_STRUC2CPOS',
    0x1410: 'CQI_CL_ALG2CPOS',
    0x1420: 'CQI_CL_CPOS2LBOUND',
    0x1421: 'CQI_CL_CPOS2RBOUND',
    # CQI_CQP_*
    0x1501: 'CQI_CQP_QUERY',
    0x1502: 'CQI_CQP_LIST_SUBCORPORA',
    0x1503: 'CQI_CQP_SUBCORPUS_SIZE',
    0x1504: 'CQI_CQP_SUBCORPUS_HAS_FIELD',
    0x1505: 'CQI_CQP_DUMP_SUBCORPUS',
    0x1509: 'CQI_CQP_DROP_SUBCORPUS',
    0x1510: 'CQI_CQP_FDIST_1',
    0x1511: 'CQI_CQP_FDIST_2',
 }
--- a/app/corpora/CQiWrapper/CQiClient.py
+++ b/app/corpora/CQiWrapper/CQiClient.py
@@ -0,0 +1,611 @@
 from . import CQi
 import socket
 import struct
 class CQiClient:
    """
    Low-level client for the IMS CQi protocol.

    Each public method sends one CQi command word followed by its arguments
    over a TCP socket and, except for ctrl_user_abort, reads the server's
    response. Data responses are returned as Python values, commands that
    only expect a status return None, and CQi error responses raise an
    Exception carrying the symbolic error name from CQi.lookup.

    Keyword arguments:
    host -- address of the CQi server (default '127.0.0.1')
    port -- TCP port of the CQi server (default 4877)
    """
    def __init__(self, host='127.0.0.1', port=4877):
        self.host = host
        self.port = port
        self.connection = socket.socket()
        try:
            self.connection.connect((self.host, self.port))
        except Exception:
            # Do not leak the socket when the server cannot be reached.
            self.connection.close()
            raise
    def ctrl_connect(self, username, password):
        """Log in to the CQi server."""
        # INPUT: (STRING username, STRING password)
        # OUTPUT: CQI_STATUS_CONNECT_OK, CQI_ERROR_CONNECT_REFUSED
        self.__send_WORD(CQi.CTRL_CONNECT)
        self.__send_STRING(username)
        self.__send_STRING(password)
        self.__recv_response()
    def ctrl_bye(self):
        """Tell the server that the session ends."""
        # INPUT: ()
        # OUTPUT: CQI_STATUS_BYE_OK
        self.__send_WORD(CQi.CTRL_BYE)
        self.__recv_response()
    def ctrl_user_abort(self):
        """Send a user abort; no response is read for this command."""
        # INPUT: ()
        # OUTPUT:
        self.__send_WORD(CQi.CTRL_USER_ABORT)
    def ctrl_ping(self):
        """Ping the server."""
        # INPUT: ()
        # OUTPUT: CQI_STATUS_PING_OK
        self.__send_WORD(CQi.CTRL_PING)
        self.__recv_response()
    def ctrl_last_general_error(self):
        """Return the full-text message of the last general server error."""
        # INPUT: ()
        # OUTPUT: CQI_DATA_STRING
        self.__send_WORD(CQi.CTRL_LAST_GENERAL_ERROR)
        return self.__recv_response()
    def ask_feature_cqi_1_0(self):
        """Ask whether the server supports the CQi 1.0 feature set."""
        # INPUT: ()
        # OUTPUT: CQI_DATA_BOOL
        self.__send_WORD(CQi.ASK_FEATURE_CQI_1_0)
        return self.__recv_response()
    def ask_feature_cl_2_3(self):
        """Ask whether the server supports the CL 2.3 feature set."""
        # INPUT: ()
        # OUTPUT: CQI_DATA_BOOL
        self.__send_WORD(CQi.ASK_FEATURE_CL_2_3)
        return self.__recv_response()
    def ask_feature_cqp_2_3(self):
        """Ask whether the server supports the CQP 2.3 feature set."""
        # INPUT: ()
        # OUTPUT: CQI_DATA_BOOL
        # Fixed: this used to send ASK_FEATURE_CL_2_3.
        self.__send_WORD(CQi.ASK_FEATURE_CQP_2_3)
        return self.__recv_response()
    def corpus_list_corpora(self):
        """Return the names of the corpora known to the server."""
        # INPUT: ()
        # OUTPUT: CQI_DATA_STRING_LIST
        self.__send_WORD(CQi.CORPUS_LIST_CORPORA)
        return self.__recv_response()
    def corpus_list_coprora(self):
        """Misspelled legacy name of corpus_list_corpora, kept for callers."""
        return self.corpus_list_corpora()
    def corpus_charset(self, corpus):
        """Return the character set of <corpus>."""
        # INPUT: (STRING corpus)
        # OUTPUT: CQI_DATA_STRING
        self.__send_WORD(CQi.CORPUS_CHARSET)
        self.__send_STRING(corpus)
        return self.__recv_response()
    def corpus_properties(self, corpus):
        """Return the properties of <corpus>."""
        # INPUT: (STRING corpus)
        # OUTPUT: CQI_DATA_STRING_LIST
        self.__send_WORD(CQi.CORPUS_PROPERTIES)
        self.__send_STRING(corpus)
        return self.__recv_response()
    def corpus_positional_attributes(self, corpus):
        """Return the positional attributes of <corpus>."""
        # INPUT: (STRING corpus)
        # OUTPUT: CQI_DATA_STRING_LIST
        self.__send_WORD(CQi.CORPUS_POSITIONAL_ATTRIBUTES)
        self.__send_STRING(corpus)
        return self.__recv_response()
    def corpus_structural_attributes(self, corpus):
        """Return the structural attributes of <corpus>."""
        # INPUT: (STRING corpus)
        # OUTPUT: CQI_DATA_STRING_LIST
        self.__send_WORD(CQi.CORPUS_STRUCTURAL_ATTRIBUTES)
        self.__send_STRING(corpus)
        return self.__recv_response()
    def corpus_structural_attribute_has_values(self, attribute):
        """Return whether the structural <attribute> carries values."""
        # INPUT: (STRING attribute)
        # OUTPUT: CQI_DATA_BOOL
        self.__send_WORD(CQi.CORPUS_STRUCTURAL_ATTRIBUTE_HAS_VALUES)
        self.__send_STRING(attribute)
        return self.__recv_response()
    def corpus_alignment_attributes(self, corpus):
        """Return the alignment attributes of <corpus>."""
        # INPUT: (STRING corpus)
        # OUTPUT: CQI_DATA_STRING_LIST
        self.__send_WORD(CQi.CORPUS_ALIGNMENT_ATTRIBUTES)
        self.__send_STRING(corpus)
        return self.__recv_response()
    def corpus_full_name(self, corpus):
        """Return the full name of <corpus> as given in its registry entry."""
        # INPUT: (STRING corpus)
        # OUTPUT: CQI_DATA_STRING
        self.__send_WORD(CQi.CORPUS_FULL_NAME)
        self.__send_STRING(corpus)
        return self.__recv_response()
    def corpus_info(self, corpus):
        """Return the .info file of <corpus> as a list of lines."""
        # INPUT: (STRING corpus)
        # OUTPUT: CQI_DATA_STRING_LIST
        self.__send_WORD(CQi.CORPUS_INFO)
        self.__send_STRING(corpus)
        return self.__recv_response()
    def corpus_drop_corpus(self, corpus):
        """
        Try to unload a corpus and all its attributes from memory.

        Marked as broken by the original author.
        TODO: Check what type of return value is provided by the server.
        """
        # INPUT: (STRING corpus)
        # OUTPUT: CQI_STATUS_OK
        self.__send_WORD(CQi.CORPUS_DROP_CORPUS)
        self.__send_STRING(corpus)
        self.__recv_response()
    def cl_attribute_size(self, attribute):
        """Return the size of <attribute>."""
        # INPUT: (STRING attribute)
        # OUTPUT: CQI_DATA_INT
        # returns the size of <attribute>:
        #     number of tokens        (positional)
        #     number of regions       (structural)
        #     number of alignments    (alignment)
        self.__send_WORD(CQi.CL_ATTRIBUTE_SIZE)
        self.__send_STRING(attribute)
        return self.__recv_response()
    def cl_lexicon_size(self, attribute):
        """Return the lexicon size of a positional attribute."""
        # INPUT: (STRING attribute)
        # OUTPUT: CQI_DATA_INT
        # valid lexicon IDs range from 0 .. (lexicon_size - 1)
        self.__send_WORD(CQi.CL_LEXICON_SIZE)
        self.__send_STRING(attribute)
        return self.__recv_response()
    def cl_drop_attribute(self, attribute):
        """Unload <attribute> from memory."""
        # INPUT: (STRING attribute)
        # OUTPUT: CQI_STATUS_OK
        # Fixed: this used to send CL_LEXICON_SIZE.
        self.__send_WORD(CQi.CL_DROP_ATTRIBUTE)
        self.__send_STRING(attribute)
        self.__recv_response()
    """
    " NOTE: simple (scalar) mappings are applied to lists (the returned list
    "       has exactly the same length as the list passed as an argument)
    """
    def cl_str2id(self, attribute, strings):
        """Map strings to lexicon IDs."""
        # INPUT: (STRING attribute, STRING_LIST strings)
        # OUTPUT: CQI_DATA_INT_LIST
        # returns -1 for every string in <strings> that is not found in the
        # lexicon
        # Fixed: this used to send CL_LEXICON_SIZE.
        self.__send_WORD(CQi.CL_STR2ID)
        self.__send_STRING(attribute)
        self.__send_STRING_LIST(strings)
        return self.__recv_response()
    def cl_id2str(self, attribute, id):
        """Map lexicon IDs to strings."""
        # INPUT: (STRING attribute, INT_LIST id)
        # OUTPUT: CQI_DATA_STRING_LIST
        # returns "" for every ID in <id> that is out of range
        self.__send_WORD(CQi.CL_ID2STR)
        self.__send_STRING(attribute)
        self.__send_INT_LIST(id)
        return self.__recv_response()
    def cl_id2freq(self, attribute, id):
        """Map lexicon IDs to frequencies."""
        # INPUT: (STRING attribute, INT_LIST id)
        # OUTPUT: CQI_DATA_INT_LIST
        # returns 0 for every ID in <id> that is out of range
        self.__send_WORD(CQi.CL_ID2FREQ)
        self.__send_STRING(attribute)
        self.__send_INT_LIST(id)
        return self.__recv_response()
    def cl_cpos2id(self, attribute, cpos):
        """Map corpus positions to lexicon IDs."""
        # INPUT: (STRING attribute, INT_LIST cpos)
        # OUTPUT: CQI_DATA_INT_LIST
        # returns -1 for every corpus position in <cpos> that is out of range
        # Fixed: this used to send CL_ID2FREQ.
        self.__send_WORD(CQi.CL_CPOS2ID)
        self.__send_STRING(attribute)
        self.__send_INT_LIST(cpos)
        return self.__recv_response()
    def cl_cpos2str(self, attribute, cpos):
        """Map corpus positions to strings."""
        # INPUT: (STRING attribute, INT_LIST cpos)
        # OUTPUT: CQI_DATA_STRING_LIST
        # returns "" for every corpus position in <cpos> that is out of range
        self.__send_WORD(CQi.CL_CPOS2STR)
        self.__send_STRING(attribute)
        self.__send_INT_LIST(cpos)
        return self.__recv_response()
    def cl_cpos2struc(self, attribute, cpos):
        """Map corpus positions to structure region numbers."""
        # INPUT: (STRING attribute, INT_LIST cpos)
        # OUTPUT: CQI_DATA_INT_LIST
        # returns -1 for every corpus position not inside a structure region
        self.__send_WORD(CQi.CL_CPOS2STRUC)
        self.__send_STRING(attribute)
        self.__send_INT_LIST(cpos)
        return self.__recv_response()
    """
    " NOTE: temporary addition for the Euralex2000 tutorial, but should
    "       probably be included in CQi specs
    """
    def cl_cpos2lbound(self, attribute, cpos):
        """Map corpus positions to the left boundary of their region."""
        # INPUT: (STRING attribute, INT_LIST cpos)
        # OUTPUT: CQI_DATA_INT_LIST
        # returns left boundary of s-attribute region enclosing cpos, -1 if not
        # in region
        self.__send_WORD(CQi.CL_CPOS2LBOUND)
        self.__send_STRING(attribute)
        self.__send_INT_LIST(cpos)
        return self.__recv_response()
    def cl_cpos2rbound(self, attribute, cpos):
        """Map corpus positions to the right boundary of their region."""
        # INPUT: (STRING attribute, INT_LIST cpos)
        # OUTPUT: CQI_DATA_INT_LIST
        # returns right boundary of s-attribute region enclosing cpos, -1 if
        # not in region
        self.__send_WORD(CQi.CL_CPOS2RBOUND)
        self.__send_STRING(attribute)
        self.__send_INT_LIST(cpos)
        return self.__recv_response()
    def cl_cpos2alg(self, attribute, cpos):
        """Map corpus positions to alignment numbers."""
        # INPUT: (STRING attribute, INT_LIST cpos)
        # OUTPUT: CQI_DATA_INT_LIST
        # returns -1 for every corpus position not inside an alignment
        self.__send_WORD(CQi.CL_CPOS2ALG)
        self.__send_STRING(attribute)
        self.__send_INT_LIST(cpos)
        return self.__recv_response()
    def cl_struc2str(self, attribute, strucs):
        """Map structure region numbers to their annotated strings."""
        # INPUT: (STRING attribute, INT_LIST strucs)
        # OUTPUT: CQI_DATA_STRING_LIST
        # returns annotated string values of structure regions in <strucs>; ""
        # if out of range
        # check CQI_CORPUS_STRUCTURAL_ATTRIBUTE_HAS_VALUES(<attribute>) first
        self.__send_WORD(CQi.CL_STRUC2STR)
        self.__send_STRING(attribute)
        self.__send_INT_LIST(strucs)
        return self.__recv_response()
    """
    " NOTE: the following mappings take a single argument and return multiple
    "       values, including lists of arbitrary size
    """
    def cl_id2cpos(self, attribute, id):
        """Return all corpus positions where the given token occurs."""
        # INPUT: (STRING attribute, INT id)
        # OUTPUT: CQI_DATA_INT_LIST
        self.__send_WORD(CQi.CL_ID2CPOS)
        self.__send_STRING(attribute)
        self.__send_INT(id)
        return self.__recv_response()
    def cl_idlist2cpos(self, attribute, id_list):
        """Return all corpus positions of any token in <id_list>."""
        # INPUT: (STRING attribute, INT_LIST id_list)
        # OUTPUT: CQI_DATA_INT_LIST
        # the returned list is sorted as a whole, not per token id
        self.__send_WORD(CQi.CL_IDLIST2CPOS)
        self.__send_STRING(attribute)
        self.__send_INT_LIST(id_list)
        return self.__recv_response()
    def cl_regex2id(self, attribute, regex):
        """Return the lexicon IDs of all tokens matching <regex>."""
        # INPUT: (STRING attribute, STRING regex)
        # OUTPUT: CQI_DATA_INT_LIST
        # the returned list may be empty (size 0)
        self.__send_WORD(CQi.CL_REGEX2ID)
        self.__send_STRING(attribute)
        self.__send_STRING(regex)
        return self.__recv_response()
    def cl_struc2cpos(self, attribute, struc):
        """Return start and end corpus position of region <struc>."""
        # INPUT: (STRING attribute, INT struc)
        # OUTPUT: CQI_DATA_INT_INT
        self.__send_WORD(CQi.CL_STRUC2CPOS)
        self.__send_STRING(attribute)
        self.__send_INT(struc)
        return self.__recv_response()
    def cl_alg2cpos(self, attribute, alg):
        """Return (src_start, src_end, target_start, target_end)."""
        # INPUT: (STRING attribute, INT alg)
        # OUTPUT: CQI_DATA_INT_INT_INT_INT
        self.__send_WORD(CQi.CL_ALG2CPOS)
        self.__send_STRING(attribute)
        self.__send_INT(alg)
        return self.__recv_response()
    def cqp_query(self, mother_corpus, subcorpus_name, query):
        """Run <query> on <mother_corpus>, storing it as <subcorpus_name>."""
        # INPUT: (STRING mother_corpus, STRING subcorpus_name, STRING query)
        # OUTPUT: CQI_STATUS_OK
        # <query> must include the ';' character terminating the query.
        self.__send_WORD(CQi.CQP_QUERY)
        self.__send_STRING(mother_corpus)
        self.__send_STRING(subcorpus_name)
        self.__send_STRING(query)
        # Fixed: go through __recv_response so that error responses (for
        # example a syntax error in the query) raise instead of being
        # silently discarded.
        self.__recv_response()
    def cqp_list_subcorpora(self, corpus):
        """Return the subcorpora of <corpus>."""
        # INPUT: (STRING corpus)
        # OUTPUT: CQI_DATA_STRING_LIST
        self.__send_WORD(CQi.CQP_LIST_SUBCORPORA)
        self.__send_STRING(corpus)
        return self.__recv_response()
    def cqp_subcorpus_size(self, subcorpus):
        """Return the size of <subcorpus>."""
        # INPUT: (STRING subcorpus)
        # OUTPUT: CQI_DATA_INT
        self.__send_WORD(CQi.CQP_SUBCORPUS_SIZE)
        self.__send_STRING(subcorpus)
        return self.__recv_response()
    def cqp_subcorpus_has_field(self, subcorpus, field):
        """Return whether <subcorpus> has the given <field>."""
        # INPUT: (STRING subcorpus, BYTE field)
        # OUTPUT: CQI_DATA_BOOL
        self.__send_WORD(CQi.CQP_SUBCORPUS_HAS_FIELD)
        self.__send_STRING(subcorpus)
        self.__send_BYTE(field)
        return self.__recv_response()
    def cqp_dump_subcorpus(self, subcorpus, field, first, last):
        """Dump <field> for match ranges <first> .. <last> of <subcorpus>."""
        # INPUT: (STRING subcorpus, BYTE field, INT first, INT last)
        # OUTPUT: CQI_DATA_INT_LIST
        # <field> is one of the CQI_CONST_FIELD_* constants.
        self.__send_WORD(CQi.CQP_DUMP_SUBCORPUS)
        self.__send_STRING(subcorpus)
        self.__send_BYTE(field)
        self.__send_INT(first)
        self.__send_INT(last)
        return self.__recv_response()
    def cqp_drop_subcorpus(self, subcorpus):
        """Delete a subcorpus from memory."""
        # INPUT: (STRING subcorpus)
        # OUTPUT: CQI_STATUS_OK
        self.__send_WORD(CQi.CQP_DROP_SUBCORPUS)
        self.__send_STRING(subcorpus)
        self.__recv_response()
    """
    " NOTE: The following two functions are temporarily included for the
    "       Euralex 2000 tutorial demo
    """
    def cqp_fdist_1(self, subcorpus, cutoff, field, attribute):
        """ NOTE: frequency distribution of single tokens """
        # INPUT: (STRING subcorpus, INT cutoff, BYTE field, STRING attribute)
        # OUTPUT: CQI_DATA_INT_LIST
        # returns <n> (id, frequency) pairs flattened into a list of size 2*<n>
        # field is one of CQI_CONST_FIELD_MATCH, CQI_CONST_FIELD_TARGET,
        #                 CQI_CONST_FIELD_KEYWORD
        # NB: pairs are sorted by frequency desc.
        self.__send_WORD(CQi.CQP_FDIST_1)
        self.__send_STRING(subcorpus)
        self.__send_INT(cutoff)
        self.__send_BYTE(field)
        self.__send_STRING(attribute)
        return self.__recv_response()
    def cqp_fdist_2(self, subcorpus, cutoff, field1, attribute1, field2,
                    attribute2):
        """ NOTE: frequency distribution of pairs of tokens """
        # INPUT: (STRING subcorpus, INT cutoff, BYTE field1, STRING attribute1,
        #         BYTE field2, STRING attribute2)
        # OUTPUT: CQI_DATA_INT_LIST
        # returns <n> (id1, id2, frequency) pairs flattened into a list of size
        # 3*<n>
        # NB: triples are sorted by frequency desc.
        self.__send_WORD(CQi.CQP_FDIST_2)
        self.__send_STRING(subcorpus)
        self.__send_INT(cutoff)
        self.__send_BYTE(field1)
        self.__send_STRING(attribute1)
        self.__send_BYTE(field2)
        self.__send_STRING(attribute2)
        return self.__recv_response()
    def __recv_response(self):
        """
        Read one response from the server.

        Returns the status word for status responses and the decoded payload
        for data responses. Raises Exception for error responses and for
        response groups this client does not know.
        """
        word = self.__recv_WORD()
        # The high byte of the response word is the response group.
        response_type = word >> 8
        if response_type == CQi.STATUS:
            return word
        if response_type == CQi.DATA:
            return self.__recv_DATA(word)
        if response_type in (CQi.ERROR, CQi.CL_ERROR, CQi.CQP_ERROR):
            # Fall back to the hex code so an error code missing from the
            # lookup table still yields a readable message, not a KeyError.
            raise Exception(
                CQi.lookup.get(
                    word, 'Unknown error code: {}'.format(hex(word))
                )
            )
        raise Exception(
            'Unknown response type: {}'.format(hex(response_type))
        )
    def __recv_DATA(self, data_type):
        """Decode the payload announced by the CQI_DATA_* word data_type."""
        readers = {
            CQi.DATA_BYTE: self.__recv_DATA_BYTE,
            CQi.DATA_BOOL: self.__recv_DATA_BOOL,
            CQi.DATA_INT: self.__recv_DATA_INT,
            CQi.DATA_STRING: self.__recv_DATA_STRING,
            CQi.DATA_BYTE_LIST: self.__recv_DATA_BYTE_LIST,
            CQi.DATA_BOOL_LIST: self.__recv_DATA_BOOL_LIST,
            CQi.DATA_INT_LIST: self.__recv_DATA_INT_LIST,
            CQi.DATA_STRING_LIST: self.__recv_DATA_STRING_LIST,
            CQi.DATA_INT_INT: self.__recv_DATA_INT_INT,
            CQi.DATA_INT_INT_INT_INT: self.__recv_DATA_INT_INT_INT_INT,
            CQi.DATA_INT_TABLE: self.__recv_DATA_INT_TABLE,
        }
        try:
            reader = readers[data_type]
        except KeyError:
            raise Exception('Unknown data type: {}'.format(hex(data_type)))
        return reader()
    def __recv_exact(self, n):
        """
        Read exactly n bytes from the socket.

        socket.recv may hand back fewer bytes than requested, so keep reading
        until the buffer is complete. Raises ConnectionError if the peer
        closes the connection first.
        """
        chunks = []
        missing = n
        while missing > 0:
            chunk = self.connection.recv(missing)
            if not chunk:
                raise ConnectionError(
                    'Connection closed by CQi server while reading response'
                )
            chunks.append(chunk)
            missing -= len(chunk)
        return b''.join(chunks)
    def __recv_DATA_BYTE(self):
        return struct.unpack('!B', self.__recv_exact(1))[0]
    def __recv_DATA_BOOL(self):
        return struct.unpack('!?', self.__recv_exact(1))[0]
    def __recv_DATA_INT(self):
        return struct.unpack('!i', self.__recv_exact(4))[0]
    def __recv_DATA_STRING(self):
        # A string is a 2-byte length followed by that many encoded bytes.
        n = self.__recv_WORD()
        return self.__recv_exact(n).decode()
    def __recv_DATA_BYTE_LIST(self):
        n = self.__recv_DATA_INT()
        return [self.__recv_DATA_BYTE() for _ in range(n)]
    def __recv_DATA_BOOL_LIST(self):
        n = self.__recv_DATA_INT()
        return [self.__recv_DATA_BOOL() for _ in range(n)]
    def __recv_DATA_INT_LIST(self):
        n = self.__recv_DATA_INT()
        if n <= 0:
            return []
        # Read and unpack the whole list at once instead of one recv per int.
        return list(struct.unpack('!{}i'.format(n), self.__recv_exact(4 * n)))
    def __recv_DATA_STRING_LIST(self):
        n = self.__recv_DATA_INT()
        return [self.__recv_DATA_STRING() for _ in range(n)]
    def __recv_DATA_INT_INT(self):
        # Fixed: this used to call the non-existent __recv_INT.
        return (self.__recv_DATA_INT(), self.__recv_DATA_INT())
    def __recv_DATA_INT_INT_INT_INT(self):
        # Fixed: this used to call the non-existent __recv_INT.
        return (self.__recv_DATA_INT(),
                self.__recv_DATA_INT(),
                self.__recv_DATA_INT(),
                self.__recv_DATA_INT())
    def __recv_DATA_INT_TABLE(self):
        rows = self.__recv_DATA_INT()
        columns = self.__recv_DATA_INT()
        return [
            [self.__recv_DATA_INT() for _ in range(columns)]
            for _ in range(rows)
        ]
    def __recv_WORD(self):
        return struct.unpack('!H', self.__recv_exact(2))[0]
    def __send_BYTE(self, byte_data):
        self.connection.sendall(struct.pack('!B', byte_data))
    def __send_BOOL(self, bool_data):
        self.connection.sendall(struct.pack('!?', bool_data))
    def __send_INT(self, int_data):
        self.connection.sendall(struct.pack('!i', int_data))
    def __send_STRING(self, string_data):
        # A string is sent as a 2-byte length followed by its UTF-8 bytes.
        encoded_string_data = string_data.encode('utf-8')
        n = len(encoded_string_data)
        data = struct.pack('!H{}s'.format(n), n, encoded_string_data)
        self.connection.sendall(data)
    def __send_INT_LIST(self, int_list_data):
        # Length prefix and all items go out in a single sendall; the byte
        # stream is the same as sending them one by one.
        n = len(int_list_data)
        data = struct.pack('!{}i'.format(n + 1), n, *int_list_data)
        self.connection.sendall(data)
    def __send_STRING_LIST(self, string_list_data):
        self.__send_INT(len(string_list_data))
        for string_data in string_list_data:
            self.__send_STRING(string_data)
    def __send_WORD(self, word_data):
        self.connection.sendall(struct.pack('!H', word_data))
--- a/app/corpora/CQiWrapper/CQiWrapper.py
+++ b/app/corpora/CQiWrapper/CQiWrapper.py
@@ -0,0 +1,240 @@
 from .CQiClient import CQiClient
 import multiprocessing
 import collections
 import socket
 class CQiWrapper(CQiClient):
    """
    CQiWrapper object
    High level wrapper that groups and renames some functions of CQiClient
    for ease of use. Also structures received data into python dictionaries.
    Keyword arguments:
    username -- username used to connect to the cqp server
    password -- password of the user to connect to the cqp server
    """
    SUBCORPUS_NAMES = []
    def __init__(self, host='127.0.0.1', port=4877, username='opaque',
                 password='opaque'):
        """
        Create a wrapper for one CQP server.
        Keyword arguments:
        host -- passed on to CQiClient.__init__ (default '127.0.0.1')
        port -- passed on to CQiClient.__init__ (default 4877)
        username -- stored and handed to ctrl_connect by connect()
        password -- stored and handed to ctrl_connect by connect()
        """
        super(CQiWrapper, self).__init__(host=host, port=port)
        self.username = username
        self.password = password
        # SUBCORPUS_NAMES is declared as a mutable list on the class, so
        # without this every wrapper would append to one shared list. Give
        # each instance its own list; the attribute name stays the same.
        self.SUBCORPUS_NAMES = []
    def connect(self):
        """
        Connect with CQP server
        Hands the username and password stored on this instance to
        ctrl_connect.
        """
        credentials = (self.username, self.password)
        self.ctrl_connect(*credentials)
    def create_attribute_strings(self, corpus_name):
        """
        Build the qualified attribute names of a corpus
        Stores '<corpus_name>.<attribute>' strings for word, lemma, pos, sem
        and entry on the instance, derives the '_author' and '_title'
        variants from the entry name and collects all seven names in
        self.attributes.
        Keyword arguments:
        corpus_name -- name of the corpus used as prefix
        """
        prefix = corpus_name + '.'
        self.word_str = prefix + 'word'
        self.lemma_str = prefix + 'lemma'
        self.pos_str = prefix + 'pos'
        self.sem_str = prefix + 'sem'
        self.entry_str = prefix + 'entry'
        self.entry_author_str = self.entry_str + '_author'
        self.entry_title_str = self.entry_str + '_title'
        positional = [self.word_str, self.lemma_str, self.pos_str,
                      self.sem_str]
        structural = [self.entry_str, self.entry_author_str,
                      self.entry_title_str]
        self.attributes = positional + structural
    def disconnect(self):
        """
        Disconnect from CQP server
        Calls ctrl_bye and then closes self.connection. The connection is
        closed even if ctrl_bye raises, so a failed goodbye does not leave
        it open; the exception is still propagated to the caller.
        """
        try:
            self.ctrl_bye()
        finally:
            self.connection.close()
    def query_subcorpus(self, corpus_name, result_subcorpus_name, query):
        """
        Create subcorpus
        Runs the query through cqp_query, remembers the qualified subcorpus
        name ('<corpus_name>:<result_subcorpus_name>') in
        self.result_subcorpus_ns and in SUBCORPUS_NAMES, stores the size
        reported by cqp_subcorpus_size in self.nr_matches and prints it.
        Keyword arguments:
        corpus_name -- name of the corpus the query will be used on
        result_subcorpus_name -- user set name of the subcorpus which holds
        all cpos match positions, produced by the query
        query -- query written in cqp query language
        """
        self.cqp_query(corpus_name, result_subcorpus_name, query)
        qualified_name = ':'.join((corpus_name, result_subcorpus_name))
        self.result_subcorpus_ns = qualified_name
        self.SUBCORPUS_NAMES.append(qualified_name)
        match_count = self.cqp_subcorpus_size(qualified_name)
        self.nr_matches = match_count
        print('Nr of all matches is:', match_count)
    def show_subcorpora(self):
        """
        Print and return the subcorpus names recorded in SUBCORPUS_NAMES.
        The returned object is the list itself, not a copy.
        """
        known_names = self.SUBCORPUS_NAMES
        print('Known subcorpora:', known_names)
        return known_names
    def show_results(self,
                     corpus_name,
                     result_start_count=0,
                     result_max_count=50,
                     context_len=10,):
        """
        Show query results
        Shows the actual matched strings produced by the last query. Uses the
        cpos match indexes of the result subcorpus to grab those strings and
        saves them into an ordered dictionary, together with the
        corresponding tags, lemmas and context. The dictionary is keyed by
        the position of the match inside the requested window:
        OrderedDict([
            (0,
                {
                    'tokens': ['Big', 'Brother', 'himself'],
                    'lemmas': ['big', 'brother', 'himself'],
                    'pos_tags': ['JJ', 'NN1', 'PPX1'],
                    'sem_tags': ['|A11.1+|N3.2+|N5+|', '|S2.2m|S4m|S9/S2.2m|',
                                 '|Z8m|'],
                    'context_before': ['figures', 'of', 'the', 'Party', ',',
                                       'almost', 'on', 'a', 'level', 'with'],
                    'context_after': [',', 'and', 'then', 'had', 'engaged',
                                      'in', 'counter-revolu-', 'tionary',
                                      'activities', ','],
                    'entry_title': '1984', 'entry_author':
                    'george_orwell',
                    'cpos_start': 110490,
                    'cpos_end': 110492
                }
            )
        ])
        Requires a preceding query_subcorpus() call, which sets
        self.nr_matches and self.result_subcorpus_ns.
        Keyword arguments:
        corpus_name -- name of the parent corpus the subcorpus is part of
        result_start_count -- index of the first match to show (default 0).
        If it is 0 matches 0 to 49 will be shown. If it is 50 matches 50 to
        99 will be shown.
        result_max_count -- defines how many matches at once will be shown.
        (default 50)
        context_len -- defines how many words before and after a match will be
        shown (default 10)
        Returns the ordered dictionary that is printed; it is empty when
        there is nothing to show.
        """
        self.context_len = context_len
        word_str = corpus_name + '.word'
        self.corpus_max_len = self.cl_attribute_size(word_str)
        ordered_results = collections.OrderedDict()
        if self.nr_matches == 0:
            print('Query resulted in 0 matches.')
            return ordered_results
        # Requested window, clamped to the matches that actually exist. Both
        # bounds are inclusive match indexes.
        first = max(0, result_start_count)
        last = min(first + result_max_count, self.nr_matches) - 1
        index_pairs = []
        if first <= last:
            # 0x10 / 0x11 select the match start / match end field
            # (CQI_CONST_FIELD_MATCH / CQI_CONST_FIELD_MATCHEND in the CQi
            # specification).
            matches_start = self.cqp_dump_subcorpus(self.result_subcorpus_ns,
                                                    0x10,
                                                    first,
                                                    last)
            matches_end = self.cqp_dump_subcorpus(self.result_subcorpus_ns,
                                                  0x11,
                                                  first,
                                                  last)
            index_pairs = list(zip(matches_start, matches_end))
        if index_pairs:
            # One worker process per match; every worker writes its result
            # into the shared manager dictionary under its serial number.
            # The with block shuts the manager process down afterwards.
            with multiprocessing.Manager() as manager:
                return_dict = manager.dict()
                workers = []
                for i, index_pair in enumerate(index_pairs):
                    worker = multiprocessing.Process(
                        target=self.__get_matches,
                        args=(i, index_pair, corpus_name, return_dict))
                    workers.append(worker)
                    worker.start()
                for worker in workers:
                    worker.join()
                #  sort matches into ordered dict (copied out of the manager
                #  before it is shut down)
                for key in sorted(return_dict.keys()):
                    ordered_results[key] = return_dict[key]
        print('ORDERED_RESULTS', ordered_results)
        return ordered_results
    def __get_matches(self, i, index_pair, corpus_name, return_dict):
        """
        Get matches as readable output
        Gets the actual match strings of cpos match indexes. Private helper
        method used in show_results, where it runs as the target of a
        separate process. Opens its own temporary CQiWrapper session for the
        lookups and closes it again, also when a lookup fails.
        Reads self.context_len and self.corpus_max_len, which show_results
        sets before starting the workers.
        Keyword arguments:
        i -- serial number for match at given cpos
        index_pair -- match start and match end cpos (both inclusive)
        corpus_name -- name of the parent corpus; the attribute names are
        derived from it
        return_dict -- dictionary created with manager.dict() that holds the
        extracted strings tags etc. The result is stored under key i.
        """
        print('START:', index_pair[0])
        print('END:', index_pair[1])
        print('=============================')
        match_start, match_end = index_pair[0], index_pair[1]
        # Derive the attribute names from corpus_name instead of relying on
        # create_attribute_strings() having been called for the same corpus.
        word_str = corpus_name + '.word'
        lemma_str = corpus_name + '.lemma'
        pos_str = corpus_name + '.pos'
        sem_str = corpus_name + '.sem'
        entry_str = corpus_name + '.entry'
        entry_title_str = entry_str + '_title'
        entry_author_str = entry_str + '_author'
        match_range = range(match_start, match_end + 1)
        before_index = max(0, match_start - self.context_len)
        # corpus_max_len is the size of the word attribute and cpos start at
        # 0, so the last position that can be requested is corpus_max_len - 1.
        after_index = min(self.corpus_max_len - 1,
                          match_end + self.context_len)
        tmp_session = CQiWrapper(username=self.username,
                                 password=self.password,
                                 host=self.host, port=self.port)
        tmp_session.connect()
        try:
            tokens = tmp_session.cl_cpos2str(word_str, match_range)
            lemmas = tmp_session.cl_cpos2str(lemma_str, match_range)
            pos_tags = tmp_session.cl_cpos2str(pos_str, match_range)
            sem_tags = tmp_session.cl_cpos2str(sem_str, match_range)
            struc_entry = tmp_session.cl_cpos2struc(entry_str, match_range)
            context_before = tmp_session.cl_cpos2str(word_str,
                                                     range(before_index,
                                                           match_start))
            context_after = tmp_session.cl_cpos2str(word_str,
                                                    range(match_end + 1,
                                                          after_index + 1))
            entry_titles = tmp_session.cl_struc2str(entry_title_str,
                                                    struc_entry)
            entry_authors = tmp_session.cl_struc2str(entry_author_str,
                                                     struc_entry)
            # Title and author are taken from the first token of the match.
            return_dict[i] = {'tokens': tokens,
                              'lemmas': lemmas,
                              'pos_tags': pos_tags,
                              'sem_tags': sem_tags,
                              'context_before': context_before,
                              'context_after': context_after,
                              'entry_title': entry_titles[0],
                              'entry_author': entry_authors[0],
                              'cpos_start': match_start,
                              'cpos_end': match_end}
        finally:
            # Always release the temporary session, even if a lookup raised.
            tmp_session.disconnect()
    def get_cpos_info(self, cpos):
        """
        Print the attribute values for a span of corpus positions
        Looks up every name in self.attributes that does not contain
        '.entry' with cl_cpos2str and prints the collected values as an
        ordered dictionary keyed by attribute name.
        Keyword arguments:
        cpos -- pair (start, end) of corpus positions; positions start up to
        end - 1 are looked up, end itself is not included
        """
        info = collections.OrderedDict()
        for attribute_name in self.attributes:
            if '.entry' in attribute_name:
                # entry attributes are skipped here
                continue
            info[attribute_name] = self.cl_cpos2str(attribute_name,
                                                    range(cpos[0], cpos[1]))
        print(info)