Add package implementation of cqi

2026-01-09 04:10:54 +00:00 · 2020-03-23 09:10:35 +01:00
parent 7752e7fb57
commit acfcc0321b
13 changed files with 353 additions and 33 deletions
--- a/app/corpora/cqi/__init__.py
+++ b/app/corpora/cqi/__init__.py
@@ -0,0 +1,9 @@
+# flake8: noqa
+from .api import APIClient
+from .client import CQiClient
+from .wrapper import CQiWrapper
+from .version import version, version_info
+
+
+__title__ = 'CQi'
+__version__ = version
--- a/app/corpora/cqi/api/__init__.py
+++ b/app/corpora/cqi/api/__init__.py
@@ -0,0 +1,2 @@
+# flake8: noqa
+from .client import APIClient
--- a/app/corpora/cqi/api/client.py
+++ b/app/corpora/cqi/api/client.py
@@ -0,0 +1,999 @@
+from time import sleep
+import socket
+import struct
+
+
+# ########################################################################### #
+# IMS CQi specification                                                       #
+#                                                                             #
+# Version: 0.1a ;o)                                                           #
+# Author: Stefan Evert (evert@ims.uni-stuttgart.de)                           #
+# Modified by (codestyle): Patrick Jentsch (p.jentsch@uni-bielefeld.de)       #
+# Modified date: Thurs Oct 10                                                 #
+# ########################################################################### #
+""" 1. padding """
+PAD = 0x00
+
+
+""" 2. CQi responses """
+""" 2.1 CQI_STATUS_* """
+STATUS = 0x01
+STATUS_OK = 0x0101
+STATUS_CONNECT_OK = 0x0102
+STATUS_BYE_OK = 0x0103
+STATUS_PING_OK = 0x0104
+
+""" 2.2 CQI_ERROR_*  """
+ERROR = 0x02
+ERROR_GENERAL_ERROR = 0x0201
+ERROR_CONNECT_REFUSED = 0x0202
+ERROR_USER_ABORT = 0x0203
+ERROR_SYNTAX_ERROR = 0x0204
+# includes corpus/attribute/subcorpus specifier syntax
+
+""" 2.3 CQI_DATA_* """
+DATA = 0x03
+DATA_BYTE = 0x0301
+DATA_BOOL = 0x0302
+DATA_INT = 0x0303
+DATA_STRING = 0x0304
+DATA_BYTE_LIST = 0x0305
+DATA_BOOL_LIST = 0x0306
+DATA_INT_LIST = 0x0307
+DATA_STRING_LIST = 0x0308
+DATA_INT_INT = 0x0309
+DATA_INT_INT_INT_INT = 0x030A
+DATA_INT_TABLE = 0x030B
+
+""" 2.4 CQI_CL_ERROR_* """
+"""
+" NOTE: some CL error codes are not represented in the CQi specs
+"       - usually because they're not used in the CL any more
+"       - CDA_ENOSTRING is not considered an error (returns -1)
+"       - CDA_EARGS: dynamic attribute calls not yet supported
+"""
+CL_ERROR = 0x04
+CL_ERROR_NO_SUCH_ATTRIBUTE = 0x0401
+# returned if CQi server couldn't open attribute
+CL_ERROR_WRONG_ATTRIBUTE_TYPE = 0x0402
+# CDA_EATTTYPE
+CL_ERROR_OUT_OF_RANGE = 0x0403
+# CDA_EIDORNG, CDA_EIDXORNG, CDA_EPOSORNG
+CL_ERROR_REGEX = 0x0404
+# CDA_EPATTERN (not used), CDA_EBADREGEX
+CL_ERROR_CORPUS_ACCESS = 0x0405
+# CDA_ENODATA
+CL_ERROR_OUT_OF_MEMORY = 0x0406
+# CDA_ENOMEM
+# this means the CQi server has run out of memory;
+# try discarding some other corpora and/or subcorpora
+CL_ERROR_INTERNAL = 0x0407
+# CDA_EOTHER, CDA_ENYI
+# this is the classical 'please contact technical support' error
+
+""" 2.5 CQI_CQP_ERROR_* """
+CQP_ERROR = 0x05
+# CQP error messages yet to be defined
+CQP_ERROR_GENERAL = 0x0501
+CQP_ERROR_NO_SUCH_CORPUS = 0x0502
+CQP_ERROR_INVALID_FIELD = 0x0503
+CQP_ERROR_OUT_OF_RANGE = 0x0504
+# various cases where a number is out of range
+
+
+""" 3. CQi commands """
+""" 3.1 CQI_CTRL_* """
+CTRL = 0x11
+CTRL_CONNECT = 0x1101
+# INPUT: (STRING username, STRING password)
+# OUTPUT: CQI_STATUS_CONNECT_OK, CQI_ERROR_CONNECT_REFUSED
+CTRL_BYE = 0x1102
+# INPUT: ()
+# OUTPUT: CQI_STATUS_BYE_OK
+CTRL_USER_ABORT = 0x1103
+# INPUT: ()
+# OUTPUT:
+CTRL_PING = 0x1104
+# INPUT: ()
+# OUTPUT: CQI_STATUS_PING_OK
+CTRL_LAST_GENERAL_ERROR = 0x1105
+# INPUT: ()
+# OUTPUT: CQI_DATA_STRING
+# full-text error message for the last general error reported by the CQi server
+
+""" 3.2 CQI_ASK_FEATURE_* """
+ASK_FEATURE = 0x12
+ASK_FEATURE_CQI_1_0 = 0x1201
+# INPUT: ()
+# OUTPUT: CQI_DATA_BOOL
+ASK_FEATURE_CL_2_3 = 0x1202
+# INPUT: ()
+# OUTPUT: CQI_DATA_BOOL
+ASK_FEATURE_CQP_2_3 = 0x1203
+# INPUT: ()
+# OUTPUT: CQI_DATA_BOOL
+
+""" 3.3 CQI_CORPUS_* """
+CORPUS = 0x13
+CORPUS_LIST_CORPORA = 0x1301
+# INPUT: ()
+# OUTPUT: CQI_DATA_STRING_LIST
+CORPUS_CHARSET = 0x1303
+# INPUT: (STRING corpus)
+# OUTPUT: CQI_DATA_STRING
+CORPUS_PROPERTIES = 0x1304
+# INPUT: (STRING corpus)
+# OUTPUT: CQI_DATA_STRING_LIST
+CORPUS_POSITIONAL_ATTRIBUTES = 0x1305
+# INPUT: (STRING corpus)
+# OUTPUT: CQI_DATA_STRING_LIST
+CORPUS_STRUCTURAL_ATTRIBUTES = 0x1306
+# INPUT: (STRING corpus)
+# OUTPUT: CQI_DATA_STRING_LIST
+CORPUS_STRUCTURAL_ATTRIBUTE_HAS_VALUES = 0x1307
+# INPUT: (STRING attribute)
+# OUTPUT: CQI_DATA_BOOL
+CORPUS_ALIGNMENT_ATTRIBUTES = 0x1308
+# INPUT: (STRING corpus)
+# OUTPUT: CQI_DATA_STRING_LIST
+CORPUS_FULL_NAME = 0x1309
+# INPUT: (STRING corpus)
+# OUTPUT: CQI_DATA_STRING
+# the full name of <corpus> as specified in its registry entry
+CORPUS_INFO = 0x130A
+# INPUT: (STRING corpus)
+# OUTPUT: CQI_DATA_STRING_LIST
+# returns the contents of the .info file of <corpus> as a list of lines
+CORPUS_DROP_CORPUS = 0x130B
+# INPUT: (STRING corpus)
+# OUTPUT: CQI_STATUS_OK
+# try to unload a corpus and all its attributes from memory
+
+""" 3.4 CQI_CL_* """
+CL = 0x14
+# low-level corpus access (CL functions)
+CL_ATTRIBUTE_SIZE = 0x1401
+# INPUT: (STRING attribute)
+# OUTPUT: CQI_DATA_INT
+# returns the size of <attribute>:
+# - number of tokens (positional)
+# - number of regions (structural)
+# - number of alignments (alignment)
+CL_LEXICON_SIZE = 0x1402
+# INPUT: (STRING attribute)
+# OUTPUT: CQI_DATA_INT
+# returns the number of entries in the lexicon of a positional attribute;
+# valid lexicon IDs range from 0 .. (lexicon_size - 1)
+CL_DROP_ATTRIBUTE = 0x1403
+# INPUT: (STRING attribute)
+# OUTPUT: CQI_STATUS_OK
+# unload attribute from memory
+"""
+" NOTE: simple (scalar) mappings are applied to lists (the returned list has
+"       exactly the same length as the list passed as an argument)
+"""
+CL_STR2ID = 0x1404
+# INPUT: (STRING attribute, STRING_LIST strings)
+# OUTPUT: CQI_DATA_INT_LIST
+# returns -1 for every string in <strings> that is not found in the lexicon
+CL_ID2STR = 0x1405
+# INPUT: (STRING attribute, INT_LIST id)
+# OUTPUT: CQI_DATA_STRING_LIST
+# returns "" for every ID in <id> that is out of range
+CL_ID2FREQ = 0x1406
+# INPUT: (STRING attribute, INT_LIST id)
+# OUTPUT: CQI_DATA_INT_LIST
+# returns 0 for every ID in <id> that is out of range
+CL_CPOS2ID = 0x1407
+# INPUT: (STRING attribute, INT_LIST cpos)
+# OUTPUT: CQI_DATA_INT_LIST
+# returns -1 for every corpus position in <cpos> that is out of range
+CL_CPOS2STR = 0x1408
+# INPUT: (STRING attribute, INT_LIST cpos)
+# OUTPUT: CQI_DATA_STRING_LIST
+# returns "" for every corpus position in <cpos> that is out of range
+CL_CPOS2STRUC = 0x1409
+# INPUT: (STRING attribute, INT_LIST cpos)
+# OUTPUT: CQI_DATA_INT_LIST
+# returns -1 for every corpus position not inside a structure region
+"""
+" NOTE: temporary addition for the Euralex2000 tutorial, but should probably be
+"       included in CQi specs
+"""
+CL_CPOS2LBOUND = 0x1420
+# INPUT: (STRING attribute, INT_LIST cpos)
+# OUTPUT: CQI_DATA_INT_LIST
+# returns left boundary of s-attribute region enclosing cpos, -1 if not in
+# region
+CL_CPOS2RBOUND = 0x1421
+# INPUT: (STRING attribute, INT_LIST cpos)
+# OUTPUT: CQI_DATA_INT_LIST
+# returns right boundary of s-attribute region enclosing cpos, -1 if not in
+# region
+CL_CPOS2ALG = 0x140A
+# INPUT: (STRING attribute, INT_LIST cpos)
+# OUTPUT: CQI_DATA_INT_LIST
+# returns -1 for every corpus position not inside an alignment
+CL_STRUC2STR = 0x140B
+# INPUT: (STRING attribute, INT_LIST strucs)
+# OUTPUT: CQI_DATA_STRING_LIST
+# returns annotated string values of structure regions in <strucs>; "" if out
+# of range
+# check CQI_CORPUS_STRUCTURAL_ATTRIBUTE_HAS_VALUES(<attribute>) first
+"""
+" NOTE: the following mappings take a single argument and return multiple
+"       values, including lists of arbitrary size
+"""
+CL_ID2CPOS = 0x140C
+# INPUT: (STRING attribute, INT id)
+# OUTPUT: CQI_DATA_INT_LIST
+# returns all corpus positions where the given token occurs
+CL_IDLIST2CPOS = 0x140D
+# INPUT: (STRING attribute, INT_LIST id_list)
+# OUTPUT: CQI_DATA_INT_LIST
+# returns all corpus positions where one of the tokens in <id_list>
+# occurs; the returned list is sorted as a whole, not per token id
+CL_REGEX2ID = 0x140E
+# INPUT: (STRING attribute, STRING regex)
+# OUTPUT: CQI_DATA_INT_LIST
+# returns lexicon IDs of all tokens that match <regex>; the returned
+# list may be empty (size 0);
+CL_STRUC2CPOS = 0x140F
+# INPUT: (STRING attribute, INT struc)
+# OUTPUT: CQI_DATA_INT_INT
+# returns start and end corpus positions of structure region <struc>
+CL_ALG2CPOS = 0x1410
+# INPUT: (STRING attribute, INT alg)
+# OUTPUT: CQI_DATA_INT_INT_INT_INT
+# returns (src_start, src_end, target_start, target_end)
+
+""" 3.5 CQI_CQP_* """
+CQP = 0x15
+CQP_QUERY = 0x1501
+# INPUT: (STRING mother_corpus, STRING subcorpus_name, STRING query)
+# OUTPUT: CQI_STATUS_OK
+# <query> must include the ';' character terminating the query.
+CQP_LIST_SUBCORPORA = 0x1502
+# INPUT: (STRING corpus)
+# OUTPUT: CQI_DATA_STRING_LIST
+CQP_SUBCORPUS_SIZE = 0x1503
+# INPUT: (STRING subcorpus)
+# OUTPUT: CQI_DATA_INT
+CQP_SUBCORPUS_HAS_FIELD = 0x1504
+# INPUT: (STRING subcorpus, BYTE field)
+# OUTPUT: CQI_DATA_BOOL
+CQP_DUMP_SUBCORPUS = 0x1505
+# INPUT: (STRING subcorpus, BYTE field, INT first, INT last)
+# OUTPUT: CQI_DATA_INT_LIST
+# Dump the values of <field> for match ranges <first> .. <last> in <subcorpus>.
+# <field> is one of the CQI_CONST_FIELD_* constants.
+CQP_DROP_SUBCORPUS = 0x1509
+# INPUT: (STRING subcorpus)
+# OUTPUT: CQI_STATUS_OK
+# delete a subcorpus from memory
+"""
+" NOTE: The following two functions are temporarily included for the Euralex
+"       2000 tutorial demo
+"""
+""" NOTE: frequency distribution of single tokens """
+CQP_FDIST_1 = 0x1510
+# INPUT: (STRING subcorpus, INT cutoff, BYTE field, STRING attribute)
+# OUTPUT: CQI_DATA_INT_LIST
+# returns <n> (id, frequency) pairs flattened into a list of size 2*<n>
+# field is one of CQI_CONST_FIELD_MATCH, CQI_CONST_FIELD_TARGET,
+# CQI_CONST_FIELD_KEYWORD
+# NB: pairs are sorted by frequency desc.
+""" NOTE: frequency distribution of pairs of tokens """
+CQP_FDIST_2 = 0x1511
+# INPUT: (STRING subcorpus, INT cutoff, BYTE field1, STRING attribute1,
+#         BYTE field2, STRING attribute2)
+# OUTPUT: CQI_DATA_INT_LIST
+# returns <n> (id1, id2, frequency) pairs flattened into a list of size 3*<n>
+# NB: triples are sorted by frequency desc.
+
+
+""" 4. Constant Definitions """
+CONST_FALSE = 0x00
+CONST_NO = 0x00
+CONST_TRUE = 0x01
+CONST_YES = 0x01
+"""
+" NOTE: The following constants specify which field will be returned by
+"       CQI_CQP_DUMP_SUBCORPUS and some other subcorpus commands.
+"""
+CONST_FIELD_MATCH = 0x10
+CONST_FIELD_MATCHEND = 0x11
+"""
+" NOTE: The constants specifiying target0 .. target9 are guaranteed to have the
+" numerical values 0 .. 9, so clients do not need to look up the constant
+" values if they're handling arbitrary targets.
+"""
+CONST_FIELD_TARGET_0 = 0x00
+CONST_FIELD_TARGET_1 = 0x01
+CONST_FIELD_TARGET_2 = 0x02
+CONST_FIELD_TARGET_3 = 0x03
+CONST_FIELD_TARGET_4 = 0x04
+CONST_FIELD_TARGET_5 = 0x05
+CONST_FIELD_TARGET_6 = 0x06
+CONST_FIELD_TARGET_7 = 0x07
+CONST_FIELD_TARGET_8 = 0x08
+CONST_FIELD_TARGET_9 = 0x09
+"""
+" NOTE: The following constants are provided for backward compatibility with
+"       traditional CQP field names & while the generalised target concept
+"       isn't yet implemented in the CQPserver.
+"""
+CONST_FIELD_TARGET = 0x00
+CONST_FIELD_KEYWORD = 0x09
+""" NOTE: CQi version is CQI_MAJOR_VERSION.CQI_MINOR_VERSION """
+MAJOR_VERSION = 0x00
+MINOR_VERSION = 0x01
+
+
+""" 5. CQi lookup dictionary. """
+lookup = {
+    # Maps every numeric CQi code to its symbolic name. Keys are written in
+    # hex so they can be compared directly with the constant table above.
+    # CQI_STATUS_*
+    0x0101: 'CQI_STATUS_OK',
+    0x0102: 'CQI_STATUS_CONNECT_OK',
+    0x0103: 'CQI_STATUS_BYE_OK',
+    0x0104: 'CQI_STATUS_PING_OK',
+    # CQI_ERROR_*
+    0x0201: 'CQI_ERROR_GENERAL_ERROR',
+    0x0202: 'CQI_ERROR_CONNECT_REFUSED',
+    0x0203: 'CQI_ERROR_USER_ABORT',
+    0x0204: 'CQI_ERROR_SYNTAX_ERROR',
+    # CQI_DATA_*
+    0x0301: 'CQI_DATA_BYTE',
+    0x0302: 'CQI_DATA_BOOL',
+    0x0303: 'CQI_DATA_INT',
+    0x0304: 'CQI_DATA_STRING',
+    0x0305: 'CQI_DATA_BYTE_LIST',
+    0x0306: 'CQI_DATA_BOOL_LIST',
+    0x0307: 'CQI_DATA_INT_LIST',
+    0x0308: 'CQI_DATA_STRING_LIST',
+    0x0309: 'CQI_DATA_INT_INT',
+    0x030A: 'CQI_DATA_INT_INT_INT_INT',
+    0x030B: 'CQI_DATA_INT_TABLE',
+    # CQI_CL_ERROR_*
+    0x0401: 'CQI_CL_ERROR_NO_SUCH_ATTRIBUTE',
+    0x0402: 'CQI_CL_ERROR_WRONG_ATTRIBUTE_TYPE',
+    0x0403: 'CQI_CL_ERROR_OUT_OF_RANGE',
+    0x0404: 'CQI_CL_ERROR_REGEX',
+    0x0405: 'CQI_CL_ERROR_CORPUS_ACCESS',
+    0x0406: 'CQI_CL_ERROR_OUT_OF_MEMORY',
+    0x0407: 'CQI_CL_ERROR_INTERNAL',
+    # CQI_CQP_ERROR_*
+    0x0501: 'CQI_CQP_ERROR_GENERAL',
+    0x0502: 'CQI_CQP_ERROR_NO_SUCH_CORPUS',
+    0x0503: 'CQI_CQP_ERROR_INVALID_FIELD',
+    0x0504: 'CQI_CQP_ERROR_OUT_OF_RANGE',
+    # CQI_CTRL_*
+    0x1101: 'CQI_CTRL_CONNECT',
+    0x1102: 'CQI_CTRL_BYE',
+    0x1103: 'CQI_CTRL_USER_ABORT',
+    0x1104: 'CQI_CTRL_PING',
+    0x1105: 'CQI_CTRL_LAST_GENERAL_ERROR',
+    # CQI_ASK_FEATURE_*
+    0x1201: 'CQI_ASK_FEATURE_CQI_1_0',
+    0x1202: 'CQI_ASK_FEATURE_CL_2_3',
+    0x1203: 'CQI_ASK_FEATURE_CQP_2_3',
+    # CQI_CORPUS_*
+    0x1301: 'CQI_CORPUS_LIST_CORPORA',
+    0x1303: 'CQI_CORPUS_CHARSET',
+    0x1304: 'CQI_CORPUS_PROPERTIES',
+    0x1305: 'CQI_CORPUS_POSITIONAL_ATTRIBUTES',
+    0x1306: 'CQI_CORPUS_STRUCTURAL_ATTRIBUTES',
+    0x1307: 'CQI_CORPUS_STRUCTURAL_ATTRIBUTE_HAS_VALUES',
+    0x1308: 'CQI_CORPUS_ALIGNMENT_ATTRIBUTES',
+    0x1309: 'CQI_CORPUS_FULL_NAME',
+    0x130A: 'CQI_CORPUS_INFO',
+    0x130B: 'CQI_CORPUS_DROP_CORPUS',
+    # CQI_CL_*
+    0x1401: 'CQI_CL_ATTRIBUTE_SIZE',
+    0x1402: 'CQI_CL_LEXICON_SIZE',
+    0x1403: 'CQI_CL_DROP_ATTRIBUTE',
+    0x1404: 'CQI_CL_STR2ID',
+    0x1405: 'CQI_CL_ID2STR',
+    0x1406: 'CQI_CL_ID2FREQ',
+    0x1407: 'CQI_CL_CPOS2ID',
+    0x1408: 'CQI_CL_CPOS2STR',
+    0x1409: 'CQI_CL_CPOS2STRUC',
+    0x140A: 'CQI_CL_CPOS2ALG',
+    0x140B: 'CQI_CL_STRUC2STR',
+    0x140C: 'CQI_CL_ID2CPOS',
+    0x140D: 'CQI_CL_IDLIST2CPOS',
+    0x140E: 'CQI_CL_REGEX2ID',
+    0x140F: 'CQI_CL_STRUC2CPOS',
+    0x1410: 'CQI_CL_ALG2CPOS',
+    0x1420: 'CQI_CL_CPOS2LBOUND',
+    0x1421: 'CQI_CL_CPOS2RBOUND',
+    # CQI_CQP_*
+    0x1501: 'CQI_CQP_QUERY',
+    0x1502: 'CQI_CQP_LIST_SUBCORPORA',
+    0x1503: 'CQI_CQP_SUBCORPUS_SIZE',
+    0x1504: 'CQI_CQP_SUBCORPUS_HAS_FIELD',
+    0x1505: 'CQI_CQP_DUMP_SUBCORPUS',
+    0x1509: 'CQI_CQP_DROP_SUBCORPUS',
+    0x1510: 'CQI_CQP_FDIST_1',
+    0x1511: 'CQI_CQP_FDIST_2',
+}
+
+
+# ########################################################################### #
+# IMS CQi client                                                              #
+#                                                                             #
+# Version: 0.1a                                                               #
+# Author: Patrick Jentsch (p.jentsch@uni-bielefeld.de)                        #
+# ########################################################################### #
+class APIClient:
+    def __init__(self, host, port=4877):
+        """Store the server address and allocate an unconnected socket.
+
+        :param host: host name or address of the CQi server
+        :param port: TCP port of the CQi server (defaults to 4877)
+        """
+        self.host, self.port = host, port
+        self.socket = socket.socket()
+
+    def setup(self):
+        """Connect the client socket to ``(self.host, self.port)``."""
+        address = (self.host, self.port)
+        self.socket.connect(address)
+
+    def teardown(self):
+        """Close the client socket."""
+        connection = self.socket
+        connection.close()
+
+    def ctrl_connect(self, username, password):
+        """Send CQI_CTRL_CONNECT with the credentials and return the reply.
+
+        INPUT: (STRING username, STRING password)
+        OUTPUT: CQI_STATUS_CONNECT_OK, CQI_ERROR_CONNECT_REFUSED
+        """
+        self.__send_WORD(CTRL_CONNECT)
+        for credential in (username, password):
+            self.__send_STRING(credential)
+        response = self.__recv_response()
+        return response
+
+    def ctrl_bye(self):
+        """Send CQI_CTRL_BYE and return the server's response.
+
+        INPUT: ()
+        OUTPUT: CQI_STATUS_BYE_OK
+        """
+        command = CTRL_BYE
+        self.__send_WORD(command)
+        response = self.__recv_response()
+        return response
+
+    def ctrl_user_abort(self):
+        """Send CQI_CTRL_USER_ABORT; no response is read back.
+
+        INPUT: ()
+        OUTPUT: (none)
+        """
+        command = CTRL_USER_ABORT
+        self.__send_WORD(command)
+
+    def ctrl_ping(self):
+        """Send CQI_CTRL_PING and return the server's response.
+
+        INPUT: ()
+        OUTPUT: CQI_STATUS_PING_OK
+        """
+        command = CTRL_PING
+        self.__send_WORD(command)
+        response = self.__recv_response()
+        return response
+
+    def ctrl_last_general_error(self):
+        """Send CQI_CTRL_LAST_GENERAL_ERROR and return the server's response.
+
+        INPUT: ()
+        OUTPUT: CQI_DATA_STRING
+        The reply is the full-text error message for the last general error
+        reported by the CQi server.
+        """
+        command = CTRL_LAST_GENERAL_ERROR
+        self.__send_WORD(command)
+        response = self.__recv_response()
+        return response
+
+    def ask_feature_cqi_1_0(self):
+        """Send CQI_ASK_FEATURE_CQI_1_0 and return the server's response.
+
+        INPUT: ()
+        OUTPUT: CQI_DATA_BOOL
+        """
+        command = ASK_FEATURE_CQI_1_0
+        self.__send_WORD(command)
+        response = self.__recv_response()
+        return response
+
+    def ask_feature_cl_2_3(self):
+        """Send CQI_ASK_FEATURE_CL_2_3 and return the server's response.
+
+        INPUT: ()
+        OUTPUT: CQI_DATA_BOOL
+        """
+        command = ASK_FEATURE_CL_2_3
+        self.__send_WORD(command)
+        response = self.__recv_response()
+        return response
+
+    def ask_feature_cqp_2_3(self):
+        """Send CQI_ASK_FEATURE_CQP_2_3 and return the server's response.
+
+        INPUT: ()
+        OUTPUT: CQI_DATA_BOOL
+        """
+        # Must be the CQP feature opcode; sending ASK_FEATURE_CL_2_3 here
+        # would silently answer the CL feature question instead.
+        self.__send_WORD(ASK_FEATURE_CQP_2_3)
+        return self.__recv_response()
+
+    def corpus_list_corpora(self):
+        """Send CQI_CORPUS_LIST_CORPORA and return the server's response.
+
+        INPUT: ()
+        OUTPUT: CQI_DATA_STRING_LIST
+        """
+        self.__send_WORD(CORPUS_LIST_CORPORA)
+        return self.__recv_response()
+
+    # Backward-compatible alias: the method was first published under this
+    # misspelled name, so existing callers keep working.
+    corpus_list_coprora = corpus_list_corpora
+
+    def corpus_charset(self, corpus):
+        """Send CQI_CORPUS_CHARSET for <corpus> and return the response.
+
+        INPUT: (STRING corpus)
+        OUTPUT: CQI_DATA_STRING
+        """
+        command = CORPUS_CHARSET
+        self.__send_WORD(command)
+        self.__send_STRING(corpus)
+        response = self.__recv_response()
+        return response
+
+    def corpus_properties(self, corpus):
+        """Send CQI_CORPUS_PROPERTIES for <corpus> and return the response.
+
+        INPUT: (STRING corpus)
+        OUTPUT: CQI_DATA_STRING_LIST
+        """
+        command = CORPUS_PROPERTIES
+        self.__send_WORD(command)
+        self.__send_STRING(corpus)
+        response = self.__recv_response()
+        return response
+
+    def corpus_positional_attributes(self, corpus):
+        """Send CQI_CORPUS_POSITIONAL_ATTRIBUTES and return the response.
+
+        INPUT: (STRING corpus)
+        OUTPUT: CQI_DATA_STRING_LIST
+        """
+        command = CORPUS_POSITIONAL_ATTRIBUTES
+        self.__send_WORD(command)
+        self.__send_STRING(corpus)
+        response = self.__recv_response()
+        return response
+
+    def corpus_structural_attributes(self, corpus):
+        """Send CQI_CORPUS_STRUCTURAL_ATTRIBUTES and return the response.
+
+        INPUT: (STRING corpus)
+        OUTPUT: CQI_DATA_STRING_LIST
+        """
+        command = CORPUS_STRUCTURAL_ATTRIBUTES
+        self.__send_WORD(command)
+        self.__send_STRING(corpus)
+        response = self.__recv_response()
+        return response
+
+    def corpus_structural_attribute_has_values(self, attribute):
+        """Send CQI_CORPUS_STRUCTURAL_ATTRIBUTE_HAS_VALUES; return the reply.
+
+        INPUT: (STRING attribute)
+        OUTPUT: CQI_DATA_BOOL
+        """
+        command = CORPUS_STRUCTURAL_ATTRIBUTE_HAS_VALUES
+        self.__send_WORD(command)
+        self.__send_STRING(attribute)
+        response = self.__recv_response()
+        return response
+
+    def corpus_alignment_attributes(self, corpus):
+        """Send CQI_CORPUS_ALIGNMENT_ATTRIBUTES and return the response.
+
+        INPUT: (STRING corpus)
+        OUTPUT: CQI_DATA_STRING_LIST
+        """
+        command = CORPUS_ALIGNMENT_ATTRIBUTES
+        self.__send_WORD(command)
+        self.__send_STRING(corpus)
+        response = self.__recv_response()
+        return response
+
+    def corpus_full_name(self, corpus):
+        """Send CQI_CORPUS_FULL_NAME for <corpus> and return the response.
+
+        INPUT: (STRING corpus)
+        OUTPUT: CQI_DATA_STRING
+        The reply is the full name of <corpus> as specified in its registry
+        entry.
+        """
+        command = CORPUS_FULL_NAME
+        self.__send_WORD(command)
+        self.__send_STRING(corpus)
+        response = self.__recv_response()
+        return response
+
+    def corpus_info(self, corpus):
+        """Send CQI_CORPUS_INFO for <corpus> and return the response.
+
+        INPUT: (STRING corpus)
+        OUTPUT: CQI_DATA_STRING_LIST
+        The reply holds the contents of the .info file of <corpus> as a list
+        of lines.
+        """
+        command = CORPUS_INFO
+        self.__send_WORD(command)
+        self.__send_STRING(corpus)
+        response = self.__recv_response()
+        return response
+
+    def corpus_drop_corpus(self, corpus):
+        """Send CQI_CORPUS_DROP_CORPUS for <corpus> and return the response.
+
+        INPUT: (STRING corpus)
+        OUTPUT: CQI_STATUS_OK
+        Asks the server to try to unload a corpus and all its attributes from
+        memory.
+        """
+        command = CORPUS_DROP_CORPUS
+        self.__send_WORD(command)
+        self.__send_STRING(corpus)
+        response = self.__recv_response()
+        return response
+
+    def cl_attribute_size(self, attribute):
+        """Send CQI_CL_ATTRIBUTE_SIZE for <attribute>; return the response.
+
+        INPUT: (STRING attribute)
+        OUTPUT: CQI_DATA_INT
+        The reply is the size of <attribute>:
+            number of tokens        (positional)
+            number of regions       (structural)
+            number of alignments    (alignment)
+        """
+        command = CL_ATTRIBUTE_SIZE
+        self.__send_WORD(command)
+        self.__send_STRING(attribute)
+        response = self.__recv_response()
+        return response
+
+    def cl_lexicon_size(self, attribute):
+        """Send CQI_CL_LEXICON_SIZE for <attribute>; return the response.
+
+        INPUT: (STRING attribute)
+        OUTPUT: CQI_DATA_INT
+        The reply is the number of entries in the lexicon of a positional
+        attribute; valid lexicon IDs range from 0 .. (lexicon_size - 1).
+        """
+        command = CL_LEXICON_SIZE
+        self.__send_WORD(command)
+        self.__send_STRING(attribute)
+        response = self.__recv_response()
+        return response
+
+    def cl_drop_attribute(self, attribute):
+        """Send CQI_CL_DROP_ATTRIBUTE for <attribute>; return the response.
+
+        INPUT: (STRING attribute)
+        OUTPUT: CQI_STATUS_OK
+        Asks the server to unload the attribute from memory.
+        """
+        # Must be the drop opcode; sending CL_LEXICON_SIZE here would return
+        # the lexicon size and leave the attribute loaded.
+        self.__send_WORD(CL_DROP_ATTRIBUTE)
+        self.__send_STRING(attribute)
+        return self.__recv_response()
+
+    """
+    " NOTE: simple (scalar) mappings are applied to lists (the returned list
+    "       has exactly the same length as the list passed as an argument)
+    """
+
+    def cl_str2id(self, attribute, strings):
+        """Send CQI_CL_STR2ID for <attribute> and return the response.
+
+        INPUT: (STRING attribute, STRING_LIST strings)
+        OUTPUT: CQI_DATA_INT_LIST
+        The reply contains -1 for every string in <strings> that is not found
+        in the lexicon.
+        """
+        # Must be the STR2ID opcode; CL_LEXICON_SIZE takes no string list, so
+        # sending it here would leave the extra payload unread on the wire.
+        self.__send_WORD(CL_STR2ID)
+        self.__send_STRING(attribute)
+        self.__send_STRING_LIST(strings)
+        return self.__recv_response()
+
+    def cl_id2str(self, attribute, id):
+        """Send CQI_CL_ID2STR for <attribute> and return the response.
+
+        INPUT: (STRING attribute, INT_LIST id)
+        OUTPUT: CQI_DATA_STRING_LIST
+        The reply contains "" for every ID in <id> that is out of range.
+        """
+        command = CL_ID2STR
+        self.__send_WORD(command)
+        self.__send_STRING(attribute)
+        self.__send_INT_LIST(id)
+        response = self.__recv_response()
+        return response
+
+    def cl_id2freq(self, attribute, id):
+        """Send CQI_CL_ID2FREQ for <attribute> and return the response.
+
+        INPUT: (STRING attribute, INT_LIST id)
+        OUTPUT: CQI_DATA_INT_LIST
+        The reply contains 0 for every ID in <id> that is out of range.
+        """
+        command = CL_ID2FREQ
+        self.__send_WORD(command)
+        self.__send_STRING(attribute)
+        self.__send_INT_LIST(id)
+        response = self.__recv_response()
+        return response
+
+    def cl_cpos2id(self, attribute, cpos):
+        """Send CQI_CL_CPOS2ID for <attribute> and return the response.
+
+        INPUT: (STRING attribute, INT_LIST cpos)
+        OUTPUT: CQI_DATA_INT_LIST
+        The reply contains -1 for every corpus position in <cpos> that is out
+        of range.
+        """
+        # Must be the CPOS2ID opcode; sending CL_ID2FREQ here would interpret
+        # the corpus positions as lexicon IDs and return frequencies.
+        self.__send_WORD(CL_CPOS2ID)
+        self.__send_STRING(attribute)
+        self.__send_INT_LIST(cpos)
+        return self.__recv_response()
+
+    def cl_cpos2str(self, attribute, cpos):
+        """Send CQI_CL_CPOS2STR for <attribute> and return the response.
+
+        INPUT: (STRING attribute, INT_LIST cpos)
+        OUTPUT: CQI_DATA_STRING_LIST
+        The reply contains "" for every corpus position in <cpos> that is out
+        of range.
+        """
+        command = CL_CPOS2STR
+        self.__send_WORD(command)
+        self.__send_STRING(attribute)
+        self.__send_INT_LIST(cpos)
+        response = self.__recv_response()
+        return response
+
+    def cl_cpos2struc(self, attribute, cpos):
+        """Send CQI_CL_CPOS2STRUC for <attribute> and return the response.
+
+        INPUT: (STRING attribute, INT_LIST cpos)
+        OUTPUT: CQI_DATA_INT_LIST
+        The reply contains -1 for every corpus position not inside a
+        structure region.
+        """
+        command = CL_CPOS2STRUC
+        self.__send_WORD(command)
+        self.__send_STRING(attribute)
+        self.__send_INT_LIST(cpos)
+        response = self.__recv_response()
+        return response
+
+    """
+    " NOTE: temporary addition for the Euralex2000 tutorial, but should
+    "       probably be included in CQi specs
+    """
+
+    def cl_cpos2lbound(self, attribute, cpos):
+        """Send CQI_CL_CPOS2LBOUND for <attribute> and return the response.
+
+        INPUT: (STRING attribute, INT_LIST cpos)
+        OUTPUT: CQI_DATA_INT_LIST
+        The reply gives the left boundary of the s-attribute region enclosing
+        cpos, -1 if not in region.
+        """
+        command = CL_CPOS2LBOUND
+        self.__send_WORD(command)
+        self.__send_STRING(attribute)
+        self.__send_INT_LIST(cpos)
+        response = self.__recv_response()
+        return response
+
+    def cl_cpos2rbound(self, attribute, cpos):
+        """Send CQI_CL_CPOS2RBOUND for <attribute> and return the response.
+
+        INPUT: (STRING attribute, INT_LIST cpos)
+        OUTPUT: CQI_DATA_INT_LIST
+        The reply gives the right boundary of the s-attribute region
+        enclosing cpos, -1 if not in region.
+        """
+        command = CL_CPOS2RBOUND
+        self.__send_WORD(command)
+        self.__send_STRING(attribute)
+        self.__send_INT_LIST(cpos)
+        response = self.__recv_response()
+        return response
+
+    def cl_cpos2alg(self, attribute, cpos):
+        """Send CQI_CL_CPOS2ALG for <attribute> and return the response.
+
+        INPUT: (STRING attribute, INT_LIST cpos)
+        OUTPUT: CQI_DATA_INT_LIST
+        The reply contains -1 for every corpus position not inside an
+        alignment.
+        """
+        command = CL_CPOS2ALG
+        self.__send_WORD(command)
+        self.__send_STRING(attribute)
+        self.__send_INT_LIST(cpos)
+        response = self.__recv_response()
+        return response
+
+    def cl_struc2str(self, attribute, strucs):
+        """Send CQI_CL_STRUC2STR for <attribute> and return the response.
+
+        INPUT: (STRING attribute, INT_LIST strucs)
+        OUTPUT: CQI_DATA_STRING_LIST
+        The reply holds the annotated string values of the structure regions
+        in <strucs>; "" if out of range.
+        Check CQI_CORPUS_STRUCTURAL_ATTRIBUTE_HAS_VALUES(<attribute>) first.
+        """
+        command = CL_STRUC2STR
+        self.__send_WORD(command)
+        self.__send_STRING(attribute)
+        self.__send_INT_LIST(strucs)
+        response = self.__recv_response()
+        return response
+
+    """
+    " NOTE: the following mappings take a single argument and return multiple
+    "       values, including lists of arbitrary size
+    """
+
+    def cl_id2cpos(self, attribute, id):
+        """Send CQI_CL_ID2CPOS for <attribute> and return the response.
+
+        INPUT: (STRING attribute, INT id)
+        OUTPUT: CQI_DATA_INT_LIST
+        The reply lists all corpus positions where the given token occurs.
+        """
+        command = CL_ID2CPOS
+        self.__send_WORD(command)
+        self.__send_STRING(attribute)
+        self.__send_INT(id)
+        response = self.__recv_response()
+        return response
+
+    def cl_idlist2cpos(self, attribute, id_list):
+        """Send CQI_CL_IDLIST2CPOS for <attribute> and return the response.
+
+        INPUT: (STRING attribute, INT_LIST id_list)
+        OUTPUT: CQI_DATA_INT_LIST
+        The reply lists all corpus positions where one of the tokens in
+        <id_list> occurs; the returned list is sorted as a whole, not per
+        token id.
+        """
+        command = CL_IDLIST2CPOS
+        self.__send_WORD(command)
+        self.__send_STRING(attribute)
+        self.__send_INT_LIST(id_list)
+        response = self.__recv_response()
+        return response
+
+    def cl_regex2id(self, attribute, regex):
+        """Send CQI_CL_REGEX2ID for <attribute> and return the response.
+
+        INPUT: (STRING attribute, STRING regex)
+        OUTPUT: CQI_DATA_INT_LIST
+        The reply lists the lexicon IDs of all tokens that match <regex>; the
+        returned list may be empty (size 0).
+        """
+        self.__send_WORD(CL_REGEX2ID)
+        for text in (attribute, regex):
+            self.__send_STRING(text)
+        response = self.__recv_response()
+        return response
+
+    def cl_struc2cpos(self, attribute, struc):
+        """Send CQI_CL_STRUC2CPOS for <attribute> and return the response.
+
+        INPUT: (STRING attribute, INT struc)
+        OUTPUT: CQI_DATA_INT_INT
+        The reply gives the start and end corpus positions of structure
+        region <struc>.
+        """
+        command = CL_STRUC2CPOS
+        self.__send_WORD(command)
+        self.__send_STRING(attribute)
+        self.__send_INT(struc)
+        response = self.__recv_response()
+        return response
+
+    def cl_alg2cpos(self, attribute, alg):
+        """Send CQI_CL_ALG2CPOS for <attribute> and return the response.
+
+        INPUT: (STRING attribute, INT alg)
+        OUTPUT: CQI_DATA_INT_INT_INT_INT
+        The reply is (src_start, src_end, target_start, target_end).
+        """
+        command = CL_ALG2CPOS
+        self.__send_WORD(command)
+        self.__send_STRING(attribute)
+        self.__send_INT(alg)
+        response = self.__recv_response()
+        return response
+
+    def cqp_query(self, mother_corpus, subcorpus_name, query):
+        """Send CQI_CQP_QUERY and return the server's response.
+
+        INPUT: (STRING mother_corpus, STRING subcorpus_name, STRING query)
+        OUTPUT: CQI_STATUS_OK
+        <query> must include the ';' character terminating the query.
+        """
+        self.__send_WORD(CQP_QUERY)
+        for text in (mother_corpus, subcorpus_name, query):
+            self.__send_STRING(text)
+        response = self.__recv_response()
+        return response
+
+    def cqp_list_subcorpora(self, corpus):
+        """Send CQI_CQP_LIST_SUBCORPORA for <corpus>; return the response.
+
+        INPUT: (STRING corpus)
+        OUTPUT: CQI_DATA_STRING_LIST
+        """
+        command = CQP_LIST_SUBCORPORA
+        self.__send_WORD(command)
+        self.__send_STRING(corpus)
+        response = self.__recv_response()
+        return response
+
+    def cqp_subcorpus_size(self, subcorpus):
+        """Send CQI_CQP_SUBCORPUS_SIZE for <subcorpus>; return the response.
+
+        INPUT: (STRING subcorpus)
+        OUTPUT: CQI_DATA_INT
+        """
+        command = CQP_SUBCORPUS_SIZE
+        self.__send_WORD(command)
+        self.__send_STRING(subcorpus)
+        response = self.__recv_response()
+        return response
+
+    def cqp_subcorpus_has_field(self, subcorpus, field):
+        """Send CQI_CQP_SUBCORPUS_HAS_FIELD and return the response.
+
+        INPUT: (STRING subcorpus, BYTE field)
+        OUTPUT: CQI_DATA_BOOL
+        """
+        command = CQP_SUBCORPUS_HAS_FIELD
+        self.__send_WORD(command)
+        self.__send_STRING(subcorpus)
+        self.__send_BYTE(field)
+        response = self.__recv_response()
+        return response
+
+    def cqp_dump_subcorpus(self, subcorpus, field, first, last):
+        """Send CQI_CQP_DUMP_SUBCORPUS and return the server's response.
+
+        INPUT: (STRING subcorpus, BYTE field, INT first, INT last)
+        OUTPUT: CQI_DATA_INT_LIST
+        Dumps the values of <field> for match ranges <first> .. <last> in
+        <subcorpus>. <field> is one of the CQI_CONST_FIELD_* constants.
+        """
+        self.__send_WORD(CQP_DUMP_SUBCORPUS)
+        self.__send_STRING(subcorpus)
+        self.__send_BYTE(field)
+        for bound in (first, last):
+            self.__send_INT(bound)
+        response = self.__recv_response()
+        return response
+
+    def cqp_drop_subcorpus(self, subcorpus):
+        """Send CQI_CQP_DROP_SUBCORPUS for <subcorpus>; return the response.
+
+        INPUT: (STRING subcorpus)
+        OUTPUT: CQI_STATUS_OK
+        Asks the server to delete a subcorpus from memory.
+        """
+        command = CQP_DROP_SUBCORPUS
+        self.__send_WORD(command)
+        self.__send_STRING(subcorpus)
+        response = self.__recv_response()
+        return response
+
+    """
+    " NOTE: The following two functions are temporarily included for the
+    "       Euralex 2000 tutorial demo
+    """
+
+    def cqp_fdist_1(self, subcorpus, cutoff, field, attribute):
+        """Send CQI_CQP_FDIST_1 (frequency distribution of single tokens).
+
+        INPUT: (STRING subcorpus, INT cutoff, BYTE field, STRING attribute)
+        OUTPUT: CQI_DATA_INT_LIST
+        The reply holds <n> (id, frequency) pairs flattened into a list of
+        size 2*<n>. <field> is one of CQI_CONST_FIELD_MATCH,
+        CQI_CONST_FIELD_TARGET, CQI_CONST_FIELD_KEYWORD.
+        NB: pairs are sorted by frequency desc.
+        """
+        command = CQP_FDIST_1
+        self.__send_WORD(command)
+        self.__send_STRING(subcorpus)
+        self.__send_INT(cutoff)
+        self.__send_BYTE(field)
+        self.__send_STRING(attribute)
+        response = self.__recv_response()
+        return response
+
+    def cqp_fdist_2(self, subcorpus, cutoff, field1, attribute1, field2,
+                    attribute2):
+        """Send CQI_CQP_FDIST_2 (frequency distribution of pairs of tokens).
+
+        INPUT: (STRING subcorpus, INT cutoff, BYTE field1, STRING attribute1,
+                BYTE field2, STRING attribute2)
+        OUTPUT: CQI_DATA_INT_LIST
+        The reply holds <n> (id1, id2, frequency) triples flattened into a
+        list of size 3*<n>.
+        NB: triples are sorted by frequency desc.
+        """
+        self.__send_WORD(CQP_FDIST_2)
+        self.__send_STRING(subcorpus)
+        self.__send_INT(cutoff)
+        # Each (field, attribute) pair goes out in order: BYTE, then STRING.
+        for field, attribute in ((field1, attribute1), (field2, attribute2)):
+            self.__send_BYTE(field)
+            self.__send_STRING(attribute)
+        response = self.__recv_response()
+        return response
+
+    def __recv_response(self):
+        """Read one response WORD from the server and dispatch on its type.
+
+        The high byte of the received WORD selects the response family.
+
+        :returns: the decoded payload for ``CQI_DATA_*`` responses, or the
+                  raw response code for ``CQI_STATUS_*`` responses
+        :raises Exception: for ``CQI_ERROR_*``, ``CQI_CL_ERROR_*`` and
+                           ``CQI_CQP_ERROR_*`` responses (the message is the
+                           symbolic name when the code is listed in
+                           ``lookup``) and for unknown response families
+        """
+        response_code = self.__recv_WORD()
+        response_type = response_code >> 8
+        if response_type == DATA:
+            return self.__recv_DATA(response_code)
+        if response_type == STATUS:
+            return response_code
+        if response_type in (ERROR, CL_ERROR, CQP_ERROR):
+            # An error code missing from ``lookup`` used to surface as a bare
+            # KeyError; fall back to the hex code so the caller still gets a
+            # meaningful message.
+            raise Exception(
+                lookup.get(
+                    response_code,
+                    'Unknown error code: {}'.format(hex(response_code))
+                )
+            )
+        raise Exception(
+            'Unknown response type: {}'.format(hex(response_type))
+        )
+
+    def __recv_DATA(self, data_type):
+        """Read the payload that follows a ``CQI_DATA_*`` response code.
+
+        :param data_type: the full ``CQI_DATA_*`` code already received
+        :returns: the value produced by the matching ``__recv_DATA_*`` reader
+        :raises Exception: if ``data_type`` is not a known data code
+        """
+        # Guard-clause form: each branch resolves its reader only when it is
+        # actually selected.
+        if data_type == DATA_BYTE:
+            return self.__recv_DATA_BYTE()
+        if data_type == DATA_BOOL:
+            return self.__recv_DATA_BOOL()
+        if data_type == DATA_INT:
+            return self.__recv_DATA_INT()
+        if data_type == DATA_STRING:
+            return self.__recv_DATA_STRING()
+        if data_type == DATA_BYTE_LIST:
+            return self.__recv_DATA_BYTE_LIST()
+        if data_type == DATA_BOOL_LIST:
+            return self.__recv_DATA_BOOL_LIST()
+        if data_type == DATA_INT_LIST:
+            return self.__recv_DATA_INT_LIST()
+        if data_type == DATA_STRING_LIST:
+            return self.__recv_DATA_STRING_LIST()
+        if data_type == DATA_INT_INT:
+            return self.__recv_DATA_INT_INT()
+        if data_type == DATA_INT_INT_INT_INT:
+            return self.__recv_DATA_INT_INT_INT_INT()
+        if data_type == DATA_INT_TABLE:
+            return self.__recv_DATA_INT_TABLE()
+        raise Exception('Unknown data type: {}'.format(hex(data_type)))
+
+    def __recv_DATA_BYTE(self):
+        """Receive one unsigned byte and return it as an int.
+
+        Raises:
+            ConnectionError: if the connection is closed before the byte
+                arrives.
+        """
+        # A plain blocking read replaces the MSG_PEEK polling loop: once the
+        # peer has closed the socket recv() keeps returning b'', so polling
+        # for a full-length peek would spin forever.
+        byte_data = self.socket.recv(1)
+        if not byte_data:
+            raise ConnectionError('Connection closed while receiving data')
+        return struct.unpack('!B', byte_data)[0]
+
+    def __recv_DATA_BOOL(self):
+        """Receive one byte and return it as a bool.
+
+        Raises:
+            ConnectionError: if the connection is closed before the byte
+                arrives.
+        """
+        # A plain blocking read replaces the MSG_PEEK polling loop: once the
+        # peer has closed the socket recv() keeps returning b'', so polling
+        # for a full-length peek would spin forever.
+        byte_data = self.socket.recv(1)
+        if not byte_data:
+            raise ConnectionError('Connection closed while receiving data')
+        return struct.unpack('!?', byte_data)[0]
+
+    def __recv_DATA_INT(self):
+        """Receive a signed 32 bit integer in network byte order.
+
+        Raises:
+            ConnectionError: if the connection is closed before all four
+                bytes have arrived.
+        """
+        # Accumulate with blocking reads instead of polling with MSG_PEEK:
+        # after the peer closes the socket recv() keeps returning b'', so
+        # waiting for a four byte peek would never terminate.
+        byte_data = b''
+        while len(byte_data) < 4:
+            chunk = self.socket.recv(4 - len(byte_data))
+            if not chunk:
+                raise ConnectionError('Connection closed while receiving data')
+            byte_data += chunk
+        return struct.unpack('!i', byte_data)[0]
+
+    def __recv_DATA_STRING(self):
+        """Receive a length-prefixed string.
+
+        The payload is a 16 bit length followed by that many bytes, which are
+        decoded with the default codec (UTF-8).
+
+        Raises:
+            ConnectionError: if the connection is closed before the whole
+                string has arrived.
+        """
+        n = self.__recv_WORD()
+        # Collect the payload piecewise. Waiting until a single MSG_PEEK
+        # returns all n bytes hangs forever when the peer closes the socket,
+        # and can stall for long strings that exceed what one peek delivers.
+        chunks = []
+        remaining = n
+        while remaining > 0:
+            chunk = self.socket.recv(remaining)
+            if not chunk:
+                raise ConnectionError('Connection closed while receiving data')
+            chunks.append(chunk)
+            remaining -= len(chunk)
+        return b''.join(chunks).decode()
+
+    def __recv_DATA_BYTE_LIST(self):
+        """Receive an int count followed by that many bytes, as a list."""
+        count = self.__recv_DATA_INT()
+        return [self.__recv_DATA_BYTE() for _ in range(count)]
+
+    def __recv_DATA_BOOL_LIST(self):
+        """Receive an int count followed by that many bools, as a list."""
+        count = self.__recv_DATA_INT()
+        return [self.__recv_DATA_BOOL() for _ in range(count)]
+
+    def __recv_DATA_INT_LIST(self):
+        """Receive an int count followed by that many ints, as a list."""
+        count = self.__recv_DATA_INT()
+        return [self.__recv_DATA_INT() for _ in range(count)]
+
+    def __recv_DATA_STRING_LIST(self):
+        """Receive an int count followed by that many strings, as a list."""
+        count = self.__recv_DATA_INT()
+        return [self.__recv_DATA_STRING() for _ in range(count)]
+
+    def __recv_DATA_INT_INT(self):
+        """Receive two consecutive ints and return them as a pair."""
+        first = self.__recv_DATA_INT()
+        second = self.__recv_DATA_INT()
+        return (first, second)
+
+    def __recv_DATA_INT_INT_INT_INT(self):
+        """Receive four consecutive ints and return them as a 4-tuple."""
+        return tuple([self.__recv_DATA_INT() for _ in range(4)])
+
+    def __recv_DATA_INT_TABLE(self):
+        """Receive an int table as a list of row lists.
+
+        The payload starts with the row count and the column count, followed
+        by the cells in row-major order.
+        """
+        n_rows = self.__recv_DATA_INT()
+        n_cols = self.__recv_DATA_INT()
+        return [[self.__recv_DATA_INT() for _ in range(n_cols)]
+                for _ in range(n_rows)]
+
+    def __recv_WORD(self):
+        """Receive an unsigned 16 bit integer in network byte order.
+
+        Raises:
+            ConnectionError: if the connection is closed before both bytes
+                have arrived.
+        """
+        # Accumulate with blocking reads instead of polling with MSG_PEEK:
+        # after the peer closes the socket recv() keeps returning b'', so
+        # waiting for a two byte peek would never terminate.
+        byte_data = b''
+        while len(byte_data) < 2:
+            chunk = self.socket.recv(2 - len(byte_data))
+            if not chunk:
+                raise ConnectionError('Connection closed while receiving data')
+            byte_data += chunk
+        return struct.unpack('!H', byte_data)[0]
+
+    def __send_BYTE(self, byte_data):
+        """Send byte_data as a single unsigned byte."""
+        self.socket.sendall(struct.pack('!B', byte_data))
+
+    def __send_BOOL(self, bool_data):
+        """Send bool_data as a single byte."""
+        self.socket.sendall(struct.pack('!?', bool_data))
+
+    def __send_INT(self, int_data):
+        """Send int_data as a signed 32 bit integer in network byte order."""
+        self.socket.sendall(struct.pack('!i', int_data))
+
+    def __send_STRING(self, string_data):
+        """Send string_data UTF-8 encoded, prefixed with its 16 bit byte length."""
+        payload = string_data.encode('utf-8')
+        size = len(payload)
+        self.socket.sendall(struct.pack('!H{}s'.format(size), size, payload))
+
+    def __send_INT_LIST(self, int_list_data):
+        """Send the number of ints, then every int of int_list_data in order."""
+        self.__send_INT(len(int_list_data))
+        for value in int_list_data:
+            self.__send_INT(value)
+
+    def __send_STRING_LIST(self, string_list_data):
+        """Send the number of strings, then every string of the list in order."""
+        self.__send_INT(len(string_list_data))
+        for text in string_list_data:
+            self.__send_STRING(text)
+
+    def __send_WORD(self, word_data):
+        """Send word_data as an unsigned 16 bit integer in network byte order."""
+        self.socket.sendall(struct.pack('!H', word_data))
--- a/app/corpora/cqi/client.py
+++ b/app/corpora/cqi/client.py
@@ -0,0 +1,31 @@
+from .api import APIClient
+from .constants import MAJOR_VERSION, MINOR_VERSION
+from .models.corpora import CorpusCollection
+
+
+class CQiClient(APIClient):
+    """APIClient with connect/disconnect helpers and a corpus collection.
+
+    After connect() the instance carries ``corpora`` (a CorpusCollection
+    bound to this client) and ``info`` (a dict with the CQi version string).
+    """
+
+    def __init__(self, host, port=4877):
+        super().__init__(host, port=port)
+
+    def connect(self, username='anonymous', password=''):
+        """Run setup(), send ctrl_connect with the credentials and populate
+        ``corpora`` and ``info``."""
+        super().setup()
+        self.ctrl_connect(username, password)
+        self.__load()
+
+    def disconnect(self):
+        """Send ctrl_bye and then run teardown()."""
+        self.ctrl_bye()
+        super().teardown()
+
+    def __load(self):
+        # Attributes that only make sense once the connection is established.
+        self.corpora = CorpusCollection(self)
+        version_string = '{}.{}'.format(MAJOR_VERSION, MINOR_VERSION)
+        self.info = {'version': version_string}
+
+    def features(self):
+        """Return the names of the features the server answers positively for.
+
+        The probes are asked in the order cqi_1_0, cl_2_3, cqp_2_3.
+        """
+        probes = (
+            ('cqi_1_0', self.ask_feature_cqi_1_0),
+            ('cl_2_3', self.ask_feature_cl_2_3),
+            ('cqp_2_3', self.ask_feature_cqp_2_3),
+        )
+        return [label for label, ask in probes if ask()]
--- a/app/corpora/cqi/constants.py
+++ b/app/corpora/cqi/constants.py
@@ -0,0 +1,36 @@
+""" 4. Constant Definitions """
+CONST_FALSE = CONST_NO = 0x00
+CONST_TRUE = CONST_YES = 0x01
+
+# NOTE: The following constants specify which field will be returned by
+#       CQI_CQP_DUMP_SUBCORPUS and some other subcorpus commands.
+CONST_FIELD_MATCH = 0x10
+CONST_FIELD_MATCHEND = 0x11
+
+# NOTE: The constants specifying target0 .. target9 are guaranteed to have the
+#       numerical values 0 .. 9, so clients do not need to look up the
+#       constant values if they're handling arbitrary targets.
+(CONST_FIELD_TARGET_0,
+ CONST_FIELD_TARGET_1,
+ CONST_FIELD_TARGET_2,
+ CONST_FIELD_TARGET_3,
+ CONST_FIELD_TARGET_4,
+ CONST_FIELD_TARGET_5,
+ CONST_FIELD_TARGET_6,
+ CONST_FIELD_TARGET_7,
+ CONST_FIELD_TARGET_8,
+ CONST_FIELD_TARGET_9) = range(10)
+
+# NOTE: The following constants are provided for backward compatibility with
+#       traditional CQP field names & while the generalised target concept
+#       isn't yet implemented in the CQPserver.
+CONST_FIELD_TARGET = 0x00
+CONST_FIELD_KEYWORD = 0x09
+
+# NOTE: CQi version is CQI_MAJOR_VERSION.CQI_MINOR_VERSION
+MAJOR_VERSION = 0x00
+MINOR_VERSION = 0x01
--- a/app/corpora/cqi/models/init.py
+++ b/app/corpora/cqi/models/init.py
--- a/app/corpora/cqi/models/corpora.py
+++ b/app/corpora/cqi/models/corpora.py
@@ -0,0 +1,102 @@
+from .subcorpora import SubcorpusCollection
+
+
+class CorpusCollection:
+    """Creates Corpus objects that share one client."""
+
+    def __init__(self, client):
+        self.client = client
+
+    def get(self, name):
+        """Return a Corpus object for the corpus called name."""
+        return Corpus(self.client, name)
+
+    def list(self):
+        """Return a Corpus object for every name the client lists."""
+        corpus_names = self.client.corpus_list_coprora()
+        return [Corpus(self.client, corpus_name)
+                for corpus_name in corpus_names]
+
+
+class Corpus:
+    """One corpus, accessed through a client.
+
+    On construction the corpus size (size of the ``word`` attribute),
+    charset, properties and the alignment, structural and positional
+    attribute lists are fetched from the client, and ``subcorpora`` is bound
+    to a SubcorpusCollection for this corpus.
+
+    The lookup methods take an attribute name without the corpus prefix and
+    forward to the client call of the same name (``cl_*``) using the
+    qualified specifier ``CORPUS.attribute``.
+    """
+
+    def __init__(self, client, name):
+        self.client = client
+        self.name = name
+        self.__load()
+
+    def __load(self):
+        # corpus_info and corpus_full_name are deliberately not queried here.
+        self.size = self.client.cl_attribute_size(self._spec('word'))
+        self.charset = self.client.corpus_charset(self.name)
+        self.properties = self.client.corpus_properties(self.name)
+        self.alignment_attributes = \
+            self.client.corpus_alignment_attributes(self.name)
+        self.structural_attributes = \
+            self.client.corpus_structural_attributes(self.name)
+        self.positional_attributes = \
+            self.client.corpus_positional_attributes(self.name)
+        self.subcorpora = SubcorpusCollection(self.client, self)
+
+    def _spec(self, attribute):
+        """Return the qualified attribute specifier ``CORPUS.attribute``."""
+        return '{}.{}'.format(self.name, attribute)
+
+    def alg2cpos(self, attribute, alg):
+        return self.client.cl_alg2cpos(self._spec(attribute), alg)
+
+    def cpos2alg(self, attribute, cpos_list):
+        return self.client.cl_cpos2alg(self._spec(attribute), cpos_list)
+
+    def cpos2id(self, attribute, cpos_list):
+        return self.client.cl_cpos2id(self._spec(attribute), cpos_list)
+
+    def cpos2lbound(self, attribute, cpos_list):
+        return self.client.cl_cpos2lbound(self._spec(attribute), cpos_list)
+
+    def cpos2rbound(self, attribute, cpos_list):
+        return self.client.cl_cpos2rbound(self._spec(attribute), cpos_list)
+
+    def cpos2str(self, attribute, cpos_list):
+        return self.client.cl_cpos2str(self._spec(attribute), cpos_list)
+
+    def cpos2struc(self, attribute, cpos_list):
+        return self.client.cl_cpos2struc(self._spec(attribute), cpos_list)
+
+    def id2cpos(self, attribute, id):
+        return self.client.cl_id2cpos(self._spec(attribute), id)
+
+    def idlist2cpos(self, attribute, ids):
+        return self.client.cl_idlist2cpos(self._spec(attribute), ids)
+
+    def id2freq(self, attribute, ids):
+        return self.client.cl_id2freq(self._spec(attribute), ids)
+
+    def id2str(self, attribute, ids):
+        return self.client.cl_id2str(self._spec(attribute), ids)
+
+    def query(self, query, subcorpus_name='Results'):
+        """Run query on this corpus and return the resulting subcorpus.
+
+        The result is stored under subcorpus_name and that same subcorpus is
+        returned (previously 'Results' was always returned, whatever name
+        the caller asked for).
+        """
+        self.client.cqp_query(self.name, subcorpus_name, query)
+        return self.subcorpora.get(subcorpus_name)
+
+    def regex2id(self, attribute, regex):
+        return self.client.cl_regex2id(self._spec(attribute), regex)
+
+    def structural_attribute_has_values(self, attribute):
+        return self.client.corpus_structural_attribute_has_values(
+            self._spec(attribute)
+        )
+
+    def str2id(self, attribute, strings):
+        return self.client.cl_str2id(self._spec(attribute), strings)
+
+    def struc2cpos(self, attribute, struc):
+        return self.client.cl_struc2cpos(self._spec(attribute), struc)
+
+    def struc2str(self, attribute, strucs):
+        return self.client.cl_struc2str(self._spec(attribute), strucs)
--- a/app/corpora/cqi/models/subcorpora.py
+++ b/app/corpora/cqi/models/subcorpora.py
@@ -0,0 +1,112 @@
+from ..constants import (CONST_FIELD_KEYWORD, CONST_FIELD_MATCH,
+                         CONST_FIELD_MATCHEND, CONST_FIELD_TARGET)
+
+
+class SubcorpusCollection:
+    """Creates Subcorpus objects for one parent corpus."""
+
+    def __init__(self, client, parent_corpus):
+        self.client = client
+        self.parent_corpus = parent_corpus
+
+    def get(self, name):
+        """Return a Subcorpus object for the subcorpus called name."""
+        return Subcorpus(self.client, self.parent_corpus, name)
+
+    def list(self):
+        """Return a Subcorpus object for every subcorpus the client lists
+        for the parent corpus."""
+        subcorpus_names = \
+            self.client.cqp_list_subcorpora(self.parent_corpus.name)
+        return [Subcorpus(self.client, self.parent_corpus, subcorpus_name)
+                for subcorpus_name in subcorpus_names]
+
+
+class Subcorpus:
+    """A named subcorpus (query result) of a parent corpus.
+
+    On construction the available fields (match, matchend, target, keyword)
+    and the number of matches are fetched from the client. ``fields`` maps
+    the field label to its CONST_FIELD_* value, ``size`` is the match count.
+    """
+
+    def __init__(self, client, parent_corpus, name):
+        self.client = client
+        self.parent_corpus = parent_corpus
+        self.name = name
+        # Qualified specifier CORPUS:Subcorpus used for all client calls.
+        self.__name = '{}:{}'.format(parent_corpus.name, name)
+        self.__load()
+
+    def __load(self):
+        candidates = (('match', CONST_FIELD_MATCH),
+                      ('matchend', CONST_FIELD_MATCHEND),
+                      ('target', CONST_FIELD_TARGET),
+                      ('keyword', CONST_FIELD_KEYWORD))
+        self.fields = {}
+        for label, field in candidates:
+            if self.client.cqp_subcorpus_has_field(self.__name, field):
+                self.fields[label] = field
+        self.size = self.client.cqp_subcorpus_size(self.__name)
+
+    def drop(self):
+        return self.client.cqp_drop_subcorpus(self.__name)
+
+    def dump(self, field, first, last):
+        return self.client.cqp_dump_subcorpus(self.__name, field, first, last)
+
+    def dump_values(self, context=25, first_result=0,
+                    num_results=float('inf')):
+        """Return matches with context plus lookup tables for their cpos.
+
+        Keyword arguments:
+        context -- number of corpus positions kept left and right of a match
+        first_result -- index of the first match to return (clamped to 0)
+        num_results -- maximum number of matches to return
+
+        Returns a dict with the key 'matches' (list of dicts with the cpos
+        lists 'left', 'hit' and 'right') and, when there is at least one
+        match, 'cpos_lookup' (cpos -> attribute values / structure ids) and
+        one '<attr>_lookup' per value-less structural attribute that has
+        sub-attributes (structure id -> sub-attribute values).
+        """
+        first_result = max(0, first_result)
+        # The dump range is inclusive on both ends, so num_results matches
+        # end at first_result + num_results - 1.
+        last_result = min(first_result + num_results - 1, self.size - 1)
+        if last_result < first_result:
+            # Empty subcorpus, offset past the end or num_results <= 0:
+            # nothing to dump, and an inverted range must not be requested.
+            return {'matches': []}
+        match_starts = self.dump(self.fields['match'], first_result,
+                                 last_result)
+        match_ends = self.dump(self.fields['matchend'], first_result,
+                               last_result)
+        matches = []
+        cpos_set = set()
+        for match_start, match_end in zip(match_starts, match_ends):
+            left_start = max(0, match_start - context)
+            right_end = min(self.parent_corpus.size, match_end + 1 + context)
+            match = {'left': list(range(left_start, match_start)),
+                     'hit': list(range(match_start, match_end + 1)),
+                     'right': list(range(match_end + 1, right_end))}
+            matches.append(match)
+            cpos_set.update(match['left'], match['hit'], match['right'])
+        cpos_list = list(cpos_set)
+        if not cpos_list:
+            return {'matches': matches}
+        cpos_lookup = {cpos: {} for cpos in cpos_list}
+        lookups = {'cpos_lookup': cpos_lookup}
+        for attr in self.parent_corpus.positional_attributes:
+            values = self.parent_corpus.cpos2str(attr, cpos_list)
+            for cpos, value in zip(cpos_list, values):
+                cpos_lookup[cpos][attr] = value
+        for attr in self.parent_corpus.structural_attributes:
+            # Attributes with values are handled as sub-attributes of their
+            # value-less parent attribute below.
+            if self.parent_corpus.structural_attribute_has_values(attr):
+                continue
+            struc_ids = self.parent_corpus.cpos2struc(attr, cpos_list)
+            for cpos, struc_id in zip(cpos_list, struc_ids):
+                # -1 marks a cpos that lies outside any region of attr.
+                if struc_id != -1:
+                    cpos_lookup[cpos][attr] = struc_id
+            occurring_ids = [struc_id for struc_id in set(struc_ids)
+                             if struc_id != -1]
+            if not occurring_ids:
+                continue
+            # Strip the exact parent prefix; splitting at the first
+            # underscore would yield a wrong name for a parent attribute
+            # that itself contains an underscore.
+            prefix = attr + '_'
+            subattrs = [name[len(prefix):] for name
+                        in self.parent_corpus.structural_attributes
+                        if name.startswith(prefix)]
+            if not subattrs:
+                continue
+            attr_lookup = {struc_id: {} for struc_id in occurring_ids}
+            for subattr in subattrs:
+                values = self.parent_corpus.struc2str(prefix + subattr,
+                                                      occurring_ids)
+                for struc_id, value in zip(occurring_ids, values):
+                    attr_lookup[struc_id][subattr] = value
+            lookups[attr + '_lookup'] = attr_lookup
+        return {'matches': matches, **lookups}
+
+    def fdist_1(self, cutoff, field, attribute):
+        return self.client.cqp_fdist_1(self.__name, cutoff, field, attribute)
+
+    def fdist_2(self, cutoff, field_1, attribute_1, field_2, attribute_2):
+        return self.client.cqp_fdist_2(self.__name, cutoff, field_1,
+                                       attribute_1, field_2, attribute_2)
--- a/app/corpora/cqi/version.py
+++ b/app/corpora/cqi/version.py
@@ -0,0 +1,5 @@
+from .constants import MAJOR_VERSION, MINOR_VERSION
+
+
+# (major, minor) tuple and the dotted string derived from it.
+version_info = (MAJOR_VERSION, MINOR_VERSION)
+version = '{}.{}'.format(*version_info)
--- a/app/corpora/cqi/wrapper.py
+++ b/app/corpora/cqi/wrapper.py
@@ -0,0 +1,323 @@
+from .api import APIClient
+from .constants import CONST_FIELD_MATCH, CONST_FIELD_MATCHEND
+import time
+
+
+class CQiWrapper(APIClient):
+    '''
+    CQiWrapper object
+
+    High level wrapper that groups and renames some functions of the API
+    client for ease of use. Also structures received data into python
+    dictionaries.
+
+    Keyword arguments:
+    host -- host IP address or hostname where the cqp server is running
+    port -- port of the cqp server
+    username -- username used to connect to the cqp server
+    password -- password of the user to connect to the cqp server
+    '''
+
+    # Class-level default kept for backwards compatibility. Every instance
+    # gets its own list in __init__, so the names of one wrapper no longer
+    # leak into all other wrappers.
+    SUBCORPUS_NAMES = []
+
+    def __init__(self, host='127.0.0.1', port=4877, username='anonymous',
+                 password=''):
+        super(CQiWrapper, self).__init__(host, port=port)
+        self.username = username
+        self.password = password
+        self.SUBCORPUS_NAMES = []
+
+    def connect(self):
+        '''
+        Connect with CQP server
+
+        Runs setup() and sends ctrl_connect with the username and password
+        given at class initiation.
+        '''
+        super(CQiWrapper, self).setup()
+        self.ctrl_connect(self.username, self.password)
+
+    def __create_attribute_strings(self):
+        '''
+        Creates all needed attribute strings to query for word, lemma etc. in
+        the selected corpus.
+        For example: CORPUS_NAME.word to query words
+        Strings are created for every positional and structural attribute
+        the server reports for the corpus.
+        '''
+        p_attrs = self.corpus_positional_attributes(self.corpus_name)
+        struct_attrs = self.corpus_structural_attributes(self.corpus_name)
+        prefix = self.corpus_name + '.'
+        self.attr_strings = {
+            'positional_attrs': {attr: prefix + attr for attr in p_attrs},
+            'struct_attrs': {attr: prefix + attr for attr in struct_attrs},
+        }
+        print(('All positional and '
+               'structural attributes: {}').format(self.attr_strings))
+
+    def select_corpus(self, corpus_name):
+        '''
+        Checks if the given corpus name exists. If it exists it is set as the
+        main corpus name used to create the needed query attribute strings
+        like CORPUS_NAME.word. Raises an Exception otherwise.
+        '''
+        if corpus_name not in self.corpus_list_coprora():
+            print('{} does not exist.'.format(corpus_name))
+            raise Exception('Given Corpus Name is not in corpora list.')
+        self.corpus_name = corpus_name
+        self.__create_attribute_strings()
+        print('{} does exist.'.format(corpus_name))
+
+    def disconnect(self):
+        '''
+        Disconnect from CQP server
+
+        Sends ctrl_bye and runs teardown() afterwards.
+        '''
+        self.ctrl_bye()
+        super(CQiWrapper, self).teardown()
+        print('Disconnected from cqp server.')
+
+    def query_subcorpus(self, query, result_subcorpus_name='Query-results'):
+        '''
+        Create subcorpus
+
+        Input query will be used to create a subcorpus holding all cpos match
+        positions for that query.
+
+        Keyword arguments:
+        result_subcorpus_name -- set name of the subcorpus which holds all
+        cpos match positions, produced by the query
+        query -- query written in cqp query language
+        '''
+        self.query = query
+        self.cqp_query(self.corpus_name, result_subcorpus_name, query)
+        self.result_subcorpus = '{}:{}'.format(self.corpus_name,
+                                               result_subcorpus_name)
+        self.SUBCORPUS_NAMES.append(self.result_subcorpus)
+        self.match_count = self.cqp_subcorpus_size(self.result_subcorpus)
+        print('Nr of all matches is: {}'.format(self.match_count))
+
+    def show_subcorpora(self):
+        '''
+        Show all subcorpora the cqp server lists for the selected corpus.
+        '''
+        return self.cqp_list_subcorpora(self.corpus_name)
+
+    def show_query_results(self,
+                           context_len=10,
+                           result_len=1000,
+                           result_offset=0):
+        '''
+        Show query results
+
+        Uses the cpos match boundaries of the last query to build, per match,
+        the cpos lists of the left context, the hit and the right context.
+        The attribute values for all those cpos are fetched with
+        get_cpos_infos.
+
+        Keyword arguments:
+        context_len -- defines how many words before and after a match will be
+        shown (default 10)
+        result_len -- defines for how many matches all information like lemma
+        and POS is being grabbed
+        result_offset -- defines the offset of the matches being requested. If
+        the offset is 100, information for result_len matches starting at
+        match 100 is being grabbed
+
+        Returns the dict that is also stored in self.results: 'code' and
+        'result' with 'matches', 'match_count', 'cpos_lookup' and
+        'text_lookup'.
+        '''
+        t0 = time.time()
+        self.context_len = context_len
+        self.corpus_max_len = self.cl_attribute_size(
+            self.attr_strings['positional_attrs']['word']
+        )
+        self.nr_matches = min(result_len, self.match_count)
+        if self.match_count == 0:
+            print('Query resulted in 0 matches.')
+            self.results = {'code': 0,
+                            'result': {'matches': [],
+                                       'match_count': self.match_count,
+                                       'cpos_lookup': {},
+                                       'text_lookup': {}}
+                            }
+            return self.results
+
+        # Get match cpos boundaries: the start and end cpos of every match,
+        # e.g. starts [1355, 1477] and ends [1357, 1479] for two matches.
+        # Both offsets are inclusive match indexes.
+        offset_start = result_offset
+        print('Offset start is: {}'.format(offset_start))
+        offset_end = min(self.nr_matches + result_offset - 1,
+                         self.match_count - 1)
+        print('Offset end is: {}'.format(offset_end))
+        match_starts = self.cqp_dump_subcorpus(self.result_subcorpus,
+                                               CONST_FIELD_MATCH,
+                                               offset_start,
+                                               offset_end)
+        match_ends = self.cqp_dump_subcorpus(self.result_subcorpus,
+                                             CONST_FIELD_MATCHEND,
+                                             offset_start,
+                                             offset_end)
+
+        # Generate all cpos between the match boundaries (inclusive) as well
+        # as the cpos of the left and right context. They are stored per
+        # match under the keys 'lc', 'hit' and 'rc' and additionally
+        # collected in one list for the final request of all cpos
+        # information.
+        all_matches = []
+        all_cpos = []
+        for start, end in zip(match_starts, match_ends):
+            end += 1  # exclusive end for range()
+            lc_cpos = list(range(max(0, start - self.context_len), start))
+            match_cpos = list(range(start, end))
+            rc_cpos = list(range(end, min(self.corpus_max_len,
+                                          end + self.context_len)))
+            all_matches.append({'lc': lc_cpos,
+                                'hit': match_cpos,
+                                'rc': rc_cpos})
+            all_cpos.extend(lc_cpos + match_cpos + rc_cpos)
+
+        all_cpos = list(set(all_cpos))  # get rid of cpos duplicates
+        len_all_cpos = len(all_cpos)
+        t1 = time.time()
+        t_total = t1 - t0
+        print('Time to create all CPOS for query: {}'.format(t_total))
+        print('Requesting {} CPOS with one query.'.format(len_all_cpos))
+
+        # Get cpos information like CORPUS_NAME.word or CORPUS_NAME.lemma for
+        # all entries in all_cpos and store it in self.results.
+        t2 = time.time()
+        all_cpos_infos, text_lookup = self.get_cpos_infos(all_cpos)
+        t3 = time.time()
+        t_final = t3 - t2
+        print('Got infos for {} CPOS in {} seconds:'.format(len_all_cpos,
+                                                            t_final))
+        self.results = {'code': 0,
+                        'result': {'matches': all_matches,
+                                   'match_count': self.match_count,
+                                   'cpos_lookup': all_cpos_infos,
+                                   'text_lookup': text_lookup}
+                        }
+        return self.results
+
+    def get_cpos_infos(self, all_cpos):
+        '''
+        Get cpos information like CORPUS_NAME.word or CORPUS_NAME.lemma for
+        all cpos entries specified in the parameter all_cpos.
+
+        Returns a tuple (cpos_lookup, text_lookup): cpos_lookup maps every
+        cpos to a dict of its positional attribute values and the ids of the
+        value-less structural attributes, text_lookup maps structure ids to
+        the values of the structural attributes that carry values.
+        '''
+        # Get all positional attribute information
+        cpos_infos = {}
+        for p_attr_key, p_attr in self.attr_strings['positional_attrs'].items():
+            cpos_infos[p_attr_key] = self.cl_cpos2str(p_attr, all_cpos)
+
+        # Get all structural attribute information
+        tmp_info = {}
+        structs_to_check = []
+        for struct_attr_key, key in self.attr_strings['struct_attrs'].items():
+            has_value = self.corpus_structural_attribute_has_values(key)
+            if has_value is False:
+                # Structural elements without values are the XML tags
+                # themselves; only for those the structure ids are needed, so
+                # the server is only asked in this branch.
+                tmp_info[struct_attr_key] = list(
+                    self.cl_cpos2struc(key, all_cpos)
+                )
+            else:
+                # Elements with values (XML attributes) are resolved through
+                # the text lookup below.
+                structs_to_check.append({key: struct_attr_key})
+        print('Structs to check: {}'.format(structs_to_check))
+        struct_attr_values = list(tmp_info.values())
+        struct_attr_keys = list(tmp_info.keys())
+
+        # Build text lookup dictionary.
+        # NOTE(review): the first value-less structural attribute is taken to
+        # be the text element -- confirm against the corpus layout.
+        # A set is built to gather the lookup information only once per
+        # unique id; -1 (cpos outside any region) is not a valid structure id
+        # and is left out. Without any value-less attribute there are no ids.
+        first_struct_ids = struct_attr_values[0] if struct_attr_values else []
+        text_lookup_ids = [struct_id for struct_id in set(first_struct_ids)
+                           if struct_id != -1]
+        text_lookup = {}  # all info of one text, identified by its id
+        for d in structs_to_check:
+            s_key, s_value = zip(*d.items())
+            print('dict entries: {}: {}'.format(s_key, s_value))
+            s_value = s_value[0].split('_', 1)[-1]
+            print('S_VALUE: {}'.format(s_value))
+            struct_values = self.cl_struc2str(s_key[0], text_lookup_ids)
+            print('Extracted Value with key {}: {}'.format(s_key[0],
+                                                           struct_values))
+            for zip_key, zip_value in zip(text_lookup_ids, struct_values):
+                print('Text id as key is: {}'.format(zip_key))
+                print('Value of this text is: {}'.format(zip_value))
+                check = text_lookup.get(zip_key)
+                print('check: {}'.format(check))
+                text_lookup.setdefault(zip_key, {})[s_value] = zip_value
+
+        # zip keys and values together
+        attr_keys_list = list(cpos_infos.keys()) + struct_attr_keys
+        attr_values_list = list(cpos_infos.values()) + struct_attr_values
+        dict_cpos_infos = {}
+        for info in zip(all_cpos, *attr_values_list):
+            dict_cpos_infos[info[0]] = dict(zip(attr_keys_list, info[1:]))
+        return dict_cpos_infos, text_lookup
+
+    def get_sentences(self,
+                      match_cpos_list,
+                      get_surrounding_s=False,
+                      l_r_s_context_additional_len=1):
+        '''
+        Get sentence information for one match. Also set if and how many
+        sentences left and right of the match should be grabbed as context.
+
+        Keyword arguments:
+        match_cpos_list -- cpos of the match; the first and last entry
+        determine the sentences of the match
+        get_surrounding_s -- also collect neighbouring sentences
+        l_r_s_context_additional_len -- number of neighbouring sentences on
+        each side
+
+        Returns a dict with 'context_s_cpos' (sentence id -> cpos list),
+        'cpos_lookup', 'text_lookup' and 'match_cpos_list'.
+        '''
+        t0 = time.time()
+        s_attr = self.corpus_name + '.s'
+        first_cpos, last_cpos = match_cpos_list[0], match_cpos_list[-1]
+        context_sentences = {}
+        s_ids = self.cl_cpos2struc(s_attr, [first_cpos, last_cpos])
+        print('s id match: {}'.format(s_ids))
+        for s_id in s_ids:
+            s_start, s_end = self.cl_struc2cpos(s_attr, s_id)
+            context_sentences[s_id] = list(range(s_start, s_end + 1))
+        if get_surrounding_s:
+            max_s_id = self.cl_attribute_size(s_attr) - 1
+            print('max sid: {}'.format(max_s_id))
+            # Sentence ids from the left neighbours of the first sentence to
+            # the right neighbours of the last one, clamped to valid ids.
+            lowest = max(s_ids[0] - l_r_s_context_additional_len, 0)
+            highest = min(s_ids[-1] + l_r_s_context_additional_len, max_s_id)
+            for s_id in range(lowest, highest + 1):
+                print('s id additional: {}'.format(s_id))
+                s_start, s_end = self.cl_struc2cpos(s_attr, s_id)
+                context_sentences[s_id] = list(range(s_start, s_end + 1))
+        all_cpos = []
+        for s_cpos in context_sentences.values():
+            all_cpos.extend(s_cpos)
+        all_cpos = list(set(all_cpos))
+        all_cpos_infos, text_lookup = self.get_cpos_infos(all_cpos)
+        t1 = time.time()
+        t_total = t1 - t0
+        print('Got all sentences informations in {} seconds'. format(t_total))
+        match_context = {'context_s_cpos': context_sentences,
+                         'cpos_lookup': all_cpos_infos,
+                         'text_lookup': text_lookup,
+                         'match_cpos_list': match_cpos_list}
+        return match_context