diff --git a/app/corpora/CQiClient/CQi.py b/app/corpora/CQiClient/CQi.py
deleted file mode 100644
index 5d39395a..00000000
--- a/app/corpora/CQiClient/CQi.py
+++ /dev/null
@@ -1,406 +0,0 @@
-# ########################################################################### #
-# IMS CQi specification #
-# #
-# Version: 0.1a ;o) #
-# Author: Stefan Evert (evert@ims.uni-stuttgart.de) #
-# Modified by: Patrick Jentsch
#
-# ########################################################################### #
-
-
-""" 1. padding """
PAD = 0x00


# 2. CQi responses
# 2.1 CQI_STATUS_*
STATUS = 0x01
STATUS_OK = 0x0101
STATUS_CONNECT_OK = 0x0102
STATUS_BYE_OK = 0x0103
STATUS_PING_OK = 0x0104

# 2.2 CQI_ERROR_*
ERROR = 0x02
ERROR_GENERAL_ERROR = 0x0201
ERROR_CONNECT_REFUSED = 0x0202
ERROR_USER_ABORT = 0x0203
ERROR_SYNTAX_ERROR = 0x0204
# includes corpus/attribute/subcorpus specifier syntax

# 2.3 CQI_DATA_*
DATA = 0x03
DATA_BYTE = 0x0301
DATA_BOOL = 0x0302
DATA_INT = 0x0303
DATA_STRING = 0x0304
DATA_BYTE_LIST = 0x0305
DATA_BOOL_LIST = 0x0306
DATA_INT_LIST = 0x0307
DATA_STRING_LIST = 0x0308
DATA_INT_INT = 0x0309
DATA_INT_INT_INT_INT = 0x030A
DATA_INT_TABLE = 0x030B

# 2.4 CQI_CL_ERROR_*
# NOTE: some CL error codes are not represented in the CQi specs
#   - usually because they're not used in the CL any more
#   - CDA_ENOSTRING is not considered an error (returns -1)
#   - CDA_EARGS: dynamic attribute calls not yet supported
CL_ERROR = 0x04
CL_ERROR_NO_SUCH_ATTRIBUTE = 0x0401
# returned if CQi server couldn't open attribute
CL_ERROR_WRONG_ATTRIBUTE_TYPE = 0x0402
# CDA_EATTTYPE
CL_ERROR_OUT_OF_RANGE = 0x0403
# CDA_EIDORNG, CDA_EIDXORNG, CDA_EPOSORNG
CL_ERROR_REGEX = 0x0404
# CDA_EPATTERN (not used), CDA_EBADREGEX
CL_ERROR_CORPUS_ACCESS = 0x0405
# CDA_ENODATA
CL_ERROR_OUT_OF_MEMORY = 0x0406
# CDA_ENOMEM
# this means the CQi server has run out of memory;
# try discarding some other corpora and/or subcorpora
CL_ERROR_INTERNAL = 0x0407
# CDA_EOTHER, CDA_ENYI
# this is the classical 'please contact technical support' error

# 2.5 CQI_CQP_ERROR_*
CQP_ERROR = 0x05
# CQP error messages yet to be defined
CQP_ERROR_GENERAL = 0x0501
CQP_ERROR_NO_SUCH_CORPUS = 0x0502
CQP_ERROR_INVALID_FIELD = 0x0503
CQP_ERROR_OUT_OF_RANGE = 0x0504
# various cases where a number is out of range


# 3. CQi commands
# 3.1 CQI_CTRL_*
CTRL = 0x11
CTRL_CONNECT = 0x1101
# INPUT: (STRING username, STRING password)
# OUTPUT: CQI_STATUS_CONNECT_OK, CQI_ERROR_CONNECT_REFUSED
CTRL_BYE = 0x1102
# INPUT: ()
# OUTPUT: CQI_STATUS_BYE_OK
CTRL_USER_ABORT = 0x1103
# INPUT: ()
# OUTPUT:
CTRL_PING = 0x1104
# INPUT: ()
# OUTPUT: CQI_STATUS_PING_OK
CTRL_LAST_GENERAL_ERROR = 0x1105
# INPUT: ()
# OUTPUT: CQI_DATA_STRING
# full-text error message for the last general error reported by the CQi
# server

# 3.2 CQI_ASK_FEATURE_*
ASK_FEATURE = 0x12
ASK_FEATURE_CQI_1_0 = 0x1201
# INPUT: ()
# OUTPUT: CQI_DATA_BOOL
ASK_FEATURE_CL_2_3 = 0x1202
# INPUT: ()
# OUTPUT: CQI_DATA_BOOL
ASK_FEATURE_CQP_2_3 = 0x1203
# INPUT: ()
# OUTPUT: CQI_DATA_BOOL

# 3.3 CQI_CORPUS_*
CORPUS = 0x13
CORPUS_LIST_CORPORA = 0x1301
# INPUT: ()
# OUTPUT: CQI_DATA_STRING_LIST
CORPUS_CHARSET = 0x1303
# INPUT: (STRING corpus)
# OUTPUT: CQI_DATA_STRING
CORPUS_PROPERTIES = 0x1304
# INPUT: (STRING corpus)
# OUTPUT: CQI_DATA_STRING_LIST
CORPUS_POSITIONAL_ATTRIBUTES = 0x1305
# INPUT: (STRING corpus)
# OUTPUT: CQI_DATA_STRING_LIST
CORPUS_STRUCTURAL_ATTRIBUTES = 0x1306
# INPUT: (STRING corpus)
# OUTPUT: CQI_DATA_STRING_LIST
CORPUS_STRUCTURAL_ATTRIBUTE_HAS_VALUES = 0x1307
# INPUT: (STRING attribute)
# OUTPUT: CQI_DATA_BOOL
CORPUS_ALIGNMENT_ATTRIBUTES = 0x1308
# INPUT: (STRING corpus)
# OUTPUT: CQI_DATA_STRING_LIST
CORPUS_FULL_NAME = 0x1309
# INPUT: (STRING corpus)
# OUTPUT: CQI_DATA_STRING
# the full name of `corpus` as specified in its registry entry
CORPUS_INFO = 0x130A
# INPUT: (STRING corpus)
# OUTPUT: CQI_DATA_STRING_LIST
# returns the contents of the .info file of `corpus` as a list of lines
CORPUS_DROP_CORPUS = 0x130B
# INPUT: (STRING corpus)
# OUTPUT: CQI_STATUS_OK
# try to unload a corpus and all its attributes from memory

# 3.4 CQI_CL_*
CL = 0x14
# low-level corpus access (CL functions)
CL_ATTRIBUTE_SIZE = 0x1401
# INPUT: (STRING attribute)
# OUTPUT: CQI_DATA_INT
# returns the size of `attribute`:
#   - number of tokens (positional)
#   - number of regions (structural)
#   - number of alignments (alignment)
CL_LEXICON_SIZE = 0x1402
# INPUT: (STRING attribute)
# OUTPUT: CQI_DATA_INT
# returns the number of entries in the lexicon of a positional attribute;
# valid lexicon IDs range from 0 .. (lexicon_size - 1)
CL_DROP_ATTRIBUTE = 0x1403
# INPUT: (STRING attribute)
# OUTPUT: CQI_STATUS_OK
# unload attribute from memory

# NOTE: simple (scalar) mappings are applied to lists (the returned list has
#       exactly the same length as the list passed as an argument)
CL_STR2ID = 0x1404
# INPUT: (STRING attribute, STRING_LIST strings)
# OUTPUT: CQI_DATA_INT_LIST
# returns -1 for every string in `strings` that is not found in the lexicon
CL_ID2STR = 0x1405
# INPUT: (STRING attribute, INT_LIST id)
# OUTPUT: CQI_DATA_STRING_LIST
# returns "" for every ID in `id` that is out of range
CL_ID2FREQ = 0x1406
# INPUT: (STRING attribute, INT_LIST id)
# OUTPUT: CQI_DATA_INT_LIST
# returns 0 for every ID in `id` that is out of range
CL_CPOS2ID = 0x1407
# INPUT: (STRING attribute, INT_LIST cpos)
# OUTPUT: CQI_DATA_INT_LIST
# returns -1 for every corpus position in `cpos` that is out of range
CL_CPOS2STR = 0x1408
# INPUT: (STRING attribute, INT_LIST cpos)
# OUTPUT: CQI_DATA_STRING_LIST
# returns "" for every corpus position in `cpos` that is out of range
CL_CPOS2STRUC = 0x1409
# INPUT: (STRING attribute, INT_LIST cpos)
# OUTPUT: CQI_DATA_INT_LIST
# returns -1 for every corpus position not inside a structure region

# NOTE: temporary addition for the Euralex2000 tutorial, but should probably
#       be included in CQi specs
CL_CPOS2LBOUND = 0x1420
# INPUT: (STRING attribute, INT_LIST cpos)
# OUTPUT: CQI_DATA_INT_LIST
# returns left boundary of s-attribute region enclosing cpos, -1 if not in
# region
CL_CPOS2RBOUND = 0x1421
# INPUT: (STRING attribute, INT_LIST cpos)
# OUTPUT: CQI_DATA_INT_LIST
# returns right boundary of s-attribute region enclosing cpos, -1 if not in
# region
CL_CPOS2ALG = 0x140A
# INPUT: (STRING attribute, INT_LIST cpos)
# OUTPUT: CQI_DATA_INT_LIST
# returns -1 for every corpus position not inside an alignment
CL_STRUC2STR = 0x140B
# INPUT: (STRING attribute, INT_LIST strucs)
# OUTPUT: CQI_DATA_STRING_LIST
# returns annotated string values of structure regions in `strucs`; "" if
# out of range
# check CQI_CORPUS_STRUCTURAL_ATTRIBUTE_HAS_VALUES() first

# NOTE: the following mappings take a single argument and return multiple
#       values, including lists of arbitrary size
CL_ID2CPOS = 0x140C
# INPUT: (STRING attribute, INT id)
# OUTPUT: CQI_DATA_INT_LIST
# returns all corpus positions where the given token occurs
CL_IDLIST2CPOS = 0x140D
# INPUT: (STRING attribute, INT_LIST id_list)
# OUTPUT: CQI_DATA_INT_LIST
# returns all corpus positions where one of the tokens in `id_list`
# occurs; the returned list is sorted as a whole, not per token id
CL_REGEX2ID = 0x140E
# INPUT: (STRING attribute, STRING regex)
# OUTPUT: CQI_DATA_INT_LIST
# returns lexicon IDs of all tokens that match `regex`; the returned
# list may be empty (size 0);
CL_STRUC2CPOS = 0x140F
# INPUT: (STRING attribute, INT struc)
# OUTPUT: CQI_DATA_INT_INT
# returns start and end corpus positions of structure region `struc`
CL_ALG2CPOS = 0x1410
# INPUT: (STRING attribute, INT alg)
# OUTPUT: CQI_DATA_INT_INT_INT_INT
# returns (src_start, src_end, target_start, target_end)

# 3.5 CQI_CQP_*
CQP = 0x15
CQP_QUERY = 0x1501
# INPUT: (STRING mother_corpus, STRING subcorpus_name, STRING query)
# OUTPUT: CQI_STATUS_OK
# `query` must include the ';' character terminating the query.
CQP_LIST_SUBCORPORA = 0x1502
# INPUT: (STRING corpus)
# OUTPUT: CQI_DATA_STRING_LIST
CQP_SUBCORPUS_SIZE = 0x1503
# INPUT: (STRING subcorpus)
# OUTPUT: CQI_DATA_INT
CQP_SUBCORPUS_HAS_FIELD = 0x1504
# INPUT: (STRING subcorpus, BYTE field)
# OUTPUT: CQI_DATA_BOOL
CQP_DUMP_SUBCORPUS = 0x1505
# INPUT: (STRING subcorpus, BYTE field, INT first, INT last)
# OUTPUT: CQI_DATA_INT_LIST
# Dump the values of `field` for match ranges `first` .. `last` in
# `subcorpus`. `field` is one of the CQI_CONST_FIELD_* constants.
CQP_DROP_SUBCORPUS = 0x1509
# INPUT: (STRING subcorpus)
# OUTPUT: CQI_STATUS_OK
# delete a subcorpus from memory

# NOTE: The following two functions are temporarily included for the Euralex
#       2000 tutorial demo
# NOTE: frequency distribution of single tokens
CQP_FDIST_1 = 0x1510
# INPUT: (STRING subcorpus, INT cutoff, BYTE field, STRING attribute)
# OUTPUT: CQI_DATA_INT_LIST
# returns n (id, frequency) pairs flattened into a list of size 2*n
# field is one of CQI_CONST_FIELD_MATCH, CQI_CONST_FIELD_TARGET,
# CQI_CONST_FIELD_KEYWORD
# NB: pairs are sorted by frequency desc.
# NOTE: frequency distribution of pairs of tokens
CQP_FDIST_2 = 0x1511
# INPUT: (STRING subcorpus, INT cutoff, BYTE field1, STRING attribute1,
#         BYTE field2, STRING attribute2)
# OUTPUT: CQI_DATA_INT_LIST
# returns n (id1, id2, frequency) triples flattened into a list of size 3*n
# NB: triples are sorted by frequency desc.


# 4. Constant Definitions
CONST_FALSE = 0x00
CONST_NO = 0x00
CONST_TRUE = 0x01
CONST_YES = 0x01
# NOTE: The following constants specify which field will be returned by
#       CQI_CQP_DUMP_SUBCORPUS and some other subcorpus commands.
CONST_FIELD_MATCH = 0x10
CONST_FIELD_MATCHEND = 0x11
# NOTE: The constants specifying target0 .. target9 are guaranteed to have
#       the numerical values 0 .. 9, so clients do not need to look up the
#       constant values if they're handling arbitrary targets.
CONST_FIELD_TARGET_0 = 0x00
CONST_FIELD_TARGET_1 = 0x01
CONST_FIELD_TARGET_2 = 0x02
CONST_FIELD_TARGET_3 = 0x03
CONST_FIELD_TARGET_4 = 0x04
CONST_FIELD_TARGET_5 = 0x05
CONST_FIELD_TARGET_6 = 0x06
CONST_FIELD_TARGET_7 = 0x07
CONST_FIELD_TARGET_8 = 0x08
CONST_FIELD_TARGET_9 = 0x09
# NOTE: The following constants are provided for backward compatibility with
#       traditional CQP field names & while the generalised target concept
#       isn't yet implemented in the CQPserver.
CONST_FIELD_TARGET = 0x00
CONST_FIELD_KEYWORD = 0x09
# NOTE: CQi version is CQI_MAJOR_VERSION.CQI_MINOR_VERSION
MAJOR_VERSION = 0x00
MINOR_VERSION = 0x01


# 5. CQi lookup dictionary.
# Maps every two-byte command/response word to its symbolic 'CQI_*' name.
# The table is derived from the constants above instead of repeating each
# code as a decimal literal, so a code and its name cannot drift apart.
# A constant is a message word exactly when its high byte is one of the
# one-byte group prefixes; the prefixes themselves, PAD, CONST_* and the
# version numbers all have a zero high byte and are therefore left out.
_GROUPS = (STATUS, ERROR, DATA, CL_ERROR, CQP_ERROR,
           CTRL, ASK_FEATURE, CORPUS, CL, CQP)
lookup = {
    code: 'CQI_' + name
    # list(...) snapshots the namespace so it cannot change while iterating
    for name, code in list(globals().items())
    if name.isupper() and isinstance(code, int) and code >> 8 in _GROUPS
}
diff --git a/app/corpora/CQiClient/CQiClient.py b/app/corpora/CQiClient/CQiClient.py
deleted file mode 100644
index 39a24c4c..00000000
--- a/app/corpora/CQiClient/CQiClient.py
+++ /dev/null
@@ -1,611 +0,0 @@
-from . import CQi
-import socket
-import struct
-
-
class CQiClient:
    """Client for a CQi server reached over a TCP socket.

    Each public method sends one CQi command word followed by its
    arguments.  Methods whose documented OUTPUT is a CQI_DATA_* type
    return the decoded value; methods whose OUTPUT is a CQI_STATUS_* word
    return None.  Error responses (CQI_ERROR_*, CQI_CL_ERROR_*,
    CQI_CQP_ERROR_*) are raised as Exception carrying the symbolic name of
    the error code.
    """

    def __init__(self, host='127.0.0.1', port=4877):
        """Open a socket connection to the CQi server at (host, port)."""
        self.host = host
        self.port = port
        self.connection = socket.socket()
        self.connection.connect((self.host, self.port))

    def close(self):
        """Close the underlying socket."""
        self.connection.close()

    # ------------------------------------------------------------------ #
    # CQI_CTRL_*                                                         #
    # ------------------------------------------------------------------ #

    def ctrl_connect(self, username, password):
        """Send CQI_CTRL_CONNECT.

        INPUT: (STRING username, STRING password)
        OUTPUT: CQI_STATUS_CONNECT_OK, CQI_ERROR_CONNECT_REFUSED
        """
        self.__send_WORD(CQi.CTRL_CONNECT)
        self.__send_STRING(username)
        self.__send_STRING(password)
        self.__recv_response()

    def ctrl_bye(self):
        """Send CQI_CTRL_BYE.

        INPUT: ()
        OUTPUT: CQI_STATUS_BYE_OK
        """
        self.__send_WORD(CQi.CTRL_BYE)
        self.__recv_response()

    def ctrl_user_abort(self):
        """Send CQI_CTRL_USER_ABORT; no response is read.

        INPUT: ()
        OUTPUT:
        """
        self.__send_WORD(CQi.CTRL_USER_ABORT)

    def ctrl_ping(self):
        """Send CQI_CTRL_PING.

        INPUT: ()
        OUTPUT: CQI_STATUS_PING_OK
        """
        self.__send_WORD(CQi.CTRL_PING)
        self.__recv_response()

    def ctrl_last_general_error(self):
        """Return the full-text message of the last general error.

        INPUT: ()
        OUTPUT: CQI_DATA_STRING
        """
        self.__send_WORD(CQi.CTRL_LAST_GENERAL_ERROR)
        return self.__recv_response()

    # ------------------------------------------------------------------ #
    # CQI_ASK_FEATURE_*                                                  #
    # ------------------------------------------------------------------ #

    def ask_feature_cqi_1_0(self):
        """Send CQI_ASK_FEATURE_CQI_1_0.

        INPUT: ()
        OUTPUT: CQI_DATA_BOOL
        """
        self.__send_WORD(CQi.ASK_FEATURE_CQI_1_0)
        return self.__recv_response()

    def ask_feature_cl_2_3(self):
        """Send CQI_ASK_FEATURE_CL_2_3.

        INPUT: ()
        OUTPUT: CQI_DATA_BOOL
        """
        self.__send_WORD(CQi.ASK_FEATURE_CL_2_3)
        return self.__recv_response()

    def ask_feature_cqp_2_3(self):
        """Send CQI_ASK_FEATURE_CQP_2_3.

        INPUT: ()
        OUTPUT: CQI_DATA_BOOL
        """
        # Previously sent ASK_FEATURE_CL_2_3, i.e. asked the wrong question.
        self.__send_WORD(CQi.ASK_FEATURE_CQP_2_3)
        return self.__recv_response()

    # ------------------------------------------------------------------ #
    # CQI_CORPUS_*                                                       #
    # ------------------------------------------------------------------ #

    def corpus_list_corpora(self):
        """Send CQI_CORPUS_LIST_CORPORA.

        INPUT: ()
        OUTPUT: CQI_DATA_STRING_LIST
        """
        self.__send_WORD(CQi.CORPUS_LIST_CORPORA)
        return self.__recv_response()

    # The method was originally published under this misspelled name; the
    # alias keeps existing callers working.
    corpus_list_coprora = corpus_list_corpora

    def corpus_charset(self, corpus):
        """Send CQI_CORPUS_CHARSET.

        INPUT: (STRING corpus)
        OUTPUT: CQI_DATA_STRING
        """
        self.__send_WORD(CQi.CORPUS_CHARSET)
        self.__send_STRING(corpus)
        return self.__recv_response()

    def corpus_properties(self, corpus):
        """Send CQI_CORPUS_PROPERTIES.

        INPUT: (STRING corpus)
        OUTPUT: CQI_DATA_STRING_LIST
        """
        self.__send_WORD(CQi.CORPUS_PROPERTIES)
        self.__send_STRING(corpus)
        return self.__recv_response()

    def corpus_positional_attributes(self, corpus):
        """Send CQI_CORPUS_POSITIONAL_ATTRIBUTES.

        INPUT: (STRING corpus)
        OUTPUT: CQI_DATA_STRING_LIST
        """
        self.__send_WORD(CQi.CORPUS_POSITIONAL_ATTRIBUTES)
        self.__send_STRING(corpus)
        return self.__recv_response()

    def corpus_structural_attributes(self, corpus):
        """Send CQI_CORPUS_STRUCTURAL_ATTRIBUTES.

        INPUT: (STRING corpus)
        OUTPUT: CQI_DATA_STRING_LIST
        """
        self.__send_WORD(CQi.CORPUS_STRUCTURAL_ATTRIBUTES)
        self.__send_STRING(corpus)
        return self.__recv_response()

    def corpus_structural_attribute_has_values(self, attribute):
        """Send CQI_CORPUS_STRUCTURAL_ATTRIBUTE_HAS_VALUES.

        INPUT: (STRING attribute)
        OUTPUT: CQI_DATA_BOOL
        """
        self.__send_WORD(CQi.CORPUS_STRUCTURAL_ATTRIBUTE_HAS_VALUES)
        self.__send_STRING(attribute)
        return self.__recv_response()

    def corpus_alignment_attributes(self, corpus):
        """Send CQI_CORPUS_ALIGNMENT_ATTRIBUTES.

        INPUT: (STRING corpus)
        OUTPUT: CQI_DATA_STRING_LIST
        """
        self.__send_WORD(CQi.CORPUS_ALIGNMENT_ATTRIBUTES)
        self.__send_STRING(corpus)
        return self.__recv_response()

    def corpus_full_name(self, corpus):
        """Return the full name of corpus as given in its registry entry.

        INPUT: (STRING corpus)
        OUTPUT: CQI_DATA_STRING
        """
        self.__send_WORD(CQi.CORPUS_FULL_NAME)
        self.__send_STRING(corpus)
        return self.__recv_response()

    def corpus_info(self, corpus):
        """Return the contents of the .info file of corpus as lines.

        INPUT: (STRING corpus)
        OUTPUT: CQI_DATA_STRING_LIST
        """
        self.__send_WORD(CQi.CORPUS_INFO)
        self.__send_STRING(corpus)
        return self.__recv_response()

    def corpus_drop_corpus(self, corpus):
        """Try to unload a corpus and all its attributes from memory.

        INPUT: (STRING corpus)
        OUTPUT: CQI_STATUS_OK

        Marked as broken by the original author.
        TODO: Check what type of return value is provided by the server.
        """
        self.__send_WORD(CQi.CORPUS_DROP_CORPUS)
        self.__send_STRING(corpus)
        self.__recv_response()

    # ------------------------------------------------------------------ #
    # CQI_CL_* (low-level corpus access)                                 #
    # ------------------------------------------------------------------ #

    def cl_attribute_size(self, attribute):
        """Return the size of attribute.

        INPUT: (STRING attribute)
        OUTPUT: CQI_DATA_INT

        The size is the number of tokens (positional), regions
        (structural) or alignments (alignment).
        """
        self.__send_WORD(CQi.CL_ATTRIBUTE_SIZE)
        self.__send_STRING(attribute)
        return self.__recv_response()

    def cl_lexicon_size(self, attribute):
        """Return the number of lexicon entries of a positional attribute.

        INPUT: (STRING attribute)
        OUTPUT: CQI_DATA_INT

        Valid lexicon IDs range from 0 .. (lexicon_size - 1).
        """
        self.__send_WORD(CQi.CL_LEXICON_SIZE)
        self.__send_STRING(attribute)
        return self.__recv_response()

    def cl_drop_attribute(self, attribute):
        """Unload attribute from memory.

        INPUT: (STRING attribute)
        OUTPUT: CQI_STATUS_OK
        """
        # Previously sent CL_LEXICON_SIZE, so nothing was ever dropped.
        self.__send_WORD(CQi.CL_DROP_ATTRIBUTE)
        self.__send_STRING(attribute)
        self.__recv_response()

    # NOTE: simple (scalar) mappings are applied to lists (the returned list
    #       has exactly the same length as the list passed as an argument)

    def cl_str2id(self, attribute, strings):
        """Map strings to lexicon IDs.

        INPUT: (STRING attribute, STRING_LIST strings)
        OUTPUT: CQI_DATA_INT_LIST

        Returns -1 for every string that is not found in the lexicon.
        """
        # Previously sent CL_LEXICON_SIZE followed by a stray string list.
        self.__send_WORD(CQi.CL_STR2ID)
        self.__send_STRING(attribute)
        self.__send_STRING_LIST(strings)
        return self.__recv_response()

    def cl_id2str(self, attribute, id):
        """Map lexicon IDs to strings.

        INPUT: (STRING attribute, INT_LIST id)
        OUTPUT: CQI_DATA_STRING_LIST

        Returns an empty string for every ID that is out of range.
        """
        self.__send_WORD(CQi.CL_ID2STR)
        self.__send_STRING(attribute)
        self.__send_INT_LIST(id)
        return self.__recv_response()

    def cl_id2freq(self, attribute, id):
        """Map lexicon IDs to frequencies.

        INPUT: (STRING attribute, INT_LIST id)
        OUTPUT: CQI_DATA_INT_LIST

        Returns 0 for every ID that is out of range.
        """
        self.__send_WORD(CQi.CL_ID2FREQ)
        self.__send_STRING(attribute)
        self.__send_INT_LIST(id)
        return self.__recv_response()

    def cl_cpos2id(self, attribute, cpos):
        """Map corpus positions to lexicon IDs.

        INPUT: (STRING attribute, INT_LIST cpos)
        OUTPUT: CQI_DATA_INT_LIST

        Returns -1 for every corpus position that is out of range.
        """
        # Previously sent CL_ID2FREQ, which returned frequencies instead.
        self.__send_WORD(CQi.CL_CPOS2ID)
        self.__send_STRING(attribute)
        self.__send_INT_LIST(cpos)
        return self.__recv_response()

    def cl_cpos2str(self, attribute, cpos):
        """Map corpus positions to strings.

        INPUT: (STRING attribute, INT_LIST cpos)
        OUTPUT: CQI_DATA_STRING_LIST

        Returns an empty string for every corpus position out of range.
        """
        self.__send_WORD(CQi.CL_CPOS2STR)
        self.__send_STRING(attribute)
        self.__send_INT_LIST(cpos)
        return self.__recv_response()

    def cl_cpos2struc(self, attribute, cpos):
        """Map corpus positions to structure region numbers.

        INPUT: (STRING attribute, INT_LIST cpos)
        OUTPUT: CQI_DATA_INT_LIST

        Returns -1 for every corpus position not inside a structure
        region.
        """
        self.__send_WORD(CQi.CL_CPOS2STRUC)
        self.__send_STRING(attribute)
        self.__send_INT_LIST(cpos)
        return self.__recv_response()

    # NOTE: temporary addition for the Euralex2000 tutorial, but should
    #       probably be included in CQi specs

    def cl_cpos2lbound(self, attribute, cpos):
        """Return the left boundary of the s-attribute region of each cpos.

        INPUT: (STRING attribute, INT_LIST cpos)
        OUTPUT: CQI_DATA_INT_LIST

        Returns -1 for positions that are not in a region.
        """
        self.__send_WORD(CQi.CL_CPOS2LBOUND)
        self.__send_STRING(attribute)
        self.__send_INT_LIST(cpos)
        return self.__recv_response()

    def cl_cpos2rbound(self, attribute, cpos):
        """Return the right boundary of the s-attribute region of each cpos.

        INPUT: (STRING attribute, INT_LIST cpos)
        OUTPUT: CQI_DATA_INT_LIST

        Returns -1 for positions that are not in a region.
        """
        self.__send_WORD(CQi.CL_CPOS2RBOUND)
        self.__send_STRING(attribute)
        self.__send_INT_LIST(cpos)
        return self.__recv_response()

    def cl_cpos2alg(self, attribute, cpos):
        """Map corpus positions to alignment numbers.

        INPUT: (STRING attribute, INT_LIST cpos)
        OUTPUT: CQI_DATA_INT_LIST

        Returns -1 for every corpus position not inside an alignment.
        """
        self.__send_WORD(CQi.CL_CPOS2ALG)
        self.__send_STRING(attribute)
        self.__send_INT_LIST(cpos)
        return self.__recv_response()

    def cl_struc2str(self, attribute, strucs):
        """Return the annotated string values of structure regions.

        INPUT: (STRING attribute, INT_LIST strucs)
        OUTPUT: CQI_DATA_STRING_LIST

        Returns an empty string for regions out of range.  Check
        CQI_CORPUS_STRUCTURAL_ATTRIBUTE_HAS_VALUES() first.
        """
        self.__send_WORD(CQi.CL_STRUC2STR)
        self.__send_STRING(attribute)
        self.__send_INT_LIST(strucs)
        return self.__recv_response()

    # NOTE: the following mappings take a single argument and return
    #       multiple values, including lists of arbitrary size

    def cl_id2cpos(self, attribute, id):
        """Return all corpus positions where the given token occurs.

        INPUT: (STRING attribute, INT id)
        OUTPUT: CQI_DATA_INT_LIST
        """
        self.__send_WORD(CQi.CL_ID2CPOS)
        self.__send_STRING(attribute)
        self.__send_INT(id)
        return self.__recv_response()

    def cl_idlist2cpos(self, attribute, id_list):
        """Return all corpus positions where one of the tokens occurs.

        INPUT: (STRING attribute, INT_LIST id_list)
        OUTPUT: CQI_DATA_INT_LIST

        The returned list is sorted as a whole, not per token id.
        """
        self.__send_WORD(CQi.CL_IDLIST2CPOS)
        self.__send_STRING(attribute)
        self.__send_INT_LIST(id_list)
        return self.__recv_response()

    def cl_regex2id(self, attribute, regex):
        """Return the lexicon IDs of all tokens that match regex.

        INPUT: (STRING attribute, STRING regex)
        OUTPUT: CQI_DATA_INT_LIST

        The returned list may be empty (size 0).
        """
        self.__send_WORD(CQi.CL_REGEX2ID)
        self.__send_STRING(attribute)
        self.__send_STRING(regex)
        return self.__recv_response()

    def cl_struc2cpos(self, attribute, struc):
        """Return start and end corpus positions of structure region struc.

        INPUT: (STRING attribute, INT struc)
        OUTPUT: CQI_DATA_INT_INT
        """
        self.__send_WORD(CQi.CL_STRUC2CPOS)
        self.__send_STRING(attribute)
        self.__send_INT(struc)
        return self.__recv_response()

    def cl_alg2cpos(self, attribute, alg):
        """Return (src_start, src_end, target_start, target_end).

        INPUT: (STRING attribute, INT alg)
        OUTPUT: CQI_DATA_INT_INT_INT_INT
        """
        self.__send_WORD(CQi.CL_ALG2CPOS)
        self.__send_STRING(attribute)
        self.__send_INT(alg)
        return self.__recv_response()

    # ------------------------------------------------------------------ #
    # CQI_CQP_*                                                          #
    # ------------------------------------------------------------------ #

    def cqp_query(self, mother_corpus, subcorpus_name, query):
        """Run a CQP query and store the result as a subcorpus.

        INPUT: (STRING mother_corpus, STRING subcorpus_name, STRING query)
        OUTPUT: CQI_STATUS_OK

        query must include the ';' character terminating the query.
        """
        self.__send_WORD(CQi.CQP_QUERY)
        self.__send_STRING(mother_corpus)
        self.__send_STRING(subcorpus_name)
        self.__send_STRING(query)
        # Goes through the common response handler so that an error word
        # is raised instead of being read and silently thrown away.
        self.__recv_response()

    def cqp_list_subcorpora(self, corpus):
        """Send CQI_CQP_LIST_SUBCORPORA.

        INPUT: (STRING corpus)
        OUTPUT: CQI_DATA_STRING_LIST
        """
        self.__send_WORD(CQi.CQP_LIST_SUBCORPORA)
        self.__send_STRING(corpus)
        return self.__recv_response()

    def cqp_subcorpus_size(self, subcorpus):
        """Send CQI_CQP_SUBCORPUS_SIZE.

        INPUT: (STRING subcorpus)
        OUTPUT: CQI_DATA_INT
        """
        self.__send_WORD(CQi.CQP_SUBCORPUS_SIZE)
        self.__send_STRING(subcorpus)
        return self.__recv_response()

    def cqp_subcorpus_has_field(self, subcorpus, field):
        """Send CQI_CQP_SUBCORPUS_HAS_FIELD.

        INPUT: (STRING subcorpus, BYTE field)
        OUTPUT: CQI_DATA_BOOL
        """
        self.__send_WORD(CQi.CQP_SUBCORPUS_HAS_FIELD)
        self.__send_STRING(subcorpus)
        self.__send_BYTE(field)
        return self.__recv_response()

    def cqp_dump_subcorpus(self, subcorpus, field, first, last):
        """Dump the values of field for match ranges first .. last.

        INPUT: (STRING subcorpus, BYTE field, INT first, INT last)
        OUTPUT: CQI_DATA_INT_LIST

        field is one of the CQI_CONST_FIELD_* constants.
        """
        self.__send_WORD(CQi.CQP_DUMP_SUBCORPUS)
        self.__send_STRING(subcorpus)
        self.__send_BYTE(field)
        self.__send_INT(first)
        self.__send_INT(last)
        return self.__recv_response()

    def cqp_drop_subcorpus(self, subcorpus):
        """Delete a subcorpus from memory.

        INPUT: (STRING subcorpus)
        OUTPUT: CQI_STATUS_OK
        """
        self.__send_WORD(CQi.CQP_DROP_SUBCORPUS)
        self.__send_STRING(subcorpus)
        self.__recv_response()

    # NOTE: The following two functions are temporarily included for the
    #       Euralex 2000 tutorial demo

    def cqp_fdist_1(self, subcorpus, cutoff, field, attribute):
        """Return the frequency distribution of single tokens.

        INPUT: (STRING subcorpus, INT cutoff, BYTE field, STRING attribute)
        OUTPUT: CQI_DATA_INT_LIST

        The result holds (id, frequency) pairs flattened into one list.
        field is one of CQI_CONST_FIELD_MATCH, CQI_CONST_FIELD_TARGET,
        CQI_CONST_FIELD_KEYWORD.  Pairs are sorted by frequency desc.
        """
        self.__send_WORD(CQi.CQP_FDIST_1)
        self.__send_STRING(subcorpus)
        self.__send_INT(cutoff)
        self.__send_BYTE(field)
        self.__send_STRING(attribute)
        return self.__recv_response()

    def cqp_fdist_2(self, subcorpus, cutoff, field1, attribute1, field2,
                    attribute2):
        """Return the frequency distribution of pairs of tokens.

        INPUT: (STRING subcorpus, INT cutoff, BYTE field1,
                STRING attribute1, BYTE field2, STRING attribute2)
        OUTPUT: CQI_DATA_INT_LIST

        The result holds (id1, id2, frequency) triples flattened into one
        list.  Triples are sorted by frequency desc.
        """
        self.__send_WORD(CQi.CQP_FDIST_2)
        self.__send_STRING(subcorpus)
        self.__send_INT(cutoff)
        self.__send_BYTE(field1)
        self.__send_STRING(attribute1)
        self.__send_BYTE(field2)
        self.__send_STRING(attribute2)
        return self.__recv_response()

    # ------------------------------------------------------------------ #
    # Receiving                                                          #
    # ------------------------------------------------------------------ #

    def __recv_response(self):
        """Read one response word and decode whatever follows it.

        Returns the word itself for CQI_STATUS_* responses and the decoded
        payload for CQI_DATA_* responses.  Raises Exception with the
        symbolic error name for CQI_ERROR_*, CQI_CL_ERROR_* and
        CQI_CQP_ERROR_* responses, and for unknown response groups.
        """
        word = self.__recv_WORD()
        group = word >> 8  # the high byte selects the response group
        if group == CQi.STATUS:
            return word
        if group == CQi.DATA:
            return self.__recv_DATA(word)
        if group in (CQi.ERROR, CQi.CL_ERROR, CQi.CQP_ERROR):
            # .get() so an error code missing from the table is still
            # reported as an error rather than as a KeyError.
            raise Exception(CQi.lookup.get(
                word, 'Unknown error code: {}'.format(hex(word))
            ))
        raise Exception('Unknown response type: {}'.format(hex(group)))

    def __recv_DATA(self, data_type):
        """Decode the payload that follows the CQI_DATA_* word data_type."""
        readers = {
            CQi.DATA_BYTE: self.__recv_DATA_BYTE,
            CQi.DATA_BOOL: self.__recv_DATA_BOOL,
            CQi.DATA_INT: self.__recv_DATA_INT,
            CQi.DATA_STRING: self.__recv_DATA_STRING,
            CQi.DATA_BYTE_LIST: self.__recv_DATA_BYTE_LIST,
            CQi.DATA_BOOL_LIST: self.__recv_DATA_BOOL_LIST,
            CQi.DATA_INT_LIST: self.__recv_DATA_INT_LIST,
            CQi.DATA_STRING_LIST: self.__recv_DATA_STRING_LIST,
            CQi.DATA_INT_INT: self.__recv_DATA_INT_INT,
            CQi.DATA_INT_INT_INT_INT: self.__recv_DATA_INT_INT_INT_INT,
            CQi.DATA_INT_TABLE: self.__recv_DATA_INT_TABLE,
        }
        reader = readers.get(data_type)
        if reader is None:
            raise Exception('Unknown data type: {}'.format(hex(data_type)))
        return reader()

    def __recv_exact(self, n):
        """Read exactly n bytes from the connection.

        socket.recv() may hand back fewer bytes than requested, so keep
        reading until the requested amount has arrived.  Raises
        ConnectionError if the peer closes the connection first.
        """
        chunks = []
        remaining = n
        while remaining > 0:
            chunk = self.connection.recv(remaining)
            if not chunk:
                raise ConnectionError(
                    'Connection closed with {} byte(s) outstanding'.format(
                        remaining
                    )
                )
            chunks.append(chunk)
            remaining -= len(chunk)
        return b''.join(chunks)

    def __recv_DATA_BYTE(self):
        """Read one unsigned byte."""
        return struct.unpack('!B', self.__recv_exact(1))[0]

    def __recv_DATA_BOOL(self):
        """Read one byte as a bool."""
        return struct.unpack('!?', self.__recv_exact(1))[0]

    def __recv_DATA_INT(self):
        """Read one signed 32-bit integer in network byte order."""
        return struct.unpack('!i', self.__recv_exact(4))[0]

    def __recv_DATA_STRING(self):
        """Read a string: a WORD byte count followed by that many bytes."""
        n = self.__recv_WORD()
        return self.__recv_exact(n).decode()

    def __recv_DATA_BYTE_LIST(self):
        """Read an INT count followed by that many bytes."""
        n = self.__recv_DATA_INT()
        return [self.__recv_DATA_BYTE() for _ in range(n)]

    def __recv_DATA_BOOL_LIST(self):
        """Read an INT count followed by that many bools."""
        n = self.__recv_DATA_INT()
        return [self.__recv_DATA_BOOL() for _ in range(n)]

    def __recv_DATA_INT_LIST(self):
        """Read an INT count followed by that many integers."""
        n = self.__recv_DATA_INT()
        return [self.__recv_DATA_INT() for _ in range(n)]

    def __recv_DATA_STRING_LIST(self):
        """Read an INT count followed by that many strings."""
        n = self.__recv_DATA_INT()
        return [self.__recv_DATA_STRING() for _ in range(n)]

    def __recv_DATA_INT_INT(self):
        """Read two integers and return them as a tuple."""
        # Previously called a non-existent __recv_INT helper.
        first = self.__recv_DATA_INT()
        second = self.__recv_DATA_INT()
        return (first, second)

    def __recv_DATA_INT_INT_INT_INT(self):
        """Read four integers and return them as a tuple."""
        # Previously called a non-existent __recv_INT helper.
        first = self.__recv_DATA_INT()
        second = self.__recv_DATA_INT()
        third = self.__recv_DATA_INT()
        fourth = self.__recv_DATA_INT()
        return (first, second, third, fourth)

    def __recv_DATA_INT_TABLE(self):
        """Read an integer table: row count, column count, then the cells
        row by row.  Returns a list of rows."""
        rows = self.__recv_DATA_INT()
        columns = self.__recv_DATA_INT()
        return [
            [self.__recv_DATA_INT() for _ in range(columns)]
            for _ in range(rows)
        ]

    def __recv_WORD(self):
        """Read one unsigned 16-bit word in network byte order."""
        return struct.unpack('!H', self.__recv_exact(2))[0]

    # ------------------------------------------------------------------ #
    # Sending                                                            #
    # ------------------------------------------------------------------ #

    def __send_BYTE(self, byte_data):
        """Send one unsigned byte."""
        self.connection.sendall(struct.pack('!B', byte_data))

    def __send_BOOL(self, bool_data):
        """Send one bool as a single byte."""
        self.connection.sendall(struct.pack('!?', bool_data))

    def __send_INT(self, int_data):
        """Send one signed 32-bit integer in network byte order."""
        self.connection.sendall(struct.pack('!i', int_data))

    def __send_STRING(self, string_data):
        """Send a string as a WORD byte count plus its UTF-8 bytes."""
        encoded = string_data.encode('utf-8')
        # The count is the encoded length, not the number of characters.
        self.connection.sendall(struct.pack('!H', len(encoded)) + encoded)

    def __send_INT_LIST(self, int_list_data):
        """Send an INT count followed by each integer."""
        self.__send_INT(len(int_list_data))
        for int_data in int_list_data:
            self.__send_INT(int_data)

    def __send_STRING_LIST(self, string_list_data):
        """Send an INT count followed by each string."""
        self.__send_INT(len(string_list_data))
        for string_data in string_list_data:
            self.__send_STRING(string_data)

    def __send_WORD(self, word_data):
        """Send one unsigned 16-bit word in network byte order."""
        self.connection.sendall(struct.pack('!H', word_data))