diff --git a/app/corpora/cqi/api/client.py b/app/corpora/cqi/api/client.py index 6a9460db..7df30997 100644 --- a/app/corpora/cqi/api/client.py +++ b/app/corpora/cqi/api/client.py @@ -1,421 +1,28 @@ from time import sleep +from .. import errors, specification import socket import struct -# ########################################################################### # -# IMS CQi specification # -# # -# Version: 0.1a ;o) # -# Author: Stefan Evert (evert@ims.uni-stuttgart.de) # -# Modified by (codestyle): Patrick Jentsch (p.jentsch@uni-bielefeld.de) # -# Modified date: Thurs Oct 10 # -# ########################################################################### # -""" 1. padding """ -PAD = 0x00 - - -""" 2. CQi responses """ -""" 2.1 CQI_STATUS_* """ -STATUS = 0x01 -STATUS_OK = 0x0101 -STATUS_CONNECT_OK = 0x0102 -STATUS_BYE_OK = 0x0103 -STATUS_PING_OK = 0x0104 - -""" 2.2 CQI_ERROR_* """ -ERROR = 0x02 -ERROR_GENERAL_ERROR = 0x0201 -ERROR_CONNECT_REFUSED = 0x0202 -ERROR_USER_ABORT = 0x0203 -ERROR_SYNTAX_ERROR = 0x0204 -# includes corpus/attribute/subcorpus specifier syntax - -""" 2.3 CQI_DATA_* """ -DATA = 0x03 -DATA_BYTE = 0x0301 -DATA_BOOL = 0x0302 -DATA_INT = 0x0303 -DATA_STRING = 0x0304 -DATA_BYTE_LIST = 0x0305 -DATA_BOOL_LIST = 0x0306 -DATA_INT_LIST = 0x0307 -DATA_STRING_LIST = 0x0308 -DATA_INT_INT = 0x0309 -DATA_INT_INT_INT_INT = 0x030A -DATA_INT_TABLE = 0x030B - -""" 2.4 CQI_CL_ERROR_* """ -""" -" NOTE: some CL error codes are not represented in the CQi specs -" - usually because they're not used in the CL any more -" - CDA_ENOSTRING is not considered an error (returns -1) -" - CDA_EARGS: dynamic attribute calls not yet supported -""" -CL_ERROR = 0x04 -CL_ERROR_NO_SUCH_ATTRIBUTE = 0x0401 -# returned if CQi server couldn't open attribute -CL_ERROR_WRONG_ATTRIBUTE_TYPE = 0x0402 -# CDA_EATTTYPE -CL_ERROR_OUT_OF_RANGE = 0x0403 -# CDA_EIDORNG, CDA_EIDXORNG, CDA_EPOSORNG -CL_ERROR_REGEX = 0x0404 -# CDA_EPATTERN (not used), CDA_EBADREGEX -CL_ERROR_CORPUS_ACCESS = 0x0405 -# 
CDA_ENODATA -CL_ERROR_OUT_OF_MEMORY = 0x0406 -# CDA_ENOMEM -# this means the CQi server has run out of memory; -# try discarding some other corpora and/or subcorpora -CL_ERROR_INTERNAL = 0x0407 -# CDA_EOTHER, CDA_ENYI -# this is the classical 'please contact technical support' error - -""" 2.5 CQI_CQP_ERROR_* """ -CQP_ERROR = 0x05 -# CQP error messages yet to be defined -CQP_ERROR_GENERAL = 0x0501 -CQP_ERROR_NO_SUCH_CORPUS = 0x0502 -CQP_ERROR_INVALID_FIELD = 0x0503 -CQP_ERROR_OUT_OF_RANGE = 0x0504 -# various cases where a number is out of range - - -""" 3. CQi commands """ -""" 3.1 CQI_CTRL_* """ -CTRL = 0x11 -CTRL_CONNECT = 0x1101 -# INPUT: (STRING username, STRING password) -# OUTPUT: CQI_STATUS_CONNECT_OK, CQI_ERROR_CONNECT_REFUSED -CTRL_BYE = 0x1102 -# INPUT: () -# OUTPUT: CQI_STATUS_BYE_OK -CTRL_USER_ABORT = 0x1103 -# INPUT: () -# OUTPUT: -CTRL_PING = 0x1104 -# INPUT: () -# OUTPUT: CQI_STATUS_PING_OK -CTRL_LAST_GENERAL_ERROR = 0x1105 -# INPUT: () -# OUTPUT: CQI_DATA_STRING -# full-text error message for the last general error reported by the CQi server - -""" 3.2 CQI_ASK_FEATURE_* """ -ASK_FEATURE = 0x12 -ASK_FEATURE_CQI_1_0 = 0x1201 -# INPUT: () -# OUTPUT: CQI_DATA_BOOL -ASK_FEATURE_CL_2_3 = 0x1202 -# INPUT: () -# OUTPUT: CQI_DATA_BOOL -ASK_FEATURE_CQP_2_3 = 0x1203 -# INPUT: () -# OUTPUT: CQI_DATA_BOOL - -""" 3.3 CQI_CORPUS_* """ -CORPUS = 0x13 -CORPUS_LIST_CORPORA = 0x1301 -# INPUT: () -# OUTPUT: CQI_DATA_STRING_LIST -CORPUS_CHARSET = 0x1303 -# INPUT: (STRING corpus) -# OUTPUT: CQI_DATA_STRING -CORPUS_PROPERTIES = 0x1304 -# INPUT: (STRING corpus) -# OUTPUT: CQI_DATA_STRING_LIST -CORPUS_POSITIONAL_ATTRIBUTES = 0x1305 -# INPUT: (STRING corpus) -# OUTPUT: CQI_DATA_STRING_LIST -CORPUS_STRUCTURAL_ATTRIBUTES = 0x1306 -# INPUT: (STRING corpus) -# OUTPUT: CQI_DATA_STRING_LIST -CORPUS_STRUCTURAL_ATTRIBUTE_HAS_VALUES = 0x1307 -# INPUT: (STRING attribute) -# OUTPUT: CQI_DATA_BOOL -CORPUS_ALIGNMENT_ATTRIBUTES = 0x1308 -# INPUT: (STRING corpus) -# OUTPUT: 
CQI_DATA_STRING_LIST -CORPUS_FULL_NAME = 0x1309 -# INPUT: (STRING corpus) -# OUTPUT: CQI_DATA_STRING -# the full name of as specified in its registry entry -CORPUS_INFO = 0x130A -# INPUT: (STRING corpus) -# OUTPUT: CQI_DATA_STRING_LIST -# returns the contents of the .info file of as a list of lines -CORPUS_DROP_CORPUS = 0x130B -# INPUT: (STRING corpus) -# OUTPUT: CQI_STATUS_OK -# try to unload a corpus and all its attributes from memory - -""" 3.4 CQI_CL_* """ -CL = 0x14 -# low-level corpus access (CL functions) -CL_ATTRIBUTE_SIZE = 0x1401 -# INPUT: (STRING attribute) -# OUTPUT: CQI_DATA_INT -# returns the size of : -# - number of tokens (positional) -# - number of regions (structural) -# - number of alignments (alignment) -CL_LEXICON_SIZE = 0x1402 -# INPUT: (STRING attribute) -# OUTPUT: CQI_DATA_INT -# returns the number of entries in the lexicon of a positional attribute; -# valid lexicon IDs range from 0 .. (lexicon_size - 1) -CL_DROP_ATTRIBUTE = 0x1403 -# INPUT: (STRING attribute) -# OUTPUT: CQI_STATUS_OK -# unload attribute from memory -""" -" NOTE: simple (scalar) mappings are applied to lists (the returned list has -" exactly the same length as the list passed as an argument) -""" -CL_STR2ID = 0x1404 -# INPUT: (STRING attribute, STRING_LIST strings) -# OUTPUT: CQI_DATA_INT_LIST -# returns -1 for every string in that is not found in the lexicon -CL_ID2STR = 0x1405 -# INPUT: (STRING attribute, INT_LIST id) -# OUTPUT: CQI_DATA_STRING_LIST -# returns "" for every ID in that is out of range -CL_ID2FREQ = 0x1406 -# INPUT: (STRING attribute, INT_LIST id) -# OUTPUT: CQI_DATA_INT_LIST -# returns 0 for every ID in that is out of range -CL_CPOS2ID = 0x1407 -# INPUT: (STRING attribute, INT_LIST cpos) -# OUTPUT: CQI_DATA_INT_LIST -# returns -1 for every corpus position in that is out of range -CL_CPOS2STR = 0x1408 -# INPUT: (STRING attribute, INT_LIST cpos) -# OUTPUT: CQI_DATA_STRING_LIST -# returns "" for every corpus position in that is out of range -CL_CPOS2STRUC = 
0x1409 -# INPUT: (STRING attribute, INT_LIST cpos) -# OUTPUT: CQI_DATA_INT_LIST -# returns -1 for every corpus position not inside a structure region -""" -" NOTE: temporary addition for the Euralex2000 tutorial, but should probably be -" included in CQi specs -""" -CL_CPOS2LBOUND = 0x1420 -# INPUT: (STRING attribute, INT_LIST cpos) -# OUTPUT: CQI_DATA_INT_LIST -# returns left boundary of s-attribute region enclosing cpos, -1 if not in -# region -CL_CPOS2RBOUND = 0x1421 -# INPUT: (STRING attribute, INT_LIST cpos) -# OUTPUT: CQI_DATA_INT_LIST -# returns right boundary of s-attribute region enclosing cpos, -1 if not in -# region -CL_CPOS2ALG = 0x140A -# INPUT: (STRING attribute, INT_LIST cpos) -# OUTPUT: CQI_DATA_INT_LIST -# returns -1 for every corpus position not inside an alignment -CL_STRUC2STR = 0x140B -# INPUT: (STRING attribute, INT_LIST strucs) -# OUTPUT: CQI_DATA_STRING_LIST -# returns annotated string values of structure regions in ; "" if out -# of range -# check CQI_CORPUS_STRUCTURAL_ATTRIBUTE_HAS_VALUES() first -""" -" NOTE: the following mappings take a single argument and return multiple -" values, including lists of arbitrary size -""" -CL_ID2CPOS = 0x140C -# INPUT: (STRING attribute, INT id) -# OUTPUT: CQI_DATA_INT_LIST -# returns all corpus positions where the given token occurs -CL_IDLIST2CPOS = 0x140D -# INPUT: (STRING attribute, INT_LIST id_list) -# OUTPUT: CQI_DATA_INT_LIST -# returns all corpus positions where one of the tokens in -# occurs; the returned list is sorted as a whole, not per token id -CL_REGEX2ID = 0x140E -# INPUT: (STRING attribute, STRING regex) -# OUTPUT: CQI_DATA_INT_LIST -# returns lexicon IDs of all tokens that match ; the returned -# list may be empty (size 0); -CL_STRUC2CPOS = 0x140F -# INPUT: (STRING attribute, INT struc) -# OUTPUT: CQI_DATA_INT_INT -# returns start and end corpus positions of structure region -CL_ALG2CPOS = 0x1410 -# INPUT: (STRING attribute, INT alg) -# OUTPUT: CQI_DATA_INT_INT_INT_INT -# returns 
(src_start, src_end, target_start, target_end) - -""" 3.5 CQI_CQP_* """ -CQP = 0x15 -CQP_QUERY = 0x1501 -# INPUT: (STRING mother_corpus, STRING subcorpus_name, STRING query) -# OUTPUT: CQI_STATUS_OK -# must include the ';' character terminating the query. -CQP_LIST_SUBCORPORA = 0x1502 -# INPUT: (STRING corpus) -# OUTPUT: CQI_DATA_STRING_LIST -CQP_SUBCORPUS_SIZE = 0x1503 -# INPUT: (STRING subcorpus) -# OUTPUT: CQI_DATA_INT -CQP_SUBCORPUS_HAS_FIELD = 0x1504 -# INPUT: (STRING subcorpus, BYTE field) -# OUTPUT: CQI_DATA_BOOL -CQP_DUMP_SUBCORPUS = 0x1505 -# INPUT: (STRING subcorpus, BYTE field, INT first, INT last) -# OUTPUT: CQI_DATA_INT_LIST -# Dump the values of for match ranges .. in . -# is one of the CQI_CONST_FIELD_* constants. -CQP_DROP_SUBCORPUS = 0x1509 -# INPUT: (STRING subcorpus) -# OUTPUT: CQI_STATUS_OK -# delete a subcorpus from memory -""" -" NOTE: The following two functions are temporarily included for the Euralex -" 2000 tutorial demo -""" -""" NOTE: frequency distribution of single tokens """ -CQP_FDIST_1 = 0x1510 -# INPUT: (STRING subcorpus, INT cutoff, BYTE field, STRING attribute) -# OUTPUT: CQI_DATA_INT_LIST -# returns (id, frequency) pairs flattened into a list of size 2* -# field is one of CQI_CONST_FIELD_MATCH, CQI_CONST_FIELD_TARGET, -# CQI_CONST_FIELD_KEYWORD -# NB: pairs are sorted by frequency desc. -""" NOTE: frequency distribution of pairs of tokens """ -CQP_FDIST_2 = 0x1511 -# INPUT: (STRING subcorpus, INT cutoff, BYTE field1, STRING attribute1, -# BYTE field2, STRING attribute2) -# OUTPUT: CQI_DATA_INT_LIST -# returns (id1, id2, frequency) pairs flattened into a list of size 3* -# NB: triples are sorted by frequency desc. - - -""" 4. Constant Definitions """ -CONST_FALSE = 0x00 -CONST_NO = 0x00 -CONST_TRUE = 0x01 -CONST_YES = 0x01 -""" -" NOTE: The following constants specify which field will be returned by -" CQI_CQP_DUMP_SUBCORPUS and some other subcorpus commands. 
-""" -CONST_FIELD_MATCH = 0x10 -CONST_FIELD_MATCHEND = 0x11 -""" -" NOTE: The constants specifiying target0 .. target9 are guaranteed to have the -" numerical values 0 .. 9, so clients do not need to look up the constant -" values if they're handling arbitrary targets. -""" -CONST_FIELD_TARGET_0 = 0x00 -CONST_FIELD_TARGET_1 = 0x01 -CONST_FIELD_TARGET_2 = 0x02 -CONST_FIELD_TARGET_3 = 0x03 -CONST_FIELD_TARGET_4 = 0x04 -CONST_FIELD_TARGET_5 = 0x05 -CONST_FIELD_TARGET_6 = 0x06 -CONST_FIELD_TARGET_7 = 0x07 -CONST_FIELD_TARGET_8 = 0x08 -CONST_FIELD_TARGET_9 = 0x09 -""" -" NOTE: The following constants are provided for backward compatibility with -" traditional CQP field names & while the generalised target concept -" isn't yet implemented in the CQPserver. -""" -CONST_FIELD_TARGET = 0x00 -CONST_FIELD_KEYWORD = 0x09 -""" NOTE: CQi version is CQI_MAJOR_VERSION.CQI_MINOR_VERSION """ -MAJOR_VERSION = 0x00 -MINOR_VERSION = 0x01 - - -""" 5. CQi lookup dictionary. """ -lookup = { - 257: 'CQI_STATUS_OK', - 258: 'CQI_STATUS_CONNECT_OK', - 259: 'CQI_STATUS_BYE_OK', - 260: 'CQI_STATUS_PING_OK', - 513: 'CQI_ERROR_GENERAL_ERROR', - 514: 'CQI_ERROR_CONNECT_REFUSED', - 515: 'CQI_ERROR_USER_ABORT', - 516: 'CQI_ERROR_SYNTAX_ERROR', - 769: 'CQI_DATA_BYTE', - 770: 'CQI_DATA_BOOL', - 771: 'CQI_DATA_INT', - 772: 'CQI_DATA_STRING', - 773: 'CQI_DATA_BYTE_LIST', - 774: 'CQI_DATA_BOOL_LIST', - 775: 'CQI_DATA_INT_LIST', - 776: 'CQI_DATA_STRING_LIST', - 777: 'CQI_DATA_INT_INT', - 778: 'CQI_DATA_INT_INT_INT_INT', - 779: 'CQI_DATA_INT_TABLE', - 1025: 'CQI_CL_ERROR_NO_SUCH_ATTRIBUTE', - 1026: 'CQI_CL_ERROR_WRONG_ATTRIBUTE_TYPE', - 1027: 'CQI_CL_ERROR_OUT_OF_RANGE', - 1028: 'CQI_CL_ERROR_REGEX', - 1029: 'CQI_CL_ERROR_CORPUS_ACCESS', - 1030: 'CQI_CL_ERROR_OUT_OF_MEMORY', - 1031: 'CQI_CL_ERROR_INTERNAL', - 1281: 'CQI_CQP_ERROR_GENERAL', - 1282: 'CQI_CQP_ERROR_NO_SUCH_CORPUS', - 1283: 'CQI_CQP_ERROR_INVALID_FIELD', - 1284: 'CQI_CQP_ERROR_OUT_OF_RANGE', - 4353: 'CQI_CTRL_CONNECT', - 4354: 'CQI_CTRL_BYE', 
- 4355: 'CQI_CTRL_USER_ABORT', - 4356: 'CQI_CTRL_PING', - 4357: 'CQI_CTRL_LAST_GENERAL_ERROR', - 4609: 'CQI_ASK_FEATURE_CQI_1_0', - 4610: 'CQI_ASK_FEATURE_CL_2_3', - 4611: 'CQI_ASK_FEATURE_CQP_2_3', - 4865: 'CQI_CORPUS_LIST_CORPORA', - 4867: 'CQI_CORPUS_CHARSET', - 4868: 'CQI_CORPUS_PROPERTIES', - 4869: 'CQI_CORPUS_POSITIONAL_ATTRIBUTES', - 4870: 'CQI_CORPUS_STRUCTURAL_ATTRIBUTES', - 4871: 'CQI_CORPUS_STRUCTURAL_ATTRIBUTE_HAS_VALUES', - 4872: 'CQI_CORPUS_ALIGNMENT_ATTRIBUTES', - 4873: 'CQI_CORPUS_FULL_NAME', - 4874: 'CQI_CORPUS_INFO', - 4875: 'CQI_CORPUS_DROP_CORPUS', - 5121: 'CQI_CL_ATTRIBUTE_SIZE', - 5122: 'CQI_CL_LEXICON_SIZE', - 5123: 'CQI_CL_DROP_ATTRIBUTE', - 5124: 'CQI_CL_STR2ID', - 5125: 'CQI_CL_ID2STR', - 5126: 'CQI_CL_ID2FREQ', - 5127: 'CQI_CL_CPOS2ID', - 5128: 'CQI_CL_CPOS2STR', - 5129: 'CQI_CL_CPOS2STRUC', - 5130: 'CQI_CL_CPOS2ALG', - 5131: 'CQI_CL_STRUC2STR', - 5132: 'CQI_CL_ID2CPOS', - 5133: 'CQI_CL_IDLIST2CPOS', - 5134: 'CQI_CL_REGEX2ID', - 5135: 'CQI_CL_STRUC2CPOS', - 5136: 'CQI_CL_ALG2CPOS', - 5152: 'CQI_CL_CPOS2LBOUND', - 5153: 'CQI_CL_CPOS2RBOUND', - 5377: 'CQI_CQP_QUERY', - 5378: 'CQI_CQP_LIST_SUBCORPORA', - 5379: 'CQI_CQP_SUBCORPUS_SIZE', - 5380: 'CQI_CQP_SUBCORPUS_HAS_FIELD', - 5381: 'CQI_CQP_DUMP_SUBCORPUS', - 5385: 'CQI_CQP_DROP_SUBCORPUS', - 5392: 'CQI_CQP_FDIST_1', - 5393: 'CQI_CQP_FDIST_2' -} - - -# ########################################################################### # -# IMS CQi client # -# # -# Version: 0.1a # -# Author: Patrick Jentsch (p.jentsch@uni-bielefeld.de) # -# ########################################################################### # class APIClient: + """ + A low-level client for the IMS Open Corpus Workbench (CWB) corpus query + interface (CQi) API. 
+ + Example: + >>> import cqi + >>> client = cqi.APIClient('127.0.0.1') + >>> client.ctrl_connect('user', 'password') + {'code': 258, 'msg': 'CQI_STATUS_CONNECT_OK'} + >>> client.ctrl_bye() + {'code': 259, 'msg': 'CQI_STATUS_BYE_OK'} + + Args: + host (str): URL to the CQP server. For example, + ``cqpserver.localhost`` or ``127.0.0.1``. + port (int): Port the CQP server listens on. Default: ``4877`` + """ + def __init__(self, host, port=4877): self.host = host self.port = port @@ -425,7 +32,7 @@ class APIClient: self.socket.connect((self.host, self.port)) # INPUT: (STRING username, STRING password) # OUTPUT: CQI_STATUS_CONNECT_OK, CQI_ERROR_CONNECT_REFUSED - self.__send_WORD(CTRL_CONNECT) + self.__send_WORD(specification.CTRL_CONNECT) self.__send_STRING(username) self.__send_STRING(password) return self.__recv_response() @@ -433,7 +40,7 @@ class APIClient: def ctrl_bye(self): # INPUT: () # OUTPUT: CQI_STATUS_BYE_OK - self.__send_WORD(CTRL_BYE) + self.__send_WORD(specification.CTRL_BYE) response = self.__recv_response() self.socket.close() return response @@ -441,12 +48,12 @@ class APIClient: def ctrl_user_abort(self): # INPUT: () # OUTPUT: - self.__send_WORD(CTRL_USER_ABORT) + self.__send_WORD(specification.CTRL_USER_ABORT) def ctrl_ping(self): # INPUT: () # OUTPUT: CQI_STATUS_PING_OK - self.__send_WORD(CTRL_PING) + self.__send_WORD(specification.CTRL_PING) return self.__recv_response() def ctrl_last_general_error(self): @@ -454,72 +61,72 @@ class APIClient: # OUTPUT: CQI_DATA_STRING # full-text error message for the last general error reported by the # CQi server - self.__send_WORD(CTRL_LAST_GENERAL_ERROR) + self.__send_WORD(specification.CTRL_LAST_GENERAL_ERROR) return self.__recv_response() def ask_feature_cqi_1_0(self): # INPUT: () # OUTPUT: CQI_DATA_BOOL - self.__send_WORD(ASK_FEATURE_CQI_1_0) + self.__send_WORD(specification.ASK_FEATURE_CQI_1_0) return self.__recv_response() def ask_feature_cl_2_3(self): # INPUT: () # OUTPUT: CQI_DATA_BOOL - 
self.__send_WORD(ASK_FEATURE_CL_2_3) + self.__send_WORD(specification.ASK_FEATURE_CL_2_3) return self.__recv_response() def ask_feature_cqp_2_3(self): # INPUT: () # OUTPUT: CQI_DATA_BOOL - self.__send_WORD(ASK_FEATURE_CL_2_3) + self.__send_WORD(specification.ASK_FEATURE_CQP_2_3) return self.__recv_response() def corpus_list_coprora(self): # INPUT: () # OUTPUT: CQI_DATA_STRING_LIST - self.__send_WORD(CORPUS_LIST_CORPORA) + self.__send_WORD(specification.CORPUS_LIST_CORPORA) return self.__recv_response() def corpus_charset(self, corpus): # INPUT: (STRING corpus) # OUTPUT: CQI_DATA_STRING - self.__send_WORD(CORPUS_CHARSET) + self.__send_WORD(specification.CORPUS_CHARSET) self.__send_STRING(corpus) return self.__recv_response() def corpus_properties(self, corpus): # INPUT: (STRING corpus) # OUTPUT: CQI_DATA_STRING_LIST - self.__send_WORD(CORPUS_PROPERTIES) + self.__send_WORD(specification.CORPUS_PROPERTIES) self.__send_STRING(corpus) return self.__recv_response() def corpus_positional_attributes(self, corpus): # INPUT: (STRING corpus) # OUTPUT: CQI_DATA_STRING_LIST - self.__send_WORD(CORPUS_POSITIONAL_ATTRIBUTES) + self.__send_WORD(specification.CORPUS_POSITIONAL_ATTRIBUTES) self.__send_STRING(corpus) return self.__recv_response() def corpus_structural_attributes(self, corpus): # INPUT: (STRING corpus) # OUTPUT: CQI_DATA_STRING_LIST - self.__send_WORD(CORPUS_STRUCTURAL_ATTRIBUTES) + self.__send_WORD(specification.CORPUS_STRUCTURAL_ATTRIBUTES) self.__send_STRING(corpus) return self.__recv_response() def corpus_structural_attribute_has_values(self, attribute): # INPUT: (STRING attribute) # OUTPUT: CQI_DATA_BOOL - self.__send_WORD(CORPUS_STRUCTURAL_ATTRIBUTE_HAS_VALUES) + self.__send_WORD(specification.CORPUS_STRUCTURAL_ATTRIBUTE_HAS_VALUES) self.__send_STRING(attribute) return self.__recv_response() def corpus_alignment_attributes(self, corpus): # INPUT: (STRING corpus) # OUTPUT: CQI_DATA_STRING_LIST - self.__send_WORD(CORPUS_ALIGNMENT_ATTRIBUTES) +
self.__send_WORD(specification.CORPUS_ALIGNMENT_ATTRIBUTES) self.__send_STRING(corpus) return self.__recv_response() @@ -527,7 +134,7 @@ class APIClient: # INPUT: (STRING corpus) # OUTPUT: CQI_DATA_STRING # the full name of as specified in its registry entry - self.__send_WORD(CORPUS_FULL_NAME) + self.__send_WORD(specification.CORPUS_FULL_NAME) self.__send_STRING(corpus) return self.__recv_response() @@ -535,7 +142,7 @@ class APIClient: # INPUT: (STRING corpus) # OUTPUT: CQI_DATA_STRING_LIST # returns the contents of the .info file of as a list of lines - self.__send_WORD(CORPUS_INFO) + self.__send_WORD(specification.CORPUS_INFO) self.__send_STRING(corpus) return self.__recv_response() @@ -543,7 +150,7 @@ class APIClient: # INPUT: (STRING corpus) # OUTPUT: CQI_STATUS_OK # try to unload a corpus and all its attributes from memory - self.__send_WORD(CORPUS_DROP_CORPUS) + self.__send_WORD(specification.CORPUS_DROP_CORPUS) self.__send_STRING(corpus) return self.__recv_response() @@ -554,7 +161,7 @@ class APIClient: # number of tokens (positional) # number of regions (structural) # number of alignments (alignment) - self.__send_WORD(CL_ATTRIBUTE_SIZE) + self.__send_WORD(specification.CL_ATTRIBUTE_SIZE) self.__send_STRING(attribute) return self.__recv_response() @@ -564,7 +171,7 @@ class APIClient: # returns the number of entries in the lexicon of a positional # attribute; # valid lexicon IDs range from 0 .. 
(lexicon_size - 1) - self.__send_WORD(CL_LEXICON_SIZE) + self.__send_WORD(specification.CL_LEXICON_SIZE) self.__send_STRING(attribute) return self.__recv_response() @@ -572,7 +179,7 @@ class APIClient: # INPUT: (STRING attribute) # OUTPUT: CQI_STATUS_OK # unload attribute from memory - self.__send_WORD(CL_DROP_ATTRIBUTE) + self.__send_WORD(specification.CL_DROP_ATTRIBUTE) self.__send_STRING(attribute) return self.__recv_response() @@ -586,7 +193,7 @@ class APIClient: # OUTPUT: CQI_DATA_INT_LIST # returns -1 for every string in that is not found in the # lexicon - self.__send_WORD(CL_STR2ID) + self.__send_WORD(specification.CL_STR2ID) self.__send_STRING(attribute) self.__send_STRING_LIST(strings) return self.__recv_response() @@ -595,7 +202,7 @@ class APIClient: # INPUT: (STRING attribute, INT_LIST id) # OUTPUT: CQI_DATA_STRING_LIST # returns "" for every ID in that is out of range - self.__send_WORD(CL_ID2STR) + self.__send_WORD(specification.CL_ID2STR) self.__send_STRING(attribute) self.__send_INT_LIST(id) return self.__recv_response() @@ -604,7 +211,7 @@ class APIClient: # INPUT: (STRING attribute, INT_LIST id) # OUTPUT: CQI_DATA_INT_LIST # returns 0 for every ID in that is out of range - self.__send_WORD(CL_ID2FREQ) + self.__send_WORD(specification.CL_ID2FREQ) self.__send_STRING(attribute) self.__send_INT_LIST(id) return self.__recv_response() @@ -613,7 +220,7 @@ class APIClient: # INPUT: (STRING attribute, INT_LIST cpos) # OUTPUT: CQI_DATA_INT_LIST # returns -1 for every corpus position in that is out of range - self.__send_WORD(CL_ID2FREQ) + self.__send_WORD(specification.CL_CPOS2ID) self.__send_STRING(attribute) self.__send_INT_LIST(cpos) return self.__recv_response() @@ -622,7 +229,7 @@ class APIClient: # INPUT: (STRING attribute, INT_LIST cpos) # OUTPUT: CQI_DATA_STRING_LIST # returns "" for every corpus position in that is out of range - self.__send_WORD(CL_CPOS2STR) + self.__send_WORD(specification.CL_CPOS2STR) self.__send_STRING(attribute)
self.__send_INT_LIST(cpos) return self.__recv_response() @@ -631,7 +238,7 @@ class APIClient: # INPUT: (STRING attribute, INT_LIST cpos) # OUTPUT: CQI_DATA_INT_LIST # returns -1 for every corpus position not inside a structure region - self.__send_WORD(CL_CPOS2STRUC) + self.__send_WORD(specification.CL_CPOS2STRUC) self.__send_STRING(attribute) self.__send_INT_LIST(cpos) return self.__recv_response() @@ -646,7 +253,7 @@ class APIClient: # OUTPUT: CQI_DATA_INT_LIST # returns left boundary of s-attribute region enclosing cpos, -1 if not # in region - self.__send_WORD(CL_CPOS2LBOUND) + self.__send_WORD(specification.CL_CPOS2LBOUND) self.__send_STRING(attribute) self.__send_INT_LIST(cpos) return self.__recv_response() @@ -656,7 +263,7 @@ class APIClient: # OUTPUT: CQI_DATA_INT_LIST # returns right boundary of s-attribute region enclosing cpos, -1 if # not in region - self.__send_WORD(CL_CPOS2RBOUND) + self.__send_WORD(specification.CL_CPOS2RBOUND) self.__send_STRING(attribute) self.__send_INT_LIST(cpos) return self.__recv_response() @@ -665,7 +272,7 @@ class APIClient: # INPUT: (STRING attribute, INT_LIST cpos) # OUTPUT: CQI_DATA_INT_LIST # returns -1 for every corpus position not inside an alignment - self.__send_WORD(CL_CPOS2ALG) + self.__send_WORD(specification.CL_CPOS2ALG) self.__send_STRING(attribute) self.__send_INT_LIST(cpos) return self.__recv_response() @@ -676,7 +283,7 @@ class APIClient: # returns annotated string values of structure regions in ; "" # if out of range # check CQI_CORPUS_STRUCTURAL_ATTRIBUTE_HAS_VALUES() first - self.__send_WORD(CL_STRUC2STR) + self.__send_WORD(specification.CL_STRUC2STR) self.__send_STRING(attribute) self.__send_INT_LIST(strucs) return self.__recv_response() @@ -690,7 +297,7 @@ class APIClient: # INPUT: (STRING attribute, INT id) # OUTPUT: CQI_DATA_INT_LIST # returns all corpus positions where the given token occurs - self.__send_WORD(CL_ID2CPOS) + self.__send_WORD(specification.CL_ID2CPOS) self.__send_STRING(attribute) 
self.__send_INT(id) return self.__recv_response() @@ -700,7 +307,7 @@ class APIClient: # OUTPUT: CQI_DATA_INT_LIST # returns all corpus positions where one of the tokens in # occurs; the returned list is sorted as a whole, not per token id - self.__send_WORD(CL_IDLIST2CPOS) + self.__send_WORD(specification.CL_IDLIST2CPOS) self.__send_STRING(attribute) self.__send_INT_LIST(id_list) return self.__recv_response() @@ -710,7 +317,7 @@ class APIClient: # OUTPUT: CQI_DATA_INT_LIST # returns lexicon IDs of all tokens that match ; the returned # list may be empty (size 0); - self.__send_WORD(CL_REGEX2ID) + self.__send_WORD(specification.CL_REGEX2ID) self.__send_STRING(attribute) self.__send_STRING(regex) return self.__recv_response() @@ -719,7 +326,7 @@ class APIClient: # INPUT: (STRING attribute, INT struc) # OUTPUT: CQI_DATA_INT_INT # returns start and end corpus positions of structure region - self.__send_WORD(CL_STRUC2CPOS) + self.__send_WORD(specification.CL_STRUC2CPOS) self.__send_STRING(attribute) self.__send_INT(struc) return self.__recv_response() @@ -728,7 +335,7 @@ class APIClient: # INPUT: (STRING attribute, INT alg) # OUTPUT: CQI_DATA_INT_INT_INT_INT # returns (src_start, src_end, target_start, target_end) - self.__send_WORD(CL_ALG2CPOS) + self.__send_WORD(specification.CL_ALG2CPOS) self.__send_STRING(attribute) self.__send_INT(alg) return self.__recv_response() @@ -737,7 +344,7 @@ class APIClient: # INPUT: (STRING mother_corpus, STRING subcorpus_name, STRING query) # OUTPUT: CQI_STATUS_OK # must include the ';' character terminating the query. 
- self.__send_WORD(CQP_QUERY) + self.__send_WORD(specification.CQP_QUERY) self.__send_STRING(mother_corpus) self.__send_STRING(subcorpus_name) self.__send_STRING(query) @@ -746,21 +353,21 @@ class APIClient: def cqp_list_subcorpora(self, corpus): # INPUT: (STRING corpus) # OUTPUT: CQI_DATA_STRING_LIST - self.__send_WORD(CQP_LIST_SUBCORPORA) + self.__send_WORD(specification.CQP_LIST_SUBCORPORA) self.__send_STRING(corpus) return self.__recv_response() def cqp_subcorpus_size(self, subcorpus): # INPUT: (STRING subcorpus) # OUTPUT: CQI_DATA_INT - self.__send_WORD(CQP_SUBCORPUS_SIZE) + self.__send_WORD(specification.CQP_SUBCORPUS_SIZE) self.__send_STRING(subcorpus) return self.__recv_response() def cqp_subcorpus_has_field(self, subcorpus, field): # INPUT: (STRING subcorpus, BYTE field) # OUTPUT: CQI_DATA_BOOL - self.__send_WORD(CQP_SUBCORPUS_HAS_FIELD) + self.__send_WORD(specification.CQP_SUBCORPUS_HAS_FIELD) self.__send_STRING(subcorpus) self.__send_BYTE(field) return self.__recv_response() @@ -770,7 +377,7 @@ class APIClient: # OUTPUT: CQI_DATA_INT_LIST # Dump the values of for match ranges .. # in . is one of the CQI_CONST_FIELD_* constants. - self.__send_WORD(CQP_DUMP_SUBCORPUS) + self.__send_WORD(specification.CQP_DUMP_SUBCORPUS) self.__send_STRING(subcorpus) self.__send_BYTE(field) self.__send_INT(first) @@ -781,7 +388,7 @@ class APIClient: # INPUT: (STRING subcorpus) # OUTPUT: CQI_STATUS_OK # delete a subcorpus from memory - self.__send_WORD(CQP_DROP_SUBCORPUS) + self.__send_WORD(specification.CQP_DROP_SUBCORPUS) self.__send_STRING(subcorpus) return self.__recv_response() @@ -798,7 +405,7 @@ class APIClient: # field is one of CQI_CONST_FIELD_MATCH, CQI_CONST_FIELD_TARGET, # CQI_CONST_FIELD_KEYWORD # NB: pairs are sorted by frequency desc. 
- self.__send_WORD(CQP_FDIST_1) + self.__send_WORD(specification.CQP_FDIST_1) self.__send_STRING(subcorpus) self.__send_INT(cutoff) self.__send_BYTE(field) @@ -814,7 +421,7 @@ class APIClient: # returns (id1, id2, frequency) pairs flattened into a list of size # 3* # NB: triples are sorted by frequency desc. - self.__send_WORD(CQP_FDIST_2) + self.__send_WORD(specification.CQP_FDIST_2) self.__send_STRING(subcorpus) self.__send_INT(cutoff) self.__send_BYTE(field1) @@ -826,46 +433,84 @@ class APIClient: def __recv_response(self): byte_data = self.__recv_WORD() response_type = byte_data >> 8 - if response_type == CL_ERROR: - raise Exception(lookup[byte_data]) - elif response_type == CQP_ERROR: - raise Exception(lookup[byte_data]) - elif response_type == DATA: + if response_type == specification.CL_ERROR: + raise self.__create_cl_error(byte_data) + elif response_type == specification.CQP_ERROR: + raise self.__create_cqp_error(byte_data) + elif response_type == specification.DATA: return self.__recv_DATA(byte_data) - elif response_type == ERROR: - raise Exception(lookup[byte_data]) - elif response_type == STATUS: - return byte_data + elif response_type == specification.ERROR: + raise self.__create_error(byte_data) + elif response_type == specification.STATUS: + return {'code': byte_data, 'msg': specification.lookup[byte_data]} else: - raise Exception( - 'Unknown response type: {}'.format(hex(response_type)) - ) + raise Exception('Unknown response type: {}'.format(response_type)) + + def __create_cl_error(self, error_type): + if error_type == specification.CL_ERROR_NO_SUCH_ATTRIBUTE: + return errors.CLErrorNoSuchAttribute() + elif error_type == specification.CL_ERROR_WRONG_ATTRIBUTE_TYPE: + return errors.CLErrorWrongAttributeType() + elif error_type == specification.CL_ERROR_OUT_OF_RANGE: + return errors.CLErrorOutOfRange() + elif error_type == specification.CL_ERROR_REGEX: + return errors.CLErrorRegex() + elif error_type == specification.CL_ERROR_CORPUS_ACCESS: + return 
errors.CLErrorCorpusAccess() + elif error_type == specification.CL_ERROR_OUT_OF_MEMORY: + return errors.CLErrorOutOfMemory() + elif error_type == specification.CL_ERROR_INTERNAL: + return errors.CLErrorInternal() + else: + return errors.CLError(error_type) + + def __create_cqp_error(self, error_type): + if error_type == specification.CQP_ERROR_GENERAL: + return errors.CQPErrorGeneral() + elif error_type == specification.CQP_ERROR_INVALID_FIELD: + return errors.CQPErrorInvalidField() + elif error_type == specification.CQP_ERROR_OUT_OF_RANGE: + return errors.CQPErrorOutOfRange() + else: + return errors.CQPError(error_type) + + def __create_error(self, error_type): + if error_type == specification.ERROR_GENERAL_ERROR: + return errors.ErrorGeneralError() + elif error_type == specification.ERROR_CONNECT_REFUSED: + return errors.ErrorConnectRefused() + elif error_type == specification.ERROR_USER_ABORT: + return errors.ErrorUserAbort() + elif error_type == specification.ERROR_SYNTAX_ERROR: + return errors.ErrorSyntaxError() + else: + return errors.Error(error_type) def __recv_DATA(self, data_type): - if data_type == DATA_BYTE: + if data_type == specification.DATA_BYTE: data = self.__recv_DATA_BYTE() - elif data_type == DATA_BOOL: + elif data_type == specification.DATA_BOOL: data = self.__recv_DATA_BOOL() - elif data_type == DATA_INT: + elif data_type == specification.DATA_INT: data = self.__recv_DATA_INT() - elif data_type == DATA_STRING: + elif data_type == specification.DATA_STRING: data = self.__recv_DATA_STRING() - elif data_type == DATA_BYTE_LIST: + elif data_type == specification.DATA_BYTE_LIST: data = self.__recv_DATA_BYTE_LIST() - elif data_type == DATA_BOOL_LIST: + elif data_type == specification.DATA_BOOL_LIST: data = self.__recv_DATA_BOOL_LIST() - elif data_type == DATA_INT_LIST: + elif data_type == specification.DATA_INT_LIST: data = self.__recv_DATA_INT_LIST() - elif data_type == DATA_STRING_LIST: + elif data_type == specification.DATA_STRING_LIST: data = 
self.__recv_DATA_STRING_LIST() - elif data_type == DATA_INT_INT: + elif data_type == specification.DATA_INT_INT: data = self.__recv_DATA_INT_INT() - elif data_type == DATA_INT_INT_INT_INT: + elif data_type == specification.DATA_INT_INT_INT_INT: data = self.__recv_DATA_INT_INT_INT_INT() - elif data_type == DATA_INT_TABLE: + elif data_type == specification.DATA_INT_TABLE: data = self.__recv_DATA_INT_TABLE() else: - raise Exception('Unknown data type: {}'.format(hex(data_type))) + raise Exception('Unknown data type: {}'.format(data_type)) return data def __recv_DATA_BYTE(self): diff --git a/app/corpora/cqi/client.py b/app/corpora/cqi/client.py index 50de46b4..b4662c4c 100644 --- a/app/corpora/cqi/client.py +++ b/app/corpora/cqi/client.py @@ -7,9 +7,10 @@ class CQiClient: self.api = APIClient(host, port=port) def connect(self, username='anonymous', password=''): - self.api.ctrl_connect(username, password) + status = self.api.ctrl_connect(username, password) self.corpora = CorpusCollection(self) + return status def disconnect(self): del self.corpora - self.api.ctrl_bye() + return self.api.ctrl_bye() diff --git a/app/corpora/cqi/constants.py b/app/corpora/cqi/constants.py deleted file mode 100644 index df2842b0..00000000 --- a/app/corpora/cqi/constants.py +++ /dev/null @@ -1,36 +0,0 @@ -""" 4. Constant Definitions """ -CONST_FALSE = 0x00 -CONST_NO = 0x00 -CONST_TRUE = 0x01 -CONST_YES = 0x01 -""" -" NOTE: The following constants specify which field will be returned by -" CQI_CQP_DUMP_SUBCORPUS and some other subcorpus commands. -""" -CONST_FIELD_MATCH = 0x10 -CONST_FIELD_MATCHEND = 0x11 -""" -" NOTE: The constants specifiying target0 .. target9 are guaranteed to have the -" numerical values 0 .. 9, so clients do not need to look up the constant -" values if they're handling arbitrary targets. 
-""" -CONST_FIELD_TARGET_0 = 0x00 -CONST_FIELD_TARGET_1 = 0x01 -CONST_FIELD_TARGET_2 = 0x02 -CONST_FIELD_TARGET_3 = 0x03 -CONST_FIELD_TARGET_4 = 0x04 -CONST_FIELD_TARGET_5 = 0x05 -CONST_FIELD_TARGET_6 = 0x06 -CONST_FIELD_TARGET_7 = 0x07 -CONST_FIELD_TARGET_8 = 0x08 -CONST_FIELD_TARGET_9 = 0x09 -""" -" NOTE: The following constants are provided for backward compatibility with -" traditional CQP field names & while the generalised target concept -" isn't yet implemented in the CQPserver. -""" -CONST_FIELD_TARGET = 0x00 -CONST_FIELD_KEYWORD = 0x09 -""" NOTE: CQi version is CQI_MAJOR_VERSION.CQI_MINOR_VERSION """ -MAJOR_VERSION = 0x00 -MINOR_VERSION = 0x01 diff --git a/app/corpora/cqi/errors.py b/app/corpora/cqi/errors.py new file mode 100644 index 00000000..086f4981 --- /dev/null +++ b/app/corpora/cqi/errors.py @@ -0,0 +1,104 @@ +class CQiException(Exception): + """ + A base class from which all other exceptions inherit. + If you want to catch all errors that the CQi package might raise, + catch this base exception. 
+ """ + + +class Error(CQiException): + # ERROR = 0x02 + pass + + +class ErrorGeneralError(Error): + # ERROR_GENERAL_ERROR = 0x0201 + pass + + +class ErrorConnectRefused(Error): + # ERROR_CONNECT_REFUSED = 0x0202 + pass + + +class ErrorUserAbort(Error): + # ERROR_USER_ABORT = 0x0203 + pass + + +class ErrorSyntaxError(Error): + # ERROR_SYNTAX_ERROR = 0x0204 + pass + + +class CLError(CQiException): + # CL_ERROR = 0x04 + pass + + +class CLErrorNoSuchAttribute(CLError): + # CL_ERROR_NO_SUCH_ATTRIBUTE = 0x0401 + # returned if CQi server couldn't open attribute + pass + + +class CLErrorWrongAttributeType(CLError): + # CL_ERROR_WRONG_ATTRIBUTE_TYPE = 0x0402 + # CDA_EATTTYPE + pass + + +class CLErrorOutOfRange(CLError): + # CL_ERROR_OUT_OF_RANGE = 0x0403 + # CDA_EIDORNG, CDA_EIDXORNG, CDA_EPOSORNG + pass + + +class CLErrorRegex(CLError): + # CL_ERROR_REGEX = 0x0404 + # CDA_EPATTERN (not used), CDA_EBADREGEX + pass + + +class CLErrorCorpusAccess(CLError): + # CL_ERROR_CORPUS_ACCESS = 0x0405 + # CDA_ENODATA + pass + + +class CLErrorOutOfMemory(CLError): + # CL_ERROR_OUT_OF_MEMORY = 0x0406 + # CDA_ENOMEM + # this means the CQi server has run out of memory; + # try discarding some other corpora and/or subcorpora + pass + + +class CLErrorInternal(CLError): + # CL_ERROR_INTERNAL = 0x0407 + # CDA_EOTHER, CDA_ENYI + # this is the classical 'please contact technical support' error + pass + + +class CQPError(CQiException): + # CQP_ERROR = 0x05 + # CQP error messages yet to be defined + pass + + +class CQPErrorGeneral(CQPError): + # CQP_ERROR_GENERAL = 0x0501 + pass + # CQP_ERROR_NO_SUCH_CORPUS = 0x0502 + + +class CQPErrorInvalidField(CQPError): + # CQP_ERROR_INVALID_FIELD = 0x0503 + pass + + +class CQPErrorOutOfRange(CQPError): + # CQP_ERROR_OUT_OF_RANGE = 0x0504 + # various cases where a number is out of range + pass diff --git a/app/corpora/cqi/models/subcorpora.py b/app/corpora/cqi/models/subcorpora.py index c34245d3..d7698eba 100644 --- a/app/corpora/cqi/models/subcorpora.py +++ 
b/app/corpora/cqi/models/subcorpora.py @@ -1,5 +1,5 @@ -from ..constants import (CONST_FIELD_KEYWORD, CONST_FIELD_MATCH, - CONST_FIELD_MATCHEND, CONST_FIELD_TARGET) +from ..specification import (CONST_FIELD_KEYWORD, CONST_FIELD_MATCH, + CONST_FIELD_MATCHEND, CONST_FIELD_TARGET) class SubcorpusCollection: diff --git a/app/corpora/cqi/specification.py b/app/corpora/cqi/specification.py new file mode 100644 index 00000000..1147d0d1 --- /dev/null +++ b/app/corpora/cqi/specification.py @@ -0,0 +1,404 @@ +# ########################################################################### # +# IMS CQi specification # +# # +# Version: 0.1a ;o) # +# Author: Stefan Evert (evert@ims.uni-stuttgart.de) # +# Modified by (codestyle): Patrick Jentsch (p.jentsch@uni-bielefeld.de) # +# Modified date: Thurs Oct 10 # +# ########################################################################### # +""" 1. padding """ +PAD = 0x00 + + +""" 2. CQi responses """ +""" 2.1 CQI_STATUS_* """ +STATUS = 0x01 +STATUS_OK = 0x0101 +STATUS_CONNECT_OK = 0x0102 +STATUS_BYE_OK = 0x0103 +STATUS_PING_OK = 0x0104 + +""" 2.2 CQI_ERROR_* """ +ERROR = 0x02 +ERROR_GENERAL_ERROR = 0x0201 +ERROR_CONNECT_REFUSED = 0x0202 +ERROR_USER_ABORT = 0x0203 +ERROR_SYNTAX_ERROR = 0x0204 +# includes corpus/attribute/subcorpus specifier syntax + +""" 2.3 CQI_DATA_* """ +DATA = 0x03 +DATA_BYTE = 0x0301 +DATA_BOOL = 0x0302 +DATA_INT = 0x0303 +DATA_STRING = 0x0304 +DATA_BYTE_LIST = 0x0305 +DATA_BOOL_LIST = 0x0306 +DATA_INT_LIST = 0x0307 +DATA_STRING_LIST = 0x0308 +DATA_INT_INT = 0x0309 +DATA_INT_INT_INT_INT = 0x030A +DATA_INT_TABLE = 0x030B + +""" 2.4 CQI_CL_ERROR_* """ +""" +" NOTE: some CL error codes are not represented in the CQi specs +" - usually because they're not used in the CL any more +" - CDA_ENOSTRING is not considered an error (returns -1) +" - CDA_EARGS: dynamic attribute calls not yet supported +""" +CL_ERROR = 0x04 +CL_ERROR_NO_SUCH_ATTRIBUTE = 0x0401 +# returned if CQi server couldn't open attribute 
+CL_ERROR_WRONG_ATTRIBUTE_TYPE = 0x0402 +# CDA_EATTTYPE +CL_ERROR_OUT_OF_RANGE = 0x0403 +# CDA_EIDORNG, CDA_EIDXORNG, CDA_EPOSORNG +CL_ERROR_REGEX = 0x0404 +# CDA_EPATTERN (not used), CDA_EBADREGEX +CL_ERROR_CORPUS_ACCESS = 0x0405 +# CDA_ENODATA +CL_ERROR_OUT_OF_MEMORY = 0x0406 +# CDA_ENOMEM +# this means the CQi server has run out of memory; +# try discarding some other corpora and/or subcorpora +CL_ERROR_INTERNAL = 0x0407 +# CDA_EOTHER, CDA_ENYI +# this is the classical 'please contact technical support' error + +""" 2.5 CQI_CQP_ERROR_* """ +CQP_ERROR = 0x05 +# CQP error messages yet to be defined +CQP_ERROR_GENERAL = 0x0501 +CQP_ERROR_NO_SUCH_CORPUS = 0x0502 +CQP_ERROR_INVALID_FIELD = 0x0503 +CQP_ERROR_OUT_OF_RANGE = 0x0504 +# various cases where a number is out of range + + +""" 3. CQi commands """ +""" 3.1 CQI_CTRL_* """ +CTRL = 0x11 +CTRL_CONNECT = 0x1101 +# INPUT: (STRING username, STRING password) +# OUTPUT: CQI_STATUS_CONNECT_OK, CQI_ERROR_CONNECT_REFUSED +CTRL_BYE = 0x1102 +# INPUT: () +# OUTPUT: CQI_STATUS_BYE_OK +CTRL_USER_ABORT = 0x1103 +# INPUT: () +# OUTPUT: +CTRL_PING = 0x1104 +# INPUT: () +# OUTPUT: CQI_STATUS_PING_OK +CTRL_LAST_GENERAL_ERROR = 0x1105 +# INPUT: () +# OUTPUT: CQI_DATA_STRING +# full-text error message for the last general error reported by the CQi server + +""" 3.2 CQI_ASK_FEATURE_* """ +ASK_FEATURE = 0x12 +ASK_FEATURE_CQI_1_0 = 0x1201 +# INPUT: () +# OUTPUT: CQI_DATA_BOOL +ASK_FEATURE_CL_2_3 = 0x1202 +# INPUT: () +# OUTPUT: CQI_DATA_BOOL +ASK_FEATURE_CQP_2_3 = 0x1203 +# INPUT: () +# OUTPUT: CQI_DATA_BOOL + +""" 3.3 CQI_CORPUS_* """ +CORPUS = 0x13 +CORPUS_LIST_CORPORA = 0x1301 +# INPUT: () +# OUTPUT: CQI_DATA_STRING_LIST +CORPUS_CHARSET = 0x1303 +# INPUT: (STRING corpus) +# OUTPUT: CQI_DATA_STRING +CORPUS_PROPERTIES = 0x1304 +# INPUT: (STRING corpus) +# OUTPUT: CQI_DATA_STRING_LIST +CORPUS_POSITIONAL_ATTRIBUTES = 0x1305 +# INPUT: (STRING corpus) +# OUTPUT: CQI_DATA_STRING_LIST +CORPUS_STRUCTURAL_ATTRIBUTES = 0x1306 +# INPUT: (STRING 
corpus) +# OUTPUT: CQI_DATA_STRING_LIST +CORPUS_STRUCTURAL_ATTRIBUTE_HAS_VALUES = 0x1307 +# INPUT: (STRING attribute) +# OUTPUT: CQI_DATA_BOOL +CORPUS_ALIGNMENT_ATTRIBUTES = 0x1308 +# INPUT: (STRING corpus) +# OUTPUT: CQI_DATA_STRING_LIST +CORPUS_FULL_NAME = 0x1309 +# INPUT: (STRING corpus) +# OUTPUT: CQI_DATA_STRING +# the full name of <corpus> as specified in its registry entry +CORPUS_INFO = 0x130A +# INPUT: (STRING corpus) +# OUTPUT: CQI_DATA_STRING_LIST +# returns the contents of the .info file of <corpus> as a list of lines +CORPUS_DROP_CORPUS = 0x130B +# INPUT: (STRING corpus) +# OUTPUT: CQI_STATUS_OK +# try to unload a corpus and all its attributes from memory + +""" 3.4 CQI_CL_* """ +CL = 0x14 +# low-level corpus access (CL functions) +CL_ATTRIBUTE_SIZE = 0x1401 +# INPUT: (STRING attribute) +# OUTPUT: CQI_DATA_INT +# returns the size of <attribute>: +# - number of tokens (positional) +# - number of regions (structural) +# - number of alignments (alignment) +CL_LEXICON_SIZE = 0x1402 +# INPUT: (STRING attribute) +# OUTPUT: CQI_DATA_INT +# returns the number of entries in the lexicon of a positional attribute; +# valid lexicon IDs range from 0 ..
(lexicon_size - 1) +CL_DROP_ATTRIBUTE = 0x1403 +# INPUT: (STRING attribute) +# OUTPUT: CQI_STATUS_OK +# unload attribute from memory +""" +" NOTE: simple (scalar) mappings are applied to lists (the returned list has +" exactly the same length as the list passed as an argument) +""" +CL_STR2ID = 0x1404 +# INPUT: (STRING attribute, STRING_LIST strings) +# OUTPUT: CQI_DATA_INT_LIST +# returns -1 for every string in <strings> that is not found in the lexicon +CL_ID2STR = 0x1405 +# INPUT: (STRING attribute, INT_LIST id) +# OUTPUT: CQI_DATA_STRING_LIST +# returns "" for every ID in <id> that is out of range +CL_ID2FREQ = 0x1406 +# INPUT: (STRING attribute, INT_LIST id) +# OUTPUT: CQI_DATA_INT_LIST +# returns 0 for every ID in <id> that is out of range +CL_CPOS2ID = 0x1407 +# INPUT: (STRING attribute, INT_LIST cpos) +# OUTPUT: CQI_DATA_INT_LIST +# returns -1 for every corpus position in <cpos> that is out of range +CL_CPOS2STR = 0x1408 +# INPUT: (STRING attribute, INT_LIST cpos) +# OUTPUT: CQI_DATA_STRING_LIST +# returns "" for every corpus position in <cpos> that is out of range +CL_CPOS2STRUC = 0x1409 +# INPUT: (STRING attribute, INT_LIST cpos) +# OUTPUT: CQI_DATA_INT_LIST +# returns -1 for every corpus position not inside a structure region +""" +" NOTE: temporary addition for the Euralex2000 tutorial, but should probably be +" included in CQi specs +""" +CL_CPOS2LBOUND = 0x1420 +# INPUT: (STRING attribute, INT_LIST cpos) +# OUTPUT: CQI_DATA_INT_LIST +# returns left boundary of s-attribute region enclosing cpos, -1 if not in +# region +CL_CPOS2RBOUND = 0x1421 +# INPUT: (STRING attribute, INT_LIST cpos) +# OUTPUT: CQI_DATA_INT_LIST +# returns right boundary of s-attribute region enclosing cpos, -1 if not in +# region +CL_CPOS2ALG = 0x140A +# INPUT: (STRING attribute, INT_LIST cpos) +# OUTPUT: CQI_DATA_INT_LIST +# returns -1 for every corpus position not inside an alignment +CL_STRUC2STR = 0x140B +# INPUT: (STRING attribute, INT_LIST strucs) +# OUTPUT: CQI_DATA_STRING_LIST +# returns annotated string
values of structure regions in <strucs>; "" if out +# of range +# check CQI_CORPUS_STRUCTURAL_ATTRIBUTE_HAS_VALUES() first +""" +" NOTE: the following mappings take a single argument and return multiple +" values, including lists of arbitrary size +""" +CL_ID2CPOS = 0x140C +# INPUT: (STRING attribute, INT id) +# OUTPUT: CQI_DATA_INT_LIST +# returns all corpus positions where the given token occurs +CL_IDLIST2CPOS = 0x140D +# INPUT: (STRING attribute, INT_LIST id_list) +# OUTPUT: CQI_DATA_INT_LIST +# returns all corpus positions where one of the tokens in <id_list> +# occurs; the returned list is sorted as a whole, not per token id +CL_REGEX2ID = 0x140E +# INPUT: (STRING attribute, STRING regex) +# OUTPUT: CQI_DATA_INT_LIST +# returns lexicon IDs of all tokens that match <regex>; the returned +# list may be empty (size 0); +CL_STRUC2CPOS = 0x140F +# INPUT: (STRING attribute, INT struc) +# OUTPUT: CQI_DATA_INT_INT +# returns start and end corpus positions of structure region <struc> +CL_ALG2CPOS = 0x1410 +# INPUT: (STRING attribute, INT alg) +# OUTPUT: CQI_DATA_INT_INT_INT_INT +# returns (src_start, src_end, target_start, target_end) + +""" 3.5 CQI_CQP_* """ +CQP = 0x15 +CQP_QUERY = 0x1501 +# INPUT: (STRING mother_corpus, STRING subcorpus_name, STRING query) +# OUTPUT: CQI_STATUS_OK +# <query> must include the ';' character terminating the query. +CQP_LIST_SUBCORPORA = 0x1502 +# INPUT: (STRING corpus) +# OUTPUT: CQI_DATA_STRING_LIST +CQP_SUBCORPUS_SIZE = 0x1503 +# INPUT: (STRING subcorpus) +# OUTPUT: CQI_DATA_INT +CQP_SUBCORPUS_HAS_FIELD = 0x1504 +# INPUT: (STRING subcorpus, BYTE field) +# OUTPUT: CQI_DATA_BOOL +CQP_DUMP_SUBCORPUS = 0x1505 +# INPUT: (STRING subcorpus, BYTE field, INT first, INT last) +# OUTPUT: CQI_DATA_INT_LIST +# Dump the values of <field> for match ranges <first> .. <last> in <subcorpus>. +# <field> is one of the CQI_CONST_FIELD_* constants.
+CQP_DROP_SUBCORPUS = 0x1509 +# INPUT: (STRING subcorpus) +# OUTPUT: CQI_STATUS_OK +# delete a subcorpus from memory +""" +" NOTE: The following two functions are temporarily included for the Euralex +" 2000 tutorial demo +""" +""" NOTE: frequency distribution of single tokens """ +CQP_FDIST_1 = 0x1510 +# INPUT: (STRING subcorpus, INT cutoff, BYTE field, STRING attribute) +# OUTPUT: CQI_DATA_INT_LIST +# returns <n> (id, frequency) pairs flattened into a list of size 2*<n> +# field is one of CQI_CONST_FIELD_MATCH, CQI_CONST_FIELD_TARGET, +# CQI_CONST_FIELD_KEYWORD +# NB: pairs are sorted by frequency desc. +""" NOTE: frequency distribution of pairs of tokens """ +CQP_FDIST_2 = 0x1511 +# INPUT: (STRING subcorpus, INT cutoff, BYTE field1, STRING attribute1, +# BYTE field2, STRING attribute2) +# OUTPUT: CQI_DATA_INT_LIST +# returns <n> (id1, id2, frequency) pairs flattened into a list of size 3*<n> +# NB: triples are sorted by frequency desc. + + +""" 4. Constant Definitions """ +CONST_FALSE = 0x00 +CONST_NO = 0x00 +CONST_TRUE = 0x01 +CONST_YES = 0x01 +""" +" NOTE: The following constants specify which field will be returned by +" CQI_CQP_DUMP_SUBCORPUS and some other subcorpus commands. +""" +CONST_FIELD_MATCH = 0x10 +CONST_FIELD_MATCHEND = 0x11 +""" +" NOTE: The constants specifiying target0 .. target9 are guaranteed to have the +" numerical values 0 .. 9, so clients do not need to look up the constant +" values if they're handling arbitrary targets. +""" +CONST_FIELD_TARGET_0 = 0x00 +CONST_FIELD_TARGET_1 = 0x01 +CONST_FIELD_TARGET_2 = 0x02 +CONST_FIELD_TARGET_3 = 0x03 +CONST_FIELD_TARGET_4 = 0x04 +CONST_FIELD_TARGET_5 = 0x05 +CONST_FIELD_TARGET_6 = 0x06 +CONST_FIELD_TARGET_7 = 0x07 +CONST_FIELD_TARGET_8 = 0x08 +CONST_FIELD_TARGET_9 = 0x09 +""" +" NOTE: The following constants are provided for backward compatibility with +" traditional CQP field names & while the generalised target concept +" isn't yet implemented in the CQPserver.
+""" +CONST_FIELD_TARGET = 0x00 +CONST_FIELD_KEYWORD = 0x09 +""" NOTE: CQi version is CQI_MAJOR_VERSION.CQI_MINOR_VERSION """ +MAJOR_VERSION = 0x00 +MINOR_VERSION = 0x01 + + +""" 5. CQi lookup dictionary. """ +lookup = { + 257: 'CQI_STATUS_OK', + 258: 'CQI_STATUS_CONNECT_OK', + 259: 'CQI_STATUS_BYE_OK', + 260: 'CQI_STATUS_PING_OK', + 513: 'CQI_ERROR_GENERAL_ERROR', + 514: 'CQI_ERROR_CONNECT_REFUSED', + 515: 'CQI_ERROR_USER_ABORT', + 516: 'CQI_ERROR_SYNTAX_ERROR', + 769: 'CQI_DATA_BYTE', + 770: 'CQI_DATA_BOOL', + 771: 'CQI_DATA_INT', + 772: 'CQI_DATA_STRING', + 773: 'CQI_DATA_BYTE_LIST', + 774: 'CQI_DATA_BOOL_LIST', + 775: 'CQI_DATA_INT_LIST', + 776: 'CQI_DATA_STRING_LIST', + 777: 'CQI_DATA_INT_INT', + 778: 'CQI_DATA_INT_INT_INT_INT', + 779: 'CQI_DATA_INT_TABLE', + 1025: 'CQI_CL_ERROR_NO_SUCH_ATTRIBUTE', + 1026: 'CQI_CL_ERROR_WRONG_ATTRIBUTE_TYPE', + 1027: 'CQI_CL_ERROR_OUT_OF_RANGE', + 1028: 'CQI_CL_ERROR_REGEX', + 1029: 'CQI_CL_ERROR_CORPUS_ACCESS', + 1030: 'CQI_CL_ERROR_OUT_OF_MEMORY', + 1031: 'CQI_CL_ERROR_INTERNAL', + 1281: 'CQI_CQP_ERROR_GENERAL', + 1282: 'CQI_CQP_ERROR_NO_SUCH_CORPUS', + 1283: 'CQI_CQP_ERROR_INVALID_FIELD', + 1284: 'CQI_CQP_ERROR_OUT_OF_RANGE', + 4353: 'CQI_CTRL_CONNECT', + 4354: 'CQI_CTRL_BYE', + 4355: 'CQI_CTRL_USER_ABORT', + 4356: 'CQI_CTRL_PING', + 4357: 'CQI_CTRL_LAST_GENERAL_ERROR', + 4609: 'CQI_ASK_FEATURE_CQI_1_0', + 4610: 'CQI_ASK_FEATURE_CL_2_3', + 4611: 'CQI_ASK_FEATURE_CQP_2_3', + 4865: 'CQI_CORPUS_LIST_CORPORA', + 4867: 'CQI_CORPUS_CHARSET', + 4868: 'CQI_CORPUS_PROPERTIES', + 4869: 'CQI_CORPUS_POSITIONAL_ATTRIBUTES', + 4870: 'CQI_CORPUS_STRUCTURAL_ATTRIBUTES', + 4871: 'CQI_CORPUS_STRUCTURAL_ATTRIBUTE_HAS_VALUES', + 4872: 'CQI_CORPUS_ALIGNMENT_ATTRIBUTES', + 4873: 'CQI_CORPUS_FULL_NAME', + 4874: 'CQI_CORPUS_INFO', + 4875: 'CQI_CORPUS_DROP_CORPUS', + 5121: 'CQI_CL_ATTRIBUTE_SIZE', + 5122: 'CQI_CL_LEXICON_SIZE', + 5123: 'CQI_CL_DROP_ATTRIBUTE', + 5124: 'CQI_CL_STR2ID', + 5125: 'CQI_CL_ID2STR', + 5126: 'CQI_CL_ID2FREQ', + 5127: 
'CQI_CL_CPOS2ID', + 5128: 'CQI_CL_CPOS2STR', + 5129: 'CQI_CL_CPOS2STRUC', + 5130: 'CQI_CL_CPOS2ALG', + 5131: 'CQI_CL_STRUC2STR', + 5132: 'CQI_CL_ID2CPOS', + 5133: 'CQI_CL_IDLIST2CPOS', + 5134: 'CQI_CL_REGEX2ID', + 5135: 'CQI_CL_STRUC2CPOS', + 5136: 'CQI_CL_ALG2CPOS', + 5152: 'CQI_CL_CPOS2LBOUND', + 5153: 'CQI_CL_CPOS2RBOUND', + 5377: 'CQI_CQP_QUERY', + 5378: 'CQI_CQP_LIST_SUBCORPORA', + 5379: 'CQI_CQP_SUBCORPUS_SIZE', + 5380: 'CQI_CQP_SUBCORPUS_HAS_FIELD', + 5381: 'CQI_CQP_DUMP_SUBCORPUS', + 5385: 'CQI_CQP_DROP_SUBCORPUS', + 5392: 'CQI_CQP_FDIST_1', + 5393: 'CQI_CQP_FDIST_2' +} diff --git a/app/corpora/cqi/version.py b/app/corpora/cqi/version.py index 9a2af455..b00e2679 100644 --- a/app/corpora/cqi/version.py +++ b/app/corpora/cqi/version.py @@ -1,4 +1,4 @@ -from .constants import MAJOR_VERSION, MINOR_VERSION +from .specification import MAJOR_VERSION, MINOR_VERSION version = '{}.{}'.format(MAJOR_VERSION, MINOR_VERSION) diff --git a/app/corpora/cqi/wrapper.py b/app/corpora/cqi/wrapper.py index 3dcfc2d5..daba87c8 100644 --- a/app/corpora/cqi/wrapper.py +++ b/app/corpora/cqi/wrapper.py @@ -1,5 +1,5 @@ from .api import APIClient -from .constants import CONST_FIELD_MATCH, CONST_FIELD_MATCHEND +from .specification import CONST_FIELD_MATCH, CONST_FIELD_MATCHEND import time diff --git a/app/corpora/pj_events.py b/app/corpora/pj_events.py index 0cdad4d8..0a5d2ca7 100644 --- a/app/corpora/pj_events.py +++ b/app/corpora/pj_events.py @@ -1,6 +1,6 @@ from flask import current_app, request from flask_login import current_user -from .cqi import CQiClient +from . import cqi from .. 
import db, socketio from ..decorators import socketio_login_required from ..events import connected_sessions @@ -39,7 +39,7 @@ def pj_corpus_analysis_query(query): corpus = client.corpora.get('CORPUS') try: results = corpus.query(query) - except Exception as e: + except cqi.errors.CQiException as e: response = {'code': 1, 'msg': str(e)} socketio.emit('pj_corpus_analysis_query', response, room=request.sid) else: @@ -82,10 +82,10 @@ def pj_corpus_analysis_session_handler(app, corpus_id, user_id, session_id): while corpus.status != 'analysing': db.session.refresh(corpus) socketio.sleep(3) - client = CQiClient('corpus_{}_analysis'.format(corpus_id)) + client = cqi.CQiClient('corpus_{}_analysis'.format(corpus_id)) try: client.connect() - except Exception: + except cqi.errors.CQiException: response = {'code': 500, 'msg': 'Internal Server Error'} socketio.emit('pj_corpus_analysis_init', response, room=session_id) return @@ -102,7 +102,7 @@ def pj_corpus_analysis_session_handler(app, corpus_id, user_id, session_id): ''' Teardown analysis session ''' try: client.disconnect() - except Exception: + except cqi.errors.CQiException: pass pj_corpus_analysis_clients.pop(session_id, None) pj_corpus_analysis_sessions[corpus_id].remove(session_id)