mirror of
https://gitlab.ub.uni-bielefeld.de/sfb1288inf/nopaque.git
synced 2025-02-22 13:40:36 +00:00
Update
This commit is contained in:
parent
2db9677659
commit
f7692102a5
@ -1,421 +1,28 @@
|
||||
from time import sleep
|
||||
from .. import errors, specification
|
||||
import socket
|
||||
import struct
|
||||
|
||||
|
||||
# ########################################################################### #
|
||||
# IMS CQi specification #
|
||||
# #
|
||||
# Version: 0.1a ;o) #
|
||||
# Author: Stefan Evert (evert@ims.uni-stuttgart.de) #
|
||||
# Modified by (codestyle): Patrick Jentsch (p.jentsch@uni-bielefeld.de) #
|
||||
# Modified date: Thurs Oct 10 #
|
||||
# ########################################################################### #
|
||||
""" 1. padding """
|
||||
PAD = 0x00


# 2. CQi responses
#
# Every response code is a two-byte word. The group constants (STATUS, ERROR,
# DATA, CL_ERROR, CQP_ERROR) equal the high byte of the codes in that group.

# 2.1 CQI_STATUS_*
STATUS = 0x01
STATUS_OK = 0x0101
STATUS_CONNECT_OK = 0x0102
STATUS_BYE_OK = 0x0103
STATUS_PING_OK = 0x0104

# 2.2 CQI_ERROR_*
ERROR = 0x02
ERROR_GENERAL_ERROR = 0x0201
ERROR_CONNECT_REFUSED = 0x0202
ERROR_USER_ABORT = 0x0203
ERROR_SYNTAX_ERROR = 0x0204
# includes corpus/attribute/subcorpus specifier syntax

# 2.3 CQI_DATA_*
DATA = 0x03
DATA_BYTE = 0x0301
DATA_BOOL = 0x0302
DATA_INT = 0x0303
DATA_STRING = 0x0304
DATA_BYTE_LIST = 0x0305
DATA_BOOL_LIST = 0x0306
DATA_INT_LIST = 0x0307
DATA_STRING_LIST = 0x0308
DATA_INT_INT = 0x0309
DATA_INT_INT_INT_INT = 0x030A
DATA_INT_TABLE = 0x030B

# 2.4 CQI_CL_ERROR_*
#
# NOTE: some CL error codes are not represented in the CQi specs
#  - usually because they're not used in the CL any more
#  - CDA_ENOSTRING is not considered an error (returns -1)
#  - CDA_EARGS: dynamic attribute calls not yet supported
CL_ERROR = 0x04
CL_ERROR_NO_SUCH_ATTRIBUTE = 0x0401
# returned if CQi server couldn't open attribute
CL_ERROR_WRONG_ATTRIBUTE_TYPE = 0x0402
# CDA_EATTTYPE
CL_ERROR_OUT_OF_RANGE = 0x0403
# CDA_EIDORNG, CDA_EIDXORNG, CDA_EPOSORNG
CL_ERROR_REGEX = 0x0404
# CDA_EPATTERN (not used), CDA_EBADREGEX
CL_ERROR_CORPUS_ACCESS = 0x0405
# CDA_ENODATA
CL_ERROR_OUT_OF_MEMORY = 0x0406
# CDA_ENOMEM
# this means the CQi server has run out of memory;
# try discarding some other corpora and/or subcorpora
CL_ERROR_INTERNAL = 0x0407
# CDA_EOTHER, CDA_ENYI
# this is the classical 'please contact technical support' error

# 2.5 CQI_CQP_ERROR_*
CQP_ERROR = 0x05
# CQP error messages yet to be defined
CQP_ERROR_GENERAL = 0x0501
CQP_ERROR_NO_SUCH_CORPUS = 0x0502
CQP_ERROR_INVALID_FIELD = 0x0503
CQP_ERROR_OUT_OF_RANGE = 0x0504
# various cases where a number is out of range
|
||||
|
||||
|
||||
""" 3. CQi commands """
|
||||
""" 3.1 CQI_CTRL_* """
|
||||
CTRL = 0x11
CTRL_CONNECT = 0x1101
# INPUT: (STRING username, STRING password)
# OUTPUT: CQI_STATUS_CONNECT_OK, CQI_ERROR_CONNECT_REFUSED
CTRL_BYE = 0x1102
# INPUT: ()
# OUTPUT: CQI_STATUS_BYE_OK
CTRL_USER_ABORT = 0x1103
# INPUT: ()
# OUTPUT:
CTRL_PING = 0x1104
# INPUT: ()
# OUTPUT: CQI_STATUS_PING_OK
CTRL_LAST_GENERAL_ERROR = 0x1105
# INPUT: ()
# OUTPUT: CQI_DATA_STRING
# full-text error message for the last general error reported by the CQi server

# 3.2 CQI_ASK_FEATURE_*
ASK_FEATURE = 0x12
ASK_FEATURE_CQI_1_0 = 0x1201
# INPUT: ()
# OUTPUT: CQI_DATA_BOOL
ASK_FEATURE_CL_2_3 = 0x1202
# INPUT: ()
# OUTPUT: CQI_DATA_BOOL
ASK_FEATURE_CQP_2_3 = 0x1203
# INPUT: ()
# OUTPUT: CQI_DATA_BOOL

# 3.3 CQI_CORPUS_*
CORPUS = 0x13
CORPUS_LIST_CORPORA = 0x1301
# INPUT: ()
# OUTPUT: CQI_DATA_STRING_LIST
CORPUS_CHARSET = 0x1303
# INPUT: (STRING corpus)
# OUTPUT: CQI_DATA_STRING
CORPUS_PROPERTIES = 0x1304
# INPUT: (STRING corpus)
# OUTPUT: CQI_DATA_STRING_LIST
CORPUS_POSITIONAL_ATTRIBUTES = 0x1305
# INPUT: (STRING corpus)
# OUTPUT: CQI_DATA_STRING_LIST
CORPUS_STRUCTURAL_ATTRIBUTES = 0x1306
# INPUT: (STRING corpus)
# OUTPUT: CQI_DATA_STRING_LIST
CORPUS_STRUCTURAL_ATTRIBUTE_HAS_VALUES = 0x1307
# INPUT: (STRING attribute)
# OUTPUT: CQI_DATA_BOOL
CORPUS_ALIGNMENT_ATTRIBUTES = 0x1308
# INPUT: (STRING corpus)
# OUTPUT: CQI_DATA_STRING_LIST
CORPUS_FULL_NAME = 0x1309
# INPUT: (STRING corpus)
# OUTPUT: CQI_DATA_STRING
# the full name of <corpus> as specified in its registry entry
CORPUS_INFO = 0x130A
# INPUT: (STRING corpus)
# OUTPUT: CQI_DATA_STRING_LIST
# returns the contents of the .info file of <corpus> as a list of lines
CORPUS_DROP_CORPUS = 0x130B
# INPUT: (STRING corpus)
# OUTPUT: CQI_STATUS_OK
# try to unload a corpus and all its attributes from memory

# 3.4 CQI_CL_*
CL = 0x14
# low-level corpus access (CL functions)
CL_ATTRIBUTE_SIZE = 0x1401
# INPUT: (STRING attribute)
# OUTPUT: CQI_DATA_INT
# returns the size of <attribute>:
#  - number of tokens (positional)
#  - number of regions (structural)
#  - number of alignments (alignment)
CL_LEXICON_SIZE = 0x1402
# INPUT: (STRING attribute)
# OUTPUT: CQI_DATA_INT
# returns the number of entries in the lexicon of a positional attribute;
# valid lexicon IDs range from 0 .. (lexicon_size - 1)
CL_DROP_ATTRIBUTE = 0x1403
# INPUT: (STRING attribute)
# OUTPUT: CQI_STATUS_OK
# unload attribute from memory

# NOTE: simple (scalar) mappings are applied to lists (the returned list has
#       exactly the same length as the list passed as an argument)
CL_STR2ID = 0x1404
# INPUT: (STRING attribute, STRING_LIST strings)
# OUTPUT: CQI_DATA_INT_LIST
# returns -1 for every string in <strings> that is not found in the lexicon
CL_ID2STR = 0x1405
# INPUT: (STRING attribute, INT_LIST id)
# OUTPUT: CQI_DATA_STRING_LIST
# returns "" for every ID in <id> that is out of range
CL_ID2FREQ = 0x1406
# INPUT: (STRING attribute, INT_LIST id)
# OUTPUT: CQI_DATA_INT_LIST
# returns 0 for every ID in <id> that is out of range
CL_CPOS2ID = 0x1407
# INPUT: (STRING attribute, INT_LIST cpos)
# OUTPUT: CQI_DATA_INT_LIST
# returns -1 for every corpus position in <cpos> that is out of range
CL_CPOS2STR = 0x1408
# INPUT: (STRING attribute, INT_LIST cpos)
# OUTPUT: CQI_DATA_STRING_LIST
# returns "" for every corpus position in <cpos> that is out of range
CL_CPOS2STRUC = 0x1409
# INPUT: (STRING attribute, INT_LIST cpos)
# OUTPUT: CQI_DATA_INT_LIST
# returns -1 for every corpus position not inside a structure region

# NOTE: temporary addition for the Euralex2000 tutorial, but should probably be
#       included in CQi specs
CL_CPOS2LBOUND = 0x1420
# INPUT: (STRING attribute, INT_LIST cpos)
# OUTPUT: CQI_DATA_INT_LIST
# returns left boundary of s-attribute region enclosing cpos, -1 if not in
# region
CL_CPOS2RBOUND = 0x1421
# INPUT: (STRING attribute, INT_LIST cpos)
# OUTPUT: CQI_DATA_INT_LIST
# returns right boundary of s-attribute region enclosing cpos, -1 if not in
# region
CL_CPOS2ALG = 0x140A
# INPUT: (STRING attribute, INT_LIST cpos)
# OUTPUT: CQI_DATA_INT_LIST
# returns -1 for every corpus position not inside an alignment
CL_STRUC2STR = 0x140B
# INPUT: (STRING attribute, INT_LIST strucs)
# OUTPUT: CQI_DATA_STRING_LIST
# returns annotated string values of structure regions in <strucs>; "" if out
# of range
# check CQI_CORPUS_STRUCTURAL_ATTRIBUTE_HAS_VALUES(<attribute>) first

# NOTE: the following mappings take a single argument and return multiple
#       values, including lists of arbitrary size
CL_ID2CPOS = 0x140C
# INPUT: (STRING attribute, INT id)
# OUTPUT: CQI_DATA_INT_LIST
# returns all corpus positions where the given token occurs
CL_IDLIST2CPOS = 0x140D
# INPUT: (STRING attribute, INT_LIST id_list)
# OUTPUT: CQI_DATA_INT_LIST
# returns all corpus positions where one of the tokens in <id_list>
# occurs; the returned list is sorted as a whole, not per token id
CL_REGEX2ID = 0x140E
# INPUT: (STRING attribute, STRING regex)
# OUTPUT: CQI_DATA_INT_LIST
# returns lexicon IDs of all tokens that match <regex>; the returned
# list may be empty (size 0);
CL_STRUC2CPOS = 0x140F
# INPUT: (STRING attribute, INT struc)
# OUTPUT: CQI_DATA_INT_INT
# returns start and end corpus positions of structure region <struc>
CL_ALG2CPOS = 0x1410
# INPUT: (STRING attribute, INT alg)
# OUTPUT: CQI_DATA_INT_INT_INT_INT
# returns (src_start, src_end, target_start, target_end)

# 3.5 CQI_CQP_*
CQP = 0x15
CQP_QUERY = 0x1501
# INPUT: (STRING mother_corpus, STRING subcorpus_name, STRING query)
# OUTPUT: CQI_STATUS_OK
# <query> must include the ';' character terminating the query.
CQP_LIST_SUBCORPORA = 0x1502
# INPUT: (STRING corpus)
# OUTPUT: CQI_DATA_STRING_LIST
CQP_SUBCORPUS_SIZE = 0x1503
# INPUT: (STRING subcorpus)
# OUTPUT: CQI_DATA_INT
CQP_SUBCORPUS_HAS_FIELD = 0x1504
# INPUT: (STRING subcorpus, BYTE field)
# OUTPUT: CQI_DATA_BOOL
CQP_DUMP_SUBCORPUS = 0x1505
# INPUT: (STRING subcorpus, BYTE field, INT first, INT last)
# OUTPUT: CQI_DATA_INT_LIST
# Dump the values of <field> for match ranges <first> .. <last> in <subcorpus>.
# <field> is one of the CQI_CONST_FIELD_* constants.
CQP_DROP_SUBCORPUS = 0x1509
# INPUT: (STRING subcorpus)
# OUTPUT: CQI_STATUS_OK
# delete a subcorpus from memory

# NOTE: The following two functions are temporarily included for the Euralex
#       2000 tutorial demo

# NOTE: frequency distribution of single tokens
CQP_FDIST_1 = 0x1510
# INPUT: (STRING subcorpus, INT cutoff, BYTE field, STRING attribute)
# OUTPUT: CQI_DATA_INT_LIST
# returns <n> (id, frequency) pairs flattened into a list of size 2*<n>
# field is one of CQI_CONST_FIELD_MATCH, CQI_CONST_FIELD_TARGET,
# CQI_CONST_FIELD_KEYWORD
# NB: pairs are sorted by frequency desc.

# NOTE: frequency distribution of pairs of tokens
CQP_FDIST_2 = 0x1511
# INPUT: (STRING subcorpus, INT cutoff, BYTE field1, STRING attribute1,
#         BYTE field2, STRING attribute2)
# OUTPUT: CQI_DATA_INT_LIST
# returns <n> (id1, id2, frequency) triples flattened into a list of size 3*<n>
# NB: triples are sorted by frequency desc.
|
||||
|
||||
|
||||
""" 4. Constant Definitions """
|
||||
CONST_FALSE = 0x00
CONST_NO = 0x00
CONST_TRUE = 0x01
CONST_YES = 0x01

# NOTE: The following constants specify which field will be returned by
#       CQI_CQP_DUMP_SUBCORPUS and some other subcorpus commands.
CONST_FIELD_MATCH = 0x10
CONST_FIELD_MATCHEND = 0x11

# NOTE: The constants specifying target0 .. target9 are guaranteed to have the
#       numerical values 0 .. 9, so clients do not need to look up the constant
#       values if they're handling arbitrary targets.
CONST_FIELD_TARGET_0 = 0x00
CONST_FIELD_TARGET_1 = 0x01
CONST_FIELD_TARGET_2 = 0x02
CONST_FIELD_TARGET_3 = 0x03
CONST_FIELD_TARGET_4 = 0x04
CONST_FIELD_TARGET_5 = 0x05
CONST_FIELD_TARGET_6 = 0x06
CONST_FIELD_TARGET_7 = 0x07
CONST_FIELD_TARGET_8 = 0x08
CONST_FIELD_TARGET_9 = 0x09

# NOTE: The following constants are provided for backward compatibility with
#       traditional CQP field names & while the generalised target concept
#       isn't yet implemented in the CQPserver.
CONST_FIELD_TARGET = 0x00
CONST_FIELD_KEYWORD = 0x09

# NOTE: CQi version is CQI_MAJOR_VERSION.CQI_MINOR_VERSION
MAJOR_VERSION = 0x00
MINOR_VERSION = 0x01
|
||||
|
||||
|
||||
""" 5. CQi lookup dictionary. """
|
||||
# Maps every two-byte CQi code to its symbolic CQI_* name. Keys are written in
# hexadecimal so that they can be compared directly with the constant
# definitions of the specification (0x0101 == 257, ...).
lookup = {
    # CQI_STATUS_*
    0x0101: 'CQI_STATUS_OK',
    0x0102: 'CQI_STATUS_CONNECT_OK',
    0x0103: 'CQI_STATUS_BYE_OK',
    0x0104: 'CQI_STATUS_PING_OK',
    # CQI_ERROR_*
    0x0201: 'CQI_ERROR_GENERAL_ERROR',
    0x0202: 'CQI_ERROR_CONNECT_REFUSED',
    0x0203: 'CQI_ERROR_USER_ABORT',
    0x0204: 'CQI_ERROR_SYNTAX_ERROR',
    # CQI_DATA_*
    0x0301: 'CQI_DATA_BYTE',
    0x0302: 'CQI_DATA_BOOL',
    0x0303: 'CQI_DATA_INT',
    0x0304: 'CQI_DATA_STRING',
    0x0305: 'CQI_DATA_BYTE_LIST',
    0x0306: 'CQI_DATA_BOOL_LIST',
    0x0307: 'CQI_DATA_INT_LIST',
    0x0308: 'CQI_DATA_STRING_LIST',
    0x0309: 'CQI_DATA_INT_INT',
    0x030A: 'CQI_DATA_INT_INT_INT_INT',
    0x030B: 'CQI_DATA_INT_TABLE',
    # CQI_CL_ERROR_*
    0x0401: 'CQI_CL_ERROR_NO_SUCH_ATTRIBUTE',
    0x0402: 'CQI_CL_ERROR_WRONG_ATTRIBUTE_TYPE',
    0x0403: 'CQI_CL_ERROR_OUT_OF_RANGE',
    0x0404: 'CQI_CL_ERROR_REGEX',
    0x0405: 'CQI_CL_ERROR_CORPUS_ACCESS',
    0x0406: 'CQI_CL_ERROR_OUT_OF_MEMORY',
    0x0407: 'CQI_CL_ERROR_INTERNAL',
    # CQI_CQP_ERROR_*
    0x0501: 'CQI_CQP_ERROR_GENERAL',
    0x0502: 'CQI_CQP_ERROR_NO_SUCH_CORPUS',
    0x0503: 'CQI_CQP_ERROR_INVALID_FIELD',
    0x0504: 'CQI_CQP_ERROR_OUT_OF_RANGE',
    # CQI_CTRL_*
    0x1101: 'CQI_CTRL_CONNECT',
    0x1102: 'CQI_CTRL_BYE',
    0x1103: 'CQI_CTRL_USER_ABORT',
    0x1104: 'CQI_CTRL_PING',
    0x1105: 'CQI_CTRL_LAST_GENERAL_ERROR',
    # CQI_ASK_FEATURE_*
    0x1201: 'CQI_ASK_FEATURE_CQI_1_0',
    0x1202: 'CQI_ASK_FEATURE_CL_2_3',
    0x1203: 'CQI_ASK_FEATURE_CQP_2_3',
    # CQI_CORPUS_*
    0x1301: 'CQI_CORPUS_LIST_CORPORA',
    0x1303: 'CQI_CORPUS_CHARSET',
    0x1304: 'CQI_CORPUS_PROPERTIES',
    0x1305: 'CQI_CORPUS_POSITIONAL_ATTRIBUTES',
    0x1306: 'CQI_CORPUS_STRUCTURAL_ATTRIBUTES',
    0x1307: 'CQI_CORPUS_STRUCTURAL_ATTRIBUTE_HAS_VALUES',
    0x1308: 'CQI_CORPUS_ALIGNMENT_ATTRIBUTES',
    0x1309: 'CQI_CORPUS_FULL_NAME',
    0x130A: 'CQI_CORPUS_INFO',
    0x130B: 'CQI_CORPUS_DROP_CORPUS',
    # CQI_CL_*
    0x1401: 'CQI_CL_ATTRIBUTE_SIZE',
    0x1402: 'CQI_CL_LEXICON_SIZE',
    0x1403: 'CQI_CL_DROP_ATTRIBUTE',
    0x1404: 'CQI_CL_STR2ID',
    0x1405: 'CQI_CL_ID2STR',
    0x1406: 'CQI_CL_ID2FREQ',
    0x1407: 'CQI_CL_CPOS2ID',
    0x1408: 'CQI_CL_CPOS2STR',
    0x1409: 'CQI_CL_CPOS2STRUC',
    0x140A: 'CQI_CL_CPOS2ALG',
    0x140B: 'CQI_CL_STRUC2STR',
    0x140C: 'CQI_CL_ID2CPOS',
    0x140D: 'CQI_CL_IDLIST2CPOS',
    0x140E: 'CQI_CL_REGEX2ID',
    0x140F: 'CQI_CL_STRUC2CPOS',
    0x1410: 'CQI_CL_ALG2CPOS',
    0x1420: 'CQI_CL_CPOS2LBOUND',
    0x1421: 'CQI_CL_CPOS2RBOUND',
    # CQI_CQP_*
    0x1501: 'CQI_CQP_QUERY',
    0x1502: 'CQI_CQP_LIST_SUBCORPORA',
    0x1503: 'CQI_CQP_SUBCORPUS_SIZE',
    0x1504: 'CQI_CQP_SUBCORPUS_HAS_FIELD',
    0x1505: 'CQI_CQP_DUMP_SUBCORPUS',
    0x1509: 'CQI_CQP_DROP_SUBCORPUS',
    0x1510: 'CQI_CQP_FDIST_1',
    0x1511: 'CQI_CQP_FDIST_2',
}
|
||||
|
||||
|
||||
# ########################################################################### #
|
||||
# IMS CQi client #
|
||||
# #
|
||||
# Version: 0.1a #
|
||||
# Author: Patrick Jentsch (p.jentsch@uni-bielefeld.de) #
|
||||
# ########################################################################### #
|
||||
class APIClient:
|
||||
"""
|
||||
A low-level client for the IMS Open Corpus Workbench (CWB) corpus query
|
||||
interface (CQi) API.
|
||||
|
||||
Example:
|
||||
>>> import cqi
|
||||
>>> client = cqi.APIClient('127.0.0.1')
|
||||
>>> client.ctrl_connect('user', 'password')
|
||||
{'code': 258, 'msg': 'CQI_STATUS_CONNECT_OK'}
|
||||
>>> client.ctrl_bye()
|
||||
{'code': 259, 'msg': 'CQI_STATUS_BYE_OK'}
|
||||
|
||||
Args:
|
||||
host (str): Hostname or IP address of the CQP server. For example,
|
||||
``cqpserver.localhost`` or ``127.0.0.1``.
|
||||
port (int): Port the CQP server listens on. Default: ``4877``
|
||||
"""
|
||||
|
||||
def __init__(self, host, port=4877):
|
||||
self.host = host
|
||||
self.port = port
|
||||
@ -425,7 +32,7 @@ class APIClient:
|
||||
self.socket.connect((self.host, self.port))
|
||||
# INPUT: (STRING username, STRING password)
|
||||
# OUTPUT: CQI_STATUS_CONNECT_OK, CQI_ERROR_CONNECT_REFUSED
|
||||
self.__send_WORD(CTRL_CONNECT)
|
||||
self.__send_WORD(specification.CTRL_CONNECT)
|
||||
self.__send_STRING(username)
|
||||
self.__send_STRING(password)
|
||||
return self.__recv_response()
|
||||
@ -433,7 +40,7 @@ class APIClient:
|
||||
def ctrl_bye(self):
    """Send CQI_CTRL_BYE, read the server's reply and close the socket.

    INPUT: ()
    OUTPUT: CQI_STATUS_BYE_OK

    Returns:
        Whatever ``__recv_response`` produces for the server's reply.

    Raises:
        Whatever ``__recv_response`` raises for an error reply; in that case
        the socket is not closed by this method.
    """
    self.__send_WORD(specification.CTRL_BYE)
    # Read the reply before closing; the socket must still be open here.
    response = self.__recv_response()
    self.socket.close()
    return response
|
||||
@ -441,12 +48,12 @@ class APIClient:
|
||||
def ctrl_user_abort(self):
    """Send CQI_CTRL_USER_ABORT to the server.

    INPUT: ()
    OUTPUT: (none specified)

    No reply is read after the command word has been sent, so this method
    returns ``None``.
    """
    self.__send_WORD(specification.CTRL_USER_ABORT)
|
||||
|
||||
def ctrl_ping(self):
    """Send CQI_CTRL_PING and return the server's reply.

    INPUT: ()
    OUTPUT: CQI_STATUS_PING_OK

    Returns:
        Whatever ``__recv_response`` produces for the server's reply.
    """
    self.__send_WORD(specification.CTRL_PING)
    return self.__recv_response()
|
||||
|
||||
def ctrl_last_general_error(self):
|
||||
@ -454,72 +61,72 @@ class APIClient:
|
||||
# OUTPUT: CQI_DATA_STRING
|
||||
# full-text error message for the last general error reported by the
|
||||
# CQi server
|
||||
self.__send_WORD(CTRL_LAST_GENERAL_ERROR)
|
||||
self.__send_WORD(specification.CTRL_LAST_GENERAL_ERROR)
|
||||
return self.__recv_response()
|
||||
|
||||
def ask_feature_cqi_1_0(self):
    """Send CQI_ASK_FEATURE_CQI_1_0 and return the server's reply.

    INPUT: ()
    OUTPUT: CQI_DATA_BOOL

    Returns:
        Whatever ``__recv_response`` produces for the server's reply.
    """
    self.__send_WORD(specification.ASK_FEATURE_CQI_1_0)
    return self.__recv_response()
|
||||
|
||||
def ask_feature_cl_2_3(self):
    """Send CQI_ASK_FEATURE_CL_2_3 and return the server's reply.

    INPUT: ()
    OUTPUT: CQI_DATA_BOOL

    Returns:
        Whatever ``__recv_response`` produces for the server's reply.
    """
    self.__send_WORD(specification.ASK_FEATURE_CL_2_3)
    return self.__recv_response()
|
||||
|
||||
def ask_feature_cqp_2_3(self):
    """Send CQI_ASK_FEATURE_CQP_2_3 and return the server's reply.

    INPUT: ()
    OUTPUT: CQI_DATA_BOOL

    Returns:
        Whatever ``__recv_response`` produces for the server's reply.
    """
    # The CQP feature query has its own opcode (0x1203); the CL opcode
    # (0x1202) belongs to ask_feature_cl_2_3.
    self.__send_WORD(specification.ASK_FEATURE_CQP_2_3)
    return self.__recv_response()
|
||||
|
||||
def corpus_list_coprora(self):
    """Send CQI_CORPUS_LIST_CORPORA and return the server's reply.

    INPUT: ()
    OUTPUT: CQI_DATA_STRING_LIST

    NOTE: the method name is misspelled ("coprora" instead of "corpora");
    it is kept unchanged so that existing callers keep working.

    Returns:
        Whatever ``__recv_response`` produces for the server's reply.
    """
    self.__send_WORD(specification.CORPUS_LIST_CORPORA)
    return self.__recv_response()
|
||||
|
||||
def corpus_charset(self, corpus):
    """Send CQI_CORPUS_CHARSET for <corpus> and return the server's reply.

    INPUT: (STRING corpus)
    OUTPUT: CQI_DATA_STRING

    Args:
        corpus: Corpus name, sent to the server as a CQi STRING.

    Returns:
        Whatever ``__recv_response`` produces for the server's reply.
    """
    self.__send_WORD(specification.CORPUS_CHARSET)
    self.__send_STRING(corpus)
    return self.__recv_response()
|
||||
|
||||
def corpus_properties(self, corpus):
    """Send CQI_CORPUS_PROPERTIES for <corpus> and return the server's reply.

    INPUT: (STRING corpus)
    OUTPUT: CQI_DATA_STRING_LIST

    Args:
        corpus: Corpus name, sent to the server as a CQi STRING.

    Returns:
        Whatever ``__recv_response`` produces for the server's reply.
    """
    self.__send_WORD(specification.CORPUS_PROPERTIES)
    self.__send_STRING(corpus)
    return self.__recv_response()
|
||||
|
||||
def corpus_positional_attributes(self, corpus):
    """Send CQI_CORPUS_POSITIONAL_ATTRIBUTES for <corpus> and return the reply.

    INPUT: (STRING corpus)
    OUTPUT: CQI_DATA_STRING_LIST

    Args:
        corpus: Corpus name, sent to the server as a CQi STRING.

    Returns:
        Whatever ``__recv_response`` produces for the server's reply.
    """
    self.__send_WORD(specification.CORPUS_POSITIONAL_ATTRIBUTES)
    self.__send_STRING(corpus)
    return self.__recv_response()
|
||||
|
||||
def corpus_structural_attributes(self, corpus):
    """Send CQI_CORPUS_STRUCTURAL_ATTRIBUTES for <corpus> and return the reply.

    INPUT: (STRING corpus)
    OUTPUT: CQI_DATA_STRING_LIST

    Args:
        corpus: Corpus name, sent to the server as a CQi STRING.

    Returns:
        Whatever ``__recv_response`` produces for the server's reply.
    """
    self.__send_WORD(specification.CORPUS_STRUCTURAL_ATTRIBUTES)
    self.__send_STRING(corpus)
    return self.__recv_response()
|
||||
|
||||
def corpus_structural_attribute_has_values(self, attribute):
    """Send CQI_CORPUS_STRUCTURAL_ATTRIBUTE_HAS_VALUES and return the reply.

    INPUT: (STRING attribute)
    OUTPUT: CQI_DATA_BOOL

    Args:
        attribute: Attribute specifier, sent to the server as a CQi STRING.

    Returns:
        Whatever ``__recv_response`` produces for the server's reply.
    """
    self.__send_WORD(specification.CORPUS_STRUCTURAL_ATTRIBUTE_HAS_VALUES)
    self.__send_STRING(attribute)
    return self.__recv_response()
|
||||
|
||||
def corpus_alignment_attributes(self, corpus):
    """Send CQI_CORPUS_ALIGNMENT_ATTRIBUTES for <corpus> and return the reply.

    INPUT: (STRING corpus)
    OUTPUT: CQI_DATA_STRING_LIST

    Args:
        corpus: Corpus name, sent to the server as a CQi STRING.

    Returns:
        Whatever ``__recv_response`` produces for the server's reply.
    """
    self.__send_WORD(specification.CORPUS_ALIGNMENT_ATTRIBUTES)
    self.__send_STRING(corpus)
    return self.__recv_response()
|
||||
|
||||
@ -527,7 +134,7 @@ class APIClient:
|
||||
# INPUT: (STRING corpus)
|
||||
# OUTPUT: CQI_DATA_STRING
|
||||
# the full name of <corpus> as specified in its registry entry
|
||||
self.__send_WORD(CORPUS_FULL_NAME)
|
||||
self.__send_WORD(specification.CORPUS_FULL_NAME)
|
||||
self.__send_STRING(corpus)
|
||||
return self.__recv_response()
|
||||
|
||||
@ -535,7 +142,7 @@ class APIClient:
|
||||
# INPUT: (STRING corpus)
|
||||
# OUTPUT: CQI_DATA_STRING_LIST
|
||||
# returns the contents of the .info file of <corpus> as a list of lines
|
||||
self.__send_WORD(CORPUS_INFO)
|
||||
self.__send_WORD(specification.CORPUS_INFO)
|
||||
self.__send_STRING(corpus)
|
||||
return self.__recv_response()
|
||||
|
||||
@ -543,7 +150,7 @@ class APIClient:
|
||||
# INPUT: (STRING corpus)
|
||||
# OUTPUT: CQI_STATUS_OK
|
||||
# try to unload a corpus and all its attributes from memory
|
||||
self.__send_WORD(CORPUS_DROP_CORPUS)
|
||||
self.__send_WORD(specification.CORPUS_DROP_CORPUS)
|
||||
self.__send_STRING(corpus)
|
||||
return self.__recv_response()
|
||||
|
||||
@ -554,7 +161,7 @@ class APIClient:
|
||||
# number of tokens (positional)
|
||||
# number of regions (structural)
|
||||
# number of alignments (alignment)
|
||||
self.__send_WORD(CL_ATTRIBUTE_SIZE)
|
||||
self.__send_WORD(specification.CL_ATTRIBUTE_SIZE)
|
||||
self.__send_STRING(attribute)
|
||||
return self.__recv_response()
|
||||
|
||||
@ -564,7 +171,7 @@ class APIClient:
|
||||
# returns the number of entries in the lexicon of a positional
|
||||
# attribute;
|
||||
# valid lexicon IDs range from 0 .. (lexicon_size - 1)
|
||||
self.__send_WORD(CL_LEXICON_SIZE)
|
||||
self.__send_WORD(specification.CL_LEXICON_SIZE)
|
||||
self.__send_STRING(attribute)
|
||||
return self.__recv_response()
|
||||
|
||||
@ -572,7 +179,7 @@ class APIClient:
|
||||
# INPUT: (STRING attribute)
|
||||
# OUTPUT: CQI_STATUS_OK
|
||||
# unload attribute from memory
|
||||
self.__send_WORD(CL_DROP_ATTRIBUTE)
|
||||
self.__send_WORD(specification.CL_DROP_ATTRIBUTE)
|
||||
self.__send_STRING(attribute)
|
||||
return self.__recv_response()
|
||||
|
||||
@ -586,7 +193,7 @@ class APIClient:
|
||||
# OUTPUT: CQI_DATA_INT_LIST
|
||||
# returns -1 for every string in <strings> that is not found in the
|
||||
# lexicon
|
||||
self.__send_WORD(CL_STR2ID)
|
||||
self.__send_WORD(specification.CL_STR2ID)
|
||||
self.__send_STRING(attribute)
|
||||
self.__send_STRING_LIST(strings)
|
||||
return self.__recv_response()
|
||||
@ -595,7 +202,7 @@ class APIClient:
|
||||
# INPUT: (STRING attribute, INT_LIST id)
|
||||
# OUTPUT: CQI_DATA_STRING_LIST
|
||||
# returns "" for every ID in <id> that is out of range
|
||||
self.__send_WORD(CL_ID2STR)
|
||||
self.__send_WORD(specification.CL_ID2STR)
|
||||
self.__send_STRING(attribute)
|
||||
self.__send_INT_LIST(id)
|
||||
return self.__recv_response()
|
||||
@ -604,7 +211,7 @@ class APIClient:
|
||||
# INPUT: (STRING attribute, INT_LIST id)
|
||||
# OUTPUT: CQI_DATA_INT_LIST
|
||||
# returns 0 for every ID in <id> that is out of range
|
||||
self.__send_WORD(CL_ID2FREQ)
|
||||
self.__send_WORD(specification.CL_ID2FREQ)
|
||||
self.__send_STRING(attribute)
|
||||
self.__send_INT_LIST(id)
|
||||
return self.__recv_response()
|
||||
@ -613,7 +220,7 @@ class APIClient:
|
||||
# INPUT: (STRING attribute, INT_LIST cpos)
|
||||
# OUTPUT: CQI_DATA_INT_LIST
|
||||
# returns -1 for every corpus position in <cpos> that is out of range
|
||||
self.__send_WORD(CL_ID2FREQ)
|
||||
self.__send_WORD(specification.CL_ID2FREQ)
|
||||
self.__send_STRING(attribute)
|
||||
self.__send_INT_LIST(cpos)
|
||||
return self.__recv_response()
|
||||
@ -622,7 +229,7 @@ class APIClient:
|
||||
# INPUT: (STRING attribute, INT_LIST cpos)
|
||||
# OUTPUT: CQI_DATA_STRING_LIST
|
||||
# returns "" for every corpus position in <cpos> that is out of range
|
||||
self.__send_WORD(CL_CPOS2STR)
|
||||
self.__send_WORD(specification.CL_CPOS2STR)
|
||||
self.__send_STRING(attribute)
|
||||
self.__send_INT_LIST(cpos)
|
||||
return self.__recv_response()
|
||||
@ -631,7 +238,7 @@ class APIClient:
|
||||
# INPUT: (STRING attribute, INT_LIST cpos)
|
||||
# OUTPUT: CQI_DATA_INT_LIST
|
||||
# returns -1 for every corpus position not inside a structure region
|
||||
self.__send_WORD(CL_CPOS2STRUC)
|
||||
self.__send_WORD(specification.CL_CPOS2STRUC)
|
||||
self.__send_STRING(attribute)
|
||||
self.__send_INT_LIST(cpos)
|
||||
return self.__recv_response()
|
||||
@ -646,7 +253,7 @@ class APIClient:
|
||||
# OUTPUT: CQI_DATA_INT_LIST
|
||||
# returns left boundary of s-attribute region enclosing cpos, -1 if not
|
||||
# in region
|
||||
self.__send_WORD(CL_CPOS2LBOUND)
|
||||
self.__send_WORD(specification.CL_CPOS2LBOUND)
|
||||
self.__send_STRING(attribute)
|
||||
self.__send_INT_LIST(cpos)
|
||||
return self.__recv_response()
|
||||
@ -656,7 +263,7 @@ class APIClient:
|
||||
# OUTPUT: CQI_DATA_INT_LIST
|
||||
# returns right boundary of s-attribute region enclosing cpos, -1 if
|
||||
# not in region
|
||||
self.__send_WORD(CL_CPOS2RBOUND)
|
||||
self.__send_WORD(specification.CL_CPOS2RBOUND)
|
||||
self.__send_STRING(attribute)
|
||||
self.__send_INT_LIST(cpos)
|
||||
return self.__recv_response()
|
||||
@ -665,7 +272,7 @@ class APIClient:
|
||||
# INPUT: (STRING attribute, INT_LIST cpos)
|
||||
# OUTPUT: CQI_DATA_INT_LIST
|
||||
# returns -1 for every corpus position not inside an alignment
|
||||
self.__send_WORD(CL_CPOS2ALG)
|
||||
self.__send_WORD(specification.CL_CPOS2ALG)
|
||||
self.__send_STRING(attribute)
|
||||
self.__send_INT_LIST(cpos)
|
||||
return self.__recv_response()
|
||||
@ -676,7 +283,7 @@ class APIClient:
|
||||
# returns annotated string values of structure regions in <strucs>; ""
|
||||
# if out of range
|
||||
# check CQI_CORPUS_STRUCTURAL_ATTRIBUTE_HAS_VALUES(<attribute>) first
|
||||
self.__send_WORD(CL_STRUC2STR)
|
||||
self.__send_WORD(specification.CL_STRUC2STR)
|
||||
self.__send_STRING(attribute)
|
||||
self.__send_INT_LIST(strucs)
|
||||
return self.__recv_response()
|
||||
@ -690,7 +297,7 @@ class APIClient:
|
||||
# INPUT: (STRING attribute, INT id)
|
||||
# OUTPUT: CQI_DATA_INT_LIST
|
||||
# returns all corpus positions where the given token occurs
|
||||
self.__send_WORD(CL_ID2CPOS)
|
||||
self.__send_WORD(specification.CL_ID2CPOS)
|
||||
self.__send_STRING(attribute)
|
||||
self.__send_INT(id)
|
||||
return self.__recv_response()
|
||||
@ -700,7 +307,7 @@ class APIClient:
|
||||
# OUTPUT: CQI_DATA_INT_LIST
|
||||
# returns all corpus positions where one of the tokens in <id_list>
|
||||
# occurs; the returned list is sorted as a whole, not per token id
|
||||
self.__send_WORD(CL_IDLIST2CPOS)
|
||||
self.__send_WORD(specification.CL_IDLIST2CPOS)
|
||||
self.__send_STRING(attribute)
|
||||
self.__send_INT_LIST(id_list)
|
||||
return self.__recv_response()
|
||||
@ -710,7 +317,7 @@ class APIClient:
|
||||
# OUTPUT: CQI_DATA_INT_LIST
|
||||
# returns lexicon IDs of all tokens that match <regex>; the returned
|
||||
# list may be empty (size 0);
|
||||
self.__send_WORD(CL_REGEX2ID)
|
||||
self.__send_WORD(specification.CL_REGEX2ID)
|
||||
self.__send_STRING(attribute)
|
||||
self.__send_STRING(regex)
|
||||
return self.__recv_response()
|
||||
@ -719,7 +326,7 @@ class APIClient:
|
||||
# INPUT: (STRING attribute, INT struc)
|
||||
# OUTPUT: CQI_DATA_INT_INT
|
||||
# returns start and end corpus positions of structure region <struc>
|
||||
self.__send_WORD(CL_STRUC2CPOS)
|
||||
self.__send_WORD(specification.CL_STRUC2CPOS)
|
||||
self.__send_STRING(attribute)
|
||||
self.__send_INT(struc)
|
||||
return self.__recv_response()
|
||||
@ -728,7 +335,7 @@ class APIClient:
|
||||
# INPUT: (STRING attribute, INT alg)
|
||||
# OUTPUT: CQI_DATA_INT_INT_INT_INT
|
||||
# returns (src_start, src_end, target_start, target_end)
|
||||
self.__send_WORD(CL_ALG2CPOS)
|
||||
self.__send_WORD(specification.CL_ALG2CPOS)
|
||||
self.__send_STRING(attribute)
|
||||
self.__send_INT(alg)
|
||||
return self.__recv_response()
|
||||
@ -737,7 +344,7 @@ class APIClient:
|
||||
# INPUT: (STRING mother_corpus, STRING subcorpus_name, STRING query)
|
||||
# OUTPUT: CQI_STATUS_OK
|
||||
# <query> must include the ';' character terminating the query.
|
||||
self.__send_WORD(CQP_QUERY)
|
||||
self.__send_WORD(specification.CQP_QUERY)
|
||||
self.__send_STRING(mother_corpus)
|
||||
self.__send_STRING(subcorpus_name)
|
||||
self.__send_STRING(query)
|
||||
@ -746,21 +353,21 @@ class APIClient:
|
||||
def cqp_list_subcorpora(self, corpus):
    """Send CQI_CQP_LIST_SUBCORPORA for <corpus> and return the server's reply.

    INPUT: (STRING corpus)
    OUTPUT: CQI_DATA_STRING_LIST

    Args:
        corpus: Corpus name, sent to the server as a CQi STRING.

    Returns:
        Whatever ``__recv_response`` produces for the server's reply.
    """
    self.__send_WORD(specification.CQP_LIST_SUBCORPORA)
    self.__send_STRING(corpus)
    return self.__recv_response()
|
||||
|
||||
def cqp_subcorpus_size(self, subcorpus):
    """Send CQI_CQP_SUBCORPUS_SIZE for <subcorpus> and return the reply.

    INPUT: (STRING subcorpus)
    OUTPUT: CQI_DATA_INT

    Args:
        subcorpus: Subcorpus specifier, sent to the server as a CQi STRING.

    Returns:
        Whatever ``__recv_response`` produces for the server's reply.
    """
    self.__send_WORD(specification.CQP_SUBCORPUS_SIZE)
    self.__send_STRING(subcorpus)
    return self.__recv_response()
|
||||
|
||||
def cqp_subcorpus_has_field(self, subcorpus, field):
    """Send CQI_CQP_SUBCORPUS_HAS_FIELD and return the server's reply.

    INPUT: (STRING subcorpus, BYTE field)
    OUTPUT: CQI_DATA_BOOL

    Args:
        subcorpus: Subcorpus specifier, sent to the server as a CQi STRING.
        field: Field selector, sent to the server as a CQi BYTE.

    Returns:
        Whatever ``__recv_response`` produces for the server's reply.
    """
    self.__send_WORD(specification.CQP_SUBCORPUS_HAS_FIELD)
    self.__send_STRING(subcorpus)
    self.__send_BYTE(field)
    return self.__recv_response()
|
||||
@ -770,7 +377,7 @@ class APIClient:
|
||||
# OUTPUT: CQI_DATA_INT_LIST
|
||||
# Dump the values of <field> for match ranges <first> .. <last>
|
||||
# in <subcorpus>. <field> is one of the CQI_CONST_FIELD_* constants.
|
||||
self.__send_WORD(CQP_DUMP_SUBCORPUS)
|
||||
self.__send_WORD(specification.CQP_DUMP_SUBCORPUS)
|
||||
self.__send_STRING(subcorpus)
|
||||
self.__send_BYTE(field)
|
||||
self.__send_INT(first)
|
||||
@ -781,7 +388,7 @@ class APIClient:
|
||||
# INPUT: (STRING subcorpus)
|
||||
# OUTPUT: CQI_STATUS_OK
|
||||
# delete a subcorpus from memory
|
||||
self.__send_WORD(CQP_DROP_SUBCORPUS)
|
||||
self.__send_WORD(specification.CQP_DROP_SUBCORPUS)
|
||||
self.__send_STRING(subcorpus)
|
||||
return self.__recv_response()
|
||||
|
||||
@ -798,7 +405,7 @@ class APIClient:
|
||||
# field is one of CQI_CONST_FIELD_MATCH, CQI_CONST_FIELD_TARGET,
|
||||
# CQI_CONST_FIELD_KEYWORD
|
||||
# NB: pairs are sorted by frequency desc.
|
||||
self.__send_WORD(CQP_FDIST_1)
|
||||
self.__send_WORD(specification.CQP_FDIST_1)
|
||||
self.__send_STRING(subcorpus)
|
||||
self.__send_INT(cutoff)
|
||||
self.__send_BYTE(field)
|
||||
@ -814,7 +421,7 @@ class APIClient:
|
||||
# returns <n> (id1, id2, frequency) triples flattened into a list of size
|
||||
# 3*<n>
|
||||
# NB: triples are sorted by frequency desc.
|
||||
self.__send_WORD(CQP_FDIST_2)
|
||||
self.__send_WORD(specification.CQP_FDIST_2)
|
||||
self.__send_STRING(subcorpus)
|
||||
self.__send_INT(cutoff)
|
||||
self.__send_BYTE(field1)
|
||||
@ -826,46 +433,84 @@ class APIClient:
|
||||
def __recv_response(self):
    """Read one response WORD and dispatch on its response type.

    The high byte of the received WORD selects the response family, the
    full WORD identifies the concrete response.

    Returns:
        - for DATA responses: whatever ``__recv_DATA`` returns for the
          received data type;
        - for STATUS responses: a dict ``{'code': <WORD>, 'msg': <name>}``
          where the name is taken from ``specification.lookup``.

    Raises:
        The exception object built by ``__create_cl_error``,
        ``__create_cqp_error`` or ``__create_error`` for the respective
        error families, and a plain ``Exception`` for an unknown
        response type.
    """
    byte_data = self.__recv_WORD()
    # Upper 8 bits carry the response family (STATUS, ERROR, DATA, ...).
    response_type = byte_data >> 8
    if response_type == specification.CL_ERROR:
        raise self.__create_cl_error(byte_data)
    elif response_type == specification.CQP_ERROR:
        raise self.__create_cqp_error(byte_data)
    elif response_type == specification.DATA:
        return self.__recv_DATA(byte_data)
    elif response_type == specification.ERROR:
        raise self.__create_error(byte_data)
    elif response_type == specification.STATUS:
        return {'code': byte_data, 'msg': specification.lookup[byte_data]}
    else:
        raise Exception('Unknown response type: {}'.format(response_type))
|
||||
|
||||
def __create_cl_error(self, error_type):
    """Build the exception object for a CL_ERROR response code.

    Known codes map to their dedicated ``errors.CLError*`` class, which
    is instantiated without arguments; any other code yields a generic
    ``errors.CLError`` carrying the code. The exception is returned,
    not raised.
    """
    known_errors = {
        specification.CL_ERROR_NO_SUCH_ATTRIBUTE:
            errors.CLErrorNoSuchAttribute,
        specification.CL_ERROR_WRONG_ATTRIBUTE_TYPE:
            errors.CLErrorWrongAttributeType,
        specification.CL_ERROR_OUT_OF_RANGE: errors.CLErrorOutOfRange,
        specification.CL_ERROR_REGEX: errors.CLErrorRegex,
        specification.CL_ERROR_CORPUS_ACCESS: errors.CLErrorCorpusAccess,
        specification.CL_ERROR_OUT_OF_MEMORY: errors.CLErrorOutOfMemory,
        specification.CL_ERROR_INTERNAL: errors.CLErrorInternal,
    }
    error_class = known_errors.get(error_type)
    if error_class is None:
        return errors.CLError(error_type)
    return error_class()
|
||||
|
||||
def __create_cqp_error(self, error_type):
    """Build the exception object for a CQP_ERROR response code.

    Known codes map to their dedicated ``errors.CQPError*`` class, which
    is instantiated without arguments; any other code yields a generic
    ``errors.CQPError`` carrying the code. The exception is returned,
    not raised.
    """
    known_errors = {
        specification.CQP_ERROR_GENERAL: errors.CQPErrorGeneral,
        specification.CQP_ERROR_INVALID_FIELD: errors.CQPErrorInvalidField,
        specification.CQP_ERROR_OUT_OF_RANGE: errors.CQPErrorOutOfRange,
    }
    error_class = known_errors.get(error_type)
    if error_class is None:
        return errors.CQPError(error_type)
    return error_class()
|
||||
|
||||
def __create_error(self, error_type):
    """Build the exception object for a general ERROR response code.

    Known codes map to their dedicated ``errors.Error*`` class, which is
    instantiated without arguments; any other code yields a generic
    ``errors.Error`` carrying the code. The exception is returned, not
    raised.
    """
    known_errors = {
        specification.ERROR_GENERAL_ERROR: errors.ErrorGeneralError,
        specification.ERROR_CONNECT_REFUSED: errors.ErrorConnectRefused,
        specification.ERROR_USER_ABORT: errors.ErrorUserAbort,
        specification.ERROR_SYNTAX_ERROR: errors.ErrorSyntaxError,
    }
    error_class = known_errors.get(error_type)
    if error_class is None:
        return errors.Error(error_type)
    return error_class()
|
||||
|
||||
def __recv_DATA(self, data_type):
    """Read the payload of a DATA response and return it.

    ``data_type`` is the full response WORD; it is compared against the
    ``specification.DATA_*`` constants and the matching
    ``__recv_DATA_*`` reader is called.

    Raises:
        Exception: if ``data_type`` is not one of the known DATA types.
    """
    if data_type == specification.DATA_BYTE:
        data = self.__recv_DATA_BYTE()
    elif data_type == specification.DATA_BOOL:
        data = self.__recv_DATA_BOOL()
    elif data_type == specification.DATA_INT:
        data = self.__recv_DATA_INT()
    elif data_type == specification.DATA_STRING:
        data = self.__recv_DATA_STRING()
    elif data_type == specification.DATA_BYTE_LIST:
        data = self.__recv_DATA_BYTE_LIST()
    elif data_type == specification.DATA_BOOL_LIST:
        data = self.__recv_DATA_BOOL_LIST()
    elif data_type == specification.DATA_INT_LIST:
        data = self.__recv_DATA_INT_LIST()
    elif data_type == specification.DATA_STRING_LIST:
        data = self.__recv_DATA_STRING_LIST()
    elif data_type == specification.DATA_INT_INT:
        data = self.__recv_DATA_INT_INT()
    elif data_type == specification.DATA_INT_INT_INT_INT:
        data = self.__recv_DATA_INT_INT_INT_INT()
    elif data_type == specification.DATA_INT_TABLE:
        data = self.__recv_DATA_INT_TABLE()
    else:
        raise Exception('Unknown data type: {}'.format(data_type))
    return data
|
||||
|
||||
def __recv_DATA_BYTE(self):
|
||||
|
@ -7,9 +7,10 @@ class CQiClient:
|
||||
self.api = APIClient(host, port=port)
|
||||
|
||||
def connect(self, username='anonymous', password=''):
    """Open the CQi session and return the server's connect response.

    Issues a single ``ctrl_connect`` call on the API client, then
    creates the ``corpora`` collection for this client.

    Returns:
        Whatever ``self.api.ctrl_connect`` returns.
    """
    status = self.api.ctrl_connect(username, password)
    self.corpora = CorpusCollection(self)
    return status
|
||||
|
||||
def disconnect(self):
    """Close the CQi session and return the server's bye response.

    Removes the ``corpora`` attribute first, then issues a single
    ``ctrl_bye`` call on the API client.

    Returns:
        Whatever ``self.api.ctrl_bye`` returns.
    """
    del self.corpora
    return self.api.ctrl_bye()
|
||||
|
@ -1,36 +0,0 @@
|
||||
""" 4. Constant Definitions """
|
||||
CONST_FALSE = 0x00
|
||||
CONST_NO = 0x00
|
||||
CONST_TRUE = 0x01
|
||||
CONST_YES = 0x01
|
||||
"""
|
||||
" NOTE: The following constants specify which field will be returned by
|
||||
" CQI_CQP_DUMP_SUBCORPUS and some other subcorpus commands.
|
||||
"""
|
||||
CONST_FIELD_MATCH = 0x10
|
||||
CONST_FIELD_MATCHEND = 0x11
|
||||
"""
|
||||
" NOTE: The constants specifying target0 .. target9 are guaranteed to have the
|
||||
" numerical values 0 .. 9, so clients do not need to look up the constant
|
||||
" values if they're handling arbitrary targets.
|
||||
"""
|
||||
CONST_FIELD_TARGET_0 = 0x00
|
||||
CONST_FIELD_TARGET_1 = 0x01
|
||||
CONST_FIELD_TARGET_2 = 0x02
|
||||
CONST_FIELD_TARGET_3 = 0x03
|
||||
CONST_FIELD_TARGET_4 = 0x04
|
||||
CONST_FIELD_TARGET_5 = 0x05
|
||||
CONST_FIELD_TARGET_6 = 0x06
|
||||
CONST_FIELD_TARGET_7 = 0x07
|
||||
CONST_FIELD_TARGET_8 = 0x08
|
||||
CONST_FIELD_TARGET_9 = 0x09
|
||||
"""
|
||||
" NOTE: The following constants are provided for backward compatibility with
|
||||
" traditional CQP field names & while the generalised target concept
|
||||
" isn't yet implemented in the CQPserver.
|
||||
"""
|
||||
CONST_FIELD_TARGET = 0x00
|
||||
CONST_FIELD_KEYWORD = 0x09
|
||||
""" NOTE: CQi version is CQI_MAJOR_VERSION.CQI_MINOR_VERSION """
|
||||
MAJOR_VERSION = 0x00
|
||||
MINOR_VERSION = 0x01
|
104
app/corpora/cqi/errors.py
Normal file
104
app/corpora/cqi/errors.py
Normal file
@ -0,0 +1,104 @@
|
||||
class CQiException(Exception):
    """
    A base class from which all other exceptions inherit.
    If you want to catch all errors that the CQi package might raise,
    catch this base exception.
    """


class Error(CQiException):
    """Base class for general CQi errors (ERROR = 0x02)."""


class ErrorGeneralError(Error):
    """ERROR_GENERAL_ERROR = 0x0201"""


class ErrorConnectRefused(Error):
    """ERROR_CONNECT_REFUSED = 0x0202"""


class ErrorUserAbort(Error):
    """ERROR_USER_ABORT = 0x0203"""


class ErrorSyntaxError(Error):
    """ERROR_SYNTAX_ERROR = 0x0204"""


class CLError(CQiException):
    """Base class for corpus library errors (CL_ERROR = 0x04)."""


class CLErrorNoSuchAttribute(CLError):
    """CL_ERROR_NO_SUCH_ATTRIBUTE = 0x0401

    Returned if CQi server couldn't open attribute.
    """


class CLErrorWrongAttributeType(CLError):
    """CL_ERROR_WRONG_ATTRIBUTE_TYPE = 0x0402

    CDA_EATTTYPE
    """


class CLErrorOutOfRange(CLError):
    """CL_ERROR_OUT_OF_RANGE = 0x0403

    CDA_EIDORNG, CDA_EIDXORNG, CDA_EPOSORNG
    """


class CLErrorRegex(CLError):
    """CL_ERROR_REGEX = 0x0404

    CDA_EPATTERN (not used), CDA_EBADREGEX
    """


class CLErrorCorpusAccess(CLError):
    """CL_ERROR_CORPUS_ACCESS = 0x0405

    CDA_ENODATA
    """


class CLErrorOutOfMemory(CLError):
    """CL_ERROR_OUT_OF_MEMORY = 0x0406

    CDA_ENOMEM
    This means the CQi server has run out of memory;
    try discarding some other corpora and/or subcorpora.
    """


class CLErrorInternal(CLError):
    """CL_ERROR_INTERNAL = 0x0407

    CDA_EOTHER, CDA_ENYI
    This is the classical 'please contact technical support' error.
    """


class CQPError(CQiException):
    """Base class for CQP errors (CQP_ERROR = 0x05).

    CQP error messages yet to be defined.
    """


class CQPErrorGeneral(CQPError):
    """CQP_ERROR_GENERAL = 0x0501"""


class CQPErrorNoSuchCorpus(CQPError):
    """CQP_ERROR_NO_SUCH_CORPUS = 0x0502"""


class CQPErrorInvalidField(CQPError):
    """CQP_ERROR_INVALID_FIELD = 0x0503"""


class CQPErrorOutOfRange(CQPError):
    """CQP_ERROR_OUT_OF_RANGE = 0x0504

    Various cases where a number is out of range.
    """
|
@ -1,4 +1,4 @@
|
||||
from ..constants import (CONST_FIELD_KEYWORD, CONST_FIELD_MATCH,
|
||||
from ..specification import (CONST_FIELD_KEYWORD, CONST_FIELD_MATCH,
|
||||
CONST_FIELD_MATCHEND, CONST_FIELD_TARGET)
|
||||
|
||||
|
||||
|
404
app/corpora/cqi/specification.py
Normal file
404
app/corpora/cqi/specification.py
Normal file
@ -0,0 +1,404 @@
|
||||
# ########################################################################### #
|
||||
# IMS CQi specification #
|
||||
# #
|
||||
# Version: 0.1a ;o) #
|
||||
# Author: Stefan Evert (evert@ims.uni-stuttgart.de) #
|
||||
# Modified by (codestyle): Patrick Jentsch (p.jentsch@uni-bielefeld.de) #
|
||||
# Modified date: Thurs Oct 10 #
|
||||
# ########################################################################### #
|
||||
""" 1. padding """
|
||||
PAD = 0x00
|
||||
|
||||
|
||||
""" 2. CQi responses """
|
||||
""" 2.1 CQI_STATUS_* """
|
||||
STATUS = 0x01
|
||||
STATUS_OK = 0x0101
|
||||
STATUS_CONNECT_OK = 0x0102
|
||||
STATUS_BYE_OK = 0x0103
|
||||
STATUS_PING_OK = 0x0104
|
||||
|
||||
""" 2.2 CQI_ERROR_* """
|
||||
ERROR = 0x02
|
||||
ERROR_GENERAL_ERROR = 0x0201
|
||||
ERROR_CONNECT_REFUSED = 0x0202
|
||||
ERROR_USER_ABORT = 0x0203
|
||||
ERROR_SYNTAX_ERROR = 0x0204
|
||||
# includes corpus/attribute/subcorpus specifier syntax
|
||||
|
||||
""" 2.3 CQI_DATA_* """
|
||||
DATA = 0x03
|
||||
DATA_BYTE = 0x0301
|
||||
DATA_BOOL = 0x0302
|
||||
DATA_INT = 0x0303
|
||||
DATA_STRING = 0x0304
|
||||
DATA_BYTE_LIST = 0x0305
|
||||
DATA_BOOL_LIST = 0x0306
|
||||
DATA_INT_LIST = 0x0307
|
||||
DATA_STRING_LIST = 0x0308
|
||||
DATA_INT_INT = 0x0309
|
||||
DATA_INT_INT_INT_INT = 0x030A
|
||||
DATA_INT_TABLE = 0x030B
|
||||
|
||||
""" 2.4 CQI_CL_ERROR_* """
|
||||
"""
|
||||
" NOTE: some CL error codes are not represented in the CQi specs
|
||||
" - usually because they're not used in the CL any more
|
||||
" - CDA_ENOSTRING is not considered an error (returns -1)
|
||||
" - CDA_EARGS: dynamic attribute calls not yet supported
|
||||
"""
|
||||
CL_ERROR = 0x04
|
||||
CL_ERROR_NO_SUCH_ATTRIBUTE = 0x0401
|
||||
# returned if CQi server couldn't open attribute
|
||||
CL_ERROR_WRONG_ATTRIBUTE_TYPE = 0x0402
|
||||
# CDA_EATTTYPE
|
||||
CL_ERROR_OUT_OF_RANGE = 0x0403
|
||||
# CDA_EIDORNG, CDA_EIDXORNG, CDA_EPOSORNG
|
||||
CL_ERROR_REGEX = 0x0404
|
||||
# CDA_EPATTERN (not used), CDA_EBADREGEX
|
||||
CL_ERROR_CORPUS_ACCESS = 0x0405
|
||||
# CDA_ENODATA
|
||||
CL_ERROR_OUT_OF_MEMORY = 0x0406
|
||||
# CDA_ENOMEM
|
||||
# this means the CQi server has run out of memory;
|
||||
# try discarding some other corpora and/or subcorpora
|
||||
CL_ERROR_INTERNAL = 0x0407
|
||||
# CDA_EOTHER, CDA_ENYI
|
||||
# this is the classical 'please contact technical support' error
|
||||
|
||||
""" 2.5 CQI_CQP_ERROR_* """
|
||||
CQP_ERROR = 0x05
|
||||
# CQP error messages yet to be defined
|
||||
CQP_ERROR_GENERAL = 0x0501
|
||||
CQP_ERROR_NO_SUCH_CORPUS = 0x0502
|
||||
CQP_ERROR_INVALID_FIELD = 0x0503
|
||||
CQP_ERROR_OUT_OF_RANGE = 0x0504
|
||||
# various cases where a number is out of range
|
||||
|
||||
|
||||
""" 3. CQi commands """
|
||||
""" 3.1 CQI_CTRL_* """
|
||||
CTRL = 0x11
|
||||
CTRL_CONNECT = 0x1101
|
||||
# INPUT: (STRING username, STRING password)
|
||||
# OUTPUT: CQI_STATUS_CONNECT_OK, CQI_ERROR_CONNECT_REFUSED
|
||||
CTRL_BYE = 0x1102
|
||||
# INPUT: ()
|
||||
# OUTPUT: CQI_STATUS_BYE_OK
|
||||
CTRL_USER_ABORT = 0x1103
|
||||
# INPUT: ()
|
||||
# OUTPUT:
|
||||
CTRL_PING = 0x1104
|
||||
# INPUT: ()
|
||||
# OUTPUT: CQI_STATUS_PING_OK
|
||||
CTRL_LAST_GENERAL_ERROR = 0x1105
|
||||
# INPUT: ()
|
||||
# OUTPUT: CQI_DATA_STRING
|
||||
# full-text error message for the last general error reported by the CQi server
|
||||
|
||||
""" 3.2 CQI_ASK_FEATURE_* """
|
||||
ASK_FEATURE = 0x12
|
||||
ASK_FEATURE_CQI_1_0 = 0x1201
|
||||
# INPUT: ()
|
||||
# OUTPUT: CQI_DATA_BOOL
|
||||
ASK_FEATURE_CL_2_3 = 0x1202
|
||||
# INPUT: ()
|
||||
# OUTPUT: CQI_DATA_BOOL
|
||||
ASK_FEATURE_CQP_2_3 = 0x1203
|
||||
# INPUT: ()
|
||||
# OUTPUT: CQI_DATA_BOOL
|
||||
|
||||
""" 3.3 CQI_CORPUS_* """
|
||||
CORPUS = 0x13
|
||||
CORPUS_LIST_CORPORA = 0x1301
|
||||
# INPUT: ()
|
||||
# OUTPUT: CQI_DATA_STRING_LIST
|
||||
CORPUS_CHARSET = 0x1303
|
||||
# INPUT: (STRING corpus)
|
||||
# OUTPUT: CQI_DATA_STRING
|
||||
CORPUS_PROPERTIES = 0x1304
|
||||
# INPUT: (STRING corpus)
|
||||
# OUTPUT: CQI_DATA_STRING_LIST
|
||||
CORPUS_POSITIONAL_ATTRIBUTES = 0x1305
|
||||
# INPUT: (STRING corpus)
|
||||
# OUTPUT: CQI_DATA_STRING_LIST
|
||||
CORPUS_STRUCTURAL_ATTRIBUTES = 0x1306
|
||||
# INPUT: (STRING corpus)
|
||||
# OUTPUT: CQI_DATA_STRING_LIST
|
||||
CORPUS_STRUCTURAL_ATTRIBUTE_HAS_VALUES = 0x1307
|
||||
# INPUT: (STRING attribute)
|
||||
# OUTPUT: CQI_DATA_BOOL
|
||||
CORPUS_ALIGNMENT_ATTRIBUTES = 0x1308
|
||||
# INPUT: (STRING corpus)
|
||||
# OUTPUT: CQI_DATA_STRING_LIST
|
||||
CORPUS_FULL_NAME = 0x1309
|
||||
# INPUT: (STRING corpus)
|
||||
# OUTPUT: CQI_DATA_STRING
|
||||
# the full name of <corpus> as specified in its registry entry
|
||||
CORPUS_INFO = 0x130A
|
||||
# INPUT: (STRING corpus)
|
||||
# OUTPUT: CQI_DATA_STRING_LIST
|
||||
# returns the contents of the .info file of <corpus> as a list of lines
|
||||
CORPUS_DROP_CORPUS = 0x130B
|
||||
# INPUT: (STRING corpus)
|
||||
# OUTPUT: CQI_STATUS_OK
|
||||
# try to unload a corpus and all its attributes from memory
|
||||
|
||||
""" 3.4 CQI_CL_* """
|
||||
CL = 0x14
|
||||
# low-level corpus access (CL functions)
|
||||
CL_ATTRIBUTE_SIZE = 0x1401
|
||||
# INPUT: (STRING attribute)
|
||||
# OUTPUT: CQI_DATA_INT
|
||||
# returns the size of <attribute>:
|
||||
# - number of tokens (positional)
|
||||
# - number of regions (structural)
|
||||
# - number of alignments (alignment)
|
||||
CL_LEXICON_SIZE = 0x1402
|
||||
# INPUT: (STRING attribute)
|
||||
# OUTPUT: CQI_DATA_INT
|
||||
# returns the number of entries in the lexicon of a positional attribute;
|
||||
# valid lexicon IDs range from 0 .. (lexicon_size - 1)
|
||||
CL_DROP_ATTRIBUTE = 0x1403
|
||||
# INPUT: (STRING attribute)
|
||||
# OUTPUT: CQI_STATUS_OK
|
||||
# unload attribute from memory
|
||||
"""
|
||||
" NOTE: simple (scalar) mappings are applied to lists (the returned list has
|
||||
" exactly the same length as the list passed as an argument)
|
||||
"""
|
||||
CL_STR2ID = 0x1404
|
||||
# INPUT: (STRING attribute, STRING_LIST strings)
|
||||
# OUTPUT: CQI_DATA_INT_LIST
|
||||
# returns -1 for every string in <strings> that is not found in the lexicon
|
||||
CL_ID2STR = 0x1405
|
||||
# INPUT: (STRING attribute, INT_LIST id)
|
||||
# OUTPUT: CQI_DATA_STRING_LIST
|
||||
# returns "" for every ID in <id> that is out of range
|
||||
CL_ID2FREQ = 0x1406
|
||||
# INPUT: (STRING attribute, INT_LIST id)
|
||||
# OUTPUT: CQI_DATA_INT_LIST
|
||||
# returns 0 for every ID in <id> that is out of range
|
||||
CL_CPOS2ID = 0x1407
|
||||
# INPUT: (STRING attribute, INT_LIST cpos)
|
||||
# OUTPUT: CQI_DATA_INT_LIST
|
||||
# returns -1 for every corpus position in <cpos> that is out of range
|
||||
CL_CPOS2STR = 0x1408
|
||||
# INPUT: (STRING attribute, INT_LIST cpos)
|
||||
# OUTPUT: CQI_DATA_STRING_LIST
|
||||
# returns "" for every corpus position in <cpos> that is out of range
|
||||
CL_CPOS2STRUC = 0x1409
|
||||
# INPUT: (STRING attribute, INT_LIST cpos)
|
||||
# OUTPUT: CQI_DATA_INT_LIST
|
||||
# returns -1 for every corpus position not inside a structure region
|
||||
"""
|
||||
" NOTE: temporary addition for the Euralex2000 tutorial, but should probably be
|
||||
" included in CQi specs
|
||||
"""
|
||||
CL_CPOS2LBOUND = 0x1420
|
||||
# INPUT: (STRING attribute, INT_LIST cpos)
|
||||
# OUTPUT: CQI_DATA_INT_LIST
|
||||
# returns left boundary of s-attribute region enclosing cpos, -1 if not in
|
||||
# region
|
||||
CL_CPOS2RBOUND = 0x1421
|
||||
# INPUT: (STRING attribute, INT_LIST cpos)
|
||||
# OUTPUT: CQI_DATA_INT_LIST
|
||||
# returns right boundary of s-attribute region enclosing cpos, -1 if not in
|
||||
# region
|
||||
CL_CPOS2ALG = 0x140A
|
||||
# INPUT: (STRING attribute, INT_LIST cpos)
|
||||
# OUTPUT: CQI_DATA_INT_LIST
|
||||
# returns -1 for every corpus position not inside an alignment
|
||||
CL_STRUC2STR = 0x140B
|
||||
# INPUT: (STRING attribute, INT_LIST strucs)
|
||||
# OUTPUT: CQI_DATA_STRING_LIST
|
||||
# returns annotated string values of structure regions in <strucs>; "" if out
|
||||
# of range
|
||||
# check CQI_CORPUS_STRUCTURAL_ATTRIBUTE_HAS_VALUES(<attribute>) first
|
||||
"""
|
||||
" NOTE: the following mappings take a single argument and return multiple
|
||||
" values, including lists of arbitrary size
|
||||
"""
|
||||
CL_ID2CPOS = 0x140C
|
||||
# INPUT: (STRING attribute, INT id)
|
||||
# OUTPUT: CQI_DATA_INT_LIST
|
||||
# returns all corpus positions where the given token occurs
|
||||
CL_IDLIST2CPOS = 0x140D
|
||||
# INPUT: (STRING attribute, INT_LIST id_list)
|
||||
# OUTPUT: CQI_DATA_INT_LIST
|
||||
# returns all corpus positions where one of the tokens in <id_list>
|
||||
# occurs; the returned list is sorted as a whole, not per token id
|
||||
CL_REGEX2ID = 0x140E
|
||||
# INPUT: (STRING attribute, STRING regex)
|
||||
# OUTPUT: CQI_DATA_INT_LIST
|
||||
# returns lexicon IDs of all tokens that match <regex>; the returned
|
||||
# list may be empty (size 0);
|
||||
CL_STRUC2CPOS = 0x140F
|
||||
# INPUT: (STRING attribute, INT struc)
|
||||
# OUTPUT: CQI_DATA_INT_INT
|
||||
# returns start and end corpus positions of structure region <struc>
|
||||
CL_ALG2CPOS = 0x1410
|
||||
# INPUT: (STRING attribute, INT alg)
|
||||
# OUTPUT: CQI_DATA_INT_INT_INT_INT
|
||||
# returns (src_start, src_end, target_start, target_end)
|
||||
|
||||
""" 3.5 CQI_CQP_* """
|
||||
CQP = 0x15
|
||||
CQP_QUERY = 0x1501
|
||||
# INPUT: (STRING mother_corpus, STRING subcorpus_name, STRING query)
|
||||
# OUTPUT: CQI_STATUS_OK
|
||||
# <query> must include the ';' character terminating the query.
|
||||
CQP_LIST_SUBCORPORA = 0x1502
|
||||
# INPUT: (STRING corpus)
|
||||
# OUTPUT: CQI_DATA_STRING_LIST
|
||||
CQP_SUBCORPUS_SIZE = 0x1503
|
||||
# INPUT: (STRING subcorpus)
|
||||
# OUTPUT: CQI_DATA_INT
|
||||
CQP_SUBCORPUS_HAS_FIELD = 0x1504
|
||||
# INPUT: (STRING subcorpus, BYTE field)
|
||||
# OUTPUT: CQI_DATA_BOOL
|
||||
CQP_DUMP_SUBCORPUS = 0x1505
|
||||
# INPUT: (STRING subcorpus, BYTE field, INT first, INT last)
|
||||
# OUTPUT: CQI_DATA_INT_LIST
|
||||
# Dump the values of <field> for match ranges <first> .. <last> in <subcorpus>.
|
||||
# <field> is one of the CQI_CONST_FIELD_* constants.
|
||||
CQP_DROP_SUBCORPUS = 0x1509
|
||||
# INPUT: (STRING subcorpus)
|
||||
# OUTPUT: CQI_STATUS_OK
|
||||
# delete a subcorpus from memory
|
||||
"""
|
||||
" NOTE: The following two functions are temporarily included for the Euralex
|
||||
" 2000 tutorial demo
|
||||
"""
|
||||
""" NOTE: frequency distribution of single tokens """
|
||||
CQP_FDIST_1 = 0x1510
|
||||
# INPUT: (STRING subcorpus, INT cutoff, BYTE field, STRING attribute)
|
||||
# OUTPUT: CQI_DATA_INT_LIST
|
||||
# returns <n> (id, frequency) pairs flattened into a list of size 2*<n>
|
||||
# field is one of CQI_CONST_FIELD_MATCH, CQI_CONST_FIELD_TARGET,
|
||||
# CQI_CONST_FIELD_KEYWORD
|
||||
# NB: pairs are sorted by frequency desc.
|
||||
""" NOTE: frequency distribution of pairs of tokens """
|
||||
CQP_FDIST_2 = 0x1511
|
||||
# INPUT: (STRING subcorpus, INT cutoff, BYTE field1, STRING attribute1,
|
||||
# BYTE field2, STRING attribute2)
|
||||
# OUTPUT: CQI_DATA_INT_LIST
|
||||
# returns <n> (id1, id2, frequency) pairs flattened into a list of size 3*<n>
|
||||
# NB: triples are sorted by frequency desc.
|
||||
|
||||
|
||||
""" 4. Constant Definitions """
|
||||
CONST_FALSE = 0x00
|
||||
CONST_NO = 0x00
|
||||
CONST_TRUE = 0x01
|
||||
CONST_YES = 0x01
|
||||
"""
|
||||
" NOTE: The following constants specify which field will be returned by
|
||||
" CQI_CQP_DUMP_SUBCORPUS and some other subcorpus commands.
|
||||
"""
|
||||
CONST_FIELD_MATCH = 0x10
|
||||
CONST_FIELD_MATCHEND = 0x11
|
||||
"""
|
||||
" NOTE: The constants specifying target0 .. target9 are guaranteed to have the
|
||||
" numerical values 0 .. 9, so clients do not need to look up the constant
|
||||
" values if they're handling arbitrary targets.
|
||||
"""
|
||||
CONST_FIELD_TARGET_0 = 0x00
|
||||
CONST_FIELD_TARGET_1 = 0x01
|
||||
CONST_FIELD_TARGET_2 = 0x02
|
||||
CONST_FIELD_TARGET_3 = 0x03
|
||||
CONST_FIELD_TARGET_4 = 0x04
|
||||
CONST_FIELD_TARGET_5 = 0x05
|
||||
CONST_FIELD_TARGET_6 = 0x06
|
||||
CONST_FIELD_TARGET_7 = 0x07
|
||||
CONST_FIELD_TARGET_8 = 0x08
|
||||
CONST_FIELD_TARGET_9 = 0x09
|
||||
"""
|
||||
" NOTE: The following constants are provided for backward compatibility with
|
||||
" traditional CQP field names & while the generalised target concept
|
||||
" isn't yet implemented in the CQPserver.
|
||||
"""
|
||||
CONST_FIELD_TARGET = 0x00
|
||||
CONST_FIELD_KEYWORD = 0x09
|
||||
""" NOTE: CQi version is CQI_MAJOR_VERSION.CQI_MINOR_VERSION """
|
||||
MAJOR_VERSION = 0x00
|
||||
MINOR_VERSION = 0x01
|
||||
|
||||
|
||||
""" 5. CQi lookup dictionary. """
|
||||
# Maps every CQi response/command WORD to its symbolic CQI_* name.
# Keys are written as hex literals so they can be compared directly with
# the constant definitions of the specification.
lookup = {
    # CQI_STATUS_*
    0x0101: 'CQI_STATUS_OK',
    0x0102: 'CQI_STATUS_CONNECT_OK',
    0x0103: 'CQI_STATUS_BYE_OK',
    0x0104: 'CQI_STATUS_PING_OK',
    # CQI_ERROR_*
    0x0201: 'CQI_ERROR_GENERAL_ERROR',
    0x0202: 'CQI_ERROR_CONNECT_REFUSED',
    0x0203: 'CQI_ERROR_USER_ABORT',
    0x0204: 'CQI_ERROR_SYNTAX_ERROR',
    # CQI_DATA_*
    0x0301: 'CQI_DATA_BYTE',
    0x0302: 'CQI_DATA_BOOL',
    0x0303: 'CQI_DATA_INT',
    0x0304: 'CQI_DATA_STRING',
    0x0305: 'CQI_DATA_BYTE_LIST',
    0x0306: 'CQI_DATA_BOOL_LIST',
    0x0307: 'CQI_DATA_INT_LIST',
    0x0308: 'CQI_DATA_STRING_LIST',
    0x0309: 'CQI_DATA_INT_INT',
    0x030A: 'CQI_DATA_INT_INT_INT_INT',
    0x030B: 'CQI_DATA_INT_TABLE',
    # CQI_CL_ERROR_*
    0x0401: 'CQI_CL_ERROR_NO_SUCH_ATTRIBUTE',
    0x0402: 'CQI_CL_ERROR_WRONG_ATTRIBUTE_TYPE',
    0x0403: 'CQI_CL_ERROR_OUT_OF_RANGE',
    0x0404: 'CQI_CL_ERROR_REGEX',
    0x0405: 'CQI_CL_ERROR_CORPUS_ACCESS',
    0x0406: 'CQI_CL_ERROR_OUT_OF_MEMORY',
    0x0407: 'CQI_CL_ERROR_INTERNAL',
    # CQI_CQP_ERROR_*
    0x0501: 'CQI_CQP_ERROR_GENERAL',
    0x0502: 'CQI_CQP_ERROR_NO_SUCH_CORPUS',
    0x0503: 'CQI_CQP_ERROR_INVALID_FIELD',
    0x0504: 'CQI_CQP_ERROR_OUT_OF_RANGE',
    # CQI_CTRL_*
    0x1101: 'CQI_CTRL_CONNECT',
    0x1102: 'CQI_CTRL_BYE',
    0x1103: 'CQI_CTRL_USER_ABORT',
    0x1104: 'CQI_CTRL_PING',
    0x1105: 'CQI_CTRL_LAST_GENERAL_ERROR',
    # CQI_ASK_FEATURE_*
    0x1201: 'CQI_ASK_FEATURE_CQI_1_0',
    0x1202: 'CQI_ASK_FEATURE_CL_2_3',
    0x1203: 'CQI_ASK_FEATURE_CQP_2_3',
    # CQI_CORPUS_*
    0x1301: 'CQI_CORPUS_LIST_CORPORA',
    0x1303: 'CQI_CORPUS_CHARSET',
    0x1304: 'CQI_CORPUS_PROPERTIES',
    0x1305: 'CQI_CORPUS_POSITIONAL_ATTRIBUTES',
    0x1306: 'CQI_CORPUS_STRUCTURAL_ATTRIBUTES',
    0x1307: 'CQI_CORPUS_STRUCTURAL_ATTRIBUTE_HAS_VALUES',
    0x1308: 'CQI_CORPUS_ALIGNMENT_ATTRIBUTES',
    0x1309: 'CQI_CORPUS_FULL_NAME',
    0x130A: 'CQI_CORPUS_INFO',
    0x130B: 'CQI_CORPUS_DROP_CORPUS',
    # CQI_CL_*
    0x1401: 'CQI_CL_ATTRIBUTE_SIZE',
    0x1402: 'CQI_CL_LEXICON_SIZE',
    0x1403: 'CQI_CL_DROP_ATTRIBUTE',
    0x1404: 'CQI_CL_STR2ID',
    0x1405: 'CQI_CL_ID2STR',
    0x1406: 'CQI_CL_ID2FREQ',
    0x1407: 'CQI_CL_CPOS2ID',
    0x1408: 'CQI_CL_CPOS2STR',
    0x1409: 'CQI_CL_CPOS2STRUC',
    0x140A: 'CQI_CL_CPOS2ALG',
    0x140B: 'CQI_CL_STRUC2STR',
    0x140C: 'CQI_CL_ID2CPOS',
    0x140D: 'CQI_CL_IDLIST2CPOS',
    0x140E: 'CQI_CL_REGEX2ID',
    0x140F: 'CQI_CL_STRUC2CPOS',
    0x1410: 'CQI_CL_ALG2CPOS',
    0x1420: 'CQI_CL_CPOS2LBOUND',
    0x1421: 'CQI_CL_CPOS2RBOUND',
    # CQI_CQP_*
    0x1501: 'CQI_CQP_QUERY',
    0x1502: 'CQI_CQP_LIST_SUBCORPORA',
    0x1503: 'CQI_CQP_SUBCORPUS_SIZE',
    0x1504: 'CQI_CQP_SUBCORPUS_HAS_FIELD',
    0x1505: 'CQI_CQP_DUMP_SUBCORPUS',
    0x1509: 'CQI_CQP_DROP_SUBCORPUS',
    0x1510: 'CQI_CQP_FDIST_1',
    0x1511: 'CQI_CQP_FDIST_2',
}
|
@ -1,4 +1,4 @@
|
||||
from .constants import MAJOR_VERSION, MINOR_VERSION
|
||||
from .specification import MAJOR_VERSION, MINOR_VERSION
|
||||
|
||||
|
||||
version = '{}.{}'.format(MAJOR_VERSION, MINOR_VERSION)
|
||||
|
@ -1,5 +1,5 @@
|
||||
from .api import APIClient
|
||||
from .constants import CONST_FIELD_MATCH, CONST_FIELD_MATCHEND
|
||||
from .specification import CONST_FIELD_MATCH, CONST_FIELD_MATCHEND
|
||||
import time
|
||||
|
||||
|
||||
|
@ -1,6 +1,6 @@
|
||||
from flask import current_app, request
|
||||
from flask_login import current_user
|
||||
from .cqi import CQiClient
|
||||
from . import cqi
|
||||
from .. import db, socketio
|
||||
from ..decorators import socketio_login_required
|
||||
from ..events import connected_sessions
|
||||
@ -39,7 +39,7 @@ def pj_corpus_analysis_query(query):
|
||||
corpus = client.corpora.get('CORPUS')
|
||||
try:
|
||||
results = corpus.query(query)
|
||||
except Exception as e:
|
||||
except cqi.errors.CQiException as e:
|
||||
response = {'code': 1, 'msg': str(e)}
|
||||
socketio.emit('pj_corpus_analysis_query', response, room=request.sid)
|
||||
else:
|
||||
@ -82,10 +82,10 @@ def pj_corpus_analysis_session_handler(app, corpus_id, user_id, session_id):
|
||||
while corpus.status != 'analysing':
|
||||
db.session.refresh(corpus)
|
||||
socketio.sleep(3)
|
||||
client = CQiClient('corpus_{}_analysis'.format(corpus_id))
|
||||
client = cqi.CQiClient('corpus_{}_analysis'.format(corpus_id))
|
||||
try:
|
||||
client.connect()
|
||||
except Exception:
|
||||
except cqi.errors.CQiException:
|
||||
response = {'code': 500, 'msg': 'Internal Server Error'}
|
||||
socketio.emit('pj_corpus_analysis_init', response, room=session_id)
|
||||
return
|
||||
@ -102,7 +102,7 @@ def pj_corpus_analysis_session_handler(app, corpus_id, user_id, session_id):
|
||||
''' Teardown analysis session '''
|
||||
try:
|
||||
client.disconnect()
|
||||
except Exception:
|
||||
except cqi.errors.CQiException:
|
||||
pass
|
||||
pj_corpus_analysis_clients.pop(session_id, None)
|
||||
pj_corpus_analysis_sessions[corpus_id].remove(session_id)
|
||||
|
Loading…
x
Reference in New Issue
Block a user