From 98fecacb1152824457a9278eb9890c583a94bc51 Mon Sep 17 00:00:00 2001 From: Patrick Jentsch Date: Fri, 24 Apr 2020 15:44:40 +0200 Subject: [PATCH] use cqi from pypi --- app/corpora/cqi/__init__.py | 8 - app/corpora/cqi/api/__init__.py | 2 - app/corpora/cqi/api/client.py | 605 --------------------------- app/corpora/cqi/api/specification.py | 404 ------------------ app/corpora/cqi/client.py | 46 -- app/corpora/cqi/errors.py | 180 -------- app/corpora/cqi/models/__init__.py | 0 app/corpora/cqi/models/attributes.py | 162 ------- app/corpora/cqi/models/corpora.py | 96 ----- app/corpora/cqi/models/ressource.py | 78 ---- app/corpora/cqi/models/subcorpora.py | 103 ----- app/corpora/cqi/version.py | 5 - app/corpora/events.py | 2 +- requirements.txt | 1 + 14 files changed, 2 insertions(+), 1690 deletions(-) delete mode 100644 app/corpora/cqi/__init__.py delete mode 100644 app/corpora/cqi/api/__init__.py delete mode 100644 app/corpora/cqi/api/client.py delete mode 100644 app/corpora/cqi/api/specification.py delete mode 100644 app/corpora/cqi/client.py delete mode 100644 app/corpora/cqi/errors.py delete mode 100644 app/corpora/cqi/models/__init__.py delete mode 100644 app/corpora/cqi/models/attributes.py delete mode 100644 app/corpora/cqi/models/corpora.py delete mode 100644 app/corpora/cqi/models/ressource.py delete mode 100644 app/corpora/cqi/models/subcorpora.py delete mode 100644 app/corpora/cqi/version.py diff --git a/app/corpora/cqi/__init__.py b/app/corpora/cqi/__init__.py deleted file mode 100644 index 4174f55d..00000000 --- a/app/corpora/cqi/__init__.py +++ /dev/null @@ -1,8 +0,0 @@ -# flake8: noqa -from .api import APIClient -from .client import CQiClient -from .version import version, version_info - - -__title__ = 'CQi' -__version__ = version diff --git a/app/corpora/cqi/api/__init__.py b/app/corpora/cqi/api/__init__.py deleted file mode 100644 index ff518441..00000000 --- a/app/corpora/cqi/api/__init__.py +++ /dev/null @@ -1,2 +0,0 @@ -# flake8: noqa -from .client import APIClient diff --git a/app/corpora/cqi/api/client.py b/app/corpora/cqi/api/client.py deleted file mode 100644 index bc62e65f..00000000 --- a/app/corpora/cqi/api/client.py +++ /dev/null @@ -1,605 +0,0 @@ -from time import sleep -from . import specification -from ..errors import cl_error_lookup, error_lookup, cqp_error_lookup -import socket -import struct - - -class APIClient: - """ - A low-level client for the IMS Open Corpus Workbench (CWB) corpus query - interface (CQi) API. - - Example: - >>> import cqi - >>> client = cqi.APIClient('127.0.0.1') - >>> client.ctrl_connect('user', 'password') - {'code': 258, 'msg': 'CQI_STATUS_CONNECT_OK'} - >>> client.ctrl_ping() - {'code': 260, 'msg': 'CQI_STATUS_PING_OK'} - >>> client.ctrl_bye() - {'code': 259, 'msg': 'CQI_STATUS_BYE_OK'} - - Attributes: - host (str): URL to the CQP server. For example, - ``cqpserver.localhost`` or ``127.0.0.1``. - port (int): Port the CQP server listens on. Default: ``4877`` - socket (socket.socket): Socket for communicating with a CQP server. - """ - - def __init__(self, host, port=4877): - self.host = host - self.port = port - self.socket = socket.socket() - - def ctrl_connect(self, username, password): - self.socket.connect((self.host, self.port)) - # INPUT: (STRING username, STRING password) - # OUTPUT: CQI_STATUS_CONNECT_OK, CQI_ERROR_CONNECT_REFUSED - self.__send_WORD(specification.CTRL_CONNECT) - self.__send_STRING(username) - self.__send_STRING(password) - return self.__recv_response() - - def ctrl_bye(self): - # INPUT: () - # OUTPUT: CQI_STATUS_BYE_OK - self.__send_WORD(specification.CTRL_BYE) - response = self.__recv_response() - self.socket.close() - return response - - def ctrl_user_abort(self): - # INPUT: () - # OUTPUT: - self.__send_WORD(specification.CTRL_USER_ABORT) - - def ctrl_ping(self): - # INPUT: () - # OUTPUT: CQI_STATUS_PING_OK - self.__send_WORD(specification.CTRL_PING) - return self.__recv_response() - - def ctrl_last_general_error(self): - # INPUT: () - # OUTPUT: CQI_DATA_STRING - # full-text error message for the last general error reported by the - # CQi server - self.__send_WORD(specification.CTRL_LAST_GENERAL_ERROR) - return self.__recv_response() - - def ask_feature_cqi_1_0(self): - # INPUT: () - # OUTPUT: CQI_DATA_BOOL - self.__send_WORD(specification.ASK_FEATURE_CQI_1_0) - return self.__recv_response() - - def ask_feature_cl_2_3(self): - # INPUT: () - # OUTPUT: CQI_DATA_BOOL - self.__send_WORD(specification.ASK_FEATURE_CL_2_3) - return self.__recv_response() - - def ask_feature_cqp_2_3(self): - # INPUT: () - # OUTPUT: CQI_DATA_BOOL - self.__send_WORD(specification.ASK_FEATURE_CL_2_3) - return self.__recv_response() - - def corpus_list_coprora(self): - # INPUT: () - # OUTPUT: CQI_DATA_STRING_LIST - self.__send_WORD(specification.CORPUS_LIST_CORPORA) - return self.__recv_response() - - def corpus_charset(self, corpus): - # INPUT: (STRING corpus) - # OUTPUT: CQI_DATA_STRING - self.__send_WORD(specification.CORPUS_CHARSET) - self.__send_STRING(corpus) - return self.__recv_response() - - def corpus_properties(self, corpus): - # INPUT: (STRING corpus) - # OUTPUT: CQI_DATA_STRING_LIST - self.__send_WORD(specification.CORPUS_PROPERTIES) - self.__send_STRING(corpus) - return self.__recv_response() - - def corpus_positional_attributes(self, corpus): - # INPUT: (STRING corpus) - # OUTPUT: CQI_DATA_STRING_LIST - self.__send_WORD(specification.CORPUS_POSITIONAL_ATTRIBUTES) - self.__send_STRING(corpus) - return self.__recv_response() - - def corpus_structural_attributes(self, corpus): - # INPUT: (STRING corpus) - # OUTPUT: CQI_DATA_STRING_LIST - self.__send_WORD(specification.CORPUS_STRUCTURAL_ATTRIBUTES) - self.__send_STRING(corpus) - return self.__recv_response() - - def corpus_structural_attribute_has_values(self, attribute): - # INPUT: (STRING attribute) - # OUTPUT: CQI_DATA_BOOL - self.__send_WORD(specification.CORPUS_STRUCTURAL_ATTRIBUTE_HAS_VALUES) - self.__send_STRING(attribute) - return self.__recv_response() - - def corpus_alignment_attributes(self, corpus): - # INPUT: (STRING corpus) - # OUTPUT: CQI_DATA_STRING_LIST - self.__send_WORD(specification.CORPUS_ALIGNMENT_ATTRIBUTES) - self.__send_STRING(corpus) - return self.__recv_response() - - def corpus_full_name(self, corpus): - # INPUT: (STRING corpus) - # OUTPUT: CQI_DATA_STRING - # the full name of as specified in its registry entry - self.__send_WORD(specification.CORPUS_FULL_NAME) - self.__send_STRING(corpus) - return self.__recv_response() - - def corpus_info(self, corpus): - # INPUT: (STRING corpus) - # OUTPUT: CQI_DATA_STRING_LIST - # returns the contents of the .info file of as a list of lines - self.__send_WORD(specification.CORPUS_INFO) - self.__send_STRING(corpus) - return self.__recv_response() - - def corpus_drop_corpus(self, corpus): - # INPUT: (STRING corpus) - # OUTPUT: CQI_STATUS_OK - # try to unload a corpus and all its attributes from memory - self.__send_WORD(specification.CORPUS_DROP_CORPUS) - self.__send_STRING(corpus) - return self.__recv_response() - - def cl_attribute_size(self, attribute): - # INPUT: (STRING attribute) - # OUTPUT: CQI_DATA_INT - # returns the size of : - # number of tokens (positional) - # number of regions (structural) - # number of alignments (alignment) - self.__send_WORD(specification.CL_ATTRIBUTE_SIZE) - self.__send_STRING(attribute) - return self.__recv_response() - - def cl_lexicon_size(self, attribute): - # INPUT: (STRING attribute) - # OUTPUT: CQI_DATA_INT - # returns the number of entries in the lexicon of a positional - # attribute; - # valid lexicon IDs range from 0 .. (lexicon_size - 1) - self.__send_WORD(specification.CL_LEXICON_SIZE) - self.__send_STRING(attribute) - return self.__recv_response() - - def cl_drop_attribute(self, attribute): - # INPUT: (STRING attribute) - # OUTPUT: CQI_STATUS_OK - # unload attribute from memory - self.__send_WORD(specification.CL_DROP_ATTRIBUTE) - self.__send_STRING(attribute) - return self.__recv_response() - - """ - " NOTE: simple (scalar) mappings are applied to lists (the returned list - " has exactly the same length as the list passed as an argument) - """ - - def cl_str2id(self, attribute, strings): - # INPUT: (STRING attribute, STRING_LIST strings) - # OUTPUT: CQI_DATA_INT_LIST - # returns -1 for every string in that is not found in the - # lexicon - self.__send_WORD(specification.CL_STR2ID) - self.__send_STRING(attribute) - self.__send_STRING_LIST(strings) - return self.__recv_response() - - def cl_id2str(self, attribute, id): - # INPUT: (STRING attribute, INT_LIST id) - # OUTPUT: CQI_DATA_STRING_LIST - # returns "" for every ID in that is out of range - self.__send_WORD(specification.CL_ID2STR) - self.__send_STRING(attribute) - self.__send_INT_LIST(id) - return self.__recv_response() - - def cl_id2freq(self, attribute, id): - # INPUT: (STRING attribute, INT_LIST id) - # OUTPUT: CQI_DATA_INT_LIST - # returns 0 for every ID in that is out of range - self.__send_WORD(specification.CL_ID2FREQ) - self.__send_STRING(attribute) - self.__send_INT_LIST(id) - return self.__recv_response() - - def cl_cpos2id(self, attribute, cpos): - # INPUT: (STRING attribute, INT_LIST cpos) - # OUTPUT: CQI_DATA_INT_LIST - # returns -1 for every corpus position in that is out of range - self.__send_WORD(specification.CL_ID2FREQ) - self.__send_STRING(attribute) - self.__send_INT_LIST(cpos) - return self.__recv_response() - - def cl_cpos2str(self, attribute, cpos): - # INPUT: (STRING attribute, INT_LIST cpos) - # OUTPUT: CQI_DATA_STRING_LIST - # returns "" for every corpus position in that is out of range - self.__send_WORD(specification.CL_CPOS2STR) - self.__send_STRING(attribute) - self.__send_INT_LIST(cpos) - return self.__recv_response() - - def cl_cpos2struc(self, attribute, cpos): - # INPUT: (STRING attribute, INT_LIST cpos) - # OUTPUT: CQI_DATA_INT_LIST - # returns -1 for every corpus position not inside a structure region - self.__send_WORD(specification.CL_CPOS2STRUC) - self.__send_STRING(attribute) - self.__send_INT_LIST(cpos) - return self.__recv_response() - - """ - " NOTE: temporary addition for the Euralex2000 tutorial, but should - " probably be included in CQi specs - """ - - def cl_cpos2lbound(self, attribute, cpos): - # INPUT: (STRING attribute, INT_LIST cpos) - # OUTPUT: CQI_DATA_INT_LIST - # returns left boundary of s-attribute region enclosing cpos, -1 if not - # in region - self.__send_WORD(specification.CL_CPOS2LBOUND) - self.__send_STRING(attribute) - self.__send_INT_LIST(cpos) - return self.__recv_response() - - def cl_cpos2rbound(self, attribute, cpos): - # INPUT: (STRING attribute, INT_LIST cpos) - # OUTPUT: CQI_DATA_INT_LIST - # returns right boundary of s-attribute region enclosing cpos, -1 if - # not in region - self.__send_WORD(specification.CL_CPOS2RBOUND) - self.__send_STRING(attribute) - self.__send_INT_LIST(cpos) - return self.__recv_response() - - def cl_cpos2alg(self, attribute, cpos): - # INPUT: (STRING attribute, INT_LIST cpos) - # OUTPUT: CQI_DATA_INT_LIST - # returns -1 for every corpus position not inside an alignment - self.__send_WORD(specification.CL_CPOS2ALG) - self.__send_STRING(attribute) - self.__send_INT_LIST(cpos) - return self.__recv_response() - - def cl_struc2str(self, attribute, strucs): - # INPUT: (STRING attribute, INT_LIST strucs) - # OUTPUT: CQI_DATA_STRING_LIST - # returns annotated string values of structure regions in ; "" - # if out of range - # check CQI_CORPUS_STRUCTURAL_ATTRIBUTE_HAS_VALUES() first - self.__send_WORD(specification.CL_STRUC2STR) - self.__send_STRING(attribute) - self.__send_INT_LIST(strucs) - return self.__recv_response() - - """ - " NOTE: the following mappings take a single argument and return multiple - " values, including lists of arbitrary size - """ - - def cl_id2cpos(self, attribute, id): - # INPUT: (STRING attribute, INT id) - # OUTPUT: CQI_DATA_INT_LIST - # returns all corpus positions where the given token occurs - self.__send_WORD(specification.CL_ID2CPOS) - self.__send_STRING(attribute) - self.__send_INT(id) - return self.__recv_response() - - def cl_idlist2cpos(self, attribute, id_list): - # INPUT: (STRING attribute, INT_LIST id_list) - # OUTPUT: CQI_DATA_INT_LIST - # returns all corpus positions where one of the tokens in - # occurs; the returned list is sorted as a whole, not per token id - self.__send_WORD(specification.CL_IDLIST2CPOS) - self.__send_STRING(attribute) - self.__send_INT_LIST(id_list) - return self.__recv_response() - - def cl_regex2id(self, attribute, regex): - # INPUT: (STRING attribute, STRING regex) - # OUTPUT: CQI_DATA_INT_LIST - # returns lexicon IDs of all tokens that match ; the returned - # list may be empty (size 0); - self.__send_WORD(specification.CL_REGEX2ID) - self.__send_STRING(attribute) - self.__send_STRING(regex) - return self.__recv_response() - - def cl_struc2cpos(self, attribute, struc): - # INPUT: (STRING attribute, INT struc) - # OUTPUT: CQI_DATA_INT_INT - # returns start and end corpus positions of structure region - self.__send_WORD(specification.CL_STRUC2CPOS) - self.__send_STRING(attribute) - self.__send_INT(struc) - return self.__recv_response() - - def cl_alg2cpos(self, attribute, alg): - # INPUT: (STRING attribute, INT alg) - # OUTPUT: CQI_DATA_INT_INT_INT_INT - # returns (src_start, src_end, target_start, target_end) - self.__send_WORD(specification.CL_ALG2CPOS) - self.__send_STRING(attribute) - self.__send_INT(alg) - return self.__recv_response() - - def cqp_query(self, mother_corpus, subcorpus_name, query): - # INPUT: (STRING mother_corpus, STRING subcorpus_name, STRING query) - # OUTPUT: CQI_STATUS_OK - # must include the ';' character terminating the query. - self.__send_WORD(specification.CQP_QUERY) - self.__send_STRING(mother_corpus) - self.__send_STRING(subcorpus_name) - self.__send_STRING(query) - return self.__recv_response() - - def cqp_list_subcorpora(self, corpus): - # INPUT: (STRING corpus) - # OUTPUT: CQI_DATA_STRING_LIST - self.__send_WORD(specification.CQP_LIST_SUBCORPORA) - self.__send_STRING(corpus) - return self.__recv_response() - - def cqp_subcorpus_size(self, subcorpus): - # INPUT: (STRING subcorpus) - # OUTPUT: CQI_DATA_INT - self.__send_WORD(specification.CQP_SUBCORPUS_SIZE) - self.__send_STRING(subcorpus) - return self.__recv_response() - - def cqp_subcorpus_has_field(self, subcorpus, field): - # INPUT: (STRING subcorpus, BYTE field) - # OUTPUT: CQI_DATA_BOOL - self.__send_WORD(specification.CQP_SUBCORPUS_HAS_FIELD) - self.__send_STRING(subcorpus) - self.__send_BYTE(field) - return self.__recv_response() - - def cqp_dump_subcorpus(self, subcorpus, field, first, last): - # INPUT: (STRING subcorpus, BYTE field, INT first, INT last) - # OUTPUT: CQI_DATA_INT_LIST - # Dump the values of for match ranges .. - # in . is one of the CQI_CONST_FIELD_* constants. - self.__send_WORD(specification.CQP_DUMP_SUBCORPUS) - self.__send_STRING(subcorpus) - self.__send_BYTE(field) - self.__send_INT(first) - self.__send_INT(last) - return self.__recv_response() - - def cqp_drop_subcorpus(self, subcorpus): - # INPUT: (STRING subcorpus) - # OUTPUT: CQI_STATUS_OK - # delete a subcorpus from memory - self.__send_WORD(specification.CQP_DROP_SUBCORPUS) - self.__send_STRING(subcorpus) - return self.__recv_response() - - """ - " NOTE: The following two functions are temporarily included for the - " Euralex 2000 tutorial demo - """ - - def cqp_fdist_1(self, subcorpus, cutoff, field, attribute): - """ NOTE: frequency distribution of single tokens """ - # INPUT: (STRING subcorpus, INT cutoff, BYTE field, STRING attribute) - # OUTPUT: CQI_DATA_INT_LIST - # returns (id, frequency) pairs flattened into a list of size 2* - # field is one of CQI_CONST_FIELD_MATCH, CQI_CONST_FIELD_TARGET, - # CQI_CONST_FIELD_KEYWORD - # NB: pairs are sorted by frequency desc. - self.__send_WORD(specification.CQP_FDIST_1) - self.__send_STRING(subcorpus) - self.__send_INT(cutoff) - self.__send_BYTE(field) - self.__send_STRING(attribute) - return self.__recv_response() - - def cqp_fdist_2(self, subcorpus, cutoff, field1, attribute1, field2, - attribute2): - """ NOTE: frequency distribution of pairs of tokens """ - # INPUT: (STRING subcorpus, INT cutoff, BYTE field1, STRING attribute1, - # BYTE field2, STRING attribute2) - # OUTPUT: CQI_DATA_INT_LIST - # returns (id1, id2, frequency) pairs flattened into a list of size - # 3* - # NB: triples are sorted by frequency desc. - self.__send_WORD(specification.CQP_FDIST_2) - self.__send_STRING(subcorpus) - self.__send_INT(cutoff) - self.__send_BYTE(field1) - self.__send_STRING(attribute1) - self.__send_BYTE(field2) - self.__send_STRING(attribute2) - return self.__recv_response() - - def __recv_response(self): - byte_data = self.__recv_WORD() - response_type = byte_data >> 8 - if response_type == specification.CL_ERROR: - raise cl_error_lookup[byte_data]() - elif response_type == specification.CQP_ERROR: - raise cqp_error_lookup[byte_data]() - elif response_type == specification.DATA: - return self.__recv_DATA(byte_data) - elif response_type == specification.ERROR: - raise error_lookup[byte_data]() - elif response_type == specification.STATUS: - return {'code': byte_data, 'msg': specification.lookup[byte_data]} - else: - raise Exception('Unknown response type: {}'.format(response_type)) - - def __recv_DATA(self, data_type): - if data_type == specification.DATA_BYTE: - data = self.__recv_DATA_BYTE() - elif data_type == specification.DATA_BOOL: - data = self.__recv_DATA_BOOL() - elif data_type == specification.DATA_INT: - data = self.__recv_DATA_INT() - elif data_type == specification.DATA_STRING: - data = self.__recv_DATA_STRING() - elif data_type == specification.DATA_BYTE_LIST: - data = self.__recv_DATA_BYTE_LIST() - elif data_type == specification.DATA_BOOL_LIST: - data = self.__recv_DATA_BOOL_LIST() - elif data_type == specification.DATA_INT_LIST: - data = self.__recv_DATA_INT_LIST() - elif data_type == specification.DATA_STRING_LIST: - data = self.__recv_DATA_STRING_LIST() - elif data_type == specification.DATA_INT_INT: - data = self.__recv_DATA_INT_INT() - elif data_type == specification.DATA_INT_INT_INT_INT: - data = self.__recv_DATA_INT_INT_INT_INT() - elif data_type == specification.DATA_INT_TABLE: - data = self.__recv_DATA_INT_TABLE() - else: - raise Exception('Unknown data type: {}'.format(data_type)) - return data - - def __recv_DATA_BYTE(self): - while True: - if (len(self.socket.recv(1, socket.MSG_PEEK)) == 1): - byte_data = self.socket.recv(1) - break - sleep(0.1) - return struct.unpack('!B', byte_data)[0] - - def __recv_DATA_BOOL(self): - while True: - if (len(self.socket.recv(1, socket.MSG_PEEK)) == 1): - byte_data = self.socket.recv(1) - break - sleep(0.1) - return struct.unpack('!?', byte_data)[0] - - def __recv_DATA_INT(self): - while True: - if (len(self.socket.recv(4, socket.MSG_PEEK)) == 4): - byte_data = self.socket.recv(4) - break - sleep(0.1) - return struct.unpack('!i', byte_data)[0] - - def __recv_DATA_STRING(self): - n = self.__recv_WORD() - while True: - if (len(self.socket.recv(n, socket.MSG_PEEK)) == n): - byte_data = self.socket.recv(n) - break - sleep(0.1) - return struct.unpack('!{}s'.format(n), byte_data)[0].decode() - - def __recv_DATA_BYTE_LIST(self): - data = [] - n = self.__recv_DATA_INT() - while n > 0: - data.append(self.__recv_DATA_BYTE()) - n -= 1 - return data - - def __recv_DATA_BOOL_LIST(self): - data = [] - n = self.__recv_DATA_INT() - while n > 0: - data.append(self.__recv_DATA_BOOL()) - n -= 1 - return data - - def __recv_DATA_INT_LIST(self): - data = [] - n = self.__recv_DATA_INT() - while n > 0: - data.append(self.__recv_DATA_INT()) - n -= 1 - return data - - def __recv_DATA_STRING_LIST(self): - data = [] - n = self.__recv_DATA_INT() - while n > 0: - data.append(self.__recv_DATA_STRING()) - n -= 1 - return data - - def __recv_DATA_INT_INT(self): - return (self.__recv_DATA_INT(), self.__recv_DATA_INT()) - - def __recv_DATA_INT_INT_INT_INT(self): - return (self.__recv_DATA_INT(), - self.__recv_DATA_INT(), - self.__recv_DATA_INT(), - self.__recv_DATA_INT()) - - def __recv_DATA_INT_TABLE(self): - rows = self.__recv_DATA_INT() - columns = self.__recv_DATA_INT() - data = [] - for i in range(0, rows): - row = [] - for j in range(0, columns): - row.append(self.__recv_DATA_INT()) - data.append(row) - return data - - def __recv_WORD(self): - while True: - if (len(self.socket.recv(2, socket.MSG_PEEK)) == 2): - byte_data = self.socket.recv(2) - break - sleep(0.1) - return struct.unpack('!H', byte_data)[0] - - def __send_BYTE(self, byte_data): - data = struct.pack('!B', byte_data) - self.socket.sendall(data) - - def __send_BOOL(self, bool_data): - data = struct.pack('!?', bool_data) - self.socket.sendall(data) - - def __send_INT(self, int_data): - data = struct.pack('!i', int_data) - self.socket.sendall(data) - - def __send_STRING(self, string_data): - encoded_string_data = string_data.encode('utf-8') - n = len(encoded_string_data) - data = struct.pack('!H{}s'.format(n), n, encoded_string_data) - self.socket.sendall(data) - - def __send_INT_LIST(self, int_list_data): - n = len(int_list_data) - self.__send_INT(n) - for int_data in int_list_data: - self.__send_INT(int_data) - - def __send_STRING_LIST(self, string_list_data): - n = len(string_list_data) - self.__send_INT(n) - for string_data in string_list_data: - self.__send_STRING(string_data) - - def __send_WORD(self, word_data): - data = struct.pack('!H', word_data) - self.socket.sendall(data) diff --git a/app/corpora/cqi/api/specification.py b/app/corpora/cqi/api/specification.py deleted file mode 100644 index 1147d0d1..00000000 --- a/app/corpora/cqi/api/specification.py +++ /dev/null @@ -1,404 +0,0 @@ -# ########################################################################### # -# IMS CQi specification # -# # -# Version: 0.1a ;o) # -# Author: Stefan Evert (evert@ims.uni-stuttgart.de) # -# Modified by (codestyle): Patrick Jentsch (p.jentsch@uni-bielefeld.de) # -# Modified date: Thurs Oct 10 # -# ########################################################################### # -""" 1. padding """ -PAD = 0x00 - - -""" 2. CQi responses """ -""" 2.1 CQI_STATUS_* """ -STATUS = 0x01 -STATUS_OK = 0x0101 -STATUS_CONNECT_OK = 0x0102 -STATUS_BYE_OK = 0x0103 -STATUS_PING_OK = 0x0104 - -""" 2.2 CQI_ERROR_* """ -ERROR = 0x02 -ERROR_GENERAL_ERROR = 0x0201 -ERROR_CONNECT_REFUSED = 0x0202 -ERROR_USER_ABORT = 0x0203 -ERROR_SYNTAX_ERROR = 0x0204 -# includes corpus/attribute/subcorpus specifier syntax - -""" 2.3 CQI_DATA_* """ -DATA = 0x03 -DATA_BYTE = 0x0301 -DATA_BOOL = 0x0302 -DATA_INT = 0x0303 -DATA_STRING = 0x0304 -DATA_BYTE_LIST = 0x0305 -DATA_BOOL_LIST = 0x0306 -DATA_INT_LIST = 0x0307 -DATA_STRING_LIST = 0x0308 -DATA_INT_INT = 0x0309 -DATA_INT_INT_INT_INT = 0x030A -DATA_INT_TABLE = 0x030B - -""" 2.4 CQI_CL_ERROR_* """ -""" -" NOTE: some CL error codes are not represented in the CQi specs -" - usually because they're not used in the CL any more -" - CDA_ENOSTRING is not considered an error (returns -1) -" - CDA_EARGS: dynamic attribute calls not yet supported -""" -CL_ERROR = 0x04 -CL_ERROR_NO_SUCH_ATTRIBUTE = 0x0401 -# returned if CQi server couldn't open attribute -CL_ERROR_WRONG_ATTRIBUTE_TYPE = 0x0402 -# CDA_EATTTYPE -CL_ERROR_OUT_OF_RANGE = 0x0403 -# CDA_EIDORNG, CDA_EIDXORNG, CDA_EPOSORNG -CL_ERROR_REGEX = 0x0404 -# CDA_EPATTERN (not used), CDA_EBADREGEX -CL_ERROR_CORPUS_ACCESS = 0x0405 -# CDA_ENODATA -CL_ERROR_OUT_OF_MEMORY = 0x0406 -# CDA_ENOMEM -# this means the CQi server has run out of memory; -# try discarding some other corpora and/or subcorpora -CL_ERROR_INTERNAL = 0x0407 -# CDA_EOTHER, CDA_ENYI -# this is the classical 'please contact technical support' error - -""" 2.5 CQI_CQP_ERROR_* """ -CQP_ERROR = 0x05 -# CQP error messages yet to be defined -CQP_ERROR_GENERAL = 0x0501 -CQP_ERROR_NO_SUCH_CORPUS = 0x0502 -CQP_ERROR_INVALID_FIELD = 0x0503 -CQP_ERROR_OUT_OF_RANGE = 0x0504 -# various cases where a number is out of range - - -""" 3. CQi commands """ -""" 3.1 CQI_CTRL_* """ -CTRL = 0x11 -CTRL_CONNECT = 0x1101 -# INPUT: (STRING username, STRING password) -# OUTPUT: CQI_STATUS_CONNECT_OK, CQI_ERROR_CONNECT_REFUSED -CTRL_BYE = 0x1102 -# INPUT: () -# OUTPUT: CQI_STATUS_BYE_OK -CTRL_USER_ABORT = 0x1103 -# INPUT: () -# OUTPUT: -CTRL_PING = 0x1104 -# INPUT: () -# OUTPUT: CQI_STATUS_PING_OK -CTRL_LAST_GENERAL_ERROR = 0x1105 -# INPUT: () -# OUTPUT: CQI_DATA_STRING -# full-text error message for the last general error reported by the CQi server - -""" 3.2 CQI_ASK_FEATURE_* """ -ASK_FEATURE = 0x12 -ASK_FEATURE_CQI_1_0 = 0x1201 -# INPUT: () -# OUTPUT: CQI_DATA_BOOL -ASK_FEATURE_CL_2_3 = 0x1202 -# INPUT: () -# OUTPUT: CQI_DATA_BOOL -ASK_FEATURE_CQP_2_3 = 0x1203 -# INPUT: () -# OUTPUT: CQI_DATA_BOOL - -""" 3.3 CQI_CORPUS_* """ -CORPUS = 0x13 -CORPUS_LIST_CORPORA = 0x1301 -# INPUT: () -# OUTPUT: CQI_DATA_STRING_LIST -CORPUS_CHARSET = 0x1303 -# INPUT: (STRING corpus) -# OUTPUT: CQI_DATA_STRING -CORPUS_PROPERTIES = 0x1304 -# INPUT: (STRING corpus) -# OUTPUT: CQI_DATA_STRING_LIST -CORPUS_POSITIONAL_ATTRIBUTES = 0x1305 -# INPUT: (STRING corpus) -# OUTPUT: CQI_DATA_STRING_LIST -CORPUS_STRUCTURAL_ATTRIBUTES = 0x1306 -# INPUT: (STRING corpus) -# OUTPUT: CQI_DATA_STRING_LIST -CORPUS_STRUCTURAL_ATTRIBUTE_HAS_VALUES = 0x1307 -# INPUT: (STRING attribute) -# OUTPUT: CQI_DATA_BOOL -CORPUS_ALIGNMENT_ATTRIBUTES = 0x1308 -# INPUT: (STRING corpus) -# OUTPUT: CQI_DATA_STRING_LIST -CORPUS_FULL_NAME = 0x1309 -# INPUT: (STRING corpus) -# OUTPUT: CQI_DATA_STRING -# the full name of as specified in its registry entry -CORPUS_INFO = 0x130A -# INPUT: (STRING corpus) -# OUTPUT: CQI_DATA_STRING_LIST -# returns the contents of the .info file of as a list of lines -CORPUS_DROP_CORPUS = 0x130B -# INPUT: (STRING corpus) -# OUTPUT: CQI_STATUS_OK -# try to unload a corpus and all its attributes from memory - -""" 3.4 CQI_CL_* """ -CL = 0x14 -# low-level corpus access (CL functions) -CL_ATTRIBUTE_SIZE = 0x1401 -# INPUT: (STRING attribute) -# OUTPUT: CQI_DATA_INT -# returns the size of : -# - number of tokens (positional) -# - number of regions (structural) -# - number of alignments (alignment) -CL_LEXICON_SIZE = 0x1402 -# INPUT: (STRING attribute) -# OUTPUT: CQI_DATA_INT -# returns the number of entries in the lexicon of a positional attribute; -# valid lexicon IDs range from 0 .. (lexicon_size - 1) -CL_DROP_ATTRIBUTE = 0x1403 -# INPUT: (STRING attribute) -# OUTPUT: CQI_STATUS_OK -# unload attribute from memory -""" -" NOTE: simple (scalar) mappings are applied to lists (the returned list has -" exactly the same length as the list passed as an argument) -""" -CL_STR2ID = 0x1404 -# INPUT: (STRING attribute, STRING_LIST strings) -# OUTPUT: CQI_DATA_INT_LIST -# returns -1 for every string in that is not found in the lexicon -CL_ID2STR = 0x1405 -# INPUT: (STRING attribute, INT_LIST id) -# OUTPUT: CQI_DATA_STRING_LIST -# returns "" for every ID in that is out of range -CL_ID2FREQ = 0x1406 -# INPUT: (STRING attribute, INT_LIST id) -# OUTPUT: CQI_DATA_INT_LIST -# returns 0 for every ID in that is out of range -CL_CPOS2ID = 0x1407 -# INPUT: (STRING attribute, INT_LIST cpos) -# OUTPUT: CQI_DATA_INT_LIST -# returns -1 for every corpus position in that is out of range -CL_CPOS2STR = 0x1408 -# INPUT: (STRING attribute, INT_LIST cpos) -# OUTPUT: CQI_DATA_STRING_LIST -# returns "" for every corpus position in that is out of range -CL_CPOS2STRUC = 0x1409 -# INPUT: (STRING attribute, INT_LIST cpos) -# OUTPUT: CQI_DATA_INT_LIST -# returns -1 for every corpus position not inside a structure region -""" -" NOTE: temporary addition for the Euralex2000 tutorial, but should probably be -" included in CQi specs -""" -CL_CPOS2LBOUND = 0x1420 -# INPUT: (STRING attribute, INT_LIST cpos) -# OUTPUT: CQI_DATA_INT_LIST -# returns left boundary of s-attribute region enclosing cpos, -1 if not in -# region -CL_CPOS2RBOUND = 0x1421 -# INPUT: (STRING attribute, INT_LIST cpos) -# OUTPUT: CQI_DATA_INT_LIST -# returns right boundary of s-attribute region enclosing cpos, -1 if not in -# region -CL_CPOS2ALG = 0x140A -# INPUT: (STRING attribute, INT_LIST cpos) -# OUTPUT: CQI_DATA_INT_LIST -# returns -1 for every corpus position not inside an alignment -CL_STRUC2STR = 0x140B -# INPUT: (STRING attribute, INT_LIST strucs) -# OUTPUT: CQI_DATA_STRING_LIST -# returns annotated string values of structure regions in ; "" if out -# of range -# check CQI_CORPUS_STRUCTURAL_ATTRIBUTE_HAS_VALUES() first -""" -" NOTE: the following mappings take a single argument and return multiple -" values, including lists of arbitrary size -""" -CL_ID2CPOS = 0x140C -# INPUT: (STRING attribute, INT id) -# OUTPUT: CQI_DATA_INT_LIST -# returns all corpus positions where the given token occurs -CL_IDLIST2CPOS = 0x140D -# INPUT: (STRING attribute, INT_LIST id_list) -# OUTPUT: CQI_DATA_INT_LIST -# returns all corpus positions where one of the tokens in -# occurs; the returned list is sorted as a whole, not per token id -CL_REGEX2ID = 0x140E -# INPUT: (STRING attribute, STRING regex) -# OUTPUT: CQI_DATA_INT_LIST -# returns lexicon IDs of all tokens that match ; the returned -# list may be empty (size 0); -CL_STRUC2CPOS = 0x140F -# INPUT: (STRING attribute, INT struc) -# OUTPUT: CQI_DATA_INT_INT -# returns start and end corpus positions of structure region -CL_ALG2CPOS = 0x1410 -# INPUT: (STRING attribute, INT alg) -# OUTPUT: CQI_DATA_INT_INT_INT_INT -# returns (src_start, src_end, target_start, target_end) - -""" 3.5 CQI_CQP_* """ -CQP = 0x15 -CQP_QUERY = 0x1501 -# INPUT: (STRING mother_corpus, STRING subcorpus_name, STRING query) -# OUTPUT: CQI_STATUS_OK -# must include the ';' character terminating the query. -CQP_LIST_SUBCORPORA = 0x1502 -# INPUT: (STRING corpus) -# OUTPUT: CQI_DATA_STRING_LIST -CQP_SUBCORPUS_SIZE = 0x1503 -# INPUT: (STRING subcorpus) -# OUTPUT: CQI_DATA_INT -CQP_SUBCORPUS_HAS_FIELD = 0x1504 -# INPUT: (STRING subcorpus, BYTE field) -# OUTPUT: CQI_DATA_BOOL -CQP_DUMP_SUBCORPUS = 0x1505 -# INPUT: (STRING subcorpus, BYTE field, INT first, INT last) -# OUTPUT: CQI_DATA_INT_LIST -# Dump the values of for match ranges .. in . -# is one of the CQI_CONST_FIELD_* constants. -CQP_DROP_SUBCORPUS = 0x1509 -# INPUT: (STRING subcorpus) -# OUTPUT: CQI_STATUS_OK -# delete a subcorpus from memory -""" -" NOTE: The following two functions are temporarily included for the Euralex -" 2000 tutorial demo -""" -""" NOTE: frequency distribution of single tokens """ -CQP_FDIST_1 = 0x1510 -# INPUT: (STRING subcorpus, INT cutoff, BYTE field, STRING attribute) -# OUTPUT: CQI_DATA_INT_LIST -# returns (id, frequency) pairs flattened into a list of size 2* -# field is one of CQI_CONST_FIELD_MATCH, CQI_CONST_FIELD_TARGET, -# CQI_CONST_FIELD_KEYWORD -# NB: pairs are sorted by frequency desc. -""" NOTE: frequency distribution of pairs of tokens """ -CQP_FDIST_2 = 0x1511 -# INPUT: (STRING subcorpus, INT cutoff, BYTE field1, STRING attribute1, -# BYTE field2, STRING attribute2) -# OUTPUT: CQI_DATA_INT_LIST -# returns (id1, id2, frequency) pairs flattened into a list of size 3* -# NB: triples are sorted by frequency desc. - - -""" 4. Constant Definitions """ -CONST_FALSE = 0x00 -CONST_NO = 0x00 -CONST_TRUE = 0x01 -CONST_YES = 0x01 -""" -" NOTE: The following constants specify which field will be returned by -" CQI_CQP_DUMP_SUBCORPUS and some other subcorpus commands. -""" -CONST_FIELD_MATCH = 0x10 -CONST_FIELD_MATCHEND = 0x11 -""" -" NOTE: The constants specifiying target0 .. target9 are guaranteed to have the -" numerical values 0 .. 9, so clients do not need to look up the constant -" values if they're handling arbitrary targets. -""" -CONST_FIELD_TARGET_0 = 0x00 -CONST_FIELD_TARGET_1 = 0x01 -CONST_FIELD_TARGET_2 = 0x02 -CONST_FIELD_TARGET_3 = 0x03 -CONST_FIELD_TARGET_4 = 0x04 -CONST_FIELD_TARGET_5 = 0x05 -CONST_FIELD_TARGET_6 = 0x06 -CONST_FIELD_TARGET_7 = 0x07 -CONST_FIELD_TARGET_8 = 0x08 -CONST_FIELD_TARGET_9 = 0x09 -""" -" NOTE: The following constants are provided for backward compatibility with -" traditional CQP field names & while the generalised target concept -" isn't yet implemented in the CQPserver. -""" -CONST_FIELD_TARGET = 0x00 -CONST_FIELD_KEYWORD = 0x09 -""" NOTE: CQi version is CQI_MAJOR_VERSION.CQI_MINOR_VERSION """ -MAJOR_VERSION = 0x00 -MINOR_VERSION = 0x01 - - -""" 5. CQi lookup dictionary. """ -lookup = { - 257: 'CQI_STATUS_OK', - 258: 'CQI_STATUS_CONNECT_OK', - 259: 'CQI_STATUS_BYE_OK', - 260: 'CQI_STATUS_PING_OK', - 513: 'CQI_ERROR_GENERAL_ERROR', - 514: 'CQI_ERROR_CONNECT_REFUSED', - 515: 'CQI_ERROR_USER_ABORT', - 516: 'CQI_ERROR_SYNTAX_ERROR', - 769: 'CQI_DATA_BYTE', - 770: 'CQI_DATA_BOOL', - 771: 'CQI_DATA_INT', - 772: 'CQI_DATA_STRING', - 773: 'CQI_DATA_BYTE_LIST', - 774: 'CQI_DATA_BOOL_LIST', - 775: 'CQI_DATA_INT_LIST', - 776: 'CQI_DATA_STRING_LIST', - 777: 'CQI_DATA_INT_INT', - 778: 'CQI_DATA_INT_INT_INT_INT', - 779: 'CQI_DATA_INT_TABLE', - 1025: 'CQI_CL_ERROR_NO_SUCH_ATTRIBUTE', - 1026: 'CQI_CL_ERROR_WRONG_ATTRIBUTE_TYPE', - 1027: 'CQI_CL_ERROR_OUT_OF_RANGE', - 1028: 'CQI_CL_ERROR_REGEX', - 1029: 'CQI_CL_ERROR_CORPUS_ACCESS', - 1030: 'CQI_CL_ERROR_OUT_OF_MEMORY', - 1031: 'CQI_CL_ERROR_INTERNAL', - 1281: 'CQI_CQP_ERROR_GENERAL', - 1282: 'CQI_CQP_ERROR_NO_SUCH_CORPUS', - 1283: 'CQI_CQP_ERROR_INVALID_FIELD', - 1284: 'CQI_CQP_ERROR_OUT_OF_RANGE', - 4353: 'CQI_CTRL_CONNECT', - 4354: 'CQI_CTRL_BYE', - 4355: 'CQI_CTRL_USER_ABORT', - 4356: 'CQI_CTRL_PING', - 4357: 'CQI_CTRL_LAST_GENERAL_ERROR', - 4609: 'CQI_ASK_FEATURE_CQI_1_0', - 4610: 'CQI_ASK_FEATURE_CL_2_3', - 4611: 'CQI_ASK_FEATURE_CQP_2_3', - 4865: 'CQI_CORPUS_LIST_CORPORA', - 4867: 'CQI_CORPUS_CHARSET', - 4868: 'CQI_CORPUS_PROPERTIES', - 4869: 'CQI_CORPUS_POSITIONAL_ATTRIBUTES', - 4870: 'CQI_CORPUS_STRUCTURAL_ATTRIBUTES', - 4871: 'CQI_CORPUS_STRUCTURAL_ATTRIBUTE_HAS_VALUES', - 4872: 'CQI_CORPUS_ALIGNMENT_ATTRIBUTES', - 4873: 'CQI_CORPUS_FULL_NAME', - 4874: 'CQI_CORPUS_INFO', - 4875: 'CQI_CORPUS_DROP_CORPUS', - 5121: 'CQI_CL_ATTRIBUTE_SIZE', - 5122: 'CQI_CL_LEXICON_SIZE', - 5123: 'CQI_CL_DROP_ATTRIBUTE', - 5124: 'CQI_CL_STR2ID', - 5125: 'CQI_CL_ID2STR', - 5126: 'CQI_CL_ID2FREQ', - 5127: 'CQI_CL_CPOS2ID', - 5128: 'CQI_CL_CPOS2STR', - 5129: 'CQI_CL_CPOS2STRUC', - 5130: 'CQI_CL_CPOS2ALG', - 5131: 'CQI_CL_STRUC2STR', - 5132: 'CQI_CL_ID2CPOS', - 5133: 'CQI_CL_IDLIST2CPOS', - 5134: 'CQI_CL_REGEX2ID', - 5135: 'CQI_CL_STRUC2CPOS', - 5136: 'CQI_CL_ALG2CPOS', - 5152: 'CQI_CL_CPOS2LBOUND', - 5153: 'CQI_CL_CPOS2RBOUND', - 5377: 'CQI_CQP_QUERY', - 5378: 'CQI_CQP_LIST_SUBCORPORA', - 5379: 'CQI_CQP_SUBCORPUS_SIZE', - 5380: 'CQI_CQP_SUBCORPUS_HAS_FIELD', - 5381: 'CQI_CQP_DUMP_SUBCORPUS', - 5385: 'CQI_CQP_DROP_SUBCORPUS', - 5392: 'CQI_CQP_FDIST_1', - 5393: 'CQI_CQP_FDIST_2' -} diff --git a/app/corpora/cqi/client.py b/app/corpora/cqi/client.py deleted file mode 100644 index 0fba29eb..00000000 --- a/app/corpora/cqi/client.py +++ /dev/null @@ -1,46 +0,0 @@ -from .api import APIClient -from .models.corpora import CorpusCollection - - -class CQiClient: - """ - A client for communicating with a CQi server. - - Example: - >>> import cqi - >>> client = cqi.CQiClient('127.0.0.1') - >>> client.connect() - {'code': 258, 'msg': 'CQI_STATUS_CONNECT_OK'} - >>> client.ping() - {'code': 260, 'msg': 'CQI_STATUS_PING_OK'} - >>> client.disconnect() - {'code': 259, 'msg': 'CQI_STATUS_BYE_OK'} - - Attributes: - api (APIClient): An API client pointing to the specified CQP server. - """ - - def __init__(self, host, port=4877): - """ - CQiClient constructor - - Args: - host (str): URL to the CQP server. For example, - ``cqpserver.localhost`` or ``127.0.0.1``. - port (int): Port the CQP server listens on. Default: ``4877`` - """ - self.api = APIClient(host, port=port) - - def connect(self, username='anonymous', password=''): - status = self.api.ctrl_connect(username, password) - return status - - def disconnect(self): - return self.api.ctrl_bye() - - def ping(self): - return self.api.ctrl_ping() - - @property - def corpora(self): - return CorpusCollection(client=self) diff --git a/app/corpora/cqi/errors.py b/app/corpora/cqi/errors.py deleted file mode 100644 index f2a8c9aa..00000000 --- a/app/corpora/cqi/errors.py +++ /dev/null @@ -1,180 +0,0 @@ -from .api import specification - - -class CQiException(Exception): - """ - A base class from which all other exceptions inherit. - If you want to catch all errors that the CQi package might raise, - catch this base exception. - """ - - def __init__(self, *args, **kwargs): - super(CQiException, self).__init__(*args, **kwargs) - self.code = None - self.name = None - self.description = None - - -class Error(CQiException): - def __init__(self, *args, **kwargs): - super(Error, self).__init__(*args, **kwargs) - self.code = specification.ERROR - - -class ErrorGeneralError(Error): - def __init__(self, *args, **kwargs): - super(ErrorGeneralError, self).__init__(*args, **kwargs) - self.code = specification.ERROR_GENERAL_ERROR - self.name = specification.lookup[self.code] - - -class ErrorConnectRefused(Error): - def __init__(self, *args, **kwargs): - super(ErrorConnectRefused, self).__init__(*args, **kwargs) - self.code = specification.ERROR_CONNECT_REFUSED - self.name = specification.lookup[self.code] - - -class ErrorUserAbort(Error): - def __init__(self, *args, **kwargs): - super(ErrorUserAbort, self).__init__(*args, **kwargs) - self.code = specification.ERROR_USER_ABORT - self.name = specification.lookup[self.code] - - -class ErrorSyntaxError(Error): - def __init__(self, *args, **kwargs): - super(ErrorSyntaxError, self).__init__(*args, **kwargs) - self.code = specification.ERROR_SYNTAX_ERROR - self.name = specification.lookup[self.code] - - -class CLError(CQiException): - def __init__(self, *args, **kwargs): - super(CLError, self).__init__(*args, **kwargs) - self.code = specification.CL_ERROR - - -class CLErrorNoSuchAttribute(CLError): - def __init__(self, *args, **kwargs): - super(CLErrorNoSuchAttribute, self).__init__(*args, **kwargs) - self.code = specification.CL_ERROR_NO_SUCH_ATTRIBUTE - self.name = specification.lookup[self.code] - self.description = "CQi server couldn't open attribute" - - -class CLErrorWrongAttributeType(CLError): - # CDA_EATTTYPE - def __init__(self, *args, **kwargs): - super(CLErrorWrongAttributeType, self).__init__(*args, **kwargs) - self.code = specification.CL_ERROR_WRONG_ATTRIBUTE_TYPE - self.name = specification.lookup[self.code] - - -class CLErrorOutOfRange(CLError): - # CDA_EIDORNG, CDA_EIDXORNG, CDA_EPOSORNG - def __init__(self, *args, **kwargs): - super(CLErrorOutOfRange, self).__init__(*args, **kwargs) - self.code = specification.CL_ERROR_OUT_OF_RANGE - self.name = specification.lookup[self.code] - - -class CLErrorRegex(CLError): - # CDA_EPATTERN (not used), CDA_EBADREGEX - def __init__(self, *args, **kwargs): - super(CLErrorRegex, self).__init__(*args, **kwargs) - self.code = specification.CL_ERROR_REGEX - self.name = specification.lookup[self.code] - - -class CLErrorCorpusAccess(CLError): - # CDA_ENODATA - def __init__(self, *args, **kwargs): - super(CLErrorCorpusAccess, self).__init__(*args, **kwargs) - self.code = specification.CL_ERROR_CORPUS_ACCESS - self.name = specification.lookup[self.code] - - -class CLErrorOutOfMemory(CLError): - # CDA_ENOMEM - def __init__(self, *args, **kwargs): - super(CLErrorOutOfMemory, self).__init__(*args, **kwargs) - self.code = specification.CL_ERROR_OUT_OF_MEMORY - self.name = specification.lookup[self.code] - self.description = ('CQi server has run out of memory; try discarding ' - 'some other corpora and/or subcorpora') - - -class CLErrorInternal(CLError): - # CDA_EOTHER, CDA_ENYI - def __init__(self, *args, **kwargs): - super(CLErrorInternal, self).__init__(*args, **kwargs) - self.code = specification.CL_ERROR_INTERNAL - self.name = specification.lookup[self.code] - self.description = "Classical 'please contact technical support' error" - - -class CQPError(CQiException): - # CQP error messages yet to be defined - def __init__(self, *args, **kwargs): - super(CQPError, self).__init__(*args, **kwargs) - self.code = specification.CQP_ERROR - - -class CQPErrorGeneral(CQPError): - def __init__(self, *args, **kwargs): - super(CQPErrorGeneral, self).__init__(*args, **kwargs) - self.code = specification.CQP_ERROR_GENERAL - self.name = specification.lookup[self.code] - - -class CQPErrorNoSuchCorpus(CQPError): - def __init__(self, *args, **kwargs): - super(CQPErrorNoSuchCorpus, self).__init__(*args, **kwargs) - self.code = specification.CQP_ERROR_NO_SUCH_CORPUS - self.name = specification.lookup[self.code] - - -class CQPErrorInvalidField(CQPError): - def __init__(self, *args, **kwargs): - super(CQPErrorInvalidField, self).__init__(*args, **kwargs) - self.code = specification.CQP_ERROR_INVALID_FIELD - self.name = specification.lookup[self.code] - - -class CQPErrorOutOfRange(CQPError): - def __init__(self, *args, **kwargs): - super(CQPErrorOutOfRange, self).__init__(*args, **kwargs) - self.code = specification.CQP_ERROR_OUT_OF_RANGE - self.name = specification.lookup[self.code] - self.description = 'A number is out of range' - - -error_lookup = { - specification.ERROR: Error, - specification.ERROR_GENERAL_ERROR: ErrorGeneralError, - specification.ERROR_CONNECT_REFUSED: ErrorConnectRefused, - specification.ERROR_USER_ABORT: ErrorUserAbort, - specification.ERROR_SYNTAX_ERROR: ErrorSyntaxError -} - - -cl_error_lookup = { - specification.CL_ERROR: CLError, - specification.CL_ERROR_NO_SUCH_ATTRIBUTE: CLErrorNoSuchAttribute, - specification.CL_ERROR_WRONG_ATTRIBUTE_TYPE: CLErrorWrongAttributeType, - specification.CL_ERROR_OUT_OF_RANGE: CLErrorOutOfRange, - specification.CL_ERROR_REGEX: CLErrorRegex, - specification.CL_ERROR_CORPUS_ACCESS: CLErrorCorpusAccess, - specification.CL_ERROR_OUT_OF_MEMORY: CLErrorOutOfMemory, - specification.CL_ERROR_INTERNAL: CLErrorInternal -} - - -cqp_error_lookup = { - specification.CQP_ERROR: CQPError, - specification.CQP_ERROR_GENERAL: CQPErrorGeneral, - specification.CQP_ERROR_NO_SUCH_CORPUS: CQPErrorNoSuchCorpus, - specification.CQP_ERROR_INVALID_FIELD: CQPErrorInvalidField, - specification.CQP_ERROR_OUT_OF_RANGE: CQPErrorOutOfRange -} diff --git a/app/corpora/cqi/models/__init__.py b/app/corpora/cqi/models/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/app/corpora/cqi/models/attributes.py b/app/corpora/cqi/models/attributes.py deleted file mode 100644 index 58aa1363..00000000 --- a/app/corpora/cqi/models/attributes.py +++ /dev/null @@ -1,162 +0,0 @@ -from .ressource import Collection, Model - - -class Attribute(Model): - """ - This is a class representing an attribute. Attributes denote the general - category of information. A specific occurence is identified by an Id. - """ - - id_attribute = 'api_name' - - @staticmethod - def _attrs(client, corpus, name): - api_name = '{}.{}'.format(corpus.attrs['api_name'], name) - return {'api_name': api_name, - 'name': name, - 'size': client.api.cl_attribute_size(api_name)} - - def drop(self): - return self.client.api.cl_drop_attribute(self.attrs['api_name']) - - -class AttributeCollection(Collection): - model = Attribute - - def __init__(self, client=None, corpus=None): - super(AttributeCollection, self).__init__(client=client) - self.corpus = corpus - - def get(self, attribute_name): - return self.prepare_model(self.model._attrs(self.client, self.corpus, - attribute_name)) - - def list(self): - raise NotImplementedError - - -class AlignmentAttribute(Attribute): - def cpos_by_ids(self, id_list): - return self.client.api.cl_alg2cpos(self.attrs['api_name'], id_list) - - def ids_by_cpos(self, cpos_list): - return self.client.api.cl_cpos2alg(self.attrs['api_name'], cpos_list) - - -class AlignmentAttributeCollection(AttributeCollection): - model = AlignmentAttribute - - def list(self): - return [self.prepare_model(self.model._attrs(self.client, self.corpus, attr)) # noqa - for attr in self.client.api.corpus_alignment_attributes(self.corpus.attrs['api_name'])] # noqa - - -class PositionalAttribute(Attribute): - @staticmethod - def _attrs(client, corpus, name): - attrs = super(PositionalAttribute, PositionalAttribute)._attrs(client, corpus, name) # noqa - attrs['lexicon_size'] = client.api.cl_lexicon_size(attrs['api_name']) - return attrs - - def cpos_by_id(self, id): - return self.client.api.cl_id2cpos(self.attrs['api_name'], id) - - def cpos_by_ids(self, id_list): - return self.client.api.cl_idlist2cpos(self.attrs['api_name'], id_list) - - def freqs_by_ids(self, id_list): - return self.client.api.cl_id2freq(self.attrs['api_name'], id_list) - - def ids_by_cpos(self, cpos_list): - return self.client.api.cl_cpos2id(self.attrs['api_name'], cpos_list) - - def ids_by_regex(self, regex): - return self.client.api.cl_regex2id(self.attrs['api_name'], regex) - - def ids_by_values(self, value_list): - return self.client.api.cl_str2id(self.attrs['api_name'], value_list) - - def values_by_cpos(self, cpos_list): - return self.client.api.cl_cpos2str(self.attrs['api_name'], cpos_list) - - def values_by_ids(self, id_list): - return self.client.api.cl_id2str(self.attrs['api_name'], id_list) - - -class PositionalAttributeCollection(AttributeCollection): - model = PositionalAttribute - - def list(self): - return [self.prepare_model(self.model._attrs(self.client, self.corpus, attr)) # noqa - for attr in self.client.api.corpus_positional_attributes(self.corpus.attrs['api_name'])] # noqa - - -class StructuralAttribute(Attribute): - @staticmethod - def _attrs(client, corpus, name): - attrs = super(StructuralAttribute, StructuralAttribute)._attrs(client, corpus, name) # noqa - attrs['has_values'] = client.api.corpus_structural_attribute_has_values(attrs['api_name']) # noqa - return attrs - - def cpos_by_id(self, id): - return self.client.api.cl_struc2cpos(self.attrs['api_name'], id) - - def ids_by_cpos(self, cpos_list): - return self.client.api.cl_cpos2struc(self.attrs['api_name'], cpos_list) - - def lbound_by_cpos(self, cpos_list): - return self.client.api.cl_cpos2lbound(self.attrs['api_name'], - cpos_list) - - def rbound_by_cpos(self, cpos_list): - return self.client.api.cl_cpos2rbound(self.attrs['api_name'], - cpos_list) - - def values_by_ids(self, id_list): - return self.client.api.cl_struc2str(self.attrs['api_name'], id_list) - - def export(self, first_cpos, last_cpos, context=0, expand_lists=False): - first_id, last_id = self.ids_by_cpos([first_cpos, last_cpos]) - c = (first_cpos, last_cpos) - lc = rc = None - if context == 0: - cpos_list = list(range(first_cpos, (last_cpos + 1))) - else: - lc_lbound = self.cpos_by_id(max(0, (first_id - context)))[0] - if lc_lbound != first_cpos: - lc_rbound = max(0, (first_cpos - 1)) - lc = (lc_lbound, lc_rbound) - cpos_list_lbound = lc_lbound - else: - cpos_list_lbound = first_cpos - rc_rbound = \ - self.cpos_by_id(min((last_id + context), - (self.attrs['size'] - 1)))[1] - if rc_rbound != last_cpos: - rc_lbound = min((last_cpos + 1), - (self.collection.corpus.attrs['size'] - 1)) - rc = (rc_lbound, rc_rbound) - cpos_list_rbound = rc_rbound - else: - cpos_list_rbound = last_cpos - cpos_list = list(range(cpos_list_lbound, (cpos_list_rbound + 1))) - if expand_lists: - match = {'lc': list(range(lc[0], (lc[1] + 1))), - 'c': list(range(c[0], (c[1] + 1))), - 'rc': list(range(rc[0], (rc[1] + 1)))} - else: - match = {'lc': lc, 'c': c, 'rc': rc} - lookups = self.collection.corpus.lookups_by_cpos(cpos_list) - return {'match': match, **lookups} - - -class StructuralAttributeCollection(AttributeCollection): - model = StructuralAttribute - - def list(self, filters={}): - attrs = [self.prepare_model(self.model._attrs(self.client, self.corpus, attr)) # noqa - for attr in self.client.api.corpus_structural_attributes(self.corpus.attrs['api_name'])] # noqa - for k, v in filters.items(): - if k == 'part_of': - attrs = list(filter(lambda x: x.attrs['name'].startswith(v.attrs['name'] + '_'), attrs)) # noqa - return attrs diff --git a/app/corpora/cqi/models/corpora.py b/app/corpora/cqi/models/corpora.py deleted file mode 100644 index 8c282794..00000000 --- a/app/corpora/cqi/models/corpora.py +++ /dev/null @@ -1,96 +0,0 @@ -from .attributes import (AlignmentAttributeCollection, - PositionalAttributeCollection, - StructuralAttributeCollection) -from .ressource import Collection, Model -from .subcorpora import SubcorpusCollection - - -class Corpus(Model): - id_attribute = 'api_name' - - @staticmethod - def _attrs(client, name): - api_name = name - return {'api_name': api_name, - 'name': name, - 'size': client.api.cl_attribute_size( - '{}.word'.format(api_name)), - # 'info': client.api.corpus_info(name), - 'charset': client.api.corpus_charset(api_name), - # 'full_name' = client.api.corpus_full_name(name), - 'properties': client.api.corpus_properties(api_name)} - - def lookups_by_cpos(self, cpos_list): - cpos_list = list(set(cpos_list)) - lookups = {} - if cpos_list: - lookups['cpos_lookup'] = {} - for cpos in cpos_list: - lookups['cpos_lookup'][cpos] = {} - for attr in self.positional_attributes.list(): - cpos_attr_values = attr.values_by_cpos(cpos_list) - for i, cpos in enumerate(cpos_list): - lookups['cpos_lookup'][cpos][attr.attrs['name']] = \ - cpos_attr_values[i] - for attr in self.structural_attributes.list(): - if attr.attrs['has_values']: - continue - cpos_attr_ids = attr.ids_by_cpos(cpos_list) - for i, cpos in enumerate(cpos_list): - if cpos_attr_ids[i] != -1: - lookups['cpos_lookup'][cpos][attr.attrs['name']] = \ - cpos_attr_ids[i] - occured_attr_ids = list(filter(lambda x: x != -1, - set(cpos_attr_ids))) - if not occured_attr_ids: - continue - subattrs = \ - self.structural_attributes.list(filters={'part_of': attr}) - if not subattrs: - continue - lookup_name = '{}_lookup'.format(attr.attrs['name']) - lookups[lookup_name] = {} - for attr_id in occured_attr_ids: - lookups[lookup_name][attr_id] = {} - for subattr in subattrs: - subattr_values = subattr.values_by_ids(occured_attr_ids) - for i, subattr_value in enumerate(subattr_values): - subattr_name = \ - subattr.attrs['name'][(len(attr.attrs['name']) + 1):] - lookups[lookup_name][occured_attr_ids[i]][subattr_name] = \ - subattr_value - return lookups - - def drop(self): - return self.client.api.corpus_drop_corpus(self.attrs['api_name']) - - def query(self, query, subcorpus_name='Results'): - return self.client.api.cqp_query(self.attrs['api_name'], - subcorpus_name, query) - - @property - def alignment_attributes(self): - return AlignmentAttributeCollection(client=self.client, corpus=self) - - @property - def positional_attributes(self): - return PositionalAttributeCollection(client=self.client, corpus=self) - - @property - def structural_attributes(self): - return StructuralAttributeCollection(client=self.client, corpus=self) - - @property - def subcorpora(self): - return SubcorpusCollection(client=self.client, corpus=self) - - -class CorpusCollection(Collection): - model = Corpus - - def get(self, corpus_name): - return self.prepare_model(self.model._attrs(self.client, corpus_name)) - - def list(self): - return [self.prepare_model(self.model._attrs(self.client, corpus)) - for corpus in self.client.api.corpus_list_coprora()] diff --git a/app/corpora/cqi/models/ressource.py b/app/corpora/cqi/models/ressource.py deleted file mode 100644 index 449bc93b..00000000 --- a/app/corpora/cqi/models/ressource.py +++ /dev/null @@ -1,78 +0,0 @@ -class Model: - """ - A base class for representing a single object on the server. - """ - id_attribute = 'Id' - - def __init__(self, attrs=None, client=None, collection=None): - #: A client pointing at the server that this object is on. - self.client = client - - #: The collection that this model is part of. - self.collection = collection - - #: The raw representation of this object from the API - self.attrs = attrs or {} - - @staticmethod - def _attrs(client, key): - raise NotImplementedError - - def __repr__(self): - return "<{}: {}>".format(self.__class__.__name__, self.id) - - def __eq__(self, other): - return isinstance(other, self.__class__) and self.id == other.id - - def __hash__(self): - return hash("{}:{}".format(self.__class__.__name__, self.id)) - - @property - def id(self): - """ - The ID of the object. - """ - return self.attrs.get(self.id_attribute) - - def reload(self): - """ - Load this object from the server again and update ``attrs`` with the - new data. - """ - new_model = self.collection.get(self.id) - self.attrs = new_model.attrs - - -class Collection: - """ - A base class for representing all objects of a particular type on the - server. - """ - - #: The type of object this collection represents, set by subclasses - model = None - - def __init__(self, client=None): - #: The client pointing at the server that this collection of objects - #: is on. - self.client = client - - def list(self): - raise NotImplementedError - - def get(self, key): - raise NotImplementedError - - def prepare_model(self, attrs): - """ - Create a model from a set of attributes. - """ - if isinstance(attrs, Model): - attrs.client = self.client - attrs.collection = self - return attrs - elif isinstance(attrs, dict): - return self.model(attrs=attrs, client=self.client, collection=self) - else: - raise Exception("Can't create {} from {}".format( - self.model.__name__, attrs)) diff --git a/app/corpora/cqi/models/subcorpora.py b/app/corpora/cqi/models/subcorpora.py deleted file mode 100644 index a97462db..00000000 --- a/app/corpora/cqi/models/subcorpora.py +++ /dev/null @@ -1,103 +0,0 @@ -from .ressource import Collection, Model -from ..api.specification import (CONST_FIELD_KEYWORD, CONST_FIELD_MATCH, - CONST_FIELD_MATCHEND, CONST_FIELD_TARGET) - - -class Subcorpus(Model): - id_attribute = 'api_name' - - @staticmethod - def _attrs(client, corpus, name): - api_name = '{}:{}'.format(corpus.attrs['api_name'], name) - fields = {} - if client.api.cqp_subcorpus_has_field(api_name, CONST_FIELD_MATCH): - fields['match'] = CONST_FIELD_MATCH - if client.api.cqp_subcorpus_has_field(api_name, - CONST_FIELD_MATCHEND): - fields['matchend'] = CONST_FIELD_MATCHEND - if client.api.cqp_subcorpus_has_field(api_name, CONST_FIELD_TARGET): - fields['target'] = CONST_FIELD_TARGET - if client.api.cqp_subcorpus_has_field(api_name, CONST_FIELD_KEYWORD): - fields['keyword'] = CONST_FIELD_KEYWORD - return {'api_name': api_name, - 'name': name, - 'fields': fields, - 'size': client.api.cqp_subcorpus_size(api_name)} - - def drop(self): - return self.client.api.cqp_drop_subcorpus(self.attrs['api_name']) - - def dump(self, field, first, last): - return self.client.api.cqp_dump_subcorpus(self.attrs['api_name'], - field, first, last) - - def export(self, context=25, cutoff=float('inf'), expand_lists=False, - offset=0): - if self.attrs['size'] == 0: - return {"matches": []} - first_match = max(0, offset) - last_match = min((offset + cutoff - 1), (self.attrs['size'] - 1)) - match_boundaries = zip(self.dump(self.attrs['fields']['match'], - first_match, last_match), - self.dump(self.attrs['fields']['matchend'], - first_match, last_match)) - cpos_list = [] - matches = [] - for match_start, match_end in match_boundaries: - c = (match_start, match_end) - lc = rc = None - if context == 0: - cpos_list += list(range(match_start, (match_end + 1))) - else: - lc_rbound = max(0, (match_start - 1)) - if lc_rbound != match_start: - lc_lbound = max(0, (match_start - context)) - lc = (lc_lbound, lc_rbound) - cpos_list_lbound = lc_lbound - else: - cpos_list_lbound = match_start - rc_lbound = min((match_end + 1), - (self.collection.corpus.attrs['size'] - 1)) - if rc_lbound != match_end: - rc_rbound = min((match_end + context), - (self.collection.corpus.attrs['size'] - 1)) - rc = (rc_lbound, rc_rbound) - cpos_list_rbound = rc_rbound - else: - cpos_list_rbound = match_end - cpos_list += list(range(cpos_list_lbound, - (cpos_list_rbound + 1))) - if expand_lists: - match = {'lc': list(range(lc[0], (lc[1] + 1))), - 'c': list(range(c[0], (c[1] + 1))), - 'rc': list(range(rc[0], (rc[1] + 1)))} - else: - match = {'lc': lc, 'c': c, 'rc': rc} - matches.append(match) - lookups = self.collection.corpus.lookups_by_cpos(cpos_list) - return {'matches': matches, **lookups} - - def fdist_1(self, cutoff, field, attribute): - return self.client.api.cqp_fdist_1(self.attrs['api_name'], cutoff, - field, attribute._name) - - def fdist_2(self, cutoff, field_1, attribute_1, field_2, attribute_2): - return self.client.api.cqp_fdist_2(self.attrs['api_name'], cutoff, - field_1, attribute_1._name, - field_2, attribute_2._name) - - -class SubcorpusCollection(Collection): - model = Subcorpus - - def __init__(self, client=None, corpus=None): - super(SubcorpusCollection, self).__init__(client=client) - self.corpus = corpus - - def get(self, subcorpus_name): - return self.prepare_model(self.model._attrs(self.client, self.corpus, - subcorpus_name)) - - def list(self): - return [self.prepare_model(self.model._attrs(self.client, self.corpus, subcorpus)) # noqa - for subcorpus in self.client.api.cqp_list_subcorpora(self.corpus.attrs['api_name'])] # noqa diff --git a/app/corpora/cqi/version.py b/app/corpora/cqi/version.py deleted file mode 100644 index e9c2235f..00000000 --- a/app/corpora/cqi/version.py +++ /dev/null @@ -1,5 +0,0 @@ -from .api.specification import MAJOR_VERSION, MINOR_VERSION - - -version = '{}.{}'.format(MAJOR_VERSION, MINOR_VERSION) -version_info = (MAJOR_VERSION, MINOR_VERSION) diff --git a/app/corpora/events.py b/app/corpora/events.py index 0d973dee..f8597a67 100644 --- a/app/corpora/events.py +++ b/app/corpora/events.py @@ -1,11 +1,11 @@ from flask import current_app, request from flask_login import current_user from socket import gaierror -from . import cqi from .. import db, socketio from ..decorators import socketio_login_required from ..events import connected_sessions from ..models import Corpus, User +import cqi import math diff --git a/requirements.txt b/requirements.txt index 7fb9990b..ef0254a9 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,3 +1,4 @@ +cqi email_validator eventlet Flask