From 416c724f70b371184626909057e01baa1ef85732 Mon Sep 17 00:00:00 2001
From: Patrick Jentsch
Date: Thu, 12 Mar 2020 14:13:07 +0100
Subject: [PATCH] Simplify
---
app/corpora/CQiWrapper/CQi.py | 66 ++------------------
app/corpora/events.py | 44 ++++---------
app/templates/corpora/analyse_corpus.html.j2 | 2 +-
3 files changed, 20 insertions(+), 92 deletions(-)
diff --git a/app/corpora/CQiWrapper/CQi.py b/app/corpora/CQiWrapper/CQi.py
index 306e6732..7fcc8c4f 100644
--- a/app/corpora/CQiWrapper/CQi.py
+++ b/app/corpora/CQiWrapper/CQi.py
@@ -6,10 +6,9 @@
# Modified by: Patrick Jentsch #
# ########################################################################### #
-from app import logger
+from time import sleep
import socket
import struct
-import time
""" 1. padding """
@@ -420,7 +419,6 @@ class Client:
def ctrl_connect(self, username, password):
# INPUT: (STRING username, STRING password)
# OUTPUT: CQI_STATUS_CONNECT_OK, CQI_ERROR_CONNECT_REFUSED
- # print('CTRL_CONNECT')
self.__send_WORD(CTRL_CONNECT)
self.__send_STRING(username)
self.__send_STRING(password)
@@ -429,20 +427,17 @@ class Client:
def ctrl_bye(self):
# INPUT: ()
# OUTPUT: CQI_STATUS_BYE_OK
- # print('CTRL_BYE')
self.__send_WORD(CTRL_BYE)
return self.__recv_response()
def ctrl_user_abort(self):
# INPUT: ()
# OUTPUT:
- # print('CTRL_USER_ABORT')
self.__send_WORD(CTRL_USER_ABORT)
def ctrl_ping(self):
# INPUT: ()
# OUTPUT: CQI_STATUS_PING_OK
- # print('CTRL_PING')
self.__send_WORD(CTRL_PING)
return self.__recv_response()
@@ -451,42 +446,36 @@ class Client:
# OUTPUT: CQI_DATA_STRING
# full-text error message for the last general error reported by the
# CQi server
- # print('CTRL_LAST_GENERAL_ERROR')
self.__send_WORD(CTRL_LAST_GENERAL_ERROR)
return self.__recv_response()
def ask_feature_cqi_1_0(self):
# INPUT: ()
# OUTPUT: CQI_DATA_BOOL
- # print('ASK_FEATURE_CQI_1_0')
self.__send_WORD(ASK_FEATURE_CQI_1_0)
return self.__recv_response()
def ask_feature_cl_2_3(self):
# INPUT: ()
# OUTPUT: CQI_DATA_BOOL
- # print('ASK_FEATURE_CL_2_3')
self.__send_WORD(ASK_FEATURE_CL_2_3)
return self.__recv_response()
def ask_feature_cqp_2_3(self):
# INPUT: ()
# OUTPUT: CQI_DATA_BOOL
- # print('ASK_FEATURE_CL_2_3')
self.__send_WORD(ASK_FEATURE_CL_2_3)
return self.__recv_response()
def corpus_list_coprora(self):
# INPUT: ()
# OUTPUT: CQI_DATA_STRING_LIST
- # print('CORPUS_LIST_CORPORA')
self.__send_WORD(CORPUS_LIST_CORPORA)
return self.__recv_response()
def corpus_charset(self, corpus):
# INPUT: (STRING corpus)
# OUTPUT: CQI_DATA_STRING
- # print('CORPUS_CHARSET')
self.__send_WORD(CORPUS_CHARSET)
self.__send_STRING(corpus)
return self.__recv_response()
@@ -494,7 +483,6 @@ class Client:
def corpus_properties(self, corpus):
# INPUT: (STRING corpus)
# OUTPUT: CQI_DATA_STRING_LIST
- # print('CORPUS_PROPERTIES')
self.__send_WORD(CORPUS_PROPERTIES)
self.__send_STRING(corpus)
return self.__recv_response()
@@ -502,7 +490,6 @@ class Client:
def corpus_positional_attributes(self, corpus):
# INPUT: (STRING corpus)
# OUTPUT: CQI_DATA_STRING_LIST
- # print('CORPUS_POSITIONAL_ATTRIBUTES')
self.__send_WORD(CORPUS_POSITIONAL_ATTRIBUTES)
self.__send_STRING(corpus)
return self.__recv_response()
@@ -510,7 +497,6 @@ class Client:
def corpus_structural_attributes(self, corpus):
# INPUT: (STRING corpus)
# OUTPUT: CQI_DATA_STRING_LIST
- # print('CORPUS_STRUCTURAL_ATTRIBUTES')
self.__send_WORD(CORPUS_STRUCTURAL_ATTRIBUTES)
self.__send_STRING(corpus)
return self.__recv_response()
@@ -518,7 +504,6 @@ class Client:
def corpus_structural_attribute_has_values(self, attribute):
# INPUT: (STRING attribute)
# OUTPUT: CQI_DATA_BOOL
- # print('CORPUS_STRUCTURAL_ATTRIBUTE_HAS_VALUES')
self.__send_WORD(CORPUS_STRUCTURAL_ATTRIBUTE_HAS_VALUES)
self.__send_STRING(attribute)
return self.__recv_response()
@@ -526,7 +511,6 @@ class Client:
def corpus_alignment_attributes(self, corpus):
# INPUT: (STRING corpus)
# OUTPUT: CQI_DATA_STRING_LIST
- # print('CORPUS_ALIGNMENT_ATTRIBUTES')
self.__send_WORD(CORPUS_ALIGNMENT_ATTRIBUTES)
self.__send_STRING(corpus)
return self.__recv_response()
@@ -535,7 +519,6 @@ class Client:
# INPUT: (STRING corpus)
# OUTPUT: CQI_DATA_STRING
# the full name of <corpus> as specified in its registry entry
- # print('CORPUS_FULL_NAME')
self.__send_WORD(CORPUS_FULL_NAME)
self.__send_STRING(corpus)
return self.__recv_response()
@@ -544,20 +527,14 @@ class Client:
# INPUT: (STRING corpus)
# OUTPUT: CQI_DATA_STRING_LIST
# returns the contents of the .info file of <corpus> as a list of lines
- # print('CORPUS_INFO')
self.__send_WORD(CORPUS_INFO)
self.__send_STRING(corpus)
return self.__recv_response()
def corpus_drop_corpus(self, corpus):
- '''
- ' Broken
- ' TODO: Check what type of return value is provided by the server.
- '''
# INPUT: (STRING corpus)
# OUTPUT: CQI_STATUS_OK
# try to unload a corpus and all its attributes from memory
- # print('CORPUS_DROP_CORPUS')
self.__send_WORD(CORPUS_DROP_CORPUS)
self.__send_STRING(corpus)
return self.__recv_response()
@@ -569,7 +546,6 @@ class Client:
# number of tokens (positional)
# number of regions (structural)
# number of alignments (alignment)
- # print('CL_ATTRIBUTE_SIZE')
self.__send_WORD(CL_ATTRIBUTE_SIZE)
self.__send_STRING(attribute)
return self.__recv_response()
@@ -580,7 +556,6 @@ class Client:
# returns the number of entries in the lexicon of a positional
# attribute;
# valid lexicon IDs range from 0 .. (lexicon_size - 1)
- # print('CL_LEXICON_SIZE')
self.__send_WORD(CL_LEXICON_SIZE)
self.__send_STRING(attribute)
return self.__recv_response()
@@ -589,7 +564,6 @@ class Client:
# INPUT: (STRING attribute)
# OUTPUT: CQI_STATUS_OK
# unload attribute from memory
- # print('CL_DROP_ATTRIBUTE')
self.__send_WORD(CL_LEXICON_SIZE)
self.__send_STRING(attribute)
return self.__recv_response()
@@ -604,7 +578,6 @@ class Client:
# OUTPUT: CQI_DATA_INT_LIST
# returns -1 for every string in <strings> that is not found in the
# lexicon
- # print('CL_STR2ID')
self.__send_WORD(CL_LEXICON_SIZE)
self.__send_STRING(attribute)
self.__send_STRING_LIST(strings)
@@ -614,7 +587,6 @@ class Client:
# INPUT: (STRING attribute, INT_LIST id)
# OUTPUT: CQI_DATA_STRING_LIST
# returns "" for every ID in <id> that is out of range
- # print('CL_ID2STR')
self.__send_WORD(CL_ID2STR)
self.__send_STRING(attribute)
self.__send_INT_LIST(id)
@@ -624,7 +596,6 @@ class Client:
# INPUT: (STRING attribute, INT_LIST id)
# OUTPUT: CQI_DATA_INT_LIST
# returns 0 for every ID in <id> that is out of range
- # print('CL_ID2FREQ')
self.__send_WORD(CL_ID2FREQ)
self.__send_STRING(attribute)
self.__send_INT_LIST(id)
@@ -634,7 +605,6 @@ class Client:
# INPUT: (STRING attribute, INT_LIST cpos)
# OUTPUT: CQI_DATA_INT_LIST
# returns -1 for every corpus position in <cpos> that is out of range
- # print('CL_CPOS2ID')
self.__send_WORD(CL_ID2FREQ)
self.__send_STRING(attribute)
self.__send_INT_LIST(cpos)
@@ -644,7 +614,6 @@ class Client:
# INPUT: (STRING attribute, INT_LIST cpos)
# OUTPUT: CQI_DATA_STRING_LIST
# returns "" for every corpus position in <cpos> that is out of range
- # print('CL_CPOS2STR')
self.__send_WORD(CL_CPOS2STR)
self.__send_STRING(attribute)
self.__send_INT_LIST(cpos)
@@ -654,7 +623,6 @@ class Client:
# INPUT: (STRING attribute, INT_LIST cpos)
# OUTPUT: CQI_DATA_INT_LIST
# returns -1 for every corpus position not inside a structure region
- # print('CL_CPOS2STRUC')
self.__send_WORD(CL_CPOS2STRUC)
self.__send_STRING(attribute)
self.__send_INT_LIST(cpos)
@@ -670,7 +638,6 @@ class Client:
# OUTPUT: CQI_DATA_INT_LIST
# returns left boundary of s-attribute region enclosing cpos, -1 if not
# in region
- # print('CL_CPOS2LBOUND')
self.__send_WORD(CL_CPOS2LBOUND)
self.__send_STRING(attribute)
self.__send_INT_LIST(cpos)
@@ -681,7 +648,6 @@ class Client:
# OUTPUT: CQI_DATA_INT_LIST
# returns right boundary of s-attribute region enclosing cpos, -1 if
# not in region
- # print('CL_CPOS2RBOUND')
self.__send_WORD(CL_CPOS2RBOUND)
self.__send_STRING(attribute)
self.__send_INT_LIST(cpos)
@@ -691,7 +657,6 @@ class Client:
# INPUT: (STRING attribute, INT_LIST cpos)
# OUTPUT: CQI_DATA_INT_LIST
# returns -1 for every corpus position not inside an alignment
- # print('CL_CPOS2ALG')
self.__send_WORD(CL_CPOS2ALG)
self.__send_STRING(attribute)
self.__send_INT_LIST(cpos)
@@ -703,7 +668,6 @@ class Client:
# returns annotated string values of structure regions in <strucs>; ""
# if out of range
# check CQI_CORPUS_STRUCTURAL_ATTRIBUTE_HAS_VALUES() first
- # print('CL_STRUC2STR')
self.__send_WORD(CL_STRUC2STR)
self.__send_STRING(attribute)
self.__send_INT_LIST(strucs)
@@ -718,7 +682,6 @@ class Client:
# INPUT: (STRING attribute, INT id)
# OUTPUT: CQI_DATA_INT_LIST
# returns all corpus positions where the given token occurs
- # print('CL_ID2CPOS')
self.__send_WORD(CL_ID2CPOS)
self.__send_STRING(attribute)
self.__send_INT(id)
@@ -729,7 +692,6 @@ class Client:
# OUTPUT: CQI_DATA_INT_LIST
# returns all corpus positions where one of the tokens in <id_list>
# occurs; the returned list is sorted as a whole, not per token id
- # print('CL_IDLIST2CPOS')
self.__send_WORD(CL_IDLIST2CPOS)
self.__send_STRING(attribute)
self.__send_INT_LIST(id_list)
@@ -740,7 +702,6 @@ class Client:
# OUTPUT: CQI_DATA_INT_LIST
# returns lexicon IDs of all tokens that match <regex>; the returned
# list may be empty (size 0);
- # print('CL_REGEX2ID')
self.__send_WORD(CL_REGEX2ID)
self.__send_STRING(attribute)
self.__send_STRING(regex)
@@ -750,7 +711,6 @@ class Client:
# INPUT: (STRING attribute, INT struc)
# OUTPUT: CQI_DATA_INT_INT
# returns start and end corpus positions of structure region <struc>
- # print('CL_STRUC2CPOS')
self.__send_WORD(CL_STRUC2CPOS)
self.__send_STRING(attribute)
self.__send_INT(struc)
@@ -760,7 +720,6 @@ class Client:
# INPUT: (STRING attribute, INT alg)
# OUTPUT: CQI_DATA_INT_INT_INT_INT
# returns (src_start, src_end, target_start, target_end)
- # print('CL_ALG2CPOS')
self.__send_WORD(CL_ALG2CPOS)
self.__send_STRING(attribute)
self.__send_INT(alg)
@@ -770,7 +729,6 @@ class Client:
# INPUT: (STRING mother_corpus, STRING subcorpus_name, STRING query)
# OUTPUT: CQI_STATUS_OK
# <query> must include the ';' character terminating the query.
- # print('CQP_QUERY')
self.__send_WORD(CQP_QUERY)
self.__send_STRING(mother_corpus)
self.__send_STRING(subcorpus_name)
@@ -780,7 +738,6 @@ class Client:
def cqp_list_subcorpora(self, corpus):
# INPUT: (STRING corpus)
# OUTPUT: CQI_DATA_STRING_LIST
- # print('CQP_LIST_SUBCORPORA')
self.__send_WORD(CQP_LIST_SUBCORPORA)
self.__send_STRING(corpus)
return self.__recv_response()
@@ -788,7 +745,6 @@ class Client:
def cqp_subcorpus_size(self, subcorpus):
# INPUT: (STRING subcorpus)
# OUTPUT: CQI_DATA_INT
- # print('CQP_SUBCORPUS_SIZE')
self.__send_WORD(CQP_SUBCORPUS_SIZE)
self.__send_STRING(subcorpus)
return self.__recv_response()
@@ -796,7 +752,6 @@ class Client:
def cqp_subcorpus_has_field(self, subcorpus, field):
# INPUT: (STRING subcorpus, BYTE field)
# OUTPUT: CQI_DATA_BOOL
- # print('CQP_SUBCORPUS_HAS_FIELD')
self.__send_WORD(CQP_SUBCORPUS_HAS_FIELD)
self.__send_STRING(subcorpus)
self.__send_BYTE(field)
@@ -807,7 +762,6 @@ class Client:
# OUTPUT: CQI_DATA_INT_LIST
# Dump the values of <field> for match ranges <first> .. <last>
# in <subcorpus>. <field> is one of the CQI_CONST_FIELD_* constants.
- # print('CQP_DUMP_SUBCORPUS')
self.__send_WORD(CQP_DUMP_SUBCORPUS)
self.__send_STRING(subcorpus)
self.__send_BYTE(field)
@@ -819,7 +773,6 @@ class Client:
# INPUT: (STRING subcorpus)
# OUTPUT: CQI_STATUS_OK
# delete a subcorpus from memory
- # print('CQP_DROP_SUBCORPUS')
self.__send_WORD(CQP_DROP_SUBCORPUS)
self.__send_STRING(subcorpus)
return self.__recv_response()
@@ -837,7 +790,6 @@ class Client:
# field is one of CQI_CONST_FIELD_MATCH, CQI_CONST_FIELD_TARGET,
# CQI_CONST_FIELD_KEYWORD
# NB: pairs are sorted by frequency desc.
- # print('CQP_FDIST_1')
self.__send_WORD(CQP_FDIST_1)
self.__send_STRING(subcorpus)
self.__send_INT(cutoff)
@@ -854,7 +806,6 @@ class Client:
# returns <n> (id1, id2, frequency) pairs flattened into a list of size
# 3*<n>
# NB: triples are sorted by frequency desc.
- # print('CQP_FDIST_2')
self.__send_WORD(CQP_FDIST_2)
self.__send_STRING(subcorpus)
self.__send_INT(cutoff)
@@ -914,8 +865,7 @@ class Client:
if (len(self.connection.recv(1, socket.MSG_PEEK)) == 1):
byte_data = self.connection.recv(1)
break
- logger.warning('Waiting for data transfer to complete...')
- time.sleep(0.1)
+ sleep(0.1)
return struct.unpack('!B', byte_data)[0]
def __recv_DATA_BOOL(self):
@@ -923,8 +873,7 @@ class Client:
if (len(self.connection.recv(1, socket.MSG_PEEK)) == 1):
byte_data = self.connection.recv(1)
break
- logger.warning('Waiting for data transfer to complete...')
- time.sleep(0.1)
+ sleep(0.1)
return struct.unpack('!?', byte_data)[0]
def __recv_DATA_INT(self):
@@ -932,8 +881,7 @@ class Client:
if (len(self.connection.recv(4, socket.MSG_PEEK)) == 4):
byte_data = self.connection.recv(4)
break
- logger.warning('Waiting for data transfer to complete...')
- time.sleep(0.1)
+ sleep(0.1)
return struct.unpack('!i', byte_data)[0]
def __recv_DATA_STRING(self):
@@ -942,8 +890,7 @@ class Client:
if (len(self.connection.recv(n, socket.MSG_PEEK)) == n):
byte_data = self.connection.recv(n)
break
- logger.warning('Waiting for data transfer to complete...')
- time.sleep(0.1)
+ sleep(0.1)
return struct.unpack('!{}s'.format(n), byte_data)[0].decode()
def __recv_DATA_BYTE_LIST(self):
@@ -1003,8 +950,7 @@ class Client:
if (len(self.connection.recv(2, socket.MSG_PEEK)) == 2):
byte_data = self.connection.recv(2)
break
- logger.warning('Waiting for data transfer to complete...')
- time.sleep(0.1)
+ sleep(0.1)
return struct.unpack('!H', byte_data)[0]
def __send_BYTE(self, byte_data):
diff --git a/app/corpora/events.py b/app/corpora/events.py
index 190157b1..8449ad08 100644
--- a/app/corpora/events.py
+++ b/app/corpora/events.py
@@ -26,51 +26,33 @@ def init_corpus_analysis(corpus_id):
corpus_id, current_user.id, request.sid)
-@socketio.on('corpus_analysis')
+@socketio.on('corpus_analysis_query')
@login_required
-def corpus_analysis(message):
+def corpus_analysis_query(query):
client = corpus_analysis_clients.get(request.sid)
if client is None:
socketio.emit('query', '[424]: Failed Dependency',
room=request.sid)
return
- # Prepare and execute a query
- corpus_name = 'CORPUS'
- query = str(message['query'])
- result_len = 200 # int(message['hits_per_page'])
- context_len = int(message['context'])
- result_offset = 0
- client.select_corpus(corpus_name)
+ client.select_corpus('CORPUS')
try:
client.query_subcorpus(query)
except Exception as e:
logger.warning(e)
response = str(e)
if response == "CQI_CQP_ERROR_GENERAL":
- response = {'code': 1,
- 'result': {'matches': [],
- 'match_count': 0,
- 'cpos_lookup': {},
- 'text_lookup': {}}
- }
+ response = {'code': 1}
socketio.emit('corpus_analysis_query', response, room=request.sid)
else:
- logger.warning('====== Initial query {} ======'.format(query))
- response = client.show_query_results(result_len=result_len,
- context_len=context_len,
- result_offset=result_offset)
- result_offset += result_len # initial offset is plus result len because client.show_query_results has already been executed once
- socketio.emit('corpus_analysis_query', response, room=request.sid)
- while result_offset < client.match_count:
- logger.warning('====== While loop start for {} ======'.format(query))
- logger.warning('result_offset: {}'.format(result_offset))
- response = client.show_query_results(result_len=result_len,
- context_len=context_len,
- result_offset=result_offset)
- result_offset += result_len
- result_offset = min(result_offset, client.match_count)
- logger.warning('result_offset end of while loop: {}'.format(result_offset))
- socketio.emit('corpus_analysis_query', response, room=request.sid)
+ chunk_size = 500
+ chunk_start = 0
+ logger.warning('pjentsch -> Start result transmission for query: {}'.format(query))
+ while chunk_start < client.match_count:
+ chunk = client.show_query_results(result_len=chunk_size,
+ context_len=100,
+ result_offset=chunk_start)
+ socketio.emit('corpus_analysis_query', chunk, room=request.sid)
+ chunk_start += chunk_size
@socketio.on('inspect_match')
diff --git a/app/templates/corpora/analyse_corpus.html.j2 b/app/templates/corpora/analyse_corpus.html.j2
index 333b4636..14a0312b 100644
--- a/app/templates/corpora/analyse_corpus.html.j2
+++ b/app/templates/corpora/analyse_corpus.html.j2
@@ -270,7 +270,7 @@
function sendQuery(event) {
event.preventDefault();
queryData = getQueryData(queryFormElement);
- nopaque.socket.emit("corpus_analysis", queryData);
+ nopaque.socket.emit("corpus_analysis_query", queryData.query);
// full results object declaration, kind of global maybe store it later?
// will always be reset if a query is sent, so that only the chunks of the
// current query will be saved in it