Merge branch 'development' of gitlab.ub.uni-bielefeld.de:sfb1288inf/opaque into development

This commit is contained in:
stephan 2020-03-12 14:49:03 +01:00
commit 6bdc0b354b
3 changed files with 21 additions and 94 deletions

View File

@ -6,10 +6,9 @@
# Modified by: Patrick Jentsch <p.jentsch@uni-bielefeld.de> #
# Modified date: Thurs Oct 10 <time> #
# ########################################################################### #
from app import logger
from time import sleep
import socket
import struct
import time
""" 1. padding """
@ -420,7 +419,6 @@ class Client:
def ctrl_connect(self, username, password):
# INPUT: (STRING username, STRING password)
# OUTPUT: CQI_STATUS_CONNECT_OK, CQI_ERROR_CONNECT_REFUSED
# print('CTRL_CONNECT')
self.__send_WORD(CTRL_CONNECT)
self.__send_STRING(username)
self.__send_STRING(password)
@ -429,20 +427,17 @@ class Client:
def ctrl_bye(self):
    """Send the CTRL_BYE command word and return the server's response.

    INPUT: ()
    OUTPUT: CQI_STATUS_BYE_OK
    """
    self.__send_WORD(CTRL_BYE)
    reply = self.__recv_response()
    return reply
def ctrl_user_abort(self):
    """Send the CTRL_USER_ABORT command word.

    INPUT: ()
    OUTPUT: none -- no response is read back after sending the command.
    """
    command = CTRL_USER_ABORT
    self.__send_WORD(command)
def ctrl_ping(self):
    """Send the CTRL_PING command word and return the server's response.

    INPUT: ()
    OUTPUT: CQI_STATUS_PING_OK
    """
    self.__send_WORD(CTRL_PING)
    reply = self.__recv_response()
    return reply
@ -451,42 +446,36 @@ class Client:
# OUTPUT: CQI_DATA_STRING
# full-text error message for the last general error reported by the
# CQi server
# print('CTRL_LAST_GENERAL_ERROR')
self.__send_WORD(CTRL_LAST_GENERAL_ERROR)
return self.__recv_response()
def ask_feature_cqi_1_0(self):
    """Send ASK_FEATURE_CQI_1_0 and return the server's response.

    INPUT: ()
    OUTPUT: CQI_DATA_BOOL
    """
    self.__send_WORD(ASK_FEATURE_CQI_1_0)
    supported = self.__recv_response()
    return supported
def ask_feature_cl_2_3(self):
    """Send ASK_FEATURE_CL_2_3 and return the server's response.

    INPUT: ()
    OUTPUT: CQI_DATA_BOOL
    """
    self.__send_WORD(ASK_FEATURE_CL_2_3)
    supported = self.__recv_response()
    return supported
def ask_feature_cqp_2_3(self):
    """Send ASK_FEATURE_CQP_2_3 and return the server's response.

    INPUT: ()
    OUTPUT: CQI_DATA_BOOL
    """
    # Previously this sent ASK_FEATURE_CL_2_3 (copied from the sibling
    # method), so the CQP feature was never actually queried.
    # NOTE(review): assumes ASK_FEATURE_CQP_2_3 is defined alongside the
    # other ASK_FEATURE_* command constants of this module -- confirm.
    self.__send_WORD(ASK_FEATURE_CQP_2_3)
    return self.__recv_response()
def corpus_list_coprora(self):
    """Send CORPUS_LIST_CORPORA and return the server's response.

    The method name is spelled "coprora"; it is left unchanged here so
    existing callers keep working.

    INPUT: ()
    OUTPUT: CQI_DATA_STRING_LIST
    """
    self.__send_WORD(CORPUS_LIST_CORPORA)
    corpora = self.__recv_response()
    return corpora
def corpus_charset(self, corpus):
    """Send CORPUS_CHARSET for <corpus> and return the server's response.

    INPUT: (STRING corpus)
    OUTPUT: CQI_DATA_STRING
    """
    # Command word first, then its single string argument.
    self.__send_WORD(CORPUS_CHARSET)
    self.__send_STRING(corpus)
    reply = self.__recv_response()
    return reply
@ -494,7 +483,6 @@ class Client:
def corpus_properties(self, corpus):
    """Send CORPUS_PROPERTIES for <corpus> and return the server's response.

    INPUT: (STRING corpus)
    OUTPUT: CQI_DATA_STRING_LIST
    """
    # Command word first, then its single string argument.
    self.__send_WORD(CORPUS_PROPERTIES)
    self.__send_STRING(corpus)
    reply = self.__recv_response()
    return reply
@ -502,7 +490,6 @@ class Client:
def corpus_positional_attributes(self, corpus):
    """Send CORPUS_POSITIONAL_ATTRIBUTES for <corpus>; return the response.

    INPUT: (STRING corpus)
    OUTPUT: CQI_DATA_STRING_LIST
    """
    # Command word first, then its single string argument.
    self.__send_WORD(CORPUS_POSITIONAL_ATTRIBUTES)
    self.__send_STRING(corpus)
    attributes = self.__recv_response()
    return attributes
@ -510,7 +497,6 @@ class Client:
def corpus_structural_attributes(self, corpus):
    """Send CORPUS_STRUCTURAL_ATTRIBUTES for <corpus>; return the response.

    INPUT: (STRING corpus)
    OUTPUT: CQI_DATA_STRING_LIST
    """
    # Command word first, then its single string argument.
    self.__send_WORD(CORPUS_STRUCTURAL_ATTRIBUTES)
    self.__send_STRING(corpus)
    attributes = self.__recv_response()
    return attributes
@ -518,7 +504,6 @@ class Client:
def corpus_structural_attribute_has_values(self, attribute):
    """Send CORPUS_STRUCTURAL_ATTRIBUTE_HAS_VALUES; return the response.

    INPUT: (STRING attribute)
    OUTPUT: CQI_DATA_BOOL
    """
    # Command word first, then its single string argument.
    self.__send_WORD(CORPUS_STRUCTURAL_ATTRIBUTE_HAS_VALUES)
    self.__send_STRING(attribute)
    has_values = self.__recv_response()
    return has_values
@ -526,7 +511,6 @@ class Client:
def corpus_alignment_attributes(self, corpus):
    """Send CORPUS_ALIGNMENT_ATTRIBUTES for <corpus>; return the response.

    INPUT: (STRING corpus)
    OUTPUT: CQI_DATA_STRING_LIST
    """
    # Command word first, then its single string argument.
    self.__send_WORD(CORPUS_ALIGNMENT_ATTRIBUTES)
    self.__send_STRING(corpus)
    attributes = self.__recv_response()
    return attributes
@ -535,7 +519,6 @@ class Client:
# INPUT: (STRING corpus)
# OUTPUT: CQI_DATA_STRING
# the full name of <corpus> as specified in its registry entry
# print('CORPUS_FULL_NAME')
self.__send_WORD(CORPUS_FULL_NAME)
self.__send_STRING(corpus)
return self.__recv_response()
@ -544,20 +527,14 @@ class Client:
# INPUT: (STRING corpus)
# OUTPUT: CQI_DATA_STRING_LIST
# returns the contents of the .info file of <corpus> as a list of lines
# print('CORPUS_INFO')
self.__send_WORD(CORPUS_INFO)
self.__send_STRING(corpus)
return self.__recv_response()
def corpus_drop_corpus(self, corpus):
    """Send CORPUS_DROP_CORPUS for <corpus> and return the server's response.

    Asks the server to try to unload a corpus and all its attributes from
    memory.

    NOTE(review): an earlier note flagged this call as broken pending a
    check of which return type the server actually provides -- confirm
    before relying on the result.

    INPUT: (STRING corpus)
    OUTPUT: CQI_STATUS_OK
    """
    # Command word first, then its single string argument.
    self.__send_WORD(CORPUS_DROP_CORPUS)
    self.__send_STRING(corpus)
    status = self.__recv_response()
    return status
@ -569,7 +546,6 @@ class Client:
# number of tokens (positional)
# number of regions (structural)
# number of alignments (alignment)
# print('CL_ATTRIBUTE_SIZE')
self.__send_WORD(CL_ATTRIBUTE_SIZE)
self.__send_STRING(attribute)
return self.__recv_response()
@ -580,7 +556,6 @@ class Client:
# returns the number of entries in the lexicon of a positional
# attribute;
# valid lexicon IDs range from 0 .. (lexicon_size - 1)
# print('CL_LEXICON_SIZE')
self.__send_WORD(CL_LEXICON_SIZE)
self.__send_STRING(attribute)
return self.__recv_response()
@ -589,7 +564,6 @@ class Client:
# INPUT: (STRING attribute)
# OUTPUT: CQI_STATUS_OK
# unload attribute from memory
# print('CL_DROP_ATTRIBUTE')
self.__send_WORD(CL_LEXICON_SIZE)
self.__send_STRING(attribute)
return self.__recv_response()
@ -604,7 +578,6 @@ class Client:
# OUTPUT: CQI_DATA_INT_LIST
# returns -1 for every string in <strings> that is not found in the
# lexicon
# print('CL_STR2ID')
self.__send_WORD(CL_LEXICON_SIZE)
self.__send_STRING(attribute)
self.__send_STRING_LIST(strings)
@ -614,7 +587,6 @@ class Client:
# INPUT: (STRING attribute, INT_LIST id)
# OUTPUT: CQI_DATA_STRING_LIST
# returns "" for every ID in <id> that is out of range
# print('CL_ID2STR')
self.__send_WORD(CL_ID2STR)
self.__send_STRING(attribute)
self.__send_INT_LIST(id)
@ -624,7 +596,6 @@ class Client:
# INPUT: (STRING attribute, INT_LIST id)
# OUTPUT: CQI_DATA_INT_LIST
# returns 0 for every ID in <id> that is out of range
# print('CL_ID2FREQ')
self.__send_WORD(CL_ID2FREQ)
self.__send_STRING(attribute)
self.__send_INT_LIST(id)
@ -634,7 +605,6 @@ class Client:
# INPUT: (STRING attribute, INT_LIST cpos)
# OUTPUT: CQI_DATA_INT_LIST
# returns -1 for every corpus position in <cpos> that is out of range
# print('CL_CPOS2ID')
self.__send_WORD(CL_ID2FREQ)
self.__send_STRING(attribute)
self.__send_INT_LIST(cpos)
@ -644,7 +614,6 @@ class Client:
# INPUT: (STRING attribute, INT_LIST cpos)
# OUTPUT: CQI_DATA_STRING_LIST
# returns "" for every corpus position in <cpos> that is out of range
# print('CL_CPOS2STR')
self.__send_WORD(CL_CPOS2STR)
self.__send_STRING(attribute)
self.__send_INT_LIST(cpos)
@ -654,7 +623,6 @@ class Client:
# INPUT: (STRING attribute, INT_LIST cpos)
# OUTPUT: CQI_DATA_INT_LIST
# returns -1 for every corpus position not inside a structure region
# print('CL_CPOS2STRUC')
self.__send_WORD(CL_CPOS2STRUC)
self.__send_STRING(attribute)
self.__send_INT_LIST(cpos)
@ -670,7 +638,6 @@ class Client:
# OUTPUT: CQI_DATA_INT_LIST
# returns left boundary of s-attribute region enclosing cpos, -1 if not
# in region
# print('CL_CPOS2LBOUND')
self.__send_WORD(CL_CPOS2LBOUND)
self.__send_STRING(attribute)
self.__send_INT_LIST(cpos)
@ -681,7 +648,6 @@ class Client:
# OUTPUT: CQI_DATA_INT_LIST
# returns right boundary of s-attribute region enclosing cpos, -1 if
# not in region
# print('CL_CPOS2RBOUND')
self.__send_WORD(CL_CPOS2RBOUND)
self.__send_STRING(attribute)
self.__send_INT_LIST(cpos)
@ -691,7 +657,6 @@ class Client:
# INPUT: (STRING attribute, INT_LIST cpos)
# OUTPUT: CQI_DATA_INT_LIST
# returns -1 for every corpus position not inside an alignment
# print('CL_CPOS2ALG')
self.__send_WORD(CL_CPOS2ALG)
self.__send_STRING(attribute)
self.__send_INT_LIST(cpos)
@ -703,7 +668,6 @@ class Client:
# returns annotated string values of structure regions in <strucs>; ""
# if out of range
# check CQI_CORPUS_STRUCTURAL_ATTRIBUTE_HAS_VALUES(<attribute>) first
# print('CL_STRUC2STR')
self.__send_WORD(CL_STRUC2STR)
self.__send_STRING(attribute)
self.__send_INT_LIST(strucs)
@ -718,7 +682,6 @@ class Client:
# INPUT: (STRING attribute, INT id)
# OUTPUT: CQI_DATA_INT_LIST
# returns all corpus positions where the given token occurs
# print('CL_ID2CPOS')
self.__send_WORD(CL_ID2CPOS)
self.__send_STRING(attribute)
self.__send_INT(id)
@ -729,7 +692,6 @@ class Client:
# OUTPUT: CQI_DATA_INT_LIST
# returns all corpus positions where one of the tokens in <id_list>
# occurs; the returned list is sorted as a whole, not per token id
# print('CL_IDLIST2CPOS')
self.__send_WORD(CL_IDLIST2CPOS)
self.__send_STRING(attribute)
self.__send_INT_LIST(id_list)
@ -740,7 +702,6 @@ class Client:
# OUTPUT: CQI_DATA_INT_LIST
# returns lexicon IDs of all tokens that match <regex>; the returned
# list may be empty (size 0);
# print('CL_REGEX2ID')
self.__send_WORD(CL_REGEX2ID)
self.__send_STRING(attribute)
self.__send_STRING(regex)
@ -750,7 +711,6 @@ class Client:
# INPUT: (STRING attribute, INT struc)
# OUTPUT: CQI_DATA_INT_INT
# returns start and end corpus positions of structure region <struc>
# print('CL_STRUC2CPOS')
self.__send_WORD(CL_STRUC2CPOS)
self.__send_STRING(attribute)
self.__send_INT(struc)
@ -760,7 +720,6 @@ class Client:
# INPUT: (STRING attribute, INT alg)
# OUTPUT: CQI_DATA_INT_INT_INT_INT
# returns (src_start, src_end, target_start, target_end)
# print('CL_ALG2CPOS')
self.__send_WORD(CL_ALG2CPOS)
self.__send_STRING(attribute)
self.__send_INT(alg)
@ -770,7 +729,6 @@ class Client:
# INPUT: (STRING mother_corpus, STRING subcorpus_name, STRING query)
# OUTPUT: CQI_STATUS_OK
# <query> must include the ';' character terminating the query.
# print('CQP_QUERY')
self.__send_WORD(CQP_QUERY)
self.__send_STRING(mother_corpus)
self.__send_STRING(subcorpus_name)
@ -780,7 +738,6 @@ class Client:
def cqp_list_subcorpora(self, corpus):
    """Send CQP_LIST_SUBCORPORA for <corpus> and return the server's response.

    INPUT: (STRING corpus)
    OUTPUT: CQI_DATA_STRING_LIST
    """
    # Command word first, then its single string argument.
    self.__send_WORD(CQP_LIST_SUBCORPORA)
    self.__send_STRING(corpus)
    subcorpora = self.__recv_response()
    return subcorpora
@ -788,7 +745,6 @@ class Client:
def cqp_subcorpus_size(self, subcorpus):
    """Send CQP_SUBCORPUS_SIZE for <subcorpus>; return the server's response.

    INPUT: (STRING subcorpus)
    OUTPUT: CQI_DATA_INT
    """
    # Command word first, then its single string argument.
    self.__send_WORD(CQP_SUBCORPUS_SIZE)
    self.__send_STRING(subcorpus)
    size = self.__recv_response()
    return size
@ -796,7 +752,6 @@ class Client:
def cqp_subcorpus_has_field(self, subcorpus, field):
# INPUT: (STRING subcorpus, BYTE field)
# OUTPUT: CQI_DATA_BOOL
# print('CQP_SUBCORPUS_HAS_FIELD')
self.__send_WORD(CQP_SUBCORPUS_HAS_FIELD)
self.__send_STRING(subcorpus)
self.__send_BYTE(field)
@ -807,7 +762,6 @@ class Client:
# OUTPUT: CQI_DATA_INT_LIST
# Dump the values of <field> for match ranges <first> .. <last>
# in <subcorpus>. <field> is one of the CQI_CONST_FIELD_* constants.
# print('CQP_DUMP_SUBCORPUS')
self.__send_WORD(CQP_DUMP_SUBCORPUS)
self.__send_STRING(subcorpus)
self.__send_BYTE(field)
@ -819,7 +773,6 @@ class Client:
# INPUT: (STRING subcorpus)
# OUTPUT: CQI_STATUS_OK
# delete a subcorpus from memory
# print('CQP_DROP_SUBCORPUS')
self.__send_WORD(CQP_DROP_SUBCORPUS)
self.__send_STRING(subcorpus)
return self.__recv_response()
@ -837,7 +790,6 @@ class Client:
# field is one of CQI_CONST_FIELD_MATCH, CQI_CONST_FIELD_TARGET,
# CQI_CONST_FIELD_KEYWORD
# NB: pairs are sorted by frequency desc.
# print('CQP_FDIST_1')
self.__send_WORD(CQP_FDIST_1)
self.__send_STRING(subcorpus)
self.__send_INT(cutoff)
@ -854,7 +806,6 @@ class Client:
# returns <n> (id1, id2, frequency) pairs flattened into a list of size
# 3*<n>
# NB: triples are sorted by frequency desc.
# print('CQP_FDIST_2')
self.__send_WORD(CQP_FDIST_2)
self.__send_STRING(subcorpus)
self.__send_INT(cutoff)
@ -914,8 +865,7 @@ class Client:
if (len(self.connection.recv(1, socket.MSG_PEEK)) == 1):
byte_data = self.connection.recv(1)
break
logger.warning('Waiting for data transfer to complete...')
time.sleep(0.1)
sleep(0.1)
return struct.unpack('!B', byte_data)[0]
def __recv_DATA_BOOL(self):
@ -923,8 +873,7 @@ class Client:
if (len(self.connection.recv(1, socket.MSG_PEEK)) == 1):
byte_data = self.connection.recv(1)
break
logger.warning('Waiting for data transfer to complete...')
time.sleep(0.1)
sleep(0.1)
return struct.unpack('!?', byte_data)[0]
def __recv_DATA_INT(self):
@ -932,8 +881,7 @@ class Client:
if (len(self.connection.recv(4, socket.MSG_PEEK)) == 4):
byte_data = self.connection.recv(4)
break
logger.warning('Waiting for data transfer to complete...')
time.sleep(0.1)
sleep(0.1)
return struct.unpack('!i', byte_data)[0]
def __recv_DATA_STRING(self):
@ -942,8 +890,7 @@ class Client:
if (len(self.connection.recv(n, socket.MSG_PEEK)) == n):
byte_data = self.connection.recv(n)
break
logger.warning('Waiting for data transfer to complete...')
time.sleep(0.1)
sleep(0.1)
return struct.unpack('!{}s'.format(n), byte_data)[0].decode()
def __recv_DATA_BYTE_LIST(self):
@ -1003,8 +950,7 @@ class Client:
if (len(self.connection.recv(2, socket.MSG_PEEK)) == 2):
byte_data = self.connection.recv(2)
break
logger.warning('Waiting for data transfer to complete...')
time.sleep(0.1)
sleep(0.1)
return struct.unpack('!H', byte_data)[0]
def __send_BYTE(self, byte_data):

View File

@ -26,51 +26,32 @@ def init_corpus_analysis(corpus_id):
corpus_id, current_user.id, request.sid)
@socketio.on('corpus_analysis')
@socketio.on('corpus_analysis_query')
@login_required
def corpus_analysis(message):
def corpus_analysis_query(query):
client = corpus_analysis_clients.get(request.sid)
if client is None:
socketio.emit('query', '[424]: Failed Dependency',
room=request.sid)
socketio.emit('query', '[424]: Failed Dependency', room=request.sid)
return
# Prepare and execute a query
corpus_name = 'CORPUS'
query = str(message['query'])
result_len = 200 # int(message['hits_per_page'])
context_len = int(message['context'])
result_offset = 0
client.select_corpus(corpus_name)
client.select_corpus('CORPUS')
try:
client.query_subcorpus(query)
except Exception as e:
logger.warning(e)
response = str(e)
if response == "CQI_CQP_ERROR_GENERAL":
response = {'code': 1,
'result': {'matches': [],
'match_count': 0,
'cpos_lookup': {},
'text_lookup': {}}
}
response = {'code': 1}
socketio.emit('corpus_analysis_query', response, room=request.sid)
else:
logger.warning('====== Initial query {} ======'.format(query))
response = client.show_query_results(result_len=result_len,
context_len=context_len,
result_offset=result_offset)
result_offset += result_len # initial offset is plus result len because client.show_query_results has already been executed once
socketio.emit('corpus_analysis_query', response, room=request.sid)
while result_offset < client.match_count:
logger.warning('====== While loop start for {} ======'.format(query))
logger.warning('result_offset: {}'.format(result_offset))
response = client.show_query_results(result_len=result_len,
context_len=context_len,
result_offset=result_offset)
result_offset += result_len
result_offset = min(result_offset, client.match_count)
logger.warning('result_offset end of while loop: {}'.format(result_offset))
socketio.emit('corpus_analysis_query', response, room=request.sid)
chunk_size = 500
chunk_start = 0
context = 100
while chunk_start < client.match_count:
chunk = client.show_query_results(result_len=chunk_size,
context_len=context,
result_offset=chunk_start)
socketio.emit('corpus_analysis_query', chunk, room=request.sid)
chunk_start += chunk_size
@socketio.on('inspect_match')

View File

@ -270,7 +270,7 @@
function sendQuery(event) {
event.preventDefault();
queryData = getQueryData(queryFormElement);
nopaque.socket.emit("corpus_analysis", queryData);
nopaque.socket.emit("corpus_analysis_query", queryData.query);
// full results object declaration, kind of global maybe store it later?
// will always be reset if a query is sent, so that only the chunks of the
// current query will be saved in it