Add tmp List.js implementation

This commit is contained in:
Stephan Porada
2020-01-17 14:31:14 +01:00
parent e04b31d282
commit e4b794d37c
4 changed files with 162 additions and 108 deletions

View File

@ -99,9 +99,9 @@ class CQiWrapper(CQiClient):
+ ':'
+ result_subcorpus_name)
self.SUBCORPUS_NAMES.append(self.result_subcorpus)
self.nr_matches = self.cqp_subcorpus_size(self.result_subcorpus)
print('Nr of all matches is:', self.nr_matches)
logger.warning('Nr of all matches is: {}'.format(self.nr_matches))
self.total_nr_matches = self.cqp_subcorpus_size(self.result_subcorpus)
logger.warning('Nr of all matches is: {}'.format(self.total_nr_matches))
logger.warning('Nr of all matches is: {}'.format(self.total_nr_matches))
def show_subcorpora(self):
'''
@ -135,7 +135,7 @@ class CQiWrapper(CQiClient):
self.corpus_max_len = self.cl_attribute_size(
self.attr_strings['positional_attrs']['word']
)
self.nr_matches = min(result_len, self.nr_matches)
self.nr_matches = min(result_len, self.total_nr_matches)
if self.nr_matches == 0:
logger.warning('Query resulted in 0 matches.')
return None
@ -145,7 +145,9 @@ class CQiWrapper(CQiClient):
# pair of cpositions
# [(1355, 1357), (1477, 1479)] Example for two boundary pairs
offset_start = 0 if result_offset == 0 else result_offset
offset_end = self.nr_matches + result_offset - 1
logger.warning('Offset start is: {}'.format(offset_start))
offset_end = min((self.nr_matches + result_offset - 1), self.total_nr_matches - 1)
logger.warning('Offset end is: {}'.format(offset_end))
match_boundaries = zip(self.cqp_dump_subcorpus(self.result_subcorpus,
CONST_FIELD_MATCH,
offset_start,
@ -182,7 +184,7 @@ class CQiWrapper(CQiClient):
t1 = time.time()
t_total = t1 - t0
logger.warning('Time to create all CPOS for query: {}'.format(t_total))
print('Requesting {} CPOS with one query.'.format(len_all_cpos))
logger.warning('Requesting {} CPOS with one query.'.format(len_all_cpos))
# Get cpos informations like CORPUS_NAME.word or CORPUS_NAME.lemma for
# all cpos entries in all_cpos_list
@ -191,12 +193,12 @@ class CQiWrapper(CQiClient):
all_cpos_infos, text_lookup = self.get_cpos_infos(all_cpos)
t3 = time.time()
t_final = t3 - t2
print('Got infos for {} CPOS in {} seconds:'.format(len_all_cpos,
t_final))
logger.warning('Got infos for {} CPOS in {} seconds:'.format(len_all_cpos, t_final))
self.results = {'matches': all_matches,
'cpos_lookup': all_cpos_infos,
'text_lookup': text_lookup,
'nr_matches': self.nr_matches}
'nr_matches': self.nr_matches,
'total_nr_matches': self.total_nr_matches}
return self.results
def get_cpos_infos(self, all_cpos):
@ -235,7 +237,10 @@ class CQiWrapper(CQiClient):
struct_values = self.cl_struc2str(s_key[0], text_lookup_ids)
zipped = dict(zip(text_lookup_ids, struct_values))
for zip_key, zip_value in zipped.items():
logger.warning('key: {}'.format(zip_key))
logger.warning('value: {}'.format(zip_value))
check = text_lookup.get(zip_key)
logger.warning('check: {}'.format(check))
if check is None:
text_lookup[zip_key] = {s_value: zip_value}
else:

View File

@ -45,7 +45,7 @@ def corpus_analysis(message):
# Prepare and execute a query
corpus_name = 'CORPUS'
query = str(message['query'])
result_len = int(message['hits_per_page'])
result_len = 500
context_len = int(message['context'])
result_offset = 0
client.select_corpus(corpus_name)
@ -58,7 +58,19 @@ def corpus_analysis(message):
results = client.show_query_results(result_len=result_len,
context_len=context_len,
result_offset=result_offset)
result_offset += result_len # the initial offset is result_len because client.show_query_results has already been executed once
while result_offset < client.total_nr_matches:
logger.warning('===While loop start.===')
logger.warning('result_offset: {}'.format(result_offset))
results_append = client.show_query_results(result_len=result_len,
context_len=context_len,
result_offset=result_offset)
results['matches'].extend(results_append['matches'])
results['cpos_lookup'].update(results_append['cpos_lookup'])
results['text_lookup'].update(results_append['text_lookup'])
result_offset += result_len
result_offset = min(result_offset, client.total_nr_matches)
logger.warning('result_offset end of while loop: {}'.format(result_offset))
socketio.emit('corpus_analysis', results, room=request.sid)