Fix some things for query results

This commit is contained in:
Stephan Porada 2019-11-28 15:19:52 +01:00
parent dbd580b3c0
commit 1c9e715980
3 changed files with 17 additions and 10 deletions

View File

@ -133,8 +133,8 @@ class CQiWrapper(CQiClient):
# match_boundries shows the start and end cpos of one match as a # match_boundries shows the start and end cpos of one match as a
# pair of cpositions # pair of cpositions
# [(1355, 1357), (1477, 1479)] Example for two boundry pairs # [(1355, 1357), (1477, 1479)] Example for two boundry pairs
offset_start = 0 + (result_offset + 1) if result_offset != 0 else result_offset offset_start = 0 if result_offset == 0 else result_offset
offset_end = self.nr_matches + result_offset offset_end = self.nr_matches + result_offset - 1
match_boundaries = zip(self.cqp_dump_subcorpus(self.result_subcorpus, match_boundaries = zip(self.cqp_dump_subcorpus(self.result_subcorpus,
CONST_FIELD_MATCH, CONST_FIELD_MATCH,
offset_start, offset_start,
@ -152,9 +152,10 @@ class CQiWrapper(CQiClient):
all_matches = [] all_matches = []
all_cpos = [] all_cpos = []
for start, end in match_boundaries: for start, end in match_boundaries:
end += 1
lc_cpos = list(range(max([0, start - self.context_len]), start)) lc_cpos = list(range(max([0, start - self.context_len]), start))
lc = {'lc': lc_cpos} lc = {'lc': lc_cpos}
match_cpos = list(range(start, end + 1)) match_cpos = list(range(start, end))
match = {'hit': match_cpos} match = {'hit': match_cpos}
rc_cpos = list(range(end, min([self.corpus_max_len, end + self.context_len]))) rc_cpos = list(range(end, min([self.corpus_max_len, end + self.context_len])))
rc = {'rc': rc_cpos} rc = {'rc': rc_cpos}
@ -226,7 +227,7 @@ class CQiWrapper(CQiClient):
text_lookup = {} text_lookup = {}
for d in structs_to_check: for d in structs_to_check:
s_key, s_value = zip(*d.items()) s_key, s_value = zip(*d.items())
s_value = s_value[0].split('_')[1] s_value = s_value[0].split('_', 1)[1]
struct_values = self.cl_struc2str(s_key[0], text_lookup_ids) struct_values = self.cl_struc2str(s_key[0], text_lookup_ids)
zipped = dict(zip(text_lookup_ids, struct_values)) zipped = dict(zip(text_lookup_ids, struct_values))
for zip_key, zip_value in zipped.items(): for zip_key, zip_value in zipped.items():

View File

@ -44,10 +44,15 @@ def corpus_analysis(message):
return return
""" Prepare and execute a query """ """ Prepare and execute a query """
corpus_name = 'CORPUS' corpus_name = 'CORPUS'
query = (message['query']) query = str(message['query'])
result_len = int(message['hits_per_page'])
context_len = int(message['context'])
result_offset = int(0)
client.select_corpus(corpus_name) client.select_corpus(corpus_name)
client.query_subcorpus(query) client.query_subcorpus(query)
results = client.show_query_results(result_len=int(message['hits_per_page']), context_len=int(message['context'])) results = client.show_query_results(result_len=result_len,
context_len=context_len,
result_offset=result_offset)
socketio.emit('corpus_analysis', results, room=request.sid) socketio.emit('corpus_analysis', results, room=request.sid)

View File

@ -269,10 +269,11 @@
</tr> </tr>
<tr> <tr>
<td class="left-align"> <td class="left-align">
word: ${token["word"]}<br> Word: ${token["word"]}<br>
lemma: ${token["lemma"]}<br> Lemma: ${token["lemma"]}<br>
pos: ${token["pos"]}<br> POS: ${token["pos"]}<br>
simple_pos: ${token["simple_pos"]} Simple POS: ${token["simple_pos"]}<br>
NER: ${token["ner"]}
</td> </td>
<td class="left-align"> <td class="left-align">
Title: ${result["text_lookup"][token["text"]]["title"]}<br> Title: ${result["text_lookup"][token["text"]]["title"]}<br>