From 1c9e715980f59acc1bcbb8e659f2ca870562a5aa Mon Sep 17 00:00:00 2001 From: Stephan Porada Date: Thu, 28 Nov 2019 15:19:52 +0100 Subject: [PATCH] Fixe some things for query results --- app/corpora/CQiWrapper/CQiWrapper.py | 9 +++++---- app/corpora/events.py | 9 +++++++-- app/templates/corpora/analyse_corpus.html.j2 | 9 +++++---- 3 files changed, 17 insertions(+), 10 deletions(-) diff --git a/app/corpora/CQiWrapper/CQiWrapper.py b/app/corpora/CQiWrapper/CQiWrapper.py index ea0acff9..c313bf05 100644 --- a/app/corpora/CQiWrapper/CQiWrapper.py +++ b/app/corpora/CQiWrapper/CQiWrapper.py @@ -133,8 +133,8 @@ class CQiWrapper(CQiClient): # match_boundries shows the start and end cpos of one match as a # pair of cpositions # [(1355, 1357), (1477, 1479)] Example for two boundry pairs - offset_start = 0 + (result_offset + 1) if result_offset != 0 else result_offset - offset_end = self.nr_matches + result_offset + offset_start = 0 if result_offset == 0 else result_offset + offset_end = self.nr_matches + result_offset - 1 match_boundaries = zip(self.cqp_dump_subcorpus(self.result_subcorpus, CONST_FIELD_MATCH, offset_start, @@ -152,9 +152,10 @@ class CQiWrapper(CQiClient): all_matches = [] all_cpos = [] for start, end in match_boundaries: + end += 1 lc_cpos = list(range(max([0, start - self.context_len]), start)) lc = {'lc': lc_cpos} - match_cpos = list(range(start, end + 1)) + match_cpos = list(range(start, end)) match = {'hit': match_cpos} rc_cpos = list(range(end, min([self.corpus_max_len, end + self.context_len]))) rc = {'rc': rc_cpos} @@ -226,7 +227,7 @@ class CQiWrapper(CQiClient): text_lookup = {} for d in structs_to_check: s_key, s_value = zip(*d.items()) - s_value = s_value[0].split('_')[1] + s_value = s_value[0].split('_', 1)[1] struct_values = self.cl_struc2str(s_key[0], text_lookup_ids) zipped = dict(zip(text_lookup_ids, struct_values)) for zip_key, zip_value in zipped.items(): diff --git a/app/corpora/events.py b/app/corpora/events.py index 8d1580c3..954e1594 100644 --- a/app/corpora/events.py +++ b/app/corpora/events.py @@ -44,10 +44,15 @@ def corpus_analysis(message): return """ Prepare and execute a query """ corpus_name = 'CORPUS' - query = (message['query']) + query = str(message['query']) + result_len = int(message['hits_per_page']) + context_len = int(message['context']) + result_offset = int(0) client.select_corpus(corpus_name) client.query_subcorpus(query) - results = client.show_query_results(result_len=int(message['hits_per_page']), context_len=int(message['context'])) + results = client.show_query_results(result_len=result_len, + context_len=context_len, + result_offset=result_offset) socketio.emit('corpus_analysis', results, room=request.sid) diff --git a/app/templates/corpora/analyse_corpus.html.j2 b/app/templates/corpora/analyse_corpus.html.j2 index 5db09786..a488c1e6 100644 --- a/app/templates/corpora/analyse_corpus.html.j2 +++ b/app/templates/corpora/analyse_corpus.html.j2 @@ -269,10 +269,11 @@ - word: ${token["word"]}
- lemma: ${token["lemma"]}
- pos: ${token["pos"]}
- simple_pos: ${token["simple_pos"]} + Word: ${token["word"]}
+ Lemma: ${token["lemma"]}
+ POS: ${token["pos"]}
+ Simple POS: ${token["simple_pos"]}
+ NER: ${token["ner"]} Title: ${result["text_lookup"][token["text"]]["title"]}