From 54f3036d89725df744087789c100bd6753f280c9 Mon Sep 17 00:00:00 2001 From: Patrick Jentsch Date: Wed, 27 Nov 2019 13:29:43 +0100 Subject: [PATCH] push --- app/corpora/events.py | 47 +++++++++++++++---- app/templates/corpora/analyse_corpus.html.j2 | 49 +++++++++++++------- 2 files changed, 71 insertions(+), 25 deletions(-) diff --git a/app/corpora/events.py b/app/corpora/events.py index 0da86efc..1298a847 100644 --- a/app/corpora/events.py +++ b/app/corpora/events.py @@ -47,15 +47,44 @@ def corpus_analysis(message): room=request.sid) return """ Prepare and execute a query """ - logger.warning('Payload: {}'.format(message)) - corpus_name = 'CORPUS' - query = message['query'] - client.select_corpus(corpus_name) - client.query_subcorpus(query) - results = client.show_query_results( - result_len=int(message['hits_per_page']), - context_len=int(message['context'])) - socketio.emit('corpus_analysis', results, room=request.sid) + corpus = 'CORPUS' + query = (message['query']) + query_subcorpus = 'Results' + client.cqp_query(corpus, query_subcorpus, query) + + data = {'matches': [], 'cpos_lookup': {}, 'text_loopup': {}} + + """ Evaluate query results """ + match_corpus = '{}:{}'.format(corpus, query_subcorpus) + match_num = min(int(message['hits_per_page']), client.cqp_subcorpus_size(match_corpus)) + match_boundaries = zip(client.cqp_dump_subcorpus(match_corpus, + 0x10, + 0, match_num - 1), + client.cqp_dump_subcorpus(match_corpus, + 0x11, + 0, match_num - 1)) + context = 15 + corpus_len = 10000 + for match_start, match_end in match_boundaries: + data['matches'].append({'lc': list(range(max(0, match_start - int(message['context'])), match_start)), + 'hit': list(range(match_start, match_end + 1)), + 'rc': list(range(match_end + 1, min(corpus_len, match_end + 1 + int(message['context']))))}) + cpos_list = [] + for match in data['matches']: + cpos_list += match['lc'] + match['hit'] + match['rc'] + cpos_list = list(set(cpos_list)) + pos_list = client.cl_cpos2str('{}.pos'.format(corpus), cpos_list) + simple_pos_list = client.cl_cpos2str('{}.simple_pos'.format(corpus), cpos_list) + text_id_list = client.cl_cpos2struc('{}.text_title'.format(corpus), cpos_list) + word_list = client.cl_cpos2str('{}.word'.format(corpus), cpos_list) + for cpos, pos, simple_pos, text_id, word in zip(cpos_list, pos_list, simple_pos_list, text_id_list, word_list): + data['cpos_lookup'][cpos] = {'pos': pos, 'simple_pos': simple_pos, 'text_id': text_id, 'word': word} + text_author_list = client.cl_struc2str('{}.text_author'.format(corpus), text_id_list) + text_publishing_year_list = client.cl_struc2str('{}.text_publishing_year'.format(corpus), text_id_list) + text_title_list = client.cl_struc2str('{}.text_title'.format(corpus), text_id_list) + for text_id, text_author, text_publishing_year, text_title in zip(text_id_list, text_author_list, text_publishing_year_list, text_title_list): + data['text_loopup'][text_id] = {'author': text_author, 'publishing_year': text_publishing_year, 'title': text_title} + socketio.emit('corpus_analysis', data, room=request.sid) def corpus_analysis_session_handler(app, corpus_id, session_id): diff --git a/app/templates/corpora/analyse_corpus.html.j2 b/app/templates/corpora/analyse_corpus.html.j2 index c60fa662..da81332b 100644 --- a/app/templates/corpora/analyse_corpus.html.j2 +++ b/app/templates/corpora/analyse_corpus.html.j2 @@ -42,6 +42,7 @@
+ Query Link
{{ query_download_form.hidden_tag() }} Download Results @@ -124,39 +125,55 @@ let queryData = {"context": formData.get("context"), "hits_per_page": formData.get("hits_per_page"), "query": formData.get("query")}; - socket.emit('corpus_analysis', queryData); - M.toast({html: 'Query has been sent!'}); + socket.emit("corpus_analysis", queryData); + M.toast({html: "Query has been sent!"}); }); - socket.on('corpus_analysis', function(result) { - console.log(result['matches']); - - var htmlString; + socket.on("corpus_analysis", function(result) { + console.log(result); + var htmlString = ""; var token; + if (result['matches'].length === 0) { + M.toast({html: 'No results!'}); + } + for (let match of result['matches']) { - htmlString = ``; + htmlString += ``; + htmlString += `` htmlString += ``; - for (cpos of match['lc']) { - token = result['cpos_lookup'][cpos]; - htmlString += `${token['word']}`; + for (cpos of match["lc"]) { + token = result["cpos_lookup"][cpos]; + htmlString += token["simple_pos"] != "PUNCT" ? " " : ""; + htmlString += `${token["word"]}`; } htmlString += ``; htmlString += ``; for (cpos of match["hit"]) { - token = result['cpos_lookup'][cpos]; - htmlString += `${token['word']}`; + token = result["cpos_lookup"][cpos]; + htmlString += token["simple_pos"] != "PUNCT" ? " " : ""; + htmlString += `${token["word"]}`; } htmlString += ``; htmlString += ``; - for (cpos of match['rc']) { - token = result['cpos_lookup'][cpos]; - htmlString += `${token['word']}`; + for (cpos of match["rc"]) { + token = result["cpos_lookup"][cpos]; + htmlString += token["simple_pos"] != "PUNCT" ? " " : ""; + htmlString += `${token["word"]}`; } htmlString += ``; htmlString += ``; - queryResultsElement.insertAdjacentHTML("beforeend", htmlString); } + + queryResultsElement.innerHTML = htmlString; + + queryResultsElement.querySelectorAll(".token").forEach(function(tokenElement) { + tokenElement.addEventListener("click", function(event) { + let token = result["cpos_lookup"][tokenElement.dataset.cpos]; + let text = result["text_loopup"][token["text_id"]]; + alert(`${token["word"]} // ${token["pos"]} // ${token["simple_pos"]} // ${text["title"]}`); + }); + }); }); {% endblock %}