From e4b794d37c1f942c719b4ecb76f2209c8fdf5106 Mon Sep 17 00:00:00 2001 From: Stephan Porada Date: Fri, 17 Jan 2020 14:31:14 +0100 Subject: [PATCH] Add tmp List.js implemetation --- app/corpora/CQiWrapper/CQiWrapper.py | 23 +- app/corpora/events.py | 16 +- app/templates/admin/index.html.j2 | 11 +- app/templates/corpora/analyse_corpus.html.j2 | 220 +++++++++++-------- 4 files changed, 162 insertions(+), 108 deletions(-) diff --git a/app/corpora/CQiWrapper/CQiWrapper.py b/app/corpora/CQiWrapper/CQiWrapper.py index eea9e558..ee515164 100644 --- a/app/corpora/CQiWrapper/CQiWrapper.py +++ b/app/corpora/CQiWrapper/CQiWrapper.py @@ -99,9 +99,9 @@ class CQiWrapper(CQiClient): + ':' + result_subcorpus_name) self.SUBCORPUS_NAMES.append(self.result_subcorpus) - self.nr_matches = self.cqp_subcorpus_size(self.result_subcorpus) - print('Nr of all matches is:', self.nr_matches) - logger.warning('Nr of all matches is: {}'.format(self.nr_matches)) + self.total_nr_matches = self.cqp_subcorpus_size(self.result_subcorpus) + logger.warning('Nr of all matches is: {}'.format(self.total_nr_matches)) + logger.warning('Nr of all matches is: {}'.format(self.total_nr_matches)) def show_subcorpora(self): ''' @@ -135,7 +135,7 @@ class CQiWrapper(CQiClient): self.corpus_max_len = self.cl_attribute_size( self.attr_strings['positional_attrs']['word'] ) - self.nr_matches = min(result_len, self.nr_matches) + self.nr_matches = min(result_len, self.total_nr_matches) if self.nr_matches == 0: logger.warning('Query resulted in 0 matches.') return None @@ -145,7 +145,9 @@ class CQiWrapper(CQiClient): # pair of cpositions # [(1355, 1357), (1477, 1479)] Example for two boundry pairs offset_start = 0 if result_offset == 0 else result_offset - offset_end = self.nr_matches + result_offset - 1 + logger.warning('Offset start is: {}'.format(offset_start)) + offset_end = min((self.nr_matches + result_offset - 1), self.total_nr_matches - 1) + logger.warning('Offset end is: {}'.format(offset_end)) match_boundaries = zip(self.cqp_dump_subcorpus(self.result_subcorpus, CONST_FIELD_MATCH, offset_start, @@ -182,7 +184,7 @@ class CQiWrapper(CQiClient): t1 = time.time() t_total = t1 - t0 logger.warning('Time to create all CPOS for query: {}'.format(t_total)) - print('Requesting {} CPOS with one query.'.format(len_all_cpos)) + logger.warning('Requesting {} CPOS with one query.'.format(len_all_cpos)) # Get cpos informations like CORPUS_NAME.word or CORPUS_NAME.lemma for # all cpos entries in all_cpos_list @@ -191,12 +193,12 @@ class CQiWrapper(CQiClient): all_cpos_infos, text_lookup = self.get_cpos_infos(all_cpos) t3 = time.time() t_final = t3 - t2 - print('Got infos for {} CPOS in {} seconds:'.format(len_all_cpos, - t_final)) + logger.warning('Got infos for {} CPOS in {} seconds:'.format(len_all_cpos, t_final)) self.results = {'matches': all_matches, 'cpos_lookup': all_cpos_infos, 'text_lookup': text_lookup, - 'nr_matches': self.nr_matches} + 'nr_matches': self.nr_matches, + 'total_nr_matches': self.total_nr_matches} return self.results def get_cpos_infos(self, all_cpos): @@ -235,7 +237,10 @@ class CQiWrapper(CQiClient): struct_values = self.cl_struc2str(s_key[0], text_lookup_ids) zipped = dict(zip(text_lookup_ids, struct_values)) for zip_key, zip_value in zipped.items(): + logger.warning('key: {}'.format(zip_key)) + logger.warning('value: {}'.format(zip_value)) check = text_lookup.get(zip_key) + logger.warning('check: {}'.format(check)) if check is None: text_lookup[zip_key] = {s_value: zip_value} else: diff --git a/app/corpora/events.py b/app/corpora/events.py index 46bcfe3b..226e8a5b 100644 --- a/app/corpora/events.py +++ b/app/corpora/events.py @@ -45,7 +45,7 @@ def corpus_analysis(message): # Prepare and execute a query corpus_name = 'CORPUS' query = str(message['query']) - result_len = int(message['hits_per_page']) + result_len = 500 context_len = int(message['context']) result_offset = 0 client.select_corpus(corpus_name) @@ -58,7 +58,19 @@ def corpus_analysis(message): results = client.show_query_results(result_len=result_len, context_len=context_len, result_offset=result_offset) - + result_offset += result_len # initial offfset is plus result len because client.show_query_results has been already executed once + while result_offset < client.total_nr_matches: + logger.warning('===While loop start.===') + logger.warning('result_offset: {}'.format(result_offset)) + results_append = client.show_query_results(result_len=result_len, + context_len=context_len, + result_offset=result_offset) + results['matches'].extend(results_append['matches']) + results['cpos_lookup'].update(results_append['cpos_lookup']) + results['text_lookup'].update(results_append['text_lookup']) + result_offset += result_len + result_offset = min(result_offset, client.total_nr_matches) + logger.warning('result_offset end of while loop: {}'.format(result_offset)) socketio.emit('corpus_analysis', results, room=request.sid) diff --git a/app/templates/admin/index.html.j2 b/app/templates/admin/index.html.j2 index efc07a09..7c0a96cd 100644 --- a/app/templates/admin/index.html.j2 +++ b/app/templates/admin/index.html.j2 @@ -11,15 +11,22 @@ + {{ table }} - + diff --git a/app/templates/corpora/analyse_corpus.html.j2 b/app/templates/corpora/analyse_corpus.html.j2 index bd48e441..aa2a01fb 100644 --- a/app/templates/corpora/analyse_corpus.html.j2 +++ b/app/templates/corpora/analyse_corpus.html.j2 @@ -129,19 +129,22 @@
-
+
Query Results - - - - - - - - - - -
TitleLeft contextMatchRight Context
+

+
    + + + + + + + + + + +
    TitleLeft contextMatchRight Context
    +
      @@ -215,15 +218,18 @@ var queryFormElement = document.getElementById("query-form"); var queryResultsElement = document.getElementById("query-results"); + var queryResultsMetadataElement = document.getElementById("query-results-metadata") var contextResultsElement = document.getElementById("context-results"); var queryLoadingElement = document.getElementById("getting-query-results"); var queryResultsTableElement = document.getElementById("recieved-query-results"); + var hitsPerPage; queryFormElement.addEventListener("submit", function(event) { event.preventDefault(); let formData = new FormData(queryFormElement); let queryData = {"context": formData.get("context"), "hits_per_page": formData.get("hits_per_page"), "query": formData.get("query")}; + hitsPerPage = formData.get("hits_per_page"); socket.emit("corpus_analysis", queryData); queryLoadingElement.classList.remove("hide"); queryResultsTableElement.classList.add("hide"); @@ -268,95 +274,118 @@ }); } return; - } + } else { + total_nr_matches = message["total_nr_matches"]; + let count_corpus_files = Object.keys(message["text_lookup"]).length; + queryResultsMetadataElement.innerText = message["total_nr_matches"] + " matches in " + count_corpus_files + " corpus files."; - var matchElement; - var matchTextTitlesElement; - var matchLeftContextElement; - var matchHitElement; - var matchRightContextElement; - var textTitles; - var token; - var tokenElement; + var matchElement; + var matchTextTitlesElement; + var matchLeftContextElement; + var matchHitElement; + var matchRightContextElement; + var textTitles; + var token; + var tokenElement; - // Set related global values - lookup["cpos"] = message["cpos_lookup"]; - lookup["text"] = message["text_lookup"]; - matches = message["matches"]; - tokenElements = new Set(); + // Set related global values + lookup["cpos"] = message["cpos_lookup"]; + lookup["text"] = message["text_lookup"]; + matches = message["matches"]; + tokenElements = new Set(); - for (let [index, match] of matches.entries()) { - matchElement = document.createElement("tr"); - matchElement.classList.add("match"); - matchElement.dataset.index = index; - matchTextTitlesElement = document.createElement("td"); - matchTextTitlesElement.classList.add("text-titles"); - matchElement.append(matchTextTitlesElement); - matchLeftContextElement = document.createElement("td"); - matchLeftContextElement.classList.add("left-context"); - for (cpos of match["lc"]) { - token = lookup["cpos"][cpos]; - tokenElement = document.createElement("span"); - tokenElement.classList.add("token"); - tokenElement.dataset.cpos = cpos; - tokenElement.innerText = token["word"]; - if (expertModeSwitchElement.checked) { - tokenElement.classList.add("chip"); - addToolTipToTokenElement(tokenElement, token); + for (let [index, match] of matches.entries()) { + matchElement = document.createElement("tr"); + matchElement.classList.add("match"); + matchElement.dataset.index = index; + matchTextTitlesElement = document.createElement("td"); + matchTextTitlesElement.classList.add("text-titles"); + matchElement.append(matchTextTitlesElement); + matchLeftContextElement = document.createElement("td"); + matchLeftContextElement.classList.add("left-context"); + for (cpos of match["lc"]) { + token = lookup["cpos"][cpos]; + tokenElement = document.createElement("span"); + tokenElement.classList.add("token"); + tokenElement.dataset.cpos = cpos; + tokenElement.innerText = token["word"]; + if (expertModeSwitchElement.checked) { + tokenElement.classList.add("chip"); + addToolTipToTokenElement(tokenElement, token); + } + matchLeftContextElement.append(tokenElement); + matchLeftContextElement.append(document.createTextNode(" ")); + tokenElements.add(tokenElement); + } + matchElement.append(matchLeftContextElement); + matchHitElement = document.createElement("td"); + matchHitElement.classList.add("hit"); + textTitles = new Set(); + for (cpos of match["hit"]) { + token = lookup["cpos"][cpos]; + tokenElement = document.createElement("span"); + tokenElement.classList.add("token"); + tokenElement.dataset.cpos = cpos; + tokenElement.innerText = token["word"]; + if (expertModeSwitchElement.checked) { + tokenElement.classList.add("chip"); + addToolTipToTokenElement(tokenElement, token); + } + matchHitElement.append(tokenElement); + matchHitElement.append(document.createTextNode(" ")); + tokenElements.add(tokenElement); + textTitles.add(lookup["text"][token["text"]]["title"]); + } + var moreContextBtn = document.createElement("a"); + moreContextBtn.setAttribute("class", "btn-floating btn waves-effect waves-light teal right inspect"); + moreContextBtn.innerHTML = 'search'; + matchHitElement.append(document.createElement("br"), document.createElement("br")); + matchHitElement.append(moreContextBtn); + + matchTextTitlesElement.innerText = [...textTitles].join(","); + matchElement.append(matchHitElement); + matchRightContextElement = document.createElement("td"); + matchRightContextElement.classList.add("right-context"); + for (cpos of match["rc"]) { + token = lookup["cpos"][cpos]; + tokenElement = document.createElement("span"); + tokenElement.classList.add("token"); + tokenElement.dataset.cpos = cpos; + tokenElement.innerText = token["word"]; + if (expertModeSwitchElement.checked) { + tokenElement.classList.add("chip"); + addToolTipToTokenElement(tokenElement, token); + } + matchRightContextElement.append(tokenElement); + matchRightContextElement.append(document.createTextNode(" ")); + tokenElements.add(tokenElement); + } + matchElement.append(matchRightContextElement); + queryResultsElement.append(matchElement); } - matchLeftContextElement.append(tokenElement); - matchLeftContextElement.append(document.createTextNode(" ")); - tokenElements.add(tokenElement); } - matchElement.append(matchLeftContextElement); - matchHitElement = document.createElement("td"); - matchHitElement.classList.add("hit"); - textTitles = new Set(); - for (cpos of match["hit"]) { - token = lookup["cpos"][cpos]; - tokenElement = document.createElement("span"); - tokenElement.classList.add("token"); - tokenElement.dataset.cpos = cpos; - tokenElement.innerText = token["word"]; - if (expertModeSwitchElement.checked) { - tokenElement.classList.add("chip"); - addToolTipToTokenElement(tokenElement, token); - } - matchHitElement.append(tokenElement); - matchHitElement.append(document.createTextNode(" ")); - tokenElements.add(tokenElement); - textTitles.add(lookup["text"][token["text"]]["title"]); + var options = {page: hitsPerPage, + pagination: [{ + name: "paginationTop", + paginationClass: "paginationTop", + innerWindow: 8, + outerWindow: 1 + }, { + paginationClass: "paginationBottom", + innerWindow: 8, + outerWindow: 1 + }], + valueNames: ["text-titles", "left-context", "hit", "right-context"]}; + var userList = new List('result-list', options); + var inspectBtns = document.getElementsByClassName("inspect"); + for(var i = 0; i < inspectBtns.length; i++) { + var inspectBtn = inspectBtns[i]; + var dataIndex = inspectBtn.parentNode.parentNode.getAttribute("data-index"); + inspectBtn.onclick = function() { + contextModal.open(); + socket.emit("inspect_match", {"cpos": matches[dataIndex]["hit"]}); + }; } - var moreContextBtn = document.createElement("a"); - moreContextBtn.setAttribute("class", "btn-floating btn waves-effect waves-light teal right"); - moreContextBtn.innerHTML = 'search'; - matchHitElement.append(document.createElement("br"), document.createElement("br")); - matchHitElement.append(moreContextBtn); - moreContextBtn.onclick = function() { - contextModal.open(); - socket.emit("inspect_match", {"cpos": match["hit"]}); - }; - matchTextTitlesElement.innerText = [...textTitles].join(","); - matchElement.append(matchHitElement); - matchRightContextElement = document.createElement("td"); - matchRightContextElement.classList.add("right-context"); - for (cpos of match["rc"]) { - token = lookup["cpos"][cpos]; - tokenElement = document.createElement("span"); - tokenElement.classList.add("token"); - tokenElement.dataset.cpos = cpos; - tokenElement.innerText = token["word"]; - if (expertModeSwitchElement.checked) { - tokenElement.classList.add("chip"); - addToolTipToTokenElement(tokenElement, token); - } - matchRightContextElement.append(tokenElement); - matchRightContextElement.append(document.createTextNode(" ")); - tokenElements.add(tokenElement); - } - matchElement.append(matchRightContextElement); - queryResultsElement.append(matchElement); - } }); function addToolTipToTokenElement(tokenElement, token) { @@ -425,5 +454,6 @@ contextResultsElement.append(sentenceElement); } }); + {% endblock %}