New analysis stuff

This commit is contained in:
Stephan Porada 2020-01-27 16:11:34 +01:00
parent b4427cd3ec
commit 4b0e9392a7
3 changed files with 97 additions and 71 deletions

View File

@@ -44,7 +44,6 @@ class CQiWrapper(CQiClient):
         '''
         p_attrs = self.corpus_positional_attributes(self.corpus_name)
         struct_attrs = self.corpus_structural_attributes(self.corpus_name)
-        logger.warning('struct_attrs: {}'.format(struct_attrs))
         self.attr_strings = {}
         self.attr_strings['positional_attrs'] = {}
         self.attr_strings['struct_attrs'] = {}
@@ -137,9 +136,15 @@ class CQiWrapper(CQiClient):
             self.attr_strings['positional_attrs']['word']
         )
         self.nr_matches = min(result_len, self.match_count)
-        if self.nr_matches == 0:
+        if self.match_count == 0:
             logger.warning('Query resulted in 0 matches.')
-            return None
+            self.results = {'code': 0,
+                            'result': {'matches': [],
+                                       'match_count': self.match_count,
+                                       'cpos_lookup': {},
+                                       'text_lookup': {}}
+                            }
+            return self.results
         else:
             # Get match cpos boundaries
             # match_boundries shows the start and end cpos of one match as a
@@ -200,7 +205,7 @@ class CQiWrapper(CQiClient):
                         'result': {'matches': all_matches,
                                    'match_count': self.match_count,
                                    'cpos_lookup': all_cpos_infos,
-                                   'text_lookup': text_lookup,}
+                                   'text_lookup': text_lookup}
                         }
         return self.results
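
With this change, show_query_results returns the same envelope shape in every case instead of None for empty results. A minimal sketch of that shape (the helper function is illustrative, not part of the commit):

    # Hypothetical helper mirroring the dict literals in the diff above.
    def make_query_response(matches, match_count, cpos_lookup, text_lookup,
                            code=0):
        return {'code': code,
                'result': {'matches': matches,
                           'match_count': match_count,
                           'cpos_lookup': cpos_lookup,
                           'text_lookup': text_lookup}}

    # Zero-match case: callers now get the same shape instead of None.
    empty = make_query_response([], 0, {}, {})
    assert empty['result']['matches'] == []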

View File

@@ -20,7 +20,7 @@ corpus_analysis_clients = {}
 @socketio.on('corpus_analysis_init')
 @login_required
 def init_corpus_analysis(corpus_id):
-    logger.warning("a")
+    logger.warning("Initiating corpus analysis.")
     socketio.start_background_task(corpus_analysis_session_handler,
                                    current_app._get_current_object(),
                                    corpus_id, current_user.id, request.sid)
@@ -45,27 +45,32 @@ def corpus_analysis(message):
         client.query_subcorpus(query)
     except Exception as e:
         logger.warning(e)
-        socketio.emit('corpus_analysis', str(e), room=request.sid)
+        response = str(e)
+        if response == "CQI_CQP_ERROR_GENERAL":
+            response = {'code': 1,
+                        'result': {'matches': [],
+                                   'match_count': 0,
+                                   'cpos_lookup': {},
+                                   'text_lookup': {}}
+                        }
+        socketio.emit('corpus_analysis_query', response, room=request.sid)
     else:
         logger.warning('====== Initial query {} ======'.format(query))
-        results = client.show_query_results(result_len=result_len,
-                                            context_len=context_len,
-                                            result_offset=result_offset)
-        result_offset += result_len  # initial offfset is plus result len because client.show_query_results has been already executed once
-        socketio.emit('corpus_analysis', results, room=request.sid)
+        response = client.show_query_results(result_len=result_len,
+                                             context_len=context_len,
+                                             result_offset=result_offset)
+        result_offset += result_len  # initial offset is plus result len because client.show_query_results has already been executed once
+        socketio.emit('corpus_analysis_query', response, room=request.sid)
         while result_offset < client.match_count:
             logger.warning('====== While loop start for {} ======'.format(query))
             logger.warning('result_offset: {}'.format(result_offset))
-            results = client.show_query_results(result_len=result_len,
-                                                context_len=context_len,
-                                                result_offset=result_offset)
-            # results['matches'].extend(results_append['matches'])
-            # results['cpos_lookup'].update(results_append['cpos_lookup'])
-            # results['text_lookup'].update(results_append['text_lookup'])
+            response = client.show_query_results(result_len=result_len,
+                                                 context_len=context_len,
+                                                 result_offset=result_offset)
             result_offset += result_len
             result_offset = min(result_offset, client.match_count)
             logger.warning('result_offset end of while loop: {}'.format(result_offset))
-            socketio.emit('corpus_analysis', results, room=request.sid)
 
 
 @socketio.on('inspect_match')
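
The handler streams results in fixed-size chunks: the first show_query_results call happens before the loop, then result_offset advances by result_len (clamped to match_count) until every match has been emitted. A minimal sketch of just that offset arithmetic (the function name is illustrative, not part of the commit):

    # Yield the result_offset values requested inside the while loop above,
    # assuming the initial chunk at offset 0 was already fetched.
    def chunk_offsets(match_count, result_len):
        result_offset = result_len
        while result_offset < match_count:
            yield result_offset
            result_offset = min(result_offset + result_len, match_count)

    # 250 matches in chunks of 100: the loop fetches offsets 100 and 200.
    assert list(chunk_offsets(250, 100)) == [100, 200]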

View File

@@ -224,12 +224,17 @@
           {"dismissible": true});
         M.Collapsible.init(elem, {accordion: false});
         loadingModal.open();
-        nopaque.socket.emit("request_corpus_analysis", {{ corpus_id }});
+        nopaque.socket.emit("corpus_analysis_init", {{ corpus_id }});
       });
       // close loading modal if container for analysis has started
-      nopaque.socket.on("request_corpus_analysis", function(msg) {
-        if (msg === "[201]: Created") {loadingModal.close();}
+      nopaque.socket.on("corpus_analysis_init", function(response) {
+        if (response.code === 201) {
+          loadingModal.close();
+        } else {
+          console.log("[ERROR] corpus_analysis_init");
+          console.log(response);
+        }
       });
       // exper view stuff reuse maybe and REMOVE later
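
The template now expects the init acknowledgement to be an object carrying a numeric code instead of the old "[201]: Created" string. The commit does not show the server side of that acknowledgement; a hypothetical sketch of what it could look like (import path and payload shape are assumptions, not part of the commit):

    from flask import request
    from app import socketio  # the app's Flask-SocketIO instance (assumed path)

    @socketio.on('corpus_analysis_init')
    def init_corpus_analysis(corpus_id):
        # ... start the corpus analysis session in the background ...
        # Hypothetical ack consumed by the modal-closing handler above:
        socketio.emit('corpus_analysis_init', {'code': 201, 'msg': 'Created'},
                      room=request.sid)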
@@ -275,9 +280,9 @@
     queryFormElement.addEventListener("submit", function(event) {
       event.preventDefault();
       let formData = new FormData(queryFormElement);
-      let queryData = {"context": formData.get("context"),
-                       "hits_per_page": formData.get("hits_per_page"),
-                       "query": formData.get("query")};
+      queryData = {"context": formData.get("context"),  // global declaration
+                   "hits_per_page": formData.get("hits_per_page"),
+                   "query": formData.get("query")};
       hitsPerPage = formData.get("hits_per_page");
       nopaque.socket.emit("corpus_analysis", queryData);
       // full results object declaration, kind of global maybe store it later?
@@ -318,7 +323,24 @@
     // socket.on triggered when result chunks are received
-    nopaque.socket.on("corpus_analysis", function(response) {
+    nopaque.socket.on("corpus_analysis_query", function(response) {
+      // ERROR code checking
+      if (response["code"] === 0) {
+        console.log("[SUCCESS] corpus_analysis_query");
+        console.log("Code: " + response["code"]);
+        // further code execution of this code block starting in line 342
+      } else if (response["code"] === 1) {
+        queryResultsTableElement.classList.add("hide");
+        queryLoadingElement.classList.add("hide");
+        nopaque.toast("Invalid query entered!", "red");
+        console.log("[ERROR] corpus_analysis_query");
+        console.log("Code: " + response["code"]);
+        return;  // no further code execution of this code block
+      } else {
+        console.log("[ERROR] corpus_analysis_query");
+        console.log("Code: " + response["code"]);
+        return;  // no further code execution of this code block
+      }
       // logs the current received chunk
       chunk = response["result"];
       console.log("### corpus_analysis chunk ###");
@@ -330,62 +352,56 @@
       Object.assign(result["cpos_lookup"], chunk["cpos_lookup"]);
       Object.assign(result["text_lookup"], chunk["text_lookup"]);
       result["match_count"] = chunk["match_count"];
-      result["query"] = chunk["query"];
+      result["query"] = queryData["query"];
       console.log(result);
       // Some hiding and showing of loading animations
       queryLoadingElement.classList.add("hide");
       queryResultsTableElement.classList.remove("hide");
       queryResultsElement.innerHTML = "";
-      // some checks for erroneous or empty query results
-      // No results for this query
-      if (chunk === null) {
+      // check if query has any results
+      if (chunk["matches"].length === 0) {
         queryResultsTableElement.classList.add("hide");
         nopaque.toast("No results for this query!");
         return;
-      // Query was invalid
-      } else if (chunk === "CQI_CQP_ERROR_GENERAL") {
-        queryResultsTableElement.classList.add("hide");
-        nopaque.toast("Invalid query entered!", "red");
-        return;
-      // List building/appending the chunks when query had results
-      } else {
-        // write metadata query information into HTML elements
-        // like nr. of all matches in how many files etc.
-        // TODO: count_corpus_files must be taken from the full results.
-        match_count = chunk["match_count"];
-        let count_corpus_files = Object.keys(result["text_lookup"]).length;
-        queryResultsMetadataElement.innerHTML = chunk["match_count"] + " matches in " + count_corpus_files + " corpus files.";
-        queryResultsMetadataElement.appendChild(exportQueryResults);
-        exportQueryResults.classList.remove("hide");
-        var toAdd = [];
-        for (let [index, match] of chunk["matches"].entries()) {
-          lc_tokens = "";
-          for (cpos of match["lc"]) {
-            word = chunk["cpos_lookup"][cpos]["word"];
-            lc_tokens += " " + word;
-          }
-          // console.log(lc_tokens);
-          hit_tokens = "";
-          for (cpos of match["hit"]) {
-            word = chunk["cpos_lookup"][cpos]["word"];
-            hit_tokens += " " + word;
-          }
-          // console.log(hit_tokens);
-          rc_tokens = "";
-          for (cpos of match["rc"]) {
-            word = chunk["cpos_lookup"][cpos]["word"];
-            rc_tokens += " " + word;
-          }
-          // console.log(rc_tokens);
-          item = { titles: "test", lc: lc_tokens, hit: hit_tokens, rc: rc_tokens };
-          toAdd.push(item);
-        }
-        resultList.add(toAdd, function(toAdd) {console.log('All '
-          + toAdd.length
-          + ' results were added!')});
-      }
+      }
+      // List building/appending the chunks when query had results
+      // write metadata query information into HTML elements
+      // like nr. of all matches in how many files etc.
+      // TODO: count_corpus_files must be taken from the full results.
+      match_count = chunk["match_count"];
+      let count_corpus_files = Object.keys(result["text_lookup"]).length;
+      queryResultsMetadataElement.innerHTML = chunk["match_count"] + " matches in " + count_corpus_files + " corpus files.";
+      queryResultsMetadataElement.appendChild(exportQueryResults);
+      exportQueryResults.classList.remove("hide");
+
+      var toAdd = [];
+      for (let [index, match] of chunk["matches"].entries()) {
+        lc_tokens = "";
+        for (cpos of match["lc"]) {
+          word = chunk["cpos_lookup"][cpos]["word"];
+          lc_tokens += " " + word;
+        }
+        // console.log(lc_tokens);
+        hit_tokens = "";
+        for (cpos of match["hit"]) {
+          word = chunk["cpos_lookup"][cpos]["word"];
+          hit_tokens += " " + word;
+        }
+        // console.log(hit_tokens);
+        rc_tokens = "";
+        for (cpos of match["rc"]) {
+          word = chunk["cpos_lookup"][cpos]["word"];
+          rc_tokens += " " + word;
+        }
+        // console.log(rc_tokens);
+        item = { titles: "test", lc: lc_tokens, hit: hit_tokens, rc: rc_tokens };
+        toAdd.push(item);
+      }
+      resultList.add(toAdd, function(toAdd) {console.log('All '
+        + toAdd.length
+        + ' results were added!')});
     });
     // Function to download data to a file
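
On the client, each corpus_analysis_query chunk is folded into one accumulated result object (Object.assign for the lookup dicts, appends for the matches). A minimal sketch of the same merge in Python (function name and sample data are illustrative, not part of the commit):

    # Merge one response chunk into the accumulated result, mirroring the
    # Object.assign/append calls in the template above.
    def merge_chunk(result, chunk):
        result.setdefault('matches', []).extend(chunk['matches'])
        result.setdefault('cpos_lookup', {}).update(chunk['cpos_lookup'])
        result.setdefault('text_lookup', {}).update(chunk['text_lookup'])
        result['match_count'] = chunk['match_count']
        return result

    result = {}
    for chunk in ({'matches': [[0, 1]], 'match_count': 2,
                   'cpos_lookup': {0: {'word': 'a'}}, 'text_lookup': {}},
                  {'matches': [[2, 3]], 'match_count': 2,
                   'cpos_lookup': {2: {'word': 'b'}}, 'text_lookup': {}}):
        merge_chunk(result, chunk)
    assert len(result['matches']) == 2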