Change corpus analysis init behavior

This commit is contained in:
Patrick Jentsch 2020-01-27 13:18:54 +01:00
parent cd02046590
commit 2c0f009e7c
3 changed files with 448 additions and 21 deletions

View File

@ -1,6 +1,6 @@
from app import db, logger, socketio from app import db, logger, socketio
from app.events import connected_sessions from app.events import connected_sessions
from app.models import Corpus from app.models import Corpus, User
from flask import current_app, request from flask import current_app, request
from flask_login import current_user, login_required from flask_login import current_user, login_required
from .CQiWrapper.CQiWrapper import CQiWrapper from .CQiWrapper.CQiWrapper import CQiWrapper
@ -17,21 +17,13 @@ corpus_analysis_sessions = {}
corpus_analysis_clients = {} corpus_analysis_clients = {}
@socketio.on('request_corpus_analysis') @socketio.on('corpus_analysis_init')
@login_required @login_required
def request_corpus_analysis(corpus_id): def init_corpus_analysis(corpus_id):
corpus = Corpus.query.get(corpus_id) logger.warning("a")
if corpus is None:
socketio.emit('init_corpus_analysis', '[404]: Not Found',
room=request.sid)
elif not (corpus.creator == current_user
or current_user.is_administrator()):
socketio.emit('init_corpus_analysis', '[403]: Forbidden',
room=request.sid)
else:
socketio.start_background_task(corpus_analysis_session_handler, socketio.start_background_task(corpus_analysis_session_handler,
current_app._get_current_object(), current_app._get_current_object(),
corpus_id, request.sid) corpus_id, current_user.id, request.sid)
@socketio.on('corpus_analysis') @socketio.on('corpus_analysis')
@ -91,10 +83,21 @@ def inspect_match(message):
socketio.emit('match_context', match_context, room=request.sid) socketio.emit('match_context', match_context, room=request.sid)
def corpus_analysis_session_handler(app, corpus_id, session_id): def corpus_analysis_session_handler(app, corpus_id, user_id, session_id):
with app.app_context(): with app.app_context():
''' Setup analysis session ''' ''' Setup analysis session '''
corpus = Corpus.query.get(corpus_id) corpus = Corpus.query.get(corpus_id)
user = User.query.get(user_id)
if corpus is None:
logger.warning("404")
response = {'code': 404, 'msg': 'Not Found'}
socketio.emit('corpus_analysis_init', response, room=session_id)
return
elif not (corpus.creator == user or user.is_administrator()):
logger.warning("403")
response = {'code': 403, 'msg': 'Forbidden'}
socketio.emit('corpus_analysis_init', response, room=session_id)
return
while corpus.status != 'analysing': while corpus.status != 'analysing':
db.session.refresh(corpus) db.session.refresh(corpus)
socketio.sleep(3) socketio.sleep(3)
@ -105,8 +108,8 @@ def corpus_analysis_session_handler(app, corpus_id, session_id):
corpus_analysis_sessions[corpus_id] = [session_id] corpus_analysis_sessions[corpus_id] = [session_id]
else: else:
corpus_analysis_sessions[corpus_id].append(session_id) corpus_analysis_sessions[corpus_id].append(session_id)
socketio.emit('request_corpus_analysis', '[201]: Created', response = {'code': 201, 'msg': 'Created'}
room=session_id) socketio.emit('corpus_analysis_init', response, room=session_id)
''' Observe analysis session ''' ''' Observe analysis session '''
while session_id in connected_sessions: while session_id in connected_sessions:
socketio.sleep(3) socketio.sleep(3)

View File

@ -1,6 +1,6 @@
from flask_wtf import FlaskForm from flask_wtf import FlaskForm
from wtforms import (FileField, StringField, SubmitField, from wtforms import (BooleanField, FileField, StringField, SubmitField,
ValidationError, IntegerField, SelectField, TextAreaField) ValidationError, IntegerField, SelectField)
from wtforms.validators import DataRequired, Length from wtforms.validators import DataRequired, Length
@ -50,7 +50,8 @@ class EditCorpusFileForm(FlaskForm):
class QueryForm(FlaskForm): class QueryForm(FlaskForm):
query = StringField('CQP Query', expert_mode = BooleanField('Expert mode')
query = StringField('Query',
validators=[DataRequired(), Length(1, 1024)]) validators=[DataRequired(), Length(1, 1024)])
hits_per_page = SelectField('Hits per page', hits_per_page = SelectField('Hits per page',
choices=[('', 'Nr. of hits per page'), choices=[('', 'Nr. of hits per page'),

View File

@ -0,0 +1,423 @@
{% extends "full_width.html.j2" %}
{% block page_content %}
<!-- Query Form -->
<div class="col s12">
<form id="query-form">
{{ query_form.hidden_tag() }}
<ul class="collapsible expandable" id="query-form-collapsible">
<li class="active">
<div class="collapsible-body" style="padding-top: 10px; padding-right: 2rem; padding-bottom: 0px; padding-left: 2rem;">
<div class="row">
<div class="col s12 m11 input-field">
<i class="material-icons prefix">search</i>
{{ query_form.query() }}
{{ query_form.query.label }}
<span class="helper-text">
<a href="http://cwb.sourceforge.net/files/CQP_Tutorial/"><i class="material-icons" style="font-size: inherit;">help</i> CQP query language tutorial</a>
</span>
</div>
<div class="col s12 m1 right-align">
<p class="hide-on-small-only" style="margin: 0px;">&nbsp;</p>
<button class="btn-small waves-effect waves-light" type="submit">Send<i class="material-icons right">send</i></button>
</div>
</div>
</div>
</li>
<li>
<div class="collapsible-header"><i class="material-icons">settings</i>Display Options</div>
<div class="collapsible-body">
<div class="row">
<div class="col s12 m6 input-field">
<i class="material-icons prefix">format_list_numbered</i>
{{ query_form.hits_per_page() }}
{{ query_form.hits_per_page.label }}
</div>
<div class="col s12 m6 input-field">
<i class="material-icons prefix">short_text</i>
{{ query_form.context() }}
{{ query_form.context.label }}
</div>
</div>
<div class="col s12">
<div class="switch">
<label>
{{ query_form.expert_mode.label.text }}
{{ query_form.expert_mode() }}
<span class="lever"></span>
</label>
</div>
</div>
</div>
</li>
</ul>
</form>
</div>
<!-- Loading animation card when query results are being loaded -->
<div class="col s12">
<div class="card hide" id="query-loading-card">
<div class="card-content">
<span class="card-title">Fetching your results!</span>
<div>
<div class="progress">
<div class="indeterminate"></div>
</div>
</div>
</div>
</div>
</div>
<div class="col s12">
<div class="card" id="query-results-card">
<div class="card-content" id="result-list">
<span class="card-title">Query Results</span>
<p id="query-results-metadata">
<button id="export-query-results" class="waves-effect waves-light btn-small right" type="submit">Export Results<i class="material-icons right">file_download</i></button>
</p>
<ul class="pagination pagination-top"></ul>
<table class="responsive-table highlight">
<thead>
<tr>
<th style="width: 5%">Title</th>
<th style="width: 25%">Left context</th>
<th style="width: 45%">Match</th>
<th style="width: 25%">Right Context</th>
</tr>
</thead>
<tbody class="list" id="query-results"></tbody>
</table>
<ul class="pagination pagination-bottom"></ul>
</div>
</div>
</div>
<!-- Context modal used for detailed information about one match -->
<div class="modal modal-fixed-footer" id="context-modal">
<div class="modal-content">
<h4>Match context and information</h4>
<div id="context-modal-loading">
<div class="progress">
<div class="indeterminate"></div>
</div>
</div>
<div id="context-modal-ready" class="hide">
<div id="context-results"></div>
</div>
</div>
<div class="modal-footer">
<a href="#!" class="left waves-effect waves-green btn">Export</a>
<a href="#!" class="modal-close waves-effect waves-green red btn">Close</a>
</div>
</div>
<!-- Export query results modal -->
<div class="modal modal-fixed-footer" id="export-modal">
<div class="modal-content">
{{ query_download_form.hidden_tag() }}
<h4>Download current query Results</h4>
<p>The results of the current query can be downloaded in several file formats such as CSV or JSON. These files can be used in other software like Excel. It is also easy to publish your results as raw data this way!</p>
<table>
<tr>
<td>JSON</td>
<td>
<a class="btn waves-effect waves-light" id="download-results">Download
<i class="material-icons right">file_download</i>
</a>
</td>
</tr>
<tr>
<td>CSV</td>
<td>
<a class="btn waves-effect waves-light disabled">Download
<i class="material-icons right">file_download</i>
</a>
</td>
</tr>
<tr>
<td>EXCEL</td>
<td>
<a class="btn waves-effect waves-light disabled">Download
<i class="material-icons right">file_download</i>
</a>
</td>
</tr>
<tr>
<td>HTML</td>
<td>
<a class="btn waves-effect waves-light disabled">Download
<i class="material-icons right">file_download</i>
</a>
</td>
</tr>
</table>
</div>
<div class="modal-footer">
<a href="#!" class="modal-close waves-effect waves-green red btn">Close</a>
</div>
</div>
<div id="init-modal" class="modal no-autoinit">
<div class="modal-content">
<h4>Initializing analysis</h4>
<div class="progress">
<div class="indeterminate"></div>
</div>
</div>
</div>
<script>
// Modal objects (initialized after DOM content is loaded)
var contextModal, exportModal, initModal;
// Query form elements
var queryFormElement = document.getElementById("query-form"),
    queryElement = document.getElementById("query");
// Display option elements
var contextElement = document.getElementById("context"),
    expertModeElement = document.getElementById("expert_mode"),
    hitsPerPageElement = document.getElementById("hits_per_page");
// Result presentation elements. The metadata paragraph and the context result
// container are looked up here because the socket handlers below use them.
var queryLoadingCardElement = document.getElementById("query-loading-card"),
    queryResultsCardElement = document.getElementById("query-results-card"),
    queryResultsMetadataElement = document.getElementById("query-results-metadata"),
    exportQueryResultsElement = document.getElementById("export-query-results"),
    contextResultsElement = document.getElementById("context-results"),
    // The list container carries the id "result-list" (not "resultList")
    resultListElement = document.getElementById("result-list"),
    // List.js instance rendering one table row per match, with a pagination
    // bar above and below the table
    resultList = new List('result-list',
                          {"pagination": [{"name": "paginationTop",
                                           "paginationClass": "pagination-top",
                                           "innerWindow": 8,
                                           "outerWindow": 1},
                                          {"name": "paginationBottom",
                                           "paginationClass": "pagination-bottom",
                                           "innerWindow": 8,
                                           "outerWindow": 1}],
                           "valueNames": ["titles", "lc", "hit", "rc"],
                           "item": `<tr>
<td class="titles"></td>
<td class="lc"></td>
<td class="hit"></td>
<td class="rc"></td>
</tr>`});
// Query result data store
var result = {"lookup": {"cpos": {}, "s": {}, "text": {}}, "matches": []};
// The handlers below read and write `lookup` and `matches` directly, so both
// are declared explicitly instead of being created as implicit globals on
// first assignment.
var lookup = {"cpos": {}, "s": {}, "text": {}},
    matches = [];
// Token <span> elements currently shown in the context modal
var tokenElements = [];
document.addEventListener("DOMContentLoaded", function() {
contextModal = M.Modal.getInstance(document.getElementById("context-modal"));
exportQueryResultsModal = M.Modal.getInstance(document.getElementById("export-modal"));
initModal = M.Modal.init(document.getElementById("init-modal"), {"dismissible": false});
M.Collapsible.init(document.getElementById("query-form-collapsible"), {"accordion": false});
initModal.open();
nopaque.socket.emit("corpus_analysis_init", {{ corpus_id }});
});
// Reply to the init request: a response with code 201 closes the init modal,
// every other response is only written to the console.
nopaque.socket.on("corpus_analysis_init", function(response) {
  if (response.code !== 201) {
    console.log("[ERROR] corpus_analysis_init");
    console.log(response);
    return;
  }
  initModal.close();
});
// Toggle expert mode for every token currently tracked in `tokenElements`:
// switching it on renders tokens as chips with an information tooltip,
// switching it off removes both again.
expertModeElement.addEventListener("change", function(event) {
  if (expertModeElement.checked) {
    for (let tokenElement of tokenElements) {
      tokenElement.classList.add("chip");
      // Block-scoped so the token does not leak into the global scope
      let token = lookup["cpos"][tokenElement.dataset.cpos];
      addToolTipToTokenElement(tokenElement, token);
    }
  } else {
    for (let tokenElement of tokenElements) {
      let tooltip = M.Tooltip.getInstance(tokenElement);
      // An element may have no tooltip instance attached; skip destroy then
      // instead of throwing and aborting the loop.
      if (tooltip) {
        tooltip.destroy();
      }
      tokenElement.classList.remove("chip");
    }
  }
});
// Clicking the "Export Results" button opens the export modal.
function openExportModal() {
  exportModal.open();
}
exportQueryResultsElement.addEventListener("click", openExportModal);
// On submit, collect the query and the display options, send them to the CQP
// server via socket.io and reset the result view and the data stores.
queryFormElement.addEventListener("submit", function(event) {
  // The query travels over the socket, so suppress the regular form submit
  event.preventDefault();
  const payload = {};
  payload["context"] = contextElement.value;
  payload["hits_per_page"] = hitsPerPageElement.value;
  payload["query"] = queryElement.value;
  nopaque.socket.emit("corpus_analysis", payload);
  nopaque.toast("Query has been sent!");
  // Show the loading card, hide the result card
  queryLoadingCardElement.classList.remove("hide");
  queryResultsCardElement.classList.add("hide");
  // Drop everything that belongs to the previous query
  resultList.clear();
  lookup = {"cpos": {}, "s": {}, "text": {}};
  matches = [];
  tokenElements = [];
});
// Concatenate the words of the given corpus positions, each preceded by a
// single space. A missing position list yields an empty string.
function joinTokenWords(cposList, cposLookup) {
  return (cposList || []).map(function(cpos) {
    return " " + cposLookup[cpos]["word"];
  }).join("");
}

// socket.on triggered when result chunks are received.
// A falsy chunk means "no results", the string "CQI_CQP_ERROR_GENERAL" means
// the query was invalid. Otherwise the chunk's matches and lookups are merged
// into the data stores and one row per match is added to the result list.
nopaque.socket.on("corpus_analysis", function(chunk) {
  console.log("### Recieved query results chunk ###");
  console.log(chunk);
  // Sanity checks
  if (!chunk) {
    // No results for this query; stop the loading animation as well
    queryLoadingCardElement.classList.add("hide");
    nopaque.toast("No results!");
    return;
  } else if (chunk === "CQI_CQP_ERROR_GENERAL") {
    // Invalid query: hide both the loading and the result card
    queryLoadingCardElement.classList.add("hide");
    queryResultsCardElement.classList.add("hide");
    nopaque.toast("Invalid query!", "red");
    return;
  }
  // Add chunk to the query result data stores
  matches.push(...chunk["matches"]);
  lookup["cpos"] = {...lookup["cpos"], ...chunk["cpos_lookup"]};
  lookup["text"] = {...lookup["text"], ...chunk["text_lookup"]};
  console.log("### Updated query result data stores ###");
  console.log(matches);
  console.log(lookup);
  // Hide loading card and unhide (show) result card
  queryLoadingCardElement.classList.add("hide");
  queryResultsCardElement.classList.remove("hide");
  // Write metadata query information into HTML elements
  // like nr. of all matches in how many files etc.
  // TODO: count_corpus_files has to be taken from the full results.
  let countCorpusFiles = Object.keys(lookup["text"]).length;
  // Looked up locally so this handler does not depend on a global reference
  let metadataElement = document.getElementById("query-results-metadata");
  // Replacing the text removes the export button from the paragraph, so it is
  // appended again right afterwards.
  metadataElement.textContent = chunk["total_nr_matches"] + " matches in " + countCorpusFiles + " corpus files.";
  metadataElement.appendChild(exportQueryResultsElement);
  // One list item per match; "titles" is still a placeholder value
  let toAdd = chunk["matches"].map(function(match) {
    return {titles: "test",
            lc: joinTokenWords(match["lc"], chunk["cpos_lookup"]),
            hit: joinTokenWords(match["hit"], chunk["cpos_lookup"]),
            rc: joinTokenWords(match["rc"], chunk["cpos_lookup"])};
  });
  resultList.add(toAdd, function(added) {console.log('All ' + added.length + ' results were added!')});
});
// Function to download data to a file.
//   downloadElem: <a> element that serves the download
//   data:         content of the file
//   filename:     name suggested for the saved file
//   type:         MIME type of the file
// Where msSaveOrOpenBlob exists the file is handed to it directly, otherwise
// the anchor is pointed at a blob URL.
function download(downloadElem, data, filename, type) {
  var file = new Blob([data], {type: type});
  if (window.navigator.msSaveOrOpenBlob) { // IE10+
    window.navigator.msSaveOrOpenBlob(file, filename);
  } else { // Others
    downloadElem.href = URL.createObjectURL(file);
    downloadElem.download = filename;
  }
}

// Create the JSON filename for a download: UTC date and time of `date`
// followed by the query, in which every character outside [a-z0-9_-]
// (case-insensitive) is replaced by an underscore.
function buildResultFilename(query, date) {
  var currentDate = date.getUTCFullYear() + '-' + (date.getUTCMonth() + 1) + '-' + date.getUTCDate();
  var currentTime = date.getUTCHours() + ":" + date.getUTCMinutes() + ":" + date.getUTCSeconds();
  var safeFilename = String(query || "").replace(/[^a-z0-9_-]/gi, "_");
  return "UTC-" + currentDate + "_" + currentTime + "_" + safeFilename + ".json";
}

// Wire the JSON download link of the export modal to the given result
// message. This code used to sit inside download() itself, where it referred
// to an undefined `message` and called download() recursively.
function prepareResultsDownload(message) {
  // get the <a> where the download is served
  var downloadResults = document.getElementById("download-results");
  // stringify JSON object for json download
  var dataStr = JSON.stringify(message, undefined, 2);
  var resultFilename = buildResultFilename(message["query"], new Date());
  // Assign a function: the handler sets href/download on the anchor before
  // the browser performs the link's default action.
  downloadResults.onclick = function() {
    download(downloadResults, dataStr, resultFilename, "text/json");
  };
}
// Attach a Materialize tooltip to `tokenElement` that shows the token's
// annotations (word, lemma, POS, simple POS, NER) next to the title, author
// and publishing year of the text it belongs to. The text is resolved via
// token["text"] in the global `lookup["text"]` store.
function addToolTipToTokenElement(tokenElement, token) {
  // The values come from corpus data and are inserted into markup, so
  // characters with a meaning in HTML have to be escaped.
  function escapeHtml(value) {
    return String(value).replace(/&/g, "&amp;")
                        .replace(/</g, "&lt;")
                        .replace(/>/g, "&gt;")
                        .replace(/"/g, "&quot;")
                        .replace(/'/g, "&#39;");
  }
  // Fall back to an empty object so a missing text entry shows "undefined"
  // fields instead of throwing a TypeError.
  var text = lookup["text"][token["text"]] || {};
  M.Tooltip.init(tokenElement,
                 {"html": `<table>
                             <tr>
                               <th>Token information</th>
                               <th>Source information</th>
                             </tr>
                             <tr>
                               <td class="left-align">
                                 Word: ${escapeHtml(token["word"])}<br>
                                 Lemma: ${escapeHtml(token["lemma"])}<br>
                                 POS: ${escapeHtml(token["pos"])}<br>
                                 Simple POS: ${escapeHtml(token["simple_pos"])}<br>
                                 NER: ${escapeHtml(token["ner"])}
                               </td>
                               <td class="left-align">
                                 Title: ${escapeHtml(text["title"])}<br>
                                 Author: ${escapeHtml(text["author"])}<br>
                                 Publishing year: ${escapeHtml(text["publishing_year"])}
                               </td>
                             </tr>
                           </table>`,
                  "inDuration": 1500,
                  "margin": 15,
                  "position": "top",
                  "transitionMovement": 0});
}
// Render the context of one match inside the context modal: every entry of
// message["context_s_cpos"] becomes a paragraph with one <span> per token,
// tokens listed in message["match_cpos_list"] are shown in bold.
nopaque.socket.on("match_context", function(message) {
  console.log("### match_context ###");
  console.log(message);
  // Looked up locally so this handler does not depend on a global reference
  let contextResultsElement = document.getElementById("context-results");
  contextResultsElement.innerHTML = "<p>&nbsp;</p>";
  document.getElementById("context-modal-loading").classList.add("hide");
  document.getElementById("context-modal-ready").classList.remove("hide");
  // Merge the lookups delivered with the context into the data stores
  lookup["cpos"] = {...lookup["cpos"], ...message["cpos_lookup"]};
  lookup["s"] = message["context_s_cpos"];
  lookup["text"] = {...lookup["text"], ...message["text_lookup"]};
  // NOTE(review): `tokenElements` is only reset on form submit, so elements
  // of a previously shown context stay tracked — confirm whether intended.
  for (let cposList of Object.values(message["context_s_cpos"])) {
    let sentenceElement = document.createElement("p");
    for (let cpos of cposList) {
      let token = lookup["cpos"][cpos];
      let tokenElement = document.createElement("span");
      tokenElement.classList.add("token");
      if (message["match_cpos_list"].includes(cpos)) {
        tokenElement.classList.add("bold");
      }
      tokenElement.dataset.cpos = cpos;
      tokenElement.innerText = token["word"];
      // The expert mode checkbox is `expertModeElement`
      if (expertModeElement.checked) {
        tokenElement.classList.add("chip");
        addToolTipToTokenElement(tokenElement, token);
      }
      // `tokenElements` is an array: push(), arrays have no add()
      tokenElements.push(tokenElement);
      sentenceElement.append(tokenElement);
      sentenceElement.append(document.createTextNode(" "));
    }
    contextResultsElement.append(sentenceElement);
  }
});
// collapsible display options
// NOTE(review): `elem` is not referenced anywhere else in the visible script;
// confirm whether this lookup is still needed.
var elem = document.querySelector('.collapsible.expandable');
</script>
{% endblock %}