compatibility fixes and add reimplementations

This commit is contained in:
Patrick Jentsch 2020-03-28 19:29:19 +01:00
parent 970d7024e0
commit e3fde2d5c9
12 changed files with 499 additions and 44 deletions

View File

@ -3,3 +3,4 @@ from flask import Blueprint
corpora = Blueprint('corpora', __name__)
from . import events, views # noqa
from . import pj_events, pj_views # noqa

View File

@ -59,14 +59,14 @@ class Subcorpus:
else:
lc_rbound = max(0, (match_start - 1))
if lc_rbound != match_start:
lc_lbound = max(0, match_start - context)
lc_lbound = max(0, (match_start - 1 - context))
lc = (lc_lbound, lc_rbound)
cpos_list_lbound = lc_lbound
else:
cpos_list_lbound = match_start
rc_lbound = min((match_end + 1), (self.corpus.size - 1))
if rc_lbound != match_end:
rc_rbound = min((match_end + context),
rc_rbound = min((match_end + 1 + context),
(self.corpus.size - 1))
rc = (rc_lbound, rc_rbound)
cpos_list_rbound = rc_rbound

View File

@ -72,9 +72,8 @@ def corpus_analysis_query(query):
logger.warning('Aborting due to status being "abort".')
break
else:
chunk = results.dump_values(context=context,
first_result=chunk_start,
num_results=chunk_size)
chunk = results.export(context=context, cutoff=chunk_size,
expand_lists=True, offset=chunk_start)
if (results.size == 0):
progress = 100
else:

View File

@ -15,7 +15,8 @@ class AddCorpusFileForm(FlaskForm):
journal = StringField('Journal', validators=[Length(0, 255)])
pages = StringField('Pages', validators=[Length(0, 255)])
publisher = StringField('Publisher', validators=[Length(0, 255)])
publishing_year = IntegerField('Publishing year', validators=[DataRequired()])
publishing_year = IntegerField('Publishing year',
validators=[DataRequired()])
school = StringField('School', validators=[Length(0, 255)])
submit = SubmitField()
title = StringField('Title', validators=[DataRequired(), Length(1, 255)])
@ -43,7 +44,8 @@ class EditCorpusFileForm(FlaskForm):
journal = StringField('Journal', validators=[Length(0, 255)])
pages = StringField('Pages', validators=[Length(0, 255)])
publisher = StringField('Publisher', validators=[Length(0, 255)])
publishing_year = IntegerField('Publishing year', validators=[DataRequired()])
publishing_year = IntegerField('Publishing year',
validators=[DataRequired()])
school = StringField('School', validators=[Length(0, 255)])
submit = SubmitField()
title = StringField('Title', validators=[DataRequired(), Length(1, 255)])

112
app/corpora/pj_events.py Normal file
View File

@ -0,0 +1,112 @@
from flask import current_app, request
from flask_login import current_user
from .cqi import CQiClient
from .. import db, socketio
from ..decorators import socketio_login_required
from ..events import connected_sessions
from ..models import Corpus, User
import math
# Registry of open analysis sessions, keyed by corpus id. Each value is the
# list of Socket.IO session ids (sid) attached to that corpus:
# {<corpus_id>: [<sid>, ...], ...}
pj_corpus_analysis_sessions = dict()
# Registry of CQi clients, keyed by the Socket.IO session id (sid) they
# belong to: {<sid>: CQiClient, ...}
pj_corpus_analysis_clients = dict()
@socketio.on('pj_corpus_analysis_init')
@socketio_login_required
def pj_init_corpus_analysis(corpus_id):
    """Start the background session handler for the requested corpus.

    The concrete application object, the id of the current user and the
    Socket.IO session id are resolved here and passed on as plain values to
    pj_corpus_analysis_session_handler, which runs as a background task.
    """
    app = current_app._get_current_object()
    user_id = current_user.id
    session_id = request.sid
    socketio.start_background_task(pj_corpus_analysis_session_handler, app,
                                   corpus_id, user_id, session_id)
@socketio.on('pj_corpus_analysis_query')
@socketio_login_required
def pj_corpus_analysis_query(query):
    """Run a query on the session's corpus and send the results in chunks.

    The CQi client registered for the requesting Socket.IO session is looked
    up in pj_corpus_analysis_clients. The outcome of the query itself is
    reported on 'pj_corpus_analysis_query' (code 200 together with the number
    of matches, code 1 with the exception text if the query raised, or the
    'Failed Dependency' response if the session has no client). On success
    the results are exported chunk by chunk and each chunk is emitted on
    'pj_corpus_analysis_query_results' together with a progress percentage.

    :param query: query string handed unchanged to ``corpus.query``.
    """
    client = pj_corpus_analysis_clients.get(request.sid)
    if client is None:
        # NOTE(review): 'Failed Dependency' is HTTP status 424, not 404. The
        # pair is left untouched because clients may depend on the code.
        response = {'code': 404, 'msg': 'Failed Dependency'}
        socketio.emit('pj_corpus_analysis_query', response, room=request.sid)
        return
    corpus = client.corpora.get('CORPUS')
    try:
        results = corpus.query(query)
    except Exception as e:
        response = {'code': 1, 'msg': str(e)}
        socketio.emit('pj_corpus_analysis_query', response, room=request.sid)
        return
    response = {'code': 200, 'msg': 'OK',
                'data': {'num_matches': results.size}}
    socketio.emit('pj_corpus_analysis_query', response, room=request.sid)
    # The first chunk is requested with a smaller cutoff than the following
    # ones (100, then 250).
    chunk_size = 100
    chunk_start = 0
    context = 100
    # Do-while loop: exactly one chunk is sent even for zero matches so the
    # client always receives a message with progress 100, and no extra
    # export is issued once chunk_start has reached the number of matches.
    while True:
        chunk = results.export(context=context, offset=chunk_start,
                               cutoff=chunk_size)
        if results.size == 0:
            progress = 100
        else:
            progress = ((chunk_start + chunk_size) / results.size) * 100
            progress = min(100, int(math.ceil(progress)))
        socketio.emit('pj_corpus_analysis_query_results',
                      {'chunk': chunk,
                       'progress': progress},
                      room=request.sid)
        chunk_start += chunk_size
        if chunk_start >= results.size:
            break
        chunk_size = 250
def pj_corpus_analysis_session_handler(app, corpus_id, user_id, session_id):
    """Set up, observe and tear down one corpus analysis session.

    Runs as a background task inside the application context of ``app``.
    The result of the setup is reported to the client on
    'pj_corpus_analysis_init' (404 unknown corpus, 403 not permitted, 500 CQi
    connection failed, 200 ready). After a successful setup the task polls
    until ``session_id`` is no longer in ``connected_sessions`` and then
    releases everything it registered.

    :param app: application object providing the app context.
    :param corpus_id: id of the Corpus to analyse.
    :param user_id: id of the User who requested the session.
    :param session_id: Socket.IO session id (sid) of the requesting client.
    """
    with app.app_context():
        # Setup analysis session
        corpus = Corpus.query.get(corpus_id)
        user = User.query.get(user_id)
        if corpus is None:
            response = {'code': 404, 'msg': 'Not Found'}
            socketio.emit('pj_corpus_analysis_init', response, room=session_id)
            return
        # A user that cannot be loaded is treated like an unauthorized one
        # instead of crashing the task on ``None.is_administrator()``.
        if user is None or not (corpus.creator == user
                                or user.is_administrator()):
            response = {'code': 403, 'msg': 'Forbidden'}
            socketio.emit('pj_corpus_analysis_init', response, room=session_id)
            return
        # Wait until the corpus status reports 'analysing'.
        while corpus.status != 'analysing':
            db.session.refresh(corpus)
            socketio.sleep(3)
        client = CQiClient('pj_corpus_{}_analysis'.format(corpus_id))
        try:
            client.connect()
        except Exception:
            response = {'code': 500, 'msg': 'Internal Server Error'}
            socketio.emit('pj_corpus_analysis_init', response, room=session_id)
            return
        pj_corpus_analysis_clients[session_id] = client
        pj_corpus_analysis_sessions.setdefault(corpus_id, []).append(
            session_id)
        response = {'code': 200, 'msg': 'OK'}
        socketio.emit('pj_corpus_analysis_init', response, room=session_id)
        # Observe analysis session
        while session_id in connected_sessions:
            socketio.sleep(3)
        # Teardown analysis session
        try:
            client.disconnect()
        except Exception:
            # Best effort: a failing disconnect must not prevent the cleanup.
            pass
        pj_corpus_analysis_clients.pop(session_id, None)
        pj_corpus_analysis_sessions[corpus_id].remove(session_id)
        # Only the last session leaving the corpus stops the analysis.
        if not pj_corpus_analysis_sessions[corpus_id]:
            pj_corpus_analysis_sessions.pop(corpus_id, None)
            corpus.status = 'stop analysis'
            db.session.commit()

35
app/corpora/pj_forms.py Normal file
View File

@ -0,0 +1,35 @@
from flask_wtf import FlaskForm
from wtforms import BooleanField, StringField, SubmitField, SelectField
class PJQueryForm(FlaskForm):
    """Form holding the query text input and the button that sends it."""

    query = StringField(label='Query')
    submit = SubmitField(label='Send query')
def _pj_step_choices():
    """Return a fresh choice list: a placeholder, then '10' to '100' by 10."""
    steps = [str(number) for number in range(10, 101, 10)]
    return [('', 'Choose your option')] + [(step, step) for step in steps]


class PJDisplayOptionsForm(FlaskForm):
    """Form with the display settings for query results.

    Both select fields offer the same options: an empty placeholder followed
    by the values '10', '20', ..., '100'.
    """

    expert_mode = BooleanField(label='Expert mode')
    result_context = SelectField(label='Result context',
                                 choices=_pj_step_choices())
    results_per_page = SelectField(label='Results per page',
                                   choices=_pj_step_choices())

25
app/corpora/pj_views.py Normal file
View File

@ -0,0 +1,25 @@
from flask import abort, render_template, request
from flask_login import current_user, login_required
from . import corpora
from .pj_forms import PJDisplayOptionsForm, PJQueryForm
from .. import db
from ..models import Corpus
@corpora.route('/<int:corpus_id>/pj_analyse')
@login_required
def pj_analyse_corpus(corpus_id):
    """Render the analysis page for a corpus and request its analysis start.

    Responds with 404 if the corpus does not exist and with 403 unless the
    current user created the corpus or is an administrator. A corpus whose
    status is 'prepared' is switched to 'start analysis'. The forms are
    pre-filled from the query string arguments 'context', 'results_per_page'
    and 'query'.

    :param corpus_id: id of the Corpus taken from the URL.
    """
    corpus = Corpus.query.get_or_404(corpus_id)
    # Check permission before changing any state: otherwise every logged-in
    # user could trigger the analysis of a corpus owned by someone else.
    if not (corpus.creator == current_user
            or current_user.is_administrator()):
        abort(403)
    if corpus.status == 'prepared':
        corpus.status = 'start analysis'
        db.session.commit()
    display_options_form = PJDisplayOptionsForm(
        prefix='display-options-form',
        result_context=request.args.get('context', 20),
        results_per_page=request.args.get('results_per_page', 30))
    query_form = PJQueryForm(prefix='query-form',
                             query=request.args.get('query'))
    return render_template('corpora/pj_analyse_corpus.html.j2',
                           corpus_id=corpus_id,
                           display_options_form=display_options_form,
                           query_form=query_form, title='Corpus analysis')

View File

@ -0,0 +1,101 @@
/**
 * Client side of a corpus analysis session.
 *
 * Registers handlers for the "corpus_analysis_init", "corpus_analysis_query"
 * and "corpus_analysis_query_results" socket events and forwards them to
 * optional callbacks (set with setCallback) and optional displays (set with
 * setDisplay). Known types are "init", "query" and "query_results".
 */
class CorpusAnalysisClient {
  /**
   * @param corpusId id sent to the server when init() is called.
   * @param socket object offering on(event, handler) and emit(event, data).
   */
  constructor(corpusId, socket) {
    this.callbacks = {};
    this.corpusId = corpusId;
    this.displays = {};
    this.socket = socket;

    socket.on("corpus_analysis_init", (response) => {
      if (response.code === 200) {
        console.log(`corpus_analysis_init: ${response.code} - ${response.msg}`);
        if (this.callbacks.init) {this.callbacks.init(response.msg);}
        if (this.displays.init) {this.displays.init.setVisibilityByStatus("success");}
      } else {
        if (this.displays.init) {
          this.displays.init.errorContainer.innerHTML = CorpusAnalysisClient.errorMarkup(response);
          this.displays.init.setVisibilityByStatus("error");
        }
        console.error(`corpus_analysis_init: ${response.code} - ${response.msg}`);
      }
    });

    socket.on("corpus_analysis_query", (response) => {
      if (response.code === 200) {
        console.log(`corpus_analysis_query: ${response.code} - ${response.msg}`);
        if (this.callbacks.query) {this.callbacks.query(response.data);}
        if (this.displays.query) {this.displays.query.setVisibilityByStatus("success");}
      } else {
        nopaque.flash("error", `Error ${response.code}: ${response.msg}`);
        // The display is optional, so it must be checked before it is used.
        if (this.displays.query) {
          this.displays.query.errorContainer.innerHTML = CorpusAnalysisClient.errorMarkup(response);
          this.displays.query.setVisibilityByStatus("error");
        }
        console.error(`corpus_analysis_query: ${response.code} - ${response.msg}`);
      }
    });

    socket.on("corpus_analysis_query_results", (response) => {
      console.log("corpus_analysis_query_results:");
      console.log(response);
      if (this.callbacks.query_results) {this.callbacks.query_results(response);}
    });
  }

  /** Replace the characters that are special in HTML by their entities. */
  static escapeHtml(value) {
    const entities = {"&": "&amp;", "<": "&lt;", ">": "&gt;", '"': "&quot;", "'": "&#39;"};
    return String(value).replace(/[&<>"']/g, (character) => entities[character]);
  }

  /**
   * Build the error paragraph for a failed response. Code and message come
   * from the server and are escaped because the result is set as innerHTML.
   */
  static errorMarkup(response) {
    const code = CorpusAnalysisClient.escapeHtml(response.code);
    const msg = CorpusAnalysisClient.escapeHtml(response.msg);
    return `<p class="red-text"><i class="material-icons tiny">error</i> Error ${code}: ${msg}</p>`;
  }

  /** Ask the server to initialize the analysis session for this corpus. */
  init() {
    if (this.displays.init) {
      // Clear the previous error (assignment, not comparison).
      this.displays.init.errorContainer.innerHTML = "";
      this.displays.init.setVisibilityByStatus("waiting");
    }
    this.socket.emit("corpus_analysis_init", this.corpusId);
  }

  /** Send a query string to the server. */
  query(query) {
    if (this.displays.query) {
      // Clear the previous error (assignment, not comparison).
      this.displays.query.errorContainer.innerHTML = "";
      this.displays.query.setVisibilityByStatus("waiting");
    }
    // Use the socket this client was constructed with, as init() does.
    this.socket.emit("corpus_analysis_query", query);
  }

  /** Register the function called for events of the given type. */
  setCallback(type, callback) {
    this.callbacks[type] = callback;
  }

  /** Register the display updated for events of the given type. */
  setDisplay(type, display) {
    this.displays[type] = display;
  }
}
/**
 * Wraps a DOM element whose descendants are shown or hidden depending on a
 * status, selected by the classes "show-on-error", "show-on-success" and
 * "show-while-waiting". Hiding is done with the "hide" class.
 */
class CorpusAnalysisDisplay {
  constructor(element) {
    this.element = element;
    this.errorContainer = element.querySelector(".error-container");
    this.showOnError = element.querySelectorAll(".show-on-error");
    this.showOnSuccess = element.querySelectorAll(".show-on-success");
    this.showWhileWaiting = element.querySelectorAll(".show-while-waiting");
  }

  /**
   * Show the group belonging to the status ("error", "success" or "waiting")
   * and hide the other groups. Any other status hides all groups.
   */
  setVisibilityByStatus(status) {
    const groups = [
      ["error", this.showOnError],
      ["success", this.showOnSuccess],
      ["waiting", this.showWhileWaiting]
    ];
    for (const [groupStatus, members] of groups) {
      const visible = groupStatus === status;
      for (const member of members) {
        if (visible) {
          member.classList.remove("hide");
        } else {
          member.classList.add("hide");
        }
      }
    }
  }
}

View File

@ -56,7 +56,7 @@ async function sendQuery(event) {
queryData = getQueryData(queryFormElement);
if (analysisStatus === "idle") {
nopaque.toast("Query has been sent!");
nopaque.flash("Query has been sent!");
nopaque.socket.emit("corpus_analysis_query", queryData.query);
helperSendQuery(queryData);
analysisStatus = checkAnalysisStatus(sessionId);
@ -100,7 +100,7 @@ function helperSendQuery(queryData) {
innerWindow: 8,
outerWindow: 1
}],
valueNames: ["titles", "lc", "hit", "rc", {data: ["index"]}],
valueNames: ["titles", "lc", "c", "rc", {data: ["index"]}],
item: `<span class="hidden"></span>`};
resultList = new ResultList('result-list', resultListOptions);
resultList.clear(); // empty list for new query
@ -132,7 +132,7 @@ function recieveResults(response) {
} else if (response["code"] === 1) {
queryResultsTableElement.classList.add("hide");
queryLoadingElement.classList.add("hide");
nopaque.toast("error", "Invalid query entered!");
nopaque.flash("error", "Invalid query entered!");
console.log("[ERROR] corpus_analysis_init");
console.log("Code:" + response["code"]);
return; // no further code execution of this code block
@ -173,7 +173,7 @@ function recieveResults(response) {
// check if query has any results
if (chunk["matches"].length === 0) {
queryResultsTableElement.classList.add("hide");
nopaque.toast("No results for this query!");
nopaque.flash("No results for this query!");
return;
}
@ -234,9 +234,9 @@ function activateInspect() {
//gets result cpos infos for one dataIndex to send back to the server
function inspect(dataIndex) {
console.log("Inspect!");
console.log(result["matches"][dataIndex]["hit"]);
console.log(result["matches"][dataIndex]["c"]);
contextModal.open();
nopaque.socket.emit("inspect_match", {"cpos": result["matches"][dataIndex]["hit"]});
nopaque.socket.emit("inspect_match", {"cpos": result["matches"][dataIndex]["c"]});
}
function showMatchContext(message) {
@ -282,7 +282,7 @@ function changeHitsPerPage(event) {
try {
resultList.page = event.target.value;
resultList.update();
nopaque.toast("Updated matches per page.")
nopaque.flash("Updated matches per page.")
} catch (e) {
console.log("resultList has no results right now. Live update of items per page is useless for now.");
}
@ -296,7 +296,7 @@ function changeContext(event) {
let array;
try {
if (event.type === "change") {
nopaque.toast("Updated context per match!");
nopaque.flash("Updated context per match!");
}
} catch (e) {
console.log(e);

View File

@ -145,7 +145,7 @@ class ResultList extends List {
matchNrElement = document.createElement("td");
matchNrElement.classList.add("match-nr");
matchRowElement.appendChild(hitCellElement);
for (cpos of values["hit"]) {
for (cpos of values["c"]) {
token = chunk["cpos_lookup"][cpos];
hitCellElement.insertAdjacentHTML("beforeend", `<span class="token" data-cpos="${cpos}">${token["word"]} </span>`);
// get text titles of every hit cpos token

View File

@ -258,11 +258,6 @@ server side -->
queryFinished = false;
analysisClientStatus = {};
nopaque.socket.on("connect", function() {
sessionId = nopaque.socket.id;
console.log("Clients current session id:", sessionId);
setAnalysisStatus("idle", sessionId);
});
// creates some models on DOMContentLoaded
document.addEventListener("DOMContentLoaded", function() {
@ -276,22 +271,38 @@ server side -->
{"dismissible": true});
M.Collapsible.init(elem, {accordion: false});
loadingModal.open();
nopaque.socket.on("connect", function() {
sessionId = nopaque.socket.id;
console.log("Clients current session id:", sessionId);
setAnalysisStatus("idle", sessionId);
});
// close loading modal if container for analysis has started
nopaque.socket.on("corpus_analysis_init", function(response) {
if (response.code === 201) {
loadingModal.close();
} else {
console.log("[ERROR] corpus_analysis_init");
console.log(response);
}
});
// recieves server side analysis status updates and sets those accordingly in
// analysisClientStatus
nopaque.socket.on("send_analysis_status_server", (response) => {
console.log("Recieved new analysis status from server:", response.status);
setAnalysisStatus(response.status);
});
// recieves results on "corpus_analysis_query" via socket.io
nopaque.socket.on("corpus_analysis_query", recieveResults);
// get context of one match if inspected via socket.io
nopaque.socket.on("match_context", showMatchContext);
nopaque.socket.emit("corpus_analysis_init", {{ corpus_id }});
});
// sets collapsible display options to expandable isntead of accordion
var elem = document.querySelector('.collapsible.expandable');
// close loading modal if container for analysis has started
nopaque.socket.on("corpus_analysis_init", function(response) {
if (response.code === 201) {
loadingModal.close();
} else {
console.log("[ERROR] corpus_analysis_init");
console.log(response);
}
});
// getting some HTML-elements to use/hide/remove/show or add some other
// elements to them.
// These elements will be used inside functions in nopaque.analyse_corpus.js
@ -304,19 +315,6 @@ server side -->
queryFormElement = document.getElementById("query-form");
queryFormElement.addEventListener("submit", sendQuery);
// recieves server side analysis status updates and sets those accordingly in
// analysisClientStatus
nopaque.socket.on("send_analysis_status_server", (response) => {
console.log("Recieved new analysis status from server:", response.status);
setAnalysisStatus(response.status);
});
// recieves results on "corpus_analysis_query" via socket.io
nopaque.socket.on("corpus_analysis_query", recieveResults);
// get context of one match if inspected via socket.io
nopaque.socket.on("match_context", showMatchContext);
// live update of hits per page if hits per page value is changed
hitsPerPageInputElement = document.getElementById("hits-per-page");
hitsPerPageInputElement.onchange = changeHitsPerPage;

View File

@ -0,0 +1,182 @@
{% extends "nopaque.html.j2" %}
{% set full_width = True %}
{% block page_content %}
<div class="col s12">
<ul class="collapsible expandable">
<li class="active hoverable">
<div class="collapsible-header">
<i class="material-icons">search</i>Query
</div>
<div class="collapsible-body">
<!-- Query form -->
<form id="query-form">
<div class="row">
<div class="col s12 m10">
<div class="input-field">
<i class="material-icons prefix">search</i>
{{ query_form.query() }}
{{ query_form.query.label }}
<span class="helper-text">
<a href="http://cwb.sourceforge.net/files/CQP_Tutorial/">
<i class="material-icons" style="font-size: inherit;">help</i>
CQP query language tutorial
</a>
</span>
</div>
</div>
<div class="col s12 m2">
<br class="hide-on-small-only">
{{ macros.submit_button(query_form.submit) }}
</div>
</div>
</form>
</div>
</li>
<li class="hoverable">
<div class="collapsible-header">
<i class="material-icons">settings</i>Display Options
</div>
<div class="collapsible-body">
<!-- Display options form -->
<form id="display-options-form">
<div class="row">
<div class="col s12 m6">
<div class="input-field">
<i class="material-icons prefix">format_list_numbered</i>
{{ display_options_form.results_per_page() }}
{{ display_options_form.results_per_page.label }}
</div>
</div>
<div class="col s12 m6">
<div class="input-field">
<i class="material-icons prefix">short_text</i>
{{ display_options_form.result_context() }}
{{ display_options_form.result_context.label }}
</div>
</div>
<div class="col s12">
<div class="switch">
<label>
{{ display_options_form.expert_mode.label.text }}
{{ display_options_form.expert_mode() }}
<span class="lever"></span>
</label>
</div>
</div>
</div>
</form>
</div>
</li>
</ul>
</div>
<div class="col s12" id="query-display">
<div class="card">
<div class="card-content">
<span class="card-title">Query Results</span>
<div class="error-container hide show-on-error"></div>
<div class="hide show-on-success">
<p>
<span id="received-match-num"></span> of <span id="total-match-num"></span> matches loaded.<br>
Matches occurred in <span id="text-lookup-num"></span> corpus files.
</p>
<div class="progress" id="query-results-progress">
<div class="determinate" id="query-results-determinate"></div>
</div>
<ul class="pagination paginationTop"></ul>
<!-- Query results table -->
<table class="highlight responsive-table">
<thead>
<tr>
<th style="width: 2%">Nr.</th>
<th style="width: 3%">Title</th>
<th style="width: 25%">Left context</th>
<th style="width: 45%">Match</th>
<th style="width: 25%">Right Context</th>
</tr>
</thead>
<tbody class="list"></tbody>
</table>
<ul class="pagination paginationBottom"></ul>
</div>
</div>
</div>
</div>
<!-- Modals -->
<div class="modal no-autoinit" id="init-display">
<div class="modal-content">
<h4>Initializing your corpus analysis session...</h4>
<div class="error-container hide show-on-error"></div>
<div class="hide progress show-while-waiting">
<div class="indeterminate"></div>
</div>
</div>
</div>
<script src="{{ url_for('static', filename='js/nopaque.CorpusAnalysisClient.js') }}"></script>
<script>
var client = undefined;
var initDisplay = undefined;
var queryDisplay = undefined;
var initDisplayElement = document.getElementById("init-display");
var queryDisplayElement = document.getElementById("query-display");
var queryFormElement = document.getElementById("query-form");
var queryResultsDeterminateElement = document.getElementById("query-results-determinate");
var queryResultsProgressElement = document.getElementById("query-results-progress");
var receivedMatchNumElement = document.getElementById("received-match-num");
var textLookupNumElement = document.getElementById("text-lookup-num");
var totalMatchNumElement = document.getElementById("total-match-num");
var results = undefined;
document.addEventListener("DOMContentLoaded", () => {
var initModal = M.Modal.init(initDisplayElement, {dismissible: false});
// Init corpus analysis components
initDisplay = new CorpusAnalysisDisplay(initDisplayElement);
queryDisplay = new CorpusAnalysisDisplay(queryDisplayElement);
client = new CorpusAnalysisClient({{ corpus_id }}, nopaque.socket);
initModal.open();
client.setDisplay("init", initDisplay);
client.setCallback("init", () => {initModal.close();});
client.setDisplay("query", queryDisplay);
client.setCallback("query", (response) => {
// This is called when a query was successfull
results = {matches: [], cpos_lookup: {}, text_lookup: {}};
queryResultsDeterminateElement.style.width = "0%";
receivedMatchNumElement.innerText = "0";
textLookupNumElement.innerText = "0";
totalMatchNumElement.innerText = response.num_matches;
});
client.setCallback("query_results", (response) => {
// This is called when results are transmitted.
if (response.progress === 100) {
queryResultsProgressElement.classList.add("hide");
}
queryResultsDeterminateElement.style.width = `${response.progress}%`;
results.matches.push(...response.chunk.matches);
receivedMatchNumElement.innerText = `${results.matches.length}`;
Object.assign(results.cpos_lookup, response.chunk.cpos_lookup);
Object.assign(results.text_lookup, response.chunk.text_lookup);
textLookupNumElement.innerText = `${Object.keys(results.text_lookup).length}`;
});
// Trigger corpus analysis initialization on server side
client.init();
queryFormElement.addEventListener("submit", (e) => {
// Prevent page from reloading on submit
e.preventDefault();
// Gather query data
var queryFormData = new FormData(queryFormElement);
var query = queryFormData.get("query-form-query");
// Send query to server
client.query(query);
});
});
</script>
{% endblock %}