diff --git a/app/corpora/__init__.py b/app/corpora/__init__.py index cec49443..8fb4514b 100644 --- a/app/corpora/__init__.py +++ b/app/corpora/__init__.py @@ -3,3 +3,4 @@ from flask import Blueprint corpora = Blueprint('corpora', __name__) from . import events, views # noqa +from . import pj_events, pj_views # noqa diff --git a/app/corpora/cqi/models/subcorpora.py b/app/corpora/cqi/models/subcorpora.py index 32789bba..c34245d3 100644 --- a/app/corpora/cqi/models/subcorpora.py +++ b/app/corpora/cqi/models/subcorpora.py @@ -59,14 +59,14 @@ class Subcorpus: else: lc_rbound = max(0, (match_start - 1)) if lc_rbound != match_start: - lc_lbound = max(0, match_start - context) + lc_lbound = max(0, (match_start - 1 - context)) lc = (lc_lbound, lc_rbound) cpos_list_lbound = lc_lbound else: cpos_list_lbound = match_start rc_lbound = min((match_end + 1), (self.corpus.size - 1)) if rc_lbound != match_end: - rc_rbound = min((match_end + context), + rc_rbound = min((match_end + 1 + context), (self.corpus.size - 1)) rc = (rc_lbound, rc_rbound) cpos_list_rbound = rc_rbound diff --git a/app/corpora/events.py b/app/corpora/events.py index ee7093c0..bc0ebc40 100644 --- a/app/corpora/events.py +++ b/app/corpora/events.py @@ -72,9 +72,8 @@ def corpus_analysis_query(query): logger.warning('Aborting due to status being "abort".') break else: - chunk = results.dump_values(context=context, - first_result=chunk_start, - num_results=chunk_size) + chunk = results.export(context=context, cutoff=chunk_size, + expand_lists=True, offset=chunk_start) if (results.size == 0): progress = 100 else: diff --git a/app/corpora/forms.py b/app/corpora/forms.py index 801f0e39..6c7119a8 100644 --- a/app/corpora/forms.py +++ b/app/corpora/forms.py @@ -15,7 +15,8 @@ class AddCorpusFileForm(FlaskForm): journal = StringField('Journal', validators=[Length(0, 255)]) pages = StringField('Pages', validators=[Length(0, 255)]) publisher = StringField('Publisher', validators=[Length(0, 255)]) - publishing_year = IntegerField('Publishing year', validators=[DataRequired()]) + publishing_year = IntegerField('Publishing year', + validators=[DataRequired()]) school = StringField('School', validators=[Length(0, 255)]) submit = SubmitField() title = StringField('Title', validators=[DataRequired(), Length(1, 255)]) @@ -43,7 +44,8 @@ class EditCorpusFileForm(FlaskForm): journal = StringField('Journal', validators=[Length(0, 255)]) pages = StringField('Pages', validators=[Length(0, 255)]) publisher = StringField('Publisher', validators=[Length(0, 255)]) - publishing_year = IntegerField('Publishing year', validators=[DataRequired()]) + publishing_year = IntegerField('Publishing year', + validators=[DataRequired()]) school = StringField('School', validators=[Length(0, 255)]) submit = SubmitField() title = StringField('Title', validators=[DataRequired(), Length(1, 255)]) diff --git a/app/corpora/pj_events.py b/app/corpora/pj_events.py new file mode 100644 index 00000000..e57e5254 --- /dev/null +++ b/app/corpora/pj_events.py @@ -0,0 +1,112 @@ +from flask import current_app, request +from flask_login import current_user +from .cqi import CQiClient +from .. import db, socketio +from ..decorators import socketio_login_required +from ..events import connected_sessions +from ..models import Corpus, User +import math + + +''' +' A dictionary containing lists of, with corpus ids associated, Socket.IO +' session ids (sid). {: [, ...], ...} +''' +pj_corpus_analysis_sessions = {} +''' +' A dictionary containing Socket.IO session id - CQi client pairs. +' {: CQiClient, ...} +''' +pj_corpus_analysis_clients = {} + + +@socketio.on('pj_corpus_analysis_init') +@socketio_login_required +def pj_init_corpus_analysis(corpus_id): + socketio.start_background_task(pj_corpus_analysis_session_handler, + current_app._get_current_object(), + corpus_id, current_user.id, request.sid) + + +@socketio.on('pj_corpus_analysis_query') +@socketio_login_required +def pj_corpus_analysis_query(query): + client = pj_corpus_analysis_clients.get(request.sid) + if client is None: + response = {'code': 404, 'msg': 'Failed Dependency'} + socketio.emit('pj_corpus_analysis_query', response, room=request.sid) + return + corpus = client.corpora.get('CORPUS') + try: + results = corpus.query(query) + except Exception as e: + response = {'code': 1, 'msg': str(e)} + socketio.emit('pj_corpus_analysis_query', response, room=request.sid) + else: + response = {'code': 200, 'msg': 'OK', + 'data': {'num_matches': results.size}} + socketio.emit('pj_corpus_analysis_query', response, room=request.sid) + chunk_size = 100 + chunk_start = 0 + context = 100 + progress = 0 + while chunk_start <= results.size: + chunk = results.export(context=context, offset=chunk_start, + cutoff=chunk_size) + if (results.size == 0): + progress = 100 + else: + progress = ((chunk_start + chunk_size) / results.size) * 100 + progress = min(100, int(math.ceil(progress))) + socketio.emit('pj_corpus_analysis_query_results', + {'chunk': chunk, + 'progress': progress}, + room=request.sid) + chunk_start += chunk_size + chunk_size = 250 + + +def pj_corpus_analysis_session_handler(app, corpus_id, user_id, session_id): + with app.app_context(): + ''' Setup analysis session ''' + corpus = Corpus.query.get(corpus_id) + user = User.query.get(user_id) + if corpus is None: + response = {'code': 404, 'msg': 'Not Found'} + socketio.emit('pj_corpus_analysis_init', response, room=session_id) + return + elif not (corpus.creator == user or user.is_administrator()): + response = {'code': 403, 'msg': 'Forbidden'} + socketio.emit('pj_corpus_analysis_init', response, room=session_id) + return + while corpus.status != 'analysing': + db.session.refresh(corpus) + socketio.sleep(3) + client = CQiClient('pj_corpus_{}_analysis'.format(corpus_id)) + try: + client.connect() + except Exception: + response = {'code': 500, 'msg': 'Internal Server Error'} + socketio.emit('pj_corpus_analysis_init', response, room=session_id) + return + pj_corpus_analysis_clients[session_id] = client + if corpus_id not in pj_corpus_analysis_sessions: + pj_corpus_analysis_sessions[corpus_id] = [session_id] + else: + pj_corpus_analysis_sessions[corpus_id].append(session_id) + response = {'code': 200, 'msg': 'OK'} + socketio.emit('pj_corpus_analysis_init', response, room=session_id) + ''' Observe analysis session ''' + while session_id in connected_sessions: + socketio.sleep(3) + ''' Teardown analysis session ''' + try: + client.disconnect() + except Exception: + pass + pj_corpus_analysis_clients.pop(session_id, None) + pj_corpus_analysis_sessions[corpus_id].remove(session_id) + if not pj_corpus_analysis_sessions[corpus_id]: + pj_corpus_analysis_sessions.pop(corpus_id, None) + corpus.status = 'stop analysis' + db.session.commit() diff --git a/app/corpora/pj_forms.py b/app/corpora/pj_forms.py new file mode 100644 index 00000000..16d8e1cc --- /dev/null +++ b/app/corpora/pj_forms.py @@ -0,0 +1,35 @@ +from flask_wtf import FlaskForm +from wtforms import BooleanField, StringField, SubmitField, SelectField + + +class PJQueryForm(FlaskForm): + query = StringField('Query') + submit = SubmitField('Send query') + + +class PJDisplayOptionsForm(FlaskForm): + expert_mode = BooleanField('Expert mode') + result_context = SelectField('Result context', + choices=[('', 'Choose your option'), + ('10', '10'), + ('20', '20'), + ('30', '30'), + ('40', '40'), + ('50', '50'), + ('60', '60'), + ('70', '70'), + ('80', '80'), + ('90', '90'), + ('100', '100')]) + results_per_page = SelectField('Results per page', + choices=[('', 'Choose your option'), + ('10', '10'), + ('20', '20'), + ('30', '30'), + ('40', '40'), + ('50', '50'), + ('60', '60'), + ('70', '70'), + ('80', '80'), + ('90', '90'), + ('100', '100')]) diff --git a/app/corpora/pj_views.py b/app/corpora/pj_views.py new file mode 100644 index 00000000..02ffff70 --- /dev/null +++ b/app/corpora/pj_views.py @@ -0,0 +1,25 @@ +from flask import request, render_template +from flask_login import login_required +from . import corpora +from .pj_forms import PJDisplayOptionsForm, PJQueryForm +from .. import db +from ..models import Corpus + + +@corpora.route('//pj_analyse') +@login_required +def pj_analyse_corpus(corpus_id): + corpus = Corpus.query.get_or_404(corpus_id) + if corpus.status == 'prepared': + corpus.status = 'start analysis' + db.session.commit() + display_options_form = PJDisplayOptionsForm( + prefix='display-options-form', + result_context=request.args.get('context', 20), + results_per_page=request.args.get('results_per_page', 30)) + query_form = PJQueryForm(prefix='query-form', + query=request.args.get('query')) + return render_template('corpora/pj_analyse_corpus.html.j2', + corpus_id=corpus_id, + display_options_form=display_options_form, + query_form=query_form, title='Corpus analysis') diff --git a/app/static/js/nopaque.CorpusAnalysisClient.js b/app/static/js/nopaque.CorpusAnalysisClient.js new file mode 100644 index 00000000..b4045eec --- /dev/null +++ b/app/static/js/nopaque.CorpusAnalysisClient.js @@ -0,0 +1,101 @@ +class CorpusAnalysisClient { + constructor(corpusId, socket) { + this.callbacks = {}; + this.corpusId = corpusId; + this.displays = {}; + this.socket = socket; + + socket.on("corpus_analysis_init", (response) => { + if (response.code === 200) { + console.log(`corpus_analysis_init: ${response.code} - ${response.msg}`); + if (this.callbacks.init) {this.callbacks.init(response.msg);} + if (this.displays.init) {this.displays.init.setVisibilityByStatus("success");} + } else { + if (this.displays.init) { + this.displays.init.errorContainer.innerHTML = `

error Error ${response.code}: ${response.msg}

`; + this.displays.init.setVisibilityByStatus("error"); + } + console.error(`corpus_analysis_init: ${response.code} - ${response.msg}`); + } + }); + + socket.on("corpus_analysis_query", (response) => { + if (response.code === 200) { + console.log(`corpus_analysis_query: ${response.code} - ${response.msg}`); + if (this.callbacks.query) {this.callbacks.query(response.data);} + if (this.displays.query) {this.displays.query.setVisibilityByStatus("success");} + } else { + nopaque.flash("error", `Error ${response.code}: ${response.msg}`); + this.displays.query.errorContainer.innerHTML = `

error Error ${response.code}: ${response.msg}

`; + if (this.displays.query) {this.displays.query.setVisibilityByStatus("error");} + console.error(`corpus_analysis_query: ${response.code} - ${response.msg}`) + } + }); + + socket.on("corpus_analysis_query_results", (response) => { + console.log("corpus_analysis_query_results:") + console.log(response); + if (this.callbacks.query_results) {this.callbacks.query_results(response);} + }); + } + + init() { + if (this.displays.init) { + this.displays.init.errorContainer.innerHTML == ""; + this.displays.init.setVisibilityByStatus("waiting"); + } + this.socket.emit("corpus_analysis_init", this.corpusId); + } + + query(query) { + if (this.displays.query) { + this.displays.query.errorContainer.innerHTML == ""; + this.displays.query.setVisibilityByStatus("waiting"); + } + nopaque.socket.emit("corpus_analysis_query", query); + } + + setCallback(type, callback) { + this.callbacks[type] = callback; + } + + setDisplay(type, display) { + this.displays[type] = display; + } +} + + +class CorpusAnalysisDisplay { + constructor(element) { + this.element = element; + this.errorContainer = element.querySelector(".error-container"); + this.showOnError = element.querySelectorAll(".show-on-error"); + this.showOnSuccess = element.querySelectorAll(".show-on-success"); + this.showWhileWaiting = element.querySelectorAll(".show-while-waiting"); + } + + setVisibilityByStatus(status) { + switch (status) { + case "error": + for (let element of this.showOnError) {element.classList.remove("hide");} + for (let element of this.showOnSuccess) {element.classList.add("hide");} + for (let element of this.showWhileWaiting) {element.classList.add("hide");} + break; + case "success": + for (let element of this.showOnError) {element.classList.add("hide");} + for (let element of this.showOnSuccess) {element.classList.remove("hide");} + for (let element of this.showWhileWaiting) {element.classList.add("hide");} + break; + case "waiting": + for (let element of this.showOnError) {element.classList.add("hide");} + for (let element of this.showOnSuccess) {element.classList.add("hide");} + for (let element of this.showWhileWaiting) {element.classList.remove("hide");} + break; + default: + // Hide all + for (let element of this.showOnError) {element.classList.add("hide");} + for (let element of this.showOnSuccess) {element.classList.add("hide");} + for (let element of this.showWhileWaiting) {element.classList.add("hide");} + } + } +} diff --git a/app/static/js/nopaque.analyse_corpus.js b/app/static/js/nopaque.analyse_corpus.js index 18058c9a..86dd701f 100644 --- a/app/static/js/nopaque.analyse_corpus.js +++ b/app/static/js/nopaque.analyse_corpus.js @@ -56,7 +56,7 @@ async function sendQuery(event) { queryData = getQueryData(queryFormElement); if (analysisStatus === "idle") { - nopaque.toast("Query has been sent!"); + nopaque.flash("Query has been sent!"); nopaque.socket.emit("corpus_analysis_query", queryData.query); helperSendQuery(queryData); analysisStatus = checkAnalysisStatus(sessionId); @@ -100,7 +100,7 @@ function helperSendQuery(queryData) { innerWindow: 8, outerWindow: 1 }], - valueNames: ["titles", "lc", "hit", "rc", {data: ["index"]}], + valueNames: ["titles", "lc", "c", "rc", {data: ["index"]}], item: ``}; resultList = new ResultList('result-list', resultListOptions); resultList.clear(); // empty list for new query @@ -132,7 +132,7 @@ function recieveResults(response) { } else if (response["code"] === 1) { queryResultsTableElement.classList.add("hide"); queryLoadingElement.classList.add("hide"); - nopaque.toast("error", "Invalid query entered!"); + nopaque.flash("error", "Invalid query entered!"); console.log("[ERROR] corpus_analysis_init"); console.log("Code:" + response["code"]); return; // no further code execution of this code block @@ -173,7 +173,7 @@ function recieveResults(response) { // check if query has any results if (chunk["matches"].length === 0) { queryResultsTableElement.classList.add("hide"); - nopaque.toast("No results for this query!"); + nopaque.flash("No results for this query!"); return; } @@ -234,9 +234,9 @@ function activateInspect() { //gets result cpos infos for one dataIndex to send back to the server function inspect(dataIndex) { console.log("Inspect!"); - console.log(result["matches"][dataIndex]["hit"]); + console.log(result["matches"][dataIndex]["c"]); contextModal.open(); - nopaque.socket.emit("inspect_match", {"cpos": result["matches"][dataIndex]["hit"]}); + nopaque.socket.emit("inspect_match", {"cpos": result["matches"][dataIndex]["c"]}); } function showMatchContext(message) { @@ -282,7 +282,7 @@ function changeHitsPerPage(event) { try { resultList.page = event.target.value; resultList.update(); - nopaque.toast("Updated matches per page.") + nopaque.flash("Updated matches per page.") } catch (e) { console.log("resultList has no results right now. Live update of items per page is useless for now."); } @@ -296,7 +296,7 @@ function changeContext(event) { let array; try { if (event.type === "change") { - nopaque.toast("Updated context per match!"); + nopaque.flash("Updated context per match!"); } } catch (e) { console.log(e); diff --git a/app/static/js/nopaque.lists.js b/app/static/js/nopaque.lists.js index ff29ec71..74c06c31 100644 --- a/app/static/js/nopaque.lists.js +++ b/app/static/js/nopaque.lists.js @@ -145,7 +145,7 @@ class ResultList extends List { matchNrElement = document.createElement("td"); matchNrElement.classList.add("match-nr"); matchRowElement.appendChild(hitCellElement); - for (cpos of values["hit"]) { + for (cpos of values["c"]) { token = chunk["cpos_lookup"][cpos]; hitCellElement.insertAdjacentHTML("beforeend", `${token["word"]} `); // get text titles of every hit cpos token diff --git a/app/templates/corpora/analyse_corpus.html.j2 b/app/templates/corpora/analyse_corpus.html.j2 index af6efe66..7f95bbe9 100644 --- a/app/templates/corpora/analyse_corpus.html.j2 +++ b/app/templates/corpora/analyse_corpus.html.j2 @@ -258,11 +258,6 @@ server side --> queryFinished = false; analysisClientStatus = {}; - nopaque.socket.on("connect", function() { - sessionId = nopaque.socket.id; - console.log("Clients current session id:", sessionId); - setAnalysisStatus("idle", sessionId); - }); // creates some models on DOMContentLoaded document.addEventListener("DOMContentLoaded", function() { @@ -276,22 +271,38 @@ server side --> {"dismissible": true}); M.Collapsible.init(elem, {accordion: false}); loadingModal.open(); + nopaque.socket.on("connect", function() { + sessionId = nopaque.socket.id; + console.log("Clients current session id:", sessionId); + setAnalysisStatus("idle", sessionId); + }); + // close loading modal if container for analysis has started + nopaque.socket.on("corpus_analysis_init", function(response) { + if (response.code === 201) { + loadingModal.close(); + } else { + console.log("[ERROR] corpus_analysis_init"); + console.log(response); + } + }); + // recieves server side analysis status updates and sets those accordingly in + // analysisClientStatus + nopaque.socket.on("send_analysis_status_server", (response) => { + console.log("Recieved new analysis status from server:", response.status); + setAnalysisStatus(response.status); + }); + + // recieves results on "corpus_analysis_query" via socket.io + nopaque.socket.on("corpus_analysis_query", recieveResults); + + // get context of one match if inspected via socket.io + nopaque.socket.on("match_context", showMatchContext); nopaque.socket.emit("corpus_analysis_init", {{ corpus_id }}); }); // sets collapsible display options to expandable isntead of accordion var elem = document.querySelector('.collapsible.expandable'); - // close loading modal if container for analysis has started - nopaque.socket.on("corpus_analysis_init", function(response) { - if (response.code === 201) { - loadingModal.close(); - } else { - console.log("[ERROR] corpus_analysis_init"); - console.log(response); - } - }); - // getting some HTML-elements to use/hide/remove/show or add some other // elements to them. // These elements will be used inside functions in nopaque.analyse_corpus.js @@ -304,19 +315,6 @@ server side --> queryFormElement = document.getElementById("query-form"); queryFormElement.addEventListener("submit", sendQuery); - // recieves server side analysis status updates and sets those accordingly in - // analysisClientStatus - nopaque.socket.on("send_analysis_status_server", (response) => { - console.log("Recieved new analysis status from server:", response.status); - setAnalysisStatus(response.status); - }); - - // recieves results on "corpus_analysis_query" via socket.io - nopaque.socket.on("corpus_analysis_query", recieveResults); - - // get context of one match if inspected via socket.io - nopaque.socket.on("match_context", showMatchContext); - // live update of hits per page if hits per page value is changed hitsPerPageInputElement = document.getElementById("hits-per-page"); hitsPerPageInputElement.onchange = changeHitsPerPage; diff --git a/app/templates/corpora/pj_analyse_corpus.html.j2 b/app/templates/corpora/pj_analyse_corpus.html.j2 new file mode 100644 index 00000000..c6d5df1d --- /dev/null +++ b/app/templates/corpora/pj_analyse_corpus.html.j2 @@ -0,0 +1,182 @@ +{% extends "nopaque.html.j2" %} + +{% set full_width = True %} + +{% block page_content %} +
+ +
+ +
+
+
+ Query Results +
+
+

+ of matches loaded.
+ Matches occured in corpus files. +

+
+
+
+
    + + + + + + + + + + + + +
    Nr.TitleLeft contextMatchRight Context
    +
      +
      +
      +
      +
      + + + + + + + + +{% endblock %}