from string import punctuation

import nltk


def _ensure_stopwords_corpus():
    """Make sure the NLTK ``stopwords`` corpus is available locally.

    The downloader is only invoked when the corpus cannot be found on disk,
    so a normal request does not trigger a download attempt.
    """
    try:
        nltk.data.find('corpora/stopwords')
    except LookupError:
        nltk.download('stopwords', quiet=True)


@bp.route('/stopwords')
@content_negotiation(produces='application/json')
def get_stopwords():
    """Return the stopword lists used by the corpus analysis frequency views.

    Returns:
        A ``(response_data, status)`` tuple. ``response_data['stopwords']``
        maps each supported language name to its NLTK stopword list, plus
        two extra keys: ``'punctuation'`` (``string.punctuation`` characters
        and a few additional symbols) and ``'user_stopwords'`` (an empty
        list, a placeholder for user supplied entries).
    """
    _ensure_stopwords_corpus()
    languages = [
        "german",
        "english",
        "catalan",
        "greek",
        "spanish",
        "french",
        "italian",
        "russian",
        "chinese",
    ]
    stopwords = {
        language: nltk.corpus.stopwords.words(language)
        for language in languages
    }
    stopwords['punctuation'] = list(punctuation) + ['—', '|']
    stopwords['user_stopwords'] = []
    response_data = {
        'stopwords': stopwords
    }
    # NOTE(review): 202 is kept so existing clients see the same status, but
    # this handler completes synchronously; 200 looks like the intended code
    # - confirm before changing.
    return response_data, 202
a/app/jobs/json_routes.py +++ b/app/jobs/json_routes.py @@ -42,7 +42,6 @@ def job_log(job_id): with open(os.path.join(job.path, 'pipeline_data', 'logs', 'pyflow_log.txt')) as log_file: log = log_file.read() response_data = { - 'message': '', 'jobLog': log } return response_data, 200 diff --git a/app/static/js/CorpusAnalysis/CorpusAnalysisApp.js b/app/static/js/CorpusAnalysis/CorpusAnalysisApp.js index 5248b0ba..0be32a3f 100644 --- a/app/static/js/CorpusAnalysis/CorpusAnalysisApp.js +++ b/app/static/js/CorpusAnalysis/CorpusAnalysisApp.js @@ -1,6 +1,8 @@ class CorpusAnalysisApp { constructor(corpusId) { - this.data = {}; + this.data = { + promises: {getStopwords: []} + }; // HTML elements this.elements = { @@ -22,6 +24,49 @@ class CorpusAnalysisApp { }; } + // getStopwords(language) { + // if (language in this.data.promises.getStopwords) { + // console.log('Stopwords already loaded'); + // return this.data.promises.getStopwords[language]; + // } + // this.data.promises.getStopwords[language] = new Promise((resolve, reject) => { + // Requests.corpora.entity.getStopwords(language) + // .then((response) => { + // response.json() + // .then((json) => { + // let stopwords = json.stopwords; + // resolve(stopwords); + // }) + // .catch((error) => { + // reject(error); + // }); + // }); + // }); + // return this.data.promises.getStopwords[language]; + // } + + getStopwords() { + if (this.data.promises.getStopwords.length !== 0) { + console.log('Stopwords already loaded'); + return this.data.promises.getStopwords; + } + this.data.promises.getStopwords = new Promise((resolve, reject) => { + Requests.corpora.entity.getStopwords() + .then((response) => { + response.json() + .then((json) => { + let stopwords = json.stopwords; + resolve(stopwords); + }) + .catch((error) => { + reject(error); + }); + }); + }); + return this.data.promises.getStopwords; + } + + init() { this.disableActionElements(); this.elements.m.initModal.open(); @@ -161,9 +206,28 @@ class CorpusAnalysisApp { 
type: 'pie' } ]; - let config = {responsive: true}; + let graphLayout = { + showlegend: true, + height: 486, + margin: { + l: 10, + r: 10, + b: 10, + t: 10 + }, + legend: { + "orientation": "h", + font: { + size: 10 + } + } + }; + let config = { + responsive: true, + displaylogo: false + }; - Plotly.newPlot(textProportionsGraphicElement, graphData, config); + Plotly.newPlot(textProportionsGraphicElement, graphData, graphLayout, config); } renderFrequenciesGraphic(corpusData) { @@ -171,42 +235,106 @@ class CorpusAnalysisApp { let frequenciesTokenCategoryDropdownListElement = document.querySelector("#frequencies-token-category-dropdown"); let frequenciesGraphicElement = document.querySelector('#frequencies-graphic'); let texts = Object.entries(corpusData.s_attrs.text.lexicon); - - + let graphtype = document.querySelector('.frequencies-graph-mode-button.disabled').dataset.graphType; + let graphModeButtons = document.querySelectorAll('.frequencies-graph-mode-button'); + frequenciesTokenCategoryDropdownListElement.addEventListener('click', (event) => { frequenciesTokenCategoryDropdownElement.firstChild.textContent = event.target.innerHTML; this.renderFrequenciesGraphic(corpusData); }); + + graphModeButtons.forEach(graphModeButton => { + graphModeButton.addEventListener('click', (event) => { + graphModeButtons.forEach(btn => { + btn.classList.remove('disabled'); + }); + event.target.closest('.frequencies-graph-mode-button').classList.add('disabled'); + this.renderFrequenciesGraphic(corpusData); + }); + }); let tokenCategory = frequenciesTokenCategoryDropdownElement.firstChild.textContent.toLowerCase(); - let graphData = this.createFrequenciesGraphData(tokenCategory, texts, corpusData); - let graphLayout = { - barmode: 'stack', - type: 'bar' - }; - let config = {responsive: true}; - - Plotly.newPlot(frequenciesGraphicElement, graphData, graphLayout, config); + this.createFrequenciesGraphData(tokenCategory, texts, corpusData, graphtype) + .then(graphData => { + let 
graphLayout = { + barmode: graphtype === 'bar' ? 'stack' : '', + margin: { + t: 20, + l: 50 + }, + yaxis: { + showticklabels: graphtype === 'markers' ? false : true + }, + }; + let config = { + responsive: true, + modeBarButtonsToRemove: ['zoom2d', 'select2d', 'lasso2d', 'zoomIn2d', 'zoomOut2d', 'autoScale2d', 'resetScale2d'], + displaylogo: false + }; + Plotly.newPlot(frequenciesGraphicElement, graphData, graphLayout, config); + }); } - createFrequenciesGraphData(category, texts, corpusData) { - let graphData = []; - let sortedData = Object.entries(corpusData.corpus.freqs[category]).sort((a, b) => b[1] - a[1]).slice(0, 5); - - for (let item of sortedData) { - let data = { - x: texts.map(text => `${corpusData.values.s_attrs.text[text[0]].title} (${corpusData.values.s_attrs.text[text[0]].publishing_year})`), - y: texts.map(text => text[1].freqs[category][item[0]]), - name: corpusData.values.p_attrs[category][item[0]], - type: 'bar' - }; - graphData.push(data); - } - - return graphData; + createFrequenciesGraphData(category, texts, corpusData, graphtype) { + return new Promise((resolve, reject) => { + this.getStopwords() + .then(stopwords => { + this.renderStopwordSettingsModal(stopwords); + let stopwordList = []; + Object.values(stopwords).forEach(stopwordItems => { + stopwordItems.forEach(stopword => { + stopwordList.push(stopword); + }); + }); + let graphData = []; + let filteredData = Object.entries(corpusData.corpus.freqs[category]) + .sort((a, b) => b[1] - a[1]) + .filter(item => !stopwordList.includes(corpusData.values.p_attrs[category][item[0]].toLowerCase())) + .slice(0, 5); + if (graphtype !== 'markers') { + for (let item of filteredData) { + let data = { + x: texts.map(text => `${corpusData.values.s_attrs.text[text[0]].title} (${corpusData.values.s_attrs.text[text[0]].publishing_year})`), + y: texts.map(text => text[1].freqs[category][item[0]] || 0), + name: corpusData.values.p_attrs[category][item[0]], + type: graphtype + }; + graphData.push(data); + } + 
} else { + for (let item of filteredData) { + let size = texts.map(text => text[1].freqs[category][item[0]] || 0); + let data = { + x: texts.map(text => `${corpusData.values.s_attrs.text[text[0]].title} (${corpusData.values.s_attrs.text[text[0]].publishing_year})`), + y: texts.map(text => corpusData.values.p_attrs[category][item[0]]), + name: corpusData.values.p_attrs[category][item[0]], + text: texts.map(text => `${corpusData.values.p_attrs[category][item[0]]}
${text[1].freqs[category][item[0]] || 0}`), + mode: 'markers', + marker: { + size: size, + // sizeref: 2.0 * Math.max(...size) / (80**2), + // sizemode: 'area', + sizeref: 0.2 + } + }; + graphData.push(data); + } + } + resolve(graphData); + }) + .catch(error => { + reject(error); + }); + }); } + renderStopwordSettingsModal(stopwords) { + let stopwordInputField = document.querySelector('.stopword-input-field'); + } + + + renderBoundsGraphic(corpusData) { let boundsGraphicElement = document.querySelector('#bounds-graphic'); @@ -238,7 +366,11 @@ class CorpusAnalysisApp { } }; - let config = {responsive: true}; + let config = { + responsive: true, + modeBarButtonsToRemove: ['zoom2d', 'select2d', 'lasso2d', 'zoomIn2d', 'zoomOut2d', 'autoScale2d', 'resetScale2d'], + displaylogo: false + }; Plotly.newPlot(boundsGraphicElement, graphData, graphLayout, config); } diff --git a/app/static/js/Requests/Requests.js b/app/static/js/Requests/Requests.js index 0504d8a0..7282f0f7 100644 --- a/app/static/js/Requests/Requests.js +++ b/app/static/js/Requests/Requests.js @@ -22,9 +22,11 @@ Requests.JSONfetch = (input, init={}) => { response.json() .then( (json) => { - let message = json.message || json; + let message = json.message; let category = json.category || 'message'; - app.flash(message, category); + if (message) { + app.flash(message, category); + } }, (error) => { app.flash(`[${response.status}]: ${response.statusText}`, 'error'); diff --git a/app/static/js/Requests/corpora/corpora.js b/app/static/js/Requests/corpora/corpora.js index 55f6b899..3118a153 100644 --- a/app/static/js/Requests/corpora/corpora.js +++ b/app/static/js/Requests/corpora/corpora.js @@ -31,6 +31,14 @@ Requests.corpora.entity.generateShareLink = (corpusId, role, expiration) => { return Requests.JSONfetch(input, init); }; +Requests.corpora.entity.getStopwords = () => { + let input = `/corpora/stopwords`; + let init = { + method: 'GET' + }; + return Requests.JSONfetch(input, init); +}; + 
Requests.corpora.entity.isPublic = {}; Requests.corpora.entity.isPublic.update = (corpusId, isPublic) => { @@ -43,4 +51,3 @@ Requests.corpora.entity.isPublic.update = (corpusId, isPublic) => { }; - diff --git a/app/templates/corpora/analysis.html.j2 b/app/templates/corpora/analysis.html.j2 index fbcd0be6..b9a80c97 100644 --- a/app/templates/corpora/analysis.html.j2 +++ b/app/templates/corpora/analysis.html.j2 @@ -98,19 +98,19 @@
-
+
Proportions

of texts within the corpus

-
+
-
+
- Wordarrow_drop_down Frequencies + Frequencies

within the texts of the 5 most frequent words in the corpus

+ Wordarrow_drop_down + equalizer + show_chart + bubble_chart + settings
@@ -161,6 +166,21 @@
+ + + {% for extension in extensions %} {{ extension.modals }} {% endfor %} diff --git a/requirements.txt b/requirements.txt index 404b9e87..18593b1c 100644 --- a/requirements.txt +++ b/requirements.txt @@ -20,6 +20,7 @@ Flask-WTF hiredis MarkupSafe==2.0.1 marshmallow-sqlalchemy==0.29.0 +nltk psycopg2 PyJWT pyScss