New visualizations for frequencies

This commit is contained in:
Inga Kirschnick 2023-07-03 11:06:43 +02:00
parent 6c31788402
commit beb157092e
7 changed files with 221 additions and 37 deletions

View File

@ -7,6 +7,8 @@ from app.decorators import content_negotiation
from app.models import Corpus, CorpusFollowerRole from app.models import Corpus, CorpusFollowerRole
from . import bp from . import bp
from .decorators import corpus_follower_permission_required, corpus_owner_or_admin_required from .decorators import corpus_follower_permission_required, corpus_owner_or_admin_required
import nltk
from string import punctuation
@bp.route('/<hashid:corpus_id>', methods=['DELETE']) @bp.route('/<hashid:corpus_id>', methods=['DELETE'])
@ -56,6 +58,27 @@ def build_corpus(corpus_id):
} }
return response_data, 202 return response_data, 202
@bp.route('/stopwords')
@content_negotiation(produces='application/json')
def get_stopwords():
    """Return the stopword lists used to filter the frequency graphics.

    The JSON body has a single key ``stopwords`` mapping each supported
    language name to its NLTK stopword list. Two extra entries are added:
    ``punctuation`` (``string.punctuation`` plus ``''`` and ``'|'``) and an
    empty ``user_stopwords`` list.
    """
    languages = [
        "german", "english", "catalan", "greek", "spanish",
        "french", "italian", "russian", "chinese"
    ]
    # Only download the corpus when it is not installed yet. Calling
    # nltk.download() unconditionally contacts the NLTK index on every request.
    try:
        nltk.data.find('corpora/stopwords')
    except LookupError:
        nltk.download('stopwords', quiet=True)
    stopwords = {
        language: nltk.corpus.stopwords.words(language)
        for language in languages
    }
    stopwords['punctuation'] = list(punctuation) + ['', '|']
    stopwords['user_stopwords'] = []
    response_data = {
        'stopwords': stopwords
    }
    # The data is returned synchronously, so this is a plain 200 rather than
    # 202 Accepted (which signals work that is still pending).
    return response_data, 200
# @bp.route('/<hashid:corpus_id>/generate-share-link', methods=['POST']) # @bp.route('/<hashid:corpus_id>/generate-share-link', methods=['POST'])
# @corpus_follower_permission_required('MANAGE_FOLLOWERS') # @corpus_follower_permission_required('MANAGE_FOLLOWERS')

View File

@ -42,7 +42,6 @@ def job_log(job_id):
with open(os.path.join(job.path, 'pipeline_data', 'logs', 'pyflow_log.txt')) as log_file: with open(os.path.join(job.path, 'pipeline_data', 'logs', 'pyflow_log.txt')) as log_file:
log = log_file.read() log = log_file.read()
response_data = { response_data = {
'message': '',
'jobLog': log 'jobLog': log
} }
return response_data, 200 return response_data, 200

View File

@ -1,6 +1,8 @@
class CorpusAnalysisApp { class CorpusAnalysisApp {
constructor(corpusId) { constructor(corpusId) {
this.data = {}; this.data = {
promises: {getStopwords: []}
};
// HTML elements // HTML elements
this.elements = { this.elements = {
@ -22,6 +24,49 @@ class CorpusAnalysisApp {
}; };
} }
// getStopwords(language) {
// if (language in this.data.promises.getStopwords) {
// console.log('Stopwords already loaded');
// return this.data.promises.getStopwords[language];
// }
// this.data.promises.getStopwords[language] = new Promise((resolve, reject) => {
// Requests.corpora.entity.getStopwords(language)
// .then((response) => {
// response.json()
// .then((json) => {
// let stopwords = json.stopwords;
// resolve(stopwords);
// })
// .catch((error) => {
// reject(error);
// });
// });
// });
// return this.data.promises.getStopwords[language];
// }
getStopwords() {
if (this.data.promises.getStopwords.length !== 0) {
console.log('Stopwords already loaded');
return this.data.promises.getStopwords;
}
this.data.promises.getStopwords = new Promise((resolve, reject) => {
Requests.corpora.entity.getStopwords()
.then((response) => {
response.json()
.then((json) => {
let stopwords = json.stopwords;
resolve(stopwords);
})
.catch((error) => {
reject(error);
});
});
});
return this.data.promises.getStopwords;
}
init() { init() {
this.disableActionElements(); this.disableActionElements();
this.elements.m.initModal.open(); this.elements.m.initModal.open();
@ -161,9 +206,28 @@ class CorpusAnalysisApp {
type: 'pie' type: 'pie'
} }
]; ];
let config = {responsive: true}; let graphLayout = {
showlegend: true,
height: 486,
margin: {
l: 10,
r: 10,
b: 10,
t: 10
},
legend: {
"orientation": "h",
font: {
size: 10
}
}
};
let config = {
responsive: true,
displaylogo: false
};
Plotly.newPlot(textProportionsGraphicElement, graphData, config); Plotly.newPlot(textProportionsGraphicElement, graphData, graphLayout, config);
} }
renderFrequenciesGraphic(corpusData) { renderFrequenciesGraphic(corpusData) {
@ -171,42 +235,106 @@ class CorpusAnalysisApp {
let frequenciesTokenCategoryDropdownListElement = document.querySelector("#frequencies-token-category-dropdown"); let frequenciesTokenCategoryDropdownListElement = document.querySelector("#frequencies-token-category-dropdown");
let frequenciesGraphicElement = document.querySelector('#frequencies-graphic'); let frequenciesGraphicElement = document.querySelector('#frequencies-graphic');
let texts = Object.entries(corpusData.s_attrs.text.lexicon); let texts = Object.entries(corpusData.s_attrs.text.lexicon);
let graphtype = document.querySelector('.frequencies-graph-mode-button.disabled').dataset.graphType;
let graphModeButtons = document.querySelectorAll('.frequencies-graph-mode-button');
frequenciesTokenCategoryDropdownListElement.addEventListener('click', (event) => { frequenciesTokenCategoryDropdownListElement.addEventListener('click', (event) => {
frequenciesTokenCategoryDropdownElement.firstChild.textContent = event.target.innerHTML; frequenciesTokenCategoryDropdownElement.firstChild.textContent = event.target.innerHTML;
this.renderFrequenciesGraphic(corpusData); this.renderFrequenciesGraphic(corpusData);
}); });
graphModeButtons.forEach(graphModeButton => {
graphModeButton.addEventListener('click', (event) => {
graphModeButtons.forEach(btn => {
btn.classList.remove('disabled');
});
event.target.closest('.frequencies-graph-mode-button').classList.add('disabled');
this.renderFrequenciesGraphic(corpusData);
});
});
let tokenCategory = frequenciesTokenCategoryDropdownElement.firstChild.textContent.toLowerCase(); let tokenCategory = frequenciesTokenCategoryDropdownElement.firstChild.textContent.toLowerCase();
let graphData = this.createFrequenciesGraphData(tokenCategory, texts, corpusData); this.createFrequenciesGraphData(tokenCategory, texts, corpusData, graphtype)
let graphLayout = { .then(graphData => {
barmode: 'stack', let graphLayout = {
type: 'bar' barmode: graphtype === 'bar' ? 'stack' : '',
}; margin: {
let config = {responsive: true}; t: 20,
l: 50
Plotly.newPlot(frequenciesGraphicElement, graphData, graphLayout, config); },
yaxis: {
showticklabels: graphtype === 'markers' ? false : true
},
};
let config = {
responsive: true,
modeBarButtonsToRemove: ['zoom2d', 'select2d', 'lasso2d', 'zoomIn2d', 'zoomOut2d', 'autoScale2d', 'resetScale2d'],
displaylogo: false
};
Plotly.newPlot(frequenciesGraphicElement, graphData, graphLayout, config);
});
} }
createFrequenciesGraphData(category, texts, corpusData) { createFrequenciesGraphData(category, texts, corpusData, graphtype) {
let graphData = []; return new Promise((resolve, reject) => {
let sortedData = Object.entries(corpusData.corpus.freqs[category]).sort((a, b) => b[1] - a[1]).slice(0, 5); this.getStopwords()
.then(stopwords => {
for (let item of sortedData) { this.renderStopwordSettingsModal(stopwords);
let data = { let stopwordList = [];
x: texts.map(text => `${corpusData.values.s_attrs.text[text[0]].title} (${corpusData.values.s_attrs.text[text[0]].publishing_year})`), Object.values(stopwords).forEach(stopwordItems => {
y: texts.map(text => text[1].freqs[category][item[0]]), stopwordItems.forEach(stopword => {
name: corpusData.values.p_attrs[category][item[0]], stopwordList.push(stopword);
type: 'bar' });
}; });
graphData.push(data); let graphData = [];
} let filteredData = Object.entries(corpusData.corpus.freqs[category])
.sort((a, b) => b[1] - a[1])
return graphData; .filter(item => !stopwordList.includes(corpusData.values.p_attrs[category][item[0]].toLowerCase()))
.slice(0, 5);
if (graphtype !== 'markers') {
for (let item of filteredData) {
let data = {
x: texts.map(text => `${corpusData.values.s_attrs.text[text[0]].title} (${corpusData.values.s_attrs.text[text[0]].publishing_year})`),
y: texts.map(text => text[1].freqs[category][item[0]] || 0),
name: corpusData.values.p_attrs[category][item[0]],
type: graphtype
};
graphData.push(data);
}
} else {
for (let item of filteredData) {
let size = texts.map(text => text[1].freqs[category][item[0]] || 0);
let data = {
x: texts.map(text => `${corpusData.values.s_attrs.text[text[0]].title} (${corpusData.values.s_attrs.text[text[0]].publishing_year})`),
y: texts.map(text => corpusData.values.p_attrs[category][item[0]]),
name: corpusData.values.p_attrs[category][item[0]],
text: texts.map(text => `${corpusData.values.p_attrs[category][item[0]]}<br>${text[1].freqs[category][item[0]] || 0}`),
mode: 'markers',
marker: {
size: size,
// sizeref: 2.0 * Math.max(...size) / (80**2),
// sizemode: 'area',
sizeref: 0.2
}
};
graphData.push(data);
}
}
resolve(graphData);
})
.catch(error => {
reject(error);
});
});
} }
renderStopwordSettingsModal(stopwords) {
let stopwordInputField = document.querySelector('.stopword-input-field');
}
renderBoundsGraphic(corpusData) { renderBoundsGraphic(corpusData) {
let boundsGraphicElement = document.querySelector('#bounds-graphic'); let boundsGraphicElement = document.querySelector('#bounds-graphic');
@ -238,7 +366,11 @@ class CorpusAnalysisApp {
} }
}; };
let config = {responsive: true}; let config = {
responsive: true,
modeBarButtonsToRemove: ['zoom2d', 'select2d', 'lasso2d', 'zoomIn2d', 'zoomOut2d', 'autoScale2d', 'resetScale2d'],
displaylogo: false
};
Plotly.newPlot(boundsGraphicElement, graphData, graphLayout, config); Plotly.newPlot(boundsGraphicElement, graphData, graphLayout, config);
} }

View File

@ -22,9 +22,11 @@ Requests.JSONfetch = (input, init={}) => {
response.json() response.json()
.then( .then(
(json) => { (json) => {
let message = json.message || json; let message = json.message;
let category = json.category || 'message'; let category = json.category || 'message';
app.flash(message, category); if (message) {
app.flash(message, category);
}
}, },
(error) => { (error) => {
app.flash(`[${response.status}]: ${response.statusText}`, 'error'); app.flash(`[${response.status}]: ${response.statusText}`, 'error');

View File

@ -31,6 +31,14 @@ Requests.corpora.entity.generateShareLink = (corpusId, role, expiration) => {
return Requests.JSONfetch(input, init); return Requests.JSONfetch(input, init);
}; };
/**
 * Issue a GET request to `/corpora/stopwords` via `Requests.JSONfetch`.
 *
 * @returns {*} Whatever `Requests.JSONfetch` returns for the request.
 */
Requests.corpora.entity.getStopwords = () => {
  const requestOptions = {method: 'GET'};
  return Requests.JSONfetch('/corpora/stopwords', requestOptions);
};
Requests.corpora.entity.isPublic = {}; Requests.corpora.entity.isPublic = {};
Requests.corpora.entity.isPublic.update = (corpusId, isPublic) => { Requests.corpora.entity.isPublic.update = (corpusId, isPublic) => {
@ -43,4 +51,3 @@ Requests.corpora.entity.isPublic.update = (corpusId, isPublic) => {
}; };

View File

@ -98,19 +98,19 @@
</div> </div>
</div> </div>
<div class="row"> <div class="row">
<div class="col s6"> <div class="col s4">
<div class="card hoverable"> <div class="card hoverable">
<div class="card-content"> <div class="card-content">
<span class="card-title">Proportions</span> <span class="card-title">Proportions</span>
<p>of texts within the corpus</p> <p>of texts within the corpus</p>
<div id="text-proportions-graphic"></div> <div id="text-proportions-graphic" style="width:100"></div>
</div> </div>
</div> </div>
</div> </div>
<div class="col s6"> <div class="col s8">
<div class="card hoverable"> <div class="card hoverable">
<div class="card-content"> <div class="card-content">
<span class="card-title"><a class="dropdown-trigger btn" data-target="frequencies-token-category-dropdown">Word<i class="material-icons right">arrow_drop_down</i></a> Frequencies</span> <span class="card-title">Frequencies</span>
<ul id="frequencies-token-category-dropdown" class="dropdown-content"> <ul id="frequencies-token-category-dropdown" class="dropdown-content">
<li><a data-token-category="word">Word</a></li> <li><a data-token-category="word">Word</a></li>
<li><a data-token-category="lemma">Lemma</a></li> <li><a data-token-category="lemma">Lemma</a></li>
@ -119,6 +119,11 @@
</ul> </ul>
<p>within the texts of the 5 most frequent words in the corpus</p> <p>within the texts of the 5 most frequent words in the corpus</p>
<div id="frequencies-graphic"></div> <div id="frequencies-graphic"></div>
<a class="dropdown-trigger btn" data-target="frequencies-token-category-dropdown">Word<i class="material-icons right">arrow_drop_down</i></a>
<a class="btn disabled frequencies-graph-mode-button" data-graph-type="bar"><i class="material-icons">equalizer</i></a>
<a class="btn frequencies-graph-mode-button" data-graph-type="scatter"><i class="material-icons">show_chart</i></a>
<a class="btn frequencies-graph-mode-button" data-graph-type="markers"><i class="material-icons">bubble_chart</i></a>
<a class="btn-flat modal-trigger" href="#frequencies-stopwords-setting-modal"><i class="material-icons grey-text text-darken-2">settings</i></a>
</div> </div>
</div> </div>
</div> </div>
@ -161,6 +166,21 @@
</div> </div>
</div> </div>
<div class="modal" id="frequencies-stopwords-setting-modal">
<div class="modal-content">
<h4>Settings</h4>
<p>Here you can change the stopword-lists. Add your own stopwords or change the already existing below.</p>
<div class="chips chips-placeholder stopword-input-field"></div>
<div class="row">
<div class="input-field col s3">
<select class="stopword-language-selection"></select>
<label>Stopword language select</label>
</div>
</div>
</div>
</div>
{% for extension in extensions %} {% for extension in extensions %}
{{ extension.modals }} {{ extension.modals }}
{% endfor %} {% endfor %}

View File

@ -20,6 +20,7 @@ Flask-WTF
hiredis hiredis
MarkupSafe==2.0.1 MarkupSafe==2.0.1
marshmallow-sqlalchemy==0.29.0 marshmallow-sqlalchemy==0.29.0
nltk
psycopg2 psycopg2
PyJWT PyJWT
pyScss pyScss