mirror of
https://gitlab.ub.uni-bielefeld.de/sfb1288inf/nopaque.git
synced 2025-01-24 16:40:35 +00:00
New visualizations for frequencies
This commit is contained in:
parent
6c31788402
commit
beb157092e
@ -7,6 +7,8 @@ from app.decorators import content_negotiation
|
||||
from app.models import Corpus, CorpusFollowerRole
|
||||
from . import bp
|
||||
from .decorators import corpus_follower_permission_required, corpus_owner_or_admin_required
|
||||
import nltk
|
||||
from string import punctuation
|
||||
|
||||
|
||||
@bp.route('/<hashid:corpus_id>', methods=['DELETE'])
|
||||
@ -56,6 +58,27 @@ def build_corpus(corpus_id):
|
||||
}
|
||||
return response_data, 202
|
||||
|
||||
@bp.route('/stopwords')
|
||||
@content_negotiation(produces='application/json')
|
||||
def get_stopwords():
|
||||
# data = request.json
|
||||
# if not isinstance(data, dict):
|
||||
# abort(400)
|
||||
# language = data.get('language')
|
||||
# if not isinstance(language, str):
|
||||
# abort(400)
|
||||
nltk.download('stopwords')
|
||||
languages = ["german", "english", "catalan", "greek", "spanish", "french", "italian", "russian", "chinese"]
|
||||
stopwords = {}
|
||||
for language in languages:
|
||||
stopwords[language] = nltk.corpus.stopwords.words(language)
|
||||
stopwords['punctuation'] = list(punctuation) + ['—', '|']
|
||||
stopwords['user_stopwords'] = []
|
||||
print(stopwords)
|
||||
response_data = {
|
||||
'stopwords': stopwords
|
||||
}
|
||||
return response_data, 202
|
||||
|
||||
# @bp.route('/<hashid:corpus_id>/generate-share-link', methods=['POST'])
|
||||
# @corpus_follower_permission_required('MANAGE_FOLLOWERS')
|
||||
|
@ -42,7 +42,6 @@ def job_log(job_id):
|
||||
with open(os.path.join(job.path, 'pipeline_data', 'logs', 'pyflow_log.txt')) as log_file:
|
||||
log = log_file.read()
|
||||
response_data = {
|
||||
'message': '',
|
||||
'jobLog': log
|
||||
}
|
||||
return response_data, 200
|
||||
|
@ -1,6 +1,8 @@
|
||||
class CorpusAnalysisApp {
|
||||
constructor(corpusId) {
|
||||
this.data = {};
|
||||
this.data = {
|
||||
promises: {getStopwords: []}
|
||||
};
|
||||
|
||||
// HTML elements
|
||||
this.elements = {
|
||||
@ -22,6 +24,49 @@ class CorpusAnalysisApp {
|
||||
};
|
||||
}
|
||||
|
||||
// getStopwords(language) {
|
||||
// if (language in this.data.promises.getStopwords) {
|
||||
// console.log('Stopwords already loaded');
|
||||
// return this.data.promises.getStopwords[language];
|
||||
// }
|
||||
// this.data.promises.getStopwords[language] = new Promise((resolve, reject) => {
|
||||
// Requests.corpora.entity.getStopwords(language)
|
||||
// .then((response) => {
|
||||
// response.json()
|
||||
// .then((json) => {
|
||||
// let stopwords = json.stopwords;
|
||||
// resolve(stopwords);
|
||||
// })
|
||||
// .catch((error) => {
|
||||
// reject(error);
|
||||
// });
|
||||
// });
|
||||
// });
|
||||
// return this.data.promises.getStopwords[language];
|
||||
// }
|
||||
|
||||
getStopwords() {
|
||||
if (this.data.promises.getStopwords.length !== 0) {
|
||||
console.log('Stopwords already loaded');
|
||||
return this.data.promises.getStopwords;
|
||||
}
|
||||
this.data.promises.getStopwords = new Promise((resolve, reject) => {
|
||||
Requests.corpora.entity.getStopwords()
|
||||
.then((response) => {
|
||||
response.json()
|
||||
.then((json) => {
|
||||
let stopwords = json.stopwords;
|
||||
resolve(stopwords);
|
||||
})
|
||||
.catch((error) => {
|
||||
reject(error);
|
||||
});
|
||||
});
|
||||
});
|
||||
return this.data.promises.getStopwords;
|
||||
}
|
||||
|
||||
|
||||
init() {
|
||||
this.disableActionElements();
|
||||
this.elements.m.initModal.open();
|
||||
@ -161,9 +206,28 @@ class CorpusAnalysisApp {
|
||||
type: 'pie'
|
||||
}
|
||||
];
|
||||
let config = {responsive: true};
|
||||
let graphLayout = {
|
||||
showlegend: true,
|
||||
height: 486,
|
||||
margin: {
|
||||
l: 10,
|
||||
r: 10,
|
||||
b: 10,
|
||||
t: 10
|
||||
},
|
||||
legend: {
|
||||
"orientation": "h",
|
||||
font: {
|
||||
size: 10
|
||||
}
|
||||
}
|
||||
};
|
||||
let config = {
|
||||
responsive: true,
|
||||
displaylogo: false
|
||||
};
|
||||
|
||||
Plotly.newPlot(textProportionsGraphicElement, graphData, config);
|
||||
Plotly.newPlot(textProportionsGraphicElement, graphData, graphLayout, config);
|
||||
}
|
||||
|
||||
renderFrequenciesGraphic(corpusData) {
|
||||
@ -171,42 +235,106 @@ class CorpusAnalysisApp {
|
||||
let frequenciesTokenCategoryDropdownListElement = document.querySelector("#frequencies-token-category-dropdown");
|
||||
let frequenciesGraphicElement = document.querySelector('#frequencies-graphic');
|
||||
let texts = Object.entries(corpusData.s_attrs.text.lexicon);
|
||||
|
||||
|
||||
let graphtype = document.querySelector('.frequencies-graph-mode-button.disabled').dataset.graphType;
|
||||
let graphModeButtons = document.querySelectorAll('.frequencies-graph-mode-button');
|
||||
|
||||
frequenciesTokenCategoryDropdownListElement.addEventListener('click', (event) => {
|
||||
frequenciesTokenCategoryDropdownElement.firstChild.textContent = event.target.innerHTML;
|
||||
this.renderFrequenciesGraphic(corpusData);
|
||||
});
|
||||
|
||||
graphModeButtons.forEach(graphModeButton => {
|
||||
graphModeButton.addEventListener('click', (event) => {
|
||||
graphModeButtons.forEach(btn => {
|
||||
btn.classList.remove('disabled');
|
||||
});
|
||||
event.target.closest('.frequencies-graph-mode-button').classList.add('disabled');
|
||||
this.renderFrequenciesGraphic(corpusData);
|
||||
});
|
||||
});
|
||||
|
||||
let tokenCategory = frequenciesTokenCategoryDropdownElement.firstChild.textContent.toLowerCase();
|
||||
|
||||
let graphData = this.createFrequenciesGraphData(tokenCategory, texts, corpusData);
|
||||
let graphLayout = {
|
||||
barmode: 'stack',
|
||||
type: 'bar'
|
||||
};
|
||||
let config = {responsive: true};
|
||||
|
||||
Plotly.newPlot(frequenciesGraphicElement, graphData, graphLayout, config);
|
||||
this.createFrequenciesGraphData(tokenCategory, texts, corpusData, graphtype)
|
||||
.then(graphData => {
|
||||
let graphLayout = {
|
||||
barmode: graphtype === 'bar' ? 'stack' : '',
|
||||
margin: {
|
||||
t: 20,
|
||||
l: 50
|
||||
},
|
||||
yaxis: {
|
||||
showticklabels: graphtype === 'markers' ? false : true
|
||||
},
|
||||
};
|
||||
let config = {
|
||||
responsive: true,
|
||||
modeBarButtonsToRemove: ['zoom2d', 'select2d', 'lasso2d', 'zoomIn2d', 'zoomOut2d', 'autoScale2d', 'resetScale2d'],
|
||||
displaylogo: false
|
||||
};
|
||||
Plotly.newPlot(frequenciesGraphicElement, graphData, graphLayout, config);
|
||||
});
|
||||
}
|
||||
|
||||
createFrequenciesGraphData(category, texts, corpusData) {
|
||||
let graphData = [];
|
||||
let sortedData = Object.entries(corpusData.corpus.freqs[category]).sort((a, b) => b[1] - a[1]).slice(0, 5);
|
||||
|
||||
for (let item of sortedData) {
|
||||
let data = {
|
||||
x: texts.map(text => `${corpusData.values.s_attrs.text[text[0]].title} (${corpusData.values.s_attrs.text[text[0]].publishing_year})`),
|
||||
y: texts.map(text => text[1].freqs[category][item[0]]),
|
||||
name: corpusData.values.p_attrs[category][item[0]],
|
||||
type: 'bar'
|
||||
};
|
||||
graphData.push(data);
|
||||
}
|
||||
|
||||
return graphData;
|
||||
createFrequenciesGraphData(category, texts, corpusData, graphtype) {
|
||||
return new Promise((resolve, reject) => {
|
||||
this.getStopwords()
|
||||
.then(stopwords => {
|
||||
this.renderStopwordSettingsModal(stopwords);
|
||||
let stopwordList = [];
|
||||
Object.values(stopwords).forEach(stopwordItems => {
|
||||
stopwordItems.forEach(stopword => {
|
||||
stopwordList.push(stopword);
|
||||
});
|
||||
});
|
||||
let graphData = [];
|
||||
let filteredData = Object.entries(corpusData.corpus.freqs[category])
|
||||
.sort((a, b) => b[1] - a[1])
|
||||
.filter(item => !stopwordList.includes(corpusData.values.p_attrs[category][item[0]].toLowerCase()))
|
||||
.slice(0, 5);
|
||||
if (graphtype !== 'markers') {
|
||||
for (let item of filteredData) {
|
||||
let data = {
|
||||
x: texts.map(text => `${corpusData.values.s_attrs.text[text[0]].title} (${corpusData.values.s_attrs.text[text[0]].publishing_year})`),
|
||||
y: texts.map(text => text[1].freqs[category][item[0]] || 0),
|
||||
name: corpusData.values.p_attrs[category][item[0]],
|
||||
type: graphtype
|
||||
};
|
||||
graphData.push(data);
|
||||
}
|
||||
} else {
|
||||
for (let item of filteredData) {
|
||||
let size = texts.map(text => text[1].freqs[category][item[0]] || 0);
|
||||
let data = {
|
||||
x: texts.map(text => `${corpusData.values.s_attrs.text[text[0]].title} (${corpusData.values.s_attrs.text[text[0]].publishing_year})`),
|
||||
y: texts.map(text => corpusData.values.p_attrs[category][item[0]]),
|
||||
name: corpusData.values.p_attrs[category][item[0]],
|
||||
text: texts.map(text => `${corpusData.values.p_attrs[category][item[0]]}<br>${text[1].freqs[category][item[0]] || 0}`),
|
||||
mode: 'markers',
|
||||
marker: {
|
||||
size: size,
|
||||
// sizeref: 2.0 * Math.max(...size) / (80**2),
|
||||
// sizemode: 'area',
|
||||
sizeref: 0.2
|
||||
}
|
||||
};
|
||||
graphData.push(data);
|
||||
}
|
||||
}
|
||||
resolve(graphData);
|
||||
})
|
||||
.catch(error => {
|
||||
reject(error);
|
||||
});
|
||||
});
|
||||
}
|
||||
|
||||
renderStopwordSettingsModal(stopwords) {
|
||||
let stopwordInputField = document.querySelector('.stopword-input-field');
|
||||
}
|
||||
|
||||
|
||||
|
||||
renderBoundsGraphic(corpusData) {
|
||||
let boundsGraphicElement = document.querySelector('#bounds-graphic');
|
||||
|
||||
@ -238,7 +366,11 @@ class CorpusAnalysisApp {
|
||||
}
|
||||
};
|
||||
|
||||
let config = {responsive: true};
|
||||
let config = {
|
||||
responsive: true,
|
||||
modeBarButtonsToRemove: ['zoom2d', 'select2d', 'lasso2d', 'zoomIn2d', 'zoomOut2d', 'autoScale2d', 'resetScale2d'],
|
||||
displaylogo: false
|
||||
};
|
||||
|
||||
Plotly.newPlot(boundsGraphicElement, graphData, graphLayout, config);
|
||||
}
|
||||
|
@ -22,9 +22,11 @@ Requests.JSONfetch = (input, init={}) => {
|
||||
response.json()
|
||||
.then(
|
||||
(json) => {
|
||||
let message = json.message || json;
|
||||
let message = json.message;
|
||||
let category = json.category || 'message';
|
||||
app.flash(message, category);
|
||||
if (message) {
|
||||
app.flash(message, category);
|
||||
}
|
||||
},
|
||||
(error) => {
|
||||
app.flash(`[${response.status}]: ${response.statusText}`, 'error');
|
||||
|
@ -31,6 +31,14 @@ Requests.corpora.entity.generateShareLink = (corpusId, role, expiration) => {
|
||||
return Requests.JSONfetch(input, init);
|
||||
};
|
||||
|
||||
Requests.corpora.entity.getStopwords = () => {
|
||||
let input = `/corpora/stopwords`;
|
||||
let init = {
|
||||
method: 'GET'
|
||||
};
|
||||
return Requests.JSONfetch(input, init);
|
||||
};
|
||||
|
||||
Requests.corpora.entity.isPublic = {};
|
||||
|
||||
Requests.corpora.entity.isPublic.update = (corpusId, isPublic) => {
|
||||
@ -43,4 +51,3 @@ Requests.corpora.entity.isPublic.update = (corpusId, isPublic) => {
|
||||
};
|
||||
|
||||
|
||||
|
||||
|
@ -98,19 +98,19 @@
|
||||
</div>
|
||||
</div>
|
||||
<div class="row">
|
||||
<div class="col s6">
|
||||
<div class="col s4">
|
||||
<div class="card hoverable">
|
||||
<div class="card-content">
|
||||
<span class="card-title">Proportions</span>
|
||||
<p>of texts within the corpus</p>
|
||||
<div id="text-proportions-graphic"></div>
|
||||
<div id="text-proportions-graphic" style="width:100"></div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="col s6">
|
||||
<div class="col s8">
|
||||
<div class="card hoverable">
|
||||
<div class="card-content">
|
||||
<span class="card-title"><a class="dropdown-trigger btn" data-target="frequencies-token-category-dropdown">Word<i class="material-icons right">arrow_drop_down</i></a> Frequencies</span>
|
||||
<span class="card-title">Frequencies</span>
|
||||
<ul id="frequencies-token-category-dropdown" class="dropdown-content">
|
||||
<li><a data-token-category="word">Word</a></li>
|
||||
<li><a data-token-category="lemma">Lemma</a></li>
|
||||
@ -119,6 +119,11 @@
|
||||
</ul>
|
||||
<p>within the texts of the 5 most frequent words in the corpus</p>
|
||||
<div id="frequencies-graphic"></div>
|
||||
<a class="dropdown-trigger btn" data-target="frequencies-token-category-dropdown">Word<i class="material-icons right">arrow_drop_down</i></a>
|
||||
<a class="btn disabled frequencies-graph-mode-button" data-graph-type="bar"><i class="material-icons">equalizer</i></a>
|
||||
<a class="btn frequencies-graph-mode-button" data-graph-type="scatter"><i class="material-icons">show_chart</i></a>
|
||||
<a class="btn frequencies-graph-mode-button" data-graph-type="markers"><i class="material-icons">bubble_chart</i></a>
|
||||
<a class="btn-flat modal-trigger" href="#frequencies-stopwords-setting-modal"><i class="material-icons grey-text text-darken-2">settings</i></a>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
@ -161,6 +166,21 @@
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div class="modal" id="frequencies-stopwords-setting-modal">
|
||||
<div class="modal-content">
|
||||
<h4>Settings</h4>
|
||||
<p>Here you can change the stopword-lists. Add your own stopwords or change the already existing below.</p>
|
||||
<div class="chips chips-placeholder stopword-input-field"></div>
|
||||
<div class="row">
|
||||
<div class="input-field col s3">
|
||||
<select class="stopword-language-selection"></select>
|
||||
<label>Stopword language select</label>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
|
||||
{% for extension in extensions %}
|
||||
{{ extension.modals }}
|
||||
{% endfor %}
|
||||
|
@ -20,6 +20,7 @@ Flask-WTF
|
||||
hiredis
|
||||
MarkupSafe==2.0.1
|
||||
marshmallow-sqlalchemy==0.29.0
|
||||
nltk
|
||||
psycopg2
|
||||
PyJWT
|
||||
pyScss
|
||||
|
Loading…
x
Reference in New Issue
Block a user