mirror of
https://gitlab.ub.uni-bielefeld.de/sfb1288inf/nopaque.git
synced 2024-11-14 16:55:42 +00:00
New visualizations for frequencies
This commit is contained in:
parent
6c31788402
commit
beb157092e
@ -7,6 +7,8 @@ from app.decorators import content_negotiation
|
|||||||
from app.models import Corpus, CorpusFollowerRole
|
from app.models import Corpus, CorpusFollowerRole
|
||||||
from . import bp
|
from . import bp
|
||||||
from .decorators import corpus_follower_permission_required, corpus_owner_or_admin_required
|
from .decorators import corpus_follower_permission_required, corpus_owner_or_admin_required
|
||||||
|
import nltk
|
||||||
|
from string import punctuation
|
||||||
|
|
||||||
|
|
||||||
@bp.route('/<hashid:corpus_id>', methods=['DELETE'])
|
@bp.route('/<hashid:corpus_id>', methods=['DELETE'])
|
||||||
@ -56,6 +58,27 @@ def build_corpus(corpus_id):
|
|||||||
}
|
}
|
||||||
return response_data, 202
|
return response_data, 202
|
||||||
|
|
||||||
|
@bp.route('/stopwords')
@content_negotiation(produces='application/json')
def get_stopwords():
    """Return the stopword lists offered by the frequencies visualization.

    The response body is a JSON object with a single key ``stopwords``
    that maps
      * each supported language name to its NLTK stopword list,
      * ``punctuation`` to the ASCII punctuation characters plus '—' and '|',
      * ``user_stopwords`` to an empty list.

    Returns:
        A ``(response_data, status_code)`` tuple with status 200.

    Raises:
        LookupError: if the NLTK stopwords corpus is still unavailable
            after the download attempt.
    """
    languages = [
        "german", "english", "catalan", "greek", "spanish",
        "french", "italian", "russian", "chinese"
    ]
    # Only hit the network when the corpus is really missing; calling
    # nltk.download() unconditionally costs a remote index lookup on
    # every single request.
    try:
        nltk.data.find('corpora/stopwords')
    except LookupError:
        nltk.download('stopwords')
    stopwords = {
        language: nltk.corpus.stopwords.words(language)
        for language in languages
    }
    stopwords['punctuation'] = list(punctuation) + ['—', '|']
    stopwords['user_stopwords'] = []
    response_data = {
        'stopwords': stopwords
    }
    # The data is delivered synchronously, so answer 200 (OK) instead of
    # 202 (Accepted), which announces processing that has not finished yet.
    return response_data, 200
|
||||||
|
|
||||||
# @bp.route('/<hashid:corpus_id>/generate-share-link', methods=['POST'])
|
# @bp.route('/<hashid:corpus_id>/generate-share-link', methods=['POST'])
|
||||||
# @corpus_follower_permission_required('MANAGE_FOLLOWERS')
|
# @corpus_follower_permission_required('MANAGE_FOLLOWERS')
|
||||||
|
@ -42,7 +42,6 @@ def job_log(job_id):
|
|||||||
with open(os.path.join(job.path, 'pipeline_data', 'logs', 'pyflow_log.txt')) as log_file:
|
with open(os.path.join(job.path, 'pipeline_data', 'logs', 'pyflow_log.txt')) as log_file:
|
||||||
log = log_file.read()
|
log = log_file.read()
|
||||||
response_data = {
|
response_data = {
|
||||||
'message': '',
|
|
||||||
'jobLog': log
|
'jobLog': log
|
||||||
}
|
}
|
||||||
return response_data, 200
|
return response_data, 200
|
||||||
|
@ -1,6 +1,8 @@
|
|||||||
class CorpusAnalysisApp {
|
class CorpusAnalysisApp {
|
||||||
constructor(corpusId) {
|
constructor(corpusId) {
|
||||||
this.data = {};
|
this.data = {
|
||||||
|
promises: {getStopwords: []}
|
||||||
|
};
|
||||||
|
|
||||||
// HTML elements
|
// HTML elements
|
||||||
this.elements = {
|
this.elements = {
|
||||||
@ -22,6 +24,49 @@ class CorpusAnalysisApp {
|
|||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// getStopwords(language) {
|
||||||
|
// if (language in this.data.promises.getStopwords) {
|
||||||
|
// console.log('Stopwords already loaded');
|
||||||
|
// return this.data.promises.getStopwords[language];
|
||||||
|
// }
|
||||||
|
// this.data.promises.getStopwords[language] = new Promise((resolve, reject) => {
|
||||||
|
// Requests.corpora.entity.getStopwords(language)
|
||||||
|
// .then((response) => {
|
||||||
|
// response.json()
|
||||||
|
// .then((json) => {
|
||||||
|
// let stopwords = json.stopwords;
|
||||||
|
// resolve(stopwords);
|
||||||
|
// })
|
||||||
|
// .catch((error) => {
|
||||||
|
// reject(error);
|
||||||
|
// });
|
||||||
|
// });
|
||||||
|
// });
|
||||||
|
// return this.data.promises.getStopwords[language];
|
||||||
|
// }
|
||||||
|
|
||||||
|
getStopwords() {
|
||||||
|
if (this.data.promises.getStopwords.length !== 0) {
|
||||||
|
console.log('Stopwords already loaded');
|
||||||
|
return this.data.promises.getStopwords;
|
||||||
|
}
|
||||||
|
this.data.promises.getStopwords = new Promise((resolve, reject) => {
|
||||||
|
Requests.corpora.entity.getStopwords()
|
||||||
|
.then((response) => {
|
||||||
|
response.json()
|
||||||
|
.then((json) => {
|
||||||
|
let stopwords = json.stopwords;
|
||||||
|
resolve(stopwords);
|
||||||
|
})
|
||||||
|
.catch((error) => {
|
||||||
|
reject(error);
|
||||||
|
});
|
||||||
|
});
|
||||||
|
});
|
||||||
|
return this.data.promises.getStopwords;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
init() {
|
init() {
|
||||||
this.disableActionElements();
|
this.disableActionElements();
|
||||||
this.elements.m.initModal.open();
|
this.elements.m.initModal.open();
|
||||||
@ -161,9 +206,28 @@ class CorpusAnalysisApp {
|
|||||||
type: 'pie'
|
type: 'pie'
|
||||||
}
|
}
|
||||||
];
|
];
|
||||||
let config = {responsive: true};
|
let graphLayout = {
|
||||||
|
showlegend: true,
|
||||||
|
height: 486,
|
||||||
|
margin: {
|
||||||
|
l: 10,
|
||||||
|
r: 10,
|
||||||
|
b: 10,
|
||||||
|
t: 10
|
||||||
|
},
|
||||||
|
legend: {
|
||||||
|
"orientation": "h",
|
||||||
|
font: {
|
||||||
|
size: 10
|
||||||
|
}
|
||||||
|
}
|
||||||
|
};
|
||||||
|
let config = {
|
||||||
|
responsive: true,
|
||||||
|
displaylogo: false
|
||||||
|
};
|
||||||
|
|
||||||
Plotly.newPlot(textProportionsGraphicElement, graphData, config);
|
Plotly.newPlot(textProportionsGraphicElement, graphData, graphLayout, config);
|
||||||
}
|
}
|
||||||
|
|
||||||
renderFrequenciesGraphic(corpusData) {
|
renderFrequenciesGraphic(corpusData) {
|
||||||
@ -171,42 +235,106 @@ class CorpusAnalysisApp {
|
|||||||
let frequenciesTokenCategoryDropdownListElement = document.querySelector("#frequencies-token-category-dropdown");
|
let frequenciesTokenCategoryDropdownListElement = document.querySelector("#frequencies-token-category-dropdown");
|
||||||
let frequenciesGraphicElement = document.querySelector('#frequencies-graphic');
|
let frequenciesGraphicElement = document.querySelector('#frequencies-graphic');
|
||||||
let texts = Object.entries(corpusData.s_attrs.text.lexicon);
|
let texts = Object.entries(corpusData.s_attrs.text.lexicon);
|
||||||
|
let graphtype = document.querySelector('.frequencies-graph-mode-button.disabled').dataset.graphType;
|
||||||
|
let graphModeButtons = document.querySelectorAll('.frequencies-graph-mode-button');
|
||||||
|
|
||||||
frequenciesTokenCategoryDropdownListElement.addEventListener('click', (event) => {
|
frequenciesTokenCategoryDropdownListElement.addEventListener('click', (event) => {
|
||||||
frequenciesTokenCategoryDropdownElement.firstChild.textContent = event.target.innerHTML;
|
frequenciesTokenCategoryDropdownElement.firstChild.textContent = event.target.innerHTML;
|
||||||
this.renderFrequenciesGraphic(corpusData);
|
this.renderFrequenciesGraphic(corpusData);
|
||||||
});
|
});
|
||||||
|
|
||||||
|
graphModeButtons.forEach(graphModeButton => {
|
||||||
|
graphModeButton.addEventListener('click', (event) => {
|
||||||
|
graphModeButtons.forEach(btn => {
|
||||||
|
btn.classList.remove('disabled');
|
||||||
|
});
|
||||||
|
event.target.closest('.frequencies-graph-mode-button').classList.add('disabled');
|
||||||
|
this.renderFrequenciesGraphic(corpusData);
|
||||||
|
});
|
||||||
|
});
|
||||||
|
|
||||||
let tokenCategory = frequenciesTokenCategoryDropdownElement.firstChild.textContent.toLowerCase();
|
let tokenCategory = frequenciesTokenCategoryDropdownElement.firstChild.textContent.toLowerCase();
|
||||||
|
|
||||||
let graphData = this.createFrequenciesGraphData(tokenCategory, texts, corpusData);
|
this.createFrequenciesGraphData(tokenCategory, texts, corpusData, graphtype)
|
||||||
let graphLayout = {
|
.then(graphData => {
|
||||||
barmode: 'stack',
|
let graphLayout = {
|
||||||
type: 'bar'
|
barmode: graphtype === 'bar' ? 'stack' : '',
|
||||||
};
|
margin: {
|
||||||
let config = {responsive: true};
|
t: 20,
|
||||||
|
l: 50
|
||||||
Plotly.newPlot(frequenciesGraphicElement, graphData, graphLayout, config);
|
},
|
||||||
|
yaxis: {
|
||||||
|
showticklabels: graphtype === 'markers' ? false : true
|
||||||
|
},
|
||||||
|
};
|
||||||
|
let config = {
|
||||||
|
responsive: true,
|
||||||
|
modeBarButtonsToRemove: ['zoom2d', 'select2d', 'lasso2d', 'zoomIn2d', 'zoomOut2d', 'autoScale2d', 'resetScale2d'],
|
||||||
|
displaylogo: false
|
||||||
|
};
|
||||||
|
Plotly.newPlot(frequenciesGraphicElement, graphData, graphLayout, config);
|
||||||
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
createFrequenciesGraphData(category, texts, corpusData) {
|
createFrequenciesGraphData(category, texts, corpusData, graphtype) {
|
||||||
let graphData = [];
|
return new Promise((resolve, reject) => {
|
||||||
let sortedData = Object.entries(corpusData.corpus.freqs[category]).sort((a, b) => b[1] - a[1]).slice(0, 5);
|
this.getStopwords()
|
||||||
|
.then(stopwords => {
|
||||||
for (let item of sortedData) {
|
this.renderStopwordSettingsModal(stopwords);
|
||||||
let data = {
|
let stopwordList = [];
|
||||||
x: texts.map(text => `${corpusData.values.s_attrs.text[text[0]].title} (${corpusData.values.s_attrs.text[text[0]].publishing_year})`),
|
Object.values(stopwords).forEach(stopwordItems => {
|
||||||
y: texts.map(text => text[1].freqs[category][item[0]]),
|
stopwordItems.forEach(stopword => {
|
||||||
name: corpusData.values.p_attrs[category][item[0]],
|
stopwordList.push(stopword);
|
||||||
type: 'bar'
|
});
|
||||||
};
|
});
|
||||||
graphData.push(data);
|
let graphData = [];
|
||||||
}
|
let filteredData = Object.entries(corpusData.corpus.freqs[category])
|
||||||
|
.sort((a, b) => b[1] - a[1])
|
||||||
return graphData;
|
.filter(item => !stopwordList.includes(corpusData.values.p_attrs[category][item[0]].toLowerCase()))
|
||||||
|
.slice(0, 5);
|
||||||
|
if (graphtype !== 'markers') {
|
||||||
|
for (let item of filteredData) {
|
||||||
|
let data = {
|
||||||
|
x: texts.map(text => `${corpusData.values.s_attrs.text[text[0]].title} (${corpusData.values.s_attrs.text[text[0]].publishing_year})`),
|
||||||
|
y: texts.map(text => text[1].freqs[category][item[0]] || 0),
|
||||||
|
name: corpusData.values.p_attrs[category][item[0]],
|
||||||
|
type: graphtype
|
||||||
|
};
|
||||||
|
graphData.push(data);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
for (let item of filteredData) {
|
||||||
|
let size = texts.map(text => text[1].freqs[category][item[0]] || 0);
|
||||||
|
let data = {
|
||||||
|
x: texts.map(text => `${corpusData.values.s_attrs.text[text[0]].title} (${corpusData.values.s_attrs.text[text[0]].publishing_year})`),
|
||||||
|
y: texts.map(text => corpusData.values.p_attrs[category][item[0]]),
|
||||||
|
name: corpusData.values.p_attrs[category][item[0]],
|
||||||
|
text: texts.map(text => `${corpusData.values.p_attrs[category][item[0]]}<br>${text[1].freqs[category][item[0]] || 0}`),
|
||||||
|
mode: 'markers',
|
||||||
|
marker: {
|
||||||
|
size: size,
|
||||||
|
// sizeref: 2.0 * Math.max(...size) / (80**2),
|
||||||
|
// sizemode: 'area',
|
||||||
|
sizeref: 0.2
|
||||||
|
}
|
||||||
|
};
|
||||||
|
graphData.push(data);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
resolve(graphData);
|
||||||
|
})
|
||||||
|
.catch(error => {
|
||||||
|
reject(error);
|
||||||
|
});
|
||||||
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
|
renderStopwordSettingsModal(stopwords) {
|
||||||
|
let stopwordInputField = document.querySelector('.stopword-input-field');
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
renderBoundsGraphic(corpusData) {
|
renderBoundsGraphic(corpusData) {
|
||||||
let boundsGraphicElement = document.querySelector('#bounds-graphic');
|
let boundsGraphicElement = document.querySelector('#bounds-graphic');
|
||||||
|
|
||||||
@ -238,7 +366,11 @@ class CorpusAnalysisApp {
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
let config = {responsive: true};
|
let config = {
|
||||||
|
responsive: true,
|
||||||
|
modeBarButtonsToRemove: ['zoom2d', 'select2d', 'lasso2d', 'zoomIn2d', 'zoomOut2d', 'autoScale2d', 'resetScale2d'],
|
||||||
|
displaylogo: false
|
||||||
|
};
|
||||||
|
|
||||||
Plotly.newPlot(boundsGraphicElement, graphData, graphLayout, config);
|
Plotly.newPlot(boundsGraphicElement, graphData, graphLayout, config);
|
||||||
}
|
}
|
||||||
|
@ -22,9 +22,11 @@ Requests.JSONfetch = (input, init={}) => {
|
|||||||
response.json()
|
response.json()
|
||||||
.then(
|
.then(
|
||||||
(json) => {
|
(json) => {
|
||||||
let message = json.message || json;
|
let message = json.message;
|
||||||
let category = json.category || 'message';
|
let category = json.category || 'message';
|
||||||
app.flash(message, category);
|
if (message) {
|
||||||
|
app.flash(message, category);
|
||||||
|
}
|
||||||
},
|
},
|
||||||
(error) => {
|
(error) => {
|
||||||
app.flash(`[${response.status}]: ${response.statusText}`, 'error');
|
app.flash(`[${response.status}]: ${response.statusText}`, 'error');
|
||||||
|
@ -31,6 +31,14 @@ Requests.corpora.entity.generateShareLink = (corpusId, role, expiration) => {
|
|||||||
return Requests.JSONfetch(input, init);
|
return Requests.JSONfetch(input, init);
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/**
 * Request the stopword lists via GET /corpora/stopwords.
 *
 * @returns whatever Requests.JSONfetch returns for this request
 */
Requests.corpora.entity.getStopwords = () => {
  const requestOptions = {method: 'GET'};
  return Requests.JSONfetch('/corpora/stopwords', requestOptions);
};
|
||||||
|
|
||||||
Requests.corpora.entity.isPublic = {};
|
Requests.corpora.entity.isPublic = {};
|
||||||
|
|
||||||
Requests.corpora.entity.isPublic.update = (corpusId, isPublic) => {
|
Requests.corpora.entity.isPublic.update = (corpusId, isPublic) => {
|
||||||
@ -43,4 +51,3 @@ Requests.corpora.entity.isPublic.update = (corpusId, isPublic) => {
|
|||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
@ -98,19 +98,19 @@
|
|||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
<div class="row">
|
<div class="row">
|
||||||
<div class="col s6">
|
<div class="col s4">
|
||||||
<div class="card hoverable">
|
<div class="card hoverable">
|
||||||
<div class="card-content">
|
<div class="card-content">
|
||||||
<span class="card-title">Proportions</span>
|
<span class="card-title">Proportions</span>
|
||||||
<p>of texts within the corpus</p>
|
<p>of texts within the corpus</p>
|
||||||
<div id="text-proportions-graphic"></div>
|
<div id="text-proportions-graphic" style="width:100"></div>
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
<div class="col s6">
|
<div class="col s8">
|
||||||
<div class="card hoverable">
|
<div class="card hoverable">
|
||||||
<div class="card-content">
|
<div class="card-content">
|
||||||
<span class="card-title"><a class="dropdown-trigger btn" data-target="frequencies-token-category-dropdown">Word<i class="material-icons right">arrow_drop_down</i></a> Frequencies</span>
|
<span class="card-title">Frequencies</span>
|
||||||
<ul id="frequencies-token-category-dropdown" class="dropdown-content">
|
<ul id="frequencies-token-category-dropdown" class="dropdown-content">
|
||||||
<li><a data-token-category="word">Word</a></li>
|
<li><a data-token-category="word">Word</a></li>
|
||||||
<li><a data-token-category="lemma">Lemma</a></li>
|
<li><a data-token-category="lemma">Lemma</a></li>
|
||||||
@ -119,6 +119,11 @@
|
|||||||
</ul>
|
</ul>
|
||||||
<p>within the texts of the 5 most frequent words in the corpus</p>
|
<p>within the texts of the 5 most frequent words in the corpus</p>
|
||||||
<div id="frequencies-graphic"></div>
|
<div id="frequencies-graphic"></div>
|
||||||
|
<a class="dropdown-trigger btn" data-target="frequencies-token-category-dropdown">Word<i class="material-icons right">arrow_drop_down</i></a>
|
||||||
|
<a class="btn disabled frequencies-graph-mode-button" data-graph-type="bar"><i class="material-icons">equalizer</i></a>
|
||||||
|
<a class="btn frequencies-graph-mode-button" data-graph-type="scatter"><i class="material-icons">show_chart</i></a>
|
||||||
|
<a class="btn frequencies-graph-mode-button" data-graph-type="markers"><i class="material-icons">bubble_chart</i></a>
|
||||||
|
<a class="btn-flat modal-trigger" href="#frequencies-stopwords-setting-modal"><i class="material-icons grey-text text-darken-2">settings</i></a>
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
@ -161,6 +166,21 @@
|
|||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
|
<div class="modal" id="frequencies-stopwords-setting-modal">
|
||||||
|
<div class="modal-content">
|
||||||
|
<h4>Settings</h4>
|
||||||
|
<p>Here you can change the stopword lists. Add your own stopwords or edit the existing ones below.</p>
|
||||||
|
<div class="chips chips-placeholder stopword-input-field"></div>
|
||||||
|
<div class="row">
|
||||||
|
<div class="input-field col s3">
|
||||||
|
<select class="stopword-language-selection"></select>
|
||||||
|
<label>Stopword language select</label>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
|
||||||
{% for extension in extensions %}
|
{% for extension in extensions %}
|
||||||
{{ extension.modals }}
|
{{ extension.modals }}
|
||||||
{% endfor %}
|
{% endfor %}
|
||||||
|
@ -20,6 +20,7 @@ Flask-WTF
|
|||||||
hiredis
|
hiredis
|
||||||
MarkupSafe==2.0.1
|
MarkupSafe==2.0.1
|
||||||
marshmallow-sqlalchemy==0.29.0
|
marshmallow-sqlalchemy==0.29.0
|
||||||
|
nltk
|
||||||
psycopg2
|
psycopg2
|
||||||
PyJWT
|
PyJWT
|
||||||
pyScss
|
pyScss
|
||||||
|
Loading…
Reference in New Issue
Block a user