Merge branch 'visualizations-update' of gitlab.ub.uni-bielefeld.de:sfb1288inf/nopaque into visualizations-update

This commit is contained in:
Patrick Jentsch
2023-06-22 12:46:36 +02:00
5 changed files with 17927 additions and 257 deletions

File diff suppressed because it is too large Load Diff

View File

@@ -34,25 +34,25 @@ class CorpusAnalysisApp {
.then(
cQiCorpus => {
this.data.corpus = {o: cQiCorpus};
this.data.corpus.o.getVisualizationData().then(
(data) => {
console.log(data);
}
);
// this.data.corpus.o.getVisualizationData()
// .then(
// (visualizationData) => {
// console.log(visualizationData);
// this.renderGeneralCorpusInfo(visualizationData);
// this.renderTextInfoList(visualizationData);
// this.renderTextProportionsGraphic(visualizationData);
// }
// );
this.data.corpus.o.getVisualizationData()
.then(
(data) => {
console.log(data);
// this.renderGeneralCorpusInfo(data);
// this.renderTextInfoList(data);
// this.renderTextProportionsGraphic(data);
// this.renderWordFrequenciesGraphic(data);
// this.renderBoundsGraphic(data);
}
);
this.data.corpus.o.getCorpusData()
.then(corpusData => {
console.log(corpusData);
this.renderGeneralCorpusInfo(corpusData);
this.renderTextInfoList(corpusData);
this.renderTextProportionsGraphic(corpusData);
this.renderFrequenciesGraphic(corpusData);
this.renderBoundsGraphic(corpusData);
});
// TODO: Don't do this here
cQiCorpus.updateDb();
@@ -117,38 +117,135 @@ class CorpusAnalysisApp {
}
renderGeneralCorpusInfo(corpusData) {
let corpusGeneralInfoListElement = document.querySelector('.corpus-general-info-list');
corpusGeneralInfoListElement.querySelector('.corpus-num-tokens').innerHTML = `<b>Number of tokens:</b> ${this.data.corpus.o.size}`;
corpusGeneralInfoListElement.querySelector('.corpus-text-count').innerHTML = `<b>Corpus text count:</b> ${corpusData.texts.length}`;
corpusGeneralInfoListElement.querySelector('.corpus-num-unique-words').innerHTML = `<b>Corpus unique word count:</b> ${corpusData.num_unique_words}`;
corpusGeneralInfoListElement.querySelector('.corpus-num-unique-lemmas').innerHTML = `<b>Corpus unique lemma count:</b> ${corpusData.num_unique_lemmas}`;
// corpusGeneralInfoListElement.querySelector('.corpus-most-frequent-words').innerHTML = `<b>Corpus most frequent words:</b> ${corpusData.most_frequent_words.join(', ');
corpusGeneralInfoListElement.querySelector('.corpus-num-sentences').innerHTML = `<b>Corpus sentence count:</b> ${corpusData.num_sentences}`;
corpusGeneralInfoListElement.querySelector('.corpus-average-sentence-length').innerHTML = `<b>Corpus average sentence length:</b> ${corpusData.average_sentence_length}`;
corpusGeneralInfoListElement.querySelector('.corpus-num-ent-types').innerHTML = `<b>Corpus entity count:</b> ${corpusData.num_ent_types}`;
corpusGeneralInfoListElement.querySelector('.corpus-num-unique-ent-types').innerHTML = `<b>Corpus unique entity count:</b> ${corpusData.num_unique_ent_types}`;
document.querySelector('.corpus-num-tokens').innerHTML = corpusData.corpus.lexicon[0].counts.token;
document.querySelector('.corpus-num-s').innerHTML = corpusData.corpus.lexicon[0].counts.s;
document.querySelector('.corpus-num-unique-words').innerHTML = Object.entries(corpusData.corpus.lexicon[0].freqs.word).length;
document.querySelector('.corpus-num-unique-lemmas').innerHTML = Object.entries(corpusData.corpus.lexicon[0].freqs.lemma).length;
document.querySelector('.corpus-num-unique-pos').innerHTML = Object.entries(corpusData.corpus.lexicon[0].freqs.pos).length;
document.querySelector('.corpus-num-unique-simple-pos').innerHTML = Object.entries(corpusData.corpus.lexicon[0].freqs.simple_pos).length;
}
renderTextInfoList(corpusData) {
let corpusTextInfoListElement = document.querySelector('.corpus-text-info-list');
let corpusTextInfoList = new CorpusTextInfoList(corpusTextInfoListElement);
corpusTextInfoList.add(corpusData.texts);
let texts = corpusData.text.lexicon;
let textData = [];
for (let i = 0; i < Object.entries(texts).length; i++) {
let resource = {
title: corpusData.lookups.text[i].title,
publishing_year: corpusData.lookups.text[i].publishing_year,
num_tokens: corpusData.text.lexicon[i].counts.token,
num_sentences: corpusData.text.lexicon[i].counts.s,
num_unique_words: Object.entries(corpusData.text.lexicon[i].freqs.word).length,
num_unique_lemmas: Object.entries(corpusData.text.lexicon[i].freqs.lemma).length,
num_unique_pos: Object.entries(corpusData.text.lexicon[i].freqs.pos).length,
num_unique_simple_pos: Object.entries(corpusData.text.lexicon[i].freqs.simple_pos).length
};
textData.push(resource);
}
corpusTextInfoList.add(textData);
let textCountChipElement = document.querySelector('.text-count-chip');
textCountChipElement.innerHTML = `Text count: ${Object.values(corpusData.text.lexicon).length}`;
}
renderTextProportionsGraphic(corpusData) {
let textProportionsGraphicElement = document.querySelector('#text-proportions-graphic');
let texts = Object.entries(corpusData.text.lexicon);
let graphData = [
{
values: corpusData.texts.map(text => text.num_tokens),
labels: corpusData.texts.map(text => `${text.title} (${text.publishing_year})`),
values: texts.map(text => text[1].counts.token),
labels: texts.map(text => `${corpusData.lookups.text[text[0]].title} (${corpusData.lookups.text[text[0]].publishing_year})`),
type: 'pie'
}
];
let graphLayout = {
height: 400,
width: 500
// height: 600,
// width: 900
};
Plotly.newPlot(textProportionsGraphicElement, graphData, graphLayout);
let config = {responsive: true};
Plotly.newPlot(textProportionsGraphicElement, graphData, graphLayout, config);
}
renderFrequenciesGraphic(corpusData) {
let frequenciesTokenCategoryDropdownElement = document.querySelector('[data-target="frequencies-token-category-dropdown"]');
let frequenciesTokenCategoryDropdownListElement = document.querySelector("#frequencies-token-category-dropdown");
let frequenciesGraphicElement = document.querySelector('#frequencies-graphic');
let texts = Object.entries(corpusData.text.lexicon);
frequenciesTokenCategoryDropdownListElement.addEventListener('click', (event) => {
frequenciesTokenCategoryDropdownElement.firstChild.textContent = event.target.innerHTML;
this.renderFrequenciesGraphic(corpusData);
});
let tokenCategory = frequenciesTokenCategoryDropdownElement.firstChild.textContent.toLowerCase();
let graphData = this.createFrequenciesGraphData(tokenCategory, texts, corpusData);
let graphLayout = {
barmode: 'stack',
type: 'bar'
};
let config = {responsive: true};
Plotly.newPlot(frequenciesGraphicElement, graphData, graphLayout, config);
}
createFrequenciesGraphData(category, texts, corpusData) {
let graphData = [];
let sortedData = Object.entries(corpusData.corpus.lexicon[0].freqs[category]).sort((a, b) => b[1] - a[1]).slice(0, 5);
for (let item of sortedData) {
let data = {
x: texts.map(text => `${corpusData.lookups.text[text[0]].title} (${corpusData.lookups.text[text[0]].publishing_year})`),
y: texts.map(text => text[1].freqs[category][item[0]]),
name: corpusData.lookups[category][item[0]],
type: 'bar'
};
graphData.push(data);
}
return graphData;
}
renderBoundsGraphic(corpusData) {
let boundsGraphicElement = document.querySelector('#bounds-graphic');
let graphData = [];
let texts = Object.entries(corpusData.text.lexicon);
graphData = [{
type: 'bar',
x: texts.map(text => text[1].bounds[1] - text[1].bounds[0]),
y: texts.map(text => corpusData.lookups.text[text[0]].title),
base: texts.map(text => text[1].bounds[0]),
text: texts.map(text => `${corpusData.lookups.text[text[0]].title} (${corpusData.lookups.text[text[0]].publishing_year})`),
orientation: 'h',
hovertemplate: '%{base} - %{x} <br>%{y}',
showlegend: false
}];
let graphLayout = {
// height: 600,
// width: 2000,
barmode: 'stack',
type: 'bar',
showgrid: false,
xaxis: {
rangemode: 'nonnegative',
autorange: true
},
yaxis: {
autorange: true,
showticklabels: false
}
};
let config = {responsive: true};
Plotly.newPlot(boundsGraphicElement, graphData, graphLayout, config);
}
}