Merge branch 'visualizations-update' of gitlab.ub.uni-bielefeld.de:sfb1288inf/nopaque into visualizations-update

This commit is contained in:
Patrick Jentsch 2023-06-22 12:46:36 +02:00
commit eb2abf8282
5 changed files with 17927 additions and 257 deletions

File diff suppressed because it is too large Load Diff

View File

@ -34,25 +34,25 @@ class CorpusAnalysisApp {
.then(
cQiCorpus => {
this.data.corpus = {o: cQiCorpus};
this.data.corpus.o.getVisualizationData().then(
this.data.corpus.o.getVisualizationData()
.then(
(data) => {
console.log(data);
// this.renderGeneralCorpusInfo(data);
// this.renderTextInfoList(data);
// this.renderTextProportionsGraphic(data);
// this.renderWordFrequenciesGraphic(data);
// this.renderBoundsGraphic(data);
}
);
// this.data.corpus.o.getVisualizationData()
// .then(
// (visualizationData) => {
// console.log(visualizationData);
// this.renderGeneralCorpusInfo(visualizationData);
// this.renderTextInfoList(visualizationData);
// this.renderTextProportionsGraphic(visualizationData);
// }
// );
this.data.corpus.o.getCorpusData()
.then(corpusData => {
console.log(corpusData);
this.renderGeneralCorpusInfo(corpusData);
this.renderTextInfoList(corpusData);
this.renderTextProportionsGraphic(corpusData);
this.renderFrequenciesGraphic(corpusData);
this.renderBoundsGraphic(corpusData);
});
// TODO: Don't do this here
cQiCorpus.updateDb();
@ -117,38 +117,135 @@ class CorpusAnalysisApp {
}
// Writes corpus-wide figures from `corpusData` into the page.
// NOTE(review): this span comes from a rendered diff without +/- markers. The statements that
// go through `.corpus-general-info-list` / `corpusData.texts` and the statements that read
// `corpusData.corpus.lexicon[0]` look like two revisions of the same body — confirm which of
// them belong to the current file before relying on this method as shown.
renderGeneralCorpusInfo(corpusData) {
// Variant 1: fill the <li> entries of the general info list with labelled HTML strings.
let corpusGeneralInfoListElement = document.querySelector('.corpus-general-info-list');
corpusGeneralInfoListElement.querySelector('.corpus-num-tokens').innerHTML = `<b>Number of tokens:</b> ${this.data.corpus.o.size}`;
corpusGeneralInfoListElement.querySelector('.corpus-text-count').innerHTML = `<b>Corpus text count:</b> ${corpusData.texts.length}`;
corpusGeneralInfoListElement.querySelector('.corpus-num-unique-words').innerHTML = `<b>Corpus unique word count:</b> ${corpusData.num_unique_words}`;
corpusGeneralInfoListElement.querySelector('.corpus-num-unique-lemmas').innerHTML = `<b>Corpus unique lemma count:</b> ${corpusData.num_unique_lemmas}`;
// corpusGeneralInfoListElement.querySelector('.corpus-most-frequent-words').innerHTML = `<b>Corpus most frequent words:</b> ${corpusData.most_frequent_words.join(', ');
corpusGeneralInfoListElement.querySelector('.corpus-num-sentences').innerHTML = `<b>Corpus sentence count:</b> ${corpusData.num_sentences}`;
corpusGeneralInfoListElement.querySelector('.corpus-average-sentence-length').innerHTML = `<b>Corpus average sentence length:</b> ${corpusData.average_sentence_length}`;
corpusGeneralInfoListElement.querySelector('.corpus-num-ent-types').innerHTML = `<b>Corpus entity count:</b> ${corpusData.num_ent_types}`;
corpusGeneralInfoListElement.querySelector('.corpus-num-unique-ent-types').innerHTML = `<b>Corpus unique entity count:</b> ${corpusData.num_unique_ent_types}`;
// Variant 2: write bare numbers into the first element matching each class, document-wide.
// Totals come from `counts` of lexicon entry 0; each "unique" figure is the number of keys
// in the matching `freqs` table of that same entry.
document.querySelector('.corpus-num-tokens').innerHTML = corpusData.corpus.lexicon[0].counts.token;
document.querySelector('.corpus-num-s').innerHTML = corpusData.corpus.lexicon[0].counts.s;
document.querySelector('.corpus-num-unique-words').innerHTML = Object.entries(corpusData.corpus.lexicon[0].freqs.word).length;
document.querySelector('.corpus-num-unique-lemmas').innerHTML = Object.entries(corpusData.corpus.lexicon[0].freqs.lemma).length;
document.querySelector('.corpus-num-unique-pos').innerHTML = Object.entries(corpusData.corpus.lexicon[0].freqs.pos).length;
document.querySelector('.corpus-num-unique-simple-pos').innerHTML = Object.entries(corpusData.corpus.lexicon[0].freqs.simple_pos).length;
}
// Populates the per-text table (a CorpusTextInfoList mounted on `.corpus-text-info-list`)
// and updates the text-count chip.
// NOTE(review): this span comes from a rendered diff without +/- markers; `add(corpusData.texts)`
// and the loop followed by `add(textData)` look like the old and new way of feeding the list —
// confirm which one is part of the current file.
renderTextInfoList(corpusData) {
let corpusTextInfoListElement = document.querySelector('.corpus-text-info-list');
let corpusTextInfoList = new CorpusTextInfoList(corpusTextInfoListElement);
corpusTextInfoList.add(corpusData.texts);
let texts = corpusData.text.lexicon;
let textData = [];
// The loop indexes `lookups.text` and `text.lexicon` with 0..n-1, where n is the number of
// entries in the lexicon — assumes the keys are consecutive integers starting at 0; TODO confirm.
for (let i = 0; i < Object.entries(texts).length; i++) {
// One row per text: metadata from the lookup table, totals from `counts`, and the number of
// keys in each `freqs` table as the "unique" figures.
let resource = {
title: corpusData.lookups.text[i].title,
publishing_year: corpusData.lookups.text[i].publishing_year,
num_tokens: corpusData.text.lexicon[i].counts.token,
num_sentences: corpusData.text.lexicon[i].counts.s,
num_unique_words: Object.entries(corpusData.text.lexicon[i].freqs.word).length,
num_unique_lemmas: Object.entries(corpusData.text.lexicon[i].freqs.lemma).length,
num_unique_pos: Object.entries(corpusData.text.lexicon[i].freqs.pos).length,
num_unique_simple_pos: Object.entries(corpusData.text.lexicon[i].freqs.simple_pos).length
};
textData.push(resource);
}
corpusTextInfoList.add(textData);
// The chip shows how many entries the text lexicon holds.
let textCountChipElement = document.querySelector('.text-count-chip');
textCountChipElement.innerHTML = `Text count: ${Object.values(corpusData.text.lexicon).length}`;
}
// Draws a pie chart of per-text token counts into `#text-proportions-graphic` via Plotly.
// NOTE(review): this span comes from a rendered diff without +/- markers; the duplicated
// `values`/`labels` keys and the two `Plotly.newPlot` calls look like old and new revisions
// shown together — confirm which lines are part of the current file.
renderTextProportionsGraphic(corpusData) {
let textProportionsGraphicElement = document.querySelector('#text-proportions-graphic');
// [key, lexiconEntry] pairs, one per text.
let texts = Object.entries(corpusData.text.lexicon);
let graphData = [
{
// In an object literal a later duplicate key overrides the earlier one, so as written the
// `texts.map(...)` versions below are the ones that would end up in the trace.
values: corpusData.texts.map(text => text.num_tokens),
labels: corpusData.texts.map(text => `${text.title} (${text.publishing_year})`),
values: texts.map(text => text[1].counts.token),
labels: texts.map(text => `${corpusData.lookups.text[text[0]].title} (${corpusData.lookups.text[text[0]].publishing_year})`),
type: 'pie'
}
];
let graphLayout = {
height: 400,
width: 500
// height: 600,
// width: 900
};
// As written the chart is created twice; the second call additionally passes the
// responsive config.
Plotly.newPlot(textProportionsGraphicElement, graphData, graphLayout);
let config = {responsive: true};
Plotly.newPlot(textProportionsGraphicElement, graphData, graphLayout, config);
}
renderFrequenciesGraphic(corpusData) {
let frequenciesTokenCategoryDropdownElement = document.querySelector('[data-target="frequencies-token-category-dropdown"]');
let frequenciesTokenCategoryDropdownListElement = document.querySelector("#frequencies-token-category-dropdown");
let frequenciesGraphicElement = document.querySelector('#frequencies-graphic');
let texts = Object.entries(corpusData.text.lexicon);
frequenciesTokenCategoryDropdownListElement.addEventListener('click', (event) => {
frequenciesTokenCategoryDropdownElement.firstChild.textContent = event.target.innerHTML;
this.renderFrequenciesGraphic(corpusData);
});
let tokenCategory = frequenciesTokenCategoryDropdownElement.firstChild.textContent.toLowerCase();
let graphData = this.createFrequenciesGraphData(tokenCategory, texts, corpusData);
let graphLayout = {
barmode: 'stack',
type: 'bar'
};
let config = {responsive: true};
Plotly.newPlot(frequenciesGraphicElement, graphData, graphLayout, config);
}
createFrequenciesGraphData(category, texts, corpusData) {
let graphData = [];
let sortedData = Object.entries(corpusData.corpus.lexicon[0].freqs[category]).sort((a, b) => b[1] - a[1]).slice(0, 5);
for (let item of sortedData) {
let data = {
x: texts.map(text => `${corpusData.lookups.text[text[0]].title} (${corpusData.lookups.text[text[0]].publishing_year})`),
y: texts.map(text => text[1].freqs[category][item[0]]),
name: corpusData.lookups[category][item[0]],
type: 'bar'
};
graphData.push(data);
}
return graphData;
}
renderBoundsGraphic(corpusData) {
let boundsGraphicElement = document.querySelector('#bounds-graphic');
let graphData = [];
let texts = Object.entries(corpusData.text.lexicon);
graphData = [{
type: 'bar',
x: texts.map(text => text[1].bounds[1] - text[1].bounds[0]),
y: texts.map(text => corpusData.lookups.text[text[0]].title),
base: texts.map(text => text[1].bounds[0]),
text: texts.map(text => `${corpusData.lookups.text[text[0]].title} (${corpusData.lookups.text[text[0]].publishing_year})`),
orientation: 'h',
hovertemplate: '%{base} - %{x} <br>%{y}',
showlegend: false
}];
let graphLayout = {
// height: 600,
// width: 2000,
barmode: 'stack',
type: 'bar',
showgrid: false,
xaxis: {
rangemode: 'nonnegative',
autorange: true
},
yaxis: {
autorange: true,
showticklabels: false
}
};
let config = {responsive: true};
Plotly.newPlot(boundsGraphicElement, graphData, graphLayout, config);
}
}

View File

@ -29,11 +29,11 @@ class CorpusTextInfoList extends ResourceList {
<tr class="list-item clickable hoverable">
<td><span class="title"></span> (<span class="publishing_year"></span>)</td>
<td><span class="num_tokens"></span></td>
<td><span class="num_sentences"></span></td>
<td><span class="num_unique_words"></span></td>
<td><span class="num_unique_lemmas"></span></td>
<td><span class="num_sentences"></span></td>
<td><span class="average_sentence_length"></span></td>
<td><span class="num_unique_ent_types"></span></td>
<td><span class="num_unique_pos"></span></td>
<td><span class="num_unique_simple_pos"></span></td>
</tr>
`.trim();
}
@ -44,11 +44,11 @@ class CorpusTextInfoList extends ResourceList {
'title',
'publishing_year',
'num_tokens',
'num_sentences',
'num_unique_words',
'num_unique_lemmas',
'num_sentences',
'average_sentence_length',
'num_unique_ent_types'
'num_unique_pos',
'num_unique_simple_pos'
];
}
@ -68,11 +68,11 @@ class CorpusTextInfoList extends ResourceList {
<tr>
<th>Text<span class="sort right material-icons" data-sort="title" style="cursor:pointer; color:#aa9cc9">arrow_drop_down</span></th>
<th>Number of tokens<span class="sort right material-icons" data-sort="num_tokens" style="cursor:pointer">arrow_drop_down</span></th>
<th>Number of sentences<span class="sort right material-icons" data-sort="num_sentences" style="cursor:pointer">arrow_drop_down</span></th>
<th>Number of unique words<span class="sort right material-icons" data-sort="num_unique_words" style="cursor:pointer">arrow_drop_down</span></th>
<th>Number of unique lemmas<span class="sort right material-icons" data-sort="num_unique_lemmas" style="cursor:pointer">arrow_drop_down</span></th>
<th>Number of sentences<span class="sort right material-icons" data-sort="num_sentences" style="cursor:pointer">arrow_drop_down</span></th>
<th>Average sentence length<span class="sort right material-icons" data-sort="average_sentence_length" style="cursor:pointer">arrow_drop_down</span></th>
<th>Number of unique entity types<span class="sort right material-icons" data-sort="num_unique_ent_types" style="cursor:pointer">arrow_drop_down</span></th>
<th>Number of unique pos<span class="sort right material-icons" data-sort="num_unique_pos" style="cursor:pointer">arrow_drop_down</span></th>
<th>Number of unique simple pos<span class="sort right material-icons" data-sort="num_unique_simple_pos" style="cursor:pointer">arrow_drop_down</span></th>
</tr>
</thead>
<tbody class="list"></tbody>
@ -86,11 +86,11 @@ class CorpusTextInfoList extends ResourceList {
title: corpusTextData.title,
publishing_year: corpusTextData.publishing_year,
num_tokens: corpusTextData.num_tokens,
num_sentences: corpusTextData.num_sentences,
num_unique_words: corpusTextData.num_unique_words,
num_unique_lemmas: corpusTextData.num_unique_lemmas,
num_sentences: corpusTextData.num_sentences,
average_sentence_length: corpusTextData.average_sentence_length,
num_unique_ent_types: corpusTextData.num_unique_ent_types
num_unique_pos: corpusTextData.num_unique_pos,
num_unique_simple_pos: corpusTextData.num_unique_simple_pos
};
}

View File

@ -1,7 +1,7 @@
<script src="https://cdnjs.cloudflare.com/ajax/libs/fast-json-patch/3.1.1/fast-json-patch.min.js" integrity="sha512-5uDdefwnzyq4N+SkmMBmekZLZNmc6dLixvVxCdlHBfqpyz0N3bzLdrJ55OLm7QrZmgZuhLGgHLDtJwU6RZoFCA==" crossorigin="anonymous" referrerpolicy="no-referrer"></script>
<script src="https://cdnjs.cloudflare.com/ajax/libs/list.js/2.3.1/list.min.js" integrity="sha512-93wYgwrIFL+b+P3RvYxi/WUFRXXUDSLCT2JQk9zhVGXuS2mHl2axj6d+R6pP+gcU5isMHRj1u0oYE/mWyt/RjA==" crossorigin="anonymous" referrerpolicy="no-referrer"></script>
<script src="https://cdnjs.cloudflare.com/ajax/libs/socket.io/4.5.4/socket.io.min.js" integrity="sha512-HTENHrkQ/P0NGDFd5nk6ibVtCkcM7jhr2c7GyvXp5O+4X6O5cQO9AhqFzM+MdeBivsX7Hoys2J7pp2wdgMpCvw==" crossorigin="anonymous" referrerpolicy="no-referrer"></script>
<script src="https://cdn.plot.ly/plotly-2.24.1.min.js" charset="utf-8"></script>
<script src="https://cdnjs.cloudflare.com/ajax/libs/plotly.js/2.24.2/plotly.min.js" integrity="sha512-dAXqGCq94D0kgLSPnfvd/pZpCMoJQpGj2S2XQmFQ9Ay1+96kbjss02ISEh+TBNXMggGg/1qoMcOHcxg+Op/Jmw==" crossorigin="anonymous" referrerpolicy="no-referrer"></script>
{%- assets
filters='rjsmin',

View File

@ -19,7 +19,6 @@
<div class="row" id="corpus-analysis-app-overview">
<div class="col s12">
<h1>{{ title }}</h1>
</div>
{% for extension in extensions %}
<div class="col s3">
@ -31,57 +30,112 @@
</div>
</div>
{% endfor %}
</div>
<div class="row">
<div class="row">
<div class="col s12">
<h4><i class="material-icons left">query_stats</i>Visualizations</h4>
</div>
<div class="col s4" >
</div>
<div class="row">
<div class="col s2">
<div class="card hoverable" style="border-radius: 10px !important; background-color:#6b3f89; color:white">
<div class="card-content" style="padding:10px !important; text-align:center;">
<p><b>Tokens</b></p>
<span class="card-title corpus-num-tokens"></span>
</div>
</div>
</div>
<div class="col s2">
<div class="card hoverable" style="border-radius: 10px !important; background-color:#6b3f89; color:white">
<div class="card-content" style="padding:10px !important; text-align:center">
<p><b>Sentences</b></p>
<span class="card-title corpus-num-s"></span>
</div>
</div>
</div>
<div class="col s2">
<div class="card hoverable" style="border-radius: 10px !important; background-color:#6b3f89; color:white">
<div class="card-content" style="padding:10px !important; text-align:center">
<p><b>Unique words</b></p>
<span class="card-title corpus-num-unique-words"></span>
</div>
</div>
</div>
<div class="col s2">
<div class="card hoverable" style="border-radius: 10px !important; background-color:#6b3f89; color:white">
<div class="card-content" style="padding:10px !important; text-align:center">
<p><b>Unique lemmas</b></p>
<span class="card-title corpus-num-unique-lemmas"></span>
</div>
</div>
</div>
<div class="col s2">
<div class="card hoverable" style="border-radius: 10px !important; background-color:#6b3f89; color:white">
<div class="card-content" style="padding:10px !important; text-align:center">
<p><b>Unique pos</b></p>
<span class="card-title corpus-num-unique-pos"></span>
</div>
</div>
</div>
<div class="col s2">
<div class="card hoverable" style="border-radius: 10px !important; background-color:#6b3f89; color:white">
<div class="card-content" style="padding:10px !important; text-align:center">
<p><b>Unique simple_pos</b></p>
<span class="card-title corpus-num-unique-simple-pos"></span>
</div>
</div>
</div>
</div>
<div class="row">
<div class="col s12">
<div class="card hoverable">
<div class="card-content">
<span class="card-title">General information about the Corpus</span>
<p></p>
<br>
<ul class="corpus-general-info-list">
<li class="corpus-num-tokens"></li>
<br>
<li class="corpus-text-count"></li>
<br>
<li class="corpus-num-unique-words"></li>
<br>
<li class="corpus-num-unique-lemmas"></li>
<br>
<li class="corpus-num-sentences"></li>
<br>
<li class="corpus-average-sentence-length"></li>
<br>
<li class="corpus-num-ent-types"></li>
<br>
<li class="corpus-num-unique-ent-types"></li>
<br>
</ul>
</div>
</div>
</div>
<div class="col s8">
<div class="card hoverable">
<div class="card-content">
<span class="card-title">Text information</span>
<span class="card-title">Text Information Overview</span>
<div class="chip text-count-chip" style="background-color:#6b3f89; color:white"></div>
<div class="corpus-text-info-list no-autoinit"></div>
</div>
</div>
</div>
</div>
<div class="row">
<div class="col 12">
</div>
<div class="row">
<div class="col s6">
<div class="card hoverable">
<div class="card-content">
<span class="card-title">Text proportions within the corpus</span>
<span class="card-title">Proportions</span>
<p>of texts within the corpus</p>
<div id="text-proportions-graphic"></div>
</div>
</div>
</div>
<div class="col s6">
<div class="card hoverable">
<div class="card-content">
<span class="card-title"><a class="dropdown-trigger btn" data-target="frequencies-token-category-dropdown">Word<i class="material-icons right">arrow_drop_down</i></a> Frequencies</span>
<ul id="frequencies-token-category-dropdown" class="dropdown-content">
<li><a data-token-category="word">Word</a></li>
<li><a data-token-category="lemma">Lemma</a></li>
<li><a data-token-category="pos">Pos</a></li>
<li><a data-token-category="simple_pos">Simple_pos</a></li>
</ul>
<p>within the texts of the 5 most frequent words in the corpus</p>
<div id="frequencies-graphic"></div>
</div>
</div>
</div>
</div>
<div class="row">
<div class="col s12">
<div class="card hoverable">
<div class="card-content">
<span class="card-title">Text Bounds</span>
<div id="bounds-graphic"></div>
</div>
</div>
</div>
</div>
</div>
</div>