// Mirror of https://gitlab.ub.uni-bielefeld.de/sfb1288inf/nopaque.git
// Synced 2025-01-13 11:40:35 +00:00 (435 lines, 18 KiB, JavaScript)
/**
 * Corpus analysis extension that renders the static corpus data found at
 * `app.data.corpus.o.staticData`: general counts, a per-text info list, a
 * text proportions graphic, a token frequency list and a frequencies graphic.
 * It also drives the stopword settings modal used to filter the token list.
 */
nopaque.corpus_analysis.StaticVisualizationExtension = class StaticVisualizationExtension {
  name = 'Static Visualization (beta)';

  // True once the stopword modal's language/delete/reset listeners have been
  // attached, so reopening the modal does not stack duplicate listeners.
  stopwordSettingsListenersRegistered = false;

  /**
   * @param {Object} app - The corpus analysis app; must provide
   *   `registerExtension()` and, by the time `init()` runs, `data.corpus`.
   */
  constructor(app) {
    this.app = app;
    this.data = {
      stopwords: undefined,
      originalStopwords: {},
      stopwordCache: {},
      tokenSet: new Set()
    };

    this.app.registerExtension(this);
  }

  /**
   * Renders all static views and wires up the UI event listeners.
   */
  init() {
    // Init data
    this.data.corpus = this.app.data.corpus;
    this.renderGeneralCorpusInfo();
    this.renderTextInfoList();
    this.renderTextProportionsGraphic();
    this.renderTokenList();
    // this.renderFrequenciesGraphic();

    // Add event listeners
    const frequenciesStopwordSettingModal = document.querySelector('#frequencies-stopwords-setting-modal');
    const frequenciesStopwordSettingModalButton = document.querySelector('#frequencies-stopwords-setting-modal-button');
    frequenciesStopwordSettingModalButton.addEventListener('click', () => {
      // Snapshot the current stopwords so that "cancel" can restore them.
      this.data.stopwordCache = structuredClone(this.data.stopwords);
      this.renderStopwordSettingsModal(this.data.stopwords);
      M.Modal.init(frequenciesStopwordSettingModal, {dismissible: false});
    });

    // The active graph mode is tracked through the 'disabled' class: the
    // clicked button becomes disabled, all of its siblings become enabled.
    const bindGraphModeButtons = (selector, onModeChange) => {
      const buttons = document.querySelectorAll(selector);
      buttons.forEach((button) => {
        button.addEventListener('click', (event) => {
          buttons.forEach((otherButton) => {
            otherButton.classList.remove('disabled');
          });
          event.target.closest(selector).classList.add('disabled');
          onModeChange();
        });
      });
    };

    bindGraphModeButtons('.text-proportions-graph-mode-button', () => {
      this.renderTextProportionsGraphic();
    });

    const frequenciesTokenCategoryDropdownElement = document.querySelector('[data-target="frequencies-token-category-dropdown"]');
    const frequenciesTokenCategoryDropdownListElement = document.querySelector("#frequencies-token-category-dropdown");
    frequenciesTokenCategoryDropdownListElement.addEventListener('click', (event) => {
      // The dropdown trigger's first child holds the selected category label,
      // which filterData() later reads back.
      frequenciesTokenCategoryDropdownElement.firstChild.textContent = event.target.innerHTML;
      this.renderTokenList();
    });

    bindGraphModeButtons('.frequencies-graph-mode-button', () => {
      this.renderFrequenciesGraphic(this.data.tokenSet);
    });

    for (const actionButton of document.querySelectorAll('.frequencies-stopword-setting-modal-action-buttons')) {
      actionButton.addEventListener('click', (event) => {
        const action = event.target.closest('.frequencies-stopword-setting-modal-action-buttons').dataset.action;
        if (action === 'submit') {
          this.renderTokenList();
        } else if (action === 'cancel') {
          this.data.stopwords = structuredClone(this.data.stopwordCache);
        }
      });
    }
  }

  /**
   * Fetches the stopword lists and stores two independent copies: the
   * pristine originals (used for "reset") and the editable working set.
   *
   * @returns {Promise<Object>} The fetched stopwords as returned by the API.
   */
  async getStopwords() {
    // NOTE(review): this uses the global `app`, not `this.app` - presumably
    // the application-wide instance that owns `corpora`; confirm.
    const stopwords = await app.corpora.getStopwords();
    this.data.originalStopwords = structuredClone(stopwords);
    this.data.stopwords = structuredClone(stopwords);
    return stopwords;
  }

  /**
   * Writes the corpus-wide counts (tokens, sentences, unique words, lemmas,
   * pos and simple_pos) into their respective elements.
   */
  renderGeneralCorpusInfo() {
    const corpusData = this.data.corpus.o.staticData;
    const corpusFreqs = corpusData.corpus.freqs;
    const setCount = (selector, value) => {
      document.querySelector(selector).textContent = value;
    };

    setCount('.corpus-num-tokens', corpusData.corpus.bounds[1] - corpusData.corpus.bounds[0]);
    setCount('.corpus-num-s', corpusData.s_attrs.s.lexicon.length);
    setCount('.corpus-num-unique-words', Object.keys(corpusFreqs.word).length);
    setCount('.corpus-num-unique-lemmas', Object.keys(corpusFreqs.lemma).length);
    setCount('.corpus-num-unique-pos', Object.keys(corpusFreqs.pos).length);
    setCount('.corpus-num-unique-simple-pos', Object.keys(corpusFreqs.simple_pos).length);
  }

  /**
   * Builds one info record per text (title, year, token/sentence counts and
   * unique word/lemma/pos counts), adds the records to the text info list and
   * updates the text count chip.
   */
  renderTextInfoList() {
    const corpusData = this.data.corpus.o.staticData;
    const corpusTextInfoListElement = document.querySelector('.corpus-text-info-list');
    const corpusTextInfoList = new nopaque.resource_lists.CorpusTextInfoList(corpusTextInfoListElement);
    const texts = corpusData.s_attrs.text.lexicon;
    const sentences = corpusData.s_attrs.s.lexicon;
    const textCount = Object.keys(texts).length;
    const textData = [];

    for (let i = 0; i < textCount; i++) {
      const text = texts[i];
      const textValues = corpusData.values.s_attrs.text[i];
      const textStart = text.bounds[0];
      const textEnd = text.bounds[1];

      textData.push({
        title: textValues.title,
        publishing_year: textValues.publishing_year,
        num_tokens: textEnd - textStart,
        // A sentence belongs to the text if its bounds lie within the text's bounds.
        num_sentences: sentences.filter((s) => s.bounds[0] >= textStart && s.bounds[1] <= textEnd).length,
        num_unique_words: Object.keys(text.freqs.word).length,
        num_unique_lemmas: Object.keys(text.freqs.lemma).length,
        num_unique_pos: Object.keys(text.freqs.pos).length,
        num_unique_simple_pos: Object.keys(text.freqs.simple_pos).length
      });
    }

    corpusTextInfoList.add(textData);

    const textCountChipElement = document.querySelector('.text-count-chip');
    textCountChipElement.textContent = `Text count: ${corpusData.s_attrs.text.lexicon.length}`;
  }

  /**
   * Plots the text sizes either as a horizontal bar ("Bounds") or as a pie
   * chart ("Proportions"), depending on the currently active mode button.
   */
  renderTextProportionsGraphic() {
    const corpusData = this.data.corpus.o.staticData;
    const textProportionsGraphicElement = document.querySelector('#text-proportions-graphic');
    const texts = Object.entries(corpusData.s_attrs.text.lexicon);
    // The active mode button is the one carrying the 'disabled' class.
    const graphtype = document.querySelector('.text-proportions-graph-mode-button.disabled').dataset.graphType;
    const textProportionsTitleElement = document.querySelector('#text-proportions-title-element');
    const isBar = graphtype === 'bar';

    if (isBar) {
      textProportionsTitleElement.textContent = 'Bounds';
    } else if (graphtype === 'pie') {
      textProportionsTitleElement.textContent = 'Proportions';
    }

    const graphData = this.createTextProportionsGraphData(texts, graphtype);
    const graphLayout = {
      barmode: isBar ? 'relative' : '',
      type: graphtype,
      showgrid: false,
      height: 447,
      margin: {
        l: 10,
        r: 10,
        b: isBar ? 80 : 10,
        t: isBar ? 80 : 10,
      },
      legend: {
        "orientation": "h",
        font: {
          size: 10
        }
      },
      xaxis: {
        rangemode: 'nonnegative',
        autorange: true
      },
      yaxis: {
        autorange: true,
        showticklabels: false
      }
    };
    const config = {
      responsive: true,
      modeBarButtonsToRemove: ['zoom2d', 'select2d', 'lasso2d', 'zoomIn2d', 'zoomOut2d', 'autoScale2d', 'resetScale2d'],
      displaylogo: false
    };

    Plotly.newPlot(textProportionsGraphicElement, graphData, graphLayout, config);
  }

  /**
   * Creates the Plotly traces for the text proportions graphic.
   *
   * @param {Array} texts - `[textId, text]` entries of the text lexicon.
   * @param {string} graphtype - 'bar' yields one horizontal bar trace per
   *   text; any other value yields a single trace of that type.
   * @returns {Object[]} The Plotly trace objects.
   */
  createTextProportionsGraphData(texts, graphtype) {
    const corpusData = this.data.corpus.o.staticData;
    const textLabel = (textId) => {
      const textValues = corpusData.values.s_attrs.text[textId];
      return `${textValues.title} (${textValues.publishing_year})`;
    };

    if (graphtype === 'bar') {
      return texts.map(([textId, text]) => {
        const boundsLabel = `${text.bounds[0]} - ${text.bounds[1]}`;
        return {
          type: 'bar',
          orientation: 'h',
          x: [text.bounds[1] - text.bounds[0]],
          y: [0.5],
          text: [boundsLabel],
          name: textLabel(textId),
          hovertemplate: boundsLabel,
        };
      });
    }

    return [
      {
        values: texts.map(([, text]) => text.bounds[1] - text.bounds[0]),
        labels: texts.map(([textId]) => textLabel(textId)),
        type: graphtype
      }
    ];
  }

  /**
   * Fills the token list with all terms of the selected token category that
   * are not stopwords and flags the four most frequent ones. Loads the
   * stopwords first if they have not been fetched yet.
   *
   * @returns {Promise<void>}
   */
  async renderTokenList() {
    const corpusTokenListElement = document.querySelector('.corpus-token-list');
    const corpusTokenList = new nopaque.resource_lists.CorpusTokenList(corpusTokenListElement);
    const filteredData = this.filterData();
    let stopwords = this.data.stopwords;
    if (stopwords === undefined) {
      stopwords = await this.getStopwords();
    }
    // A Set avoids rescanning the flattened stopword lists for every term.
    const stopwordSet = new Set(Object.values(stopwords).flat());
    const entries = Object.entries(filteredData);

    const mostFrequent = new Set(
      entries
        .filter(([term]) => !stopwordSet.has(term.toLowerCase()))
        .sort((a, b) => b[1].count - a[1].count)
        .slice(0, 4)
        .map(([term]) => term)
    );

    const tokenData = entries
      .filter(([term]) => !stopwordSet.has(term))
      .map(([term, termData]) => ({
        term: term,
        count: termData.count,
        mostFrequent: mostFrequent.has(term)
      }));
    corpusTokenList.add(tokenData);
  }

  /**
   * Aggregates the corpus frequencies of the selected token category by
   * lowercased term.
   *
   * @returns {Object} Maps each lowercased term to `{count, originalIds}`,
   *   where `originalIds` are the ids of all values folded into that term.
   *   The object has no prototype, so terms such as "constructor" cannot
   *   collide with inherited properties.
   */
  filterData() {
    const frequenciesTokenCategoryDropdownElement = document.querySelector('[data-target="frequencies-token-category-dropdown"]');
    const tokenCategory = frequenciesTokenCategoryDropdownElement.firstChild.textContent.toLowerCase();
    const corpusData = this.data.corpus.o.staticData;
    const categoryFreqs = corpusData.corpus.freqs[tokenCategory];
    const categoryValues = corpusData.values.p_attrs[tokenCategory];
    // Computed once; the original re-evaluated this on every iteration.
    const valueCount = Object.values(categoryFreqs).length;
    const filteredData = Object.create(null);

    for (let i = 0; i < valueCount; i++) {
      const term = categoryValues[i].toLowerCase();
      const count = categoryFreqs[i];

      if (filteredData[term]) {
        filteredData[term].count += count;
        filteredData[term].originalIds.push(i);
      } else {
        filteredData[term] = {
          count: count,
          originalIds: [i]
        };
      }
    }
    return filteredData;
  }

  /**
   * Plots the per-text frequencies of the given tokens using the currently
   * active frequencies graph mode and remembers the token set for re-renders.
   *
   * @param {Iterable<string>} tokenSet - The (lowercased) terms to plot.
   */
  renderFrequenciesGraphic(tokenSet) {
    this.data.tokenSet = tokenSet;
    const corpusData = this.data.corpus.o.staticData;
    const frequenciesTokenCategoryDropdownElement = document.querySelector('[data-target="frequencies-token-category-dropdown"]');
    const frequenciesGraphicElement = document.querySelector('#frequencies-graphic');
    const texts = Object.entries(corpusData.s_attrs.text.lexicon);
    // The active mode button is the one carrying the 'disabled' class.
    const graphtype = document.querySelector('.frequencies-graph-mode-button.disabled').dataset.graphType;
    const tokenCategory = frequenciesTokenCategoryDropdownElement.firstChild.textContent.toLowerCase();

    const graphData = this.createFrequenciesGraphData(tokenCategory, texts, graphtype, tokenSet);
    const graphLayout = {
      barmode: graphtype === 'bar' ? 'stack' : '',
      yaxis: {
        showticklabels: graphtype !== 'markers'
      },
      height: 627,
      margin: {
        l: 33
      }
    };
    const config = {
      responsive: true,
      modeBarButtonsToRemove: ['zoom2d', 'select2d', 'lasso2d', 'zoomIn2d', 'zoomOut2d', 'autoScale2d', 'resetScale2d'],
      displaylogo: false
    };
    Plotly.newPlot(frequenciesGraphicElement, graphData, graphLayout, config);
  }

  /**
   * Creates one Plotly trace per token with the token's count in each text.
   *
   * @param {string} tokenCategory - Key into each text's `freqs`.
   * @param {Array} texts - `[textId, text]` entries of the text lexicon.
   * @param {string} graphtype - 'markers' yields bubble traces sized by
   *   count; any other value is used as the trace type with counts on y.
   * @param {Iterable<string>} tokenSet - The terms to plot. Terms that do not
   *   occur in the currently selected category are skipped.
   * @returns {Object[]} The Plotly trace objects.
   */
  createFrequenciesGraphData(tokenCategory, texts, graphtype, tokenSet) {
    const corpusData = this.data.corpus.o.staticData;
    const filteredData = this.filterData();
    const textTitles = texts.map(([textId]) => {
      const textValues = corpusData.values.s_attrs.text[textId];
      return `${textValues.title} (${textValues.publishing_year})`;
    });
    const graphData = [];

    for (const item of tokenSet) {
      const termData = filteredData[item];
      if (termData === undefined) {
        // Token is unknown in this category; nothing to plot.
        continue;
      }

      // Sum the counts of all original values folded into this term, per text.
      const tokenCountPerText = texts.map(([, text]) => {
        const textFreqs = text.freqs[tokenCategory];
        return termData.originalIds.reduce((sum, originalId) => sum + (textFreqs[originalId] || 0), 0);
      });

      if (graphtype === 'markers') {
        graphData.push({
          x: [...textTitles],
          y: texts.map(() => item),
          name: item,
          // One label per text showing that text's own count.
          text: tokenCountPerText.map((count) => `${item}<br>${count}`),
          mode: 'markers',
          marker: {
            size: tokenCountPerText,
            sizeref: 0.4
          }
        });
      } else {
        graphData.push({
          x: [...textTitles],
          y: tokenCountPerText,
          name: item,
          type: graphtype
        });
      }
    }
    return graphData;
  }

  /**
   * Builds a stopword chip (`div.chip` with a "close" icon). The word is
   * inserted as a text node, so it is never interpreted as markup.
   *
   * @param {string} word - The stopword to display.
   * @param {Function} onClick - Called with `word` when the chip is clicked.
   * @returns {HTMLElement} The chip element.
   */
  createStopwordChipElement(word, onClick) {
    const chipElement = document.createElement('div');
    chipElement.className = 'chip';
    chipElement.appendChild(document.createTextNode(word));

    const closeIconElement = document.createElement('i');
    closeIconElement.className = 'close material-icons';
    closeIconElement.textContent = 'close';
    chipElement.appendChild(closeIconElement);

    chipElement.addEventListener('click', () => onClick(word));
    return chipElement;
  }

  /**
   * (Re)renders the stopword settings modal: language selection, user
   * stopword chips, the chip list of the selected language and the chips
   * input. The listeners of the persistent controls are attached only on the
   * first call and always operate on the current `this.data.stopwords`.
   *
   * @param {Object} stopwords - Stopword lists keyed by language, plus the
   *   `user_stopwords` list.
   */
  renderStopwordSettingsModal(stopwords) {
    const stopwordInputField = document.querySelector('#stopword-input-field');
    const userStopwordListContainer = document.querySelector('#user-stopword-list-container');
    const stopwordLanguageSelection = document.querySelector('#stopword-language-selection');
    const stopwordLanguageChipList = document.querySelector('#stopword-language-chip-list');
    const deleteLanguageStopwordListEntriesButton = document.querySelector('#delete-language-stopword-list-entries-button');
    const resetLanguageStopwordListEntriesButton = document.querySelector('#reset-language-stopword-list-entries-button');

    stopwordLanguageChipList.innerHTML = '';
    userStopwordListContainer.innerHTML = '';
    stopwordInputField.value = '';

    // Render stopword language selection. Set english as default language. Filter out user_stopwords.
    if (stopwordLanguageSelection.children.length === 0) {
      Object.keys(stopwords).forEach((language) => {
        if (language !== 'user_stopwords') {
          const optionElement = nopaque.Utils.HTMLToElement(`<option value="${language}" ${language === 'english' ? 'selected' : ''}>${language}</option>`);
          stopwordLanguageSelection.appendChild(optionElement);
        }
      });
    }

    // Render user stopwords over input field.
    for (const word of this.data.stopwords['user_stopwords']) {
      const chipElement = this.createStopwordChipElement(word, (removedListItem) => {
        this.data.stopwords['user_stopwords'] = this.data.stopwords['user_stopwords'].filter(item => item !== removedListItem);
      });
      userStopwordListContainer.appendChild(chipElement);
    }

    // Render english stopwords as default ...
    const selectedLanguage = stopwordLanguageSelection.value;
    this.renderStopwordLanguageChipList(selectedLanguage, stopwords[selectedLanguage]);

    if (!this.stopwordSettingsListenersRegistered) {
      this.stopwordSettingsListenersRegistered = true;

      // ... or render selected language stopwords.
      stopwordLanguageSelection.addEventListener('change', (event) => {
        this.renderStopwordLanguageChipList(event.target.value, this.data.stopwords[event.target.value]);
      });

      // Eventlistener for deleting all stopwords of a language.
      deleteLanguageStopwordListEntriesButton.addEventListener('click', () => {
        this.data.stopwords[stopwordLanguageSelection.value] = [];
        stopwordLanguageChipList.innerHTML = '';
        this.buttonRendering();
      });

      // Eventlistener for resetting all stopwords of a language to the original stopwords.
      resetLanguageStopwordListEntriesButton.addEventListener('click', () => {
        const language = stopwordLanguageSelection.value;
        this.data.stopwords[language] = structuredClone(this.data.originalStopwords[language]);
        this.renderStopwordLanguageChipList(language, this.data.stopwords[language]);
      });
    }

    // Initialize Materialize components.
    M.Chips.init(
      stopwordInputField,
      {
        placeholder: 'Add stopwords',
        onChipAdd: (event) => {
          // Sync every chip currently in the input into the (lowercased,
          // duplicate-free) user stopword list.
          for (const chip of event[0].M_Chips.chipsData) {
            const userStopword = chip.tag.toLowerCase();
            if (!this.data.stopwords['user_stopwords'].includes(userStopword)) {
              this.data.stopwords['user_stopwords'].push(userStopword);
            }
          }
        }
      }
    );
    M.FormSelect.init(stopwordLanguageSelection);
  }

  /**
   * Enables or disables the "delete" and "reset" buttons for the selected
   * language: delete is disabled when the list is empty, reset is disabled
   * when the list has as many entries as the original list.
   */
  buttonRendering() {
    const deleteLanguageStopwordListEntriesButton = document.querySelector('#delete-language-stopword-list-entries-button');
    const resetLanguageStopwordListEntriesButton = document.querySelector('#reset-language-stopword-list-entries-button');
    const selectedLanguage = document.querySelector('#stopword-language-selection').value;
    const stopwordLength = this.data.stopwords[selectedLanguage].length;
    const originalStopwordListLength = this.data.originalStopwords[selectedLanguage].length;

    deleteLanguageStopwordListEntriesButton.classList.toggle('disabled', stopwordLength === 0);
    resetLanguageStopwordListEntriesButton.classList.toggle('disabled', stopwordLength === originalStopwordListLength);
  }

  /**
   * Renders one chip per stopword of the given language. Clicking a chip
   * removes its word from `this.data.stopwords[language]`.
   *
   * @param {string} language - Key of the language in `this.data.stopwords`.
   * @param {string[]} stopwords - The stopwords to render as chips.
   */
  renderStopwordLanguageChipList(language, stopwords) {
    const stopwordLanguageChipList = document.querySelector('#stopword-language-chip-list');
    stopwordLanguageChipList.innerHTML = '';
    for (const word of stopwords) {
      const chipElement = this.createStopwordChipElement(word, (removedListItem) => {
        this.data.stopwords[language] = this.data.stopwords[language].filter(item => item !== removedListItem);
        this.buttonRendering();
      });
      stopwordLanguageChipList.appendChild(chipElement);
    }
    this.buttonRendering();
  }
};