/**
 * Corpus analysis extension that renders corpus statistics taken from
 * `app.data.corpus.o.staticData`: general corpus counts, a per-text info list,
 * a text proportions chart, a token frequency list/chart and the stopword
 * settings modal.
 *
 * Charts are drawn with Plotly; form widgets are initialized through
 * Materialize (`M`). The extension registers itself with the given app in its
 * constructor.
 */
nopaque.corpus_analysis.StaticVisualizationExtension = class StaticVisualizationExtension {
  name = 'Static Visualization (beta)';

  /**
   * @param {Object} app - Corpus analysis app; must provide
   *   `registerExtension()` and, by the time `init()` runs, `data.corpus`.
   */
  constructor(app) {
    this.app = app;
    this.data = {
      // Working copy of the stopword lists (object of arrays keyed by
      // language plus 'user_stopwords'); undefined until they are loaded.
      stopwords: undefined,
      // Untouched server copy, used by the "reset language" button.
      originalStopwords: {},
      // Snapshot taken when the settings modal opens; restored on cancel.
      stopwordCache: {},
      promises: {getStopwords: undefined},
      tokenSet: new Set(),
      // AbortController owning the listeners added by the latest
      // renderStopwordSettingsModal() call.
      stopwordModalListeners: undefined
    };
    this.app.registerExtension(this);
  }

  /**
   * Renders the initial views and wires up all static event listeners.
   */
  init() {
    // Init data
    this.data.corpus = this.app.data.corpus;
    this.renderGeneralCorpusInfo();
    this.renderTextInfoList();
    this.renderTextProportionsGraphic();
    this.renderTokenList();
    // The frequencies graphic is intentionally not rendered here; it is drawn
    // by renderFrequenciesGraphic() once a token set is available.

    // Add event listeners
    const stopwordSettingModal = document.querySelector('#frequencies-stopwords-setting-modal');
    const stopwordSettingModalButton = document.querySelector('#frequencies-stopwords-setting-modal-button');
    stopwordSettingModalButton.addEventListener('click', () => {
      // Keep a snapshot so that "cancel" can undo edits made in the modal.
      this.data.stopwordCache = structuredClone(this.data.stopwords);
      this.renderStopwordSettingsModal(this.data.stopwords);
      M.Modal.init(stopwordSettingModal, {dismissible: false});
    });

    // The button carrying the 'disabled' class marks the active graph mode;
    // the render methods read its data-graph-type attribute.
    const bindGraphModeButtons = (className, render) => {
      const selector = `.${className}`;
      const buttons = document.querySelectorAll(selector);
      buttons.forEach((button) => {
        button.addEventListener('click', (event) => {
          buttons.forEach((btn) => {
            btn.classList.remove('disabled');
          });
          event.target.closest(selector).classList.add('disabled');
          render();
        });
      });
    };

    bindGraphModeButtons('text-proportions-graph-mode-button', () => {
      this.renderTextProportionsGraphic();
    });

    const tokenCategoryDropdownElement = document.querySelector('[data-target="frequencies-token-category-dropdown"]');
    const tokenCategoryDropdownListElement = document.querySelector('#frequencies-token-category-dropdown');
    tokenCategoryDropdownListElement.addEventListener('click', (event) => {
      // The trigger's first child text doubles as the selected category;
      // filterData() reads it back from there.
      tokenCategoryDropdownElement.firstChild.textContent = event.target.innerHTML;
      this.renderTokenList();
    });

    bindGraphModeButtons('frequencies-graph-mode-button', () => {
      this.renderFrequenciesGraphic(this.data.tokenSet);
    });

    const actionButtonSelector = '.frequencies-stopword-setting-modal-action-buttons';
    for (const actionButton of document.querySelectorAll(actionButtonSelector)) {
      actionButton.addEventListener('click', (event) => {
        const action = event.target.closest(actionButtonSelector).dataset.action;
        if (action === 'submit') {
          this.renderTokenList();
        } else if (action === 'cancel') {
          this.data.stopwords = structuredClone(this.data.stopwordCache);
        }
      });
    }
  }

  /**
   * Fetches the stopword lists and stores two independent copies:
   * `data.originalStopwords` and the editable `data.stopwords`.
   *
   * @returns {Promise<Object>} Resolves with `data.stopwords`. Rejects when
   *   either the request or the JSON decoding fails, so awaiting callers can
   *   never hang on a failed request.
   */
  getStopwords() {
    this.data.promises.getStopwords = nopaque.requests.corpora.entity.getStopwords()
      .then((response) => response.json())
      .then((json) => {
        this.data.originalStopwords = structuredClone(json);
        this.data.stopwords = structuredClone(json);
        return this.data.stopwords;
      });
    return this.data.promises.getStopwords;
  }

  /**
   * Builds the display label "<title> (<publishing year>)" for a text.
   *
   * @param {string|number} textId - Key of the text in
   *   `staticData.values.s_attrs.text`.
   * @returns {string}
   */
  getTextLabel(textId) {
    const textValues = this.data.corpus.o.staticData.values.s_attrs.text[textId];
    return `${textValues.title} (${textValues.publishing_year})`;
  }

  /**
   * Writes the corpus-wide counts (tokens, sentences, unique words, lemmas,
   * pos and simple_pos) into their placeholder elements.
   */
  renderGeneralCorpusInfo() {
    const corpusData = this.data.corpus.o.staticData;
    const bounds = corpusData.corpus.bounds;
    const freqs = corpusData.corpus.freqs;
    document.querySelector('.corpus-num-tokens').innerHTML = bounds[1] - bounds[0];
    document.querySelector('.corpus-num-s').innerHTML = corpusData.s_attrs.s.lexicon.length;
    document.querySelector('.corpus-num-unique-words').innerHTML = Object.keys(freqs.word).length;
    document.querySelector('.corpus-num-unique-lemmas').innerHTML = Object.keys(freqs.lemma).length;
    document.querySelector('.corpus-num-unique-pos').innerHTML = Object.keys(freqs.pos).length;
    document.querySelector('.corpus-num-unique-simple-pos').innerHTML = Object.keys(freqs.simple_pos).length;
  }

  /**
   * Fills the per-text info list with one entry per text and updates the
   * text count chip.
   */
  renderTextInfoList() {
    const corpusData = this.data.corpus.o.staticData;
    const corpusTextInfoListElement = document.querySelector('.corpus-text-info-list');
    const corpusTextInfoList = new nopaque.resource_lists.CorpusTextInfoList(corpusTextInfoListElement);
    const texts = corpusData.s_attrs.text.lexicon;
    const sentences = corpusData.s_attrs.s.lexicon;
    const numTexts = Object.keys(texts).length;
    const textData = [];
    for (let i = 0; i < numTexts; i++) {
      const text = texts[i];
      const textValues = corpusData.values.s_attrs.text[i];
      const start = text.bounds[0];
      const end = text.bounds[1];
      textData.push({
        title: textValues.title,
        publishing_year: textValues.publishing_year,
        num_tokens: end - start,
        // Sentences lying completely inside the text's bounds.
        // NOTE(review): a per-text `counts.s` was referenced here in a
        // commented-out line; if that field exists it would avoid this scan.
        num_sentences: sentences.filter((s) => s.bounds[0] >= start && s.bounds[1] <= end).length,
        num_unique_words: Object.keys(text.freqs.word).length,
        num_unique_lemmas: Object.keys(text.freqs.lemma).length,
        num_unique_pos: Object.keys(text.freqs.pos).length,
        num_unique_simple_pos: Object.keys(text.freqs.simple_pos).length
      });
    }
    corpusTextInfoList.add(textData);

    const textCountChipElement = document.querySelector('.text-count-chip');
    textCountChipElement.innerHTML = `Text count: ${corpusData.s_attrs.text.lexicon.length}`;
  }

  /**
   * Draws the text proportions chart in the mode of the currently active
   * (i.e. 'disabled') mode button and updates the chart title accordingly.
   */
  renderTextProportionsGraphic() {
    const corpusData = this.data.corpus.o.staticData;
    const textProportionsGraphicElement = document.querySelector('#text-proportions-graphic');
    const texts = Object.entries(corpusData.s_attrs.text.lexicon);
    const graphtype = document.querySelector('.text-proportions-graph-mode-button.disabled').dataset.graphType;
    const textProportionsTitleElement = document.querySelector('#text-proportions-title-element');
    const isBar = graphtype === 'bar';

    if (isBar) {
      textProportionsTitleElement.innerHTML = 'Bounds';
    } else if (graphtype === 'pie') {
      textProportionsTitleElement.innerHTML = 'Proportions';
    }

    const graphData = this.createTextProportionsGraphData(texts, graphtype);
    const graphLayout = {
      barmode: isBar ? 'relative' : '',
      type: graphtype,
      showgrid: false,
      height: 447,
      margin: {
        l: 10,
        r: 10,
        b: isBar ? 80 : 10,
        t: isBar ? 80 : 10,
      },
      legend: {
        "orientation": "h",
        font: {
          size: 10
        }
      },
      xaxis: {
        rangemode: 'nonnegative',
        autorange: true
      },
      yaxis: {
        autorange: true,
        showticklabels: false
      }
    };
    const config = {
      responsive: true,
      modeBarButtonsToRemove: ['zoom2d', 'select2d', 'lasso2d', 'zoomIn2d', 'zoomOut2d', 'autoScale2d', 'resetScale2d'],
      displaylogo: false
    };
    Plotly.newPlot(textProportionsGraphicElement, graphData, graphLayout, config);
  }

  /**
   * Builds the Plotly traces for the text proportions chart.
   *
   * @param {Array} texts - `[textId, text]` pairs as produced by
   *   `Object.entries()` on the text lexicon; each text has `bounds`.
   * @param {string} graphtype - 'bar' yields one horizontal bar trace per
   *   text; any other value yields a single trace of that type holding the
   *   token count of every text.
   * @returns {Object[]} Plotly trace objects.
   */
  createTextProportionsGraphData(texts, graphtype) {
    if (graphtype === 'bar') {
      return texts.map(([textId, text]) => {
        const boundsLabel = `${text.bounds[0]} - ${text.bounds[1]}`;
        return {
          type: 'bar',
          orientation: 'h',
          x: [text.bounds[1] - text.bounds[0]],
          y: [0.5],
          text: [boundsLabel],
          name: this.getTextLabel(textId),
          hovertemplate: boundsLabel,
        };
      });
    }
    return [
      {
        values: texts.map(([, text]) => text.bounds[1] - text.bounds[0]),
        labels: texts.map(([textId]) => this.getTextLabel(textId)),
        type: graphtype
      }
    ];
  }

  /**
   * Fills the token list with every term of the selected category that is
   * not a stopword and flags the four most frequent ones. Loads the
   * stopwords first if they have not been fetched yet.
   *
   * @returns {Promise<void>} Rejects if the stopwords cannot be loaded.
   */
  async renderTokenList() {
    const corpusTokenListElement = document.querySelector('.corpus-token-list');
    const corpusTokenList = new nopaque.resource_lists.CorpusTokenList(corpusTokenListElement);
    const filteredData = this.filterData();
    let stopwordLists = this.data.stopwords;
    if (stopwordLists === undefined) {
      stopwordLists = await this.getStopwords();
    }
    // One Set over all languages: membership tests run once per token, so a
    // linear scan of the flattened list would be quadratic overall.
    const stopwords = new Set(Object.values(stopwordLists).flat());
    const entries = Object.entries(filteredData);

    // Filtering before the (stable) sort yields the same ranking as sorting
    // everything first, on fewer elements.
    const mostFrequent = new Set(
      entries
        .filter(([term]) => !stopwords.has(term.toLowerCase()))
        .sort((a, b) => b[1].count - a[1].count)
        .slice(0, 4)
        .map(([term]) => term)
    );

    const tokenData = entries
      .filter(([term]) => !stopwords.has(term))
      .map(([term, termData]) => ({
        term: term,
        count: termData.count,
        mostFrequent: mostFrequent.has(term)
      }));
    corpusTokenList.add(tokenData);
  }

  /**
   * Aggregates the corpus-wide frequencies of the selected token category by
   * lowercased term.
   *
   * @returns {Object} Prototype-less map from lowercased term to
   *   `{count, originalIds}`, where `originalIds` lists the ids that were
   *   merged into the term.
   */
  filterData() {
    const tokenCategoryDropdownElement = document.querySelector('[data-target="frequencies-token-category-dropdown"]');
    const tokenCategory = tokenCategoryDropdownElement.firstChild.textContent.toLowerCase();
    const corpusData = this.data.corpus.o.staticData;
    const freqs = corpusData.corpus.freqs[tokenCategory];
    const values = corpusData.values.p_attrs[tokenCategory];
    // No prototype: terms such as "constructor" or "__proto__" must not
    // resolve to members inherited from Object.prototype.
    const filteredData = Object.create(null);
    const numIds = Object.values(freqs).length;
    for (let id = 0; id < numIds; id++) {
      const term = values[id].toLowerCase();
      const count = freqs[id];
      const entry = filteredData[term];
      if (entry) {
        entry.count += count;
        entry.originalIds.push(id);
      } else {
        filteredData[term] = {
          count: count,
          originalIds: [id]
        };
      }
    }
    return filteredData;
  }

  /**
   * Draws the frequencies chart for the given tokens in the mode of the
   * currently active (i.e. 'disabled') mode button and remembers the tokens
   * in `data.tokenSet`.
   *
   * @param {Iterable<string>} tokenSet - Terms (keys of `filterData()`) to plot.
   */
  renderFrequenciesGraphic(tokenSet) {
    this.data.tokenSet = tokenSet;
    const corpusData = this.data.corpus.o.staticData;
    const tokenCategoryDropdownElement = document.querySelector('[data-target="frequencies-token-category-dropdown"]');
    const frequenciesGraphicElement = document.querySelector('#frequencies-graphic');
    const texts = Object.entries(corpusData.s_attrs.text.lexicon);
    const graphtype = document.querySelector('.frequencies-graph-mode-button.disabled').dataset.graphType;
    const tokenCategory = tokenCategoryDropdownElement.firstChild.textContent.toLowerCase();

    const graphData = this.createFrequenciesGraphData(tokenCategory, texts, graphtype, tokenSet);
    const graphLayout = {
      barmode: graphtype === 'bar' ? 'stack' : '',
      yaxis: {
        showticklabels: graphtype !== 'markers'
      },
      height: 627,
      margin: {
        l: 33
      }
    };
    const config = {
      responsive: true,
      modeBarButtonsToRemove: ['zoom2d', 'select2d', 'lasso2d', 'zoomIn2d', 'zoomOut2d', 'autoScale2d', 'resetScale2d'],
      displaylogo: false
    };
    Plotly.newPlot(frequenciesGraphicElement, graphData, graphLayout, config);
  }

  /**
   * Builds one Plotly trace per token holding its frequency in every text.
   *
   * @param {string} tokenCategory - Key into each text's `freqs`.
   * @param {Array} texts - `[textId, text]` pairs of the text lexicon.
   * @param {string} graphtype - 'markers' yields a bubble row per token
   *   (marker size = count); any other value is used as the trace type.
   * @param {Iterable<string>} tokenSet - Terms to plot. Terms unknown to the
   *   current category are plotted with a count of zero.
   * @returns {Object[]} Plotly trace objects.
   */
  createFrequenciesGraphData(tokenCategory, texts, graphtype, tokenSet) {
    const filteredData = this.filterData();
    // The labels are the same for every token, so compute them once.
    const textTitles = texts.map(([textId]) => this.getTextLabel(textId));
    const graphData = [];

    for (const item of tokenSet) {
      // Sum the counts of all original ids that were merged into this term.
      const tokenCountPerText = new Array(texts.length).fill(0);
      for (const originalId of filteredData[item]?.originalIds ?? []) {
        texts.forEach(([, text], i) => {
          tokenCountPerText[i] += text.freqs[tokenCategory][originalId] || 0;
        });
      }

      if (graphtype === 'markers') {
        graphData.push({
          x: [...textTitles],
          y: texts.map(() => item),
          name: item,
          // One label per text with that text's own count.
          // NOTE(review): the separator is the line break found in the
          // reviewed source; Plotly's own line-break markup may have been
          // intended - confirm.
          text: tokenCountPerText.map((count) => `${item}\n${count}`),
          mode: 'markers',
          marker: {
            size: tokenCountPerText,
            sizeref: 0.4
          }
        });
      } else {
        graphData.push({
          x: [...textTitles],
          y: tokenCountPerText,
          name: item,
          type: graphtype
        });
      }
    }
    return graphData;
  }

  /**
   * Creates a chip element for a stopword.
   *
   * NOTE(review): the chip markup was missing from the reviewed source. It is
   * reconstructed from what the original click handlers relied on (an element
   * matching `.chip` whose first child is the bare word, followed by a
   * "close" label) and from Materialize's chip markup - confirm.
   *
   * @param {string} word - Stopword shown on the chip.
   * @param {Function} onClick - Invoked with `word` on any click on the chip.
   * @returns {Element} The chip element.
   */
  createStopwordChip(word, onClick) {
    const chipElement = nopaque.Utils.HTMLToElement(
      `<div class="chip">${word}<i class="close material-icons">close</i></div>`
    );
    chipElement.addEventListener('click', () => {
      onClick(word);
    });
    return chipElement;
  }

  /**
   * Renders the stopword settings modal: the language selection, the user
   * stopword chips, the chips of the selected language, and the
   * delete/reset buttons. Safe to call on every modal opening; listeners
   * added by a previous call are removed first.
   *
   * @param {Object} stopwords - Stopword lists keyed by language (plus
   *   'user_stopwords'); the only visible caller passes `data.stopwords`.
   */
  renderStopwordSettingsModal(stopwords) {
    const stopwordInputField = document.querySelector('#stopword-input-field');
    const userStopwordListContainer = document.querySelector('#user-stopword-list-container');
    const stopwordLanguageSelection = document.querySelector('#stopword-language-selection');
    const stopwordLanguageChipList = document.querySelector('#stopword-language-chip-list');
    const deleteLanguageStopwordListEntriesButton = document.querySelector('#delete-language-stopword-list-entries-button');
    const resetLanguageStopwordListEntriesButton = document.querySelector('#reset-language-stopword-list-entries-button');

    stopwordLanguageChipList.innerHTML = '';
    userStopwordListContainer.innerHTML = '';
    stopwordInputField.value = '';

    // Render stopword language selection. Set english as default language.
    // Filter out user_stopwords.
    if (stopwordLanguageSelection.children.length === 0) {
      for (const language of Object.keys(stopwords)) {
        if (language === 'user_stopwords') {
          continue;
        }
        // NOTE(review): the option markup was empty in the reviewed source.
        // The value must be the language key because it is used to index the
        // stopword lists; 'english' as the default key is assumed - confirm.
        const optionElement = nopaque.Utils.HTMLToElement(
          `<option value="${language}" ${language === 'english' ? 'selected' : ''}>${language}</option>`
        );
        stopwordLanguageSelection.appendChild(optionElement);
      }
    }

    // Render user stopwords over input field.
    for (const word of this.data.stopwords['user_stopwords']) {
      const chipElement = this.createStopwordChip(word, (removedWord) => {
        this.data.stopwords['user_stopwords'] = this.data.stopwords['user_stopwords'].filter((item) => item !== removedWord);
      });
      userStopwordListContainer.appendChild(chipElement);
    }

    // Render english stopwords as default ...
    const selectedLanguage = stopwordLanguageSelection.value;
    this.renderStopwordLanguageChipList(selectedLanguage, stopwords[selectedLanguage]);

    // This method runs on every modal opening while the select and the
    // buttons persist, so drop the listeners of the previous run instead of
    // stacking another set on top of them.
    this.data.stopwordModalListeners?.abort();
    const listeners = new AbortController();
    this.data.stopwordModalListeners = listeners;
    const listenerOptions = {signal: listeners.signal};

    // ... or render selected language stopwords.
    stopwordLanguageSelection.addEventListener('change', (event) => {
      this.renderStopwordLanguageChipList(event.target.value, stopwords[event.target.value]);
    }, listenerOptions);

    // Eventlistener for deleting all stopwords of a language.
    deleteLanguageStopwordListEntriesButton.addEventListener('click', () => {
      const language = stopwordLanguageSelection.value;
      this.data.stopwords[language] = [];
      stopwordLanguageChipList.innerHTML = '';
      this.buttonRendering();
    }, listenerOptions);

    // Eventlistener for resetting all stopwords of a language to the original stopwords.
    resetLanguageStopwordListEntriesButton.addEventListener('click', () => {
      const language = stopwordLanguageSelection.value;
      this.data.stopwords[language] = structuredClone(this.data.originalStopwords[language]);
      this.renderStopwordLanguageChipList(language, this.data.stopwords[language]);
    }, listenerOptions);

    // Initialize Materialize components.
    M.Chips.init(
      stopwordInputField,
      {
        placeholder: 'Add stopwords',
        onChipAdd: (event) => {
          // Sync every chip of the input into the user stopwords, lowercased
          // and without duplicates.
          for (const word of event[0].M_Chips.chipsData) {
            const stopword = word.tag.toLowerCase();
            if (!this.data.stopwords['user_stopwords'].includes(stopword)) {
              this.data.stopwords['user_stopwords'].push(stopword);
            }
          }
        }
      }
    );
    M.FormSelect.init(stopwordLanguageSelection);
  }

  /**
   * Enables/disables the delete and reset buttons for the selected language:
   * delete is disabled when its list is empty, reset when the list has the
   * same length as the original one.
   */
  buttonRendering() {
    const deleteLanguageStopwordListEntriesButton = document.querySelector('#delete-language-stopword-list-entries-button');
    const resetLanguageStopwordListEntriesButton = document.querySelector('#reset-language-stopword-list-entries-button');
    const selectedLanguage = document.querySelector('#stopword-language-selection').value;
    const stopwordLength = this.data.stopwords[selectedLanguage].length;
    const originalStopwordListLength = this.data.originalStopwords[selectedLanguage].length;

    deleteLanguageStopwordListEntriesButton.classList.toggle('disabled', stopwordLength === 0);
    resetLanguageStopwordListEntriesButton.classList.toggle('disabled', stopwordLength === originalStopwordListLength);
  }

  /**
   * Replaces the language chip list with one chip per given stopword.
   * Clicking a chip removes its word from `data.stopwords[language]`.
   *
   * @param {string} language - Key of the list in `data.stopwords`.
   * @param {string[]} stopwords - Words to render.
   */
  renderStopwordLanguageChipList(language, stopwords) {
    const stopwordLanguageChipList = document.querySelector('#stopword-language-chip-list');
    stopwordLanguageChipList.innerHTML = '';
    for (const word of stopwords) {
      const chipElement = this.createStopwordChip(word, (removedWord) => {
        this.data.stopwords[language] = this.data.stopwords[language].filter((item) => item !== removedWord);
        this.buttonRendering();
      });
      stopwordLanguageChipList.appendChild(chipElement);
    }
    this.buttonRendering();
  }
}