From d08f95e9449442a8015719b285f1793825455f0c Mon Sep 17 00:00:00 2001 From: Inga Kirschnick Date: Fri, 21 Jul 2023 13:14:29 +0200 Subject: [PATCH] dynamic token visualization --- .../CorpusAnalysisStaticVisualization.js | 116 ++++++++++++------ app/static/js/CorpusAnalysis/QueryBuilder.js | 2 +- .../js/ResourceLists/CorpusTokenList.js | 80 +++++++----- .../_analysis/static_visualization.html.j2 | 22 ++-- 4 files changed, 138 insertions(+), 82 deletions(-) diff --git a/app/static/js/CorpusAnalysis/CorpusAnalysisStaticVisualization.js b/app/static/js/CorpusAnalysis/CorpusAnalysisStaticVisualization.js index 09686387..01c8f08f 100644 --- a/app/static/js/CorpusAnalysis/CorpusAnalysisStaticVisualization.js +++ b/app/static/js/CorpusAnalysis/CorpusAnalysisStaticVisualization.js @@ -7,20 +7,21 @@ class CorpusAnalysisStaticVisualization { stopwords: undefined, originalStopwords: {}, stopwordCache: {}, - promises: {getStopwords: undefined} + promises: {getStopwords: undefined}, + tokenSet: new Set() }; this.app.registerExtension(this); } - async init() { + init() { // Init data this.data.corpus = this.app.data.corpus; this.renderGeneralCorpusInfo(); this.renderTextInfoList(); this.renderTextProportionsGraphic(); this.renderTokenList(); - this.renderFrequenciesGraphic(); + // this.renderFrequenciesGraphic(); // Add event listeners let frequenciesStopwordSettingModal = document.querySelector('#frequencies-stopwords-setting-modal'); @@ -46,7 +47,7 @@ class CorpusAnalysisStaticVisualization { let frequenciesTokenCategoryDropdownListElement = document.querySelector("#frequencies-token-category-dropdown"); frequenciesTokenCategoryDropdownListElement.addEventListener('click', (event) => { frequenciesTokenCategoryDropdownElement.firstChild.textContent = event.target.innerHTML; - this.renderFrequenciesGraphic(); + this.renderTokenList(); }); let frequenciesGraphModeButtons = document.querySelectorAll('.frequencies-graph-mode-button'); @@ -56,7 +57,7 @@ class CorpusAnalysisStaticVisualization { btn.classList.remove('disabled'); }); event.target.closest('.frequencies-graph-mode-button').classList.add('disabled'); - this.renderFrequenciesGraphic(); + this.renderFrequenciesGraphic(this.data.tokenSet); }); }); @@ -64,7 +65,8 @@ class CorpusAnalysisStaticVisualization { actionButton.addEventListener('click', (event) => { let action = event.target.closest('.frequencies-stopword-setting-modal-action-buttons').dataset.action; if (action === 'submit') { - this.renderFrequenciesGraphic(); + console.log('Stopwords changed'); + this.renderTokenList(); } else if (action === 'cancel') { this.data.stopwords = structuredClone(this.data.stopwordCache); } @@ -208,34 +210,60 @@ class CorpusAnalysisStaticVisualization { } async renderTokenList() { - let corpusData = this.data.corpus.o.staticData; let corpusTokenListElement = document.querySelector('.corpus-token-list'); let corpusTokenList = new CorpusTokenList(corpusTokenListElement); + let filteredData = this.filterData(); let stopwords = this.data.stopwords; if (this.data.stopwords === undefined) { stopwords = await this.getStopwords(); } stopwords = Object.values(stopwords).flat(); - let mostFrequent = Object.entries(corpusData.corpus.freqs.word) - .sort((a, b) => b[1] - a[1]) - .filter(item => !stopwords.includes(corpusData.values.p_attrs.word[item[0]].toLowerCase())) + let mostFrequent = Object.entries(filteredData) + .sort((a, b) => b[1].count - a[1].count) + .filter(item => !stopwords.includes(item[0].toLowerCase())) .slice(0, 4) - .map(item => parseInt(item[0])); + .map(item => item[0]) + let tokenData = []; - for (let i = 0; i < Object.values(corpusData.corpus.freqs.word).length; i++) { + Object.entries(filteredData).forEach(item => { let resource = { - term: corpusData.values.p_attrs.word[i].toLowerCase(), - count: corpusData.corpus.freqs.word[i], - mostFrequent: mostFrequent.includes(i) + term: item[0], + count: item[1].count, + mostFrequent: mostFrequent.includes(item[0]) }; if (!Object.values(stopwords).includes(resource.term)) { tokenData.push(resource); } - } + }); corpusTokenList.add(tokenData); } - async renderFrequenciesGraphic() { + filterData() { + let frequenciesTokenCategoryDropdownElement = document.querySelector('[data-target="frequencies-token-category-dropdown"]'); + let tokenCategory = frequenciesTokenCategoryDropdownElement.firstChild.textContent.toLowerCase(); + let corpusData = this.data.corpus.o.staticData; + let filteredData = {}; + + for (let i = 0; i < Object.values(corpusData.corpus.freqs[tokenCategory]).length; i++) { + let term = corpusData.values.p_attrs[tokenCategory][i].toLowerCase(); + let count = corpusData.corpus.freqs[tokenCategory][i]; + + if (filteredData[term]) { + filteredData[term].count += count; + filteredData[term].originalIds.push(i); + } else { + filteredData[term] = { + count: count, + originalIds: [i] + }; + } + } + return filteredData; + } + + + renderFrequenciesGraphic(tokenSet) { + this.data.tokenSet = tokenSet; let corpusData = this.data.corpus.o.staticData; let frequenciesTokenCategoryDropdownElement = document.querySelector('[data-target="frequencies-token-category-dropdown"]'); let frequenciesGraphicElement = document.querySelector('#frequencies-graphic'); @@ -243,12 +271,16 @@ class CorpusAnalysisStaticVisualization { let graphtype = document.querySelector('.frequencies-graph-mode-button.disabled').dataset.graphType; let tokenCategory = frequenciesTokenCategoryDropdownElement.firstChild.textContent.toLowerCase(); - let graphData = await this.createFrequenciesGraphData(tokenCategory, texts, graphtype); + let graphData = this.createFrequenciesGraphData(tokenCategory, texts, graphtype, tokenSet); let graphLayout = { barmode: graphtype === 'bar' ? 'stack' : '', yaxis: { showticklabels: graphtype === 'markers' ? false : true }, + height: 627, + margin: { + l: 17 + } }; let config = { responsive: true, @@ -258,31 +290,28 @@ class CorpusAnalysisStaticVisualization { Plotly.newPlot(frequenciesGraphicElement, graphData, graphLayout, config); } - async createFrequenciesGraphData(tokenCategory, texts, graphtype) { + createFrequenciesGraphData(tokenCategory, texts, graphtype, tokenSet) { let corpusData = this.data.corpus.o.staticData; - let stopwords = this.data.stopwords; - if (this.data.stopwords === undefined) { - stopwords = await this.getStopwords(); - } - let stopwordList = Object.values(stopwords).flat(); let graphData = []; - let filteredData = Object.entries(corpusData.corpus.freqs[tokenCategory]) - .sort((a, b) => b[1] - a[1]) - .filter(item => !stopwordList.includes(corpusData.values.p_attrs[tokenCategory][item[0]].toLowerCase())) - .slice(0, 5); - + let filteredData = this.filterData(); switch (graphtype) { case 'markers': - for (let item of filteredData) { - let size = texts.map(text => text[1].freqs[tokenCategory][item[0]] || 0); + for (let item of tokenSet) { + let textTitles = texts.map(text => `${corpusData.values.s_attrs.text[text[0]].title} (${corpusData.values.s_attrs.text[text[0]].publishing_year})`); + let tokenCountPerText = []; + for (let originalId of filteredData[item].originalIds) { + for (let i = 0; i < texts.length; i++) { + tokenCountPerText[i] = (tokenCountPerText[i] || 0) + (texts[i][1].freqs[tokenCategory][originalId] || 0); + } + } let data = { - x: texts.map(text => `${corpusData.values.s_attrs.text[text[0]].title} (${corpusData.values.s_attrs.text[text[0]].publishing_year})`), - y: texts.map(text => corpusData.values.p_attrs[tokenCategory][item[0]]), - name: corpusData.values.p_attrs[tokenCategory][item[0]], - text: texts.map(text => `${corpusData.values.p_attrs[tokenCategory][item[0]]}
${text[1].freqs[tokenCategory][item[0]] || 0}`), + x: textTitles, + y: texts.map(text => item), + name: item, + text: texts.map(text => `${item}
${tokenCountPerText || 0}`), mode: 'markers', marker: { - size: size, + size: tokenCountPerText, sizeref: 0.4 } }; @@ -290,11 +319,18 @@ class CorpusAnalysisStaticVisualization { } break; default: - for (let item of filteredData) { + for (let item of tokenSet) { + let textTitles = texts.map(text => `${corpusData.values.s_attrs.text[text[0]].title} (${corpusData.values.s_attrs.text[text[0]].publishing_year})`); + let tokenCountPerText = []; + for (let originalId of filteredData[item].originalIds) { + for (let i = 0; i < texts.length; i++) { + tokenCountPerText[i] = (tokenCountPerText[i] || 0) + (texts[i][1].freqs[tokenCategory][originalId] || 0); + } + } let data = { - x: texts.map(text => `${corpusData.values.s_attrs.text[text[0]].title} (${corpusData.values.s_attrs.text[text[0]].publishing_year})`), - y: texts.map(text => text[1].freqs[tokenCategory][item[0]] || 0), - name: corpusData.values.p_attrs[tokenCategory][item[0]], + x: textTitles, + y: tokenCountPerText, + name: item, type: graphtype }; graphData.push(data); diff --git a/app/static/js/CorpusAnalysis/QueryBuilder.js b/app/static/js/CorpusAnalysis/QueryBuilder.js index ee52fb45..f3a2bf60 100644 --- a/app/static/js/CorpusAnalysis/QueryBuilder.js +++ b/app/static/js/CorpusAnalysis/QueryBuilder.js @@ -296,7 +296,7 @@ class ConcordanceQueryBuilder { this.elements.entity.innerHTML = 'Entity'; } this.elements.counter -= 1; - if (this.elements.counter <= 0) { + if (this.elements.counter === 0) { this.elements.queryContainer.classList.add('hide'); } this.queryPreviewBuilder(); diff --git a/app/static/js/ResourceLists/CorpusTokenList.js b/app/static/js/ResourceLists/CorpusTokenList.js index 1b992038..48129217 100644 --- a/app/static/js/ResourceLists/CorpusTokenList.js +++ b/app/static/js/ResourceLists/CorpusTokenList.js @@ -6,7 +6,7 @@ class CorpusTokenList extends ResourceList { } static defaultOptions = { - page: 100 + page: 7 }; constructor(listContainerElement, options = {}) { @@ -16,8 +16,35 @@ class CorpusTokenList extends ResourceList { ); super(listContainerElement, _options); this.listjs.list.addEventListener('click', (event) => {this.onClick(event)}); - this.selectedItemIds = new Set(); - + this.selectedItemTerms = new Set(); + this.listjs.on('sortComplete', () => { + let listItems = Array.from(this.listjs.items).filter(item => item.elm); + for (let item of listItems) { + let termElement = item.elm.querySelector('.term'); + let mostFrequent = item.elm.dataset.mostfrequent === 'true'; + if (mostFrequent) { + this.selectedItemTerms.add(termElement.textContent); + } + } + corpusAnalysisApp.extensions['Static Visualization'].renderFrequenciesGraphic(this.selectedItemTerms); + }); + + let tokenListResetButtonElement = this.listContainerElement.querySelector('#token-list-reset-button'); + tokenListResetButtonElement.addEventListener('click', () => { + this.selectedItemTerms.clear(); + let listItems = Array.from(this.listjs.items).filter(item => item.elm); + for (let item of listItems) { + let termElement = item.elm.querySelector('.term'); + let mostFrequent = item.elm.dataset.mostfrequent === 'true'; + if (mostFrequent) { + item.elm.querySelector('.select-checkbox').checked = true; + this.selectedItemTerms.add(termElement.textContent); + } else { + item.elm.querySelector('.select-checkbox').checked = false; + } + } + corpusAnalysisApp.extensions['Static Visualization'].renderFrequenciesGraphic(this.selectedItemTerms); + }); } get item() { @@ -42,7 +69,7 @@ class CorpusTokenList extends ResourceList { return [ 'term', 'count', - 'mostFrequent', + {data: ['mostFrequent']}, 'frequency' ]; } @@ -58,24 +85,21 @@ class CorpusTokenList extends ResourceList { -
-
- - - - - - - - - - -
TermCountFrequency
-
-
+ + + + + + + + + + +
+ refresh + TermCountFrequency
`.trim(); - this.listContainerElement.style.padding = '30px'; } mapResourceToValue(corpusTokenData) { @@ -92,19 +116,20 @@ class CorpusTokenList extends ResourceList { } onClick(event) { - let listItemElement = event.target.closest('.list-item[data-id]'); + if (event.target.closest('.disable-on-click') !== null) {return;} + let listItemElement = event.target.closest('.list-item'); if (listItemElement === null) {return;} - let itemId = listItemElement.dataset.id; + let item = listItemElement.querySelector('.term').textContent; let listActionElement = event.target.closest('.list-action-trigger[data-list-action]'); let listAction = listActionElement === null ? '' : listActionElement.dataset.listAction; switch (listAction) { case 'select': { if (event.target.checked) { - this.selectedItemIds.add(itemId); + this.selectedItemTerms.add(item); } else { - this.selectedItemIds.delete(itemId); + this.selectedItemTerms.delete(item); } - this.renderingItemSelection(); + corpusAnalysisApp.extensions['Static Visualization'].renderFrequenciesGraphic(this.selectedItemTerms); break; } default: { @@ -113,9 +138,4 @@ class CorpusTokenList extends ResourceList { } } - renderingItemSelection() { - - - } - } diff --git a/app/templates/corpora/_analysis/static_visualization.html.j2 b/app/templates/corpora/_analysis/static_visualization.html.j2 index 70a0b605..8ff56178 100644 --- a/app/templates/corpora/_analysis/static_visualization.html.j2 +++ b/app/templates/corpora/_analysis/static_visualization.html.j2 @@ -90,9 +90,8 @@
Frequencies
- {#
#} - @@ -130,8 +130,7 @@ like "the" or "and," that carry little meaning and are often removed in text analysis to improve efficiency and accuracy.

-
-
+

Below you can find a list of all stopwords that are always filtered out. @@ -159,3 +158,4 @@ const corpusAnalysisStaticVisualization = new CorpusAnalysisStaticVisualization(corpusAnalysisApp); {% endset %} + \ No newline at end of file