Merge branch 'visualizations-update' of gitlab.ub.uni-bielefeld.de:sfb1288inf/nopaque into visualizations-update

This commit is contained in:
Patrick Jentsch 2023-07-24 10:02:44 +02:00
commit cef82d9001
4 changed files with 138 additions and 82 deletions

View File

@ -7,20 +7,21 @@ class CorpusAnalysisStaticVisualization {
stopwords: undefined, stopwords: undefined,
originalStopwords: {}, originalStopwords: {},
stopwordCache: {}, stopwordCache: {},
promises: {getStopwords: undefined} promises: {getStopwords: undefined},
tokenSet: new Set()
}; };
this.app.registerExtension(this); this.app.registerExtension(this);
} }
async init() { init() {
// Init data // Init data
this.data.corpus = this.app.data.corpus; this.data.corpus = this.app.data.corpus;
this.renderGeneralCorpusInfo(); this.renderGeneralCorpusInfo();
this.renderTextInfoList(); this.renderTextInfoList();
this.renderTextProportionsGraphic(); this.renderTextProportionsGraphic();
this.renderTokenList(); this.renderTokenList();
this.renderFrequenciesGraphic(); // this.renderFrequenciesGraphic();
// Add event listeners // Add event listeners
let frequenciesStopwordSettingModal = document.querySelector('#frequencies-stopwords-setting-modal'); let frequenciesStopwordSettingModal = document.querySelector('#frequencies-stopwords-setting-modal');
@ -46,7 +47,7 @@ class CorpusAnalysisStaticVisualization {
let frequenciesTokenCategoryDropdownListElement = document.querySelector("#frequencies-token-category-dropdown"); let frequenciesTokenCategoryDropdownListElement = document.querySelector("#frequencies-token-category-dropdown");
frequenciesTokenCategoryDropdownListElement.addEventListener('click', (event) => { frequenciesTokenCategoryDropdownListElement.addEventListener('click', (event) => {
frequenciesTokenCategoryDropdownElement.firstChild.textContent = event.target.innerHTML; frequenciesTokenCategoryDropdownElement.firstChild.textContent = event.target.innerHTML;
this.renderFrequenciesGraphic(); this.renderTokenList();
}); });
let frequenciesGraphModeButtons = document.querySelectorAll('.frequencies-graph-mode-button'); let frequenciesGraphModeButtons = document.querySelectorAll('.frequencies-graph-mode-button');
@ -56,7 +57,7 @@ class CorpusAnalysisStaticVisualization {
btn.classList.remove('disabled'); btn.classList.remove('disabled');
}); });
event.target.closest('.frequencies-graph-mode-button').classList.add('disabled'); event.target.closest('.frequencies-graph-mode-button').classList.add('disabled');
this.renderFrequenciesGraphic(); this.renderFrequenciesGraphic(this.data.tokenSet);
}); });
}); });
@ -64,7 +65,8 @@ class CorpusAnalysisStaticVisualization {
actionButton.addEventListener('click', (event) => { actionButton.addEventListener('click', (event) => {
let action = event.target.closest('.frequencies-stopword-setting-modal-action-buttons').dataset.action; let action = event.target.closest('.frequencies-stopword-setting-modal-action-buttons').dataset.action;
if (action === 'submit') { if (action === 'submit') {
this.renderFrequenciesGraphic(); console.log('Stopwords changed');
this.renderTokenList();
} else if (action === 'cancel') { } else if (action === 'cancel') {
this.data.stopwords = structuredClone(this.data.stopwordCache); this.data.stopwords = structuredClone(this.data.stopwordCache);
} }
@ -208,34 +210,60 @@ class CorpusAnalysisStaticVisualization {
} }
async renderTokenList() { async renderTokenList() {
let corpusData = this.data.corpus.o.staticData;
let corpusTokenListElement = document.querySelector('.corpus-token-list'); let corpusTokenListElement = document.querySelector('.corpus-token-list');
let corpusTokenList = new CorpusTokenList(corpusTokenListElement); let corpusTokenList = new CorpusTokenList(corpusTokenListElement);
let filteredData = this.filterData();
let stopwords = this.data.stopwords; let stopwords = this.data.stopwords;
if (this.data.stopwords === undefined) { if (this.data.stopwords === undefined) {
stopwords = await this.getStopwords(); stopwords = await this.getStopwords();
} }
stopwords = Object.values(stopwords).flat(); stopwords = Object.values(stopwords).flat();
let mostFrequent = Object.entries(corpusData.corpus.freqs.word) let mostFrequent = Object.entries(filteredData)
.sort((a, b) => b[1] - a[1]) .sort((a, b) => b[1].count - a[1].count)
.filter(item => !stopwords.includes(corpusData.values.p_attrs.word[item[0]].toLowerCase())) .filter(item => !stopwords.includes(item[0].toLowerCase()))
.slice(0, 4) .slice(0, 4)
.map(item => parseInt(item[0])); .map(item => item[0])
let tokenData = []; let tokenData = [];
for (let i = 0; i < Object.values(corpusData.corpus.freqs.word).length; i++) { Object.entries(filteredData).forEach(item => {
let resource = { let resource = {
term: corpusData.values.p_attrs.word[i].toLowerCase(), term: item[0],
count: corpusData.corpus.freqs.word[i], count: item[1].count,
mostFrequent: mostFrequent.includes(i) mostFrequent: mostFrequent.includes(item[0])
}; };
if (!Object.values(stopwords).includes(resource.term)) { if (!Object.values(stopwords).includes(resource.term)) {
tokenData.push(resource); tokenData.push(resource);
} }
} });
corpusTokenList.add(tokenData); corpusTokenList.add(tokenData);
} }
async renderFrequenciesGraphic() { filterData() {
let frequenciesTokenCategoryDropdownElement = document.querySelector('[data-target="frequencies-token-category-dropdown"]');
let tokenCategory = frequenciesTokenCategoryDropdownElement.firstChild.textContent.toLowerCase();
let corpusData = this.data.corpus.o.staticData;
let filteredData = {};
for (let i = 0; i < Object.values(corpusData.corpus.freqs[tokenCategory]).length; i++) {
let term = corpusData.values.p_attrs[tokenCategory][i].toLowerCase();
let count = corpusData.corpus.freqs[tokenCategory][i];
if (filteredData[term]) {
filteredData[term].count += count;
filteredData[term].originalIds.push(i);
} else {
filteredData[term] = {
count: count,
originalIds: [i]
};
}
}
return filteredData;
}
renderFrequenciesGraphic(tokenSet) {
this.data.tokenSet = tokenSet;
let corpusData = this.data.corpus.o.staticData; let corpusData = this.data.corpus.o.staticData;
let frequenciesTokenCategoryDropdownElement = document.querySelector('[data-target="frequencies-token-category-dropdown"]'); let frequenciesTokenCategoryDropdownElement = document.querySelector('[data-target="frequencies-token-category-dropdown"]');
let frequenciesGraphicElement = document.querySelector('#frequencies-graphic'); let frequenciesGraphicElement = document.querySelector('#frequencies-graphic');
@ -243,12 +271,16 @@ class CorpusAnalysisStaticVisualization {
let graphtype = document.querySelector('.frequencies-graph-mode-button.disabled').dataset.graphType; let graphtype = document.querySelector('.frequencies-graph-mode-button.disabled').dataset.graphType;
let tokenCategory = frequenciesTokenCategoryDropdownElement.firstChild.textContent.toLowerCase(); let tokenCategory = frequenciesTokenCategoryDropdownElement.firstChild.textContent.toLowerCase();
let graphData = await this.createFrequenciesGraphData(tokenCategory, texts, graphtype); let graphData = this.createFrequenciesGraphData(tokenCategory, texts, graphtype, tokenSet);
let graphLayout = { let graphLayout = {
barmode: graphtype === 'bar' ? 'stack' : '', barmode: graphtype === 'bar' ? 'stack' : '',
yaxis: { yaxis: {
showticklabels: graphtype === 'markers' ? false : true showticklabels: graphtype === 'markers' ? false : true
}, },
height: 627,
margin: {
l: 17
}
}; };
let config = { let config = {
responsive: true, responsive: true,
@ -258,31 +290,28 @@ class CorpusAnalysisStaticVisualization {
Plotly.newPlot(frequenciesGraphicElement, graphData, graphLayout, config); Plotly.newPlot(frequenciesGraphicElement, graphData, graphLayout, config);
} }
async createFrequenciesGraphData(tokenCategory, texts, graphtype) { createFrequenciesGraphData(tokenCategory, texts, graphtype, tokenSet) {
let corpusData = this.data.corpus.o.staticData; let corpusData = this.data.corpus.o.staticData;
let stopwords = this.data.stopwords;
if (this.data.stopwords === undefined) {
stopwords = await this.getStopwords();
}
let stopwordList = Object.values(stopwords).flat();
let graphData = []; let graphData = [];
let filteredData = Object.entries(corpusData.corpus.freqs[tokenCategory]) let filteredData = this.filterData();
.sort((a, b) => b[1] - a[1])
.filter(item => !stopwordList.includes(corpusData.values.p_attrs[tokenCategory][item[0]].toLowerCase()))
.slice(0, 5);
switch (graphtype) { switch (graphtype) {
case 'markers': case 'markers':
for (let item of filteredData) { for (let item of tokenSet) {
let size = texts.map(text => text[1].freqs[tokenCategory][item[0]] || 0); let textTitles = texts.map(text => `${corpusData.values.s_attrs.text[text[0]].title} (${corpusData.values.s_attrs.text[text[0]].publishing_year})`);
let tokenCountPerText = [];
for (let originalId of filteredData[item].originalIds) {
for (let i = 0; i < texts.length; i++) {
tokenCountPerText[i] = (tokenCountPerText[i] || 0) + (texts[i][1].freqs[tokenCategory][originalId] || 0);
}
}
let data = { let data = {
x: texts.map(text => `${corpusData.values.s_attrs.text[text[0]].title} (${corpusData.values.s_attrs.text[text[0]].publishing_year})`), x: textTitles,
y: texts.map(text => corpusData.values.p_attrs[tokenCategory][item[0]]), y: texts.map(text => item),
name: corpusData.values.p_attrs[tokenCategory][item[0]], name: item,
text: texts.map(text => `${corpusData.values.p_attrs[tokenCategory][item[0]]}<br>${text[1].freqs[tokenCategory][item[0]] || 0}`), text: texts.map(text => `${item}<br>${tokenCountPerText || 0}`),
mode: 'markers', mode: 'markers',
marker: { marker: {
size: size, size: tokenCountPerText,
sizeref: 0.4 sizeref: 0.4
} }
}; };
@ -290,11 +319,18 @@ class CorpusAnalysisStaticVisualization {
} }
break; break;
default: default:
for (let item of filteredData) { for (let item of tokenSet) {
let textTitles = texts.map(text => `${corpusData.values.s_attrs.text[text[0]].title} (${corpusData.values.s_attrs.text[text[0]].publishing_year})`);
let tokenCountPerText = [];
for (let originalId of filteredData[item].originalIds) {
for (let i = 0; i < texts.length; i++) {
tokenCountPerText[i] = (tokenCountPerText[i] || 0) + (texts[i][1].freqs[tokenCategory][originalId] || 0);
}
}
let data = { let data = {
x: texts.map(text => `${corpusData.values.s_attrs.text[text[0]].title} (${corpusData.values.s_attrs.text[text[0]].publishing_year})`), x: textTitles,
y: texts.map(text => text[1].freqs[tokenCategory][item[0]] || 0), y: tokenCountPerText,
name: corpusData.values.p_attrs[tokenCategory][item[0]], name: item,
type: graphtype type: graphtype
}; };
graphData.push(data); graphData.push(data);

View File

@ -296,7 +296,7 @@ class ConcordanceQueryBuilder {
this.elements.entity.innerHTML = 'Entity'; this.elements.entity.innerHTML = 'Entity';
} }
this.elements.counter -= 1; this.elements.counter -= 1;
if (this.elements.counter <= 0) { if (this.elements.counter === 0) {
this.elements.queryContainer.classList.add('hide'); this.elements.queryContainer.classList.add('hide');
} }
this.queryPreviewBuilder(); this.queryPreviewBuilder();

View File

@ -6,7 +6,7 @@ class CorpusTokenList extends ResourceList {
} }
static defaultOptions = { static defaultOptions = {
page: 100 page: 7
}; };
constructor(listContainerElement, options = {}) { constructor(listContainerElement, options = {}) {
@ -16,8 +16,35 @@ class CorpusTokenList extends ResourceList {
); );
super(listContainerElement, _options); super(listContainerElement, _options);
this.listjs.list.addEventListener('click', (event) => {this.onClick(event)}); this.listjs.list.addEventListener('click', (event) => {this.onClick(event)});
this.selectedItemIds = new Set(); this.selectedItemTerms = new Set();
this.listjs.on('sortComplete', () => {
let listItems = Array.from(this.listjs.items).filter(item => item.elm);
for (let item of listItems) {
let termElement = item.elm.querySelector('.term');
let mostFrequent = item.elm.dataset.mostfrequent === 'true';
if (mostFrequent) {
this.selectedItemTerms.add(termElement.textContent);
}
}
corpusAnalysisApp.extensions['Static Visualization'].renderFrequenciesGraphic(this.selectedItemTerms);
});
let tokenListResetButtonElement = this.listContainerElement.querySelector('#token-list-reset-button');
tokenListResetButtonElement.addEventListener('click', () => {
this.selectedItemTerms.clear();
let listItems = Array.from(this.listjs.items).filter(item => item.elm);
for (let item of listItems) {
let termElement = item.elm.querySelector('.term');
let mostFrequent = item.elm.dataset.mostfrequent === 'true';
if (mostFrequent) {
item.elm.querySelector('.select-checkbox').checked = true;
this.selectedItemTerms.add(termElement.textContent);
} else {
item.elm.querySelector('.select-checkbox').checked = false;
}
}
corpusAnalysisApp.extensions['Static Visualization'].renderFrequenciesGraphic(this.selectedItemTerms);
});
} }
get item() { get item() {
@ -42,7 +69,7 @@ class CorpusTokenList extends ResourceList {
return [ return [
'term', 'term',
'count', 'count',
'mostFrequent', {data: ['mostFrequent']},
'frequency' 'frequency'
]; ];
} }
@ -58,24 +85,21 @@ class CorpusTokenList extends ResourceList {
<input id="${listSearchElementId}" class="search" type="text"></input> <input id="${listSearchElementId}" class="search" type="text"></input>
<label for="${listSearchElementId}">Search token</label> <label for="${listSearchElementId}">Search token</label>
</div> </div>
<div class="scrollable-list-container-wrapper" style="height:276px; overflow:scroll;"> <table>
<div class="scrollable-list-container"> <thead>
<table> <tr>
<thead> <th style="width:15%;">
<tr> <span class="material-icons" style="cursor:pointer" id="token-list-reset-button">refresh</span>
<th></th> </th>
<th>Term</th> <th>Term</th>
<th>Count</th> <th>Count</th>
<th>Frequency</th> <th>Frequency</th>
</tr> </tr>
</thead> </thead>
<tbody class="list"></tbody> <tbody class="list"></tbody>
</table> </table>
</div>
</div>
<ul class="pagination"></ul> <ul class="pagination"></ul>
`.trim(); `.trim();
this.listContainerElement.style.padding = '30px';
} }
mapResourceToValue(corpusTokenData) { mapResourceToValue(corpusTokenData) {
@ -92,19 +116,20 @@ class CorpusTokenList extends ResourceList {
} }
onClick(event) { onClick(event) {
let listItemElement = event.target.closest('.list-item[data-id]'); if (event.target.closest('.disable-on-click') !== null) {return;}
let listItemElement = event.target.closest('.list-item');
if (listItemElement === null) {return;} if (listItemElement === null) {return;}
let itemId = listItemElement.dataset.id; let item = listItemElement.querySelector('.term').textContent;
let listActionElement = event.target.closest('.list-action-trigger[data-list-action]'); let listActionElement = event.target.closest('.list-action-trigger[data-list-action]');
let listAction = listActionElement === null ? '' : listActionElement.dataset.listAction; let listAction = listActionElement === null ? '' : listActionElement.dataset.listAction;
switch (listAction) { switch (listAction) {
case 'select': { case 'select': {
if (event.target.checked) { if (event.target.checked) {
this.selectedItemIds.add(itemId); this.selectedItemTerms.add(item);
} else { } else {
this.selectedItemIds.delete(itemId); this.selectedItemTerms.delete(item);
} }
this.renderingItemSelection(); corpusAnalysisApp.extensions['Static Visualization'].renderFrequenciesGraphic(this.selectedItemTerms);
break; break;
} }
default: { default: {
@ -113,9 +138,4 @@ class CorpusTokenList extends ResourceList {
} }
} }
renderingItemSelection() {
}
} }

View File

@ -90,9 +90,8 @@
<div class="card-content"> <div class="card-content">
<span class="card-title">Frequencies</span> <span class="card-title">Frequencies</span>
<div class="row"> <div class="row">
{# <div class="col s1"></div> #} <div class="col s4">
<div class="col s5"> <div class="corpus-token-list no-autoinit" style="transform: scale(0.91);"></div>
<div class="corpus-token-list no-autoinit"></div>
<a class="dropdown-trigger btn" data-target="frequencies-token-category-dropdown">Word<i class="material-icons right">arrow_drop_down</i></a> <a class="dropdown-trigger btn" data-target="frequencies-token-category-dropdown">Word<i class="material-icons right">arrow_drop_down</i></a>
<a class="btn-flat modal-trigger no-autoinit" id="frequencies-stopwords-setting-modal-button" href="#frequencies-stopwords-setting-modal"> <a class="btn-flat modal-trigger no-autoinit" id="frequencies-stopwords-setting-modal-button" href="#frequencies-stopwords-setting-modal">
<i class="material-icons grey-text text-darken-2">settings</i> <i class="material-icons grey-text text-darken-2">settings</i>
@ -104,12 +103,13 @@
<li><a data-token-category="simple_pos">Simple_pos</a></li> <li><a data-token-category="simple_pos">Simple_pos</a></li>
</ul> </ul>
</div> </div>
{# <div class="col s1"></div> #} <div class="col s8">
<div class="col s7"> <div id="frequencies-graphic"></div>
<div id="frequencies-graphic"></div> <div>
<a class="btn disabled frequencies-graph-mode-button" data-graph-type="bar"><i class="material-icons">stacked_bar_chart</i></a> <a class="btn disabled frequencies-graph-mode-button" data-graph-type="bar"><i class="material-icons">stacked_bar_chart</i></a>
<a class="btn frequencies-graph-mode-button" data-graph-type="scatter"><i class="material-icons">show_chart</i></a> <a class="btn frequencies-graph-mode-button" data-graph-type="scatter"><i class="material-icons">show_chart</i></a>
<a class="btn frequencies-graph-mode-button" data-graph-type="markers"><i class="material-icons">bubble_chart</i></a> <a class="btn frequencies-graph-mode-button" data-graph-type="markers"><i class="material-icons">bubble_chart</i></a>
</div>
</div> </div>
</div> </div>
</div> </div>
@ -130,8 +130,7 @@
like "the" or "and," that carry little meaning and are often removed in text analysis like "the" or "and," that carry little meaning and are often removed in text analysis
to improve efficiency and accuracy.</p> to improve efficiency and accuracy.</p>
<div id="user-stopword-list-container"></div> <div id="user-stopword-list-container"></div>
<div class="chips col s8 no-autoinit input-field" id="stopword-input-field"> <div class="chips col s8 no-autoinit input-field" id="stopword-input-field"></div>
</div>
</div> </div>
<div class="row"> <div class="row">
<p>Below you can find a list of all stopwords that are always filtered out. <p>Below you can find a list of all stopwords that are always filtered out.
@ -159,3 +158,4 @@
const corpusAnalysisStaticVisualization = new CorpusAnalysisStaticVisualization(corpusAnalysisApp); const corpusAnalysisStaticVisualization = new CorpusAnalysisStaticVisualization(corpusAnalysisApp);
</script> </script>
{% endset %} {% endset %}