dynamic token visualization

This commit is contained in:
Inga Kirschnick 2023-07-21 13:14:29 +02:00
parent e4f435c5ee
commit d08f95e944
4 changed files with 138 additions and 82 deletions

View File

@ -7,20 +7,21 @@ class CorpusAnalysisStaticVisualization {
stopwords: undefined,
originalStopwords: {},
stopwordCache: {},
promises: {getStopwords: undefined}
promises: {getStopwords: undefined},
tokenSet: new Set()
};
this.app.registerExtension(this);
}
async init() {
init() {
// Init data
this.data.corpus = this.app.data.corpus;
this.renderGeneralCorpusInfo();
this.renderTextInfoList();
this.renderTextProportionsGraphic();
this.renderTokenList();
this.renderFrequenciesGraphic();
// this.renderFrequenciesGraphic();
// Add event listeners
let frequenciesStopwordSettingModal = document.querySelector('#frequencies-stopwords-setting-modal');
@ -46,7 +47,7 @@ class CorpusAnalysisStaticVisualization {
let frequenciesTokenCategoryDropdownListElement = document.querySelector("#frequencies-token-category-dropdown");
frequenciesTokenCategoryDropdownListElement.addEventListener('click', (event) => {
frequenciesTokenCategoryDropdownElement.firstChild.textContent = event.target.innerHTML;
this.renderFrequenciesGraphic();
this.renderTokenList();
});
let frequenciesGraphModeButtons = document.querySelectorAll('.frequencies-graph-mode-button');
@ -56,7 +57,7 @@ class CorpusAnalysisStaticVisualization {
btn.classList.remove('disabled');
});
event.target.closest('.frequencies-graph-mode-button').classList.add('disabled');
this.renderFrequenciesGraphic();
this.renderFrequenciesGraphic(this.data.tokenSet);
});
});
@ -64,7 +65,8 @@ class CorpusAnalysisStaticVisualization {
actionButton.addEventListener('click', (event) => {
let action = event.target.closest('.frequencies-stopword-setting-modal-action-buttons').dataset.action;
if (action === 'submit') {
this.renderFrequenciesGraphic();
console.log('Stopwords changed');
this.renderTokenList();
} else if (action === 'cancel') {
this.data.stopwords = structuredClone(this.data.stopwordCache);
}
@ -208,34 +210,60 @@ class CorpusAnalysisStaticVisualization {
}
async renderTokenList() {
let corpusData = this.data.corpus.o.staticData;
let corpusTokenListElement = document.querySelector('.corpus-token-list');
let corpusTokenList = new CorpusTokenList(corpusTokenListElement);
let filteredData = this.filterData();
let stopwords = this.data.stopwords;
if (this.data.stopwords === undefined) {
stopwords = await this.getStopwords();
}
stopwords = Object.values(stopwords).flat();
let mostFrequent = Object.entries(corpusData.corpus.freqs.word)
.sort((a, b) => b[1] - a[1])
.filter(item => !stopwords.includes(corpusData.values.p_attrs.word[item[0]].toLowerCase()))
let mostFrequent = Object.entries(filteredData)
.sort((a, b) => b[1].count - a[1].count)
.filter(item => !stopwords.includes(item[0].toLowerCase()))
.slice(0, 4)
.map(item => parseInt(item[0]));
.map(item => item[0])
let tokenData = [];
for (let i = 0; i < Object.values(corpusData.corpus.freqs.word).length; i++) {
Object.entries(filteredData).forEach(item => {
let resource = {
term: corpusData.values.p_attrs.word[i].toLowerCase(),
count: corpusData.corpus.freqs.word[i],
mostFrequent: mostFrequent.includes(i)
term: item[0],
count: item[1].count,
mostFrequent: mostFrequent.includes(item[0])
};
if (!Object.values(stopwords).includes(resource.term)) {
tokenData.push(resource);
}
}
});
corpusTokenList.add(tokenData);
}
async renderFrequenciesGraphic() {
filterData() {
let frequenciesTokenCategoryDropdownElement = document.querySelector('[data-target="frequencies-token-category-dropdown"]');
let tokenCategory = frequenciesTokenCategoryDropdownElement.firstChild.textContent.toLowerCase();
let corpusData = this.data.corpus.o.staticData;
let filteredData = {};
for (let i = 0; i < Object.values(corpusData.corpus.freqs[tokenCategory]).length; i++) {
let term = corpusData.values.p_attrs[tokenCategory][i].toLowerCase();
let count = corpusData.corpus.freqs[tokenCategory][i];
if (filteredData[term]) {
filteredData[term].count += count;
filteredData[term].originalIds.push(i);
} else {
filteredData[term] = {
count: count,
originalIds: [i]
};
}
}
return filteredData;
}
renderFrequenciesGraphic(tokenSet) {
this.data.tokenSet = tokenSet;
let corpusData = this.data.corpus.o.staticData;
let frequenciesTokenCategoryDropdownElement = document.querySelector('[data-target="frequencies-token-category-dropdown"]');
let frequenciesGraphicElement = document.querySelector('#frequencies-graphic');
@ -243,12 +271,16 @@ class CorpusAnalysisStaticVisualization {
let graphtype = document.querySelector('.frequencies-graph-mode-button.disabled').dataset.graphType;
let tokenCategory = frequenciesTokenCategoryDropdownElement.firstChild.textContent.toLowerCase();
let graphData = await this.createFrequenciesGraphData(tokenCategory, texts, graphtype);
let graphData = this.createFrequenciesGraphData(tokenCategory, texts, graphtype, tokenSet);
let graphLayout = {
barmode: graphtype === 'bar' ? 'stack' : '',
yaxis: {
showticklabels: graphtype === 'markers' ? false : true
},
height: 627,
margin: {
l: 17
}
};
let config = {
responsive: true,
@ -258,31 +290,28 @@ class CorpusAnalysisStaticVisualization {
Plotly.newPlot(frequenciesGraphicElement, graphData, graphLayout, config);
}
async createFrequenciesGraphData(tokenCategory, texts, graphtype) {
createFrequenciesGraphData(tokenCategory, texts, graphtype, tokenSet) {
let corpusData = this.data.corpus.o.staticData;
let stopwords = this.data.stopwords;
if (this.data.stopwords === undefined) {
stopwords = await this.getStopwords();
}
let stopwordList = Object.values(stopwords).flat();
let graphData = [];
let filteredData = Object.entries(corpusData.corpus.freqs[tokenCategory])
.sort((a, b) => b[1] - a[1])
.filter(item => !stopwordList.includes(corpusData.values.p_attrs[tokenCategory][item[0]].toLowerCase()))
.slice(0, 5);
let filteredData = this.filterData();
switch (graphtype) {
case 'markers':
for (let item of filteredData) {
let size = texts.map(text => text[1].freqs[tokenCategory][item[0]] || 0);
for (let item of tokenSet) {
let textTitles = texts.map(text => `${corpusData.values.s_attrs.text[text[0]].title} (${corpusData.values.s_attrs.text[text[0]].publishing_year})`);
let tokenCountPerText = [];
for (let originalId of filteredData[item].originalIds) {
for (let i = 0; i < texts.length; i++) {
tokenCountPerText[i] = (tokenCountPerText[i] || 0) + (texts[i][1].freqs[tokenCategory][originalId] || 0);
}
}
let data = {
x: texts.map(text => `${corpusData.values.s_attrs.text[text[0]].title} (${corpusData.values.s_attrs.text[text[0]].publishing_year})`),
y: texts.map(text => corpusData.values.p_attrs[tokenCategory][item[0]]),
name: corpusData.values.p_attrs[tokenCategory][item[0]],
text: texts.map(text => `${corpusData.values.p_attrs[tokenCategory][item[0]]}<br>${text[1].freqs[tokenCategory][item[0]] || 0}`),
x: textTitles,
y: texts.map(text => item),
name: item,
text: texts.map(text => `${item}<br>${tokenCountPerText || 0}`),
mode: 'markers',
marker: {
size: size,
size: tokenCountPerText,
sizeref: 0.4
}
};
@ -290,11 +319,18 @@ class CorpusAnalysisStaticVisualization {
}
break;
default:
for (let item of filteredData) {
for (let item of tokenSet) {
let textTitles = texts.map(text => `${corpusData.values.s_attrs.text[text[0]].title} (${corpusData.values.s_attrs.text[text[0]].publishing_year})`);
let tokenCountPerText = [];
for (let originalId of filteredData[item].originalIds) {
for (let i = 0; i < texts.length; i++) {
tokenCountPerText[i] = (tokenCountPerText[i] || 0) + (texts[i][1].freqs[tokenCategory][originalId] || 0);
}
}
let data = {
x: texts.map(text => `${corpusData.values.s_attrs.text[text[0]].title} (${corpusData.values.s_attrs.text[text[0]].publishing_year})`),
y: texts.map(text => text[1].freqs[tokenCategory][item[0]] || 0),
name: corpusData.values.p_attrs[tokenCategory][item[0]],
x: textTitles,
y: tokenCountPerText,
name: item,
type: graphtype
};
graphData.push(data);

View File

@ -296,7 +296,7 @@ class ConcordanceQueryBuilder {
this.elements.entity.innerHTML = 'Entity';
}
this.elements.counter -= 1;
if (this.elements.counter <= 0) {
if (this.elements.counter === 0) {
this.elements.queryContainer.classList.add('hide');
}
this.queryPreviewBuilder();

View File

@ -6,7 +6,7 @@ class CorpusTokenList extends ResourceList {
}
static defaultOptions = {
page: 100
page: 7
};
constructor(listContainerElement, options = {}) {
@ -16,8 +16,35 @@ class CorpusTokenList extends ResourceList {
);
super(listContainerElement, _options);
this.listjs.list.addEventListener('click', (event) => {this.onClick(event)});
this.selectedItemIds = new Set();
this.selectedItemTerms = new Set();
this.listjs.on('sortComplete', () => {
let listItems = Array.from(this.listjs.items).filter(item => item.elm);
for (let item of listItems) {
let termElement = item.elm.querySelector('.term');
let mostFrequent = item.elm.dataset.mostfrequent === 'true';
if (mostFrequent) {
this.selectedItemTerms.add(termElement.textContent);
}
}
corpusAnalysisApp.extensions['Static Visualization'].renderFrequenciesGraphic(this.selectedItemTerms);
});
let tokenListResetButtonElement = this.listContainerElement.querySelector('#token-list-reset-button');
tokenListResetButtonElement.addEventListener('click', () => {
this.selectedItemTerms.clear();
let listItems = Array.from(this.listjs.items).filter(item => item.elm);
for (let item of listItems) {
let termElement = item.elm.querySelector('.term');
let mostFrequent = item.elm.dataset.mostfrequent === 'true';
if (mostFrequent) {
item.elm.querySelector('.select-checkbox').checked = true;
this.selectedItemTerms.add(termElement.textContent);
} else {
item.elm.querySelector('.select-checkbox').checked = false;
}
}
corpusAnalysisApp.extensions['Static Visualization'].renderFrequenciesGraphic(this.selectedItemTerms);
});
}
get item() {
@ -42,7 +69,7 @@ class CorpusTokenList extends ResourceList {
return [
'term',
'count',
'mostFrequent',
{data: ['mostFrequent']},
'frequency'
];
}
@ -58,12 +85,12 @@ class CorpusTokenList extends ResourceList {
<input id="${listSearchElementId}" class="search" type="text"></input>
<label for="${listSearchElementId}">Search token</label>
</div>
<div class="scrollable-list-container-wrapper" style="height:276px; overflow:scroll;">
<div class="scrollable-list-container">
<table>
<thead>
<tr>
<th></th>
<th style="width:15%;">
<span class="material-icons" style="cursor:pointer" id="token-list-reset-button">refresh</span>
</th>
<th>Term</th>
<th>Count</th>
<th>Frequency</th>
@ -71,11 +98,8 @@ class CorpusTokenList extends ResourceList {
</thead>
<tbody class="list"></tbody>
</table>
</div>
</div>
<ul class="pagination"></ul>
`.trim();
this.listContainerElement.style.padding = '30px';
}
mapResourceToValue(corpusTokenData) {
@ -92,19 +116,20 @@ class CorpusTokenList extends ResourceList {
}
onClick(event) {
let listItemElement = event.target.closest('.list-item[data-id]');
if (event.target.closest('.disable-on-click') !== null) {return;}
let listItemElement = event.target.closest('.list-item');
if (listItemElement === null) {return;}
let itemId = listItemElement.dataset.id;
let item = listItemElement.querySelector('.term').textContent;
let listActionElement = event.target.closest('.list-action-trigger[data-list-action]');
let listAction = listActionElement === null ? '' : listActionElement.dataset.listAction;
switch (listAction) {
case 'select': {
if (event.target.checked) {
this.selectedItemIds.add(itemId);
this.selectedItemTerms.add(item);
} else {
this.selectedItemIds.delete(itemId);
this.selectedItemTerms.delete(item);
}
this.renderingItemSelection();
corpusAnalysisApp.extensions['Static Visualization'].renderFrequenciesGraphic(this.selectedItemTerms);
break;
}
default: {
@ -113,9 +138,4 @@ class CorpusTokenList extends ResourceList {
}
}
renderingItemSelection() {
}
}

View File

@ -90,9 +90,8 @@
<div class="card-content">
<span class="card-title">Frequencies</span>
<div class="row">
{# <div class="col s1"></div> #}
<div class="col s5">
<div class="corpus-token-list no-autoinit"></div>
<div class="col s4">
<div class="corpus-token-list no-autoinit" style="transform: scale(0.91);"></div>
<a class="dropdown-trigger btn" data-target="frequencies-token-category-dropdown">Word<i class="material-icons right">arrow_drop_down</i></a>
<a class="btn-flat modal-trigger no-autoinit" id="frequencies-stopwords-setting-modal-button" href="#frequencies-stopwords-setting-modal">
<i class="material-icons grey-text text-darken-2">settings</i>
@ -104,9 +103,9 @@
<li><a data-token-category="simple_pos">Simple_pos</a></li>
</ul>
</div>
{# <div class="col s1"></div> #}
<div class="col s7">
<div class="col s8">
<div id="frequencies-graphic"></div>
<div>
<a class="btn disabled frequencies-graph-mode-button" data-graph-type="bar"><i class="material-icons">stacked_bar_chart</i></a>
<a class="btn frequencies-graph-mode-button" data-graph-type="scatter"><i class="material-icons">show_chart</i></a>
<a class="btn frequencies-graph-mode-button" data-graph-type="markers"><i class="material-icons">bubble_chart</i></a>
@ -116,6 +115,7 @@
</div>
</div>
</div>
</div>
<div class="row">
</div>
@ -130,8 +130,7 @@
like "the" or "and," that carry little meaning and are often removed in text analysis
to improve efficiency and accuracy.</p>
<div id="user-stopword-list-container"></div>
<div class="chips col s8 no-autoinit input-field" id="stopword-input-field">
</div>
<div class="chips col s8 no-autoinit input-field" id="stopword-input-field"></div>
</div>
<div class="row">
<p>Below you can find a list of all stopwords that are always filtered out.
@ -159,3 +158,4 @@
const corpusAnalysisStaticVisualization = new CorpusAnalysisStaticVisualization(corpusAnalysisApp);
</script>
{% endset %}