mirror of
https://gitlab.ub.uni-bielefeld.de/sfb1288inf/nopaque.git
synced 2025-01-24 16:40:35 +00:00
Merge branch 'visualizations-update' of gitlab.ub.uni-bielefeld.de:sfb1288inf/nopaque into visualizations-update
This commit is contained in:
commit
cef82d9001
@ -7,20 +7,21 @@ class CorpusAnalysisStaticVisualization {
|
||||
stopwords: undefined,
|
||||
originalStopwords: {},
|
||||
stopwordCache: {},
|
||||
promises: {getStopwords: undefined}
|
||||
promises: {getStopwords: undefined},
|
||||
tokenSet: new Set()
|
||||
};
|
||||
|
||||
this.app.registerExtension(this);
|
||||
}
|
||||
|
||||
async init() {
|
||||
init() {
|
||||
// Init data
|
||||
this.data.corpus = this.app.data.corpus;
|
||||
this.renderGeneralCorpusInfo();
|
||||
this.renderTextInfoList();
|
||||
this.renderTextProportionsGraphic();
|
||||
this.renderTokenList();
|
||||
this.renderFrequenciesGraphic();
|
||||
// this.renderFrequenciesGraphic();
|
||||
|
||||
// Add event listeners
|
||||
let frequenciesStopwordSettingModal = document.querySelector('#frequencies-stopwords-setting-modal');
|
||||
@ -46,7 +47,7 @@ class CorpusAnalysisStaticVisualization {
|
||||
let frequenciesTokenCategoryDropdownListElement = document.querySelector("#frequencies-token-category-dropdown");
|
||||
frequenciesTokenCategoryDropdownListElement.addEventListener('click', (event) => {
|
||||
frequenciesTokenCategoryDropdownElement.firstChild.textContent = event.target.innerHTML;
|
||||
this.renderFrequenciesGraphic();
|
||||
this.renderTokenList();
|
||||
});
|
||||
|
||||
let frequenciesGraphModeButtons = document.querySelectorAll('.frequencies-graph-mode-button');
|
||||
@ -56,7 +57,7 @@ class CorpusAnalysisStaticVisualization {
|
||||
btn.classList.remove('disabled');
|
||||
});
|
||||
event.target.closest('.frequencies-graph-mode-button').classList.add('disabled');
|
||||
this.renderFrequenciesGraphic();
|
||||
this.renderFrequenciesGraphic(this.data.tokenSet);
|
||||
});
|
||||
});
|
||||
|
||||
@ -64,7 +65,8 @@ class CorpusAnalysisStaticVisualization {
|
||||
actionButton.addEventListener('click', (event) => {
|
||||
let action = event.target.closest('.frequencies-stopword-setting-modal-action-buttons').dataset.action;
|
||||
if (action === 'submit') {
|
||||
this.renderFrequenciesGraphic();
|
||||
console.log('Stopwords changed');
|
||||
this.renderTokenList();
|
||||
} else if (action === 'cancel') {
|
||||
this.data.stopwords = structuredClone(this.data.stopwordCache);
|
||||
}
|
||||
@ -208,34 +210,60 @@ class CorpusAnalysisStaticVisualization {
|
||||
}
|
||||
|
||||
async renderTokenList() {
|
||||
let corpusData = this.data.corpus.o.staticData;
|
||||
let corpusTokenListElement = document.querySelector('.corpus-token-list');
|
||||
let corpusTokenList = new CorpusTokenList(corpusTokenListElement);
|
||||
let filteredData = this.filterData();
|
||||
let stopwords = this.data.stopwords;
|
||||
if (this.data.stopwords === undefined) {
|
||||
stopwords = await this.getStopwords();
|
||||
}
|
||||
stopwords = Object.values(stopwords).flat();
|
||||
let mostFrequent = Object.entries(corpusData.corpus.freqs.word)
|
||||
.sort((a, b) => b[1] - a[1])
|
||||
.filter(item => !stopwords.includes(corpusData.values.p_attrs.word[item[0]].toLowerCase()))
|
||||
let mostFrequent = Object.entries(filteredData)
|
||||
.sort((a, b) => b[1].count - a[1].count)
|
||||
.filter(item => !stopwords.includes(item[0].toLowerCase()))
|
||||
.slice(0, 4)
|
||||
.map(item => parseInt(item[0]));
|
||||
.map(item => item[0])
|
||||
|
||||
let tokenData = [];
|
||||
for (let i = 0; i < Object.values(corpusData.corpus.freqs.word).length; i++) {
|
||||
Object.entries(filteredData).forEach(item => {
|
||||
let resource = {
|
||||
term: corpusData.values.p_attrs.word[i].toLowerCase(),
|
||||
count: corpusData.corpus.freqs.word[i],
|
||||
mostFrequent: mostFrequent.includes(i)
|
||||
term: item[0],
|
||||
count: item[1].count,
|
||||
mostFrequent: mostFrequent.includes(item[0])
|
||||
};
|
||||
if (!Object.values(stopwords).includes(resource.term)) {
|
||||
tokenData.push(resource);
|
||||
}
|
||||
}
|
||||
});
|
||||
corpusTokenList.add(tokenData);
|
||||
}
|
||||
|
||||
async renderFrequenciesGraphic() {
|
||||
filterData() {
|
||||
let frequenciesTokenCategoryDropdownElement = document.querySelector('[data-target="frequencies-token-category-dropdown"]');
|
||||
let tokenCategory = frequenciesTokenCategoryDropdownElement.firstChild.textContent.toLowerCase();
|
||||
let corpusData = this.data.corpus.o.staticData;
|
||||
let filteredData = {};
|
||||
|
||||
for (let i = 0; i < Object.values(corpusData.corpus.freqs[tokenCategory]).length; i++) {
|
||||
let term = corpusData.values.p_attrs[tokenCategory][i].toLowerCase();
|
||||
let count = corpusData.corpus.freqs[tokenCategory][i];
|
||||
|
||||
if (filteredData[term]) {
|
||||
filteredData[term].count += count;
|
||||
filteredData[term].originalIds.push(i);
|
||||
} else {
|
||||
filteredData[term] = {
|
||||
count: count,
|
||||
originalIds: [i]
|
||||
};
|
||||
}
|
||||
}
|
||||
return filteredData;
|
||||
}
|
||||
|
||||
|
||||
renderFrequenciesGraphic(tokenSet) {
|
||||
this.data.tokenSet = tokenSet;
|
||||
let corpusData = this.data.corpus.o.staticData;
|
||||
let frequenciesTokenCategoryDropdownElement = document.querySelector('[data-target="frequencies-token-category-dropdown"]');
|
||||
let frequenciesGraphicElement = document.querySelector('#frequencies-graphic');
|
||||
@ -243,12 +271,16 @@ class CorpusAnalysisStaticVisualization {
|
||||
let graphtype = document.querySelector('.frequencies-graph-mode-button.disabled').dataset.graphType;
|
||||
let tokenCategory = frequenciesTokenCategoryDropdownElement.firstChild.textContent.toLowerCase();
|
||||
|
||||
let graphData = await this.createFrequenciesGraphData(tokenCategory, texts, graphtype);
|
||||
let graphData = this.createFrequenciesGraphData(tokenCategory, texts, graphtype, tokenSet);
|
||||
let graphLayout = {
|
||||
barmode: graphtype === 'bar' ? 'stack' : '',
|
||||
yaxis: {
|
||||
showticklabels: graphtype === 'markers' ? false : true
|
||||
},
|
||||
height: 627,
|
||||
margin: {
|
||||
l: 17
|
||||
}
|
||||
};
|
||||
let config = {
|
||||
responsive: true,
|
||||
@ -258,31 +290,28 @@ class CorpusAnalysisStaticVisualization {
|
||||
Plotly.newPlot(frequenciesGraphicElement, graphData, graphLayout, config);
|
||||
}
|
||||
|
||||
async createFrequenciesGraphData(tokenCategory, texts, graphtype) {
|
||||
createFrequenciesGraphData(tokenCategory, texts, graphtype, tokenSet) {
|
||||
let corpusData = this.data.corpus.o.staticData;
|
||||
let stopwords = this.data.stopwords;
|
||||
if (this.data.stopwords === undefined) {
|
||||
stopwords = await this.getStopwords();
|
||||
}
|
||||
let stopwordList = Object.values(stopwords).flat();
|
||||
let graphData = [];
|
||||
let filteredData = Object.entries(corpusData.corpus.freqs[tokenCategory])
|
||||
.sort((a, b) => b[1] - a[1])
|
||||
.filter(item => !stopwordList.includes(corpusData.values.p_attrs[tokenCategory][item[0]].toLowerCase()))
|
||||
.slice(0, 5);
|
||||
|
||||
let filteredData = this.filterData();
|
||||
switch (graphtype) {
|
||||
case 'markers':
|
||||
for (let item of filteredData) {
|
||||
let size = texts.map(text => text[1].freqs[tokenCategory][item[0]] || 0);
|
||||
for (let item of tokenSet) {
|
||||
let textTitles = texts.map(text => `${corpusData.values.s_attrs.text[text[0]].title} (${corpusData.values.s_attrs.text[text[0]].publishing_year})`);
|
||||
let tokenCountPerText = [];
|
||||
for (let originalId of filteredData[item].originalIds) {
|
||||
for (let i = 0; i < texts.length; i++) {
|
||||
tokenCountPerText[i] = (tokenCountPerText[i] || 0) + (texts[i][1].freqs[tokenCategory][originalId] || 0);
|
||||
}
|
||||
}
|
||||
let data = {
|
||||
x: texts.map(text => `${corpusData.values.s_attrs.text[text[0]].title} (${corpusData.values.s_attrs.text[text[0]].publishing_year})`),
|
||||
y: texts.map(text => corpusData.values.p_attrs[tokenCategory][item[0]]),
|
||||
name: corpusData.values.p_attrs[tokenCategory][item[0]],
|
||||
text: texts.map(text => `${corpusData.values.p_attrs[tokenCategory][item[0]]}<br>${text[1].freqs[tokenCategory][item[0]] || 0}`),
|
||||
x: textTitles,
|
||||
y: texts.map(text => item),
|
||||
name: item,
|
||||
text: texts.map(text => `${item}<br>${tokenCountPerText || 0}`),
|
||||
mode: 'markers',
|
||||
marker: {
|
||||
size: size,
|
||||
size: tokenCountPerText,
|
||||
sizeref: 0.4
|
||||
}
|
||||
};
|
||||
@ -290,11 +319,18 @@ class CorpusAnalysisStaticVisualization {
|
||||
}
|
||||
break;
|
||||
default:
|
||||
for (let item of filteredData) {
|
||||
for (let item of tokenSet) {
|
||||
let textTitles = texts.map(text => `${corpusData.values.s_attrs.text[text[0]].title} (${corpusData.values.s_attrs.text[text[0]].publishing_year})`);
|
||||
let tokenCountPerText = [];
|
||||
for (let originalId of filteredData[item].originalIds) {
|
||||
for (let i = 0; i < texts.length; i++) {
|
||||
tokenCountPerText[i] = (tokenCountPerText[i] || 0) + (texts[i][1].freqs[tokenCategory][originalId] || 0);
|
||||
}
|
||||
}
|
||||
let data = {
|
||||
x: texts.map(text => `${corpusData.values.s_attrs.text[text[0]].title} (${corpusData.values.s_attrs.text[text[0]].publishing_year})`),
|
||||
y: texts.map(text => text[1].freqs[tokenCategory][item[0]] || 0),
|
||||
name: corpusData.values.p_attrs[tokenCategory][item[0]],
|
||||
x: textTitles,
|
||||
y: tokenCountPerText,
|
||||
name: item,
|
||||
type: graphtype
|
||||
};
|
||||
graphData.push(data);
|
||||
|
@ -296,7 +296,7 @@ class ConcordanceQueryBuilder {
|
||||
this.elements.entity.innerHTML = 'Entity';
|
||||
}
|
||||
this.elements.counter -= 1;
|
||||
if (this.elements.counter <= 0) {
|
||||
if (this.elements.counter === 0) {
|
||||
this.elements.queryContainer.classList.add('hide');
|
||||
}
|
||||
this.queryPreviewBuilder();
|
||||
|
@ -6,7 +6,7 @@ class CorpusTokenList extends ResourceList {
|
||||
}
|
||||
|
||||
static defaultOptions = {
|
||||
page: 100
|
||||
page: 7
|
||||
};
|
||||
|
||||
constructor(listContainerElement, options = {}) {
|
||||
@ -16,8 +16,35 @@ class CorpusTokenList extends ResourceList {
|
||||
);
|
||||
super(listContainerElement, _options);
|
||||
this.listjs.list.addEventListener('click', (event) => {this.onClick(event)});
|
||||
this.selectedItemIds = new Set();
|
||||
|
||||
this.selectedItemTerms = new Set();
|
||||
this.listjs.on('sortComplete', () => {
|
||||
let listItems = Array.from(this.listjs.items).filter(item => item.elm);
|
||||
for (let item of listItems) {
|
||||
let termElement = item.elm.querySelector('.term');
|
||||
let mostFrequent = item.elm.dataset.mostfrequent === 'true';
|
||||
if (mostFrequent) {
|
||||
this.selectedItemTerms.add(termElement.textContent);
|
||||
}
|
||||
}
|
||||
corpusAnalysisApp.extensions['Static Visualization'].renderFrequenciesGraphic(this.selectedItemTerms);
|
||||
});
|
||||
|
||||
let tokenListResetButtonElement = this.listContainerElement.querySelector('#token-list-reset-button');
|
||||
tokenListResetButtonElement.addEventListener('click', () => {
|
||||
this.selectedItemTerms.clear();
|
||||
let listItems = Array.from(this.listjs.items).filter(item => item.elm);
|
||||
for (let item of listItems) {
|
||||
let termElement = item.elm.querySelector('.term');
|
||||
let mostFrequent = item.elm.dataset.mostfrequent === 'true';
|
||||
if (mostFrequent) {
|
||||
item.elm.querySelector('.select-checkbox').checked = true;
|
||||
this.selectedItemTerms.add(termElement.textContent);
|
||||
} else {
|
||||
item.elm.querySelector('.select-checkbox').checked = false;
|
||||
}
|
||||
}
|
||||
corpusAnalysisApp.extensions['Static Visualization'].renderFrequenciesGraphic(this.selectedItemTerms);
|
||||
});
|
||||
}
|
||||
|
||||
get item() {
|
||||
@ -42,7 +69,7 @@ class CorpusTokenList extends ResourceList {
|
||||
return [
|
||||
'term',
|
||||
'count',
|
||||
'mostFrequent',
|
||||
{data: ['mostFrequent']},
|
||||
'frequency'
|
||||
];
|
||||
}
|
||||
@ -58,24 +85,21 @@ class CorpusTokenList extends ResourceList {
|
||||
<input id="${listSearchElementId}" class="search" type="text"></input>
|
||||
<label for="${listSearchElementId}">Search token</label>
|
||||
</div>
|
||||
<div class="scrollable-list-container-wrapper" style="height:276px; overflow:scroll;">
|
||||
<div class="scrollable-list-container">
|
||||
<table>
|
||||
<thead>
|
||||
<tr>
|
||||
<th></th>
|
||||
<th>Term</th>
|
||||
<th>Count</th>
|
||||
<th>Frequency</th>
|
||||
</tr>
|
||||
</thead>
|
||||
<tbody class="list"></tbody>
|
||||
</table>
|
||||
</div>
|
||||
</div>
|
||||
<table>
|
||||
<thead>
|
||||
<tr>
|
||||
<th style="width:15%;">
|
||||
<span class="material-icons" style="cursor:pointer" id="token-list-reset-button">refresh</span>
|
||||
</th>
|
||||
<th>Term</th>
|
||||
<th>Count</th>
|
||||
<th>Frequency</th>
|
||||
</tr>
|
||||
</thead>
|
||||
<tbody class="list"></tbody>
|
||||
</table>
|
||||
<ul class="pagination"></ul>
|
||||
`.trim();
|
||||
this.listContainerElement.style.padding = '30px';
|
||||
}
|
||||
|
||||
mapResourceToValue(corpusTokenData) {
|
||||
@ -92,19 +116,20 @@ class CorpusTokenList extends ResourceList {
|
||||
}
|
||||
|
||||
onClick(event) {
|
||||
let listItemElement = event.target.closest('.list-item[data-id]');
|
||||
if (event.target.closest('.disable-on-click') !== null) {return;}
|
||||
let listItemElement = event.target.closest('.list-item');
|
||||
if (listItemElement === null) {return;}
|
||||
let itemId = listItemElement.dataset.id;
|
||||
let item = listItemElement.querySelector('.term').textContent;
|
||||
let listActionElement = event.target.closest('.list-action-trigger[data-list-action]');
|
||||
let listAction = listActionElement === null ? '' : listActionElement.dataset.listAction;
|
||||
switch (listAction) {
|
||||
case 'select': {
|
||||
if (event.target.checked) {
|
||||
this.selectedItemIds.add(itemId);
|
||||
this.selectedItemTerms.add(item);
|
||||
} else {
|
||||
this.selectedItemIds.delete(itemId);
|
||||
this.selectedItemTerms.delete(item);
|
||||
}
|
||||
this.renderingItemSelection();
|
||||
corpusAnalysisApp.extensions['Static Visualization'].renderFrequenciesGraphic(this.selectedItemTerms);
|
||||
break;
|
||||
}
|
||||
default: {
|
||||
@ -113,9 +138,4 @@ class CorpusTokenList extends ResourceList {
|
||||
}
|
||||
}
|
||||
|
||||
renderingItemSelection() {
|
||||
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -90,9 +90,8 @@
|
||||
<div class="card-content">
|
||||
<span class="card-title">Frequencies</span>
|
||||
<div class="row">
|
||||
{# <div class="col s1"></div> #}
|
||||
<div class="col s5">
|
||||
<div class="corpus-token-list no-autoinit"></div>
|
||||
<div class="col s4">
|
||||
<div class="corpus-token-list no-autoinit" style="transform: scale(0.91);"></div>
|
||||
<a class="dropdown-trigger btn" data-target="frequencies-token-category-dropdown">Word<i class="material-icons right">arrow_drop_down</i></a>
|
||||
<a class="btn-flat modal-trigger no-autoinit" id="frequencies-stopwords-setting-modal-button" href="#frequencies-stopwords-setting-modal">
|
||||
<i class="material-icons grey-text text-darken-2">settings</i>
|
||||
@ -104,12 +103,13 @@
|
||||
<li><a data-token-category="simple_pos">Simple_pos</a></li>
|
||||
</ul>
|
||||
</div>
|
||||
{# <div class="col s1"></div> #}
|
||||
<div class="col s7">
|
||||
<div id="frequencies-graphic"></div>
|
||||
<a class="btn disabled frequencies-graph-mode-button" data-graph-type="bar"><i class="material-icons">stacked_bar_chart</i></a>
|
||||
<a class="btn frequencies-graph-mode-button" data-graph-type="scatter"><i class="material-icons">show_chart</i></a>
|
||||
<a class="btn frequencies-graph-mode-button" data-graph-type="markers"><i class="material-icons">bubble_chart</i></a>
|
||||
<div class="col s8">
|
||||
<div id="frequencies-graphic"></div>
|
||||
<div>
|
||||
<a class="btn disabled frequencies-graph-mode-button" data-graph-type="bar"><i class="material-icons">stacked_bar_chart</i></a>
|
||||
<a class="btn frequencies-graph-mode-button" data-graph-type="scatter"><i class="material-icons">show_chart</i></a>
|
||||
<a class="btn frequencies-graph-mode-button" data-graph-type="markers"><i class="material-icons">bubble_chart</i></a>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
@ -130,8 +130,7 @@
|
||||
like "the" or "and," that carry little meaning and are often removed in text analysis
|
||||
to improve efficiency and accuracy.</p>
|
||||
<div id="user-stopword-list-container"></div>
|
||||
<div class="chips col s8 no-autoinit input-field" id="stopword-input-field">
|
||||
</div>
|
||||
<div class="chips col s8 no-autoinit input-field" id="stopword-input-field"></div>
|
||||
</div>
|
||||
<div class="row">
|
||||
<p>Below you can find a list of all stopwords that are always filtered out.
|
||||
@ -159,3 +158,4 @@
|
||||
const corpusAnalysisStaticVisualization = new CorpusAnalysisStaticVisualization(corpusAnalysisApp);
|
||||
</script>
|
||||
{% endset %}
|
||||
|
Loading…
x
Reference in New Issue
Block a user