mirror of
https://gitlab.ub.uni-bielefeld.de/sfb1288inf/nopaque.git
synced 2025-01-17 21:40:34 +00:00
Compare commits
2 Commits
b7ca2a2cf6
...
71359523ba
Author | SHA1 | Date | |
---|---|---|---|
|
71359523ba | ||
|
cc508cf4eb |
@ -100,7 +100,165 @@ class CQiCorpus {
|
||||
|
||||
getCorpusData() {
|
||||
return new Promise((resolve, reject) => {
|
||||
const dummyData = {};
|
||||
const dummyData = {
|
||||
"num_tokens": 2000, // number of tokens in the corpus
|
||||
"num_unique_words": 500, // number of unique words in the corpus
|
||||
"num_unique_lemmas": 200, // number of unique lemmas in the corpus
|
||||
"num_sentences": 90, // number of sentences in the corpus
|
||||
"average_sentence_length": 11, // average number of tokens per sentence in the corpus
|
||||
"num_ent_types": 30, // number of entities in the corpus
|
||||
"num_unique_ent_types":10,
|
||||
"ent_type_freqs": {
|
||||
"str": 10, // number of ent_types with ent_type "str"
|
||||
// ...
|
||||
},
|
||||
"texts": [
|
||||
{
|
||||
"num_tokens": 11, // number of tokens in the text
|
||||
"num_unique_words": 12, // number of unique words in the text
|
||||
"word_freqs": { // frequency of unique words in the text (sorted by frequency)
|
||||
"str": "int", // number of tokens with word "str"
|
||||
// ...
|
||||
},
|
||||
"num_unique_lemmas": 15, // number of unique lemmas in the text
|
||||
"lemma_freqs": { // frequency of unique lemmas in the text (sorted by frequency)
|
||||
"str": "int", // number of tokens with lemma "str"
|
||||
// ...
|
||||
},
|
||||
"num_sentences": 4, // number of sentences in the text
|
||||
"average_sentence_length": 3, // average number of tokens per sentence in the text
|
||||
"num_ent_types": 12, // number of ent_types in the text
|
||||
"num_unique_ent_types": 28, // number of unique ent_types in the text
|
||||
"num_entities_by_id": {
|
||||
"1": "int", // number of entities with id 1
|
||||
// ...
|
||||
},
|
||||
"author": "Author Name",
|
||||
"title": "Titel",
|
||||
"publishing_year": 1950
|
||||
},
|
||||
{
|
||||
"num_tokens": 15, // number of tokens in the text
|
||||
"num_unique_words": 4, // number of unique words in the text
|
||||
"word_freqs": { // frequency of unique words in the text (sorted by frequency)
|
||||
"str": "int", // number of tokens with word "str"
|
||||
// ...
|
||||
},
|
||||
"num_unique_lemmas": 90, // number of unique lemmas in the text
|
||||
"lemma_freqs": { // frequency of unique lemmas in the text (sorted by frequency)
|
||||
"str": "int", // number of tokens with lemma "str"
|
||||
// ...
|
||||
},
|
||||
"num_sentences": 11, // number of sentences in the text
|
||||
"average_sentence_length": 3, // average number of tokens per sentence in the text
|
||||
"num_ent_types": 4, // number of ent_types in the text
|
||||
"num_unique_ent_types": 300, // number of unique ent_types in the text
|
||||
"num_entities_by_id": {
|
||||
"1": "int", // number of entities with id 1
|
||||
// ...
|
||||
},
|
||||
"author": "Author Name",
|
||||
"title": "Titel 1",
|
||||
"publishing_year": 1962
|
||||
},
|
||||
{
|
||||
"num_tokens": 11, // number of tokens in the text
|
||||
"num_unique_words": 12, // number of unique words in the text
|
||||
"word_freqs": { // frequency of unique words in the text (sorted by frequency)
|
||||
"str": "int", // number of tokens with word "str"
|
||||
// ...
|
||||
},
|
||||
"num_unique_lemmas": 64, // number of unique lemmas in the text
|
||||
"lemma_freqs": { // frequency of unique lemmas in the text (sorted by frequency)
|
||||
"str": "int", // number of tokens with lemma "str"
|
||||
// ...
|
||||
},
|
||||
"num_sentences": 52, // number of sentences in the text
|
||||
"average_sentence_length": 3, // average number of tokens per sentence in the text
|
||||
"num_ent_types": 45, // number of ent_types in the text
|
||||
"num_unique_ent_types": 68, // number of unique ent_types in the text
|
||||
"num_entities_by_id": {
|
||||
"1": "int", // number of entities with id 1
|
||||
// ...
|
||||
},
|
||||
"author": "Author Name",
|
||||
"title": "Titel 2",
|
||||
"publishing_year": 1850
|
||||
},
|
||||
{
|
||||
"num_tokens": 56, // number of tokens in the text
|
||||
"num_unique_words": 13, // number of unique words in the text
|
||||
"word_freqs": { // frequency of unique words in the text (sorted by frequency)
|
||||
"str": "int", // number of tokens with word "str"
|
||||
// ...
|
||||
},
|
||||
"num_unique_lemmas": 43, // number of unique lemmas in the text
|
||||
"lemma_freqs": { // frequency of unique lemmas in the text (sorted by frequency)
|
||||
"str": "int", // number of tokens with lemma "str"
|
||||
// ...
|
||||
},
|
||||
"num_sentences": 45, // number of sentences in the text
|
||||
"average_sentence_length": 56, // average number of tokens per sentence in the text
|
||||
"num_ent_types": 8792, // number of ent_types in the text
|
||||
"num_unique_ent_types": 56758, // number of unique ent_types in the text
|
||||
"num_entities_by_id": {
|
||||
"1": "int", // number of entities with id 1
|
||||
// ...
|
||||
},
|
||||
"author": "Author Name",
|
||||
"title": "Titel 3",
|
||||
"publishing_year": 1504
|
||||
},
|
||||
{
|
||||
"num_tokens": 54345, // number of tokens in the text
|
||||
"num_unique_words": 561, // number of unique words in the text
|
||||
"word_freqs": { // frequency of unique words in the text (sorted by frequency)
|
||||
"str": "int", // number of tokens with word "str"
|
||||
// ...
|
||||
},
|
||||
"num_unique_lemmas": 546, // number of unique lemmas in the text
|
||||
"lemma_freqs": { // frequency of unique lemmas in the text (sorted by frequency)
|
||||
"str": "int", // number of tokens with lemma "str"
|
||||
// ...
|
||||
},
|
||||
"num_sentences": 5427, // number of sentences in the text
|
||||
"average_sentence_length": 657, // average number of tokens per sentence in the text
|
||||
"num_ent_types": 3465, // number of ent_types in the text
|
||||
"num_unique_ent_types": 45, // number of unique ent_types in the text
|
||||
"num_entities_by_id": {
|
||||
"1": "int", // number of entities with id 1
|
||||
// ...
|
||||
},
|
||||
"author": "Author Name",
|
||||
"title": "Titel 4",
|
||||
"publishing_year": 1712
|
||||
},
|
||||
{
|
||||
"num_tokens": 4354, // number of tokens in the text
|
||||
"num_unique_words": 45234, // number of unique words in the text
|
||||
"word_freqs": { // frequency of unique words in the text (sorted by frequency)
|
||||
"testwort": 50, // number of tokens with word "str"
|
||||
"testwort2": 1
|
||||
},
|
||||
"num_unique_lemmas": 15, // number of unique lemmas in the text
|
||||
"lemma_freqs": { // frequency of unique lemmas in the text (sorted by frequency)
|
||||
"testlemma": 11, // number of tokens with lemma "str"
|
||||
"testlemma2": 1
|
||||
},
|
||||
"num_sentences": 90, // number of sentences in the text
|
||||
"average_sentence_length": 7, // average number of tokens per sentence in the text
|
||||
"num_ent_types": 19,
|
||||
"num_unique_ent_types": 5, // number of unique ent_types in the text
|
||||
"num_entities_by_id": {
|
||||
"1": "int", // number of entities with id 1
|
||||
// ...
|
||||
},
|
||||
"author": "Author Name 2",
|
||||
"title": "Titel 5",
|
||||
"publishing_year": 1951
|
||||
}
|
||||
]
|
||||
};
|
||||
|
||||
resolve(dummyData);
|
||||
/*
|
||||
|
@ -25,7 +25,6 @@ class CorpusAnalysisApp {
|
||||
init() {
|
||||
this.disableActionElements();
|
||||
this.elements.m.initModal.open();
|
||||
|
||||
// Init data
|
||||
this.data.cQiClient = new CQiClient(this.settings.corpusId);
|
||||
this.data.cQiClient.connect()
|
||||
@ -35,7 +34,13 @@ class CorpusAnalysisApp {
|
||||
.then(
|
||||
cQiCorpus => {
|
||||
this.data.corpus = {o: cQiCorpus};
|
||||
// TODO: Don't do this here
|
||||
this.data.corpus.o.getCorpusData()
|
||||
.then(corpusData => {
|
||||
this.renderGeneralCorpusInfo(corpusData);
|
||||
this.renderTextInfoList(corpusData);
|
||||
this.renderTextProportionsGraphic(corpusData);
|
||||
});
|
||||
// TODO: Don't do this here
|
||||
cQiCorpus.updateDb();
|
||||
this.enableActionElements();
|
||||
for (let extension of Object.values(this.extensions)) {extension.init();}
|
||||
@ -52,6 +57,7 @@ class CorpusAnalysisApp {
|
||||
}
|
||||
}
|
||||
);
|
||||
|
||||
|
||||
// Add event listeners
|
||||
for (let extensionSelectorElement of this.elements.overview.querySelectorAll('.extension-selector')) {
|
||||
@ -95,4 +101,40 @@ class CorpusAnalysisApp {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
renderGeneralCorpusInfo(corpusData) {
|
||||
let corpusGeneralInfoListElement = document.querySelector('.corpus-general-info-list');
|
||||
corpusGeneralInfoListElement.querySelector('.corpus-num-tokens').innerHTML = `<b>Number of tokens:</b> ${this.data.corpus.o.size}`;
|
||||
corpusGeneralInfoListElement.querySelector('.corpus-text-count').innerHTML = `<b>Corpus text count:</b> ${corpusData.texts.length}`;
|
||||
corpusGeneralInfoListElement.querySelector('.corpus-num-unique-words').innerHTML = `<b>Corpus unique word count:</b> ${corpusData.num_unique_words}`;
|
||||
corpusGeneralInfoListElement.querySelector('.corpus-num-unique-lemmas').innerHTML = `<b>Corpus unique lemma count:</b> ${corpusData.num_unique_lemmas}`;
|
||||
// corpusGeneralInfoListElement.querySelector('.corpus-most-frequent-words').innerHTML = `<b>Corpus most frequent words:</b> ${corpusData.most_frequent_words.join(', ');
|
||||
corpusGeneralInfoListElement.querySelector('.corpus-num-sentences').innerHTML = `<b>Corpus sentence count:</b> ${corpusData.num_sentences}`;
|
||||
corpusGeneralInfoListElement.querySelector('.corpus-average-sentence-length').innerHTML = `<b>Corpus average sentence length:</b> ${corpusData.average_sentence_length}`;
|
||||
corpusGeneralInfoListElement.querySelector('.corpus-num-ent-types').innerHTML = `<b>Corpus entity count:</b> ${corpusData.num_ent_types}`;
|
||||
corpusGeneralInfoListElement.querySelector('.corpus-num-unique-ent-types').innerHTML = `<b>Corpus unique entity count:</b> ${corpusData.num_unique_ent_types}`;
|
||||
}
|
||||
|
||||
renderTextInfoList(corpusData) {
|
||||
let corpusTextInfoListElement = document.querySelector('.corpus-text-info-list');
|
||||
let corpusTextInfoList = new CorpusTextInfoList(corpusTextInfoListElement);
|
||||
corpusTextInfoList.add(corpusData.texts);
|
||||
|
||||
}
|
||||
|
||||
renderTextProportionsGraphic(corpusData) {
|
||||
let textProportionsGraphicElement = document.querySelector('#text-proportions-graphic');
|
||||
let graphData = [
|
||||
{
|
||||
values: corpusData.texts.map(text => text.num_tokens),
|
||||
labels: corpusData.texts.map(text => `${text.title} (${text.publishing_year})`),
|
||||
type: 'pie'
|
||||
}
|
||||
];
|
||||
let graphLayout = {
|
||||
height: 400,
|
||||
width: 500
|
||||
};
|
||||
Plotly.newPlot(textProportionsGraphicElement, graphData, graphLayout);
|
||||
}
|
||||
}
|
||||
|
111
app/static/js/ResourceLists/CorpusTextInfoList.js
Normal file
111
app/static/js/ResourceLists/CorpusTextInfoList.js
Normal file
@ -0,0 +1,111 @@
|
||||
/**
 * Sortable, searchable table listing per-text statistics of a corpus
 * (title, publishing year, token/word/lemma/sentence counts, ...).
 */
class CorpusTextInfoList extends ResourceList {
  /**
   * Create a list for every `.corpus-text-info-list` element in the
   * document that is not opted out with the `no-autoinit` class.
   */
  static autoInit() {
    document
      .querySelectorAll('.corpus-text-info-list:not(.no-autoinit)')
      .forEach((element) => {
        new CorpusTextInfoList(element);
      });
  }

  // Defaults merged beneath the caller's options in the constructor.
  // NOTE(review): `page` is presumably forwarded to List.js as the number
  // of items per page - confirm against ResourceList.
  static defaultOptions = {
    page: 4
  };

  /**
   * @param {Element} listContainerElement - Element that hosts the list.
   * @param {Object} [options={}] - Overrides for `defaultOptions`.
   */
  constructor(listContainerElement, options = {}) {
    super(
      listContainerElement,
      Utils.mergeObjectsDeep(CorpusTextInfoList.defaultOptions, options)
    );
    this.isInitialized = false;
    // Keep the header arrows in sync whenever a sort control is clicked.
    for (const sortControl of this.listContainerElement.querySelectorAll('.sort')) {
      sortControl.addEventListener('click', () => {
        this.renderSortElement(sortControl);
      });
    }
  }

  /**
   * Factory for the row template: the returned function yields the markup
   * of one table row, with an empty span per name in `valueNames`.
   */
  get item() {
    const rowTemplate = `
      <tr class="list-item clickable hoverable">
        <td><span class="title"></span> (<span class="publishing_year"></span>)</td>
        <td><span class="num_tokens"></span></td>
        <td><span class="num_unique_words"></span></td>
        <td><span class="num_unique_lemmas"></span></td>
        <td><span class="num_sentences"></span></td>
        <td><span class="average_sentence_length"></span></td>
        <td><span class="num_unique_ent_types"></span></td>
      </tr>
    `.trim();
    return (values) => rowTemplate;
  }

  /** Names of the values each row carries; they match the span classes in `item`. */
  get valueNames() {
    return [
      'title', 'publishing_year',
      'num_tokens', 'num_unique_words', 'num_unique_lemmas',
      'num_sentences', 'average_sentence_length',
      'num_unique_ent_types'
    ];
  }

  /**
   * Give the container an id if it has none, then render the search field,
   * the table header with its sort controls, the empty list body and the
   * pagination element into it.
   */
  initListContainerElement() {
    const container = this.listContainerElement;
    if (!container.hasAttribute('id')) {
      container.id = Utils.generateElementId('corpus-file-list-');
    }
    const searchInputId = Utils.generateElementId(`${container.id}-search-`);
    container.innerHTML = `
      <div class="input-field">
        <i class="material-icons prefix">search</i>
        <input id="${searchInputId}" class="search" type="text"></input>
        <label for="${searchInputId}">Search corpus file</label>
      </div>
      <table>
        <thead>
          <tr>
            <th>Text<span class="sort right material-icons" data-sort="title" style="cursor:pointer; color:#aa9cc9">arrow_drop_down</span></th>
            <th>Number of tokens<span class="sort right material-icons" data-sort="num_tokens" style="cursor:pointer">arrow_drop_down</span></th>
            <th>Number of unique words<span class="sort right material-icons" data-sort="num_unique_words" style="cursor:pointer">arrow_drop_down</span></th>
            <th>Number of unique lemmas<span class="sort right material-icons" data-sort="num_unique_lemmas" style="cursor:pointer">arrow_drop_down</span></th>
            <th>Number of sentences<span class="sort right material-icons" data-sort="num_sentences" style="cursor:pointer">arrow_drop_down</span></th>
            <th>Average sentence length<span class="sort right material-icons" data-sort="average_sentence_length" style="cursor:pointer">arrow_drop_down</span></th>
            <th>Number of unique entity types<span class="sort right material-icons" data-sort="num_unique_ent_types" style="cursor:pointer">arrow_drop_down</span></th>
          </tr>
        </thead>
        <tbody class="list"></tbody>
      </table>
      <ul class="pagination"></ul>
    `.trim();
  }

  /**
   * Reduce one text's statistics object to the values shown in a row.
   *
   * @param {Object} corpusTextData - Statistics of a single text.
   * @returns {Object} Object holding exactly the properties named in `valueNames`.
   */
  mapResourceToValue(corpusTextData) {
    const {
      title,
      publishing_year,
      num_tokens,
      num_unique_words,
      num_unique_lemmas,
      num_sentences,
      average_sentence_length,
      num_unique_ent_types
    } = corpusTextData;
    return {
      title,
      publishing_year,
      num_tokens,
      num_unique_words,
      num_unique_lemmas,
      num_sentences,
      average_sentence_length,
      num_unique_ent_types
    };
  }

  /** Sort the list by title. */
  sort() {
    this.listjs.sort('title');
  }

  /**
   * Highlight the clicked sort control and reset all the others.
   *
   * Every other control loses its `asc`/`desc` classes and turns black; the
   * clicked one is tinted and shows a down arrow while it carries the `asc`
   * class, an up arrow otherwise.
   *
   * @param {Element} clickedSortElement - The sort control that was clicked.
   */
  renderSortElement(clickedSortElement) {
    for (const sortControl of this.listContainerElement.querySelectorAll('.sort')) {
      if (sortControl === clickedSortElement) {
        continue;
      }
      sortControl.classList.remove('asc', 'desc');
      sortControl.style.color = 'black';
    }
    clickedSortElement.style.color = '#aa9cc9';
    if (clickedSortElement.classList.contains('asc')) {
      clickedSortElement.innerHTML = 'arrow_drop_down';
    } else {
      clickedSortElement.innerHTML = 'arrow_drop_up';
    }
  }
}
|
@ -15,6 +15,7 @@ class ResourceList {
|
||||
UserList.autoInit();
|
||||
AdminUserList.autoInit();
|
||||
CorpusFollowerList.autoInit();
|
||||
CorpusTextInfoList.autoInit();
|
||||
}
|
||||
|
||||
static defaultOptions = {
|
||||
|
@ -1,6 +1,8 @@
|
||||
<script src="https://cdnjs.cloudflare.com/ajax/libs/fast-json-patch/3.1.1/fast-json-patch.min.js" integrity="sha512-5uDdefwnzyq4N+SkmMBmekZLZNmc6dLixvVxCdlHBfqpyz0N3bzLdrJ55OLm7QrZmgZuhLGgHLDtJwU6RZoFCA==" crossorigin="anonymous" referrerpolicy="no-referrer"></script>
|
||||
<script src="https://cdnjs.cloudflare.com/ajax/libs/list.js/2.3.1/list.min.js" integrity="sha512-93wYgwrIFL+b+P3RvYxi/WUFRXXUDSLCT2JQk9zhVGXuS2mHl2axj6d+R6pP+gcU5isMHRj1u0oYE/mWyt/RjA==" crossorigin="anonymous" referrerpolicy="no-referrer"></script>
|
||||
<script src="https://cdnjs.cloudflare.com/ajax/libs/socket.io/4.5.4/socket.io.min.js" integrity="sha512-HTENHrkQ/P0NGDFd5nk6ibVtCkcM7jhr2c7GyvXp5O+4X6O5cQO9AhqFzM+MdeBivsX7Hoys2J7pp2wdgMpCvw==" crossorigin="anonymous" referrerpolicy="no-referrer"></script>
|
||||
<script src="https://cdn.plot.ly/plotly-2.24.1.min.js" charset="utf-8"></script>
|
||||
|
||||
{%- assets
|
||||
filters='rjsmin',
|
||||
output='gen/app.%(version)s.js',
|
||||
@ -49,6 +51,7 @@
|
||||
'js/ResourceLists/UserList.js',
|
||||
'js/ResourceLists/AdminUserList.js',
|
||||
'js/ResourceLists/CorpusFollowerList.js',
|
||||
'js/ResourceLists/CorpusTextInfoList.js',
|
||||
'js/ResourceLists/DetailledPublicCorpusList.js'
|
||||
%}
|
||||
<script src="{{ ASSET_URL }}"></script>
|
||||
|
@ -33,6 +33,58 @@
|
||||
{% endfor %}
|
||||
</div>
|
||||
|
||||
<div class="row">
|
||||
<div class="col s12">
|
||||
<h4><i class="material-icons left">query_stats</i>Visualizations</h4>
|
||||
</div>
|
||||
<div class="col s4" >
|
||||
<div class="card hoverable">
|
||||
<div class="card-content">
|
||||
<span class="card-title">General information about the Corpus</span>
|
||||
<p></p>
|
||||
<br>
|
||||
<ul class="corpus-general-info-list">
|
||||
<li class="corpus-num-tokens"></li>
|
||||
<br>
|
||||
<li class="corpus-text-count"></li>
|
||||
<br>
|
||||
<li class="corpus-num-unique-words"></li>
|
||||
<br>
|
||||
<li class="corpus-num-unique-lemmas"></li>
|
||||
<br>
|
||||
<li class="corpus-num-sentences"></li>
|
||||
<br>
|
||||
<li class="corpus-average-sentence-length"></li>
|
||||
<br>
|
||||
<li class="corpus-num-ent-types"></li>
|
||||
<br>
|
||||
<li class="corpus-num-unique-ent-types"></li>
|
||||
<br>
|
||||
</ul>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="col s8">
|
||||
<div class="card hoverable">
|
||||
<div class="card-content">
|
||||
<span class="card-title">Text information</span>
|
||||
<div class="corpus-text-info-list no-autoinit"></div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="row">
|
||||
<div class="col 12">
|
||||
<div class="card hoverable">
|
||||
<div class="card-content">
|
||||
<span class="card-title">Text proportions within the corpus</span>
|
||||
<div id="text-proportions-graphic"></div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
|
||||
{% for extension in extensions %}
|
||||
<div id="{{ extension.id_prefix }}-container">
|
||||
{{ extension.container_content }}
|
||||
|
Loading…
x
Reference in New Issue
Block a user