mirror of
https://gitlab.ub.uni-bielefeld.de/sfb1288inf/nopaque.git
synced 2025-08-02 16:55:18 +00:00
Visualization Testing Corpus Analysis
This commit is contained in:
@@ -100,7 +100,69 @@ class CQiCorpus {
|
||||
|
||||
getCorpusData() {
|
||||
return new Promise((resolve, reject) => {
|
||||
const dummyData = {};
|
||||
const dummyData = {
|
||||
"num_tokens": 2000, // number of tokens in the corpus
|
||||
"num_unique_words": 500, // number of unique words in the corpus
|
||||
"num_unique_lemmas": 200, // number of unique lemmas in the corpus
|
||||
"num_sentences": 90, // number of sentences in the corpus
|
||||
"average_sentence_length": 11, // average number of tokens per sentence in the corpus
|
||||
"num_ent_types": 30, // number of entities in the corpus
|
||||
"num_unique_ent_types":10,
|
||||
"ent_type_freqs": {
|
||||
"str": 10, // number of ent_types with ent_type "str"
|
||||
// ...
|
||||
},
|
||||
"texts": [
|
||||
{
|
||||
"num_tokens": 11, // number of tokens in the text
|
||||
"num_unique_words": 12, // number of unique words in the text
|
||||
"word_freqs": { // frequency of unique words in the text (sorted by frequency)
|
||||
"str": "int", // number of tokens with word "str"
|
||||
// ...
|
||||
},
|
||||
"num_unique_lemmas": 15, // number of unique lemmas in the text
|
||||
"lemma_freqs": { // frequency of unique lemmas in the text (sorted by frequency)
|
||||
"str": "int", // number of tokens with lemma "str"
|
||||
// ...
|
||||
},
|
||||
"num_sentences": 4, // number of sentences in the text
|
||||
"average_sentence_length": 3, // average number of tokens per sentence in the text
|
||||
"num_ent_types": 12, // number of ent_types in the text
|
||||
"num_unique_ent_types": 28, // number of unique ent_types in the text
|
||||
"num_entities_by_id": {
|
||||
"1": "int", // number of entities with id 1
|
||||
// ...
|
||||
},
|
||||
"author": "Author Name",
|
||||
"title": "Titel",
|
||||
"publishing_year": 1950
|
||||
},
|
||||
{
|
||||
"num_tokens": 800, // number of tokens in the text
|
||||
"num_unique_words": 60, // number of unique words in the text
|
||||
"word_freqs": { // frequency of unique words in the text (sorted by frequency)
|
||||
"testwort": 50, // number of tokens with word "str"
|
||||
"testwort2": 1
|
||||
},
|
||||
"num_unique_lemmas": 15, // number of unique lemmas in the text
|
||||
"lemma_freqs": { // frequency of unique lemmas in the text (sorted by frequency)
|
||||
"testlemma": 11, // number of tokens with lemma "str"
|
||||
"testlemma2": 1
|
||||
},
|
||||
"num_sentences": 90, // number of sentences in the text
|
||||
"average_sentence_length": 7, // average number of tokens per sentence in the text
|
||||
"num_ent_types": 19,
|
||||
"num_unique_ent_types": 5, // number of unique ent_types in the text
|
||||
"num_entities_by_id": {
|
||||
"1": "int", // number of entities with id 1
|
||||
// ...
|
||||
},
|
||||
"author": "Author Name 2",
|
||||
"title": "Titel 2",
|
||||
"publishing_year": 1951
|
||||
}
|
||||
]
|
||||
};
|
||||
|
||||
resolve(dummyData);
|
||||
/*
|
||||
|
@@ -25,7 +25,6 @@ class CorpusAnalysisApp {
|
||||
init() {
|
||||
this.disableActionElements();
|
||||
this.elements.m.initModal.open();
|
||||
|
||||
// Init data
|
||||
this.data.cQiClient = new CQiClient(this.settings.corpusId);
|
||||
this.data.cQiClient.connect()
|
||||
@@ -35,7 +34,13 @@ class CorpusAnalysisApp {
|
||||
.then(
|
||||
cQiCorpus => {
|
||||
this.data.corpus = {o: cQiCorpus};
|
||||
// TODO: Don't do this here
|
||||
this.data.corpus.o.getCorpusData()
|
||||
.then(corpusData => {
|
||||
this.renderGeneralCorpusInfo(corpusData);
|
||||
this.renderTextInfoList(corpusData);
|
||||
this.renderTextProportionsGraphic(corpusData);
|
||||
});
|
||||
// TODO: Don't do this here
|
||||
cQiCorpus.updateDb();
|
||||
this.enableActionElements();
|
||||
for (let extension of Object.values(this.extensions)) {extension.init();}
|
||||
@@ -52,6 +57,7 @@ class CorpusAnalysisApp {
|
||||
}
|
||||
}
|
||||
);
|
||||
|
||||
|
||||
// Add event listeners
|
||||
for (let extensionSelectorElement of this.elements.overview.querySelectorAll('.extension-selector')) {
|
||||
@@ -95,4 +101,40 @@ class CorpusAnalysisApp {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
renderGeneralCorpusInfo(corpusData) {
|
||||
let corpusGeneralInfoListElement = document.querySelector('.corpus-general-info-list');
|
||||
corpusGeneralInfoListElement.querySelector('.corpus-num-tokens').innerHTML = `<b>Number of tokens:</b> ${this.data.corpus.o.size}`;
|
||||
corpusGeneralInfoListElement.querySelector('.corpus-text-count').innerHTML = `<b>Corpus text count:</b> ${corpusData.texts.length}`;
|
||||
corpusGeneralInfoListElement.querySelector('.corpus-num-unique-words').innerHTML = `<b>Corpus unique word count:</b> ${corpusData.num_unique_words}`;
|
||||
corpusGeneralInfoListElement.querySelector('.corpus-num-unique-lemmas').innerHTML = `<b>Corpus unique lemma count:</b> ${corpusData.num_unique_lemmas}`;
|
||||
// corpusGeneralInfoListElement.querySelector('.corpus-most-frequent-words').innerHTML = `<b>Corpus most frequent words:</b> ${corpusData.most_frequent_words.join(', ');
|
||||
corpusGeneralInfoListElement.querySelector('.corpus-num-sentences').innerHTML = `<b>Corpus sentence count:</b> ${corpusData.num_sentences}`;
|
||||
corpusGeneralInfoListElement.querySelector('.corpus-average-sentence-length').innerHTML = `<b>Corpus average sentence length:</b> ${corpusData.average_sentence_length}`;
|
||||
corpusGeneralInfoListElement.querySelector('.corpus-num-ent-types').innerHTML = `<b>Corpus entity count:</b> ${corpusData.num_ent_types}`;
|
||||
corpusGeneralInfoListElement.querySelector('.corpus-num-unique-ent-types').innerHTML = `<b>Corpus unique entity count:</b> ${corpusData.num_unique_ent_types}`;
|
||||
}
|
||||
|
||||
renderTextInfoList(corpusData) {
|
||||
let corpusTextInfoListElement = document.querySelector('.corpus-text-info-list');
|
||||
let corpusTextInfoList = new CorpusTextInfoList(corpusTextInfoListElement);
|
||||
corpusTextInfoList.add(corpusData.texts);
|
||||
|
||||
}
|
||||
|
||||
renderTextProportionsGraphic(corpusData) {
|
||||
let textProportionsGraphicElement = document.querySelector('#text-proportions-graphic');
|
||||
let graphData = [
|
||||
{
|
||||
values: corpusData.texts.map(text => text.num_tokens),
|
||||
labels: corpusData.texts.map(text => `${text.title} (${text.publishing_year})`),
|
||||
type: 'pie'
|
||||
}
|
||||
];
|
||||
let graphLayout = {
|
||||
height: 400,
|
||||
width: 500
|
||||
};
|
||||
Plotly.newPlot(textProportionsGraphicElement, graphData, graphLayout);
|
||||
}
|
||||
}
|
||||
|
Reference in New Issue
Block a user