From cc508cf4ebb3f31076466dd9676c43c5c748e1ff Mon Sep 17 00:00:00 2001 From: Inga Kirschnick Date: Tue, 13 Jun 2023 15:41:34 +0200 Subject: [PATCH] Visualization Testing Corpus Analysis --- app/static/js/CorpusAnalysis/CQiClient.js | 64 +++++++++++- .../js/CorpusAnalysis/CorpusAnalysisApp.js | 46 ++++++++- .../js/ResourceLists/CorpusTextInfoList.js | 97 +++++++++++++++++++ app/static/js/ResourceLists/ResourceList.js | 1 + app/templates/_scripts.html.j2 | 3 + app/templates/corpora/analysis.html.j2 | 52 ++++++++++ 6 files changed, 260 insertions(+), 3 deletions(-) create mode 100644 app/static/js/ResourceLists/CorpusTextInfoList.js diff --git a/app/static/js/CorpusAnalysis/CQiClient.js b/app/static/js/CorpusAnalysis/CQiClient.js index 963a63ca..867aea0d 100644 --- a/app/static/js/CorpusAnalysis/CQiClient.js +++ b/app/static/js/CorpusAnalysis/CQiClient.js @@ -100,7 +100,69 @@ class CQiCorpus { getCorpusData() { return new Promise((resolve, reject) => { - const dummyData = {}; + const dummyData = { + "num_tokens": 2000, // number of tokens in the corpus + "num_unique_words": 500, // number of unique words in the corpus + "num_unique_lemmas": 200, // number of unique lemmas in the corpus + "num_sentences": 90, // number of sentences in the corpus + "average_sentence_length": 11, // average number of tokens per sentence in the corpus + "num_ent_types": 30, // number of entities in the corpus + "num_unique_ent_types":10, + "ent_type_freqs": { + "str": 10, // number of ent_types with ent_type "str" + // ... + }, + "texts": [ + { + "num_tokens": 11, // number of tokens in the text + "num_unique_words": 12, // number of unique words in the text + "word_freqs": { // frequency of unique words in the text (sorted by frequency) + "str": "int", // number of tokens with word "str" + // ... 
+ }, + "num_unique_lemmas": 15, // number of unique lemmas in the text + "lemma_freqs": { // frequency of unique lemmas in the text (sorted by frequency) + "str": "int", // number of tokens with lemma "str" + // ... + }, + "num_sentences": 4, // number of sentences in the text + "average_sentence_length": 3, // average number of tokens per sentence in the text + "num_ent_types": 12, // number of ent_types in the text + "num_unique_ent_types": 28, // number of unique ent_types in the text + "num_entities_by_id": { + "1": "int", // number of entities with id 1 + // ... + }, + "author": "Author Name", + "title": "Titel", + "publishing_year": 1950 + }, + { + "num_tokens": 800, // number of tokens in the text + "num_unique_words": 60, // number of unique words in the text + "word_freqs": { // frequency of unique words in the text (sorted by frequency) + "testwort": 50, // number of tokens with word "str" + "testwort2": 1 + }, + "num_unique_lemmas": 15, // number of unique lemmas in the text + "lemma_freqs": { // frequency of unique lemmas in the text (sorted by frequency) + "testlemma": 11, // number of tokens with lemma "str" + "testlemma2": 1 + }, + "num_sentences": 90, // number of sentences in the text + "average_sentence_length": 7, // average number of tokens per sentence in the text + "num_ent_types": 19, + "num_unique_ent_types": 5, // number of unique ent_types in the text + "num_entities_by_id": { + "1": "int", // number of entities with id 1 + // ... 
+ }, + "author": "Author Name 2", + "title": "Titel 2", + "publishing_year": 1951 + } + ] + }; resolve(dummyData); /* diff --git a/app/static/js/CorpusAnalysis/CorpusAnalysisApp.js b/app/static/js/CorpusAnalysis/CorpusAnalysisApp.js index f5cb8712..cb012730 100644 --- a/app/static/js/CorpusAnalysis/CorpusAnalysisApp.js +++ b/app/static/js/CorpusAnalysis/CorpusAnalysisApp.js @@ -25,7 +25,6 @@ class CorpusAnalysisApp { init() { this.disableActionElements(); this.elements.m.initModal.open(); - // Init data this.data.cQiClient = new CQiClient(this.settings.corpusId); this.data.cQiClient.connect() @@ -35,7 +34,13 @@ class CorpusAnalysisApp { .then( cQiCorpus => { this.data.corpus = {o: cQiCorpus}; - // TODO: Don't do this here + this.data.corpus.o.getCorpusData() + .then(corpusData => { + this.renderGeneralCorpusInfo(corpusData); + this.renderTextInfoList(corpusData); + this.renderTextProportionsGraphic(corpusData); + }); + // TODO: Don't do this here cQiCorpus.updateDb(); this.enableActionElements(); for (let extension of Object.values(this.extensions)) {extension.init();} @@ -52,6 +57,7 @@ class CorpusAnalysisApp { } } ); + // Add event listeners for (let extensionSelectorElement of this.elements.overview.querySelectorAll('.extension-selector')) { @@ -95,4 +101,40 @@ class CorpusAnalysisApp { } } } + + renderGeneralCorpusInfo(corpusData) { + let corpusGeneralInfoListElement = document.querySelector('.corpus-general-info-list'); + corpusGeneralInfoListElement.querySelector('.corpus-num-tokens').innerHTML = `Number of tokens: ${this.data.corpus.o.size}`; + corpusGeneralInfoListElement.querySelector('.corpus-text-count').innerHTML = `Corpus text count: ${corpusData.texts.length}`; + corpusGeneralInfoListElement.querySelector('.corpus-num-unique-words').innerHTML = `Corpus unique word count: ${corpusData.num_unique_words}`; + corpusGeneralInfoListElement.querySelector('.corpus-num-unique-lemmas').innerHTML = `Corpus unique lemma count: ${corpusData.num_unique_lemmas}`; +
// corpusGeneralInfoListElement.querySelector('.corpus-most-frequent-words').innerHTML = `Corpus most frequent words: ${corpusData.most_frequent_words.join(', '); + corpusGeneralInfoListElement.querySelector('.corpus-num-sentences').innerHTML = `Corpus sentence count: ${corpusData.num_sentences}`; + corpusGeneralInfoListElement.querySelector('.corpus-average-sentence-length').innerHTML = `Corpus average sentence length: ${corpusData.average_sentence_length}`; + corpusGeneralInfoListElement.querySelector('.corpus-num-ent-types').innerHTML = `Corpus entity count: ${corpusData.num_ent_types}`; + corpusGeneralInfoListElement.querySelector('.corpus-num-unique-ent-types').innerHTML = `Corpus unique entity count: ${corpusData.num_unique_ent_types}`; + } + + renderTextInfoList(corpusData) { + let corpusTextInfoListElement = document.querySelector('.corpus-text-info-list'); + let corpusTextInfoList = new CorpusTextInfoList(corpusTextInfoListElement); + corpusTextInfoList.add(corpusData.texts); + + } + + renderTextProportionsGraphic(corpusData) { + let textProportionsGraphicElement = document.querySelector('#text-proportions-graphic'); + let graphData = [ + { + values: corpusData.texts.map(text => text.num_tokens), + labels: corpusData.texts.map(text => `${text.title} (${text.publishing_year})`), + type: 'pie' + } + ]; + let graphLayout = { + height: 400, + width: 500 + }; + Plotly.newPlot(textProportionsGraphicElement, graphData, graphLayout); + } } diff --git a/app/static/js/ResourceLists/CorpusTextInfoList.js b/app/static/js/ResourceLists/CorpusTextInfoList.js new file mode 100644 index 00000000..9807d06c --- /dev/null +++ b/app/static/js/ResourceLists/CorpusTextInfoList.js @@ -0,0 +1,97 @@ +class CorpusTextInfoList extends ResourceList { + + static autoInit() { + for (let corpusTextInfoListElement of document.querySelectorAll('.corpus-text-info-list:not(.no-autoinit)')) { + new CorpusTextInfoList(corpusTextInfoListElement); + } + } + + static defaultOptions = { + page: 4 
+ }; + + constructor(listContainerElement, options = {}) { + let _options = Utils.mergeObjectsDeep( + CorpusTextInfoList.defaultOptions, + options + ); + super(listContainerElement, _options); + this.isInitialized = false; + } + + get item() { + return (values) => { + return ` + + () + + + + + + + + + `.trim(); + } + } + + get valueNames() { + return [ + 'title', + 'publishing_year', + 'num_tokens', + 'num_unique_words', + 'num_unique_lemmas', + 'num_sentences', + 'average_sentence_length', + 'num_ent_types', + 'num_unique_ent_types' + ]; + } + + initListContainerElement() { + if (!this.listContainerElement.hasAttribute('id')) { + this.listContainerElement.id = Utils.generateElementId('corpus-file-list-'); + } + let listSearchElementId = Utils.generateElementId(`${this.listContainerElement.id}-search-`); + this.listContainerElement.innerHTML = ` +
+ search + + +
+ + + + + + + + + + + + + + +
TextNumber of tokensNumber of unique wordsNumber of unique lemmasNumber of sentencesAverage sentence lengthNumber of entity typesNumber of unique entity types
+ + `.trim(); + } + + mapResourceToValue(corpusTextData) { + console.log(corpusTextData); + return { + title: corpusTextData.title, + publishing_year: corpusTextData.publishing_year, + num_tokens: corpusTextData.num_tokens, + num_unique_words: corpusTextData.num_unique_words, + num_unique_lemmas: corpusTextData.num_unique_lemmas, + num_sentences: corpusTextData.num_sentences, + average_sentence_length: corpusTextData.average_sentence_length, + num_ent_types: corpusTextData.num_ent_types, + num_unique_ent_types: corpusTextData.num_unique_ent_types + }; + } +} diff --git a/app/static/js/ResourceLists/ResourceList.js b/app/static/js/ResourceLists/ResourceList.js index 3251ef2b..959a5fe1 100644 --- a/app/static/js/ResourceLists/ResourceList.js +++ b/app/static/js/ResourceLists/ResourceList.js @@ -15,6 +15,7 @@ class ResourceList { UserList.autoInit(); AdminUserList.autoInit(); CorpusFollowerList.autoInit(); + CorpusTextInfoList.autoInit(); } static defaultOptions = { diff --git a/app/templates/_scripts.html.j2 b/app/templates/_scripts.html.j2 index 88167db7..89cc1ca2 100644 --- a/app/templates/_scripts.html.j2 +++ b/app/templates/_scripts.html.j2 @@ -1,6 +1,8 @@ + + {%- assets filters='rjsmin', output='gen/app.%(version)s.js', @@ -49,6 +51,7 @@ 'js/ResourceLists/UserList.js', 'js/ResourceLists/AdminUserList.js', 'js/ResourceLists/CorpusFollowerList.js', + 'js/ResourceLists/CorpusTextInfoList.js', 'js/ResourceLists/DetailledPublicCorpusList.js' %} diff --git a/app/templates/corpora/analysis.html.j2 b/app/templates/corpora/analysis.html.j2 index cbfd6a6a..4619c09c 100644 --- a/app/templates/corpora/analysis.html.j2 +++ b/app/templates/corpora/analysis.html.j2 @@ -33,6 +33,58 @@ {% endfor %} +
+
+

query_statsVisualizations

+
+
+
+
+ General information about the Corpus +

+
+
    +
  • +
    +
  • +
    +
  • +
    +
  • +
    +
  • +
    +
  • +
    +
  • +
    +
  • +
    +
+
+
+
+
+
+
+ Text information +
+
+
+
+
+
+
+
+
+ Text proportions within the corpus +
+
+
+
+
+ + {% for extension in extensions %}
{{ extension.container_content }}