Visualization Testing Corpus Analysis

2026-02-03 16:20:55 +00:00 · 2023-06-13 15:41:34 +02:00
parent b7ca2a2cf6
commit cc508cf4eb
6 changed files with 260 additions and 3 deletions
--- a/app/static/js/CorpusAnalysis/CQiClient.js
+++ b/app/static/js/CorpusAnalysis/CQiClient.js
@@ -100,7 +100,69 @@ class CQiCorpus {

  getCorpusData() {
    return new Promise((resolve, reject) => {
-      const dummyData = {};
+      const dummyData = {
+          "num_tokens": 2000,    // number of tokens in the corpus
+          "num_unique_words": 500,    // number of unique words in the corpus
+          "num_unique_lemmas": 200,    // number of unique lemmas in the corpus
+          "num_sentences": 90,    // number of sentences in the corpus
+          "average_sentence_length": 11,   // average number of tokens per sentence in the corpus
+          "num_ent_types": 30,    // number of entities in the corpus
+          "num_unique_ent_types":10,
+          "ent_type_freqs": {
+            "str": 10,    // number of ent_types with ent_type "str"
+            // ...
+        },
+          "texts": [
+              {
+                  "num_tokens": 11,    // number of tokens in the text
+                  "num_unique_words": 12,    // number of unique words in the text
+                  "word_freqs": {    // frequency of unique words in the text (sorted by frequency)
+                    "str": "int",    // number of tokens with word "str"
+                    // ...
+                  },
+                  "num_unique_lemmas": 15,    // number of unique lemmas in the text
+                  "lemma_freqs": {    // frequency of unique lemmas in the text (sorted by frequency)
+                    "str": "int",    // number of tokens with lemma "str"
+                    // ...
+                  },
+                  "num_sentences": 4,    // number of sentences in the text
+                  "average_sentence_length": 3,   // average number of tokens per sentence in the text
+                  "num_ent_types": 12,    // number of ent_types in the text
+                  "num_unique_ent_types": 28,    // number of unique ent_types in the text
+                  "num_entities_by_id": {
+                      "1": "int",    // number of entities with id 1
+                      // ...
+                  },            
+                  "author": "Author Name",
+                  "title": "Titel",
+                  "publishing_year": 1950
+              },
+              {
+                "num_tokens": 800,    // number of tokens in the text
+                "num_unique_words": 60,    // number of unique words in the text
+                "word_freqs": {    // frequency of unique words in the text (sorted by frequency)
+                  "testwort": 50,    // number of tokens with word "str"
+                  "testwort2": 1
+                },
+                "num_unique_lemmas": 15,    // number of unique lemmas in the text
+                "lemma_freqs": {    // frequency of unique lemmas in the text (sorted by frequency)
+                  "testlemma": 11,    // number of tokens with lemma "str"
+                  "testlemma2": 1
+                },
+                "num_sentences": 90,    // number of sentences in the text
+                "average_sentence_length": 7,   // average number of tokens per sentence in the text
+                "num_ent_types": 19,
+                "num_unique_ent_types": 5,    // number of unique ent_types in the text
+                "num_entities_by_id": {
+                    "1": "int",    // number of entities with id 1
+                    // ...
+                },            
+                "author": "Author Name 2",
+                "title": "Titel 2",
+                "publishing_year": 1951
+              }
+          ]
+      };

      resolve(dummyData);
      /*
--- a/app/static/js/CorpusAnalysis/CorpusAnalysisApp.js
+++ b/app/static/js/CorpusAnalysis/CorpusAnalysisApp.js
@@ -25,7 +25,6 @@ class CorpusAnalysisApp {
  init() {
    this.disableActionElements();
    this.elements.m.initModal.open();
-  
    // Init data
    this.data.cQiClient = new CQiClient(this.settings.corpusId);
    this.data.cQiClient.connect()
@@ -35,7 +34,13 @@ class CorpusAnalysisApp {
      .then(
        cQiCorpus => {
          this.data.corpus = {o: cQiCorpus};
-          // TODO: Don't do this here
+          this.data.corpus.o.getCorpusData()
+            .then(corpusData => {
+              this.renderGeneralCorpusInfo(corpusData);
+              this.renderTextInfoList(corpusData);
+              this.renderTextProportionsGraphic(corpusData);
+            });
+          // TODO: Don't do this here
          cQiCorpus.updateDb();
          this.enableActionElements();
          for (let extension of Object.values(this.extensions)) {extension.init();}
@@ -52,6 +57,7 @@ class CorpusAnalysisApp {
          }
        }
      );
+
  
    // Add event listeners
    for (let extensionSelectorElement of this.elements.overview.querySelectorAll('.extension-selector')) {
@@ -95,4 +101,40 @@ class CorpusAnalysisApp {
      }
    }
  }
+
+  renderGeneralCorpusInfo(corpusData) {
+    let corpusGeneralInfoListElement = document.querySelector('.corpus-general-info-list');
+    corpusGeneralInfoListElement.querySelector('.corpus-num-tokens').innerHTML = `<b>Number of tokens:</b> ${this.data.corpus.o.size}`;
+    corpusGeneralInfoListElement.querySelector('.corpus-text-count').innerHTML = `<b>Corpus text count:</b> ${corpusData.texts.length}`;
+    corpusGeneralInfoListElement.querySelector('.corpus-num-unique-words').innerHTML = `<b>Corpus unique word count:</b> ${corpusData.num_unique_words}`;
+    corpusGeneralInfoListElement.querySelector('.corpus-num-unique-lemmas').innerHTML = `<b>Corpus unique lemma count:</b> ${corpusData.num_unique_lemmas}`;
+    // corpusGeneralInfoListElement.querySelector('.corpus-most-frequent-words').innerHTML = `<b>Corpus most frequent words:</b> ${corpusData.most_frequent_words.join(', ');
+    corpusGeneralInfoListElement.querySelector('.corpus-num-sentences').innerHTML = `<b>Corpus sentence count:</b> ${corpusData.num_sentences}`;
+    corpusGeneralInfoListElement.querySelector('.corpus-average-sentence-length').innerHTML = `<b>Corpus average sentence length:</b> ${corpusData.average_sentence_length}`;
+    corpusGeneralInfoListElement.querySelector('.corpus-num-ent-types').innerHTML = `<b>Corpus entity count:</b> ${corpusData.num_ent_types}`;
+    corpusGeneralInfoListElement.querySelector('.corpus-num-unique-ent-types').innerHTML = `<b>Corpus unique entity count:</b> ${corpusData.num_unique_ent_types}`;
+  }
+
+  renderTextInfoList(corpusData) {
+    let corpusTextInfoListElement = document.querySelector('.corpus-text-info-list');
+    let corpusTextInfoList = new CorpusTextInfoList(corpusTextInfoListElement);
+    corpusTextInfoList.add(corpusData.texts);
+    
+  }
+
+  renderTextProportionsGraphic(corpusData) {
+    let textProportionsGraphicElement = document.querySelector('#text-proportions-graphic');
+    let graphData = [
+      {
+        values: corpusData.texts.map(text => text.num_tokens),
+        labels: corpusData.texts.map(text => `${text.title} (${text.publishing_year})`),
+        type: 'pie'
+      }
+    ];
+    let graphLayout = {
+      height: 400,
+      width: 500
+    };
+    Plotly.newPlot(textProportionsGraphicElement, graphData, graphLayout);
+  }
 }