Sort Mechanics Text Info List

2026-02-03 16:20:55 +00:00 · 2023-06-13 17:18:00 +02:00
parent cc508cf4eb
commit 71359523ba
2 changed files with 125 additions and 15 deletions
--- a/app/static/js/CorpusAnalysis/CQiClient.js
+++ b/app/static/js/CorpusAnalysis/CQiClient.js
@@ -138,8 +138,104 @@ class CQiCorpus {
                  "publishing_year": 1950
              },
              {
-                "num_tokens": 800,    // number of tokens in the text
-                "num_unique_words": 60,    // number of unique words in the text
+                "num_tokens": 15,    // number of tokens in the text
+                "num_unique_words": 4,    // number of unique words in the text
+                "word_freqs": {    // frequency of unique words in the text (sorted by frequency)
+                  "str": "int",    // number of tokens with word "str"
+                  // ...
+                },
+                "num_unique_lemmas": 90,    // number of unique lemmas in the text
+                "lemma_freqs": {    // frequency of unique lemmas in the text (sorted by frequency)
+                  "str": "int",    // number of tokens with lemma "str"
+                  // ...
+                },
+                "num_sentences": 11,    // number of sentences in the text
+                "average_sentence_length": 3,   // average number of tokens per sentence in the text
+                "num_ent_types": 4,    // number of ent_types in the text
+                "num_unique_ent_types": 300,    // number of unique ent_types in the text
+                "num_entities_by_id": {
+                    "1": "int",    // number of entities with id 1
+                    // ...
+                },            
+                "author": "Author Name",
+                "title": "Titel 1",
+                  "publishing_year": 1962
+              },
+              {
+                "num_tokens": 11,    // number of tokens in the text
+                "num_unique_words": 12,    // number of unique words in the text
+                "word_freqs": {    // frequency of unique words in the text (sorted by frequency)
+                  "str": "int",    // number of tokens with word "str"
+                  // ...
+                },
+                "num_unique_lemmas": 64,    // number of unique lemmas in the text
+                "lemma_freqs": {    // frequency of unique lemmas in the text (sorted by frequency)
+                  "str": "int",    // number of tokens with lemma "str"
+                  // ...
+                },
+                "num_sentences": 52,    // number of sentences in the text
+                "average_sentence_length": 3,   // average number of tokens per sentence in the text
+                "num_ent_types": 45,    // number of ent_types in the text
+                "num_unique_ent_types": 68,    // number of unique ent_types in the text
+                "num_entities_by_id": {
+                    "1": "int",    // number of entities with id 1
+                    // ...
+                },            
+                "author": "Author Name",
+                "title": "Titel 2",
+                "publishing_year": 1850
+              },
+              {
+                "num_tokens": 56,    // number of tokens in the text
+                "num_unique_words": 13,    // number of unique words in the text
+                "word_freqs": {    // frequency of unique words in the text (sorted by frequency)
+                  "str": "int",    // number of tokens with word "str"
+                  // ...
+                },
+                "num_unique_lemmas": 43,    // number of unique lemmas in the text
+                "lemma_freqs": {    // frequency of unique lemmas in the text (sorted by frequency)
+                  "str": "int",    // number of tokens with lemma "str"
+                  // ...
+                },
+                "num_sentences": 45,    // number of sentences in the text
+                "average_sentence_length": 56,   // average number of tokens per sentence in the text
+                "num_ent_types": 8792,    // number of ent_types in the text
+                "num_unique_ent_types": 56758,    // number of unique ent_types in the text
+                "num_entities_by_id": {
+                    "1": "int",    // number of entities with id 1
+                    // ...
+                },            
+                "author": "Author Name",
+                "title": "Titel 3",
+                "publishing_year": 1504
+              },
+              {
+                "num_tokens": 54345,    // number of tokens in the text
+                "num_unique_words": 561,    // number of unique words in the text
+                "word_freqs": {    // frequency of unique words in the text (sorted by frequency)
+                  "str": "int",    // number of tokens with word "str"
+                  // ...
+                },
+                "num_unique_lemmas": 546,    // number of unique lemmas in the text
+                "lemma_freqs": {    // frequency of unique lemmas in the text (sorted by frequency)
+                  "str": "int",    // number of tokens with lemma "str"
+                  // ...
+                },
+                "num_sentences": 5427,    // number of sentences in the text
+                "average_sentence_length": 657,   // average number of tokens per sentence in the text
+                "num_ent_types": 3465,    // number of ent_types in the text
+                "num_unique_ent_types": 45,    // number of unique ent_types in the text
+                "num_entities_by_id": {
+                    "1": "int",    // number of entities with id 1
+                    // ...
+                },            
+                "author": "Author Name",
+                "title": "Titel 4",
+                "publishing_year": 1712
+              },                            
+              {
+                "num_tokens": 4354,    // number of tokens in the text
+                "num_unique_words": 45234,    // number of unique words in the text
                "word_freqs": {    // frequency of unique words in the text (sorted by frequency)
                  "testwort": 50,    // number of tokens with word "str"
                  "testwort2": 1
@@ -158,7 +254,7 @@ class CQiCorpus {
                    // ...
                },            
                "author": "Author Name 2",
-                "title": "Titel 2",
+                "title": "Titel 5",
                "publishing_year": 1951
              }
          ]