mirror of
				https://gitlab.ub.uni-bielefeld.de/sfb1288inf/nopaque.git
				synced 2025-11-04 04:12:45 +00:00 
			
		
		
		
	Visualization Testing Corpus Analysis
This commit is contained in:
		@@ -100,7 +100,69 @@ class CQiCorpus {
 | 
			
		||||
 | 
			
		||||
  getCorpusData() {
 | 
			
		||||
    return new Promise((resolve, reject) => {
 | 
			
		||||
      const dummyData = {};
 | 
			
		||||
      const dummyData = {
 | 
			
		||||
          "num_tokens": 2000,    // number of tokens in the corpus
 | 
			
		||||
          "num_unique_words": 500,    // number of unique words in the corpus
 | 
			
		||||
          "num_unique_lemmas": 200,    // number of unique lemmas in the corpus
 | 
			
		||||
          "num_sentences": 90,    // number of sentences in the corpus
 | 
			
		||||
          "average_sentence_length": 11,   // average number of tokens per sentence in the corpus
 | 
			
		||||
          "num_ent_types": 30,    // number of entities in the corpus
 | 
			
		||||
          "num_unique_ent_types":10,
 | 
			
		||||
          "ent_type_freqs": {
 | 
			
		||||
            "str": 10,    // number of ent_types with ent_type "str"
 | 
			
		||||
            // ...
 | 
			
		||||
        },
 | 
			
		||||
          "texts": [
 | 
			
		||||
              {
 | 
			
		||||
                  "num_tokens": 11,    // number of tokens in the text
 | 
			
		||||
                  "num_unique_words": 12,    // number of unique words in the text
 | 
			
		||||
                  "word_freqs": {    // frequency of unique words in the text (sorted by frequency)
 | 
			
		||||
                    "str": "int",    // number of tokens with word "str"
 | 
			
		||||
                    // ...
 | 
			
		||||
                  },
 | 
			
		||||
                  "num_unique_lemmas": 15,    // number of unique lemmas in the text
 | 
			
		||||
                  "lemma_freqs": {    // frequency of unique lemmas in the text (sorted by frequency)
 | 
			
		||||
                    "str": "int",    // number of tokens with lemma "str"
 | 
			
		||||
                    // ...
 | 
			
		||||
                  },
 | 
			
		||||
                  "num_sentences": 4,    // number of sentences in the text
 | 
			
		||||
                  "average_sentence_length": 3,   // average number of tokens per sentence in the text
 | 
			
		||||
                  "num_ent_types": 12,    // number of ent_types in the text
 | 
			
		||||
                  "num_unique_ent_types": 28,    // number of unique ent_types in the text
 | 
			
		||||
                  "num_entities_by_id": {
 | 
			
		||||
                      "1": "int",    // number of entities with id 1
 | 
			
		||||
                      // ...
 | 
			
		||||
                  },            
 | 
			
		||||
                  "author": "Author Name",
 | 
			
		||||
                  "title": "Titel",
 | 
			
		||||
                  "publishing_year": 1950
 | 
			
		||||
              },
 | 
			
		||||
              {
 | 
			
		||||
                "num_tokens": 800,    // number of tokens in the text
 | 
			
		||||
                "num_unique_words": 60,    // number of unique words in the text
 | 
			
		||||
                "word_freqs": {    // frequency of unique words in the text (sorted by frequency)
 | 
			
		||||
                  "testwort": 50,    // number of tokens with word "str"
 | 
			
		||||
                  "testwort2": 1
 | 
			
		||||
                },
 | 
			
		||||
                "num_unique_lemmas": 15,    // number of unique lemmas in the text
 | 
			
		||||
                "lemma_freqs": {    // frequency of unique lemmas in the text (sorted by frequency)
 | 
			
		||||
                  "testlemma": 11,    // number of tokens with lemma "str"
 | 
			
		||||
                  "testlemma2": 1
 | 
			
		||||
                },
 | 
			
		||||
                "num_sentences": 90,    // number of sentences in the text
 | 
			
		||||
                "average_sentence_length": 7,   // average number of tokens per sentence in the text
 | 
			
		||||
                "num_ent_types": 19,
 | 
			
		||||
                "num_unique_ent_types": 5,    // number of unique ent_types in the text
 | 
			
		||||
                "num_entities_by_id": {
 | 
			
		||||
                    "1": "int",    // number of entities with id 1
 | 
			
		||||
                    // ...
 | 
			
		||||
                },            
 | 
			
		||||
                "author": "Author Name 2",
 | 
			
		||||
                "title": "Titel 2",
 | 
			
		||||
                "publishing_year": 1951
 | 
			
		||||
              }
 | 
			
		||||
          ]
 | 
			
		||||
      };
 | 
			
		||||
 | 
			
		||||
      resolve(dummyData);
 | 
			
		||||
      /*
 | 
			
		||||
 
 | 
			
		||||
@@ -25,7 +25,6 @@ class CorpusAnalysisApp {
 | 
			
		||||
  init() {
 | 
			
		||||
    this.disableActionElements();
 | 
			
		||||
    this.elements.m.initModal.open();
 | 
			
		||||
  
 | 
			
		||||
    // Init data
 | 
			
		||||
    this.data.cQiClient = new CQiClient(this.settings.corpusId);
 | 
			
		||||
    this.data.cQiClient.connect()
 | 
			
		||||
@@ -35,7 +34,13 @@ class CorpusAnalysisApp {
 | 
			
		||||
      .then(
 | 
			
		||||
        cQiCorpus => {
 | 
			
		||||
          this.data.corpus = {o: cQiCorpus};
 | 
			
		||||
          // TODO: Don't do this here
 | 
			
		||||
          this.data.corpus.o.getCorpusData()
 | 
			
		||||
            .then(corpusData => {
 | 
			
		||||
              this.renderGeneralCorpusInfo(corpusData);
 | 
			
		||||
              this.renderTextInfoList(corpusData);
 | 
			
		||||
              this.renderTextProportionsGraphic(corpusData);
 | 
			
		||||
            });
 | 
			
		||||
          // TODO: Don't do this here
 | 
			
		||||
          cQiCorpus.updateDb();
 | 
			
		||||
          this.enableActionElements();
 | 
			
		||||
          for (let extension of Object.values(this.extensions)) {extension.init();}
 | 
			
		||||
@@ -52,6 +57,7 @@ class CorpusAnalysisApp {
 | 
			
		||||
          }
 | 
			
		||||
        }
 | 
			
		||||
      );
 | 
			
		||||
 | 
			
		||||
  
 | 
			
		||||
    // Add event listeners
 | 
			
		||||
    for (let extensionSelectorElement of this.elements.overview.querySelectorAll('.extension-selector')) {
 | 
			
		||||
@@ -95,4 +101,40 @@ class CorpusAnalysisApp {
 | 
			
		||||
      }
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  renderGeneralCorpusInfo(corpusData) {
 | 
			
		||||
    let corpusGeneralInfoListElement = document.querySelector('.corpus-general-info-list');
 | 
			
		||||
    corpusGeneralInfoListElement.querySelector('.corpus-num-tokens').innerHTML = `<b>Number of tokens:</b> ${this.data.corpus.o.size}`;
 | 
			
		||||
    corpusGeneralInfoListElement.querySelector('.corpus-text-count').innerHTML = `<b>Corpus text count:</b> ${corpusData.texts.length}`;
 | 
			
		||||
    corpusGeneralInfoListElement.querySelector('.corpus-num-unique-words').innerHTML = `<b>Corpus unique word count:</b> ${corpusData.num_unique_words}`;
 | 
			
		||||
    corpusGeneralInfoListElement.querySelector('.corpus-num-unique-lemmas').innerHTML = `<b>Corpus unique lemma count:</b> ${corpusData.num_unique_lemmas}`;
 | 
			
		||||
    // corpusGeneralInfoListElement.querySelector('.corpus-most-frequent-words').innerHTML = `<b>Corpus most frequent words:</b> ${corpusData.most_frequent_words.join(', ');
 | 
			
		||||
    corpusGeneralInfoListElement.querySelector('.corpus-num-sentences').innerHTML = `<b>Corpus sentence count:</b> ${corpusData.num_sentences}`;
 | 
			
		||||
    corpusGeneralInfoListElement.querySelector('.corpus-average-sentence-length').innerHTML = `<b>Corpus average sentence length:</b> ${corpusData.average_sentence_length}`;
 | 
			
		||||
    corpusGeneralInfoListElement.querySelector('.corpus-num-ent-types').innerHTML = `<b>Corpus entity count:</b> ${corpusData.num_ent_types}`;
 | 
			
		||||
    corpusGeneralInfoListElement.querySelector('.corpus-num-unique-ent-types').innerHTML = `<b>Corpus unique entity count:</b> ${corpusData.num_unique_ent_types}`;
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  renderTextInfoList(corpusData) {
 | 
			
		||||
    let corpusTextInfoListElement = document.querySelector('.corpus-text-info-list');
 | 
			
		||||
    let corpusTextInfoList = new CorpusTextInfoList(corpusTextInfoListElement);
 | 
			
		||||
    corpusTextInfoList.add(corpusData.texts);
 | 
			
		||||
    
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  renderTextProportionsGraphic(corpusData) {
 | 
			
		||||
    let textProportionsGraphicElement = document.querySelector('#text-proportions-graphic');
 | 
			
		||||
    let graphData = [
 | 
			
		||||
      {
 | 
			
		||||
        values: corpusData.texts.map(text => text.num_tokens),
 | 
			
		||||
        labels: corpusData.texts.map(text => `${text.title} (${text.publishing_year})`),
 | 
			
		||||
        type: 'pie'
 | 
			
		||||
      }
 | 
			
		||||
    ];
 | 
			
		||||
    let graphLayout = {
 | 
			
		||||
      height: 400,
 | 
			
		||||
      width: 500
 | 
			
		||||
    };
 | 
			
		||||
    Plotly.newPlot(textProportionsGraphicElement, graphData, graphLayout);
 | 
			
		||||
  }
 | 
			
		||||
}
 | 
			
		||||
 
 | 
			
		||||
							
								
								
									
										97
									
								
								app/static/js/ResourceLists/CorpusTextInfoList.js
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										97
									
								
								app/static/js/ResourceLists/CorpusTextInfoList.js
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,97 @@
 | 
			
		||||
/**
 * Resource list that shows per-text statistics of a corpus as a searchable,
 * paginated table.
 *
 * NOTE(review): the base class `ResourceList` is defined elsewhere; it
 * presumably consumes `item`, `valueNames`, `initListContainerElement()` and
 * `mapResourceToValue()` — confirm against `ResourceList`.
 */
class CorpusTextInfoList extends ResourceList {
  /**
   * Create a list for every `.corpus-text-info-list` element in the document
   * that is not marked with `.no-autoinit`.
   */
  static autoInit() {
    const selector = '.corpus-text-info-list:not(.no-autoinit)';
    for (const listContainerElement of document.querySelectorAll(selector)) {
      new CorpusTextInfoList(listContainerElement);
    }
  }

  // Defaults that the constructor merges with the caller supplied options.
  // NOTE(review): `page` is presumably the number of rows per page — confirm.
  static defaultOptions = {
    page: 4
  };

  /**
   * @param {Element} listContainerElement - Element the list is rendered into.
   * @param {Object} [options={}] - Overrides for `CorpusTextInfoList.defaultOptions`.
   */
  constructor(listContainerElement, options = {}) {
    const mergedOptions = Utils.mergeObjectsDeep(
      CorpusTextInfoList.defaultOptions,
      options
    );
    super(listContainerElement, mergedOptions);
    this.isInitialized = false;
  }

  /**
   * Row template factory: returns a function producing the markup of one
   * table row. The spans carry the class names listed in `valueNames`; the
   * passed values are not used to build the markup.
   */
  get item() {
    return (values) => {
      return `
        <tr class="list-item clickable hoverable">
          <td><span class="title"></span> (<span class="publishing_year"></span>)</td>
          <td><span class="num_tokens"></span></td>
          <td><span class="num_unique_words"></span></td>
          <td><span class="num_unique_lemmas"></span></td>
          <td><span class="num_sentences"></span></td>
          <td><span class="average_sentence_length"></span></td>
          <td><span class="num_ent_types"></span></td>
          <td><span class="num_unique_ent_types"></span></td>
        </tr>
      `.trim();
    };
  }

  /** Names of the values shown per row; they match the span classes of `item`. */
  get valueNames() {
    return [
      'title',
      'publishing_year',
      'num_tokens',
      'num_unique_words',
      'num_unique_lemmas',
      'num_sentences',
      'average_sentence_length',
      'num_ent_types',
      'num_unique_ent_types'
    ];
  }

  /**
   * Render the static skeleton of the list (search field, table head, empty
   * table body and pagination) into the container element. The container
   * gets a generated id if it has none.
   */
  initListContainerElement() {
    if (!this.listContainerElement.hasAttribute('id')) {
      // NOTE(review): the 'corpus-file-list-' prefix and the "Search corpus file"
      // label below look copied from a corpus file list — confirm whether they
      // should refer to corpus texts instead.
      this.listContainerElement.id = Utils.generateElementId('corpus-file-list-');
    }
    const listSearchElementId = Utils.generateElementId(`${this.listContainerElement.id}-search-`);
    this.listContainerElement.innerHTML = `
      <div class="input-field">
        <i class="material-icons prefix">search</i>
        <input id="${listSearchElementId}" class="search" type="text"></input>
        <label for="${listSearchElementId}">Search corpus file</label>
      </div>
      <table>
        <thead>
          <tr>
            <th>Text</th>
            <th>Number of tokens</th>
            <th>Number of unique words</th>
            <th>Number of unique lemmas</th>
            <th>Number of sentences</th>
            <th>Average sentence length</th>
            <th>Number of entity types</th>
            <th>Number of unique entity types</th>
          </tr>
        </thead>
        <tbody class="list"></tbody>
      </table>
      <ul class="pagination"></ul>
    `.trim();
  }

  /**
   * Reduce one text record to exactly the fields named in `valueNames`.
   *
   * @param {Object} corpusTextData - Statistics of a single text.
   * @returns {Object} Object holding one property per entry of `valueNames`,
   *   in that order; any other property of the record is dropped.
   */
  mapResourceToValue(corpusTextData) {
    // Derived from `valueNames` so the row template, the value names and this
    // mapping cannot drift apart. No debug logging here: this runs per row.
    return Object.fromEntries(
      this.valueNames.map((valueName) => [valueName, corpusTextData[valueName]])
    );
  }
}
 | 
			
		||||
@@ -15,6 +15,7 @@ class ResourceList {
 | 
			
		||||
    UserList.autoInit();
 | 
			
		||||
    AdminUserList.autoInit();
 | 
			
		||||
    CorpusFollowerList.autoInit();
 | 
			
		||||
    CorpusTextInfoList.autoInit();
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  static defaultOptions = {
 | 
			
		||||
 
 | 
			
		||||
@@ -1,6 +1,8 @@
 | 
			
		||||
<script src="https://cdnjs.cloudflare.com/ajax/libs/fast-json-patch/3.1.1/fast-json-patch.min.js" integrity="sha512-5uDdefwnzyq4N+SkmMBmekZLZNmc6dLixvVxCdlHBfqpyz0N3bzLdrJ55OLm7QrZmgZuhLGgHLDtJwU6RZoFCA==" crossorigin="anonymous" referrerpolicy="no-referrer"></script>
 | 
			
		||||
<script src="https://cdnjs.cloudflare.com/ajax/libs/list.js/2.3.1/list.min.js" integrity="sha512-93wYgwrIFL+b+P3RvYxi/WUFRXXUDSLCT2JQk9zhVGXuS2mHl2axj6d+R6pP+gcU5isMHRj1u0oYE/mWyt/RjA==" crossorigin="anonymous" referrerpolicy="no-referrer"></script>
 | 
			
		||||
<script src="https://cdnjs.cloudflare.com/ajax/libs/socket.io/4.5.4/socket.io.min.js" integrity="sha512-HTENHrkQ/P0NGDFd5nk6ibVtCkcM7jhr2c7GyvXp5O+4X6O5cQO9AhqFzM+MdeBivsX7Hoys2J7pp2wdgMpCvw==" crossorigin="anonymous" referrerpolicy="no-referrer"></script>
 | 
			
		||||
<script src="https://cdn.plot.ly/plotly-2.24.1.min.js" charset="utf-8"></script>
 | 
			
		||||
 | 
			
		||||
{%- assets
 | 
			
		||||
  filters='rjsmin',
 | 
			
		||||
  output='gen/app.%(version)s.js',
 | 
			
		||||
@@ -49,6 +51,7 @@
 | 
			
		||||
  'js/ResourceLists/UserList.js',
 | 
			
		||||
  'js/ResourceLists/AdminUserList.js',
 | 
			
		||||
  'js/ResourceLists/CorpusFollowerList.js',
 | 
			
		||||
  'js/ResourceLists/CorpusTextInfoList.js',
 | 
			
		||||
  'js/ResourceLists/DetailledPublicCorpusList.js'
 | 
			
		||||
%}
 | 
			
		||||
<script src="{{ ASSET_URL }}"></script>
 | 
			
		||||
 
 | 
			
		||||
@@ -33,6 +33,58 @@
 | 
			
		||||
  {% endfor %}
 | 
			
		||||
</div>
 | 
			
		||||
 | 
			
		||||
<div class="row">
 | 
			
		||||
  <div class="col s12">
 | 
			
		||||
    <h4><i class="material-icons left">query_stats</i>Visualizations</h4>
 | 
			
		||||
  </div>
 | 
			
		||||
  <div class="col s4" >
 | 
			
		||||
    <div class="card hoverable">
 | 
			
		||||
      <div class="card-content">
 | 
			
		||||
        <span class="card-title">General information about the Corpus</span>
 | 
			
		||||
        <p></p>
 | 
			
		||||
        <br>
 | 
			
		||||
        <ul class="corpus-general-info-list">
 | 
			
		||||
          <li class="corpus-num-tokens"></li>
 | 
			
		||||
          <br>
 | 
			
		||||
          <li class="corpus-text-count"></li>
 | 
			
		||||
          <br>
 | 
			
		||||
          <li class="corpus-num-unique-words"></li>
 | 
			
		||||
          <br>
 | 
			
		||||
          <li class="corpus-num-unique-lemmas"></li>
 | 
			
		||||
          <br>
 | 
			
		||||
          <li class="corpus-num-sentences"></li>
 | 
			
		||||
          <br>
 | 
			
		||||
          <li class="corpus-average-sentence-length"></li>
 | 
			
		||||
          <br>
 | 
			
		||||
          <li class="corpus-num-ent-types"></li>
 | 
			
		||||
          <br>
 | 
			
		||||
          <li class="corpus-num-unique-ent-types"></li>
 | 
			
		||||
          <br>
 | 
			
		||||
        </ul>
 | 
			
		||||
      </div>
 | 
			
		||||
    </div>
 | 
			
		||||
  </div>
 | 
			
		||||
  <div class="col s8">
 | 
			
		||||
    <div class="card hoverable">
 | 
			
		||||
      <div class="card-content">
 | 
			
		||||
        <span class="card-title">Text information</span>
 | 
			
		||||
        <div class="corpus-text-info-list no-autoinit"></div>
 | 
			
		||||
      </div>
 | 
			
		||||
    </div>
 | 
			
		||||
  </div>
 | 
			
		||||
</div>
 | 
			
		||||
<div class="row">
 | 
			
		||||
  <div class="col s12">
 | 
			
		||||
    <div class="card hoverable">
 | 
			
		||||
      <div class="card-content">
 | 
			
		||||
        <span class="card-title">Text proportions within the corpus</span>
 | 
			
		||||
        <div id="text-proportions-graphic"></div>
 | 
			
		||||
      </div>
 | 
			
		||||
    </div>
 | 
			
		||||
  </div>
 | 
			
		||||
</div>
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
{% for extension in extensions %}
 | 
			
		||||
<div id="{{ extension.id_prefix }}-container">
 | 
			
		||||
  {{ extension.container_content }}
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user