mirror of
				https://gitlab.ub.uni-bielefeld.de/sfb1288inf/nopaque.git
				synced 2025-11-04 04:12:45 +00:00 
			
		
		
		
	Merge branch 'visualizations-update' of gitlab.ub.uni-bielefeld.de:sfb1288inf/nopaque into visualizations-update
This commit is contained in:
		
										
											
												File diff suppressed because it is too large
												Load Diff
											
										
									
								
							@@ -34,25 +34,25 @@ class CorpusAnalysisApp {
 | 
			
		||||
      .then(
 | 
			
		||||
        cQiCorpus => {
 | 
			
		||||
          this.data.corpus = {o: cQiCorpus};
 | 
			
		||||
          this.data.corpus.o.getVisualizationData().then(
 | 
			
		||||
            (data) => {
 | 
			
		||||
              console.log(data);
 | 
			
		||||
            }
 | 
			
		||||
          );
 | 
			
		||||
          // this.data.corpus.o.getVisualizationData()
 | 
			
		||||
          //   .then(
 | 
			
		||||
          //     (visualizationData) => {
 | 
			
		||||
          //       console.log(visualizationData);
 | 
			
		||||
          //       this.renderGeneralCorpusInfo(visualizationData);
 | 
			
		||||
          //       this.renderTextInfoList(visualizationData);
 | 
			
		||||
          //       this.renderTextProportionsGraphic(visualizationData);
 | 
			
		||||
          //     }
 | 
			
		||||
          //   );
 | 
			
		||||
          this.data.corpus.o.getVisualizationData()
 | 
			
		||||
            .then(
 | 
			
		||||
              (data) => {
 | 
			
		||||
                console.log(data);
 | 
			
		||||
                // this.renderGeneralCorpusInfo(data);
 | 
			
		||||
                // this.renderTextInfoList(data);
 | 
			
		||||
                // this.renderTextProportionsGraphic(data);
 | 
			
		||||
                // this.renderWordFrequenciesGraphic(data);
 | 
			
		||||
                // this.renderBoundsGraphic(data);
 | 
			
		||||
              }
 | 
			
		||||
            );
 | 
			
		||||
          this.data.corpus.o.getCorpusData()
 | 
			
		||||
            .then(corpusData => {
 | 
			
		||||
              console.log(corpusData);
 | 
			
		||||
              this.renderGeneralCorpusInfo(corpusData);
 | 
			
		||||
              this.renderTextInfoList(corpusData);
 | 
			
		||||
              this.renderTextProportionsGraphic(corpusData);
 | 
			
		||||
              this.renderFrequenciesGraphic(corpusData);
 | 
			
		||||
              this.renderBoundsGraphic(corpusData);
 | 
			
		||||
            });
 | 
			
		||||
          // TODO: Don't do this here
 | 
			
		||||
          cQiCorpus.updateDb();
 | 
			
		||||
@@ -117,38 +117,135 @@ class CorpusAnalysisApp {
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  renderGeneralCorpusInfo(corpusData) {
 | 
			
		||||
    let corpusGeneralInfoListElement = document.querySelector('.corpus-general-info-list');
 | 
			
		||||
    corpusGeneralInfoListElement.querySelector('.corpus-num-tokens').innerHTML = `<b>Number of tokens:</b> ${this.data.corpus.o.size}`;
 | 
			
		||||
    corpusGeneralInfoListElement.querySelector('.corpus-text-count').innerHTML = `<b>Corpus text count:</b> ${corpusData.texts.length}`;
 | 
			
		||||
    corpusGeneralInfoListElement.querySelector('.corpus-num-unique-words').innerHTML = `<b>Corpus unique word count:</b> ${corpusData.num_unique_words}`;
 | 
			
		||||
    corpusGeneralInfoListElement.querySelector('.corpus-num-unique-lemmas').innerHTML = `<b>Corpus unique lemma count:</b> ${corpusData.num_unique_lemmas}`;
 | 
			
		||||
    // corpusGeneralInfoListElement.querySelector('.corpus-most-frequent-words').innerHTML = `<b>Corpus most frequent words:</b> ${corpusData.most_frequent_words.join(', ');
 | 
			
		||||
    corpusGeneralInfoListElement.querySelector('.corpus-num-sentences').innerHTML = `<b>Corpus sentence count:</b> ${corpusData.num_sentences}`;
 | 
			
		||||
    corpusGeneralInfoListElement.querySelector('.corpus-average-sentence-length').innerHTML = `<b>Corpus average sentence length:</b> ${corpusData.average_sentence_length}`;
 | 
			
		||||
    corpusGeneralInfoListElement.querySelector('.corpus-num-ent-types').innerHTML = `<b>Corpus entity count:</b> ${corpusData.num_ent_types}`;
 | 
			
		||||
    corpusGeneralInfoListElement.querySelector('.corpus-num-unique-ent-types').innerHTML = `<b>Corpus unique entity count:</b> ${corpusData.num_unique_ent_types}`;
 | 
			
		||||
    document.querySelector('.corpus-num-tokens').innerHTML = corpusData.corpus.lexicon[0].counts.token;
 | 
			
		||||
    document.querySelector('.corpus-num-s').innerHTML = corpusData.corpus.lexicon[0].counts.s;
 | 
			
		||||
    document.querySelector('.corpus-num-unique-words').innerHTML = Object.entries(corpusData.corpus.lexicon[0].freqs.word).length;
 | 
			
		||||
    document.querySelector('.corpus-num-unique-lemmas').innerHTML = Object.entries(corpusData.corpus.lexicon[0].freqs.lemma).length;
 | 
			
		||||
    document.querySelector('.corpus-num-unique-pos').innerHTML = Object.entries(corpusData.corpus.lexicon[0].freqs.pos).length;
 | 
			
		||||
    document.querySelector('.corpus-num-unique-simple-pos').innerHTML = Object.entries(corpusData.corpus.lexicon[0].freqs.simple_pos).length;
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  renderTextInfoList(corpusData) {
 | 
			
		||||
    let corpusTextInfoListElement = document.querySelector('.corpus-text-info-list');
 | 
			
		||||
    let corpusTextInfoList = new CorpusTextInfoList(corpusTextInfoListElement);
 | 
			
		||||
    corpusTextInfoList.add(corpusData.texts);
 | 
			
		||||
    
 | 
			
		||||
    let texts = corpusData.text.lexicon;
 | 
			
		||||
    let textData = [];
 | 
			
		||||
    for (let i = 0; i < Object.entries(texts).length; i++) {
 | 
			
		||||
      let resource = {
 | 
			
		||||
        title: corpusData.lookups.text[i].title,
 | 
			
		||||
        publishing_year: corpusData.lookups.text[i].publishing_year,
 | 
			
		||||
        num_tokens: corpusData.text.lexicon[i].counts.token,
 | 
			
		||||
        num_sentences: corpusData.text.lexicon[i].counts.s,
 | 
			
		||||
        num_unique_words: Object.entries(corpusData.text.lexicon[i].freqs.word).length,
 | 
			
		||||
        num_unique_lemmas: Object.entries(corpusData.text.lexicon[i].freqs.lemma).length,
 | 
			
		||||
        num_unique_pos: Object.entries(corpusData.text.lexicon[i].freqs.pos).length,
 | 
			
		||||
        num_unique_simple_pos: Object.entries(corpusData.text.lexicon[i].freqs.simple_pos).length
 | 
			
		||||
      };
 | 
			
		||||
  
 | 
			
		||||
      textData.push(resource);
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    corpusTextInfoList.add(textData);
 | 
			
		||||
 | 
			
		||||
    let textCountChipElement = document.querySelector('.text-count-chip');
 | 
			
		||||
    textCountChipElement.innerHTML = `Text count: ${Object.values(corpusData.text.lexicon).length}`;
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  renderTextProportionsGraphic(corpusData) {
 | 
			
		||||
    let textProportionsGraphicElement = document.querySelector('#text-proportions-graphic');
 | 
			
		||||
    let texts = Object.entries(corpusData.text.lexicon);
 | 
			
		||||
    let graphData = [
 | 
			
		||||
      {
 | 
			
		||||
        values: corpusData.texts.map(text => text.num_tokens),
 | 
			
		||||
        labels: corpusData.texts.map(text => `${text.title} (${text.publishing_year})`),
 | 
			
		||||
        values: texts.map(text => text[1].counts.token),
 | 
			
		||||
        labels: texts.map(text => `${corpusData.lookups.text[text[0]].title} (${corpusData.lookups.text[text[0]].publishing_year})`),
 | 
			
		||||
        type: 'pie'
 | 
			
		||||
      }
 | 
			
		||||
    ];
 | 
			
		||||
    let graphLayout = {
 | 
			
		||||
      height: 400,
 | 
			
		||||
      width: 500
 | 
			
		||||
      // height: 600,
 | 
			
		||||
      // width: 900
 | 
			
		||||
    };
 | 
			
		||||
    Plotly.newPlot(textProportionsGraphicElement, graphData, graphLayout);
 | 
			
		||||
    let config = {responsive: true};
 | 
			
		||||
 | 
			
		||||
    Plotly.newPlot(textProportionsGraphicElement, graphData, graphLayout, config);
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  renderFrequenciesGraphic(corpusData) {
 | 
			
		||||
    let frequenciesTokenCategoryDropdownElement = document.querySelector('[data-target="frequencies-token-category-dropdown"]');
 | 
			
		||||
    let frequenciesTokenCategoryDropdownListElement = document.querySelector("#frequencies-token-category-dropdown");
 | 
			
		||||
    let frequenciesGraphicElement = document.querySelector('#frequencies-graphic');
 | 
			
		||||
    let texts = Object.entries(corpusData.text.lexicon);
 | 
			
		||||
    
 | 
			
		||||
    
 | 
			
		||||
    frequenciesTokenCategoryDropdownListElement.addEventListener('click', (event) => {
 | 
			
		||||
      frequenciesTokenCategoryDropdownElement.firstChild.textContent = event.target.innerHTML;
 | 
			
		||||
      this.renderFrequenciesGraphic(corpusData);
 | 
			
		||||
    });
 | 
			
		||||
    
 | 
			
		||||
    let tokenCategory = frequenciesTokenCategoryDropdownElement.firstChild.textContent.toLowerCase();
 | 
			
		||||
    
 | 
			
		||||
    let graphData = this.createFrequenciesGraphData(tokenCategory, texts, corpusData);
 | 
			
		||||
    let graphLayout = {
 | 
			
		||||
      barmode: 'stack',
 | 
			
		||||
      type: 'bar'
 | 
			
		||||
    };
 | 
			
		||||
    let config = {responsive: true};
 | 
			
		||||
    
 | 
			
		||||
    Plotly.newPlot(frequenciesGraphicElement, graphData, graphLayout, config);
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  createFrequenciesGraphData(category, texts, corpusData) {
 | 
			
		||||
    let graphData = [];
 | 
			
		||||
    let sortedData = Object.entries(corpusData.corpus.lexicon[0].freqs[category]).sort((a, b) => b[1] - a[1]).slice(0, 5);
 | 
			
		||||
  
 | 
			
		||||
    for (let item of sortedData) {
 | 
			
		||||
      let data = {
 | 
			
		||||
        x: texts.map(text => `${corpusData.lookups.text[text[0]].title} (${corpusData.lookups.text[text[0]].publishing_year})`),
 | 
			
		||||
        y: texts.map(text => text[1].freqs[category][item[0]]),
 | 
			
		||||
        name: corpusData.lookups[category][item[0]],
 | 
			
		||||
        type: 'bar'
 | 
			
		||||
      };
 | 
			
		||||
      graphData.push(data);
 | 
			
		||||
    }
 | 
			
		||||
  
 | 
			
		||||
    return graphData;
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  renderBoundsGraphic(corpusData) {
 | 
			
		||||
      let boundsGraphicElement = document.querySelector('#bounds-graphic');
 | 
			
		||||
 | 
			
		||||
      let graphData = [];
 | 
			
		||||
      let texts = Object.entries(corpusData.text.lexicon);
 | 
			
		||||
 | 
			
		||||
      graphData = [{
 | 
			
		||||
        type: 'bar',
 | 
			
		||||
        x: texts.map(text => text[1].bounds[1] - text[1].bounds[0]),
 | 
			
		||||
        y: texts.map(text => corpusData.lookups.text[text[0]].title),
 | 
			
		||||
        base: texts.map(text => text[1].bounds[0]),
 | 
			
		||||
        text: texts.map(text => `${corpusData.lookups.text[text[0]].title} (${corpusData.lookups.text[text[0]].publishing_year})`),
 | 
			
		||||
        orientation: 'h',
 | 
			
		||||
        hovertemplate: '%{base} - %{x} <br>%{y}',
 | 
			
		||||
        showlegend: false
 | 
			
		||||
      }];
 | 
			
		||||
 | 
			
		||||
      let graphLayout = {
 | 
			
		||||
        // height: 600,
 | 
			
		||||
        // width: 2000,
 | 
			
		||||
        barmode: 'stack',
 | 
			
		||||
        type: 'bar',
 | 
			
		||||
        showgrid: false,
 | 
			
		||||
        xaxis: {
 | 
			
		||||
          rangemode: 'nonnegative',
 | 
			
		||||
          autorange: true
 | 
			
		||||
        },
 | 
			
		||||
        yaxis: {
 | 
			
		||||
          autorange: true,
 | 
			
		||||
          showticklabels: false
 | 
			
		||||
        }
 | 
			
		||||
      };
 | 
			
		||||
 | 
			
		||||
      let config = {responsive: true};
 | 
			
		||||
    
 | 
			
		||||
      Plotly.newPlot(boundsGraphicElement, graphData, graphLayout, config);
 | 
			
		||||
  }
 | 
			
		||||
}
 | 
			
		||||
 
 | 
			
		||||
@@ -29,11 +29,11 @@ class CorpusTextInfoList extends ResourceList {
 | 
			
		||||
        <tr class="list-item clickable hoverable">
 | 
			
		||||
          <td><span class="title"></span> (<span class="publishing_year"></span>)</td>
 | 
			
		||||
          <td><span class="num_tokens"></span></td>
 | 
			
		||||
          <td><span class="num_sentences"></span></td>
 | 
			
		||||
          <td><span class="num_unique_words"></span></td>
 | 
			
		||||
          <td><span class="num_unique_lemmas"></span></td>
 | 
			
		||||
          <td><span class="num_sentences"></span></td>
 | 
			
		||||
          <td><span class="average_sentence_length"></span></td>
 | 
			
		||||
          <td><span class="num_unique_ent_types"></span></td>
 | 
			
		||||
          <td><span class="num_unique_pos"></span></td>
 | 
			
		||||
          <td><span class="num_unique_simple_pos"></span></td>
 | 
			
		||||
        </tr>
 | 
			
		||||
      `.trim();
 | 
			
		||||
    }
 | 
			
		||||
@@ -44,11 +44,11 @@ class CorpusTextInfoList extends ResourceList {
 | 
			
		||||
      'title',
 | 
			
		||||
      'publishing_year',
 | 
			
		||||
      'num_tokens',
 | 
			
		||||
      'num_sentences',
 | 
			
		||||
      'num_unique_words',
 | 
			
		||||
      'num_unique_lemmas',
 | 
			
		||||
      'num_sentences',
 | 
			
		||||
      'average_sentence_length',
 | 
			
		||||
      'num_unique_ent_types'
 | 
			
		||||
      'num_unique_pos',
 | 
			
		||||
      'num_unique_simple_pos'
 | 
			
		||||
    ];
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
@@ -68,11 +68,11 @@ class CorpusTextInfoList extends ResourceList {
 | 
			
		||||
          <tr>
 | 
			
		||||
            <th>Text<span class="sort right material-icons" data-sort="title" style="cursor:pointer; color:#aa9cc9">arrow_drop_down</span></th>
 | 
			
		||||
            <th>Number of tokens<span class="sort right material-icons" data-sort="num_tokens" style="cursor:pointer">arrow_drop_down</span></th>
 | 
			
		||||
            <th>Number of sentences<span class="sort right material-icons" data-sort="num_sentences" style="cursor:pointer">arrow_drop_down</span></th>
 | 
			
		||||
            <th>Number of unique words<span class="sort right material-icons" data-sort="num_unique_words" style="cursor:pointer">arrow_drop_down</span></th>
 | 
			
		||||
            <th>Number of unique lemmas<span class="sort right material-icons" data-sort="num_unique_lemmas" style="cursor:pointer">arrow_drop_down</span></th>
 | 
			
		||||
            <th>Number of sentences<span class="sort right material-icons" data-sort="num_sentences" style="cursor:pointer">arrow_drop_down</span></th>
 | 
			
		||||
            <th>Average sentence length<span class="sort right material-icons" data-sort="average_sentence_length" style="cursor:pointer">arrow_drop_down</span></th>
 | 
			
		||||
            <th>Number of unique entity types<span class="sort right material-icons" data-sort="num_unique_ent_types" style="cursor:pointer">arrow_drop_down</span></th>
 | 
			
		||||
            <th>Number of unique pos<span class="sort right material-icons" data-sort="num_unique_pos" style="cursor:pointer">arrow_drop_down</span></th>
 | 
			
		||||
            <th>Number of unique simple pos<span class="sort right material-icons" data-sort="num_unique_simple_pos" style="cursor:pointer">arrow_drop_down</span></th>
 | 
			
		||||
          </tr>
 | 
			
		||||
        </thead>
 | 
			
		||||
        <tbody class="list"></tbody>
 | 
			
		||||
@@ -86,11 +86,11 @@ class CorpusTextInfoList extends ResourceList {
 | 
			
		||||
      title: corpusTextData.title,
 | 
			
		||||
      publishing_year: corpusTextData.publishing_year,
 | 
			
		||||
      num_tokens: corpusTextData.num_tokens,
 | 
			
		||||
      num_sentences: corpusTextData.num_sentences,
 | 
			
		||||
      num_unique_words: corpusTextData.num_unique_words,
 | 
			
		||||
      num_unique_lemmas: corpusTextData.num_unique_lemmas,
 | 
			
		||||
      num_sentences: corpusTextData.num_sentences,
 | 
			
		||||
      average_sentence_length: corpusTextData.average_sentence_length,
 | 
			
		||||
      num_unique_ent_types: corpusTextData.num_unique_ent_types
 | 
			
		||||
      num_unique_pos: corpusTextData.num_unique_pos,
 | 
			
		||||
      num_unique_simple_pos: corpusTextData.num_unique_simple_pos
 | 
			
		||||
    };
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user