mirror of
				https://gitlab.ub.uni-bielefeld.de/sfb1288inf/nopaque.git
				synced 2025-11-04 12:22:47 +00:00 
			
		
		
		
	New visualizations for frequencies
This commit is contained in:
		@@ -7,6 +7,8 @@ from app.decorators import content_negotiation
 | 
			
		||||
from app.models import Corpus, CorpusFollowerRole
 | 
			
		||||
from . import bp
 | 
			
		||||
from .decorators import corpus_follower_permission_required, corpus_owner_or_admin_required
 | 
			
		||||
import nltk
 | 
			
		||||
from string import punctuation
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@bp.route('/<hashid:corpus_id>', methods=['DELETE'])
 | 
			
		||||
@@ -56,6 +58,27 @@ def build_corpus(corpus_id):
 | 
			
		||||
    }
 | 
			
		||||
    return response_data, 202
 | 
			
		||||
 | 
			
		||||
@bp.route('/stopwords')
@content_negotiation(produces='application/json')
def get_stopwords():
    """Return the stopword lists used by the corpus analysis frontend.

    The JSON payload has a single ``stopwords`` key. Its value maps every
    supported language name to the NLTK stopword list of that language and
    additionally contains ``punctuation`` (``string.punctuation`` plus a few
    extra symbols) and an initially empty ``user_stopwords`` list.
    """
    # Download the corpus only when it is not available locally, so regular
    # requests do not trigger a network round trip.
    try:
        nltk.data.find('corpora/stopwords')
    except LookupError:
        nltk.download('stopwords', quiet=True)
    languages = ["german", "english", "catalan", "greek", "spanish", "french", "italian", "russian", "chinese"]
    stopwords = {
        language: nltk.corpus.stopwords.words(language)
        for language in languages
    }
    stopwords['punctuation'] = list(punctuation) + ['—', '|']
    stopwords['user_stopwords'] = []
    response_data = {
        'stopwords': stopwords
    }
    # The data is delivered synchronously, hence 200 OK rather than
    # 202 Accepted.
    return response_data, 200
 | 
			
		||||
 | 
			
		||||
# @bp.route('/<hashid:corpus_id>/generate-share-link', methods=['POST'])
 | 
			
		||||
# @corpus_follower_permission_required('MANAGE_FOLLOWERS')
 | 
			
		||||
 
 | 
			
		||||
@@ -42,7 +42,6 @@ def job_log(job_id):
 | 
			
		||||
    with open(os.path.join(job.path, 'pipeline_data', 'logs', 'pyflow_log.txt')) as log_file:
 | 
			
		||||
        log = log_file.read()
 | 
			
		||||
    response_data = {
 | 
			
		||||
        'message': '',
 | 
			
		||||
        'jobLog': log
 | 
			
		||||
    }
 | 
			
		||||
    return response_data, 200
 | 
			
		||||
 
 | 
			
		||||
@@ -1,6 +1,8 @@
 | 
			
		||||
class CorpusAnalysisApp {
 | 
			
		||||
  constructor(corpusId) {
 | 
			
		||||
    this.data = {};
 | 
			
		||||
    this.data = {
 | 
			
		||||
      promises: {getStopwords: []}
 | 
			
		||||
    };
 | 
			
		||||
 | 
			
		||||
    // HTML elements
 | 
			
		||||
    this.elements = {
 | 
			
		||||
@@ -22,6 +24,49 @@ class CorpusAnalysisApp {
 | 
			
		||||
    };
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  // getStopwords(language) {
 | 
			
		||||
  //   if (language in this.data.promises.getStopwords) {
 | 
			
		||||
  //     console.log('Stopwords already loaded');
 | 
			
		||||
  //     return this.data.promises.getStopwords[language];
 | 
			
		||||
  //   } 
 | 
			
		||||
  //   this.data.promises.getStopwords[language] = new Promise((resolve, reject) => {
 | 
			
		||||
  //       Requests.corpora.entity.getStopwords(language)
 | 
			
		||||
  //         .then((response) => {
 | 
			
		||||
  //           response.json()
 | 
			
		||||
  //             .then((json) => {
 | 
			
		||||
  //               let stopwords = json.stopwords;
 | 
			
		||||
  //               resolve(stopwords);
 | 
			
		||||
  //             })
 | 
			
		||||
  //             .catch((error) => {
 | 
			
		||||
  //               reject(error);
 | 
			
		||||
  //             });
 | 
			
		||||
  //         });
 | 
			
		||||
  //     });
 | 
			
		||||
  //     return this.data.promises.getStopwords[language];
 | 
			
		||||
  // }
 | 
			
		||||
 | 
			
		||||
  getStopwords() {
 | 
			
		||||
    if (this.data.promises.getStopwords.length !== 0) {
 | 
			
		||||
      console.log('Stopwords already loaded');
 | 
			
		||||
      return this.data.promises.getStopwords;
 | 
			
		||||
    }
 | 
			
		||||
    this.data.promises.getStopwords = new Promise((resolve, reject) => {
 | 
			
		||||
      Requests.corpora.entity.getStopwords()
 | 
			
		||||
        .then((response) => {
 | 
			
		||||
          response.json()
 | 
			
		||||
            .then((json) => {
 | 
			
		||||
              let stopwords = json.stopwords;
 | 
			
		||||
              resolve(stopwords);
 | 
			
		||||
            })
 | 
			
		||||
            .catch((error) => {
 | 
			
		||||
              reject(error);
 | 
			
		||||
            });
 | 
			
		||||
        });
 | 
			
		||||
    });
 | 
			
		||||
    return this.data.promises.getStopwords;
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
  init() {
 | 
			
		||||
    this.disableActionElements();
 | 
			
		||||
    this.elements.m.initModal.open();
 | 
			
		||||
@@ -161,9 +206,28 @@ class CorpusAnalysisApp {
 | 
			
		||||
        type: 'pie'
 | 
			
		||||
      }
 | 
			
		||||
    ];
 | 
			
		||||
    let config = {responsive: true};
 | 
			
		||||
    let graphLayout = {
 | 
			
		||||
      showlegend: true,
 | 
			
		||||
      height: 486,
 | 
			
		||||
      margin: {
 | 
			
		||||
        l: 10,
 | 
			
		||||
        r: 10,
 | 
			
		||||
        b: 10,
 | 
			
		||||
        t: 10
 | 
			
		||||
      },
 | 
			
		||||
      legend: {
 | 
			
		||||
        "orientation": "h",
 | 
			
		||||
        font: {
 | 
			
		||||
          size: 10
 | 
			
		||||
        }
 | 
			
		||||
      }
 | 
			
		||||
    };
 | 
			
		||||
    let config = {
 | 
			
		||||
      responsive: true,
 | 
			
		||||
      displaylogo: false
 | 
			
		||||
    };
 | 
			
		||||
 | 
			
		||||
    Plotly.newPlot(textProportionsGraphicElement, graphData, config);
 | 
			
		||||
    Plotly.newPlot(textProportionsGraphicElement, graphData, graphLayout, config);
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  renderFrequenciesGraphic(corpusData) {
 | 
			
		||||
@@ -171,42 +235,106 @@ class CorpusAnalysisApp {
 | 
			
		||||
    let frequenciesTokenCategoryDropdownListElement = document.querySelector("#frequencies-token-category-dropdown");
 | 
			
		||||
    let frequenciesGraphicElement = document.querySelector('#frequencies-graphic');
 | 
			
		||||
    let texts = Object.entries(corpusData.s_attrs.text.lexicon);
 | 
			
		||||
    
 | 
			
		||||
    
 | 
			
		||||
    let graphtype = document.querySelector('.frequencies-graph-mode-button.disabled').dataset.graphType;
 | 
			
		||||
    let graphModeButtons = document.querySelectorAll('.frequencies-graph-mode-button');
 | 
			
		||||
 | 
			
		||||
    frequenciesTokenCategoryDropdownListElement.addEventListener('click', (event) => {
 | 
			
		||||
      frequenciesTokenCategoryDropdownElement.firstChild.textContent = event.target.innerHTML;
 | 
			
		||||
      this.renderFrequenciesGraphic(corpusData);
 | 
			
		||||
    });
 | 
			
		||||
 | 
			
		||||
    graphModeButtons.forEach(graphModeButton => {
 | 
			
		||||
      graphModeButton.addEventListener('click', (event) => {
 | 
			
		||||
        graphModeButtons.forEach(btn => {
 | 
			
		||||
          btn.classList.remove('disabled');
 | 
			
		||||
        });
 | 
			
		||||
        event.target.closest('.frequencies-graph-mode-button').classList.add('disabled');
 | 
			
		||||
        this.renderFrequenciesGraphic(corpusData);
 | 
			
		||||
      });
 | 
			
		||||
    });
 | 
			
		||||
    
 | 
			
		||||
    let tokenCategory = frequenciesTokenCategoryDropdownElement.firstChild.textContent.toLowerCase();
 | 
			
		||||
    
 | 
			
		||||
    let graphData = this.createFrequenciesGraphData(tokenCategory, texts, corpusData);
 | 
			
		||||
    let graphLayout = {
 | 
			
		||||
      barmode: 'stack',
 | 
			
		||||
      type: 'bar'
 | 
			
		||||
    };
 | 
			
		||||
    let config = {responsive: true};
 | 
			
		||||
    
 | 
			
		||||
    Plotly.newPlot(frequenciesGraphicElement, graphData, graphLayout, config);
 | 
			
		||||
    this.createFrequenciesGraphData(tokenCategory, texts, corpusData, graphtype)
 | 
			
		||||
      .then(graphData => {
 | 
			
		||||
        let graphLayout = {
 | 
			
		||||
          barmode: graphtype === 'bar' ? 'stack' : '',
 | 
			
		||||
          margin: {
 | 
			
		||||
            t: 20,
 | 
			
		||||
            l: 50
 | 
			
		||||
          },
 | 
			
		||||
          yaxis: {
 | 
			
		||||
            showticklabels: graphtype === 'markers' ? false : true
 | 
			
		||||
          },
 | 
			
		||||
        };
 | 
			
		||||
        let config = {
 | 
			
		||||
          responsive: true,
 | 
			
		||||
          modeBarButtonsToRemove: ['zoom2d', 'select2d', 'lasso2d', 'zoomIn2d', 'zoomOut2d', 'autoScale2d', 'resetScale2d'],
 | 
			
		||||
          displaylogo: false
 | 
			
		||||
        };
 | 
			
		||||
        Plotly.newPlot(frequenciesGraphicElement, graphData, graphLayout, config);
 | 
			
		||||
      });
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  createFrequenciesGraphData(category, texts, corpusData) {
 | 
			
		||||
    let graphData = [];
 | 
			
		||||
    let sortedData = Object.entries(corpusData.corpus.freqs[category]).sort((a, b) => b[1] - a[1]).slice(0, 5);
 | 
			
		||||
  
 | 
			
		||||
    for (let item of sortedData) {
 | 
			
		||||
      let data = {
 | 
			
		||||
        x: texts.map(text => `${corpusData.values.s_attrs.text[text[0]].title} (${corpusData.values.s_attrs.text[text[0]].publishing_year})`),
 | 
			
		||||
        y: texts.map(text => text[1].freqs[category][item[0]]),
 | 
			
		||||
        name: corpusData.values.p_attrs[category][item[0]],
 | 
			
		||||
        type: 'bar'
 | 
			
		||||
      };
 | 
			
		||||
      graphData.push(data);
 | 
			
		||||
    }
 | 
			
		||||
  
 | 
			
		||||
    return graphData;
 | 
			
		||||
  createFrequenciesGraphData(category, texts, corpusData, graphtype) {
 | 
			
		||||
    return new Promise((resolve, reject) => {
 | 
			
		||||
      this.getStopwords()
 | 
			
		||||
        .then(stopwords => {
 | 
			
		||||
          this.renderStopwordSettingsModal(stopwords);
 | 
			
		||||
          let stopwordList = [];
 | 
			
		||||
          Object.values(stopwords).forEach(stopwordItems => {
 | 
			
		||||
            stopwordItems.forEach(stopword => {
 | 
			
		||||
              stopwordList.push(stopword);
 | 
			
		||||
            });
 | 
			
		||||
          });
 | 
			
		||||
          let graphData = [];
 | 
			
		||||
          let filteredData = Object.entries(corpusData.corpus.freqs[category])
 | 
			
		||||
            .sort((a, b) => b[1] - a[1])
 | 
			
		||||
            .filter(item => !stopwordList.includes(corpusData.values.p_attrs[category][item[0]].toLowerCase()))
 | 
			
		||||
            .slice(0, 5);
 | 
			
		||||
          if (graphtype !== 'markers') {
 | 
			
		||||
            for (let item of filteredData) {
 | 
			
		||||
              let data = {
 | 
			
		||||
                x: texts.map(text => `${corpusData.values.s_attrs.text[text[0]].title} (${corpusData.values.s_attrs.text[text[0]].publishing_year})`),
 | 
			
		||||
                y: texts.map(text => text[1].freqs[category][item[0]] || 0),
 | 
			
		||||
                name: corpusData.values.p_attrs[category][item[0]],
 | 
			
		||||
                type: graphtype
 | 
			
		||||
              };
 | 
			
		||||
              graphData.push(data);
 | 
			
		||||
            }
 | 
			
		||||
          } else {
 | 
			
		||||
            for (let item of filteredData) {
 | 
			
		||||
              let size = texts.map(text => text[1].freqs[category][item[0]] || 0); 
 | 
			
		||||
              let data = {
 | 
			
		||||
                x: texts.map(text => `${corpusData.values.s_attrs.text[text[0]].title} (${corpusData.values.s_attrs.text[text[0]].publishing_year})`),
 | 
			
		||||
                y: texts.map(text => corpusData.values.p_attrs[category][item[0]]),
 | 
			
		||||
                name: corpusData.values.p_attrs[category][item[0]],
 | 
			
		||||
                text: texts.map(text => `${corpusData.values.p_attrs[category][item[0]]}<br>${text[1].freqs[category][item[0]] || 0}`),
 | 
			
		||||
                mode: 'markers',
 | 
			
		||||
                marker: {
 | 
			
		||||
                  size: size,
 | 
			
		||||
                  // sizeref: 2.0 * Math.max(...size) / (80**2),
 | 
			
		||||
                  // sizemode: 'area',
 | 
			
		||||
                  sizeref: 0.2
 | 
			
		||||
                }
 | 
			
		||||
              };
 | 
			
		||||
              graphData.push(data);
 | 
			
		||||
            }
 | 
			
		||||
          }
 | 
			
		||||
          resolve(graphData);
 | 
			
		||||
        })
 | 
			
		||||
        .catch(error => {
 | 
			
		||||
          reject(error);
 | 
			
		||||
        });
 | 
			
		||||
    });
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  // Stub for filling the stopword settings modal: it currently only looks up
  // the `.stopword-input-field` element and does not use `stopwords` yet.
  renderStopwordSettingsModal(stopwords) {
 | 
			
		||||
    let stopwordInputField = document.querySelector('.stopword-input-field');
 | 
			
		||||
  }
 | 
			
		||||
  
 | 
			
		||||
  
 | 
			
		||||
 | 
			
		||||
  renderBoundsGraphic(corpusData) {
 | 
			
		||||
      let boundsGraphicElement = document.querySelector('#bounds-graphic');
 | 
			
		||||
 | 
			
		||||
@@ -238,7 +366,11 @@ class CorpusAnalysisApp {
 | 
			
		||||
        }
 | 
			
		||||
      };
 | 
			
		||||
 | 
			
		||||
      let config = {responsive: true};
 | 
			
		||||
      let config = {
 | 
			
		||||
        responsive: true,
 | 
			
		||||
        modeBarButtonsToRemove: ['zoom2d', 'select2d', 'lasso2d', 'zoomIn2d', 'zoomOut2d', 'autoScale2d', 'resetScale2d'],
 | 
			
		||||
        displaylogo: false
 | 
			
		||||
      };
 | 
			
		||||
    
 | 
			
		||||
      Plotly.newPlot(boundsGraphicElement, graphData, graphLayout, config);
 | 
			
		||||
  }
 | 
			
		||||
 
 | 
			
		||||
@@ -22,9 +22,11 @@ Requests.JSONfetch = (input, init={}) => {
 | 
			
		||||
          response.json()
 | 
			
		||||
            .then(
 | 
			
		||||
              (json) => {
 | 
			
		||||
                let message = json.message || json;
 | 
			
		||||
                let message = json.message;
 | 
			
		||||
                let category = json.category || 'message';
 | 
			
		||||
                app.flash(message, category);
 | 
			
		||||
                if (message) {
 | 
			
		||||
                  app.flash(message, category);
 | 
			
		||||
                }
 | 
			
		||||
              },
 | 
			
		||||
              (error) => {
 | 
			
		||||
                app.flash(`[${response.status}]: ${response.statusText}`, 'error');
 | 
			
		||||
 
 | 
			
		||||
@@ -31,6 +31,14 @@ Requests.corpora.entity.generateShareLink = (corpusId, role, expiration) => {
 | 
			
		||||
  return Requests.JSONfetch(input, init);
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
// Issue a GET request for the stopword lists and hand back whatever
// Requests.JSONfetch returns for it.
Requests.corpora.entity.getStopwords = () => {
  const requestOptions = {method: 'GET'};
  return Requests.JSONfetch('/corpora/stopwords', requestOptions);
};
 | 
			
		||||
 | 
			
		||||
Requests.corpora.entity.isPublic = {};
 | 
			
		||||
 | 
			
		||||
Requests.corpora.entity.isPublic.update = (corpusId, isPublic) => {
 | 
			
		||||
@@ -43,4 +51,3 @@ Requests.corpora.entity.isPublic.update = (corpusId, isPublic) => {
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
@@ -98,19 +98,19 @@
 | 
			
		||||
      </div>
 | 
			
		||||
    </div>
 | 
			
		||||
    <div class="row">
 | 
			
		||||
      <div class="col s6">
 | 
			
		||||
      <div class="col s4">
 | 
			
		||||
        <div class="card hoverable">
 | 
			
		||||
          <div class="card-content">
 | 
			
		||||
            <span class="card-title">Proportions</span>
 | 
			
		||||
            <p>of texts within the corpus</p>
 | 
			
		||||
            <div id="text-proportions-graphic"></div>
 | 
			
		||||
            <div id="text-proportions-graphic" style="width:100"></div>
 | 
			
		||||
          </div>
 | 
			
		||||
        </div>
 | 
			
		||||
      </div>
 | 
			
		||||
      <div class="col s6">
 | 
			
		||||
      <div class="col s8">
 | 
			
		||||
        <div class="card hoverable">
 | 
			
		||||
          <div class="card-content">
 | 
			
		||||
            <span class="card-title"><a class="dropdown-trigger btn" data-target="frequencies-token-category-dropdown">Word<i class="material-icons right">arrow_drop_down</i></a> Frequencies</span>
 | 
			
		||||
            <span class="card-title">Frequencies</span>
 | 
			
		||||
            <ul id="frequencies-token-category-dropdown" class="dropdown-content">
 | 
			
		||||
              <li><a data-token-category="word">Word</a></li>
 | 
			
		||||
              <li><a data-token-category="lemma">Lemma</a></li>
 | 
			
		||||
@@ -119,6 +119,11 @@
 | 
			
		||||
            </ul>
 | 
			
		||||
            <p>within the texts of the 5 most frequent words in the corpus</p>
 | 
			
		||||
            <div id="frequencies-graphic"></div>
 | 
			
		||||
            <a class="dropdown-trigger btn" data-target="frequencies-token-category-dropdown">Word<i class="material-icons right">arrow_drop_down</i></a> 
 | 
			
		||||
            <a class="btn disabled frequencies-graph-mode-button" data-graph-type="bar"><i class="material-icons">equalizer</i></a>
 | 
			
		||||
            <a class="btn frequencies-graph-mode-button" data-graph-type="scatter"><i class="material-icons">show_chart</i></a>
 | 
			
		||||
            <a class="btn frequencies-graph-mode-button" data-graph-type="markers"><i class="material-icons">bubble_chart</i></a>
 | 
			
		||||
            <a class="btn-flat modal-trigger" href="#frequencies-stopwords-setting-modal"><i class="material-icons grey-text text-darken-2">settings</i></a>
 | 
			
		||||
          </div>
 | 
			
		||||
        </div>
 | 
			
		||||
      </div>
 | 
			
		||||
@@ -161,6 +166,21 @@
 | 
			
		||||
  </div>
 | 
			
		||||
</div>
 | 
			
		||||
 | 
			
		||||
<div class="modal" id="frequencies-stopwords-setting-modal">
 | 
			
		||||
  <div class="modal-content">
 | 
			
		||||
    <h4>Settings</h4>
 | 
			
		||||
    <p>Here you can change the stopword-lists. Add your own stopwords or change the already existing below.</p>
 | 
			
		||||
    <div class="chips chips-placeholder stopword-input-field"></div>
 | 
			
		||||
    <div class="row">
 | 
			
		||||
      <div class="input-field col s3">
 | 
			
		||||
        <select class="stopword-language-selection"></select>
 | 
			
		||||
        <label>Stopword language select</label>
 | 
			
		||||
      </div>
 | 
			
		||||
    </div>
 | 
			
		||||
  </div>
 | 
			
		||||
</div>
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
{% for extension in extensions %}
 | 
			
		||||
{{ extension.modals }}
 | 
			
		||||
{% endfor %}
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user