mirror of
				https://gitlab.ub.uni-bielefeld.de/sfb1288inf/nopaque.git
				synced 2025-11-04 12:22:47 +00:00 
			
		
		
		
	Merge branch 'visualizations-update' of gitlab.ub.uni-bielefeld.de:sfb1288inf/nopaque into visualizations-update
This commit is contained in:
		@@ -1,6 +1,8 @@
 | 
			
		||||
from collections import Counter
 | 
			
		||||
from flask import session
 | 
			
		||||
import cqi
 | 
			
		||||
import math
 | 
			
		||||
import random
 | 
			
		||||
from app import db, socketio
 | 
			
		||||
from app.decorators import socketio_login_required
 | 
			
		||||
from app.models import Corpus
 | 
			
		||||
@@ -38,10 +40,191 @@ def cqi_corpora_corpus_query(cqi_client: cqi.CQiClient, corpus_name: str, subcor
 | 
			
		||||
@cqi_over_socketio
def cqi_corpora_corpus_update_db(cqi_client: cqi.CQiClient, corpus_name: str):
    """Store the token count of the CQi corpus on its database record.

    The database record is the ``Corpus`` row whose id is read from
    ``session['d']['corpus_id']``; its ``num_tokens`` column is set to the
    ``size`` of the CQi corpus named *corpus_name* and the change is
    committed.

    Args:
        cqi_client: Client used to look up the corpus.
        corpus_name: Name of the corpus to look up via the client.
    """
    corpus = Corpus.query.get(session['d']['corpus_id'])
    # Fetch the CQi corpus once; a second lookup through ``attrs['size']``
    # would only be overwritten by this assignment.
    cqi_corpus = cqi_client.corpora.get(corpus_name)
    corpus.num_tokens = cqi_corpus.size
    db.session.commit()
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
# Positional attributes whose frequencies and lexicon lookups are exported.
_VISUALIZATION_P_ATTR_NAMES = ('word', 'lemma', 'pos', 'simple_pos')


def _sub_attr_values(cqi_corpus, s_attr):
    """Return ``{short name: values}`` for every sub-attribute of *s_attr*.

    The short name is the sub-attribute name with the first
    ``len(s_attr.name) + 1`` characters sliced off; the values are fetched
    for all region ids ``0 .. s_attr.size - 1``.
    """
    prefix_length = len(s_attr.name) + 1
    region_ids = list(range(s_attr.size))
    return {
        sub_attr.name[prefix_length:]: sub_attr.values_by_ids(region_ids)
        for sub_attr in cqi_corpus.structural_attributes.list(
            filters={'part_of': s_attr}
        )
    }


def _region_lookups(s_attr, sub_attr_values):
    """Return ``{region id: {short name: value}}`` for all regions of *s_attr*."""
    return {
        region_id: {
            name: values[region_id]
            for name, values in sub_attr_values.items()
        }
        for region_id in range(s_attr.size)
    }


def _by_lexicon_id(p_attr, fetch_by_ids):
    """Map every lexicon id of *p_attr* to the item *fetch_by_ids* returns."""
    lexicon_ids = list(range(p_attr.lexicon_size))
    return dict(zip(lexicon_ids, fetch_by_ids(lexicon_ids)))


def _count_regions(region_ids):
    """Count the distinct region ids in *region_ids*, ignoring ``-1``.

    *region_ids* holds one entry per corpus position, so a region spanning
    several tokens shows up several times and must be counted only once.
    ``-1`` entries are skipped (presumably positions outside any region --
    confirm against the CQi ``cpos2struc`` semantics).
    """
    return len({region_id for region_id in region_ids if region_id != -1})


@socketio.on('cqi.corpora.corpus.get_visualization_data', namespace=ns)
@socketio_login_required
@cqi_over_socketio
def cqi_corpora_corpus_get_visualization_data(cqi_client: cqi.CQiClient, corpus_name: str):
    """Collect counts, frequencies and lookups for the corpus visualizations.

    Args:
        cqi_client: Client used to look up the corpus.
        corpus_name: Name of the corpus to look up via the client.

    Returns:
        ``{'code': 200, 'msg': 'OK', 'payload': payload}`` where ``payload``
        has the keys

        * ``corpus`` -- one lexicon entry (id ``0``) with the corpus bounds,
          the number of ``text``/``s``/``ent`` regions and tokens, and the
          frequency of every lexicon id per positional attribute,
        * ``text`` -- one lexicon entry per text with its bounds, the number
          of ``s``/``ent`` regions and tokens inside it and the frequency of
          the lexicon ids occurring in it, plus the short names of the text
          sub-attributes under ``values``,
        * ``s`` / ``ent`` -- an empty lexicon entry per region plus the short
          names of the respective sub-attributes under ``values``,
        * ``lookups`` -- sub-attribute values per ``text``/``ent`` region and
          the lexicon id to value mapping per positional attribute.
    """
    corpus = cqi_client.corpora.get(corpus_name)
    text = corpus.structural_attributes.get('text')
    s = corpus.structural_attributes.get('s')
    ent = corpus.structural_attributes.get('ent')
    p_attrs = {
        name: corpus.positional_attributes.get(name)
        for name in _VISUALIZATION_P_ATTR_NAMES
    }
    text_values = _sub_attr_values(corpus, text)
    ent_values = _sub_attr_values(corpus, ent)

    payload = {}
    payload['corpus'] = {
        'lexicon': {
            0: {
                'bounds': [0, corpus.size - 1],
                'counts': {
                    'text': text.size,
                    's': s.size,
                    'ent': ent.size,
                    'token': corpus.size
                },
                'freqs': {
                    name: _by_lexicon_id(p_attr, p_attr.freqs_by_ids)
                    for name, p_attr in p_attrs.items()
                }
            }
        },
        'values': []
    }

    text_lexicon = {}
    for text_id in range(text.size):
        text_lbound, text_rbound = text.cpos_by_id(text_id)
        # Build the position list once; every query below reuses it.
        text_cpos_list = list(range(text_lbound, text_rbound + 1))
        text_lexicon[text_id] = {
            'bounds': [text_lbound, text_rbound],
            'counts': {
                # Count regions, not the tokens inside them, so these
                # numbers are comparable to the corpus-level s.size/ent.size.
                's': _count_regions(s.ids_by_cpos(text_cpos_list)),
                'ent': _count_regions(ent.ids_by_cpos(text_cpos_list)),
                'token': text_rbound - text_lbound + 1
            },
            'freqs': {
                name: dict(Counter(p_attr.ids_by_cpos(text_cpos_list)))
                for name, p_attr in p_attrs.items()
            }
        }
    payload['text'] = {'lexicon': text_lexicon, 'values': list(text_values)}

    # Sentence and entity lexicon entries carry no data; only their ids are
    # sent.
    s_prefix_length = len(s.name) + 1
    payload['s'] = {
        'lexicon': {s_id: {} for s_id in range(s.size)},
        'values': [
            sub_attr.name[s_prefix_length:]
            for sub_attr in corpus.structural_attributes.list(
                filters={'part_of': s}
            )
        ]
    }
    payload['ent'] = {
        'lexicon': {ent_id: {} for ent_id in range(ent.size)},
        'values': list(ent_values)
    }

    lookups = {
        'corpus': {},
        'text': _region_lookups(text, text_values),
        's': {},
        'ent': _region_lookups(ent, ent_values)
    }
    for name, p_attr in p_attrs.items():
        lookups[name] = _by_lexicon_id(p_attr, p_attr.values_by_ids)
    payload['lookups'] = lookups

    return {'code': 200, 'msg': 'OK', 'payload': payload}
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@socketio.on('cqi.corpora.corpus.paginate', namespace=ns)
 | 
			
		||||
@socketio_login_required
 | 
			
		||||
@cqi_over_socketio
 | 
			
		||||
@@ -52,13 +235,13 @@ def cqi_corpora_corpus_paginate(cqi_client: cqi.CQiClient, corpus_name: str, pag
 | 
			
		||||
        per_page < 1
 | 
			
		||||
        or page < 1
 | 
			
		||||
        or (
 | 
			
		||||
            cqi_corpus.attrs['size'] > 0
 | 
			
		||||
            and page > math.ceil(cqi_corpus.attrs['size'] / per_page)
 | 
			
		||||
            cqi_corpus.size > 0
 | 
			
		||||
            and page > math.ceil(cqi_corpus.size / per_page)
 | 
			
		||||
        )
 | 
			
		||||
    ):
 | 
			
		||||
        return {'code': 416, 'msg': 'Range Not Satisfiable'}
 | 
			
		||||
    first_cpos = (page - 1) * per_page
 | 
			
		||||
    last_cpos = min(cqi_corpus.attrs['size'], first_cpos + per_page)
 | 
			
		||||
    last_cpos = min(cqi_corpus.size, first_cpos + per_page)
 | 
			
		||||
    cpos_list = [*range(first_cpos, last_cpos)]
 | 
			
		||||
    lookups = lookups_by_cpos(cqi_corpus, cpos_list)
 | 
			
		||||
    payload = {}
 | 
			
		||||
@@ -67,7 +250,7 @@ def cqi_corpora_corpus_paginate(cqi_client: cqi.CQiClient, corpus_name: str, pag
 | 
			
		||||
    # the lookups for the items
 | 
			
		||||
    payload['lookups'] = lookups
 | 
			
		||||
    # the total number of items matching the query
 | 
			
		||||
    payload['total'] = cqi_corpus.attrs['size']
 | 
			
		||||
    payload['total'] = cqi_corpus.size
 | 
			
		||||
    # the number of items to be displayed on a page.
 | 
			
		||||
    payload['per_page'] = per_page
 | 
			
		||||
    # The total number of pages
 | 
			
		||||
 
 | 
			
		||||
@@ -98,6 +98,20 @@ class CQiCorpus {
 | 
			
		||||
    this.subcorpora = new CQiSubcorpusCollection(this.socket, this);
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  getVisualizationData() {
 | 
			
		||||
    return new Promise((resolve, reject) => {
 | 
			
		||||
      const args = {corpus_name: this.name};
 | 
			
		||||
 | 
			
		||||
      this.socket.emit('cqi.corpora.corpus.get_visualization_data', args, response => {
 | 
			
		||||
        if (response.code === 200) {
 | 
			
		||||
          resolve(response.payload);
 | 
			
		||||
        } else {
 | 
			
		||||
          reject(response);
 | 
			
		||||
        }
 | 
			
		||||
      });
 | 
			
		||||
    });
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  getCorpusData() {
 | 
			
		||||
    return new Promise((resolve, reject) => {
 | 
			
		||||
      const dummyData = {
 | 
			
		||||
 
 | 
			
		||||
@@ -34,6 +34,16 @@ class CorpusAnalysisApp {
 | 
			
		||||
      .then(
 | 
			
		||||
        cQiCorpus => {
 | 
			
		||||
          this.data.corpus = {o: cQiCorpus};
 | 
			
		||||
          this.data.corpus.o.getVisualizationData().then(data => console.log(data));
 | 
			
		||||
          // this.data.corpus.o.getVisualizationData()
 | 
			
		||||
          //   .then(
 | 
			
		||||
          //     (visualizationData) => {
 | 
			
		||||
          //       console.log(visualizationData);
 | 
			
		||||
          //       this.renderGeneralCorpusInfo(visualizationData);
 | 
			
		||||
          //       this.renderTextInfoList(visualizationData);
 | 
			
		||||
          //       this.renderTextProportionsGraphic(visualizationData);
 | 
			
		||||
          //     }
 | 
			
		||||
          //   );
 | 
			
		||||
          this.data.corpus.o.getCorpusData()
 | 
			
		||||
            .then(corpusData => {
 | 
			
		||||
              this.renderGeneralCorpusInfo(corpusData);
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user