mirror of
https://gitlab.ub.uni-bielefeld.de/sfb1288inf/nopaque.git
synced 2025-01-25 00:50:35 +00:00
Add visualization data method to cqi over socketio
This commit is contained in:
parent
71359523ba
commit
e6d8d72e52
@ -1,6 +1,8 @@
|
||||
from collections import Counter
|
||||
from flask import session
|
||||
import cqi
|
||||
import math
|
||||
import random
|
||||
from app import db, socketio
|
||||
from app.decorators import socketio_login_required
|
||||
from app.models import Corpus
|
||||
@ -38,10 +40,75 @@ def cqi_corpora_corpus_query(cqi_client: cqi.CQiClient, corpus_name: str, subcor
|
||||
@cqi_over_socketio
def cqi_corpora_corpus_update_db(cqi_client: cqi.CQiClient, corpus_name: str):
    """Persist the token count of a CQi corpus on its database record.

    The ``Corpus`` row is looked up through the ``corpus_id`` stored in the
    Flask session under ``session['d']``; its ``num_tokens`` column is set
    to the size reported by the CQi corpus and the change is committed.

    Args:
        cqi_client: CQi client used to look up the corpus.
        corpus_name: Name of the corpus on the CQi side.
    """
    corpus = Corpus.query.get(session['d']['corpus_id'])
    cqi_corpus = cqi_client.corpora.get(corpus_name)
    # Read the size through the ``size`` attribute, the same way the other
    # handlers in this module do, and assign it exactly once.
    corpus.num_tokens = cqi_corpus.size
    db.session.commit()
|
||||
|
||||
|
||||
@socketio.on('cqi.corpora.corpus.get_visualization_data', namespace=ns)
@socketio_login_required
@cqi_over_socketio
def cqi_corpora_corpus_get_visualization_data(cqi_client: cqi.CQiClient, corpus_name: str):
    """Collect corpus-wide and per-text statistics for the visualizations.

    Args:
        cqi_client: CQi client used to look up the corpus.
        corpus_name: Name of the corpus on the CQi side.

    Returns:
        A dict ``{'code': 200, 'msg': 'OK', 'payload': payload}``. The
        payload holds ``num_tokens``, ``num_unique_words``, ``word_freqs``,
        ``num_unique_lemmas``, ``lemma_freqs``, ``num_sentences``,
        ``average_sentence_length``, ``num_ent_types``, ``ent_type_freqs``,
        ``num_unique_ent_types`` and ``texts``. Each entry of ``texts``
        repeats those statistics for one ``text`` region and additionally
        carries the value of every structural attribute that is part of
        ``text``, keyed by the attribute name without the ``text`` prefix.
    """

    def lexicon_freqs(attr):
        # Map every lexicon entry of a positional attribute to its
        # frequency; the id list is built once and shared by both lookups.
        lexicon_ids = list(range(attr.lexicon_size))
        return dict(
            zip(attr.values_by_ids(lexicon_ids), attr.freqs_by_ids(lexicon_ids))
        )

    def mean_sentence_length(num_tokens, num_sentences):
        # Assumes all tokens are inside a sentence; yields 0 when there are
        # no sentences instead of dividing by zero.
        return num_tokens / num_sentences if num_sentences != 0 else 0

    cqi_corpus = cqi_client.corpora.get(corpus_name)
    payload = {}
    payload['num_tokens'] = cqi_corpus.size

    cqi_word_attr = cqi_corpus.positional_attributes.get('word')
    payload['num_unique_words'] = cqi_word_attr.lexicon_size
    payload['word_freqs'] = lexicon_freqs(cqi_word_attr)

    cqi_lemma_attr = cqi_corpus.positional_attributes.get('lemma')
    payload['num_unique_lemmas'] = cqi_lemma_attr.lexicon_size
    payload['lemma_freqs'] = lexicon_freqs(cqi_lemma_attr)

    cqi_s_attr = cqi_corpus.structural_attributes.get('s')
    payload['num_sentences'] = cqi_s_attr.size
    payload['average_sentence_length'] = mean_sentence_length(
        payload['num_tokens'], payload['num_sentences']
    )

    cqi_ent_type_attr = cqi_corpus.structural_attributes.get('ent_type')
    payload['num_ent_types'] = cqi_ent_type_attr.size
    # One value per entity region, so every entity is counted once.
    payload['ent_type_freqs'] = dict(
        Counter(
            cqi_ent_type_attr.values_by_ids(list(range(cqi_ent_type_attr.size)))
        )
    )
    payload['num_unique_ent_types'] = len(payload['ent_type_freqs'])

    payload['texts'] = []
    cqi_text_attr = cqi_corpus.structural_attributes.get('text')
    # The sub-attributes of ``text`` do not depend on the individual text,
    # so they are looked up once instead of once per text.
    text_sub_attrs = tuple(
        cqi_corpus.structural_attributes.list(filters={'part_of': cqi_text_attr})
    )
    # Sub-attribute names start with the ``text`` attribute name plus one
    # separator character; this many leading characters are cut off to get
    # the payload key.
    sub_attr_name_offset = len(cqi_text_attr.name) + 1

    for text_id in range(cqi_text_attr.size):
        text_lbound, text_rbound = cqi_text_attr.cpos_by_id(text_id)
        text_cpos_list = list(range(text_lbound, text_rbound + 1))
        text_payload = {}
        text_payload['num_tokens'] = text_rbound - text_lbound + 1

        text_word_ids = cqi_word_attr.ids_by_cpos(text_cpos_list)
        text_payload['num_unique_words'] = len(set(text_word_ids))
        text_payload['word_freqs'] = dict(
            Counter(cqi_word_attr.values_by_ids(text_word_ids))
        )

        text_lemma_ids = cqi_lemma_attr.ids_by_cpos(text_cpos_list)
        text_payload['num_unique_lemmas'] = len(set(text_lemma_ids))
        # Lemma ids must be resolved through the lemma attribute; looking
        # them up in the word lexicon yields unrelated strings.
        text_payload['lemma_freqs'] = dict(
            Counter(cqi_lemma_attr.values_by_ids(text_lemma_ids))
        )

        # NOTE(review): -1 presumably marks corpus positions that lie
        # outside any region of the attribute - confirm against cqi.
        text_s_ids = {
            s_id
            for s_id in cqi_s_attr.ids_by_cpos(text_cpos_list)
            if s_id != -1
        }
        text_payload['num_sentences'] = len(text_s_ids)
        text_payload['average_sentence_length'] = mean_sentence_length(
            text_payload['num_tokens'], text_payload['num_sentences']
        )

        # ids_by_cpos yields one id per token, so a multi-token entity shows
        # up several times. De-duplicate so that each entity region is
        # counted once, matching the corpus-wide ``ent_type_freqs``.
        text_ent_type_ids = sorted({
            ent_id
            for ent_id in cqi_ent_type_attr.ids_by_cpos(text_cpos_list)
            if ent_id != -1
        })
        text_payload['num_ent_types'] = len(text_ent_type_ids)
        text_payload['ent_type_freqs'] = dict(
            Counter(cqi_ent_type_attr.values_by_ids(text_ent_type_ids))
        )
        text_payload['num_unique_ent_types'] = len(text_payload['ent_type_freqs'])

        for text_sub_attr in text_sub_attrs:
            key = text_sub_attr.name[sub_attr_name_offset:]
            text_payload[key] = text_sub_attr.values_by_ids([text_id])[0]

        payload['texts'].append(text_payload)

    return {'code': 200, 'msg': 'OK', 'payload': payload}
|
||||
|
||||
|
||||
@socketio.on('cqi.corpora.corpus.paginate', namespace=ns)
|
||||
@socketio_login_required
|
||||
@cqi_over_socketio
|
||||
@ -52,13 +119,13 @@ def cqi_corpora_corpus_paginate(cqi_client: cqi.CQiClient, corpus_name: str, pag
|
||||
per_page < 1
|
||||
or page < 1
|
||||
or (
|
||||
cqi_corpus.attrs['size'] > 0
|
||||
and page > math.ceil(cqi_corpus.attrs['size'] / per_page)
|
||||
cqi_corpus.size > 0
|
||||
and page > math.ceil(cqi_corpus.size / per_page)
|
||||
)
|
||||
):
|
||||
return {'code': 416, 'msg': 'Range Not Satisfiable'}
|
||||
first_cpos = (page - 1) * per_page
|
||||
last_cpos = min(cqi_corpus.attrs['size'], first_cpos + per_page)
|
||||
last_cpos = min(cqi_corpus.size, first_cpos + per_page)
|
||||
cpos_list = [*range(first_cpos, last_cpos)]
|
||||
lookups = lookups_by_cpos(cqi_corpus, cpos_list)
|
||||
payload = {}
|
||||
@ -67,7 +134,7 @@ def cqi_corpora_corpus_paginate(cqi_client: cqi.CQiClient, corpus_name: str, pag
|
||||
# the lookups for the items
|
||||
payload['lookups'] = lookups
|
||||
# the total number of items matching the query
|
||||
payload['total'] = cqi_corpus.attrs['size']
|
||||
payload['total'] = cqi_corpus.size
|
||||
# the number of items to be displayed on a page.
|
||||
payload['per_page'] = per_page
|
||||
# The total number of pages
|
||||
|
@ -98,6 +98,20 @@ class CQiCorpus {
|
||||
this.subcorpora = new CQiSubcorpusCollection(this.socket, this);
|
||||
}
|
||||
|
||||
getVisualizationData() {
|
||||
return new Promise((resolve, reject) => {
|
||||
const args = {corpus_name: this.name};
|
||||
|
||||
this.socket.emit('cqi.corpora.corpus.get_visualization_data', args, response => {
|
||||
if (response.code === 200) {
|
||||
resolve(response.payload);
|
||||
} else {
|
||||
reject(response);
|
||||
}
|
||||
});
|
||||
});
|
||||
}
|
||||
|
||||
getCorpusData() {
|
||||
return new Promise((resolve, reject) => {
|
||||
const dummyData = {
|
||||
|
@ -34,6 +34,15 @@ class CorpusAnalysisApp {
|
||||
.then(
|
||||
cQiCorpus => {
|
||||
this.data.corpus = {o: cQiCorpus};
|
||||
// this.data.corpus.o.getVisualizationData()
|
||||
// .then(
|
||||
// (visualizationData) => {
|
||||
// console.log(visualizationData);
|
||||
// this.renderGeneralCorpusInfo(visualizationData);
|
||||
// this.renderTextInfoList(visualizationData);
|
||||
// this.renderTextProportionsGraphic(visualizationData);
|
||||
// }
|
||||
// );
|
||||
this.data.corpus.o.getCorpusData()
|
||||
.then(corpusData => {
|
||||
this.renderGeneralCorpusInfo(corpusData);
|
||||
|
Loading…
x
Reference in New Issue
Block a user