Add visualization data method to cqi over socketio

This commit is contained in:
Patrick Jentsch 2023-06-14 14:50:04 +02:00
parent 71359523ba
commit e6d8d72e52
3 changed files with 95 additions and 5 deletions

View File

@ -1,6 +1,8 @@
from collections import Counter
from flask import session from flask import session
import cqi import cqi
import math import math
import random
from app import db, socketio from app import db, socketio
from app.decorators import socketio_login_required from app.decorators import socketio_login_required
from app.models import Corpus from app.models import Corpus
@ -38,10 +40,75 @@ def cqi_corpora_corpus_query(cqi_client: cqi.CQiClient, corpus_name: str, subcor
@cqi_over_socketio @cqi_over_socketio
def cqi_corpora_corpus_update_db(cqi_client: cqi.CQiClient, corpus_name: str): def cqi_corpora_corpus_update_db(cqi_client: cqi.CQiClient, corpus_name: str):
corpus = Corpus.query.get(session['d']['corpus_id']) corpus = Corpus.query.get(session['d']['corpus_id'])
corpus.num_tokens = cqi_client.corpora.get(corpus_name).attrs['size'] cqi_corpus = cqi_client.corpora.get(corpus_name)
corpus.num_tokens = cqi_corpus.size
db.session.commit() db.session.commit()
@socketio.on('cqi.corpora.corpus.get_visualization_data', namespace=ns)
@socketio_login_required
@cqi_over_socketio
def cqi_corpora_corpus_get_visualization_data(cqi_client: cqi.CQiClient, corpus_name: str):
    """Collect corpus-wide and per-text statistics for the visualization view.

    The payload holds token, word, lemma, sentence and entity type statistics
    for the whole corpus, plus a ``texts`` list with the same statistics for
    every ``text`` region and the values of its ``text_*`` sub-attributes.

    :param cqi_client: CQi client used to query the corpus.
    :param corpus_name: Name of the corpus to gather the statistics for.
    :return: Response dict with ``code``, ``msg`` and ``payload`` keys.
    """
    cqi_corpus = cqi_client.corpora.get(corpus_name)
    payload = {}
    payload['num_tokens'] = cqi_corpus.size

    # Word statistics: look up every lexicon entry together with its frequency.
    cqi_word_attr = cqi_corpus.positional_attributes.get('word')
    payload['num_unique_words'] = cqi_word_attr.lexicon_size
    word_ids = list(range(cqi_word_attr.lexicon_size))
    payload['word_freqs'] = dict(
        zip(
            cqi_word_attr.values_by_ids(word_ids),
            cqi_word_attr.freqs_by_ids(word_ids)
        )
    )

    # Lemma statistics, built the same way as the word statistics.
    cqi_lemma_attr = cqi_corpus.positional_attributes.get('lemma')
    payload['num_unique_lemmas'] = cqi_lemma_attr.lexicon_size
    lemma_ids = list(range(cqi_lemma_attr.lexicon_size))
    payload['lemma_freqs'] = dict(
        zip(
            cqi_lemma_attr.values_by_ids(lemma_ids),
            cqi_lemma_attr.freqs_by_ids(lemma_ids)
        )
    )

    # Sentence statistics
    cqi_s_attr = cqi_corpus.structural_attributes.get('s')
    payload['num_sentences'] = cqi_s_attr.size
    # assuming all tokens are in a sentence
    payload['average_sentence_length'] = (
        payload['num_tokens'] / payload['num_sentences']
        if payload['num_sentences'] else 0
    )

    # Entity type statistics
    cqi_ent_type_attr = cqi_corpus.structural_attributes.get('ent_type')
    payload['num_ent_types'] = cqi_ent_type_attr.size
    ent_type_ids = list(range(cqi_ent_type_attr.size))
    payload['ent_type_freqs'] = dict(
        Counter(cqi_ent_type_attr.values_by_ids(ent_type_ids))
    )
    payload['num_unique_ent_types'] = len(payload['ent_type_freqs'])

    # Per-text statistics
    payload['texts'] = []
    cqi_text_attr = cqi_corpus.structural_attributes.get('text')
    # The sub-attributes of "text" do not depend on the individual text, so
    # they are listed (and materialized) once and reused for every text.
    text_sub_attrs = list(
        cqi_corpus.structural_attributes.list(filters={'part_of': cqi_text_attr})
    )
    # Sub-attribute names are stripped of the "<text attribute name>_" prefix.
    text_sub_attr_prefix_len = len(cqi_text_attr.name) + 1
    for text_id in range(cqi_text_attr.size):
        text_lbound, text_rbound = cqi_text_attr.cpos_by_id(text_id)
        text_cpos_list = list(range(text_lbound, text_rbound + 1))
        text_payload = {}
        text_payload['num_tokens'] = text_rbound - text_lbound + 1

        text_word_ids = cqi_word_attr.ids_by_cpos(text_cpos_list)
        text_payload['num_unique_words'] = len(set(text_word_ids))
        text_payload['word_freqs'] = dict(
            Counter(cqi_word_attr.values_by_ids(text_word_ids))
        )

        text_lemma_ids = cqi_lemma_attr.ids_by_cpos(text_cpos_list)
        text_payload['num_unique_lemmas'] = len(set(text_lemma_ids))
        # Lemma ids must be resolved against the lemma lexicon; resolving them
        # against the word lexicon would return unrelated strings.
        text_payload['lemma_freqs'] = dict(
            Counter(cqi_lemma_attr.values_by_ids(text_lemma_ids))
        )

        # Ids of -1 are dropped; presumably they mark corpus positions that
        # are not covered by any region of the attribute.
        text_s_attr_ids = [
            s_id for s_id in cqi_s_attr.ids_by_cpos(text_cpos_list)
            if s_id != -1
        ]
        text_payload['num_sentences'] = len(set(text_s_attr_ids))
        # assuming all tokens are in a sentence
        text_payload['average_sentence_length'] = (
            text_payload['num_tokens'] / text_payload['num_sentences']
            if text_payload['num_sentences'] else 0
        )

        text_ent_type_ids = [
            ent_type_id
            for ent_type_id in cqi_ent_type_attr.ids_by_cpos(text_cpos_list)
            if ent_type_id != -1
        ]
        text_payload['num_ent_types'] = len(set(text_ent_type_ids))
        text_payload['ent_type_freqs'] = dict(
            Counter(cqi_ent_type_attr.values_by_ids(text_ent_type_ids))
        )
        text_payload['num_unique_ent_types'] = len(text_payload['ent_type_freqs'])

        for text_sub_attr in text_sub_attrs:
            sub_attr_key = text_sub_attr.name[text_sub_attr_prefix_len:]
            text_payload[sub_attr_key] = text_sub_attr.values_by_ids([text_id])[0]
        payload['texts'].append(text_payload)
    return {'code': 200, 'msg': 'OK', 'payload': payload}
@socketio.on('cqi.corpora.corpus.paginate', namespace=ns) @socketio.on('cqi.corpora.corpus.paginate', namespace=ns)
@socketio_login_required @socketio_login_required
@cqi_over_socketio @cqi_over_socketio
@ -52,13 +119,13 @@ def cqi_corpora_corpus_paginate(cqi_client: cqi.CQiClient, corpus_name: str, pag
per_page < 1 per_page < 1
or page < 1 or page < 1
or ( or (
cqi_corpus.attrs['size'] > 0 cqi_corpus.size > 0
and page > math.ceil(cqi_corpus.attrs['size'] / per_page) and page > math.ceil(cqi_corpus.size / per_page)
) )
): ):
return {'code': 416, 'msg': 'Range Not Satisfiable'} return {'code': 416, 'msg': 'Range Not Satisfiable'}
first_cpos = (page - 1) * per_page first_cpos = (page - 1) * per_page
last_cpos = min(cqi_corpus.attrs['size'], first_cpos + per_page) last_cpos = min(cqi_corpus.size, first_cpos + per_page)
cpos_list = [*range(first_cpos, last_cpos)] cpos_list = [*range(first_cpos, last_cpos)]
lookups = lookups_by_cpos(cqi_corpus, cpos_list) lookups = lookups_by_cpos(cqi_corpus, cpos_list)
payload = {} payload = {}
@ -67,7 +134,7 @@ def cqi_corpora_corpus_paginate(cqi_client: cqi.CQiClient, corpus_name: str, pag
# the lookups for the items # the lookups for the items
payload['lookups'] = lookups payload['lookups'] = lookups
# the total number of items matching the query # the total number of items matching the query
payload['total'] = cqi_corpus.attrs['size'] payload['total'] = cqi_corpus.size
# the number of items to be displayed on a page. # the number of items to be displayed on a page.
payload['per_page'] = per_page payload['per_page'] = per_page
# The total number of pages # The total number of pages

View File

@ -98,6 +98,20 @@ class CQiCorpus {
this.subcorpora = new CQiSubcorpusCollection(this.socket, this); this.subcorpora = new CQiSubcorpusCollection(this.socket, this);
} }
getVisualizationData() {
return new Promise((resolve, reject) => {
const args = {corpus_name: this.name};
this.socket.emit('cqi.corpora.corpus.get_visualization_data', args, response => {
if (response.code === 200) {
resolve(response.payload);
} else {
reject(response);
}
});
});
}
getCorpusData() { getCorpusData() {
return new Promise((resolve, reject) => { return new Promise((resolve, reject) => {
const dummyData = { const dummyData = {

View File

@ -34,6 +34,15 @@ class CorpusAnalysisApp {
.then( .then(
cQiCorpus => { cQiCorpus => {
this.data.corpus = {o: cQiCorpus}; this.data.corpus = {o: cQiCorpus};
// this.data.corpus.o.getVisualizationData()
// .then(
// (visualizationData) => {
// console.log(visualizationData);
// this.renderGeneralCorpusInfo(visualizationData);
// this.renderTextInfoList(visualizationData);
// this.renderTextProportionsGraphic(visualizationData);
// }
// );
this.data.corpus.o.getCorpusData() this.data.corpus.o.getCorpusData()
.then(corpusData => { .then(corpusData => {
this.renderGeneralCorpusInfo(corpusData); this.renderGeneralCorpusInfo(corpusData);