Merge branch 'visualizations-update' of gitlab.ub.uni-bielefeld.de:sfb1288inf/nopaque into visualizations-update

This commit is contained in:
Inga Kirschnick 2023-06-19 13:42:05 +02:00
commit 11b697145b
3 changed files with 212 additions and 5 deletions

View File

@ -1,6 +1,8 @@
from collections import Counter
from flask import session from flask import session
import cqi import cqi
import math import math
import random
from app import db, socketio from app import db, socketio
from app.decorators import socketio_login_required from app.decorators import socketio_login_required
from app.models import Corpus from app.models import Corpus
@ -38,10 +40,191 @@ def cqi_corpora_corpus_query(cqi_client: cqi.CQiClient, corpus_name: str, subcor
@cqi_over_socketio @cqi_over_socketio
def cqi_corpora_corpus_update_db(cqi_client: cqi.CQiClient, corpus_name: str): def cqi_corpora_corpus_update_db(cqi_client: cqi.CQiClient, corpus_name: str):
corpus = Corpus.query.get(session['d']['corpus_id']) corpus = Corpus.query.get(session['d']['corpus_id'])
corpus.num_tokens = cqi_client.corpora.get(corpus_name).attrs['size'] cqi_corpus = cqi_client.corpora.get(corpus_name)
corpus.num_tokens = cqi_corpus.size
db.session.commit() db.session.commit()
def _p_attr_freqs(p_attr):
    # Map every lexicon id of a positional attribute to its corpus-wide
    # frequency, e.g. {0: 532, 1: 41, ...}.
    p_attr_ids = list(range(0, p_attr.lexicon_size))
    return dict(zip(p_attr_ids, p_attr.freqs_by_ids(p_attr_ids)))


def _p_attr_values(p_attr):
    # Map every lexicon id of a positional attribute to its surface value,
    # e.g. {0: 'the', 1: 'house', ...}.
    p_attr_ids = list(range(0, p_attr.lexicon_size))
    return dict(zip(p_attr_ids, p_attr.values_by_ids(p_attr_ids)))


def _sub_attr_names_and_values(corpus, s_attr):
    # For each sub-attribute of the structural attribute `s_attr`
    # (e.g. text_title for text), collect its short name with the
    # "<s_attr.name>_" prefix stripped ("title") and the value of every
    # structural element (ids 0 .. s_attr.size - 1). Returns two parallel
    # lists: (names, per-name value lists).
    names = []
    values = []
    sub_attrs = corpus.structural_attributes.list(filters={'part_of': s_attr})
    for sub_attr in sub_attrs:
        names.append(sub_attr.name[(len(s_attr.name) + 1):])
        values.append(sub_attr.values_by_ids(list(range(0, s_attr.size))))
    return names, values


@socketio.on('cqi.corpora.corpus.get_visualization_data', namespace=ns)
@socketio_login_required
@cqi_over_socketio
def cqi_corpora_corpus_get_visualization_data(cqi_client: cqi.CQiClient, corpus_name: str):
    '''
    Collect the data needed by the client-side corpus visualizations.

    The payload contains, per scope ('corpus', 'text', 's', 'ent'):
    - 'lexicon': per-element bounds/counts and positional-attribute
      frequency tables (token-id -> frequency)
    - 'values': the short names of the scope's sub-attributes
    plus a 'lookups' section that resolves structural sub-attribute values
    per element and positional-attribute ids to their surface strings.

    Returns a dict with 'code', 'msg' and the assembled 'payload'.
    '''
    corpus = cqi_client.corpora.get(corpus_name)
    text = corpus.structural_attributes.get('text')
    s = corpus.structural_attributes.get('s')
    ent = corpus.structural_attributes.get('ent')
    text_value_names, text_values = _sub_attr_names_and_values(corpus, text)
    ent_value_names, ent_values = _sub_attr_names_and_values(corpus, ent)
    word = corpus.positional_attributes.get('word')
    lemma = corpus.positional_attributes.get('lemma')
    pos = corpus.positional_attributes.get('pos')
    simple_pos = corpus.positional_attributes.get('simple_pos')
    # All positional attributes the visualizations care about, keyed by the
    # name used in the payload.
    p_attrs = {
        'word': word,
        'lemma': lemma,
        'pos': pos,
        'simple_pos': simple_pos
    }
    payload = {}
    # Corpus-wide element counts and id-frequency tables.
    payload['corpus'] = {'lexicon': {}, 'values': []}
    payload['corpus']['lexicon'][0] = {
        'bounds': [0, corpus.size - 1],
        'counts': {
            'text': text.size,
            's': s.size,
            'ent': ent.size,
            'token': corpus.size
        },
        'freqs': {
            p_attr_name: _p_attr_freqs(p_attr)
            for p_attr_name, p_attr in p_attrs.items()
        }
    }
    # Per-text bounds, element counts and id-frequency tables.
    payload['text'] = {'lexicon': {}, 'values': None}
    for text_id in range(0, text.size):
        text_lbound, text_rbound = text.cpos_by_id(text_id)
        # Materialize the cpos list once instead of once per attribute.
        text_cpos_list = list(range(text_lbound, text_rbound + 1))
        text_s_ids = s.ids_by_cpos(text_cpos_list)
        text_ent_ids = ent.ids_by_cpos(text_cpos_list)
        payload['text']['lexicon'][text_id] = {
            'bounds': [text_lbound, text_rbound],
            'counts': {
                # -1 marks corpus positions not covered by the attribute.
                's': len([x for x in text_s_ids if x != -1]),
                'ent': len([x for x in text_ent_ids if x != -1]),
                'token': text_rbound - text_lbound + 1
            },
            'freqs': {
                p_attr_name: dict(Counter(p_attr.ids_by_cpos(text_cpos_list)))
                for p_attr_name, p_attr in p_attrs.items()
            }
        }
    payload['text']['values'] = text_value_names
    # Sentences: only the ids are exposed, bounds are intentionally omitted
    # to keep the payload small.
    payload['s'] = {'lexicon': {}, 'values': None}
    for s_id in range(0, s.size):
        payload['s']['lexicon'][s_id] = {}
    payload['s']['values'] = [
        sub_attr.name[(len(s.name) + 1):]
        for sub_attr in corpus.structural_attributes.list(filters={'part_of': s})
    ]
    # Named entities: ids only, like sentences.
    payload['ent'] = {'lexicon': {}, 'values': None}
    for ent_id in range(0, ent.size):
        payload['ent']['lexicon'][ent_id] = {}
    payload['ent']['values'] = ent_value_names
    # Lookup tables resolving ids to human-readable values.
    payload['lookups'] = {
        'corpus': {},
        'text': {
            text_id: {
                text_value_name: text_values[text_value_name_idx][text_id]
                for text_value_name_idx, text_value_name in enumerate(text_value_names)
            } for text_id in range(0, text.size)
        },
        's': {},
        'ent': {
            ent_id: {
                ent_value_name: ent_values[ent_value_name_idx][ent_id]
                for ent_value_name_idx, ent_value_name in enumerate(ent_value_names)
            } for ent_id in range(0, ent.size)
        },
        'word': _p_attr_values(word),
        'lemma': _p_attr_values(lemma),
        'pos': _p_attr_values(pos),
        'simple_pos': _p_attr_values(simple_pos)
    }
    return {'code': 200, 'msg': 'OK', 'payload': payload}
@socketio.on('cqi.corpora.corpus.paginate', namespace=ns) @socketio.on('cqi.corpora.corpus.paginate', namespace=ns)
@socketio_login_required @socketio_login_required
@cqi_over_socketio @cqi_over_socketio
@ -52,13 +235,13 @@ def cqi_corpora_corpus_paginate(cqi_client: cqi.CQiClient, corpus_name: str, pag
per_page < 1 per_page < 1
or page < 1 or page < 1
or ( or (
cqi_corpus.attrs['size'] > 0 cqi_corpus.size > 0
and page > math.ceil(cqi_corpus.attrs['size'] / per_page) and page > math.ceil(cqi_corpus.size / per_page)
) )
): ):
return {'code': 416, 'msg': 'Range Not Satisfiable'} return {'code': 416, 'msg': 'Range Not Satisfiable'}
first_cpos = (page - 1) * per_page first_cpos = (page - 1) * per_page
last_cpos = min(cqi_corpus.attrs['size'], first_cpos + per_page) last_cpos = min(cqi_corpus.size, first_cpos + per_page)
cpos_list = [*range(first_cpos, last_cpos)] cpos_list = [*range(first_cpos, last_cpos)]
lookups = lookups_by_cpos(cqi_corpus, cpos_list) lookups = lookups_by_cpos(cqi_corpus, cpos_list)
payload = {} payload = {}
@ -67,7 +250,7 @@ def cqi_corpora_corpus_paginate(cqi_client: cqi.CQiClient, corpus_name: str, pag
# the lookups for the items # the lookups for the items
payload['lookups'] = lookups payload['lookups'] = lookups
# the total number of items matching the query # the total number of items matching the query
payload['total'] = cqi_corpus.attrs['size'] payload['total'] = cqi_corpus.size
# the number of items to be displayed on a page. # the number of items to be displayed on a page.
payload['per_page'] = per_page payload['per_page'] = per_page
# The total number of pages # The total number of pages

View File

@ -98,6 +98,20 @@ class CQiCorpus {
this.subcorpora = new CQiSubcorpusCollection(this.socket, this); this.subcorpora = new CQiSubcorpusCollection(this.socket, this);
} }
getVisualizationData() {
return new Promise((resolve, reject) => {
const args = {corpus_name: this.name};
this.socket.emit('cqi.corpora.corpus.get_visualization_data', args, response => {
if (response.code === 200) {
resolve(response.payload);
} else {
reject(response);
}
});
});
}
getCorpusData() { getCorpusData() {
return new Promise((resolve, reject) => { return new Promise((resolve, reject) => {
const dummyData = { const dummyData = {

View File

@ -34,6 +34,16 @@ class CorpusAnalysisApp {
.then( .then(
cQiCorpus => { cQiCorpus => {
this.data.corpus = {o: cQiCorpus}; this.data.corpus = {o: cQiCorpus};
this.data.corpus.o.getVisualizationData().then(data => console.log(data));
// this.data.corpus.o.getVisualizationData()
// .then(
// (visualizationData) => {
// console.log(visualizationData);
// this.renderGeneralCorpusInfo(visualizationData);
// this.renderTextInfoList(visualizationData);
// this.renderTextProportionsGraphic(visualizationData);
// }
// );
this.data.corpus.o.getCorpusData() this.data.corpus.o.getCorpusData()
.then(corpusData => { .then(corpusData => {
this.renderGeneralCorpusInfo(corpusData); this.renderGeneralCorpusInfo(corpusData);