diff --git a/app/corpora/cqi_over_socketio/cqi_corpora_corpus.py b/app/corpora/cqi_over_socketio/cqi_corpora_corpus.py
index 9a976dd7..3e49ac09 100644
--- a/app/corpora/cqi_over_socketio/cqi_corpora_corpus.py
+++ b/app/corpora/cqi_over_socketio/cqi_corpora_corpus.py
@@ -1,6 +1,8 @@
+from collections import Counter
 from flask import session
 import cqi
 import math
+import random
 from app import db, socketio
 from app.decorators import socketio_login_required
 from app.models import Corpus
@@ -38,10 +40,191 @@ def cqi_corpora_corpus_query(cqi_client: cqi.CQiClient, corpus_name: str, subcor
 @cqi_over_socketio
 def cqi_corpora_corpus_update_db(cqi_client: cqi.CQiClient, corpus_name: str):
     corpus = Corpus.query.get(session['d']['corpus_id'])
-    corpus.num_tokens = cqi_client.corpora.get(corpus_name).attrs['size']
+    cqi_corpus = cqi_client.corpora.get(corpus_name)
+    corpus.num_tokens = cqi_corpus.size
     db.session.commit()
 
 
+@socketio.on('cqi.corpora.corpus.get_visualization_data', namespace=ns)
+@socketio_login_required
+@cqi_over_socketio
+def cqi_corpora_corpus_get_visualization_data(cqi_client: cqi.CQiClient, corpus_name: str):
+    corpus = cqi_client.corpora.get(corpus_name)
+    # s_attrs = [x for x in corpus.structural_attributes.list() if not x.has_values]
+    # p_attrs = corpus.positional_attributes.list()
+    # payload = {
+    #     's_attrs': {},
+    #     'p_attrs': {},
+    #     'values': {
+    #         's_attrs': {},
+    #         'p_attrs': {}
+    #     }
+    # }
+    # for s_attr in s_attrs:
+    #     s_attr_lbound, s_attr_rbound = s_attr.cpos_by_id(text_id)
+    #     s_attr_cpos_range = range(s_attr_lbound, s_attr_rbound + 1)
+    #     payload['text']['lexicon'][text_id] = {
+    #         's_attrs': [s_attr_lbound, s_attr_rbound],
+    #         'counts': {
+    #             'token': s_attr_rbound - s_attr_lbound + 1
+    #         },
+    #         'freqs': {
+    #             p_attr.name: dict(Counter(p_attr.ids_by_cpos(list(s_attr_cpos_range))))
+    #             for p_attr in p_attrs
+    #         }
+    #     }
+    # for p_attr in p_attrs:
+    #     payload['p_attrs'] = dict(
+
+    #     )
+    #     payload['values']['p_attrs'] = dict(
+    #         zip(
+    #             range(0, p_attr.lexicon_size),
+    #             p_attr.values_by_ids(list(range(0, p_attr.lexicon_size)))
+    #         )
+    #     )
+    text = corpus.structural_attributes.get('text')
+    text_value_names = []
+    text_values = []
+    for text_sub_attr in corpus.structural_attributes.list(filters={'part_of': text}):
+        text_value_names.append(text_sub_attr.name[(len(text.name) + 1):])
+        text_values.append(text_sub_attr.values_by_ids(list(range(0, text.size))))
+    s = corpus.structural_attributes.get('s')
+    ent = corpus.structural_attributes.get('ent')
+    ent_value_names = []
+    ent_values = []
+    for ent_sub_attr in corpus.structural_attributes.list(filters={'part_of': ent}):
+        ent_value_names.append(ent_sub_attr.name[(len(ent.name) + 1):])
+        ent_values.append(ent_sub_attr.values_by_ids(list(range(0, ent.size))))
+    word = corpus.positional_attributes.get('word')
+    lemma = corpus.positional_attributes.get('lemma')
+    pos = corpus.positional_attributes.get('pos')
+    simple_pos = corpus.positional_attributes.get('simple_pos')
+    payload = {}
+    payload['corpus'] = {'lexicon': {}, 'values': []}
+    payload['corpus']['lexicon'][0] = {
+        'bounds': [0, corpus.size - 1],
+        'counts': {
+            'text': text.size,
+            's': s.size,
+            'ent': ent.size,
+            'token': corpus.size
+        },
+        'freqs': {
+            'word': dict(
+                zip(
+                    range(0, word.lexicon_size),
+                    word.freqs_by_ids(list(range(0, word.lexicon_size)))
+                )
+            ),
+            'lemma': dict(
+                zip(
+                    range(0, lemma.lexicon_size),
+                    lemma.freqs_by_ids(list(range(0, lemma.lexicon_size)))
+                )
+            ),
+            'pos': dict(
+                zip(
+                    range(0, pos.lexicon_size),
+                    pos.freqs_by_ids(list(range(0, pos.lexicon_size)))
+                )
+            ),
+            'simple_pos': dict(
+                zip(
+                    range(0, simple_pos.lexicon_size),
+                    simple_pos.freqs_by_ids(list(range(0, simple_pos.lexicon_size)))
+                )
+            )
+        }
+    }
+    payload['text'] = {'lexicon': {}, 'values': None}
+    for text_id in range(0, text.size):
+        text_lbound, text_rbound = text.cpos_by_id(text_id)
+        text_cpos_range = range(text_lbound, text_rbound + 1)
+        text_s_ids = s.ids_by_cpos(list(text_cpos_range))
+        text_ent_ids = ent.ids_by_cpos(list(text_cpos_range))
+        payload['text']['lexicon'][text_id] = {
+            'bounds': [text_lbound, text_rbound],
+            'counts': {
+                's': len([x for x in text_s_ids if x != -1]),
+                'ent': len([x for x in text_ent_ids if x != -1]),
+                'token': text_rbound - text_lbound + 1
+            },
+            'freqs': {
+                'word': dict(
+                    Counter(word.ids_by_cpos(list(text_cpos_range)))
+                ),
+                'lemma': dict(
+                    Counter(lemma.ids_by_cpos(list(text_cpos_range)))
+                ),
+                'pos': dict(
+                    Counter(pos.ids_by_cpos(list(text_cpos_range)))
+                ),
+                'simple_pos': dict(
+                    Counter(simple_pos.ids_by_cpos(list(text_cpos_range)))
+                )
+            }
+        }
+    payload['text']['values'] = text_value_names
+    payload['s'] = {'lexicon': {}, 'values': None}
+    for s_id in range(0, s.size):
+        payload['s']['lexicon'][s_id] = {
+            # 'bounds': s.cpos_by_id(s_id)
+        }
+    payload['s']['values'] = [
+        sub_attr.name[(len(s.name) + 1):]
+        for sub_attr in corpus.structural_attributes.list(filters={'part_of': s})
+    ]
+    payload['ent'] = {'lexicon': {}, 'values': None}
+    for ent_id in range(0, ent.size):
+        payload['ent']['lexicon'][ent_id] = {
+            # 'bounds': ent.cpos_by_id(ent_id)
+        }
+    payload['ent']['values'] = ent_value_names
+    payload['lookups'] = {
+        'corpus': {},
+        'text': {
+            text_id: {
+                text_value_name: text_values[text_value_name_idx][text_id_idx]
+                for text_value_name_idx, text_value_name in enumerate(text_value_names)
+            } for text_id_idx, text_id in enumerate(range(0, text.size))
+        },
+        's': {},
+        'ent': {
+            ent_id: {
+                ent_value_name: ent_values[ent_value_name_idx][ent_id_idx]
+                for ent_value_name_idx, ent_value_name in enumerate(ent_value_names)
+            } for ent_id_idx, ent_id in enumerate(range(0, ent.size))
+        },
+        'word': dict(
+            zip(
+                range(0, word.lexicon_size),
+                word.values_by_ids(list(range(0, word.lexicon_size)))
+            )
+        ),
+        'lemma': dict(
+            zip(
+                range(0, lemma.lexicon_size),
+                lemma.values_by_ids(list(range(0, lemma.lexicon_size)))
+            )
+        ),
+        'pos': dict(
+            zip(
+                range(0, pos.lexicon_size),
+                pos.values_by_ids(list(range(0, pos.lexicon_size)))
+            )
+        ),
+        'simple_pos': dict(
+            zip(
+                range(0, simple_pos.lexicon_size),
+                simple_pos.values_by_ids(list(range(0, simple_pos.lexicon_size)))
+            )
+        )
+    }
+    # print(payload)
+    return {'code': 200, 'msg': 'OK', 'payload': payload}
+
+
 @socketio.on('cqi.corpora.corpus.paginate', namespace=ns)
 @socketio_login_required
 @cqi_over_socketio
@@ -52,13 +235,13 @@ def cqi_corpora_corpus_paginate(cqi_client: cqi.CQiClient, corpus_name: str, pag
         per_page < 1
         or page < 1
         or (
-            cqi_corpus.attrs['size'] > 0
-            and page > math.ceil(cqi_corpus.attrs['size'] / per_page)
+            cqi_corpus.size > 0
+            and page > math.ceil(cqi_corpus.size / per_page)
         )
     ):
         return {'code': 416, 'msg': 'Range Not Satisfiable'}
     first_cpos = (page - 1) * per_page
-    last_cpos = min(cqi_corpus.attrs['size'], first_cpos + per_page)
+    last_cpos = min(cqi_corpus.size, first_cpos + per_page)
     cpos_list = [*range(first_cpos, last_cpos)]
     lookups = lookups_by_cpos(cqi_corpus, cpos_list)
     payload = {}
@@ -67,7 +250,7 @@ def cqi_corpora_corpus_paginate(cqi_client: cqi.CQiClient, corpus_name: str, pag
     # the lookups for the items
     payload['lookups'] = lookups
     # the total number of items matching the query
-    payload['total'] = cqi_corpus.attrs['size']
+    payload['total'] = cqi_corpus.size
     # the number of items to be displayed on a page.
     payload['per_page'] = per_page
     # The total number of pages
diff --git a/app/static/js/CorpusAnalysis/CQiClient.js b/app/static/js/CorpusAnalysis/CQiClient.js
index 92cc6422..014c6d37 100644
--- a/app/static/js/CorpusAnalysis/CQiClient.js
+++ b/app/static/js/CorpusAnalysis/CQiClient.js
@@ -98,6 +98,20 @@ class CQiCorpus {
     this.subcorpora = new CQiSubcorpusCollection(this.socket, this);
   }
 
+  getVisualizationData() {
+    return new Promise((resolve, reject) => {
+      const args = {corpus_name: this.name};
+
+      this.socket.emit('cqi.corpora.corpus.get_visualization_data', args, response => {
+        if (response.code === 200) {
+          resolve(response.payload);
+        } else {
+          reject(response);
+        }
+      });
+    });
+  }
+
   getCorpusData() {
     return new Promise((resolve, reject) => {
       const dummyData = {
diff --git a/app/static/js/CorpusAnalysis/CorpusAnalysisApp.js b/app/static/js/CorpusAnalysis/CorpusAnalysisApp.js
index fbb91b4c..077326a3 100644
--- a/app/static/js/CorpusAnalysis/CorpusAnalysisApp.js
+++ b/app/static/js/CorpusAnalysis/CorpusAnalysisApp.js
@@ -34,6 +34,16 @@ class CorpusAnalysisApp {
       .then(
         cQiCorpus => {
           this.data.corpus = {o: cQiCorpus};
+          this.data.corpus.o.getVisualizationData().then(data => console.log(data));
+          // this.data.corpus.o.getVisualizationData()
+          //   .then(
+          //     (visualizationData) => {
+          //       console.log(visualizationData);
+          //       this.renderGeneralCorpusInfo(visualizationData);
+          //       this.renderTextInfoList(visualizationData);
+          //       this.renderTextProportionsGraphic(visualizationData);
+          //     }
+          //   );
           this.data.corpus.o.getCorpusData()
             .then(corpusData => {
               this.renderGeneralCorpusInfo(corpusData);
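
For reference, a rough sketch of the payload shape that the new cqi.corpora.corpus.get_visualization_data handler returns, as inferred from the server-side code in this diff. All concrete ids, counts, tag names, and sub-attribute names (e.g. 'author', 'title', 'type') below are made-up placeholders, not output from a real corpus.

# Hypothetical example only: keys mirror the handler above, values are invented.
example_payload = {
    'corpus': {
        'lexicon': {
            0: {
                'bounds': [0, 11999],                        # first and last cpos of the corpus
                'counts': {'text': 2, 's': 540, 'ent': 310, 'token': 12000},
                'freqs': {                                   # p-attribute id -> corpus-wide frequency
                    'word': {0: 42, 1: 17},
                    'lemma': {0: 61},
                    'pos': {0: 980},
                    'simple_pos': {0: 1120}
                }
            }
        },
        'values': []
    },
    # one entry per text, with per-text bounds, counts and frequency tables
    'text': {
        'lexicon': {0: {'bounds': [0, 5999], 'counts': {}, 'freqs': {}}},
        'values': ['author', 'title']                        # <text> sub-attribute names, corpus-dependent
    },
    's': {'lexicon': {0: {}}, 'values': []},
    'ent': {'lexicon': {0: {}}, 'values': ['type']},
    'lookups': {
        'corpus': {},
        'text': {0: {'author': '...', 'title': '...'}},      # structural attribute id -> value mapping
        's': {},
        'ent': {0: {'type': 'PERSON'}},
        'word': {0: 'the', 1: 'of'},                         # p-attribute id -> string value
        'lemma': {0: 'the'},
        'pos': {0: 'DT'},
        'simple_pos': {0: 'DET'}
    }
}

On the client, CQiCorpus.getVisualizationData() resolves with exactly this payload, which the commented-out render calls in CorpusAnalysisApp.js are expected to consume once enabled.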