diff --git a/app/corpora/cqi_over_socketio/cqi_corpora_corpus.py b/app/corpora/cqi_over_socketio/cqi_corpora_corpus.py
index b9dbe425..d3503b66 100644
--- a/app/corpora/cqi_over_socketio/cqi_corpora_corpus.py
+++ b/app/corpora/cqi_over_socketio/cqi_corpora_corpus.py
@@ -49,18 +49,28 @@ def cqi_corpora_corpus_update_db(cqi_client: cqi.CQiClient, corpus_name: str):
 @socketio_login_required
 @cqi_over_socketio
 def cqi_corpora_corpus_get_visualization_data(cqi_client: cqi.CQiClient, corpus_name: str):
-    corpus = cqi_client.corpora.get(corpus_name)
+    cqi_corpus = cqi_client.corpora.get(corpus_name)
+    cqi_p_attrs = {
+        p_attr.name: p_attr
+        for p_attr in cqi_corpus.positional_attributes.list()
+    }
+    cqi_s_attrs = {
+        s_attr.name: s_attr
+        for s_attr in cqi_corpus.structural_attributes.list()
+    }
     payload = {
         'corpus': {
-            'bounds': [0, corpus.size - 1],
-            'counts': {},
+            'bounds': [0, cqi_corpus.size - 1],
+            'counts': {
+                'token': cqi_corpus.size
+            },
             'freqs': {}
         },
         'p_attrs': {},
         's_attrs': {},
         'values': {'p_attrs': {}, 's_attrs': {}}
     }
-    for p_attr in corpus.positional_attributes.list():
+    for p_attr in cqi_p_attrs.values():
         payload['corpus']['freqs'][p_attr.name] = dict(
             zip(
                 range(0, p_attr.lexicon_size),
@@ -69,8 +79,8 @@ def cqi_corpora_corpus_get_visualization_data(cqi_client: cqi.CQiClient, corpus_
         )
         payload['p_attrs'][p_attr.name] = dict(
             zip(
-                range(0, corpus.size),
-                p_attr.ids_by_cpos(list(range(0, corpus.size)))
+                range(0, cqi_corpus.size),
+                p_attr.ids_by_cpos(list(range(0, cqi_corpus.size)))
             )
         )
         payload['values']['p_attrs'][p_attr.name] = dict(
@@ -79,158 +89,27 @@ def cqi_corpora_corpus_get_visualization_data(cqi_client: cqi.CQiClient, corpus_
                 p_attr.values_by_ids(list(range(0, p_attr.lexicon_size)))
             )
         )
-    s = corpus.structural_attributes.get('s')
-    ent = corpus.structural_attributes.get('ent')
-    for s_attr in corpus.structural_attributes.list():
+    for s_attr in cqi_s_attrs.values():
         if s_attr.has_values:
             continue
         payload['corpus']['counts'][s_attr.name] = s_attr.size
         payload['s_attrs'][s_attr.name] = {'lexicon': {}, 'values': []}
         for id in range(0, s_attr.size):
             payload['s_attrs'][s_attr.name]['lexicon'][id] = {}
-            if s_attr.name != 'text':
-                continue
             lbound, rbound = s_attr.cpos_by_id(id)
             payload['s_attrs'][s_attr.name]['lexicon'][id]['bounds'] = [lbound, rbound]
-            cpos_range = range(lbound, rbound + 1)
             payload['s_attrs'][s_attr.name]['lexicon'][id]['counts'] = {}
-            payload['s_attrs'][s_attr.name]['lexicon'][id]['counts']['s'] = len({x for x in s.ids_by_cpos(list(cpos_range)) if x != -1})
-            payload['s_attrs'][s_attr.name]['lexicon'][id]['counts']['ent'] = len({x for x in ent.ids_by_cpos(list(cpos_range)) if x != -1})
             payload['s_attrs'][s_attr.name]['lexicon'][id]['counts']['token'] = rbound - lbound + 1
+            if s_attr.name not in ['text', 's']:
+                continue
+            cpos_range = range(lbound, rbound + 1)
+            payload['s_attrs'][s_attr.name]['lexicon'][id]['counts']['ent'] = len({x for x in cqi_s_attrs['ent'].ids_by_cpos(list(cpos_range)) if x != -1})
+            if s_attr.name != 'text':
+                continue
+            payload['s_attrs'][s_attr.name]['lexicon'][id]['counts']['s'] = len({x for x in cqi_s_attrs['s'].ids_by_cpos(list(cpos_range)) if x != -1})
             payload['s_attrs'][s_attr.name]['lexicon'][id]['freqs'] = {}
-            for p_attr in corpus.positional_attributes.list():
+            for p_attr in cqi_p_attrs.values():
                 payload['s_attrs'][s_attr.name]['lexicon'][id]['freqs'][p_attr.name] = dict(Counter(p_attr.ids_by_cpos(list(cpos_range))))
-    # for s_attr in s_attrs:
-    #     payload['s_attrs'][s_attr.name] = {'lexicon': {}, 'values': []}
-    #     payload['values']['s_attrs'][s_attr.name] = {}
-    #     for id in range(0, s_attr.size):
-    #         payload['s_attrs'][s_attr.name]['lexicon'][id] = {}
-    #         if s_attr.name != 'text':
-    #             continue
-    #         lbound, rbound = s_attr.cpos_by_id(id)
-    #         cpos_range = range(lbound, rbound + 1)
-    #         # s_ids
-    #         payload['s_attrs'][s_attr.name]['lexicon'][id]['bounds'] = [lbound, rbound]
-    #         payload['s_attrs'][s_attr.name]['lexicon'][id]['counts'] = {}
-    #         payload['s_attrs'][s_attr.name]['lexicon'][id]['counts']['token'] = rbound - lbound + 1
-    #         payload['s_attrs'][s_attr.name]['lexicon'][id]['freqs'] = {
-    #             p_attr.name: dict(Counter(p_attr.ids_by_cpos(list(cpos_range))))
-    #             for p_attr in p_attrs
-    #         }
-    #         for sub_attr in corpus.structural_attributes.list(filters={'part_of': s_attr}):
-    #             payload['s_attrs'][s_attr.name]['values'].append(sub_attr.name[(len(s_attr.name) + 1):])
-    #             payload['values']['s_attrs'][s_attr.name][sub_attr.name[(len(s_attr.name) + 1):]] = dict(
-    #                 zip(
-    #                     range(0, sub_attr.size),
-    #                     sub_attr.values_by_ids(list(range(0, sub_attr.size)))
-    #                 )
-    #             )
-    # text = corpus.structural_attributes.get('text')
-
-    # text = corpus.structural_attributes.get('text')
-    # text_value_names = []
-    # text_values = []
-    # for text_sub_attr in corpus.structural_attributes.list(filters={'part_of': text}):
-    #     text_value_names.append(text_sub_attr.name[(len(text.name) + 1):])
-    #     text_values.append(text_sub_attr.values_by_ids(list(range(0, text.size))))
-    # s = corpus.structural_attributes.get('s')
-    # ent = corpus.structural_attributes.get('ent')
-    # ent_value_names = []
-    # ent_values = []
-    # for ent_sub_attr in corpus.structural_attributes.list(filters={'part_of': ent}):
-    #     ent_value_names.append(ent_sub_attr.name[(len(ent.name) + 1):])
-    #     ent_values.append(ent_sub_attr.values_by_ids(list(range(0, ent.size))))
-    # word = corpus.positional_attributes.get('word')
-    # lemma = corpus.positional_attributes.get('lemma')
-    # pos = corpus.positional_attributes.get('pos')
-    # simple_pos = corpus.positional_attributes.get('simple_pos')
-    # payload = {}
-
-    # payload['text'] = {'lexicon': {}, 'values': None}
-    # for text_id in range(0, text.size):
-    #     text_lbound, text_rbound = text.cpos_by_id(text_id)
-    #     text_cpos_range = range(text_lbound, text_rbound + 1)
-    #     text_s_ids = s.ids_by_cpos(list(text_cpos_range))
-    #     text_ent_ids = ent.ids_by_cpos(list(text_cpos_range))
-    #     payload['text']['lexicon'][text_id] = {
-    #         'bounds': [text_lbound, text_rbound],
-    #         'counts': {
-    #             's': len([x for x in text_s_ids if x != -1]),
-    #             'ent': len([x for x in text_ent_ids if x != -1]),
-    #             'token': text_rbound - text_lbound + 1
-    #         },
-    #         'freqs': {
-    #             'word': dict(
-    #                 Counter(word.ids_by_cpos(list(text_cpos_range)))
-    #             ),
-    #             'lemma': dict(
-    #                 Counter(lemma.ids_by_cpos(list(text_cpos_range)))
-    #             ),
-    #             'pos': dict(
-    #                 Counter(pos.ids_by_cpos(list(text_cpos_range)))
-    #             ),
-    #             'simple_pos': dict(
-    #                 Counter(simple_pos.ids_by_cpos(list(text_cpos_range)))
-    #             )
-    #         }
-    #     }
-    # payload['text']['values'] = text_value_names
-    # payload['s'] = {'lexicon': {}, 'values': None}
-    # for s_id in range(0, s.size):
-    #     payload['s']['lexicon'][s_id] = {
-    #         # 'bounds': s.cpos_by_id(s_id)
-    #     }
-    # payload['s']['values'] = [
-    #     sub_attr.name[(len(s.name) + 1):]
-    #     for sub_attr in corpus.structural_attributes.list(filters={'part_of': s})
-    # ]
-    # payload['ent'] = {'lexicon': {}, 'values': None}
-    # for ent_id in range(0, ent.size):
-    #     payload['ent']['lexicon'][ent_id] = {
-    #         # 'bounds': ent.cpos_by_id(ent_id)
-    #     }
-    # payload['ent']['values'] = ent_value_names
-    # payload['lookups'] = {
-    #     'corpus': {},
-    #     'text': {
-    #         text_id: {
-    #             text_value_name: text_values[text_value_name_idx][text_id_idx]
-    #             for text_value_name_idx, text_value_name in enumerate(text_value_names)
-    #         } for text_id_idx, text_id in enumerate(range(0, text.size))
-    #     },
-    #     's': {},
-    #     'ent': {
-    #         ent_id: {
-    #             ent_value_name: ent_values[ent_value_name_idx][ent_id_idx]
-    #             for ent_value_name_idx, ent_value_name in enumerate(ent_value_names)
-    #         } for ent_id_idx, ent_id in enumerate(range(0, ent.size))
-    #     },
-    #     'word': dict(
-    #         zip(
-    #             range(0, word.lexicon_size),
-    #             word.values_by_ids(list(range(0, word.lexicon_size)))
-    #         )
-    #     ),
-    #     'lemma': dict(
-    #         zip(
-    #             range(0, lemma.lexicon_size),
-    #             lemma.values_by_ids(list(range(0, lemma.lexicon_size)))
-    #         )
-    #     ),
-    #     'pos': dict(
-    #         zip(
-    #             range(0, pos.lexicon_size),
-    #             pos.values_by_ids(list(range(0, pos.lexicon_size)))
-    #         )
-    #     ),
-    #     'simple_pos': dict(
-    #         zip(
-    #             range(0, simple_pos.lexicon_size),
-    #             simple_pos.values_by_ids(list(range(0, simple_pos.lexicon_size)))
-    #         )
-    #     )
-    # }
-    # print(payload)
     return {'code': 200, 'msg': 'OK', 'payload': payload}
diff --git a/app/static/js/CorpusAnalysis/CorpusAnalysisApp.js b/app/static/js/CorpusAnalysis/CorpusAnalysisApp.js
index 1d7cc479..53880572 100644
--- a/app/static/js/CorpusAnalysis/CorpusAnalysisApp.js
+++ b/app/static/js/CorpusAnalysis/CorpusAnalysisApp.js
@@ -34,17 +34,17 @@ class CorpusAnalysisApp {
       .then(
         cQiCorpus => {
           this.data.corpus = {o: cQiCorpus};
-          this.data.corpus.o.getVisualizationData()
-            .then(
-              (data) => {
-                console.log(data);
-                // this.renderGeneralCorpusInfo(data);
-                // this.renderTextInfoList(data);
-                // this.renderTextProportionsGraphic(data);
-                // this.renderWordFrequenciesGraphic(data);
-                // this.renderBoundsGraphic(data);
-              }
-            );
+          // this.data.corpus.o.getVisualizationData()
+          //   .then(
+          //     (data) => {
+          //       console.log(data);
+          //       this.renderGeneralCorpusInfo(data);
+          //       this.renderTextInfoList(data);
+          //       this.renderTextProportionsGraphic(data);
+          //       this.renderWordFrequenciesGraphic(data);
+          //       this.renderBoundsGraphic(data);
+          //     }
+          //   );
           this.data.corpus.o.getCorpusData()
             .then(corpusData => {
               console.log(corpusData);