Merge branch 'visualizations-update' of gitlab.ub.uni-bielefeld.de:sfb1288inf/nopaque into visualizations-update

This commit is contained in:
Inga Kirschnick 2023-06-22 16:23:55 +02:00
commit 1c98c5070a

View File

@ -1,8 +1,9 @@
from collections import Counter from collections import Counter
from flask import session from flask import session
import cqi import cqi
import json
import math import math
import random import os
from app import db, socketio from app import db, socketio
from app.decorators import socketio_login_required from app.decorators import socketio_login_required
from app.models import Corpus from app.models import Corpus
@ -49,6 +50,12 @@ def cqi_corpora_corpus_update_db(cqi_client: cqi.CQiClient, corpus_name: str):
@socketio_login_required @socketio_login_required
@cqi_over_socketio @cqi_over_socketio
def cqi_corpora_corpus_get_visualization_data(cqi_client: cqi.CQiClient, corpus_name: str): def cqi_corpora_corpus_get_visualization_data(cqi_client: cqi.CQiClient, corpus_name: str):
corpus = Corpus.query.get(session['d']['corpus_id'])
visualization_data_file_path = os.path.join(corpus.path, 'cwb', 'visualization_data.json')
if os.path.exists(visualization_data_file_path):
with open(visualization_data_file_path, 'r') as f:
payload = json.load(f)
return {'code': 200, 'msg': 'OK', 'payload': payload}
cqi_corpus = cqi_client.corpora.get(corpus_name) cqi_corpus = cqi_client.corpora.get(corpus_name)
########################################################################## ##########################################################################
# A faster way to get cpos boundaries for smaller s_attrs # # A faster way to get cpos boundaries for smaller s_attrs #
@ -108,7 +115,8 @@ def cqi_corpora_corpus_get_visualization_data(cqi_client: cqi.CQiClient, corpus_
if s_attr.has_values: if s_attr.has_values:
continue continue
payload['corpus']['counts'][s_attr.name] = s_attr.size payload['corpus']['counts'][s_attr.name] = s_attr.size
payload['s_attrs'][s_attr.name] = {'lexicon': {}, 'values': []} payload['s_attrs'][s_attr.name] = {'lexicon': {}, 'values': None}
payload['values']['s_attrs'][s_attr.name] = {}
for id in range(0, s_attr.size): for id in range(0, s_attr.size):
payload['s_attrs'][s_attr.name]['lexicon'][id] = {} payload['s_attrs'][s_attr.name]['lexicon'][id] = {}
lbound, rbound = s_attr.cpos_by_id(id) lbound, rbound = s_attr.cpos_by_id(id)
@ -125,6 +133,26 @@ def cqi_corpora_corpus_get_visualization_data(cqi_client: cqi.CQiClient, corpus_
payload['s_attrs'][s_attr.name]['lexicon'][id]['freqs'] = {} payload['s_attrs'][s_attr.name]['lexicon'][id]['freqs'] = {}
for p_attr in cqi_p_attrs.values(): for p_attr in cqi_p_attrs.values():
payload['s_attrs'][s_attr.name]['lexicon'][id]['freqs'][p_attr.name] = dict(Counter(p_attr.ids_by_cpos(list(cpos_range)))) payload['s_attrs'][s_attr.name]['lexicon'][id]['freqs'][p_attr.name] = dict(Counter(p_attr.ids_by_cpos(list(cpos_range))))
sub_s_attrs = cqi_corpus.structural_attributes.list(filters={'part_of': s_attr})
s_attr_value_names = [
sub_s_attr.name[(len(s_attr.name) + 1):]
for sub_s_attr in sub_s_attrs
]
sub_s_attr_values = [
sub_s_attr.values_by_ids(list(range(0, s_attr.size)))
for sub_s_attr in sub_s_attrs
]
payload['s_attrs'][s_attr.name]['values'] = s_attr_value_names
payload['values']['s_attrs'][s_attr.name] = {
s_attr_id: {
s_attr_value_name: sub_s_attr_values[s_attr_value_name_idx][s_attr_id_idx]
for s_attr_value_name_idx, s_attr_value_name in enumerate(
payload['s_attrs'][s_attr.name]['values']
)
} for s_attr_id_idx, s_attr_id in enumerate(range(0, s_attr.size))
}
with open(visualization_data_file_path, 'w') as f:
json.dump(payload, f)
return {'code': 200, 'msg': 'OK', 'payload': payload} return {'code': 200, 'msg': 'OK', 'payload': payload}