From 71013f1dc5d47d8800e6a3f92e50e46429f6d5ea Mon Sep 17 00:00:00 2001
From: Patrick Jentsch
Date: Thu, 22 Jun 2023 16:42:28 +0200
Subject: [PATCH] Add missing data and data cache to vis data generator
function
---
.../cqi_over_socketio/cqi_corpora_corpus.py | 32 +++++++++++++++++--
1 file changed, 30 insertions(+), 2 deletions(-)
diff --git a/app/corpora/cqi_over_socketio/cqi_corpora_corpus.py b/app/corpora/cqi_over_socketio/cqi_corpora_corpus.py
index c5125da2..c18ceda4 100644
--- a/app/corpora/cqi_over_socketio/cqi_corpora_corpus.py
+++ b/app/corpora/cqi_over_socketio/cqi_corpora_corpus.py
@@ -1,8 +1,9 @@
from collections import Counter
from flask import session
import cqi
+import json
import math
-import random
+import os
from app import db, socketio
from app.decorators import socketio_login_required
from app.models import Corpus
@@ -49,6 +50,12 @@ def cqi_corpora_corpus_update_db(cqi_client: cqi.CQiClient, corpus_name: str):
@socketio_login_required
@cqi_over_socketio
def cqi_corpora_corpus_get_visualization_data(cqi_client: cqi.CQiClient, corpus_name: str):
+ corpus = Corpus.query.get(session['d']['corpus_id'])
+ visualization_data_file_path = os.path.join(corpus.path, 'cwb', 'visualization_data.json')
+ if os.path.exists(visualization_data_file_path):
+ with open(visualization_data_file_path, 'r') as f:
+ payload = f.read()
+ return {'code': 200, 'msg': 'OK', 'payload': payload}
cqi_corpus = cqi_client.corpora.get(corpus_name)
##########################################################################
# A faster way to get cpos boundaries for smaller s_attrs #
@@ -108,7 +115,8 @@ def cqi_corpora_corpus_get_visualization_data(cqi_client: cqi.CQiClient, corpus_
if s_attr.has_values:
continue
payload['corpus']['counts'][s_attr.name] = s_attr.size
- payload['s_attrs'][s_attr.name] = {'lexicon': {}, 'values': []}
+ payload['s_attrs'][s_attr.name] = {'lexicon': {}, 'values': None}
+ payload['values']['s_attrs'][s_attr.name] = {}
for id in range(0, s_attr.size):
payload['s_attrs'][s_attr.name]['lexicon'][id] = {}
lbound, rbound = s_attr.cpos_by_id(id)
@@ -125,6 +133,26 @@ def cqi_corpora_corpus_get_visualization_data(cqi_client: cqi.CQiClient, corpus_
payload['s_attrs'][s_attr.name]['lexicon'][id]['freqs'] = {}
for p_attr in cqi_p_attrs.values():
payload['s_attrs'][s_attr.name]['lexicon'][id]['freqs'][p_attr.name] = dict(Counter(p_attr.ids_by_cpos(list(cpos_range))))
+ sub_s_attrs = cqi_corpus.structural_attributes.list(filters={'part_of': s_attr})
+ s_attr_value_names = [
+ sub_s_attr.name[(len(s_attr.name) + 1):]
+ for sub_s_attr in sub_s_attrs
+ ]
+ sub_s_attr_values = [
+ sub_s_attr.values_by_ids(list(range(0, s_attr.size)))
+ for sub_s_attr in sub_s_attrs
+ ]
+ payload['s_attrs'][s_attr.name]['values'] = s_attr_value_names
+ payload['values']['s_attrs'][s_attr.name] = {
+ s_attr_id: {
+ s_attr_value_name: sub_s_attr_values[s_attr_value_name_idx][s_attr_id_idx]
+ for s_attr_value_name_idx, s_attr_value_name in enumerate(
+ payload['s_attrs'][s_attr.name]['values']
+ )
+ } for s_attr_id_idx, s_attr_id in enumerate(range(0, s_attr.size))
+ }
+ with open(visualization_data_file_path, 'w') as f:
+ json.dump(payload, f)
return {'code': 200, 'msg': 'OK', 'payload': payload}