mirror of
https://gitlab.ub.uni-bielefeld.de/sfb1288inf/nopaque.git
synced 2024-12-25 02:44:18 +00:00
Merge branch 'visualizations-update' of gitlab.ub.uni-bielefeld.de:sfb1288inf/nopaque into visualizations-update
This commit is contained in:
commit
1c98c5070a
@ -1,8 +1,9 @@
|
||||
from collections import Counter
|
||||
from flask import session
|
||||
import cqi
|
||||
import json
|
||||
import math
|
||||
import random
|
||||
import os
|
||||
from app import db, socketio
|
||||
from app.decorators import socketio_login_required
|
||||
from app.models import Corpus
|
||||
@ -49,6 +50,12 @@ def cqi_corpora_corpus_update_db(cqi_client: cqi.CQiClient, corpus_name: str):
|
||||
@socketio_login_required
|
||||
@cqi_over_socketio
|
||||
def cqi_corpora_corpus_get_visualization_data(cqi_client: cqi.CQiClient, corpus_name: str):
|
||||
corpus = Corpus.query.get(session['d']['corpus_id'])
|
||||
visualization_data_file_path = os.path.join(corpus.path, 'cwb', 'visualization_data.json')
|
||||
if os.path.exists(visualization_data_file_path):
|
||||
with open(visualization_data_file_path, 'r') as f:
|
||||
payload = json.load(f)
|
||||
return {'code': 200, 'msg': 'OK', 'payload': payload}
|
||||
cqi_corpus = cqi_client.corpora.get(corpus_name)
|
||||
##########################################################################
|
||||
# A faster way to get cpos boundaries for smaller s_attrs #
|
||||
@ -108,7 +115,8 @@ def cqi_corpora_corpus_get_visualization_data(cqi_client: cqi.CQiClient, corpus_
|
||||
if s_attr.has_values:
|
||||
continue
|
||||
payload['corpus']['counts'][s_attr.name] = s_attr.size
|
||||
payload['s_attrs'][s_attr.name] = {'lexicon': {}, 'values': []}
|
||||
payload['s_attrs'][s_attr.name] = {'lexicon': {}, 'values': None}
|
||||
payload['values']['s_attrs'][s_attr.name] = {}
|
||||
for id in range(0, s_attr.size):
|
||||
payload['s_attrs'][s_attr.name]['lexicon'][id] = {}
|
||||
lbound, rbound = s_attr.cpos_by_id(id)
|
||||
@ -125,6 +133,26 @@ def cqi_corpora_corpus_get_visualization_data(cqi_client: cqi.CQiClient, corpus_
|
||||
payload['s_attrs'][s_attr.name]['lexicon'][id]['freqs'] = {}
|
||||
for p_attr in cqi_p_attrs.values():
|
||||
payload['s_attrs'][s_attr.name]['lexicon'][id]['freqs'][p_attr.name] = dict(Counter(p_attr.ids_by_cpos(list(cpos_range))))
|
||||
sub_s_attrs = cqi_corpus.structural_attributes.list(filters={'part_of': s_attr})
|
||||
s_attr_value_names = [
|
||||
sub_s_attr.name[(len(s_attr.name) + 1):]
|
||||
for sub_s_attr in sub_s_attrs
|
||||
]
|
||||
sub_s_attr_values = [
|
||||
sub_s_attr.values_by_ids(list(range(0, s_attr.size)))
|
||||
for sub_s_attr in sub_s_attrs
|
||||
]
|
||||
payload['s_attrs'][s_attr.name]['values'] = s_attr_value_names
|
||||
payload['values']['s_attrs'][s_attr.name] = {
|
||||
s_attr_id: {
|
||||
s_attr_value_name: sub_s_attr_values[s_attr_value_name_idx][s_attr_id_idx]
|
||||
for s_attr_value_name_idx, s_attr_value_name in enumerate(
|
||||
payload['s_attrs'][s_attr.name]['values']
|
||||
)
|
||||
} for s_attr_id_idx, s_attr_id in enumerate(range(0, s_attr.size))
|
||||
}
|
||||
with open(visualization_data_file_path, 'w') as f:
|
||||
json.dump(payload, f)
|
||||
return {'code': 200, 'msg': 'OK', 'payload': payload}
|
||||
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user