mirror of
https://gitlab.ub.uni-bielefeld.de/sfb1288inf/nopaque.git
synced 2024-11-14 16:55:42 +00:00
Merge branch 'visualizations-update' of gitlab.ub.uni-bielefeld.de:sfb1288inf/nopaque into visualizations-update
This commit is contained in:
commit
1c98c5070a
@ -1,8 +1,9 @@
|
|||||||
from collections import Counter
|
from collections import Counter
|
||||||
from flask import session
|
from flask import session
|
||||||
import cqi
|
import cqi
|
||||||
|
import json
|
||||||
import math
|
import math
|
||||||
import random
|
import os
|
||||||
from app import db, socketio
|
from app import db, socketio
|
||||||
from app.decorators import socketio_login_required
|
from app.decorators import socketio_login_required
|
||||||
from app.models import Corpus
|
from app.models import Corpus
|
||||||
@ -49,6 +50,12 @@ def cqi_corpora_corpus_update_db(cqi_client: cqi.CQiClient, corpus_name: str):
|
|||||||
@socketio_login_required
|
@socketio_login_required
|
||||||
@cqi_over_socketio
|
@cqi_over_socketio
|
||||||
def cqi_corpora_corpus_get_visualization_data(cqi_client: cqi.CQiClient, corpus_name: str):
|
def cqi_corpora_corpus_get_visualization_data(cqi_client: cqi.CQiClient, corpus_name: str):
|
||||||
|
corpus = Corpus.query.get(session['d']['corpus_id'])
|
||||||
|
visualization_data_file_path = os.path.join(corpus.path, 'cwb', 'visualization_data.json')
|
||||||
|
if os.path.exists(visualization_data_file_path):
|
||||||
|
with open(visualization_data_file_path, 'r') as f:
|
||||||
|
payload = json.load(f)
|
||||||
|
return {'code': 200, 'msg': 'OK', 'payload': payload}
|
||||||
cqi_corpus = cqi_client.corpora.get(corpus_name)
|
cqi_corpus = cqi_client.corpora.get(corpus_name)
|
||||||
##########################################################################
|
##########################################################################
|
||||||
# A faster way to get cpos boundaries for smaller s_attrs #
|
# A faster way to get cpos boundaries for smaller s_attrs #
|
||||||
@ -108,7 +115,8 @@ def cqi_corpora_corpus_get_visualization_data(cqi_client: cqi.CQiClient, corpus_
|
|||||||
if s_attr.has_values:
|
if s_attr.has_values:
|
||||||
continue
|
continue
|
||||||
payload['corpus']['counts'][s_attr.name] = s_attr.size
|
payload['corpus']['counts'][s_attr.name] = s_attr.size
|
||||||
payload['s_attrs'][s_attr.name] = {'lexicon': {}, 'values': []}
|
payload['s_attrs'][s_attr.name] = {'lexicon': {}, 'values': None}
|
||||||
|
payload['values']['s_attrs'][s_attr.name] = {}
|
||||||
for id in range(0, s_attr.size):
|
for id in range(0, s_attr.size):
|
||||||
payload['s_attrs'][s_attr.name]['lexicon'][id] = {}
|
payload['s_attrs'][s_attr.name]['lexicon'][id] = {}
|
||||||
lbound, rbound = s_attr.cpos_by_id(id)
|
lbound, rbound = s_attr.cpos_by_id(id)
|
||||||
@ -125,6 +133,26 @@ def cqi_corpora_corpus_get_visualization_data(cqi_client: cqi.CQiClient, corpus_
|
|||||||
payload['s_attrs'][s_attr.name]['lexicon'][id]['freqs'] = {}
|
payload['s_attrs'][s_attr.name]['lexicon'][id]['freqs'] = {}
|
||||||
for p_attr in cqi_p_attrs.values():
|
for p_attr in cqi_p_attrs.values():
|
||||||
payload['s_attrs'][s_attr.name]['lexicon'][id]['freqs'][p_attr.name] = dict(Counter(p_attr.ids_by_cpos(list(cpos_range))))
|
payload['s_attrs'][s_attr.name]['lexicon'][id]['freqs'][p_attr.name] = dict(Counter(p_attr.ids_by_cpos(list(cpos_range))))
|
||||||
|
sub_s_attrs = cqi_corpus.structural_attributes.list(filters={'part_of': s_attr})
|
||||||
|
s_attr_value_names = [
|
||||||
|
sub_s_attr.name[(len(s_attr.name) + 1):]
|
||||||
|
for sub_s_attr in sub_s_attrs
|
||||||
|
]
|
||||||
|
sub_s_attr_values = [
|
||||||
|
sub_s_attr.values_by_ids(list(range(0, s_attr.size)))
|
||||||
|
for sub_s_attr in sub_s_attrs
|
||||||
|
]
|
||||||
|
payload['s_attrs'][s_attr.name]['values'] = s_attr_value_names
|
||||||
|
payload['values']['s_attrs'][s_attr.name] = {
|
||||||
|
s_attr_id: {
|
||||||
|
s_attr_value_name: sub_s_attr_values[s_attr_value_name_idx][s_attr_id_idx]
|
||||||
|
for s_attr_value_name_idx, s_attr_value_name in enumerate(
|
||||||
|
payload['s_attrs'][s_attr.name]['values']
|
||||||
|
)
|
||||||
|
} for s_attr_id_idx, s_attr_id in enumerate(range(0, s_attr.size))
|
||||||
|
}
|
||||||
|
with open(visualization_data_file_path, 'w') as f:
|
||||||
|
json.dump(payload, f)
|
||||||
return {'code': 200, 'msg': 'OK', 'payload': payload}
|
return {'code': 200, 'msg': 'OK', 'payload': payload}
|
||||||
|
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user