Compare commits

..

10 Commits

Author SHA1 Message Date
Inga Kirschnick
6c31788402 Visualization fix for real data 2023-06-22 16:38:06 +02:00
Inga Kirschnick
1c98c5070a Merge branch 'visualizations-update' of gitlab.ub.uni-bielefeld.de:sfb1288inf/nopaque into visualizations-update 2023-06-22 16:23:55 +02:00
Patrick Jentsch
1e33366820 fix cache loading string instead of parsing json 2023-06-22 16:44:29 +02:00
Patrick Jentsch
71013f1dc5 Add missing data and data cache to vis data generator function 2023-06-22 16:42:28 +02:00
Inga Kirschnick
142c82cc36 New data structure implementation 2023-06-22 16:23:46 +02:00
Patrick Jentsch
f84ac48975 Add test snippet for fast cpos boundary calculation for s_attrs 2023-06-22 14:19:14 +02:00
Patrick Jentsch
2739dc4b4f Remove debug code 2023-06-22 13:21:19 +02:00
Patrick Jentsch
eb2abf8282 Merge branch 'visualizations-update' of gitlab.ub.uni-bielefeld.de:sfb1288inf/nopaque into visualizations-update 2023-06-22 12:46:36 +02:00
Patrick Jentsch
529c778772 codestyle 2023-06-22 12:45:33 +02:00
Patrick Jentsch
be51044059 Fix cqi_over_socketio not handling cqi status correctly 2023-06-22 12:45:23 +02:00
6 changed files with 121973 additions and 17896 deletions

View File

@ -18,8 +18,8 @@ def cqi_connect(cqi_client: cqi.CQiClient):
'msg': 'Internal Server Error', 'msg': 'Internal Server Error',
'payload': {'code': e.args[0], 'desc': e.args[1]} 'payload': {'code': e.args[0], 'desc': e.args[1]}
} }
payload = {'code': cqi_status, payload = {'code': cqi_status.code,
'msg': cqi.api.specification.lookup[cqi_status]} 'msg': cqi_status.__class__.__name__}
return {'code': 200, 'msg': 'OK', 'payload': payload} return {'code': 200, 'msg': 'OK', 'payload': payload}
@ -28,8 +28,8 @@ def cqi_connect(cqi_client: cqi.CQiClient):
@cqi_over_socketio @cqi_over_socketio
def cqi_disconnect(cqi_client: cqi.CQiClient): def cqi_disconnect(cqi_client: cqi.CQiClient):
cqi_status = cqi_client.disconnect() cqi_status = cqi_client.disconnect()
payload = {'code': cqi_status, payload = {'code': cqi_status.code,
'msg': cqi.api.specification.lookup[cqi_status]} 'msg': cqi_status.__class__.__name__}
return {'code': 200, 'msg': 'OK', 'payload': payload} return {'code': 200, 'msg': 'OK', 'payload': payload}
@ -38,6 +38,6 @@ def cqi_disconnect(cqi_client: cqi.CQiClient):
@cqi_over_socketio @cqi_over_socketio
def cqi_ping(cqi_client: cqi.CQiClient): def cqi_ping(cqi_client: cqi.CQiClient):
cqi_status = cqi_client.ping() cqi_status = cqi_client.ping()
payload = {'code': cqi_status, payload = {'code': cqi_status.code,
'msg': cqi.api.specification.lookup[cqi_status]} 'msg': cqi_status.__class__.__name__}
return {'code': 200, 'msg': 'OK', 'payload': payload} return {'code': 200, 'msg': 'OK', 'payload': payload}

View File

@ -1,8 +1,9 @@
from collections import Counter from collections import Counter
from flask import session from flask import session
import cqi import cqi
import json
import math import math
import random import os
from app import db, socketio from app import db, socketio
from app.decorators import socketio_login_required from app.decorators import socketio_login_required
from app.models import Corpus from app.models import Corpus
@ -16,8 +17,8 @@ from .utils import cqi_over_socketio, lookups_by_cpos
def cqi_corpora_corpus_drop(cqi_client: cqi.CQiClient, corpus_name: str): def cqi_corpora_corpus_drop(cqi_client: cqi.CQiClient, corpus_name: str):
cqi_corpus = cqi_client.corpora.get(corpus_name) cqi_corpus = cqi_client.corpora.get(corpus_name)
cqi_status = cqi_corpus.drop() cqi_status = cqi_corpus.drop()
payload = {'code': cqi_status, payload = {'code': cqi_status.code,
'msg': cqi.api.specification.lookup[cqi_status]} 'msg': cqi_status.__class__.__name__}
return {'code': 200, 'msg': 'OK', 'payload': payload} return {'code': 200, 'msg': 'OK', 'payload': payload}
@ -27,8 +28,8 @@ def cqi_corpora_corpus_drop(cqi_client: cqi.CQiClient, corpus_name: str):
def cqi_corpora_corpus_query(cqi_client: cqi.CQiClient, corpus_name: str, subcorpus_name: str, query: str): # noqa def cqi_corpora_corpus_query(cqi_client: cqi.CQiClient, corpus_name: str, subcorpus_name: str, query: str): # noqa
cqi_corpus = cqi_client.corpora.get(corpus_name) cqi_corpus = cqi_client.corpora.get(corpus_name)
cqi_status = cqi_corpus.query(subcorpus_name, query) cqi_status = cqi_corpus.query(subcorpus_name, query)
payload = {'code': cqi_status, payload = {'code': cqi_status.code,
'msg': cqi.api.specification.lookup[cqi_status]} 'msg': cqi_status.__class__.__name__}
return {'code': 200, 'msg': 'OK', 'payload': payload} return {'code': 200, 'msg': 'OK', 'payload': payload}
@ -49,179 +50,109 @@ def cqi_corpora_corpus_update_db(cqi_client: cqi.CQiClient, corpus_name: str):
@socketio_login_required @socketio_login_required
@cqi_over_socketio @cqi_over_socketio
def cqi_corpora_corpus_get_visualization_data(cqi_client: cqi.CQiClient, corpus_name: str): def cqi_corpora_corpus_get_visualization_data(cqi_client: cqi.CQiClient, corpus_name: str):
corpus = cqi_client.corpora.get(corpus_name) corpus = Corpus.query.get(session['d']['corpus_id'])
# s_attrs = [x for x in corpus.structural_attributes.list() if not x.has_values] visualization_data_file_path = os.path.join(corpus.path, 'cwb', 'visualization_data.json')
# p_attrs = corpus.positional_attributes.list() if os.path.exists(visualization_data_file_path):
# payload = { with open(visualization_data_file_path, 'r') as f:
# 's_attrs': {}, payload = json.load(f)
# 'p_attrs': {}, return {'code': 200, 'msg': 'OK', 'payload': payload}
# 'values': { cqi_corpus = cqi_client.corpora.get(corpus_name)
# 's_attrs': {}, ##########################################################################
# 'p_attrs': {} # A faster way to get cpos boundaries for smaller s_attrs #
# } ##########################################################################
# } # cqi_corpus.query('Last', '<s> []* </s>;')
# for s_attr in s_attrs: # cqi_subcorpus = cqi_corpus.subcorpora.get('Last')
# s_attr_lbound, s_attr_rbound = s_attr.cpos_by_id(text_id) # print(cqi_subcorpus.size)
# s_attr_cpos_range = range(s_attr_lbound, s_attr_rbound + 1) # first_match = 0
# payload['text']['lexicon'][text_id] = { # last_match = cqi_subcorpus.attrs['size'] - 1
# 's_attrs': [s_attr_lbound, s_attr_rbound], # match_boundaries = zip(
# 'counts': { # list(range(first_match, last_match + 1)),
# 'token': s_attr_rbound - s_attr_lbound + 1 # cqi_subcorpus.dump(cqi_subcorpus.attrs['fields']['match'], first_match, last_match),
# }, # cqi_subcorpus.dump(cqi_subcorpus.attrs['fields']['matchend'], first_match, last_match)
# 'freqs': { # )
# p_attr.name: dict(Counter(p_attr.ids_by_cpos(list(s_attr_cpos_range)))) # for x in match_boundaries:
# for p_attr in p_attrs # print(x)
# } cqi_p_attrs = {
# } p_attr.name: p_attr
# for p_attr in p_attrs: for p_attr in cqi_corpus.positional_attributes.list()
# payload['p_attrs'] = dict(
# )
# payload['values']['p_attrs'] = dict(
# zip(
# range(0, p_attr.lexicon_size),
# p_attr.values_by_ids(list(range(0, p_attr.lexicon_size)))
# )
# )
text = corpus.structural_attributes.get('text')
text_value_names = []
text_values = []
for text_sub_attr in corpus.structural_attributes.list(filters={'part_of': text}):
text_value_names.append(text_sub_attr.name[(len(text.name) + 1):])
text_values.append(text_sub_attr.values_by_ids(list(range(0, text.size))))
s = corpus.structural_attributes.get('s')
ent = corpus.structural_attributes.get('ent')
ent_value_names = []
ent_values = []
for ent_sub_attr in corpus.structural_attributes.list(filters={'part_of': ent}):
ent_value_names.append(ent_sub_attr.name[(len(ent.name) + 1):])
ent_values.append(ent_sub_attr.values_by_ids(list(range(0, ent.size))))
word = corpus.positional_attributes.get('word')
lemma = corpus.positional_attributes.get('lemma')
pos = corpus.positional_attributes.get('pos')
simple_pos = corpus.positional_attributes.get('simple_pos')
payload = {}
payload['corpus'] = {'lexicon': {}, 'values': []}
payload['corpus']['lexicon'][0] = {
'bounds': [0, corpus.size - 1],
'counts': {
'text': text.size,
's': s.size,
'ent': ent.size,
'token': corpus.size
},
'freqs': {
'word': dict(
zip(
range(0, word.lexicon_size),
word.freqs_by_ids(list(range(0, word.lexicon_size)))
)
),
'lemma': dict(
zip(
range(0, lemma.lexicon_size),
lemma.freqs_by_ids(list(range(0, lemma.lexicon_size)))
)
),
'pos': dict(
zip(
range(0, pos.lexicon_size),
pos.freqs_by_ids(list(range(0, pos.lexicon_size)))
)
),
'simple_pos': dict(
zip(
range(0, simple_pos.lexicon_size),
simple_pos.freqs_by_ids(list(range(0, simple_pos.lexicon_size)))
)
)
}
} }
payload['text'] = {'lexicon': {}, 'values': None} cqi_s_attrs = {
for text_id in range(0, text.size): s_attr.name: s_attr
text_lbound, text_rbound = text.cpos_by_id(text_id) for s_attr in cqi_corpus.structural_attributes.list()
text_cpos_range = range(text_lbound, text_rbound + 1) }
text_s_ids = s.ids_by_cpos(list(text_cpos_range)) payload = {
text_ent_ids = ent.ids_by_cpos(list(text_cpos_range)) 'corpus': {
payload['text']['lexicon'][text_id] = { 'bounds': [0, cqi_corpus.size - 1],
'bounds': [text_lbound, text_rbound],
'counts': { 'counts': {
's': len([x for x in text_s_ids if x != -1]), 'token': cqi_corpus.size
'ent': len([x for x in text_ent_ids if x != -1]),
'token': text_rbound - text_lbound + 1
}, },
'freqs': { 'freqs': {}
'word': dict(
Counter(word.ids_by_cpos(list(text_cpos_range)))
),
'lemma': dict(
Counter(lemma.ids_by_cpos(list(text_cpos_range)))
),
'pos': dict(
Counter(pos.ids_by_cpos(list(text_cpos_range)))
),
'simple_pos': dict(
Counter(simple_pos.ids_by_cpos(list(text_cpos_range)))
)
}
}
payload['text']['values'] = text_value_names
payload['s'] = {'lexicon': {}, 'values': None}
for s_id in range(0, s.size):
payload['s']['lexicon'][s_id] = {
# 'bounds': s.cpos_by_id(s_id)
}
payload['s']['values'] = [
sub_attr.name[(len(s.name) + 1):]
for sub_attr in corpus.structural_attributes.list(filters={'part_of': s})
]
payload['ent'] = {'lexicon': {}, 'values': None}
for ent_id in range(0, ent.size):
payload['ent']['lexicon'][ent_id] = {
# 'bounds': ent.cpos_by_id(ent_id)
}
payload['ent']['values'] = ent_value_names
payload['lookups'] = {
'corpus': {},
'text': {
text_id: {
text_value_name: text_values[text_value_name_idx][text_id_idx]
for text_value_name_idx, text_value_name in enumerate(text_value_names)
} for text_id_idx, text_id in enumerate(range(0, text.size))
}, },
's': {}, 'p_attrs': {},
'ent': { 's_attrs': {},
ent_id: { 'values': {'p_attrs': {}, 's_attrs': {}}
ent_value_name: ent_values[ent_value_name_idx][ent_id_idx] }
for ent_value_name_idx, ent_value_name in enumerate(ent_value_names) for p_attr in cqi_p_attrs.values():
} for ent_id_idx, ent_id in enumerate(range(0, ent.size)) payload['corpus']['freqs'][p_attr.name] = dict(
},
'word': dict(
zip( zip(
range(0, word.lexicon_size), range(0, p_attr.lexicon_size),
word.values_by_ids(list(range(0, word.lexicon_size))) p_attr.freqs_by_ids(list(range(0, p_attr.lexicon_size)))
)
),
'lemma': dict(
zip(
range(0, lemma.lexicon_size),
lemma.values_by_ids(list(range(0, lemma.lexicon_size)))
)
),
'pos': dict(
zip(
range(0, pos.lexicon_size),
pos.values_by_ids(list(range(0, pos.lexicon_size)))
)
),
'simple_pos': dict(
zip(
range(0, simple_pos.lexicon_size),
simple_pos.values_by_ids(list(range(0, simple_pos.lexicon_size)))
) )
) )
} payload['p_attrs'][p_attr.name] = dict(
# print(payload) zip(
range(0, cqi_corpus.size),
p_attr.ids_by_cpos(list(range(0, cqi_corpus.size)))
)
)
payload['values']['p_attrs'][p_attr.name] = dict(
zip(
range(0, p_attr.lexicon_size),
p_attr.values_by_ids(list(range(0, p_attr.lexicon_size)))
)
)
for s_attr in cqi_s_attrs.values():
if s_attr.has_values:
continue
payload['corpus']['counts'][s_attr.name] = s_attr.size
payload['s_attrs'][s_attr.name] = {'lexicon': {}, 'values': None}
payload['values']['s_attrs'][s_attr.name] = {}
for id in range(0, s_attr.size):
payload['s_attrs'][s_attr.name]['lexicon'][id] = {}
lbound, rbound = s_attr.cpos_by_id(id)
payload['s_attrs'][s_attr.name]['lexicon'][id]['bounds'] = [lbound, rbound]
payload['s_attrs'][s_attr.name]['lexicon'][id]['counts'] = {}
payload['s_attrs'][s_attr.name]['lexicon'][id]['counts']['token'] = rbound - lbound + 1
if s_attr.name not in ['text', 's']:
continue
cpos_range = range(lbound, rbound + 1)
payload['s_attrs'][s_attr.name]['lexicon'][id]['counts']['ent'] = len({x for x in cqi_s_attrs['ent'].ids_by_cpos(list(cpos_range)) if x != -1})
if s_attr.name != 'text':
continue
payload['s_attrs'][s_attr.name]['lexicon'][id]['counts']['s'] = len({x for x in cqi_s_attrs['s'].ids_by_cpos(list(cpos_range)) if x != -1})
payload['s_attrs'][s_attr.name]['lexicon'][id]['freqs'] = {}
for p_attr in cqi_p_attrs.values():
payload['s_attrs'][s_attr.name]['lexicon'][id]['freqs'][p_attr.name] = dict(Counter(p_attr.ids_by_cpos(list(cpos_range))))
sub_s_attrs = cqi_corpus.structural_attributes.list(filters={'part_of': s_attr})
s_attr_value_names = [
sub_s_attr.name[(len(s_attr.name) + 1):]
for sub_s_attr in sub_s_attrs
]
sub_s_attr_values = [
sub_s_attr.values_by_ids(list(range(0, s_attr.size)))
for sub_s_attr in sub_s_attrs
]
payload['s_attrs'][s_attr.name]['values'] = s_attr_value_names
payload['values']['s_attrs'][s_attr.name] = {
s_attr_id: {
s_attr_value_name: sub_s_attr_values[s_attr_value_name_idx][s_attr_id_idx]
for s_attr_value_name_idx, s_attr_value_name in enumerate(
payload['s_attrs'][s_attr.name]['values']
)
} for s_attr_id_idx, s_attr_id in enumerate(range(0, s_attr.size))
}
with open(visualization_data_file_path, 'w') as f:
json.dump(payload, f)
return {'code': 200, 'msg': 'OK', 'payload': payload} return {'code': 200, 'msg': 'OK', 'payload': payload}

View File

@ -32,8 +32,8 @@ def cqi_corpora_corpus_subcorpora_subcorpus_drop(cqi_client: cqi.CQiClient, corp
cqi_corpus = cqi_client.corpora.get(corpus_name) cqi_corpus = cqi_client.corpora.get(corpus_name)
cqi_subcorpus = cqi_corpus.subcorpora.get(subcorpus_name) cqi_subcorpus = cqi_corpus.subcorpora.get(subcorpus_name)
cqi_status = cqi_subcorpus.drop() cqi_status = cqi_subcorpus.drop()
payload = {'code': cqi_status, payload = {'code': cqi_status.code,
'msg': cqi.api.specification.lookup[cqi_status]} 'msg': cqi_status.__class__.__name__}
return {'code': 200, 'msg': 'OK', 'payload': payload} return {'code': 200, 'msg': 'OK', 'payload': payload}

File diff suppressed because it is too large Load Diff

View File

@ -34,26 +34,26 @@ class CorpusAnalysisApp {
.then( .then(
cQiCorpus => { cQiCorpus => {
this.data.corpus = {o: cQiCorpus}; this.data.corpus = {o: cQiCorpus};
// this.data.corpus.o.getVisualizationData() this.data.corpus.o.getVisualizationData()
// .then( .then(
// (data) => { (data) => {
// console.log(data); console.log(data);
// this.renderGeneralCorpusInfo(data); this.renderGeneralCorpusInfo(data);
// this.renderTextInfoList(data); this.renderTextInfoList(data);
// this.renderTextProportionsGraphic(data); this.renderTextProportionsGraphic(data);
// this.renderWordFrequenciesGraphic(data); this.renderFrequenciesGraphic(data);
// this.renderBoundsGraphic(data); this.renderBoundsGraphic(data);
// } }
// ); );
this.data.corpus.o.getCorpusData() // this.data.corpus.o.getCorpusData()
.then(corpusData => { // .then(corpusData => {
console.log(corpusData); // console.log(corpusData);
this.renderGeneralCorpusInfo(corpusData); // this.renderGeneralCorpusInfo(corpusData);
this.renderTextInfoList(corpusData); // this.renderTextInfoList(corpusData);
this.renderTextProportionsGraphic(corpusData); // this.renderTextProportionsGraphic(corpusData);
this.renderFrequenciesGraphic(corpusData); // this.renderFrequenciesGraphic(corpusData);
this.renderBoundsGraphic(corpusData); // this.renderBoundsGraphic(corpusData);
}); // });
// TODO: Don't do this here // TODO: Don't do this here
cQiCorpus.updateDb(); cQiCorpus.updateDb();
this.enableActionElements(); this.enableActionElements();
@ -117,29 +117,29 @@ class CorpusAnalysisApp {
} }
renderGeneralCorpusInfo(corpusData) { renderGeneralCorpusInfo(corpusData) {
document.querySelector('.corpus-num-tokens').innerHTML = corpusData.corpus.lexicon[0].counts.token; document.querySelector('.corpus-num-tokens').innerHTML = corpusData.corpus.counts.token;
document.querySelector('.corpus-num-s').innerHTML = corpusData.corpus.lexicon[0].counts.s; document.querySelector('.corpus-num-s').innerHTML = corpusData.corpus.counts.s;
document.querySelector('.corpus-num-unique-words').innerHTML = Object.entries(corpusData.corpus.lexicon[0].freqs.word).length; document.querySelector('.corpus-num-unique-words').innerHTML = Object.entries(corpusData.corpus.freqs.word).length;
document.querySelector('.corpus-num-unique-lemmas').innerHTML = Object.entries(corpusData.corpus.lexicon[0].freqs.lemma).length; document.querySelector('.corpus-num-unique-lemmas').innerHTML = Object.entries(corpusData.corpus.freqs.lemma).length;
document.querySelector('.corpus-num-unique-pos').innerHTML = Object.entries(corpusData.corpus.lexicon[0].freqs.pos).length; document.querySelector('.corpus-num-unique-pos').innerHTML = Object.entries(corpusData.corpus.freqs.pos).length;
document.querySelector('.corpus-num-unique-simple-pos').innerHTML = Object.entries(corpusData.corpus.lexicon[0].freqs.simple_pos).length; document.querySelector('.corpus-num-unique-simple-pos').innerHTML = Object.entries(corpusData.corpus.freqs.simple_pos).length;
} }
renderTextInfoList(corpusData) { renderTextInfoList(corpusData) {
let corpusTextInfoListElement = document.querySelector('.corpus-text-info-list'); let corpusTextInfoListElement = document.querySelector('.corpus-text-info-list');
let corpusTextInfoList = new CorpusTextInfoList(corpusTextInfoListElement); let corpusTextInfoList = new CorpusTextInfoList(corpusTextInfoListElement);
let texts = corpusData.text.lexicon; let texts = corpusData.s_attrs.text.lexicon;
let textData = []; let textData = [];
for (let i = 0; i < Object.entries(texts).length; i++) { for (let i = 0; i < Object.entries(texts).length; i++) {
let resource = { let resource = {
title: corpusData.lookups.text[i].title, title: corpusData.values.s_attrs.text[i].title,
publishing_year: corpusData.lookups.text[i].publishing_year, publishing_year: corpusData.values.s_attrs.text[i].publishing_year,
num_tokens: corpusData.text.lexicon[i].counts.token, num_tokens: corpusData.s_attrs.text.lexicon[i].counts.token,
num_sentences: corpusData.text.lexicon[i].counts.s, num_sentences: corpusData.s_attrs.text.lexicon[i].counts.s,
num_unique_words: Object.entries(corpusData.text.lexicon[i].freqs.word).length, num_unique_words: Object.entries(corpusData.s_attrs.text.lexicon[i].freqs.word).length,
num_unique_lemmas: Object.entries(corpusData.text.lexicon[i].freqs.lemma).length, num_unique_lemmas: Object.entries(corpusData.s_attrs.text.lexicon[i].freqs.lemma).length,
num_unique_pos: Object.entries(corpusData.text.lexicon[i].freqs.pos).length, num_unique_pos: Object.entries(corpusData.s_attrs.text.lexicon[i].freqs.pos).length,
num_unique_simple_pos: Object.entries(corpusData.text.lexicon[i].freqs.simple_pos).length num_unique_simple_pos: Object.entries(corpusData.s_attrs.text.lexicon[i].freqs.simple_pos).length
}; };
textData.push(resource); textData.push(resource);
@ -148,33 +148,29 @@ class CorpusAnalysisApp {
corpusTextInfoList.add(textData); corpusTextInfoList.add(textData);
let textCountChipElement = document.querySelector('.text-count-chip'); let textCountChipElement = document.querySelector('.text-count-chip');
textCountChipElement.innerHTML = `Text count: ${Object.values(corpusData.text.lexicon).length}`; textCountChipElement.innerHTML = `Text count: ${corpusData.corpus.counts.text}`;
} }
renderTextProportionsGraphic(corpusData) { renderTextProportionsGraphic(corpusData) {
let textProportionsGraphicElement = document.querySelector('#text-proportions-graphic'); let textProportionsGraphicElement = document.querySelector('#text-proportions-graphic');
let texts = Object.entries(corpusData.text.lexicon); let texts = Object.entries(corpusData.s_attrs.text.lexicon);
let graphData = [ let graphData = [
{ {
values: texts.map(text => text[1].counts.token), values: texts.map(text => text[1].counts.token),
labels: texts.map(text => `${corpusData.lookups.text[text[0]].title} (${corpusData.lookups.text[text[0]].publishing_year})`), labels: texts.map(text => `${corpusData.values.s_attrs.text[text[0]].title} (${corpusData.values.s_attrs.text[text[0]].publishing_year})`),
type: 'pie' type: 'pie'
} }
]; ];
let graphLayout = {
// height: 600,
// width: 900
};
let config = {responsive: true}; let config = {responsive: true};
Plotly.newPlot(textProportionsGraphicElement, graphData, graphLayout, config); Plotly.newPlot(textProportionsGraphicElement, graphData, config);
} }
renderFrequenciesGraphic(corpusData) { renderFrequenciesGraphic(corpusData) {
let frequenciesTokenCategoryDropdownElement = document.querySelector('[data-target="frequencies-token-category-dropdown"]'); let frequenciesTokenCategoryDropdownElement = document.querySelector('[data-target="frequencies-token-category-dropdown"]');
let frequenciesTokenCategoryDropdownListElement = document.querySelector("#frequencies-token-category-dropdown"); let frequenciesTokenCategoryDropdownListElement = document.querySelector("#frequencies-token-category-dropdown");
let frequenciesGraphicElement = document.querySelector('#frequencies-graphic'); let frequenciesGraphicElement = document.querySelector('#frequencies-graphic');
let texts = Object.entries(corpusData.text.lexicon); let texts = Object.entries(corpusData.s_attrs.text.lexicon);
frequenciesTokenCategoryDropdownListElement.addEventListener('click', (event) => { frequenciesTokenCategoryDropdownListElement.addEventListener('click', (event) => {
@ -196,13 +192,13 @@ class CorpusAnalysisApp {
createFrequenciesGraphData(category, texts, corpusData) { createFrequenciesGraphData(category, texts, corpusData) {
let graphData = []; let graphData = [];
let sortedData = Object.entries(corpusData.corpus.lexicon[0].freqs[category]).sort((a, b) => b[1] - a[1]).slice(0, 5); let sortedData = Object.entries(corpusData.corpus.freqs[category]).sort((a, b) => b[1] - a[1]).slice(0, 5);
for (let item of sortedData) { for (let item of sortedData) {
let data = { let data = {
x: texts.map(text => `${corpusData.lookups.text[text[0]].title} (${corpusData.lookups.text[text[0]].publishing_year})`), x: texts.map(text => `${corpusData.values.s_attrs.text[text[0]].title} (${corpusData.values.s_attrs.text[text[0]].publishing_year})`),
y: texts.map(text => text[1].freqs[category][item[0]]), y: texts.map(text => text[1].freqs[category][item[0]]),
name: corpusData.lookups[category][item[0]], name: corpusData.values.p_attrs[category][item[0]],
type: 'bar' type: 'bar'
}; };
graphData.push(data); graphData.push(data);
@ -215,22 +211,20 @@ class CorpusAnalysisApp {
let boundsGraphicElement = document.querySelector('#bounds-graphic'); let boundsGraphicElement = document.querySelector('#bounds-graphic');
let graphData = []; let graphData = [];
let texts = Object.entries(corpusData.text.lexicon); let texts = Object.entries(corpusData.s_attrs.text.lexicon);
graphData = [{ graphData = [{
type: 'bar', type: 'bar',
x: texts.map(text => text[1].bounds[1] - text[1].bounds[0]), x: texts.map(text => text[1].bounds[1] - text[1].bounds[0]),
y: texts.map(text => corpusData.lookups.text[text[0]].title), y: texts.map(text => corpusData.values.s_attrs.text[text[0]].title),
base: texts.map(text => text[1].bounds[0]), base: texts.map(text => text[1].bounds[0]),
text: texts.map(text => `${corpusData.lookups.text[text[0]].title} (${corpusData.lookups.text[text[0]].publishing_year})`), text: texts.map(text => `${corpusData.values.s_attrs.text[text[0]].title} (${corpusData.values.s_attrs.text[text[0]].publishing_year})`),
orientation: 'h', orientation: 'h',
hovertemplate: '%{base} - %{x} <br>%{y}', hovertemplate: '%{base} - %{x} <br>%{y}',
showlegend: false showlegend: false
}]; }];
let graphLayout = { let graphLayout = {
// height: 600,
// width: 2000,
barmode: 'stack', barmode: 'stack',
type: 'bar', type: 'bar',
showgrid: false, showgrid: false,

View File

@ -103,6 +103,7 @@ class CorpusTextInfoList extends ResourceList {
if (sortElement !== clickedSortElement) { if (sortElement !== clickedSortElement) {
sortElement.classList.remove('asc', 'desc'); sortElement.classList.remove('asc', 'desc');
sortElement.style.color = 'black'; sortElement.style.color = 'black';
sortElement.innerHTML = 'arrow_drop_down';
}; };
}); });
clickedSortElement.style.color = '#aa9cc9'; clickedSortElement.style.color = '#aa9cc9';