mirror of
https://gitlab.ub.uni-bielefeld.de/sfb1288inf/nopaque.git
synced 2025-01-18 05:50:34 +00:00
Compare commits
10 Commits
e194ce7541
...
6c31788402
Author | SHA1 | Date | |
---|---|---|---|
|
6c31788402 | ||
|
1c98c5070a | ||
|
1e33366820 | ||
|
71013f1dc5 | ||
|
142c82cc36 | ||
|
f84ac48975 | ||
|
2739dc4b4f | ||
|
eb2abf8282 | ||
|
529c778772 | ||
|
be51044059 |
@ -18,8 +18,8 @@ def cqi_connect(cqi_client: cqi.CQiClient):
|
||||
'msg': 'Internal Server Error',
|
||||
'payload': {'code': e.args[0], 'desc': e.args[1]}
|
||||
}
|
||||
payload = {'code': cqi_status,
|
||||
'msg': cqi.api.specification.lookup[cqi_status]}
|
||||
payload = {'code': cqi_status.code,
|
||||
'msg': cqi_status.__class__.__name__}
|
||||
return {'code': 200, 'msg': 'OK', 'payload': payload}
|
||||
|
||||
|
||||
@ -28,8 +28,8 @@ def cqi_connect(cqi_client: cqi.CQiClient):
|
||||
@cqi_over_socketio
|
||||
def cqi_disconnect(cqi_client: cqi.CQiClient):
|
||||
cqi_status = cqi_client.disconnect()
|
||||
payload = {'code': cqi_status,
|
||||
'msg': cqi.api.specification.lookup[cqi_status]}
|
||||
payload = {'code': cqi_status.code,
|
||||
'msg': cqi_status.__class__.__name__}
|
||||
return {'code': 200, 'msg': 'OK', 'payload': payload}
|
||||
|
||||
|
||||
@ -38,6 +38,6 @@ def cqi_disconnect(cqi_client: cqi.CQiClient):
|
||||
@cqi_over_socketio
|
||||
def cqi_ping(cqi_client: cqi.CQiClient):
|
||||
cqi_status = cqi_client.ping()
|
||||
payload = {'code': cqi_status,
|
||||
'msg': cqi.api.specification.lookup[cqi_status]}
|
||||
payload = {'code': cqi_status.code,
|
||||
'msg': cqi_status.__class__.__name__}
|
||||
return {'code': 200, 'msg': 'OK', 'payload': payload}
|
||||
|
@ -1,8 +1,9 @@
|
||||
from collections import Counter
|
||||
from flask import session
|
||||
import cqi
|
||||
import json
|
||||
import math
|
||||
import random
|
||||
import os
|
||||
from app import db, socketio
|
||||
from app.decorators import socketio_login_required
|
||||
from app.models import Corpus
|
||||
@ -16,8 +17,8 @@ from .utils import cqi_over_socketio, lookups_by_cpos
|
||||
def cqi_corpora_corpus_drop(cqi_client: cqi.CQiClient, corpus_name: str):
|
||||
cqi_corpus = cqi_client.corpora.get(corpus_name)
|
||||
cqi_status = cqi_corpus.drop()
|
||||
payload = {'code': cqi_status,
|
||||
'msg': cqi.api.specification.lookup[cqi_status]}
|
||||
payload = {'code': cqi_status.code,
|
||||
'msg': cqi_status.__class__.__name__}
|
||||
return {'code': 200, 'msg': 'OK', 'payload': payload}
|
||||
|
||||
|
||||
@ -27,8 +28,8 @@ def cqi_corpora_corpus_drop(cqi_client: cqi.CQiClient, corpus_name: str):
|
||||
def cqi_corpora_corpus_query(cqi_client: cqi.CQiClient, corpus_name: str, subcorpus_name: str, query: str): # noqa
|
||||
cqi_corpus = cqi_client.corpora.get(corpus_name)
|
||||
cqi_status = cqi_corpus.query(subcorpus_name, query)
|
||||
payload = {'code': cqi_status,
|
||||
'msg': cqi.api.specification.lookup[cqi_status]}
|
||||
payload = {'code': cqi_status.code,
|
||||
'msg': cqi_status.__class__.__name__}
|
||||
return {'code': 200, 'msg': 'OK', 'payload': payload}
|
||||
|
||||
|
||||
@ -49,179 +50,109 @@ def cqi_corpora_corpus_update_db(cqi_client: cqi.CQiClient, corpus_name: str):
|
||||
@socketio_login_required
|
||||
@cqi_over_socketio
|
||||
def cqi_corpora_corpus_get_visualization_data(cqi_client: cqi.CQiClient, corpus_name: str):
|
||||
corpus = cqi_client.corpora.get(corpus_name)
|
||||
# s_attrs = [x for x in corpus.structural_attributes.list() if not x.has_values]
|
||||
# p_attrs = corpus.positional_attributes.list()
|
||||
# payload = {
|
||||
# 's_attrs': {},
|
||||
# 'p_attrs': {},
|
||||
# 'values': {
|
||||
# 's_attrs': {},
|
||||
# 'p_attrs': {}
|
||||
# }
|
||||
# }
|
||||
# for s_attr in s_attrs:
|
||||
# s_attr_lbound, s_attr_rbound = s_attr.cpos_by_id(text_id)
|
||||
# s_attr_cpos_range = range(s_attr_lbound, s_attr_rbound + 1)
|
||||
# payload['text']['lexicon'][text_id] = {
|
||||
# 's_attrs': [s_attr_lbound, s_attr_rbound],
|
||||
# 'counts': {
|
||||
# 'token': s_attr_rbound - s_attr_lbound + 1
|
||||
# },
|
||||
# 'freqs': {
|
||||
# p_attr.name: dict(Counter(p_attr.ids_by_cpos(list(s_attr_cpos_range))))
|
||||
# for p_attr in p_attrs
|
||||
# }
|
||||
# }
|
||||
# for p_attr in p_attrs:
|
||||
# payload['p_attrs'] = dict(
|
||||
|
||||
# )
|
||||
# payload['values']['p_attrs'] = dict(
|
||||
# zip(
|
||||
# range(0, p_attr.lexicon_size),
|
||||
# p_attr.values_by_ids(list(range(0, p_attr.lexicon_size)))
|
||||
# )
|
||||
# )
|
||||
text = corpus.structural_attributes.get('text')
|
||||
text_value_names = []
|
||||
text_values = []
|
||||
for text_sub_attr in corpus.structural_attributes.list(filters={'part_of': text}):
|
||||
text_value_names.append(text_sub_attr.name[(len(text.name) + 1):])
|
||||
text_values.append(text_sub_attr.values_by_ids(list(range(0, text.size))))
|
||||
s = corpus.structural_attributes.get('s')
|
||||
ent = corpus.structural_attributes.get('ent')
|
||||
ent_value_names = []
|
||||
ent_values = []
|
||||
for ent_sub_attr in corpus.structural_attributes.list(filters={'part_of': ent}):
|
||||
ent_value_names.append(ent_sub_attr.name[(len(ent.name) + 1):])
|
||||
ent_values.append(ent_sub_attr.values_by_ids(list(range(0, ent.size))))
|
||||
word = corpus.positional_attributes.get('word')
|
||||
lemma = corpus.positional_attributes.get('lemma')
|
||||
pos = corpus.positional_attributes.get('pos')
|
||||
simple_pos = corpus.positional_attributes.get('simple_pos')
|
||||
payload = {}
|
||||
payload['corpus'] = {'lexicon': {}, 'values': []}
|
||||
payload['corpus']['lexicon'][0] = {
|
||||
'bounds': [0, corpus.size - 1],
|
||||
'counts': {
|
||||
'text': text.size,
|
||||
's': s.size,
|
||||
'ent': ent.size,
|
||||
'token': corpus.size
|
||||
},
|
||||
'freqs': {
|
||||
'word': dict(
|
||||
zip(
|
||||
range(0, word.lexicon_size),
|
||||
word.freqs_by_ids(list(range(0, word.lexicon_size)))
|
||||
)
|
||||
),
|
||||
'lemma': dict(
|
||||
zip(
|
||||
range(0, lemma.lexicon_size),
|
||||
lemma.freqs_by_ids(list(range(0, lemma.lexicon_size)))
|
||||
)
|
||||
),
|
||||
'pos': dict(
|
||||
zip(
|
||||
range(0, pos.lexicon_size),
|
||||
pos.freqs_by_ids(list(range(0, pos.lexicon_size)))
|
||||
)
|
||||
),
|
||||
'simple_pos': dict(
|
||||
zip(
|
||||
range(0, simple_pos.lexicon_size),
|
||||
simple_pos.freqs_by_ids(list(range(0, simple_pos.lexicon_size)))
|
||||
)
|
||||
)
|
||||
}
|
||||
corpus = Corpus.query.get(session['d']['corpus_id'])
|
||||
visualization_data_file_path = os.path.join(corpus.path, 'cwb', 'visualization_data.json')
|
||||
if os.path.exists(visualization_data_file_path):
|
||||
with open(visualization_data_file_path, 'r') as f:
|
||||
payload = json.load(f)
|
||||
return {'code': 200, 'msg': 'OK', 'payload': payload}
|
||||
cqi_corpus = cqi_client.corpora.get(corpus_name)
|
||||
##########################################################################
|
||||
# A faster way to get cpos boundaries for smaller s_attrs #
|
||||
##########################################################################
|
||||
# cqi_corpus.query('Last', '<s> []* </s>;')
|
||||
# cqi_subcorpus = cqi_corpus.subcorpora.get('Last')
|
||||
# print(cqi_subcorpus.size)
|
||||
# first_match = 0
|
||||
# last_match = cqi_subcorpus.attrs['size'] - 1
|
||||
# match_boundaries = zip(
|
||||
# list(range(first_match, last_match + 1)),
|
||||
# cqi_subcorpus.dump(cqi_subcorpus.attrs['fields']['match'], first_match, last_match),
|
||||
# cqi_subcorpus.dump(cqi_subcorpus.attrs['fields']['matchend'], first_match, last_match)
|
||||
# )
|
||||
# for x in match_boundaries:
|
||||
# print(x)
|
||||
cqi_p_attrs = {
|
||||
p_attr.name: p_attr
|
||||
for p_attr in cqi_corpus.positional_attributes.list()
|
||||
}
|
||||
payload['text'] = {'lexicon': {}, 'values': None}
|
||||
for text_id in range(0, text.size):
|
||||
text_lbound, text_rbound = text.cpos_by_id(text_id)
|
||||
text_cpos_range = range(text_lbound, text_rbound + 1)
|
||||
text_s_ids = s.ids_by_cpos(list(text_cpos_range))
|
||||
text_ent_ids = ent.ids_by_cpos(list(text_cpos_range))
|
||||
payload['text']['lexicon'][text_id] = {
|
||||
'bounds': [text_lbound, text_rbound],
|
||||
cqi_s_attrs = {
|
||||
s_attr.name: s_attr
|
||||
for s_attr in cqi_corpus.structural_attributes.list()
|
||||
}
|
||||
payload = {
|
||||
'corpus': {
|
||||
'bounds': [0, cqi_corpus.size - 1],
|
||||
'counts': {
|
||||
's': len([x for x in text_s_ids if x != -1]),
|
||||
'ent': len([x for x in text_ent_ids if x != -1]),
|
||||
'token': text_rbound - text_lbound + 1
|
||||
'token': cqi_corpus.size
|
||||
},
|
||||
'freqs': {
|
||||
'word': dict(
|
||||
Counter(word.ids_by_cpos(list(text_cpos_range)))
|
||||
),
|
||||
'lemma': dict(
|
||||
Counter(lemma.ids_by_cpos(list(text_cpos_range)))
|
||||
),
|
||||
'pos': dict(
|
||||
Counter(pos.ids_by_cpos(list(text_cpos_range)))
|
||||
),
|
||||
'simple_pos': dict(
|
||||
Counter(simple_pos.ids_by_cpos(list(text_cpos_range)))
|
||||
)
|
||||
}
|
||||
}
|
||||
payload['text']['values'] = text_value_names
|
||||
payload['s'] = {'lexicon': {}, 'values': None}
|
||||
for s_id in range(0, s.size):
|
||||
payload['s']['lexicon'][s_id] = {
|
||||
# 'bounds': s.cpos_by_id(s_id)
|
||||
}
|
||||
payload['s']['values'] = [
|
||||
sub_attr.name[(len(s.name) + 1):]
|
||||
for sub_attr in corpus.structural_attributes.list(filters={'part_of': s})
|
||||
]
|
||||
payload['ent'] = {'lexicon': {}, 'values': None}
|
||||
for ent_id in range(0, ent.size):
|
||||
payload['ent']['lexicon'][ent_id] = {
|
||||
# 'bounds': ent.cpos_by_id(ent_id)
|
||||
}
|
||||
payload['ent']['values'] = ent_value_names
|
||||
payload['lookups'] = {
|
||||
'corpus': {},
|
||||
'text': {
|
||||
text_id: {
|
||||
text_value_name: text_values[text_value_name_idx][text_id_idx]
|
||||
for text_value_name_idx, text_value_name in enumerate(text_value_names)
|
||||
} for text_id_idx, text_id in enumerate(range(0, text.size))
|
||||
'freqs': {}
|
||||
},
|
||||
's': {},
|
||||
'ent': {
|
||||
ent_id: {
|
||||
ent_value_name: ent_values[ent_value_name_idx][ent_id_idx]
|
||||
for ent_value_name_idx, ent_value_name in enumerate(ent_value_names)
|
||||
} for ent_id_idx, ent_id in enumerate(range(0, ent.size))
|
||||
},
|
||||
'word': dict(
|
||||
'p_attrs': {},
|
||||
's_attrs': {},
|
||||
'values': {'p_attrs': {}, 's_attrs': {}}
|
||||
}
|
||||
for p_attr in cqi_p_attrs.values():
|
||||
payload['corpus']['freqs'][p_attr.name] = dict(
|
||||
zip(
|
||||
range(0, word.lexicon_size),
|
||||
word.values_by_ids(list(range(0, word.lexicon_size)))
|
||||
)
|
||||
),
|
||||
'lemma': dict(
|
||||
zip(
|
||||
range(0, lemma.lexicon_size),
|
||||
lemma.values_by_ids(list(range(0, lemma.lexicon_size)))
|
||||
)
|
||||
),
|
||||
'pos': dict(
|
||||
zip(
|
||||
range(0, pos.lexicon_size),
|
||||
pos.values_by_ids(list(range(0, pos.lexicon_size)))
|
||||
)
|
||||
),
|
||||
'simple_pos': dict(
|
||||
zip(
|
||||
range(0, simple_pos.lexicon_size),
|
||||
simple_pos.values_by_ids(list(range(0, simple_pos.lexicon_size)))
|
||||
range(0, p_attr.lexicon_size),
|
||||
p_attr.freqs_by_ids(list(range(0, p_attr.lexicon_size)))
|
||||
)
|
||||
)
|
||||
}
|
||||
# print(payload)
|
||||
payload['p_attrs'][p_attr.name] = dict(
|
||||
zip(
|
||||
range(0, cqi_corpus.size),
|
||||
p_attr.ids_by_cpos(list(range(0, cqi_corpus.size)))
|
||||
)
|
||||
)
|
||||
payload['values']['p_attrs'][p_attr.name] = dict(
|
||||
zip(
|
||||
range(0, p_attr.lexicon_size),
|
||||
p_attr.values_by_ids(list(range(0, p_attr.lexicon_size)))
|
||||
)
|
||||
)
|
||||
for s_attr in cqi_s_attrs.values():
|
||||
if s_attr.has_values:
|
||||
continue
|
||||
payload['corpus']['counts'][s_attr.name] = s_attr.size
|
||||
payload['s_attrs'][s_attr.name] = {'lexicon': {}, 'values': None}
|
||||
payload['values']['s_attrs'][s_attr.name] = {}
|
||||
for id in range(0, s_attr.size):
|
||||
payload['s_attrs'][s_attr.name]['lexicon'][id] = {}
|
||||
lbound, rbound = s_attr.cpos_by_id(id)
|
||||
payload['s_attrs'][s_attr.name]['lexicon'][id]['bounds'] = [lbound, rbound]
|
||||
payload['s_attrs'][s_attr.name]['lexicon'][id]['counts'] = {}
|
||||
payload['s_attrs'][s_attr.name]['lexicon'][id]['counts']['token'] = rbound - lbound + 1
|
||||
if s_attr.name not in ['text', 's']:
|
||||
continue
|
||||
cpos_range = range(lbound, rbound + 1)
|
||||
payload['s_attrs'][s_attr.name]['lexicon'][id]['counts']['ent'] = len({x for x in cqi_s_attrs['ent'].ids_by_cpos(list(cpos_range)) if x != -1})
|
||||
if s_attr.name != 'text':
|
||||
continue
|
||||
payload['s_attrs'][s_attr.name]['lexicon'][id]['counts']['s'] = len({x for x in cqi_s_attrs['s'].ids_by_cpos(list(cpos_range)) if x != -1})
|
||||
payload['s_attrs'][s_attr.name]['lexicon'][id]['freqs'] = {}
|
||||
for p_attr in cqi_p_attrs.values():
|
||||
payload['s_attrs'][s_attr.name]['lexicon'][id]['freqs'][p_attr.name] = dict(Counter(p_attr.ids_by_cpos(list(cpos_range))))
|
||||
sub_s_attrs = cqi_corpus.structural_attributes.list(filters={'part_of': s_attr})
|
||||
s_attr_value_names = [
|
||||
sub_s_attr.name[(len(s_attr.name) + 1):]
|
||||
for sub_s_attr in sub_s_attrs
|
||||
]
|
||||
sub_s_attr_values = [
|
||||
sub_s_attr.values_by_ids(list(range(0, s_attr.size)))
|
||||
for sub_s_attr in sub_s_attrs
|
||||
]
|
||||
payload['s_attrs'][s_attr.name]['values'] = s_attr_value_names
|
||||
payload['values']['s_attrs'][s_attr.name] = {
|
||||
s_attr_id: {
|
||||
s_attr_value_name: sub_s_attr_values[s_attr_value_name_idx][s_attr_id_idx]
|
||||
for s_attr_value_name_idx, s_attr_value_name in enumerate(
|
||||
payload['s_attrs'][s_attr.name]['values']
|
||||
)
|
||||
} for s_attr_id_idx, s_attr_id in enumerate(range(0, s_attr.size))
|
||||
}
|
||||
with open(visualization_data_file_path, 'w') as f:
|
||||
json.dump(payload, f)
|
||||
return {'code': 200, 'msg': 'OK', 'payload': payload}
|
||||
|
||||
|
||||
|
@ -32,8 +32,8 @@ def cqi_corpora_corpus_subcorpora_subcorpus_drop(cqi_client: cqi.CQiClient, corp
|
||||
cqi_corpus = cqi_client.corpora.get(corpus_name)
|
||||
cqi_subcorpus = cqi_corpus.subcorpora.get(subcorpus_name)
|
||||
cqi_status = cqi_subcorpus.drop()
|
||||
payload = {'code': cqi_status,
|
||||
'msg': cqi.api.specification.lookup[cqi_status]}
|
||||
payload = {'code': cqi_status.code,
|
||||
'msg': cqi_status.__class__.__name__}
|
||||
return {'code': 200, 'msg': 'OK', 'payload': payload}
|
||||
|
||||
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -34,26 +34,26 @@ class CorpusAnalysisApp {
|
||||
.then(
|
||||
cQiCorpus => {
|
||||
this.data.corpus = {o: cQiCorpus};
|
||||
// this.data.corpus.o.getVisualizationData()
|
||||
// .then(
|
||||
// (data) => {
|
||||
// console.log(data);
|
||||
// this.renderGeneralCorpusInfo(data);
|
||||
// this.renderTextInfoList(data);
|
||||
// this.renderTextProportionsGraphic(data);
|
||||
// this.renderWordFrequenciesGraphic(data);
|
||||
// this.renderBoundsGraphic(data);
|
||||
// }
|
||||
// );
|
||||
this.data.corpus.o.getCorpusData()
|
||||
.then(corpusData => {
|
||||
console.log(corpusData);
|
||||
this.renderGeneralCorpusInfo(corpusData);
|
||||
this.renderTextInfoList(corpusData);
|
||||
this.renderTextProportionsGraphic(corpusData);
|
||||
this.renderFrequenciesGraphic(corpusData);
|
||||
this.renderBoundsGraphic(corpusData);
|
||||
});
|
||||
this.data.corpus.o.getVisualizationData()
|
||||
.then(
|
||||
(data) => {
|
||||
console.log(data);
|
||||
this.renderGeneralCorpusInfo(data);
|
||||
this.renderTextInfoList(data);
|
||||
this.renderTextProportionsGraphic(data);
|
||||
this.renderFrequenciesGraphic(data);
|
||||
this.renderBoundsGraphic(data);
|
||||
}
|
||||
);
|
||||
// this.data.corpus.o.getCorpusData()
|
||||
// .then(corpusData => {
|
||||
// console.log(corpusData);
|
||||
// this.renderGeneralCorpusInfo(corpusData);
|
||||
// this.renderTextInfoList(corpusData);
|
||||
// this.renderTextProportionsGraphic(corpusData);
|
||||
// this.renderFrequenciesGraphic(corpusData);
|
||||
// this.renderBoundsGraphic(corpusData);
|
||||
// });
|
||||
// TODO: Don't do this here
|
||||
cQiCorpus.updateDb();
|
||||
this.enableActionElements();
|
||||
@ -117,29 +117,29 @@ class CorpusAnalysisApp {
|
||||
}
|
||||
|
||||
renderGeneralCorpusInfo(corpusData) {
|
||||
document.querySelector('.corpus-num-tokens').innerHTML = corpusData.corpus.lexicon[0].counts.token;
|
||||
document.querySelector('.corpus-num-s').innerHTML = corpusData.corpus.lexicon[0].counts.s;
|
||||
document.querySelector('.corpus-num-unique-words').innerHTML = Object.entries(corpusData.corpus.lexicon[0].freqs.word).length;
|
||||
document.querySelector('.corpus-num-unique-lemmas').innerHTML = Object.entries(corpusData.corpus.lexicon[0].freqs.lemma).length;
|
||||
document.querySelector('.corpus-num-unique-pos').innerHTML = Object.entries(corpusData.corpus.lexicon[0].freqs.pos).length;
|
||||
document.querySelector('.corpus-num-unique-simple-pos').innerHTML = Object.entries(corpusData.corpus.lexicon[0].freqs.simple_pos).length;
|
||||
document.querySelector('.corpus-num-tokens').innerHTML = corpusData.corpus.counts.token;
|
||||
document.querySelector('.corpus-num-s').innerHTML = corpusData.corpus.counts.s;
|
||||
document.querySelector('.corpus-num-unique-words').innerHTML = Object.entries(corpusData.corpus.freqs.word).length;
|
||||
document.querySelector('.corpus-num-unique-lemmas').innerHTML = Object.entries(corpusData.corpus.freqs.lemma).length;
|
||||
document.querySelector('.corpus-num-unique-pos').innerHTML = Object.entries(corpusData.corpus.freqs.pos).length;
|
||||
document.querySelector('.corpus-num-unique-simple-pos').innerHTML = Object.entries(corpusData.corpus.freqs.simple_pos).length;
|
||||
}
|
||||
|
||||
renderTextInfoList(corpusData) {
|
||||
let corpusTextInfoListElement = document.querySelector('.corpus-text-info-list');
|
||||
let corpusTextInfoList = new CorpusTextInfoList(corpusTextInfoListElement);
|
||||
let texts = corpusData.text.lexicon;
|
||||
let texts = corpusData.s_attrs.text.lexicon;
|
||||
let textData = [];
|
||||
for (let i = 0; i < Object.entries(texts).length; i++) {
|
||||
let resource = {
|
||||
title: corpusData.lookups.text[i].title,
|
||||
publishing_year: corpusData.lookups.text[i].publishing_year,
|
||||
num_tokens: corpusData.text.lexicon[i].counts.token,
|
||||
num_sentences: corpusData.text.lexicon[i].counts.s,
|
||||
num_unique_words: Object.entries(corpusData.text.lexicon[i].freqs.word).length,
|
||||
num_unique_lemmas: Object.entries(corpusData.text.lexicon[i].freqs.lemma).length,
|
||||
num_unique_pos: Object.entries(corpusData.text.lexicon[i].freqs.pos).length,
|
||||
num_unique_simple_pos: Object.entries(corpusData.text.lexicon[i].freqs.simple_pos).length
|
||||
title: corpusData.values.s_attrs.text[i].title,
|
||||
publishing_year: corpusData.values.s_attrs.text[i].publishing_year,
|
||||
num_tokens: corpusData.s_attrs.text.lexicon[i].counts.token,
|
||||
num_sentences: corpusData.s_attrs.text.lexicon[i].counts.s,
|
||||
num_unique_words: Object.entries(corpusData.s_attrs.text.lexicon[i].freqs.word).length,
|
||||
num_unique_lemmas: Object.entries(corpusData.s_attrs.text.lexicon[i].freqs.lemma).length,
|
||||
num_unique_pos: Object.entries(corpusData.s_attrs.text.lexicon[i].freqs.pos).length,
|
||||
num_unique_simple_pos: Object.entries(corpusData.s_attrs.text.lexicon[i].freqs.simple_pos).length
|
||||
};
|
||||
|
||||
textData.push(resource);
|
||||
@ -148,33 +148,29 @@ class CorpusAnalysisApp {
|
||||
corpusTextInfoList.add(textData);
|
||||
|
||||
let textCountChipElement = document.querySelector('.text-count-chip');
|
||||
textCountChipElement.innerHTML = `Text count: ${Object.values(corpusData.text.lexicon).length}`;
|
||||
textCountChipElement.innerHTML = `Text count: ${corpusData.corpus.counts.text}`;
|
||||
}
|
||||
|
||||
renderTextProportionsGraphic(corpusData) {
|
||||
let textProportionsGraphicElement = document.querySelector('#text-proportions-graphic');
|
||||
let texts = Object.entries(corpusData.text.lexicon);
|
||||
let texts = Object.entries(corpusData.s_attrs.text.lexicon);
|
||||
let graphData = [
|
||||
{
|
||||
values: texts.map(text => text[1].counts.token),
|
||||
labels: texts.map(text => `${corpusData.lookups.text[text[0]].title} (${corpusData.lookups.text[text[0]].publishing_year})`),
|
||||
labels: texts.map(text => `${corpusData.values.s_attrs.text[text[0]].title} (${corpusData.values.s_attrs.text[text[0]].publishing_year})`),
|
||||
type: 'pie'
|
||||
}
|
||||
];
|
||||
let graphLayout = {
|
||||
// height: 600,
|
||||
// width: 900
|
||||
};
|
||||
let config = {responsive: true};
|
||||
|
||||
Plotly.newPlot(textProportionsGraphicElement, graphData, graphLayout, config);
|
||||
Plotly.newPlot(textProportionsGraphicElement, graphData, config);
|
||||
}
|
||||
|
||||
renderFrequenciesGraphic(corpusData) {
|
||||
let frequenciesTokenCategoryDropdownElement = document.querySelector('[data-target="frequencies-token-category-dropdown"]');
|
||||
let frequenciesTokenCategoryDropdownListElement = document.querySelector("#frequencies-token-category-dropdown");
|
||||
let frequenciesGraphicElement = document.querySelector('#frequencies-graphic');
|
||||
let texts = Object.entries(corpusData.text.lexicon);
|
||||
let texts = Object.entries(corpusData.s_attrs.text.lexicon);
|
||||
|
||||
|
||||
frequenciesTokenCategoryDropdownListElement.addEventListener('click', (event) => {
|
||||
@ -196,13 +192,13 @@ class CorpusAnalysisApp {
|
||||
|
||||
createFrequenciesGraphData(category, texts, corpusData) {
|
||||
let graphData = [];
|
||||
let sortedData = Object.entries(corpusData.corpus.lexicon[0].freqs[category]).sort((a, b) => b[1] - a[1]).slice(0, 5);
|
||||
let sortedData = Object.entries(corpusData.corpus.freqs[category]).sort((a, b) => b[1] - a[1]).slice(0, 5);
|
||||
|
||||
for (let item of sortedData) {
|
||||
let data = {
|
||||
x: texts.map(text => `${corpusData.lookups.text[text[0]].title} (${corpusData.lookups.text[text[0]].publishing_year})`),
|
||||
x: texts.map(text => `${corpusData.values.s_attrs.text[text[0]].title} (${corpusData.values.s_attrs.text[text[0]].publishing_year})`),
|
||||
y: texts.map(text => text[1].freqs[category][item[0]]),
|
||||
name: corpusData.lookups[category][item[0]],
|
||||
name: corpusData.values.p_attrs[category][item[0]],
|
||||
type: 'bar'
|
||||
};
|
||||
graphData.push(data);
|
||||
@ -215,22 +211,20 @@ class CorpusAnalysisApp {
|
||||
let boundsGraphicElement = document.querySelector('#bounds-graphic');
|
||||
|
||||
let graphData = [];
|
||||
let texts = Object.entries(corpusData.text.lexicon);
|
||||
let texts = Object.entries(corpusData.s_attrs.text.lexicon);
|
||||
|
||||
graphData = [{
|
||||
type: 'bar',
|
||||
x: texts.map(text => text[1].bounds[1] - text[1].bounds[0]),
|
||||
y: texts.map(text => corpusData.lookups.text[text[0]].title),
|
||||
y: texts.map(text => corpusData.values.s_attrs.text[text[0]].title),
|
||||
base: texts.map(text => text[1].bounds[0]),
|
||||
text: texts.map(text => `${corpusData.lookups.text[text[0]].title} (${corpusData.lookups.text[text[0]].publishing_year})`),
|
||||
text: texts.map(text => `${corpusData.values.s_attrs.text[text[0]].title} (${corpusData.values.s_attrs.text[text[0]].publishing_year})`),
|
||||
orientation: 'h',
|
||||
hovertemplate: '%{base} - %{x} <br>%{y}',
|
||||
showlegend: false
|
||||
}];
|
||||
|
||||
let graphLayout = {
|
||||
// height: 600,
|
||||
// width: 2000,
|
||||
barmode: 'stack',
|
||||
type: 'bar',
|
||||
showgrid: false,
|
||||
|
@ -103,6 +103,7 @@ class CorpusTextInfoList extends ResourceList {
|
||||
if (sortElement !== clickedSortElement) {
|
||||
sortElement.classList.remove('asc', 'desc');
|
||||
sortElement.style.color = 'black';
|
||||
sortElement.innerHTML = 'arrow_drop_down';
|
||||
};
|
||||
});
|
||||
clickedSortElement.style.color = '#aa9cc9';
|
||||
|
Loading…
x
Reference in New Issue
Block a user