mirror of
https://gitlab.ub.uni-bielefeld.de/sfb1288inf/nopaque.git
synced 2025-01-18 14:00:33 +00:00
Compare commits
No commits in common. "6c31788402820ac5a544100cac70663254ac2900" and "e194ce75415a8896f502ce255e55d6e2f2fe7521" have entirely different histories.
6c31788402
...
e194ce7541
@ -18,8 +18,8 @@ def cqi_connect(cqi_client: cqi.CQiClient):
|
|||||||
'msg': 'Internal Server Error',
|
'msg': 'Internal Server Error',
|
||||||
'payload': {'code': e.args[0], 'desc': e.args[1]}
|
'payload': {'code': e.args[0], 'desc': e.args[1]}
|
||||||
}
|
}
|
||||||
payload = {'code': cqi_status.code,
|
payload = {'code': cqi_status,
|
||||||
'msg': cqi_status.__class__.__name__}
|
'msg': cqi.api.specification.lookup[cqi_status]}
|
||||||
return {'code': 200, 'msg': 'OK', 'payload': payload}
|
return {'code': 200, 'msg': 'OK', 'payload': payload}
|
||||||
|
|
||||||
|
|
||||||
@ -28,8 +28,8 @@ def cqi_connect(cqi_client: cqi.CQiClient):
|
|||||||
@cqi_over_socketio
|
@cqi_over_socketio
|
||||||
def cqi_disconnect(cqi_client: cqi.CQiClient):
|
def cqi_disconnect(cqi_client: cqi.CQiClient):
|
||||||
cqi_status = cqi_client.disconnect()
|
cqi_status = cqi_client.disconnect()
|
||||||
payload = {'code': cqi_status.code,
|
payload = {'code': cqi_status,
|
||||||
'msg': cqi_status.__class__.__name__}
|
'msg': cqi.api.specification.lookup[cqi_status]}
|
||||||
return {'code': 200, 'msg': 'OK', 'payload': payload}
|
return {'code': 200, 'msg': 'OK', 'payload': payload}
|
||||||
|
|
||||||
|
|
||||||
@ -38,6 +38,6 @@ def cqi_disconnect(cqi_client: cqi.CQiClient):
|
|||||||
@cqi_over_socketio
|
@cqi_over_socketio
|
||||||
def cqi_ping(cqi_client: cqi.CQiClient):
|
def cqi_ping(cqi_client: cqi.CQiClient):
|
||||||
cqi_status = cqi_client.ping()
|
cqi_status = cqi_client.ping()
|
||||||
payload = {'code': cqi_status.code,
|
payload = {'code': cqi_status,
|
||||||
'msg': cqi_status.__class__.__name__}
|
'msg': cqi.api.specification.lookup[cqi_status]}
|
||||||
return {'code': 200, 'msg': 'OK', 'payload': payload}
|
return {'code': 200, 'msg': 'OK', 'payload': payload}
|
||||||
|
@ -1,9 +1,8 @@
|
|||||||
from collections import Counter
|
from collections import Counter
|
||||||
from flask import session
|
from flask import session
|
||||||
import cqi
|
import cqi
|
||||||
import json
|
|
||||||
import math
|
import math
|
||||||
import os
|
import random
|
||||||
from app import db, socketio
|
from app import db, socketio
|
||||||
from app.decorators import socketio_login_required
|
from app.decorators import socketio_login_required
|
||||||
from app.models import Corpus
|
from app.models import Corpus
|
||||||
@ -17,8 +16,8 @@ from .utils import cqi_over_socketio, lookups_by_cpos
|
|||||||
def cqi_corpora_corpus_drop(cqi_client: cqi.CQiClient, corpus_name: str):
|
def cqi_corpora_corpus_drop(cqi_client: cqi.CQiClient, corpus_name: str):
|
||||||
cqi_corpus = cqi_client.corpora.get(corpus_name)
|
cqi_corpus = cqi_client.corpora.get(corpus_name)
|
||||||
cqi_status = cqi_corpus.drop()
|
cqi_status = cqi_corpus.drop()
|
||||||
payload = {'code': cqi_status.code,
|
payload = {'code': cqi_status,
|
||||||
'msg': cqi_status.__class__.__name__}
|
'msg': cqi.api.specification.lookup[cqi_status]}
|
||||||
return {'code': 200, 'msg': 'OK', 'payload': payload}
|
return {'code': 200, 'msg': 'OK', 'payload': payload}
|
||||||
|
|
||||||
|
|
||||||
@ -28,8 +27,8 @@ def cqi_corpora_corpus_drop(cqi_client: cqi.CQiClient, corpus_name: str):
|
|||||||
def cqi_corpora_corpus_query(cqi_client: cqi.CQiClient, corpus_name: str, subcorpus_name: str, query: str): # noqa
|
def cqi_corpora_corpus_query(cqi_client: cqi.CQiClient, corpus_name: str, subcorpus_name: str, query: str): # noqa
|
||||||
cqi_corpus = cqi_client.corpora.get(corpus_name)
|
cqi_corpus = cqi_client.corpora.get(corpus_name)
|
||||||
cqi_status = cqi_corpus.query(subcorpus_name, query)
|
cqi_status = cqi_corpus.query(subcorpus_name, query)
|
||||||
payload = {'code': cqi_status.code,
|
payload = {'code': cqi_status,
|
||||||
'msg': cqi_status.__class__.__name__}
|
'msg': cqi.api.specification.lookup[cqi_status]}
|
||||||
return {'code': 200, 'msg': 'OK', 'payload': payload}
|
return {'code': 200, 'msg': 'OK', 'payload': payload}
|
||||||
|
|
||||||
|
|
||||||
@ -50,109 +49,179 @@ def cqi_corpora_corpus_update_db(cqi_client: cqi.CQiClient, corpus_name: str):
|
|||||||
@socketio_login_required
|
@socketio_login_required
|
||||||
@cqi_over_socketio
|
@cqi_over_socketio
|
||||||
def cqi_corpora_corpus_get_visualization_data(cqi_client: cqi.CQiClient, corpus_name: str):
|
def cqi_corpora_corpus_get_visualization_data(cqi_client: cqi.CQiClient, corpus_name: str):
|
||||||
corpus = Corpus.query.get(session['d']['corpus_id'])
|
corpus = cqi_client.corpora.get(corpus_name)
|
||||||
visualization_data_file_path = os.path.join(corpus.path, 'cwb', 'visualization_data.json')
|
# s_attrs = [x for x in corpus.structural_attributes.list() if not x.has_values]
|
||||||
if os.path.exists(visualization_data_file_path):
|
# p_attrs = corpus.positional_attributes.list()
|
||||||
with open(visualization_data_file_path, 'r') as f:
|
# payload = {
|
||||||
payload = json.load(f)
|
# 's_attrs': {},
|
||||||
return {'code': 200, 'msg': 'OK', 'payload': payload}
|
# 'p_attrs': {},
|
||||||
cqi_corpus = cqi_client.corpora.get(corpus_name)
|
# 'values': {
|
||||||
##########################################################################
|
# 's_attrs': {},
|
||||||
# A faster way to get cpos boundaries for smaller s_attrs #
|
# 'p_attrs': {}
|
||||||
##########################################################################
|
# }
|
||||||
# cqi_corpus.query('Last', '<s> []* </s>;')
|
# }
|
||||||
# cqi_subcorpus = cqi_corpus.subcorpora.get('Last')
|
# for s_attr in s_attrs:
|
||||||
# print(cqi_subcorpus.size)
|
# s_attr_lbound, s_attr_rbound = s_attr.cpos_by_id(text_id)
|
||||||
# first_match = 0
|
# s_attr_cpos_range = range(s_attr_lbound, s_attr_rbound + 1)
|
||||||
# last_match = cqi_subcorpus.attrs['size'] - 1
|
# payload['text']['lexicon'][text_id] = {
|
||||||
# match_boundaries = zip(
|
# 's_attrs': [s_attr_lbound, s_attr_rbound],
|
||||||
# list(range(first_match, last_match + 1)),
|
# 'counts': {
|
||||||
# cqi_subcorpus.dump(cqi_subcorpus.attrs['fields']['match'], first_match, last_match),
|
# 'token': s_attr_rbound - s_attr_lbound + 1
|
||||||
# cqi_subcorpus.dump(cqi_subcorpus.attrs['fields']['matchend'], first_match, last_match)
|
# },
|
||||||
# )
|
# 'freqs': {
|
||||||
# for x in match_boundaries:
|
# p_attr.name: dict(Counter(p_attr.ids_by_cpos(list(s_attr_cpos_range))))
|
||||||
# print(x)
|
# for p_attr in p_attrs
|
||||||
cqi_p_attrs = {
|
# }
|
||||||
p_attr.name: p_attr
|
# }
|
||||||
for p_attr in cqi_corpus.positional_attributes.list()
|
# for p_attr in p_attrs:
|
||||||
}
|
# payload['p_attrs'] = dict(
|
||||||
cqi_s_attrs = {
|
|
||||||
s_attr.name: s_attr
|
# )
|
||||||
for s_attr in cqi_corpus.structural_attributes.list()
|
# payload['values']['p_attrs'] = dict(
|
||||||
}
|
# zip(
|
||||||
payload = {
|
# range(0, p_attr.lexicon_size),
|
||||||
'corpus': {
|
# p_attr.values_by_ids(list(range(0, p_attr.lexicon_size)))
|
||||||
'bounds': [0, cqi_corpus.size - 1],
|
# )
|
||||||
'counts': {
|
# )
|
||||||
'token': cqi_corpus.size
|
text = corpus.structural_attributes.get('text')
|
||||||
},
|
text_value_names = []
|
||||||
'freqs': {}
|
text_values = []
|
||||||
|
for text_sub_attr in corpus.structural_attributes.list(filters={'part_of': text}):
|
||||||
|
text_value_names.append(text_sub_attr.name[(len(text.name) + 1):])
|
||||||
|
text_values.append(text_sub_attr.values_by_ids(list(range(0, text.size))))
|
||||||
|
s = corpus.structural_attributes.get('s')
|
||||||
|
ent = corpus.structural_attributes.get('ent')
|
||||||
|
ent_value_names = []
|
||||||
|
ent_values = []
|
||||||
|
for ent_sub_attr in corpus.structural_attributes.list(filters={'part_of': ent}):
|
||||||
|
ent_value_names.append(ent_sub_attr.name[(len(ent.name) + 1):])
|
||||||
|
ent_values.append(ent_sub_attr.values_by_ids(list(range(0, ent.size))))
|
||||||
|
word = corpus.positional_attributes.get('word')
|
||||||
|
lemma = corpus.positional_attributes.get('lemma')
|
||||||
|
pos = corpus.positional_attributes.get('pos')
|
||||||
|
simple_pos = corpus.positional_attributes.get('simple_pos')
|
||||||
|
payload = {}
|
||||||
|
payload['corpus'] = {'lexicon': {}, 'values': []}
|
||||||
|
payload['corpus']['lexicon'][0] = {
|
||||||
|
'bounds': [0, corpus.size - 1],
|
||||||
|
'counts': {
|
||||||
|
'text': text.size,
|
||||||
|
's': s.size,
|
||||||
|
'ent': ent.size,
|
||||||
|
'token': corpus.size
|
||||||
},
|
},
|
||||||
'p_attrs': {},
|
'freqs': {
|
||||||
's_attrs': {},
|
'word': dict(
|
||||||
'values': {'p_attrs': {}, 's_attrs': {}}
|
zip(
|
||||||
}
|
range(0, word.lexicon_size),
|
||||||
for p_attr in cqi_p_attrs.values():
|
word.freqs_by_ids(list(range(0, word.lexicon_size)))
|
||||||
payload['corpus']['freqs'][p_attr.name] = dict(
|
|
||||||
zip(
|
|
||||||
range(0, p_attr.lexicon_size),
|
|
||||||
p_attr.freqs_by_ids(list(range(0, p_attr.lexicon_size)))
|
|
||||||
)
|
|
||||||
)
|
|
||||||
payload['p_attrs'][p_attr.name] = dict(
|
|
||||||
zip(
|
|
||||||
range(0, cqi_corpus.size),
|
|
||||||
p_attr.ids_by_cpos(list(range(0, cqi_corpus.size)))
|
|
||||||
)
|
|
||||||
)
|
|
||||||
payload['values']['p_attrs'][p_attr.name] = dict(
|
|
||||||
zip(
|
|
||||||
range(0, p_attr.lexicon_size),
|
|
||||||
p_attr.values_by_ids(list(range(0, p_attr.lexicon_size)))
|
|
||||||
)
|
|
||||||
)
|
|
||||||
for s_attr in cqi_s_attrs.values():
|
|
||||||
if s_attr.has_values:
|
|
||||||
continue
|
|
||||||
payload['corpus']['counts'][s_attr.name] = s_attr.size
|
|
||||||
payload['s_attrs'][s_attr.name] = {'lexicon': {}, 'values': None}
|
|
||||||
payload['values']['s_attrs'][s_attr.name] = {}
|
|
||||||
for id in range(0, s_attr.size):
|
|
||||||
payload['s_attrs'][s_attr.name]['lexicon'][id] = {}
|
|
||||||
lbound, rbound = s_attr.cpos_by_id(id)
|
|
||||||
payload['s_attrs'][s_attr.name]['lexicon'][id]['bounds'] = [lbound, rbound]
|
|
||||||
payload['s_attrs'][s_attr.name]['lexicon'][id]['counts'] = {}
|
|
||||||
payload['s_attrs'][s_attr.name]['lexicon'][id]['counts']['token'] = rbound - lbound + 1
|
|
||||||
if s_attr.name not in ['text', 's']:
|
|
||||||
continue
|
|
||||||
cpos_range = range(lbound, rbound + 1)
|
|
||||||
payload['s_attrs'][s_attr.name]['lexicon'][id]['counts']['ent'] = len({x for x in cqi_s_attrs['ent'].ids_by_cpos(list(cpos_range)) if x != -1})
|
|
||||||
if s_attr.name != 'text':
|
|
||||||
continue
|
|
||||||
payload['s_attrs'][s_attr.name]['lexicon'][id]['counts']['s'] = len({x for x in cqi_s_attrs['s'].ids_by_cpos(list(cpos_range)) if x != -1})
|
|
||||||
payload['s_attrs'][s_attr.name]['lexicon'][id]['freqs'] = {}
|
|
||||||
for p_attr in cqi_p_attrs.values():
|
|
||||||
payload['s_attrs'][s_attr.name]['lexicon'][id]['freqs'][p_attr.name] = dict(Counter(p_attr.ids_by_cpos(list(cpos_range))))
|
|
||||||
sub_s_attrs = cqi_corpus.structural_attributes.list(filters={'part_of': s_attr})
|
|
||||||
s_attr_value_names = [
|
|
||||||
sub_s_attr.name[(len(s_attr.name) + 1):]
|
|
||||||
for sub_s_attr in sub_s_attrs
|
|
||||||
]
|
|
||||||
sub_s_attr_values = [
|
|
||||||
sub_s_attr.values_by_ids(list(range(0, s_attr.size)))
|
|
||||||
for sub_s_attr in sub_s_attrs
|
|
||||||
]
|
|
||||||
payload['s_attrs'][s_attr.name]['values'] = s_attr_value_names
|
|
||||||
payload['values']['s_attrs'][s_attr.name] = {
|
|
||||||
s_attr_id: {
|
|
||||||
s_attr_value_name: sub_s_attr_values[s_attr_value_name_idx][s_attr_id_idx]
|
|
||||||
for s_attr_value_name_idx, s_attr_value_name in enumerate(
|
|
||||||
payload['s_attrs'][s_attr.name]['values']
|
|
||||||
)
|
)
|
||||||
} for s_attr_id_idx, s_attr_id in enumerate(range(0, s_attr.size))
|
),
|
||||||
|
'lemma': dict(
|
||||||
|
zip(
|
||||||
|
range(0, lemma.lexicon_size),
|
||||||
|
lemma.freqs_by_ids(list(range(0, lemma.lexicon_size)))
|
||||||
|
)
|
||||||
|
),
|
||||||
|
'pos': dict(
|
||||||
|
zip(
|
||||||
|
range(0, pos.lexicon_size),
|
||||||
|
pos.freqs_by_ids(list(range(0, pos.lexicon_size)))
|
||||||
|
)
|
||||||
|
),
|
||||||
|
'simple_pos': dict(
|
||||||
|
zip(
|
||||||
|
range(0, simple_pos.lexicon_size),
|
||||||
|
simple_pos.freqs_by_ids(list(range(0, simple_pos.lexicon_size)))
|
||||||
|
)
|
||||||
|
)
|
||||||
}
|
}
|
||||||
with open(visualization_data_file_path, 'w') as f:
|
}
|
||||||
json.dump(payload, f)
|
payload['text'] = {'lexicon': {}, 'values': None}
|
||||||
|
for text_id in range(0, text.size):
|
||||||
|
text_lbound, text_rbound = text.cpos_by_id(text_id)
|
||||||
|
text_cpos_range = range(text_lbound, text_rbound + 1)
|
||||||
|
text_s_ids = s.ids_by_cpos(list(text_cpos_range))
|
||||||
|
text_ent_ids = ent.ids_by_cpos(list(text_cpos_range))
|
||||||
|
payload['text']['lexicon'][text_id] = {
|
||||||
|
'bounds': [text_lbound, text_rbound],
|
||||||
|
'counts': {
|
||||||
|
's': len([x for x in text_s_ids if x != -1]),
|
||||||
|
'ent': len([x for x in text_ent_ids if x != -1]),
|
||||||
|
'token': text_rbound - text_lbound + 1
|
||||||
|
},
|
||||||
|
'freqs': {
|
||||||
|
'word': dict(
|
||||||
|
Counter(word.ids_by_cpos(list(text_cpos_range)))
|
||||||
|
),
|
||||||
|
'lemma': dict(
|
||||||
|
Counter(lemma.ids_by_cpos(list(text_cpos_range)))
|
||||||
|
),
|
||||||
|
'pos': dict(
|
||||||
|
Counter(pos.ids_by_cpos(list(text_cpos_range)))
|
||||||
|
),
|
||||||
|
'simple_pos': dict(
|
||||||
|
Counter(simple_pos.ids_by_cpos(list(text_cpos_range)))
|
||||||
|
)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
payload['text']['values'] = text_value_names
|
||||||
|
payload['s'] = {'lexicon': {}, 'values': None}
|
||||||
|
for s_id in range(0, s.size):
|
||||||
|
payload['s']['lexicon'][s_id] = {
|
||||||
|
# 'bounds': s.cpos_by_id(s_id)
|
||||||
|
}
|
||||||
|
payload['s']['values'] = [
|
||||||
|
sub_attr.name[(len(s.name) + 1):]
|
||||||
|
for sub_attr in corpus.structural_attributes.list(filters={'part_of': s})
|
||||||
|
]
|
||||||
|
payload['ent'] = {'lexicon': {}, 'values': None}
|
||||||
|
for ent_id in range(0, ent.size):
|
||||||
|
payload['ent']['lexicon'][ent_id] = {
|
||||||
|
# 'bounds': ent.cpos_by_id(ent_id)
|
||||||
|
}
|
||||||
|
payload['ent']['values'] = ent_value_names
|
||||||
|
payload['lookups'] = {
|
||||||
|
'corpus': {},
|
||||||
|
'text': {
|
||||||
|
text_id: {
|
||||||
|
text_value_name: text_values[text_value_name_idx][text_id_idx]
|
||||||
|
for text_value_name_idx, text_value_name in enumerate(text_value_names)
|
||||||
|
} for text_id_idx, text_id in enumerate(range(0, text.size))
|
||||||
|
},
|
||||||
|
's': {},
|
||||||
|
'ent': {
|
||||||
|
ent_id: {
|
||||||
|
ent_value_name: ent_values[ent_value_name_idx][ent_id_idx]
|
||||||
|
for ent_value_name_idx, ent_value_name in enumerate(ent_value_names)
|
||||||
|
} for ent_id_idx, ent_id in enumerate(range(0, ent.size))
|
||||||
|
},
|
||||||
|
'word': dict(
|
||||||
|
zip(
|
||||||
|
range(0, word.lexicon_size),
|
||||||
|
word.values_by_ids(list(range(0, word.lexicon_size)))
|
||||||
|
)
|
||||||
|
),
|
||||||
|
'lemma': dict(
|
||||||
|
zip(
|
||||||
|
range(0, lemma.lexicon_size),
|
||||||
|
lemma.values_by_ids(list(range(0, lemma.lexicon_size)))
|
||||||
|
)
|
||||||
|
),
|
||||||
|
'pos': dict(
|
||||||
|
zip(
|
||||||
|
range(0, pos.lexicon_size),
|
||||||
|
pos.values_by_ids(list(range(0, pos.lexicon_size)))
|
||||||
|
)
|
||||||
|
),
|
||||||
|
'simple_pos': dict(
|
||||||
|
zip(
|
||||||
|
range(0, simple_pos.lexicon_size),
|
||||||
|
simple_pos.values_by_ids(list(range(0, simple_pos.lexicon_size)))
|
||||||
|
)
|
||||||
|
)
|
||||||
|
}
|
||||||
|
# print(payload)
|
||||||
return {'code': 200, 'msg': 'OK', 'payload': payload}
|
return {'code': 200, 'msg': 'OK', 'payload': payload}
|
||||||
|
|
||||||
|
|
||||||
|
@ -32,8 +32,8 @@ def cqi_corpora_corpus_subcorpora_subcorpus_drop(cqi_client: cqi.CQiClient, corp
|
|||||||
cqi_corpus = cqi_client.corpora.get(corpus_name)
|
cqi_corpus = cqi_client.corpora.get(corpus_name)
|
||||||
cqi_subcorpus = cqi_corpus.subcorpora.get(subcorpus_name)
|
cqi_subcorpus = cqi_corpus.subcorpora.get(subcorpus_name)
|
||||||
cqi_status = cqi_subcorpus.drop()
|
cqi_status = cqi_subcorpus.drop()
|
||||||
payload = {'code': cqi_status.code,
|
payload = {'code': cqi_status,
|
||||||
'msg': cqi_status.__class__.__name__}
|
'msg': cqi.api.specification.lookup[cqi_status]}
|
||||||
return {'code': 200, 'msg': 'OK', 'payload': payload}
|
return {'code': 200, 'msg': 'OK', 'payload': payload}
|
||||||
|
|
||||||
|
|
||||||
|
File diff suppressed because it is too large
Load Diff
@ -34,26 +34,26 @@ class CorpusAnalysisApp {
|
|||||||
.then(
|
.then(
|
||||||
cQiCorpus => {
|
cQiCorpus => {
|
||||||
this.data.corpus = {o: cQiCorpus};
|
this.data.corpus = {o: cQiCorpus};
|
||||||
this.data.corpus.o.getVisualizationData()
|
// this.data.corpus.o.getVisualizationData()
|
||||||
.then(
|
// .then(
|
||||||
(data) => {
|
// (data) => {
|
||||||
console.log(data);
|
// console.log(data);
|
||||||
this.renderGeneralCorpusInfo(data);
|
// this.renderGeneralCorpusInfo(data);
|
||||||
this.renderTextInfoList(data);
|
// this.renderTextInfoList(data);
|
||||||
this.renderTextProportionsGraphic(data);
|
// this.renderTextProportionsGraphic(data);
|
||||||
this.renderFrequenciesGraphic(data);
|
// this.renderWordFrequenciesGraphic(data);
|
||||||
this.renderBoundsGraphic(data);
|
// this.renderBoundsGraphic(data);
|
||||||
}
|
// }
|
||||||
);
|
// );
|
||||||
// this.data.corpus.o.getCorpusData()
|
this.data.corpus.o.getCorpusData()
|
||||||
// .then(corpusData => {
|
.then(corpusData => {
|
||||||
// console.log(corpusData);
|
console.log(corpusData);
|
||||||
// this.renderGeneralCorpusInfo(corpusData);
|
this.renderGeneralCorpusInfo(corpusData);
|
||||||
// this.renderTextInfoList(corpusData);
|
this.renderTextInfoList(corpusData);
|
||||||
// this.renderTextProportionsGraphic(corpusData);
|
this.renderTextProportionsGraphic(corpusData);
|
||||||
// this.renderFrequenciesGraphic(corpusData);
|
this.renderFrequenciesGraphic(corpusData);
|
||||||
// this.renderBoundsGraphic(corpusData);
|
this.renderBoundsGraphic(corpusData);
|
||||||
// });
|
});
|
||||||
// TODO: Don't do this hgere
|
// TODO: Don't do this hgere
|
||||||
cQiCorpus.updateDb();
|
cQiCorpus.updateDb();
|
||||||
this.enableActionElements();
|
this.enableActionElements();
|
||||||
@ -117,29 +117,29 @@ class CorpusAnalysisApp {
|
|||||||
}
|
}
|
||||||
|
|
||||||
renderGeneralCorpusInfo(corpusData) {
|
renderGeneralCorpusInfo(corpusData) {
|
||||||
document.querySelector('.corpus-num-tokens').innerHTML = corpusData.corpus.counts.token;
|
document.querySelector('.corpus-num-tokens').innerHTML = corpusData.corpus.lexicon[0].counts.token;
|
||||||
document.querySelector('.corpus-num-s').innerHTML = corpusData.corpus.counts.s;
|
document.querySelector('.corpus-num-s').innerHTML = corpusData.corpus.lexicon[0].counts.s;
|
||||||
document.querySelector('.corpus-num-unique-words').innerHTML = Object.entries(corpusData.corpus.freqs.word).length;
|
document.querySelector('.corpus-num-unique-words').innerHTML = Object.entries(corpusData.corpus.lexicon[0].freqs.word).length;
|
||||||
document.querySelector('.corpus-num-unique-lemmas').innerHTML = Object.entries(corpusData.corpus.freqs.lemma).length;
|
document.querySelector('.corpus-num-unique-lemmas').innerHTML = Object.entries(corpusData.corpus.lexicon[0].freqs.lemma).length;
|
||||||
document.querySelector('.corpus-num-unique-pos').innerHTML = Object.entries(corpusData.corpus.freqs.pos).length;
|
document.querySelector('.corpus-num-unique-pos').innerHTML = Object.entries(corpusData.corpus.lexicon[0].freqs.pos).length;
|
||||||
document.querySelector('.corpus-num-unique-simple-pos').innerHTML = Object.entries(corpusData.corpus.freqs.simple_pos).length;
|
document.querySelector('.corpus-num-unique-simple-pos').innerHTML = Object.entries(corpusData.corpus.lexicon[0].freqs.simple_pos).length;
|
||||||
}
|
}
|
||||||
|
|
||||||
renderTextInfoList(corpusData) {
|
renderTextInfoList(corpusData) {
|
||||||
let corpusTextInfoListElement = document.querySelector('.corpus-text-info-list');
|
let corpusTextInfoListElement = document.querySelector('.corpus-text-info-list');
|
||||||
let corpusTextInfoList = new CorpusTextInfoList(corpusTextInfoListElement);
|
let corpusTextInfoList = new CorpusTextInfoList(corpusTextInfoListElement);
|
||||||
let texts = corpusData.s_attrs.text.lexicon;
|
let texts = corpusData.text.lexicon;
|
||||||
let textData = [];
|
let textData = [];
|
||||||
for (let i = 0; i < Object.entries(texts).length; i++) {
|
for (let i = 0; i < Object.entries(texts).length; i++) {
|
||||||
let resource = {
|
let resource = {
|
||||||
title: corpusData.values.s_attrs.text[i].title,
|
title: corpusData.lookups.text[i].title,
|
||||||
publishing_year: corpusData.values.s_attrs.text[i].publishing_year,
|
publishing_year: corpusData.lookups.text[i].publishing_year,
|
||||||
num_tokens: corpusData.s_attrs.text.lexicon[i].counts.token,
|
num_tokens: corpusData.text.lexicon[i].counts.token,
|
||||||
num_sentences: corpusData.s_attrs.text.lexicon[i].counts.s,
|
num_sentences: corpusData.text.lexicon[i].counts.s,
|
||||||
num_unique_words: Object.entries(corpusData.s_attrs.text.lexicon[i].freqs.word).length,
|
num_unique_words: Object.entries(corpusData.text.lexicon[i].freqs.word).length,
|
||||||
num_unique_lemmas: Object.entries(corpusData.s_attrs.text.lexicon[i].freqs.lemma).length,
|
num_unique_lemmas: Object.entries(corpusData.text.lexicon[i].freqs.lemma).length,
|
||||||
num_unique_pos: Object.entries(corpusData.s_attrs.text.lexicon[i].freqs.pos).length,
|
num_unique_pos: Object.entries(corpusData.text.lexicon[i].freqs.pos).length,
|
||||||
num_unique_simple_pos: Object.entries(corpusData.s_attrs.text.lexicon[i].freqs.simple_pos).length
|
num_unique_simple_pos: Object.entries(corpusData.text.lexicon[i].freqs.simple_pos).length
|
||||||
};
|
};
|
||||||
|
|
||||||
textData.push(resource);
|
textData.push(resource);
|
||||||
@ -148,29 +148,33 @@ class CorpusAnalysisApp {
|
|||||||
corpusTextInfoList.add(textData);
|
corpusTextInfoList.add(textData);
|
||||||
|
|
||||||
let textCountChipElement = document.querySelector('.text-count-chip');
|
let textCountChipElement = document.querySelector('.text-count-chip');
|
||||||
textCountChipElement.innerHTML = `Text count: ${corpusData.corpus.counts.text}`;
|
textCountChipElement.innerHTML = `Text count: ${Object.values(corpusData.text.lexicon).length}`;
|
||||||
}
|
}
|
||||||
|
|
||||||
renderTextProportionsGraphic(corpusData) {
|
renderTextProportionsGraphic(corpusData) {
|
||||||
let textProportionsGraphicElement = document.querySelector('#text-proportions-graphic');
|
let textProportionsGraphicElement = document.querySelector('#text-proportions-graphic');
|
||||||
let texts = Object.entries(corpusData.s_attrs.text.lexicon);
|
let texts = Object.entries(corpusData.text.lexicon);
|
||||||
let graphData = [
|
let graphData = [
|
||||||
{
|
{
|
||||||
values: texts.map(text => text[1].counts.token),
|
values: texts.map(text => text[1].counts.token),
|
||||||
labels: texts.map(text => `${corpusData.values.s_attrs.text[text[0]].title} (${corpusData.values.s_attrs.text[text[0]].publishing_year})`),
|
labels: texts.map(text => `${corpusData.lookups.text[text[0]].title} (${corpusData.lookups.text[text[0]].publishing_year})`),
|
||||||
type: 'pie'
|
type: 'pie'
|
||||||
}
|
}
|
||||||
];
|
];
|
||||||
|
let graphLayout = {
|
||||||
|
// height: 600,
|
||||||
|
// width: 900
|
||||||
|
};
|
||||||
let config = {responsive: true};
|
let config = {responsive: true};
|
||||||
|
|
||||||
Plotly.newPlot(textProportionsGraphicElement, graphData, config);
|
Plotly.newPlot(textProportionsGraphicElement, graphData, graphLayout, config);
|
||||||
}
|
}
|
||||||
|
|
||||||
renderFrequenciesGraphic(corpusData) {
|
renderFrequenciesGraphic(corpusData) {
|
||||||
let frequenciesTokenCategoryDropdownElement = document.querySelector('[data-target="frequencies-token-category-dropdown"]');
|
let frequenciesTokenCategoryDropdownElement = document.querySelector('[data-target="frequencies-token-category-dropdown"]');
|
||||||
let frequenciesTokenCategoryDropdownListElement = document.querySelector("#frequencies-token-category-dropdown");
|
let frequenciesTokenCategoryDropdownListElement = document.querySelector("#frequencies-token-category-dropdown");
|
||||||
let frequenciesGraphicElement = document.querySelector('#frequencies-graphic');
|
let frequenciesGraphicElement = document.querySelector('#frequencies-graphic');
|
||||||
let texts = Object.entries(corpusData.s_attrs.text.lexicon);
|
let texts = Object.entries(corpusData.text.lexicon);
|
||||||
|
|
||||||
|
|
||||||
frequenciesTokenCategoryDropdownListElement.addEventListener('click', (event) => {
|
frequenciesTokenCategoryDropdownListElement.addEventListener('click', (event) => {
|
||||||
@ -192,13 +196,13 @@ class CorpusAnalysisApp {
|
|||||||
|
|
||||||
createFrequenciesGraphData(category, texts, corpusData) {
|
createFrequenciesGraphData(category, texts, corpusData) {
|
||||||
let graphData = [];
|
let graphData = [];
|
||||||
let sortedData = Object.entries(corpusData.corpus.freqs[category]).sort((a, b) => b[1] - a[1]).slice(0, 5);
|
let sortedData = Object.entries(corpusData.corpus.lexicon[0].freqs[category]).sort((a, b) => b[1] - a[1]).slice(0, 5);
|
||||||
|
|
||||||
for (let item of sortedData) {
|
for (let item of sortedData) {
|
||||||
let data = {
|
let data = {
|
||||||
x: texts.map(text => `${corpusData.values.s_attrs.text[text[0]].title} (${corpusData.values.s_attrs.text[text[0]].publishing_year})`),
|
x: texts.map(text => `${corpusData.lookups.text[text[0]].title} (${corpusData.lookups.text[text[0]].publishing_year})`),
|
||||||
y: texts.map(text => text[1].freqs[category][item[0]]),
|
y: texts.map(text => text[1].freqs[category][item[0]]),
|
||||||
name: corpusData.values.p_attrs[category][item[0]],
|
name: corpusData.lookups[category][item[0]],
|
||||||
type: 'bar'
|
type: 'bar'
|
||||||
};
|
};
|
||||||
graphData.push(data);
|
graphData.push(data);
|
||||||
@ -211,20 +215,22 @@ class CorpusAnalysisApp {
|
|||||||
let boundsGraphicElement = document.querySelector('#bounds-graphic');
|
let boundsGraphicElement = document.querySelector('#bounds-graphic');
|
||||||
|
|
||||||
let graphData = [];
|
let graphData = [];
|
||||||
let texts = Object.entries(corpusData.s_attrs.text.lexicon);
|
let texts = Object.entries(corpusData.text.lexicon);
|
||||||
|
|
||||||
graphData = [{
|
graphData = [{
|
||||||
type: 'bar',
|
type: 'bar',
|
||||||
x: texts.map(text => text[1].bounds[1] - text[1].bounds[0]),
|
x: texts.map(text => text[1].bounds[1] - text[1].bounds[0]),
|
||||||
y: texts.map(text => corpusData.values.s_attrs.text[text[0]].title),
|
y: texts.map(text => corpusData.lookups.text[text[0]].title),
|
||||||
base: texts.map(text => text[1].bounds[0]),
|
base: texts.map(text => text[1].bounds[0]),
|
||||||
text: texts.map(text => `${corpusData.values.s_attrs.text[text[0]].title} (${corpusData.values.s_attrs.text[text[0]].publishing_year})`),
|
text: texts.map(text => `${corpusData.lookups.text[text[0]].title} (${corpusData.lookups.text[text[0]].publishing_year})`),
|
||||||
orientation: 'h',
|
orientation: 'h',
|
||||||
hovertemplate: '%{base} - %{x} <br>%{y}',
|
hovertemplate: '%{base} - %{x} <br>%{y}',
|
||||||
showlegend: false
|
showlegend: false
|
||||||
}];
|
}];
|
||||||
|
|
||||||
let graphLayout = {
|
let graphLayout = {
|
||||||
|
// height: 600,
|
||||||
|
// width: 2000,
|
||||||
barmode: 'stack',
|
barmode: 'stack',
|
||||||
type: 'bar',
|
type: 'bar',
|
||||||
showgrid: false,
|
showgrid: false,
|
||||||
|
@ -103,7 +103,6 @@ class CorpusTextInfoList extends ResourceList {
|
|||||||
if (sortElement !== clickedSortElement) {
|
if (sortElement !== clickedSortElement) {
|
||||||
sortElement.classList.remove('asc', 'desc');
|
sortElement.classList.remove('asc', 'desc');
|
||||||
sortElement.style.color = 'black';
|
sortElement.style.color = 'black';
|
||||||
sortElement.innerHTML = 'arrow_drop_down';
|
|
||||||
};
|
};
|
||||||
});
|
});
|
||||||
clickedSortElement.style.color = '#aa9cc9';
|
clickedSortElement.style.color = '#aa9cc9';
|
||||||
|
Loading…
x
Reference in New Issue
Block a user