diff --git a/app/corpora/cqi_over_socketio/cqi.py b/app/corpora/cqi_over_socketio/cqi.py index 9d0fbfd6..402b6283 100644 --- a/app/corpora/cqi_over_socketio/cqi.py +++ b/app/corpora/cqi_over_socketio/cqi.py @@ -18,8 +18,8 @@ def cqi_connect(cqi_client: cqi.CQiClient): 'msg': 'Internal Server Error', 'payload': {'code': e.args[0], 'desc': e.args[1]} } - payload = {'code': cqi_status, - 'msg': cqi.api.specification.lookup[cqi_status]} + payload = {'code': cqi_status.code, + 'msg': cqi_status.__class__.__name__} return {'code': 200, 'msg': 'OK', 'payload': payload} @@ -28,8 +28,8 @@ def cqi_connect(cqi_client: cqi.CQiClient): @cqi_over_socketio def cqi_disconnect(cqi_client: cqi.CQiClient): cqi_status = cqi_client.disconnect() - payload = {'code': cqi_status, - 'msg': cqi.api.specification.lookup[cqi_status]} + payload = {'code': cqi_status.code, + 'msg': cqi_status.__class__.__name__} return {'code': 200, 'msg': 'OK', 'payload': payload} @@ -38,6 +38,6 @@ def cqi_disconnect(cqi_client: cqi.CQiClient): @cqi_over_socketio def cqi_ping(cqi_client: cqi.CQiClient): cqi_status = cqi_client.ping() - payload = {'code': cqi_status, - 'msg': cqi.api.specification.lookup[cqi_status]} + payload = {'code': cqi_status.code, + 'msg': cqi_status.__class__.__name__} return {'code': 200, 'msg': 'OK', 'payload': payload} diff --git a/app/corpora/cqi_over_socketio/cqi_corpora_corpus.py b/app/corpora/cqi_over_socketio/cqi_corpora_corpus.py index 3e49ac09..b9dbe425 100644 --- a/app/corpora/cqi_over_socketio/cqi_corpora_corpus.py +++ b/app/corpora/cqi_over_socketio/cqi_corpora_corpus.py @@ -16,8 +16,8 @@ from .utils import cqi_over_socketio, lookups_by_cpos def cqi_corpora_corpus_drop(cqi_client: cqi.CQiClient, corpus_name: str): cqi_corpus = cqi_client.corpora.get(corpus_name) cqi_status = cqi_corpus.drop() - payload = {'code': cqi_status, - 'msg': cqi.api.specification.lookup[cqi_status]} + payload = {'code': cqi_status.code, + 'msg': cqi_status.__class__.__name__} return {'code': 200, 'msg': 'OK', 'payload': payload} @@ -27,8 +27,8 @@ def cqi_corpora_corpus_drop(cqi_client: cqi.CQiClient, corpus_name: str): def cqi_corpora_corpus_query(cqi_client: cqi.CQiClient, corpus_name: str, subcorpus_name: str, query: str): # noqa cqi_corpus = cqi_client.corpora.get(corpus_name) cqi_status = cqi_corpus.query(subcorpus_name, query) - payload = {'code': cqi_status, - 'msg': cqi.api.specification.lookup[cqi_status]} + payload = {'code': cqi_status.code, + 'msg': cqi_status.__class__.__name__} return {'code': 200, 'msg': 'OK', 'payload': payload} @@ -50,177 +50,186 @@ def cqi_corpora_corpus_update_db(cqi_client: cqi.CQiClient, corpus_name: str): @cqi_over_socketio def cqi_corpora_corpus_get_visualization_data(cqi_client: cqi.CQiClient, corpus_name: str): corpus = cqi_client.corpora.get(corpus_name) - # s_attrs = [x for x in corpus.structural_attributes.list() if not x.has_values] - # p_attrs = corpus.positional_attributes.list() - # payload = { - # 's_attrs': {}, - # 'p_attrs': {}, - # 'values': { - # 's_attrs': {}, - # 'p_attrs': {} - # } - # } - # for s_attr in s_attrs: - # s_attr_lbound, s_attr_rbound = s_attr.cpos_by_id(text_id) - # s_attr_cpos_range = range(s_attr_lbound, s_attr_rbound + 1) - # payload['text']['lexicon'][text_id] = { - # 's_attrs': [s_attr_lbound, s_attr_rbound], - # 'counts': { - # 'token': s_attr_rbound - s_attr_lbound + 1 - # }, - # 'freqs': { - # p_attr.name: dict(Counter(p_attr.ids_by_cpos(list(s_attr_cpos_range)))) - # for p_attr in p_attrs - # } - # } - # for p_attr in p_attrs: - # payload['p_attrs'] = dict( - - # ) - # payload['values']['p_attrs'] = dict( - # zip( - # range(0, p_attr.lexicon_size), - # p_attr.values_by_ids(list(range(0, p_attr.lexicon_size))) - # ) - # ) - text = corpus.structural_attributes.get('text') - text_value_names = [] - text_values = [] - for text_sub_attr in corpus.structural_attributes.list(filters={'part_of': text}): - text_value_names.append(text_sub_attr.name[(len(text.name) + 1):]) - text_values.append(text_sub_attr.values_by_ids(list(range(0, text.size)))) - s = corpus.structural_attributes.get('s') - ent = corpus.structural_attributes.get('ent') - ent_value_names = [] - ent_values = [] - for ent_sub_attr in corpus.structural_attributes.list(filters={'part_of': ent}): - ent_value_names.append(ent_sub_attr.name[(len(ent.name) + 1):]) - ent_values.append(ent_sub_attr.values_by_ids(list(range(0, ent.size)))) - word = corpus.positional_attributes.get('word') - lemma = corpus.positional_attributes.get('lemma') - pos = corpus.positional_attributes.get('pos') - simple_pos = corpus.positional_attributes.get('simple_pos') - payload = {} - payload['corpus'] = {'lexicon': {}, 'values': []} - payload['corpus']['lexicon'][0] = { - 'bounds': [0, corpus.size - 1], - 'counts': { - 'text': text.size, - 's': s.size, - 'ent': ent.size, - 'token': corpus.size + payload = { + 'corpus': { + 'bounds': [0, corpus.size - 1], + 'counts': {}, + 'freqs': {} }, - 'freqs': { - 'word': dict( - zip( - range(0, word.lexicon_size), - word.freqs_by_ids(list(range(0, word.lexicon_size))) - ) - ), - 'lemma': dict( - zip( - range(0, lemma.lexicon_size), - lemma.freqs_by_ids(list(range(0, lemma.lexicon_size))) - ) - ), - 'pos': dict( - zip( - range(0, pos.lexicon_size), - pos.freqs_by_ids(list(range(0, pos.lexicon_size))) - ) - ), - 'simple_pos': dict( - zip( - range(0, simple_pos.lexicon_size), - simple_pos.freqs_by_ids(list(range(0, simple_pos.lexicon_size))) - ) - ) - } + 'p_attrs': {}, + 's_attrs': {}, + 'values': {'p_attrs': {}, 's_attrs': {}} } - payload['text'] = {'lexicon': {}, 'values': None} - for text_id in range(0, text.size): - text_lbound, text_rbound = text.cpos_by_id(text_id) - text_cpos_range = range(text_lbound, text_rbound + 1) - text_s_ids = s.ids_by_cpos(list(text_cpos_range)) - text_ent_ids = ent.ids_by_cpos(list(text_cpos_range)) - payload['text']['lexicon'][text_id] = { - 'bounds': [text_lbound, text_rbound], - 'counts': { - 's': len([x for x in text_s_ids if x != -1]), - 'ent': len([x for x in text_ent_ids if x != -1]), - 'token': text_rbound - text_lbound + 1 - }, - 'freqs': { - 'word': dict( - Counter(word.ids_by_cpos(list(text_cpos_range))) - ), - 'lemma': dict( - Counter(lemma.ids_by_cpos(list(text_cpos_range))) - ), - 'pos': dict( - Counter(pos.ids_by_cpos(list(text_cpos_range))) - ), - 'simple_pos': dict( - Counter(simple_pos.ids_by_cpos(list(text_cpos_range))) - ) - } - } - payload['text']['values'] = text_value_names - payload['s'] = {'lexicon': {}, 'values': None} - for s_id in range(0, s.size): - payload['s']['lexicon'][s_id] = { - # 'bounds': s.cpos_by_id(s_id) - } - payload['s']['values'] = [ - sub_attr.name[(len(s.name) + 1):] - for sub_attr in corpus.structural_attributes.list(filters={'part_of': s}) - ] - payload['ent'] = {'lexicon': {}, 'values': None} - for ent_id in range(0, ent.size): - payload['ent']['lexicon'][ent_id] = { - # 'bounds': ent.cpos_by_id(ent_id) - } - payload['ent']['values'] = ent_value_names - payload['lookups'] = { - 'corpus': {}, - 'text': { - text_id: { - text_value_name: text_values[text_value_name_idx][text_id_idx] - for text_value_name_idx, text_value_name in enumerate(text_value_names) - } for text_id_idx, text_id in enumerate(range(0, text.size)) - }, - 's': {}, - 'ent': { - ent_id: { - ent_value_name: ent_values[ent_value_name_idx][ent_id_idx] - for ent_value_name_idx, ent_value_name in enumerate(ent_value_names) - } for ent_id_idx, ent_id in enumerate(range(0, ent.size)) - }, - 'word': dict( + for p_attr in corpus.positional_attributes.list(): + payload['corpus']['freqs'][p_attr.name] = dict( zip( - range(0, word.lexicon_size), - word.values_by_ids(list(range(0, word.lexicon_size))) - ) - ), - 'lemma': dict( - zip( - range(0, lemma.lexicon_size), - lemma.values_by_ids(list(range(0, lemma.lexicon_size))) - ) - ), - 'pos': dict( - zip( - range(0, pos.lexicon_size), - pos.values_by_ids(list(range(0, pos.lexicon_size))) - ) - ), - 'simple_pos': dict( - zip( - range(0, simple_pos.lexicon_size), - simple_pos.values_by_ids(list(range(0, simple_pos.lexicon_size))) + range(0, p_attr.lexicon_size), + p_attr.freqs_by_ids(list(range(0, p_attr.lexicon_size))) ) ) - } + payload['p_attrs'][p_attr.name] = dict( + zip( + range(0, corpus.size), + p_attr.ids_by_cpos(list(range(0, corpus.size))) + ) + ) + payload['values']['p_attrs'][p_attr.name] = dict( + zip( + range(0, p_attr.lexicon_size), + p_attr.values_by_ids(list(range(0, p_attr.lexicon_size))) + ) + ) + s = corpus.structural_attributes.get('s') + ent = corpus.structural_attributes.get('ent') + for s_attr in corpus.structural_attributes.list(): + if s_attr.has_values: + continue + payload['corpus']['counts'][s_attr.name] = s_attr.size + payload['s_attrs'][s_attr.name] = {'lexicon': {}, 'values': []} + for id in range(0, s_attr.size): + payload['s_attrs'][s_attr.name]['lexicon'][id] = {} + if s_attr.name != 'text': + continue + lbound, rbound = s_attr.cpos_by_id(id) + payload['s_attrs'][s_attr.name]['lexicon'][id]['bounds'] = [lbound, rbound] + cpos_range = range(lbound, rbound + 1) + payload['s_attrs'][s_attr.name]['lexicon'][id]['counts'] = {} + payload['s_attrs'][s_attr.name]['lexicon'][id]['counts']['s'] = len({x for x in s.ids_by_cpos(list(cpos_range)) if x != -1}) + payload['s_attrs'][s_attr.name]['lexicon'][id]['counts']['ent'] = len({x for x in ent.ids_by_cpos(list(cpos_range)) if x != -1}) + payload['s_attrs'][s_attr.name]['lexicon'][id]['counts']['token'] = rbound - lbound + 1 + payload['s_attrs'][s_attr.name]['lexicon'][id]['freqs'] = {} + for p_attr in corpus.positional_attributes.list(): + payload['s_attrs'][s_attr.name]['lexicon'][id]['freqs'][p_attr.name] = dict(Counter(p_attr.ids_by_cpos(list(cpos_range)))) + # for s_attr in s_attrs: + # payload['s_attrs'][s_attr.name] = {'lexicon': {}, 'values': []} + # payload['values']['s_attrs'][s_attr.name] = {} + # for id in range(0, s_attr.size): + # payload['s_attrs'][s_attr.name]['lexicon'][id] = {} + # if s_attr.name != 'text': + # continue + # lbound, rbound = s_attr.cpos_by_id(id) + # cpos_range = range(lbound, rbound + 1) + # # s_ids + # payload['s_attrs'][s_attr.name]['lexicon'][id]['bounds'] = [lbound, rbound] + # payload['s_attrs'][s_attr.name]['lexicon'][id]['counts'] = {} + # payload['s_attrs'][s_attr.name]['lexicon'][id]['counts']['token'] = rbound - lbound + 1 + # payload['s_attrs'][s_attr.name]['lexicon'][id]['freqs'] = { + # p_attr.name: dict(Counter(p_attr.ids_by_cpos(list(cpos_range)))) + # for p_attr in p_attrs + # } + # for sub_attr in corpus.structural_attributes.list(filters={'part_of': s_attr}): + # payload['s_attrs'][s_attr.name]['values'].append(sub_attr.name[(len(s_attr.name) + 1):]) + # payload['values']['s_attrs'][s_attr.name][sub_attr.name[(len(s_attr.name) + 1):]] = dict( + # zip( + # range(0, sub_attr.size), + # sub_attr.values_by_ids(list(range(0, sub_attr.size))) + # ) + # ) + # text = corpus.structural_attributes.get('text') + + # text = corpus.structural_attributes.get('text') + # text_value_names = [] + # text_values = [] + # for text_sub_attr in corpus.structural_attributes.list(filters={'part_of': text}): + # text_value_names.append(text_sub_attr.name[(len(text.name) + 1):]) + # text_values.append(text_sub_attr.values_by_ids(list(range(0, text.size)))) + # s = corpus.structural_attributes.get('s') + # ent = corpus.structural_attributes.get('ent') + # ent_value_names = [] + # ent_values = [] + # for ent_sub_attr in corpus.structural_attributes.list(filters={'part_of': ent}): + # ent_value_names.append(ent_sub_attr.name[(len(ent.name) + 1):]) + # ent_values.append(ent_sub_attr.values_by_ids(list(range(0, ent.size)))) + # word = corpus.positional_attributes.get('word') + # lemma = corpus.positional_attributes.get('lemma') + # pos = corpus.positional_attributes.get('pos') + # simple_pos = corpus.positional_attributes.get('simple_pos') + # payload = {} + + # payload['text'] = {'lexicon': {}, 'values': None} + # for text_id in range(0, text.size): + # text_lbound, text_rbound = text.cpos_by_id(text_id) + # text_cpos_range = range(text_lbound, text_rbound + 1) + # text_s_ids = s.ids_by_cpos(list(text_cpos_range)) + # text_ent_ids = ent.ids_by_cpos(list(text_cpos_range)) + # payload['text']['lexicon'][text_id] = { + # 'bounds': [text_lbound, text_rbound], + # 'counts': { + # 's': len([x for x in text_s_ids if x != -1]), + # 'ent': len([x for x in text_ent_ids if x != -1]), + # 'token': text_rbound - text_lbound + 1 + # }, + # 'freqs': { + # 'word': dict( + # Counter(word.ids_by_cpos(list(text_cpos_range))) + # ), + # 'lemma': dict( + # Counter(lemma.ids_by_cpos(list(text_cpos_range))) + # ), + # 'pos': dict( + # Counter(pos.ids_by_cpos(list(text_cpos_range))) + # ), + # 'simple_pos': dict( + # Counter(simple_pos.ids_by_cpos(list(text_cpos_range))) + # ) + # } + # } + # payload['text']['values'] = text_value_names + # payload['s'] = {'lexicon': {}, 'values': None} + # for s_id in range(0, s.size): + # payload['s']['lexicon'][s_id] = { + # # 'bounds': s.cpos_by_id(s_id) + # } + # payload['s']['values'] = [ + # sub_attr.name[(len(s.name) + 1):] + # for sub_attr in corpus.structural_attributes.list(filters={'part_of': s}) + # ] + # payload['ent'] = {'lexicon': {}, 'values': None} + # for ent_id in range(0, ent.size): + # payload['ent']['lexicon'][ent_id] = { + # # 'bounds': ent.cpos_by_id(ent_id) + # } + # payload['ent']['values'] = ent_value_names + # payload['lookups'] = { + # 'corpus': {}, + # 'text': { + # text_id: { + # text_value_name: text_values[text_value_name_idx][text_id_idx] + # for text_value_name_idx, text_value_name in enumerate(text_value_names) + # } for text_id_idx, text_id in enumerate(range(0, text.size)) + # }, + # 's': {}, + # 'ent': { + # ent_id: { + # ent_value_name: ent_values[ent_value_name_idx][ent_id_idx] + # for ent_value_name_idx, ent_value_name in enumerate(ent_value_names) + # } for ent_id_idx, ent_id in enumerate(range(0, ent.size)) + # }, + # 'word': dict( + # zip( + # range(0, word.lexicon_size), + # word.values_by_ids(list(range(0, word.lexicon_size))) + # ) + # ), + # 'lemma': dict( + # zip( + # range(0, lemma.lexicon_size), + # lemma.values_by_ids(list(range(0, lemma.lexicon_size))) + # ) + # ), + # 'pos': dict( + # zip( + # range(0, pos.lexicon_size), + # pos.values_by_ids(list(range(0, pos.lexicon_size))) + # ) + # ), + # 'simple_pos': dict( + # zip( + # range(0, simple_pos.lexicon_size), + # simple_pos.values_by_ids(list(range(0, simple_pos.lexicon_size))) + # ) + # ) + # } # print(payload) return {'code': 200, 'msg': 'OK', 'payload': payload} diff --git a/app/corpora/cqi_over_socketio/cqi_corpora_corpus_subcorpora.py b/app/corpora/cqi_over_socketio/cqi_corpora_corpus_subcorpora.py index 578968be..38540df7 100644 --- a/app/corpora/cqi_over_socketio/cqi_corpora_corpus_subcorpora.py +++ b/app/corpora/cqi_over_socketio/cqi_corpora_corpus_subcorpora.py @@ -32,8 +32,8 @@ def cqi_corpora_corpus_subcorpora_subcorpus_drop(cqi_client: cqi.CQiClient, corp cqi_corpus = cqi_client.corpora.get(corpus_name) cqi_subcorpus = cqi_corpus.subcorpora.get(subcorpus_name) cqi_status = cqi_subcorpus.drop() - payload = {'code': cqi_status, - 'msg': cqi.api.specification.lookup[cqi_status]} + payload = {'code': cqi_status.code, + 'msg': cqi_status.__class__.__name__} return {'code': 200, 'msg': 'OK', 'payload': payload}