Mirror of https://gitlab.ub.uni-bielefeld.de/sfb1288inf/nopaque.git (synced 2025-10-31 10:42:43 +00:00)

Compare commits: e194ce7541...6c31788402 (10 commits)
| Author | SHA1 | Date |
|---|---|---|
|  | 6c31788402 |  |
|  | 1c98c5070a |  |
|  | 1e33366820 |  |
|  | 71013f1dc5 |  |
|  | 142c82cc36 |  |
|  | f84ac48975 |  |
|  | 2739dc4b4f |  |
|  | eb2abf8282 |  |
|  | 529c778772 |  |
|  | be51044059 |  |
@@ -18,8 +18,8 @@ def cqi_connect(cqi_client: cqi.CQiClient):
             'msg': 'Internal Server Error',
             'payload': {'code': e.args[0], 'desc': e.args[1]}
         }
-    payload = {'code': cqi_status,
-               'msg': cqi.api.specification.lookup[cqi_status]}
+    payload = {'code': cqi_status.code,
+               'msg': cqi_status.__class__.__name__}
     return {'code': 200, 'msg': 'OK', 'payload': payload}
 
 
@@ -28,8 +28,8 @@ def cqi_connect(cqi_client: cqi.CQiClient):
 @cqi_over_socketio
 def cqi_disconnect(cqi_client: cqi.CQiClient):
     cqi_status = cqi_client.disconnect()
-    payload = {'code': cqi_status,
-               'msg': cqi.api.specification.lookup[cqi_status]}
+    payload = {'code': cqi_status.code,
+               'msg': cqi_status.__class__.__name__}
     return {'code': 200, 'msg': 'OK', 'payload': payload}
 
 
@@ -38,6 +38,6 @@ def cqi_disconnect(cqi_client: cqi.CQiClient):
 @cqi_over_socketio
 def cqi_ping(cqi_client: cqi.CQiClient):
     cqi_status = cqi_client.ping()
-    payload = {'code': cqi_status,
-               'msg': cqi.api.specification.lookup[cqi_status]}
+    payload = {'code': cqi_status.code,
+               'msg': cqi_status.__class__.__name__}
     return {'code': 200, 'msg': 'OK', 'payload': payload}
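All of these hunks make the same change: the cqi client no longer returns a bare integer status that has to be resolved through the `cqi.api.specification.lookup` table, but a status object whose numeric code lives in `.code` and whose class name serves as the human-readable message. A minimal sketch of the before/after, where `StatusOk` and its code value are hypothetical stand-ins for the library's own status types:

```python
# Sketch only: StatusOk stands in for the status classes the new cqi
# client returns; the code value is illustrative, not the real constant.

class StatusOk:
    """Hypothetical status object as returned by the new cqi client."""
    code = 257  # illustrative value only


def payload_old(cqi_status, lookup):
    # Old style: cqi_status is an int, message resolved via a lookup table.
    return {'code': cqi_status, 'msg': lookup[cqi_status]}


def payload_new(cqi_status):
    # New style: the object carries its own code; its class name is the msg.
    return {'code': cqi_status.code, 'msg': cqi_status.__class__.__name__}


print(payload_new(StatusOk()))  # {'code': 257, 'msg': 'StatusOk'}
```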
@@ -1,8 +1,9 @@
 from collections import Counter
 from flask import session
 import cqi
+import json
 import math
-import random
+import os
 from app import db, socketio
 from app.decorators import socketio_login_required
 from app.models import Corpus
@@ -16,8 +17,8 @@ from .utils import cqi_over_socketio, lookups_by_cpos
 def cqi_corpora_corpus_drop(cqi_client: cqi.CQiClient, corpus_name: str):
     cqi_corpus = cqi_client.corpora.get(corpus_name)
     cqi_status = cqi_corpus.drop()
-    payload = {'code': cqi_status,
-               'msg': cqi.api.specification.lookup[cqi_status]}
+    payload = {'code': cqi_status.code,
+               'msg': cqi_status.__class__.__name__}
     return {'code': 200, 'msg': 'OK', 'payload': payload}
 
 
@@ -27,8 +28,8 @@ def cqi_corpora_corpus_drop(cqi_client: cqi.CQiClient, corpus_name: str):
 def cqi_corpora_corpus_query(cqi_client: cqi.CQiClient, corpus_name: str, subcorpus_name: str, query: str):  # noqa
     cqi_corpus = cqi_client.corpora.get(corpus_name)
     cqi_status = cqi_corpus.query(subcorpus_name, query)
-    payload = {'code': cqi_status,
-               'msg': cqi.api.specification.lookup[cqi_status]}
+    payload = {'code': cqi_status.code,
+               'msg': cqi_status.__class__.__name__}
     return {'code': 200, 'msg': 'OK', 'payload': payload}
 
 
@@ -49,179 +50,109 @@ def cqi_corpora_corpus_update_db(cqi_client: cqi.CQiClient, corpus_name: str):
 @socketio_login_required
 @cqi_over_socketio
 def cqi_corpora_corpus_get_visualization_data(cqi_client: cqi.CQiClient, corpus_name: str):
-    corpus = cqi_client.corpora.get(corpus_name)
-    # s_attrs = [x for x in corpus.structural_attributes.list() if not x.has_values]
-    # p_attrs = corpus.positional_attributes.list()
-    # payload = {
-    #     's_attrs': {},
-    #     'p_attrs': {},
-    #     'values': {
-    #         's_attrs': {},
-    #         'p_attrs': {}
-    #     }
-    # }
-    # for s_attr in s_attrs:
-    #     s_attr_lbound, s_attr_rbound = s_attr.cpos_by_id(text_id)
-    #     s_attr_cpos_range = range(s_attr_lbound, s_attr_rbound + 1)
-    #     payload['text']['lexicon'][text_id] = {
-    #         's_attrs': [s_attr_lbound, s_attr_rbound],
-    #         'counts': {
-    #             'token': s_attr_rbound - s_attr_lbound + 1
-    #         },
-    #         'freqs': {
-    #             p_attr.name: dict(Counter(p_attr.ids_by_cpos(list(s_attr_cpos_range))))
-    #             for p_attr in p_attrs
-    #         }
-    #     }
-    # for p_attr in p_attrs:
-    #     payload['p_attrs'] = dict(
-
-    # )
-    #     payload['values']['p_attrs'] = dict(
-    #         zip(
-    #             range(0, p_attr.lexicon_size),
-    #             p_attr.values_by_ids(list(range(0, p_attr.lexicon_size)))
-    #         )
-    #     )
-    text = corpus.structural_attributes.get('text')
-    text_value_names = []
-    text_values = []
-    for text_sub_attr in corpus.structural_attributes.list(filters={'part_of': text}):
-        text_value_names.append(text_sub_attr.name[(len(text.name) + 1):])
-        text_values.append(text_sub_attr.values_by_ids(list(range(0, text.size))))
-    s = corpus.structural_attributes.get('s')
-    ent = corpus.structural_attributes.get('ent')
-    ent_value_names = []
-    ent_values = []
-    for ent_sub_attr in corpus.structural_attributes.list(filters={'part_of': ent}):
-        ent_value_names.append(ent_sub_attr.name[(len(ent.name) + 1):])
-        ent_values.append(ent_sub_attr.values_by_ids(list(range(0, ent.size))))
-    word = corpus.positional_attributes.get('word')
-    lemma = corpus.positional_attributes.get('lemma')
-    pos = corpus.positional_attributes.get('pos')
-    simple_pos = corpus.positional_attributes.get('simple_pos')
-    payload = {}
-    payload['corpus'] = {'lexicon': {}, 'values': []}
-    payload['corpus']['lexicon'][0] = {
-        'bounds': [0, corpus.size - 1],
-        'counts': {
-            'text': text.size,
-            's': s.size,
-            'ent': ent.size,
-            'token': corpus.size
-        },
-        'freqs': {
-            'word': dict(
-                zip(
-                    range(0, word.lexicon_size),
-                    word.freqs_by_ids(list(range(0, word.lexicon_size)))
-                )
-            ),
-            'lemma': dict(
-                zip(
-                    range(0, lemma.lexicon_size),
-                    lemma.freqs_by_ids(list(range(0, lemma.lexicon_size)))
-                )
-            ),
-            'pos': dict(
-                zip(
-                    range(0, pos.lexicon_size),
-                    pos.freqs_by_ids(list(range(0, pos.lexicon_size)))
-                )
-            ),
-            'simple_pos': dict(
-                zip(
-                    range(0, simple_pos.lexicon_size),
-                    simple_pos.freqs_by_ids(list(range(0, simple_pos.lexicon_size)))
-                )
-            )
-        }
-    }
-    payload['text'] = {'lexicon': {}, 'values': None}
-    for text_id in range(0, text.size):
-        text_lbound, text_rbound = text.cpos_by_id(text_id)
-        text_cpos_range = range(text_lbound, text_rbound + 1)
-        text_s_ids = s.ids_by_cpos(list(text_cpos_range))
-        text_ent_ids = ent.ids_by_cpos(list(text_cpos_range))
-        payload['text']['lexicon'][text_id] = {
-            'bounds': [text_lbound, text_rbound],
-            'counts': {
-                's': len([x for x in text_s_ids if x != -1]),
-                'ent': len([x for x in text_ent_ids if x != -1]),
-                'token': text_rbound - text_lbound + 1
-            },
-            'freqs': {
-                'word': dict(
-                    Counter(word.ids_by_cpos(list(text_cpos_range)))
-                ),
-                'lemma': dict(
-                    Counter(lemma.ids_by_cpos(list(text_cpos_range)))
-                ),
-                'pos': dict(
-                    Counter(pos.ids_by_cpos(list(text_cpos_range)))
-                ),
-                'simple_pos': dict(
-                    Counter(simple_pos.ids_by_cpos(list(text_cpos_range)))
-                )
-            }
-        }
-    payload['text']['values'] = text_value_names
-    payload['s'] = {'lexicon': {}, 'values': None}
-    for s_id in range(0, s.size):
-        payload['s']['lexicon'][s_id] = {
-            # 'bounds': s.cpos_by_id(s_id)
-        }
-    payload['s']['values'] = [
-        sub_attr.name[(len(s.name) + 1):]
-        for sub_attr in corpus.structural_attributes.list(filters={'part_of': s})
-    ]
-    payload['ent'] = {'lexicon': {}, 'values': None}
-    for ent_id in range(0, ent.size):
-        payload['ent']['lexicon'][ent_id] = {
-            # 'bounds': ent.cpos_by_id(ent_id)
-        }
-    payload['ent']['values'] = ent_value_names
-    payload['lookups'] = {
-        'corpus': {},
-        'text': {
-            text_id: {
-                text_value_name: text_values[text_value_name_idx][text_id_idx]
-                for text_value_name_idx, text_value_name in enumerate(text_value_names)
-            } for text_id_idx, text_id in enumerate(range(0, text.size))
-        },
-        's': {},
-        'ent': {
-            ent_id: {
-                ent_value_name: ent_values[ent_value_name_idx][ent_id_idx]
-                for ent_value_name_idx, ent_value_name in enumerate(ent_value_names)
-            } for ent_id_idx, ent_id in enumerate(range(0, ent.size))
-        },
-        'word': dict(
-            zip(
-                range(0, word.lexicon_size),
-                word.values_by_ids(list(range(0, word.lexicon_size)))
-            )
-        ),
-        'lemma': dict(
-            zip(
-                range(0, lemma.lexicon_size),
-                lemma.values_by_ids(list(range(0, lemma.lexicon_size)))
-            )
-        ),
-        'pos': dict(
-            zip(
-                range(0, pos.lexicon_size),
-                pos.values_by_ids(list(range(0, pos.lexicon_size)))
-            )
-        ),
-        'simple_pos': dict(
-            zip(
-                range(0, simple_pos.lexicon_size),
-                simple_pos.values_by_ids(list(range(0, simple_pos.lexicon_size)))
-            )
-        )
-    }
-    # print(payload)
+    corpus = Corpus.query.get(session['d']['corpus_id'])
+    visualization_data_file_path = os.path.join(corpus.path, 'cwb', 'visualization_data.json')
+    if os.path.exists(visualization_data_file_path):
+        with open(visualization_data_file_path, 'r') as f:
+            payload = json.load(f)
+        return {'code': 200, 'msg': 'OK', 'payload': payload}
+    cqi_corpus = cqi_client.corpora.get(corpus_name)
+    ##########################################################################
+    # A faster way to get cpos boundaries for smaller s_attrs                #
+    ##########################################################################
+    # cqi_corpus.query('Last', '<s> []* </s>;')
+    # cqi_subcorpus = cqi_corpus.subcorpora.get('Last')
+    # print(cqi_subcorpus.size)
+    # first_match = 0
+    # last_match = cqi_subcorpus.attrs['size'] - 1
+    # match_boundaries = zip(
+    #     list(range(first_match, last_match + 1)),
+    #     cqi_subcorpus.dump(cqi_subcorpus.attrs['fields']['match'], first_match, last_match),
+    #     cqi_subcorpus.dump(cqi_subcorpus.attrs['fields']['matchend'], first_match, last_match)
+    # )
+    # for x in match_boundaries:
+    #     print(x)
+    cqi_p_attrs = {
+        p_attr.name: p_attr
+        for p_attr in cqi_corpus.positional_attributes.list()
+    }
+    cqi_s_attrs = {
+        s_attr.name: s_attr
+        for s_attr in cqi_corpus.structural_attributes.list()
+    }
+    payload = {
+        'corpus': {
+            'bounds': [0, cqi_corpus.size - 1],
+            'counts': {
+                'token': cqi_corpus.size
+            },
+            'freqs': {}
+        },
+        'p_attrs': {},
+        's_attrs': {},
+        'values': {'p_attrs': {}, 's_attrs': {}}
+    }
+    for p_attr in cqi_p_attrs.values():
+        payload['corpus']['freqs'][p_attr.name] = dict(
+            zip(
+                range(0, p_attr.lexicon_size),
+                p_attr.freqs_by_ids(list(range(0, p_attr.lexicon_size)))
+            )
+        )
+        payload['p_attrs'][p_attr.name] = dict(
+            zip(
+                range(0, cqi_corpus.size),
+                p_attr.ids_by_cpos(list(range(0, cqi_corpus.size)))
+            )
+        )
+        payload['values']['p_attrs'][p_attr.name] = dict(
+            zip(
+                range(0, p_attr.lexicon_size),
+                p_attr.values_by_ids(list(range(0, p_attr.lexicon_size)))
+            )
+        )
+    for s_attr in cqi_s_attrs.values():
+        if s_attr.has_values:
+            continue
+        payload['corpus']['counts'][s_attr.name] = s_attr.size
+        payload['s_attrs'][s_attr.name] = {'lexicon': {}, 'values': None}
+        payload['values']['s_attrs'][s_attr.name] = {}
+        for id in range(0, s_attr.size):
+            payload['s_attrs'][s_attr.name]['lexicon'][id] = {}
+            lbound, rbound = s_attr.cpos_by_id(id)
+            payload['s_attrs'][s_attr.name]['lexicon'][id]['bounds'] = [lbound, rbound]
+            payload['s_attrs'][s_attr.name]['lexicon'][id]['counts'] = {}
+            payload['s_attrs'][s_attr.name]['lexicon'][id]['counts']['token'] = rbound - lbound + 1
+            if s_attr.name not in ['text', 's']:
+                continue
+            cpos_range = range(lbound, rbound + 1)
+            payload['s_attrs'][s_attr.name]['lexicon'][id]['counts']['ent'] = len({x for x in cqi_s_attrs['ent'].ids_by_cpos(list(cpos_range)) if x != -1})
+            if s_attr.name != 'text':
+                continue
+            payload['s_attrs'][s_attr.name]['lexicon'][id]['counts']['s'] = len({x for x in cqi_s_attrs['s'].ids_by_cpos(list(cpos_range)) if x != -1})
+            payload['s_attrs'][s_attr.name]['lexicon'][id]['freqs'] = {}
+            for p_attr in cqi_p_attrs.values():
+                payload['s_attrs'][s_attr.name]['lexicon'][id]['freqs'][p_attr.name] = dict(Counter(p_attr.ids_by_cpos(list(cpos_range))))
+        sub_s_attrs = cqi_corpus.structural_attributes.list(filters={'part_of': s_attr})
+        s_attr_value_names = [
+            sub_s_attr.name[(len(s_attr.name) + 1):]
+            for sub_s_attr in sub_s_attrs
+        ]
+        sub_s_attr_values = [
+            sub_s_attr.values_by_ids(list(range(0, s_attr.size)))
+            for sub_s_attr in sub_s_attrs
+        ]
+        payload['s_attrs'][s_attr.name]['values'] = s_attr_value_names
+        payload['values']['s_attrs'][s_attr.name] = {
+            s_attr_id: {
+                s_attr_value_name: sub_s_attr_values[s_attr_value_name_idx][s_attr_id_idx]
+                for s_attr_value_name_idx, s_attr_value_name in enumerate(
+                    payload['s_attrs'][s_attr.name]['values']
+                )
+            } for s_attr_id_idx, s_attr_id in enumerate(range(0, s_attr.size))
+        }
+    with open(visualization_data_file_path, 'w') as f:
+        json.dump(payload, f)
     return {'code': 200, 'msg': 'OK', 'payload': payload}
 
 
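At its core, the rewritten endpoint is a disk-backed memoization: build the expensive payload once, persist it next to the corpus's CWB data, and serve the JSON file on every later request. The `import json` and `import os` added in this file's first hunk exist to support exactly this. A stripped-down sketch of the pattern, where `cache_path` and `build_payload` are illustrative stand-ins for the corpus-specific file path and the attribute-walking code above:

```python
import json
import os


def get_visualization_data(cache_path, build_payload):
    """Serve a cached JSON payload if present, else build and persist it.

    cache_path and build_payload are hypothetical stand-ins for the
    per-corpus visualization_data.json path and the CQi-walking code.
    """
    if os.path.exists(cache_path):
        with open(cache_path, 'r') as f:
            return json.load(f)
    payload = build_payload()  # expensive: touches every cpos in the corpus
    with open(cache_path, 'w') as f:
        json.dump(payload, f)
    return payload
```

Note that nothing in the sketch (or in the diff) invalidates the cache; presumably the file is discarded whenever the corpus's cwb directory is rebuilt.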
@@ -32,8 +32,8 @@ def cqi_corpora_corpus_subcorpora_subcorpus_drop(cqi_client: cqi.CQiClient, corp
     cqi_corpus = cqi_client.corpora.get(corpus_name)
     cqi_subcorpus = cqi_corpus.subcorpora.get(subcorpus_name)
     cqi_status = cqi_subcorpus.drop()
-    payload = {'code': cqi_status,
-               'msg': cqi.api.specification.lookup[cqi_status]}
+    payload = {'code': cqi_status.code,
+               'msg': cqi_status.__class__.__name__}
     return {'code': 200, 'msg': 'OK', 'payload': payload}
 
 

(File diff suppressed because it is too large.)
@@ -34,26 +34,26 @@ class CorpusAnalysisApp {
       .then(
         cQiCorpus => {
           this.data.corpus = {o: cQiCorpus};
-          // this.data.corpus.o.getVisualizationData()
-          //   .then(
-          //     (data) => {
-          //       console.log(data);
-          //       this.renderGeneralCorpusInfo(data);
-          //       this.renderTextInfoList(data);
-          //       this.renderTextProportionsGraphic(data);
-          //       this.renderWordFrequenciesGraphic(data);
-          //       this.renderBoundsGraphic(data);
-          //     }
-          //   );
-          this.data.corpus.o.getCorpusData()
-            .then(corpusData => {
-              console.log(corpusData);
-              this.renderGeneralCorpusInfo(corpusData);
-              this.renderTextInfoList(corpusData);
-              this.renderTextProportionsGraphic(corpusData);
-              this.renderFrequenciesGraphic(corpusData);
-              this.renderBoundsGraphic(corpusData);
-            });
+          this.data.corpus.o.getVisualizationData()
+            .then(
+              (data) => {
+                console.log(data);
+                this.renderGeneralCorpusInfo(data);
+                this.renderTextInfoList(data);
+                this.renderTextProportionsGraphic(data);
+                this.renderFrequenciesGraphic(data);
+                this.renderBoundsGraphic(data);
+              }
+            );
+          // this.data.corpus.o.getCorpusData()
+          //   .then(corpusData => {
+          //     console.log(corpusData);
+          //     this.renderGeneralCorpusInfo(corpusData);
+          //     this.renderTextInfoList(corpusData);
+          //     this.renderTextProportionsGraphic(corpusData);
+          //     this.renderFrequenciesGraphic(corpusData);
+          //     this.renderBoundsGraphic(corpusData);
+          //   });
           // TODO: Don't do this hgere
           cQiCorpus.updateDb();
           this.enableActionElements();
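From this hunk on, every renderer reads the new visualization payload instead of the old `getCorpusData()` result. Its top-level shape, reconstructed from the Python endpoint above (the field names come from the diff; the concrete values here are invented for illustration):

```javascript
// Shape of the payload returned by getVisualizationData(), reconstructed
// from the Python endpoint above. All concrete values are made up.
const corpusData = {
  corpus: {
    bounds: [0, 99],                       // [first cpos, last cpos]
    counts: {token: 100, text: 2, s: 10},  // one entry per valueless s_attr
    freqs: {word: {0: 7}}                  // p_attr name -> {lexicon id: freq}
  },
  p_attrs: {word: {0: 0}},                 // p_attr name -> {cpos: lexicon id}
  s_attrs: {
    text: {
      lexicon: {
        0: {bounds: [0, 49], counts: {token: 50, s: 5, ent: 3}, freqs: {word: {0: 4}}}
      },
      values: ['title', 'publishing_year'] // sub-attribute names
    }
  },
  values: {
    p_attrs: {word: {0: 'the'}},           // lexicon id -> surface form
    s_attrs: {text: {0: {title: 'A Text', publishing_year: '1900'}}}
  }
};
```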
@@ -117,29 +117,29 @@ class CorpusAnalysisApp {
   }
 
   renderGeneralCorpusInfo(corpusData) {
-    document.querySelector('.corpus-num-tokens').innerHTML = corpusData.corpus.lexicon[0].counts.token;
-    document.querySelector('.corpus-num-s').innerHTML = corpusData.corpus.lexicon[0].counts.s;
-    document.querySelector('.corpus-num-unique-words').innerHTML = Object.entries(corpusData.corpus.lexicon[0].freqs.word).length;
-    document.querySelector('.corpus-num-unique-lemmas').innerHTML = Object.entries(corpusData.corpus.lexicon[0].freqs.lemma).length;
-    document.querySelector('.corpus-num-unique-pos').innerHTML = Object.entries(corpusData.corpus.lexicon[0].freqs.pos).length;
-    document.querySelector('.corpus-num-unique-simple-pos').innerHTML = Object.entries(corpusData.corpus.lexicon[0].freqs.simple_pos).length;
+    document.querySelector('.corpus-num-tokens').innerHTML = corpusData.corpus.counts.token;
+    document.querySelector('.corpus-num-s').innerHTML = corpusData.corpus.counts.s;
+    document.querySelector('.corpus-num-unique-words').innerHTML = Object.entries(corpusData.corpus.freqs.word).length;
+    document.querySelector('.corpus-num-unique-lemmas').innerHTML = Object.entries(corpusData.corpus.freqs.lemma).length;
+    document.querySelector('.corpus-num-unique-pos').innerHTML = Object.entries(corpusData.corpus.freqs.pos).length;
+    document.querySelector('.corpus-num-unique-simple-pos').innerHTML = Object.entries(corpusData.corpus.freqs.simple_pos).length;
   }
 
   renderTextInfoList(corpusData) {
     let corpusTextInfoListElement = document.querySelector('.corpus-text-info-list');
     let corpusTextInfoList = new CorpusTextInfoList(corpusTextInfoListElement);
-    let texts = corpusData.text.lexicon;
+    let texts = corpusData.s_attrs.text.lexicon;
     let textData = [];
     for (let i = 0; i < Object.entries(texts).length; i++) {
       let resource = {
-        title: corpusData.lookups.text[i].title,
-        publishing_year: corpusData.lookups.text[i].publishing_year,
-        num_tokens: corpusData.text.lexicon[i].counts.token,
-        num_sentences: corpusData.text.lexicon[i].counts.s,
-        num_unique_words: Object.entries(corpusData.text.lexicon[i].freqs.word).length,
-        num_unique_lemmas: Object.entries(corpusData.text.lexicon[i].freqs.lemma).length,
-        num_unique_pos: Object.entries(corpusData.text.lexicon[i].freqs.pos).length,
-        num_unique_simple_pos: Object.entries(corpusData.text.lexicon[i].freqs.simple_pos).length
+        title: corpusData.values.s_attrs.text[i].title,
+        publishing_year: corpusData.values.s_attrs.text[i].publishing_year,
+        num_tokens: corpusData.s_attrs.text.lexicon[i].counts.token,
+        num_sentences: corpusData.s_attrs.text.lexicon[i].counts.s,
+        num_unique_words: Object.entries(corpusData.s_attrs.text.lexicon[i].freqs.word).length,
+        num_unique_lemmas: Object.entries(corpusData.s_attrs.text.lexicon[i].freqs.lemma).length,
+        num_unique_pos: Object.entries(corpusData.s_attrs.text.lexicon[i].freqs.pos).length,
+        num_unique_simple_pos: Object.entries(corpusData.s_attrs.text.lexicon[i].freqs.simple_pos).length
       };
 
       textData.push(resource);
@@ -148,33 +148,29 @@ class CorpusAnalysisApp {
     corpusTextInfoList.add(textData);
 
     let textCountChipElement = document.querySelector('.text-count-chip');
-    textCountChipElement.innerHTML = `Text count: ${Object.values(corpusData.text.lexicon).length}`;
+    textCountChipElement.innerHTML = `Text count: ${corpusData.corpus.counts.text}`;
   }
 
   renderTextProportionsGraphic(corpusData) {
     let textProportionsGraphicElement = document.querySelector('#text-proportions-graphic');
-    let texts = Object.entries(corpusData.text.lexicon);
+    let texts = Object.entries(corpusData.s_attrs.text.lexicon);
     let graphData = [
       {
         values: texts.map(text => text[1].counts.token),
-        labels: texts.map(text => `${corpusData.lookups.text[text[0]].title} (${corpusData.lookups.text[text[0]].publishing_year})`),
+        labels: texts.map(text => `${corpusData.values.s_attrs.text[text[0]].title} (${corpusData.values.s_attrs.text[text[0]].publishing_year})`),
         type: 'pie'
       }
     ];
-    let graphLayout = {
-      // height: 600,
-      // width: 900
-    };
     let config = {responsive: true};
 
-    Plotly.newPlot(textProportionsGraphicElement, graphData, graphLayout, config);
+    Plotly.newPlot(textProportionsGraphicElement, graphData, config);
   }
 
   renderFrequenciesGraphic(corpusData) {
     let frequenciesTokenCategoryDropdownElement = document.querySelector('[data-target="frequencies-token-category-dropdown"]');
     let frequenciesTokenCategoryDropdownListElement = document.querySelector("#frequencies-token-category-dropdown");
     let frequenciesGraphicElement = document.querySelector('#frequencies-graphic');
-    let texts = Object.entries(corpusData.text.lexicon);
+    let texts = Object.entries(corpusData.s_attrs.text.lexicon);
 
 
     frequenciesTokenCategoryDropdownListElement.addEventListener('click', (event) => {
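One detail worth flagging in the hunk above: `Plotly.newPlot` takes its arguments as `(graphDiv, data, layout, config)`, so dropping the layout object and passing `config` third hands `{responsive: true}` to Plotly as a layout rather than a config. A sketch of the call that removes the commented-out size hints while keeping the config in fourth position (element id and data are illustrative, and Plotly is assumed to be loaded globally as in the diff):

```javascript
// Plotly.newPlot(graphDiv, data, layout, config): config must stay in
// fourth position, so pass an empty layout rather than omitting it.
let graphDiv = document.querySelector('#text-proportions-graphic');
let graphData = [{values: [60, 40], labels: ['Text A', 'Text B'], type: 'pie'}];
let config = {responsive: true};
Plotly.newPlot(graphDiv, graphData, {}, config);
```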
@@ -196,13 +192,13 @@ class CorpusAnalysisApp {
 
   createFrequenciesGraphData(category, texts, corpusData) {
     let graphData = [];
-    let sortedData = Object.entries(corpusData.corpus.lexicon[0].freqs[category]).sort((a, b) => b[1] - a[1]).slice(0, 5);
+    let sortedData = Object.entries(corpusData.corpus.freqs[category]).sort((a, b) => b[1] - a[1]).slice(0, 5);
 
     for (let item of sortedData) {
       let data = {
-        x: texts.map(text => `${corpusData.lookups.text[text[0]].title} (${corpusData.lookups.text[text[0]].publishing_year})`),
+        x: texts.map(text => `${corpusData.values.s_attrs.text[text[0]].title} (${corpusData.values.s_attrs.text[text[0]].publishing_year})`),
         y: texts.map(text => text[1].freqs[category][item[0]]),
-        name: corpusData.lookups[category][item[0]],
+        name: corpusData.values.p_attrs[category][item[0]],
         type: 'bar'
       };
       graphData.push(data);
@@ -215,22 +211,20 @@ class CorpusAnalysisApp {
       let boundsGraphicElement = document.querySelector('#bounds-graphic');
 
       let graphData = [];
-      let texts = Object.entries(corpusData.text.lexicon);
+      let texts = Object.entries(corpusData.s_attrs.text.lexicon);
 
       graphData = [{
         type: 'bar',
         x: texts.map(text => text[1].bounds[1] - text[1].bounds[0]),
-        y: texts.map(text => corpusData.lookups.text[text[0]].title),
+        y: texts.map(text => corpusData.values.s_attrs.text[text[0]].title),
         base: texts.map(text => text[1].bounds[0]),
-        text: texts.map(text => `${corpusData.lookups.text[text[0]].title} (${corpusData.lookups.text[text[0]].publishing_year})`),
+        text: texts.map(text => `${corpusData.values.s_attrs.text[text[0]].title} (${corpusData.values.s_attrs.text[text[0]].publishing_year})`),
         orientation: 'h',
         hovertemplate: '%{base} - %{x} <br>%{y}',
         showlegend: false
       }];
 
       let graphLayout = {
-        // height: 600,
-        // width: 2000,
         barmode: 'stack',
         type: 'bar',
         showgrid: false,
@@ -103,6 +103,7 @@ class CorpusTextInfoList extends ResourceList {
       if (sortElement !== clickedSortElement) {
         sortElement.classList.remove('asc', 'desc');
         sortElement.style.color = 'black';
+        sortElement.innerHTML = 'arrow_drop_down';
       };
     });
     clickedSortElement.style.color = '#aa9cc9';
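The added line resets every inactive column's sort icon to the neutral Material Icons glyph `arrow_drop_down`, so only the clicked column keeps its direction indicator. A minimal self-contained sketch of the pattern (the `.sort` selector is hypothetical; the list widget's real markup may differ):

```javascript
// Reset all sort indicators except the clicked one. Material Icons
// renders the ligature name 'arrow_drop_down' as the neutral glyph.
function resetSortIndicators(clickedSortElement) {
  document.querySelectorAll('.sort').forEach((sortElement) => {
    if (sortElement === clickedSortElement) return;
    sortElement.classList.remove('asc', 'desc');
    sortElement.style.color = 'black';
    sortElement.innerHTML = 'arrow_drop_down'; // neutral icon
  });
}
```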