	Merge branch 'query-builder' of gitlab.ub.uni-bielefeld.de:sfb1288inf/nopaque into query-builder
@@ -1,4 +1,4 @@
-FROM python:3.8.10-slim-buster
+FROM python:3.11.5-slim-bookworm
 
 
 LABEL authors="Patrick Jentsch <p.jentsch@uni-bielefeld.de>"
@@ -121,10 +121,7 @@ class CQiNamespace(Namespace):
             socketio.sleep(3)
             retry_counter -= 1
             db.session.refresh(db_corpus)
-        cqi_client: CQiClient = CQiClient(
-            f'cqpserver_{db_corpus_id}',
-            timeout=float('inf')
-        )
+        cqi_client: CQiClient = CQiClient(f'cqpserver_{db_corpus_id}')
         session['cqi_over_sio'] = {
             'cqi_client': cqi_client,
             'cqi_client_lock': Lock(),
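
The four-line constructor call collapses to one: the explicit `timeout=float('inf')` is dropped, so the client now falls back to whatever default timeout the cqi package ships with, while the CQP server is still addressed by the per-corpus hostname `cqpserver_<db_corpus_id>`. A minimal sketch of the resulting setup, using only names visible in this hunk (the surrounding retry loop and Flask session handling are simplified assumptions):

from threading import Lock

from cqi import CQiClient

db_corpus_id = 1  # hypothetical; the real namespace reads it from the session

# As of this change: no explicit timeout, so the cqi default applies.
cqi_client: CQiClient = CQiClient(f'cqpserver_{db_corpus_id}')

# Client and lock are stored per session, as in the hunk above.
session = {}
session['cqi_over_sio'] = {
    'cqi_client': cqi_client,
    'cqi_client_lock': Lock(),
}
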
@@ -1,6 +1,7 @@
 from collections import Counter
 from cqi import CQiClient
 from cqi.models.corpora import Corpus as CQiCorpus
+from cqi.models.subcorpora import Subcorpus as CQiSubcorpus
 from cqi.models.attributes import (
     PositionalAttribute as CQiPositionalAttribute,
     StructuralAttribute as CQiStructuralAttribute
@@ -40,161 +41,132 @@ def ext_corpus_update_db(corpus: str) -> CQiStatusOk:
 def ext_corpus_static_data(corpus: str) -> Dict:
     db_corpus_id: int = session['cqi_over_sio']['db_corpus_id']
     db_corpus: Corpus = Corpus.query.get(db_corpus_id)
-    cache_file_path: str = os.path.join(db_corpus.path, 'cwb', 'static.json.gz')
-    if os.path.exists(cache_file_path):
-        with open(cache_file_path, 'rb') as f:
+
+    static_data_file_path: str = os.path.join(db_corpus.path, 'cwb', 'static.json.gz')
+    if os.path.exists(static_data_file_path):
+        with open(static_data_file_path, 'rb') as f:
             return f.read()
+
     cqi_client: CQiClient = session['cqi_over_sio']['cqi_client']
     cqi_corpus: CQiCorpus = cqi_client.corpora.get(corpus)
-    cqi_p_attrs: Dict[str, CQiPositionalAttribute] = {
-        p_attr.name: p_attr
-        for p_attr in cqi_corpus.positional_attributes.list()
-    }
-    cqi_s_attrs: Dict[str, CQiStructuralAttribute] = {
-        s_attr.name: s_attr
-        for s_attr in cqi_corpus.structural_attributes.list()
-    }
-    static_corpus_data = {
+    cqi_p_attrs: List[CQiPositionalAttribute] = cqi_corpus.positional_attributes.list()
+    cqi_s_attrs: List[CQiStructuralAttribute] = cqi_corpus.structural_attributes.list()
+
+    static_data = {
         'corpus': {
             'bounds': [0, cqi_corpus.size - 1],
-            'counts': {
-                'token': cqi_corpus.size
-            },
             'freqs': {}
         },
         'p_attrs': {},
         's_attrs': {},
         'values': {'p_attrs': {}, 's_attrs': {}}
     }
-    for p_attr in cqi_p_attrs.values():
-        static_corpus_data['corpus']['freqs'][p_attr.name] = {}
-        chunk_size = 10000
-        p_attr_id_list = list(range(p_attr.lexicon_size))
-        chunks = [p_attr_id_list[i:i+chunk_size] for i in range(0, len(p_attr_id_list), chunk_size)]
+
+    for p_attr in cqi_p_attrs:
+        print(f'corpus.freqs.{p_attr.name}')
+        static_data['corpus']['freqs'][p_attr.name] = []
+        p_attr_id_list: List[int] = list(range(p_attr.lexicon_size))
+        static_data['corpus']['freqs'][p_attr.name].extend(p_attr.freqs_by_ids(p_attr_id_list))
         del p_attr_id_list
-        for chunk in chunks:
-            # print(f'corpus.freqs.{p_attr.name}: {chunk[0]} - {chunk[-1]}')
-            static_corpus_data['corpus']['freqs'][p_attr.name].update(
-                dict(zip(chunk, p_attr.freqs_by_ids(chunk)))
-            )
-        del chunks
-        static_corpus_data['p_attrs'][p_attr.name] = {}
-        cpos_list = list(range(cqi_corpus.size))
-        chunks = [cpos_list[i:i+chunk_size] for i in range(0, len(cpos_list), chunk_size)]
+
+        print(f'p_attrs.{p_attr.name}')
+        static_data['p_attrs'][p_attr.name] = []
+        cpos_list: List[int] = list(range(cqi_corpus.size))
+        static_data['p_attrs'][p_attr.name].extend(p_attr.ids_by_cpos(cpos_list))
         del cpos_list
-        for chunk in chunks:
-            # print(f'p_attrs.{p_attr.name}: {chunk[0]} - {chunk[-1]}')
-            static_corpus_data['p_attrs'][p_attr.name].update(
-                dict(zip(chunk, p_attr.ids_by_cpos(chunk)))
-            )
-        del chunks
-        static_corpus_data['values']['p_attrs'][p_attr.name] = {}
-        p_attr_id_list = list(range(p_attr.lexicon_size))
-        chunks = [p_attr_id_list[i:i+chunk_size] for i in range(0, len(p_attr_id_list), chunk_size)]
+
+        print(f'values.p_attrs.{p_attr.name}')
+        static_data['values']['p_attrs'][p_attr.name] = []
+        p_attr_id_list: List[int] = list(range(p_attr.lexicon_size))
+        static_data['values']['p_attrs'][p_attr.name].extend(p_attr.values_by_ids(p_attr_id_list))
         del p_attr_id_list
-        for chunk in chunks:
-            # print(f'values.p_attrs.{p_attr.name}: {chunk[0]} - {chunk[-1]}')
-            static_corpus_data['values']['p_attrs'][p_attr.name].update(
-                dict(zip(chunk, p_attr.values_by_ids(chunk)))
-            )
-        del chunks
-    for s_attr in cqi_s_attrs.values():
+
+    for s_attr in cqi_s_attrs:
         if s_attr.has_values:
             continue
-        static_corpus_data['corpus']['counts'][s_attr.name] = s_attr.size
-        static_corpus_data['s_attrs'][s_attr.name] = {'lexicon': {}, 'values': None}
-        static_corpus_data['values']['s_attrs'][s_attr.name] = {}
-        ##########################################################################
-        # A faster way to get cpos boundaries for smaller s_attrs    #
-        ##########################################################################
-        # if s_attr.name in ['s', 'ent']:
-        #     cqi_corpus.query('Last', f'<{s_attr.name}> []* </{s_attr.name}>;')
-        #     cqi_subcorpus = cqi_corpus.subcorpora.get('Last')
-        #     first_match = 0
-        #     last_match = cqi_subcorpus.size - 1
-        #     match_boundaries = zip(
-        #         range(first_match, last_match + 1),
-        #         cqi_subcorpus.dump(cqi_subcorpus.fields['match'], first_match, last_match),
-        #         cqi_subcorpus.dump(cqi_subcorpus.fields['matchend'], first_match, last_match)
-        #     )
-        #     for id, lbound, rbound in match_boundaries:
-        #         static_corpus_data['s_attrs'][s_attr.name]['lexicon'][id] = {}
-        #         static_corpus_data['s_attrs'][s_attr.name]['lexicon'][id]['bounds'] = [lbound, rbound]
-        #         static_corpus_data['s_attrs'][s_attr.name]['lexicon'][id]['counts'] = {}
-        #         static_corpus_data['s_attrs'][s_attr.name]['lexicon'][id]['counts']['token'] = rbound - lbound + 1
-        #     cqi_subcorpus.drop()
-        for id in range(0, s_attr.size):
-            # print(f's_attrs.{s_attr.name}.lexicon.{id}')
-            static_corpus_data['s_attrs'][s_attr.name]['lexicon'][id] = {
-                'bounds': None,
-                'counts': None,
-                'freqs': None
-            }
-            if s_attr.name != 'text':
-                continue
+
+        static_data['s_attrs'][s_attr.name] = {'lexicon': [], 'values': None}
+
+        if s_attr.name in ['s', 'ent']:
+            ##############################################################
+            # A faster way to get cpos boundaries for smaller s_attrs    #
+            # Note: Needs more testing, don't use it in production       #
+            ##############################################################
+            cqi_corpus.query('Last', f'<{s_attr.name}> []* </{s_attr.name}>;')
+            cqi_subcorpus: CQiSubcorpus = cqi_corpus.subcorpora.get('Last')
+            first_match: int = 0
+            last_match: int = cqi_subcorpus.size - 1
+            match_boundaries = zip(
+                range(first_match, last_match + 1),
+                cqi_subcorpus.dump(
+                    cqi_subcorpus.fields['match'],
+                    first_match,
+                    last_match
+                ),
+                cqi_subcorpus.dump(
+                    cqi_subcorpus.fields['matchend'],
+                    first_match,
+                    last_match
+                )
+            )
+            cqi_subcorpus.drop()
+            del cqi_subcorpus, first_match, last_match
+            for id, lbound, rbound in match_boundaries:
+                static_data['s_attrs'][s_attr.name]['lexicon'].append({})
+                print(f's_attrs.{s_attr.name}.lexicon.{id}.bounds')
+                static_data['s_attrs'][s_attr.name]['lexicon'][id]['bounds'] = [lbound, rbound]
+            del match_boundaries
+
+        if s_attr.name != 'text':
+            continue
+
+        for id in range(0, s_attr.size):
+            static_data['s_attrs'][s_attr.name]['lexicon'].append({})
+            # This is a very slow operation, thats why we only use it for
+            # the text attribute
             lbound, rbound = s_attr.cpos_by_id(id)
-            # print(f's_attrs.{s_attr.name}.lexicon.{id}.bounds')
-            static_corpus_data['s_attrs'][s_attr.name]['lexicon'][id]['bounds'] = [lbound, rbound]
-            # print(f's_attrs.{s_attr.name}.lexicon.{id}.counts')
-            static_corpus_data['s_attrs'][s_attr.name]['lexicon'][id]['counts'] = {}
-            static_corpus_data['s_attrs'][s_attr.name]['lexicon'][id]['counts']['token'] = rbound - lbound + 1
-            cpos_list = list(range(lbound, rbound + 1))
-            chunks = [cpos_list[i:i+chunk_size] for i in range(0, len(cpos_list), chunk_size)]
-            del cpos_list
-            ent_ids = set()
-            for chunk in chunks:
-                # print(f'Gather ent_ids from cpos: {chunk[0]} - {chunk[-1]}')
-                ent_ids.update({x for x in cqi_s_attrs['ent'].ids_by_cpos(chunk) if x != -1})
-            static_corpus_data['s_attrs'][s_attr.name]['lexicon'][id]['counts']['ent'] = len(ent_ids)
-            del ent_ids
-            s_ids = set()
-            for chunk in chunks:
-                # print(f'Gather s_ids from cpos: {chunk[0]} - {chunk[-1]}')
-                s_ids.update({x for x in cqi_s_attrs['s'].ids_by_cpos(chunk) if x != -1})
-            static_corpus_data['s_attrs'][s_attr.name]['lexicon'][id]['counts']['s'] = len(s_ids)
-            del s_ids
-            # print(f's_attrs.{s_attr.name}.lexicon.{id}.freqs')
-            static_corpus_data['s_attrs'][s_attr.name]['lexicon'][id]['freqs'] = {}
-            for p_attr in cqi_p_attrs.values():
-                p_attr_ids = []
-                for chunk in chunks:
-                    # print(f'Gather p_attr_ids from cpos: {chunk[0]} - {chunk[-1]}')
-                    p_attr_ids.extend(p_attr.ids_by_cpos(chunk))
-                static_corpus_data['s_attrs'][s_attr.name]['lexicon'][id]['freqs'][p_attr.name] = dict(Counter(p_attr_ids))
+            print(f's_attrs.{s_attr.name}.lexicon.{id}.bounds')
+            static_data['s_attrs'][s_attr.name]['lexicon'][id]['bounds'] = [lbound, rbound]
+            static_data['s_attrs'][s_attr.name]['lexicon'][id]['freqs'] = {}
+            cpos_list: List[int] = list(range(lbound, rbound + 1))
+            for p_attr in cqi_p_attrs:
+                p_attr_ids: List[int] = []
+                p_attr_ids.extend(p_attr.ids_by_cpos(cpos_list))
+                print(f's_attrs.{s_attr.name}.lexicon.{id}.freqs.{p_attr.name}')
+                static_data['s_attrs'][s_attr.name]['lexicon'][id]['freqs'][p_attr.name] = dict(Counter(p_attr_ids))
                 del p_attr_ids
-            del chunks
-        sub_s_attrs = cqi_corpus.structural_attributes.list(filters={'part_of': s_attr})
-        s_attr_value_names: List[str] = [
+            del cpos_list
+
+        sub_s_attrs: List[CQiStructuralAttribute] = cqi_corpus.structural_attributes.list(filters={'part_of': s_attr})
+        print(f's_attrs.{s_attr.name}.values')
+        static_data['s_attrs'][s_attr.name]['values'] = [
             sub_s_attr.name[(len(s_attr.name) + 1):]
             for sub_s_attr in sub_s_attrs
         ]
-        s_attr_id_list = list(range(s_attr.size))
-        chunks = [s_attr_id_list[i:i+chunk_size] for i in range(0, len(s_attr_id_list), chunk_size)]
-        del s_attr_id_list
-        sub_s_attr_values = []
+        s_attr_id_list: List[int] = list(range(s_attr.size))
+        sub_s_attr_values: List[str] = []
         for sub_s_attr in sub_s_attrs:
             tmp = []
-            for chunk in chunks:
-                tmp.extend(sub_s_attr.values_by_ids(chunk))
+            tmp.extend(sub_s_attr.values_by_ids(s_attr_id_list))
             sub_s_attr_values.append(tmp)
             del tmp
-        del chunks
-        # print(f's_attrs.{s_attr.name}.values')
-        static_corpus_data['s_attrs'][s_attr.name]['values'] = s_attr_value_names
-        # print(f'values.s_attrs.{s_attr.name}')
-        static_corpus_data['values']['s_attrs'][s_attr.name] = {
-            s_attr_id: {
-                s_attr_value_name: sub_s_attr_values[s_attr_value_name_idx][s_attr_id_idx]
+        del s_attr_id_list
+        print(f'values.s_attrs.{s_attr.name}')
+        static_data['values']['s_attrs'][s_attr.name] = [
+            {
+                s_attr_value_name: sub_s_attr_values[s_attr_value_name_idx][s_attr_id]
                 for s_attr_value_name_idx, s_attr_value_name in enumerate(
-                    static_corpus_data['s_attrs'][s_attr.name]['values']
+                    static_data['s_attrs'][s_attr.name]['values']
                 )
-            } for s_attr_id_idx, s_attr_id in enumerate(range(0, s_attr.size))
-        }
+            } for s_attr_id in range(0, s_attr.size)
+        ]
         del sub_s_attr_values
-    with gzip.open(cache_file_path, 'wt') as f:
-        json.dump(static_corpus_data, f)
-    del static_corpus_data
-    with open(cache_file_path, 'rb') as f:
+
+    print('Saving static data to file')
+    with gzip.open(static_data_file_path, 'wt') as f:
+        json.dump(static_data, f)
+    del static_data
+    print('Sending static data to client')
+    with open(static_data_file_path, 'rb') as f:
         return f.read()
+
+
 
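
Two things change in `ext_corpus_static_data`. First, the chunked transfer (fixed `chunk_size = 10000`, repeated `update(dict(zip(...)))` calls) is replaced by single bulk calls (`freqs_by_ids`, `ids_by_cpos`, `values_by_ids` over the full id or cpos range), presumably enabled by the `cqi>=0.1.7` bump further down. Second, the id-keyed dicts become plain lists whose index is the id, the precomputed `counts` are dropped in favour of deriving counts from `bounds` on the client, and the previously commented-out fast path for `s` and `ent` boundaries (one CQP query `<s> []* </s>` plus two subcorpus dumps instead of one `cpos_by_id` call per region) is activated, still flagged in-code as not production-ready. A small sketch of the shape change, with illustrative values rather than real corpus data:

# Old shape: mapping from id to frequency, filled chunk by chunk.
old_freqs = {0: 17, 1: 5, 2: 1}

# New shape: a plain list, where the list index *is* the id. This also
# serialises to a smaller JSON array (no repeated integer keys).
new_freqs = [17, 5, 1]

assert all(old_freqs[i] == new_freqs[i] for i in range(len(new_freqs)))

# Counts are no longer stored; consumers derive them, e.g. the number of
# sentences is now len(static_data['s_attrs']['s']['lexicon']).
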
@@ -93,8 +93,8 @@ class CorpusAnalysisStaticVisualization {
 
   renderGeneralCorpusInfo() {
     let corpusData = this.data.corpus.o.staticData;
-    document.querySelector('.corpus-num-tokens').innerHTML = corpusData.corpus.counts.token;
-    document.querySelector('.corpus-num-s').innerHTML = corpusData.corpus.counts.s;
+    document.querySelector('.corpus-num-tokens').innerHTML = corpusData.corpus.bounds[1] - corpusData.corpus.bounds[0];
+    document.querySelector('.corpus-num-s').innerHTML = corpusData.s_attrs.s.lexicon.length;
     document.querySelector('.corpus-num-unique-words').innerHTML = Object.entries(corpusData.corpus.freqs.word).length;
     document.querySelector('.corpus-num-unique-lemmas').innerHTML = Object.entries(corpusData.corpus.freqs.lemma).length;
     document.querySelector('.corpus-num-unique-pos').innerHTML = Object.entries(corpusData.corpus.freqs.pos).length;
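
With `counts` gone from the static data, the totals are now derived client-side: sentences from the length of the `s` lexicon, tokens from the corpus bounds. Note that the bounds are inclusive (`[0, size - 1]` on the Python side), so `bounds[1] - bounds[0]` evaluates to `size - 1` rather than the old `counts.token == size`; a quick check of that arithmetic:

corpus_size = 1000                    # hypothetical token count
bounds = [0, corpus_size - 1]         # as written by ext_corpus_static_data
print(bounds[1] - bounds[0])          # 999 == corpus_size - 1, off by one
print(bounds[1] - bounds[0] + 1)      # 1000, the former counts.token value
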
@@ -111,8 +111,11 @@ class CorpusAnalysisStaticVisualization {
       let resource = {
         title: corpusData.values.s_attrs.text[i].title,
         publishing_year: corpusData.values.s_attrs.text[i].publishing_year,
-        num_tokens: corpusData.s_attrs.text.lexicon[i].counts.token,
-        num_sentences: corpusData.s_attrs.text.lexicon[i].counts.s,
+        // num_sentences: corpusData.s_attrs.text.lexicon[i].counts.s,
+        num_tokens: corpusData.s_attrs.text.lexicon[i].bounds[1] - corpusData.s_attrs.text.lexicon[i].bounds[0],
+        num_sentences: corpusData.s_attrs.s.lexicon.filter((s) => {
+          return s.bounds[0] >= corpusData.s_attrs.text.lexicon[i].bounds[0] && s.bounds[1] <= corpusData.s_attrs.text.lexicon[i].bounds[1];
+        }).length,
         num_unique_words: Object.entries(corpusData.s_attrs.text.lexicon[i].freqs.word).length,
         num_unique_lemmas: Object.entries(corpusData.s_attrs.text.lexicon[i].freqs.lemma).length,
         num_unique_pos: Object.entries(corpusData.s_attrs.text.lexicon[i].freqs.pos).length,
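
Per-text sentence counts are likewise reconstructed from bounds containment: a sentence belongs to a text if its cpos interval lies inside the text's interval. The same logic, sketched in Python over the decoded static data (key names mirror the JSON produced by `ext_corpus_static_data`; the off-by-one note above applies to `num_tokens` here too):

def num_sentences_in_text(static_data: dict, text_id: int) -> int:
    # Bounds are inclusive cpos intervals, i.e. [lbound, rbound].
    text_lbound, text_rbound = static_data['s_attrs']['text']['lexicon'][text_id]['bounds']
    return sum(
        1
        for s in static_data['s_attrs']['s']['lexicon']
        if s['bounds'][0] >= text_lbound and s['bounds'][1] <= text_rbound
    )
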
@@ -125,7 +128,7 @@ class CorpusAnalysisStaticVisualization {
     corpusTextInfoList.add(textData);
 
     let textCountChipElement = document.querySelector('.text-count-chip');
-    textCountChipElement.innerHTML = `Text count: ${corpusData.corpus.counts.text}`;
+    textCountChipElement.innerHTML = `Text count: ${corpusData.s_attrs.text.lexicon.length}`;
   }
 
   renderTextProportionsGraphic() {
@@ -198,7 +201,7 @@ class CorpusAnalysisStaticVisualization {
       default:
         graphData = [
           {
-            values: texts.map(text => text[1].counts.token),
+            values: texts.map(text => text[1].bounds[1] - text[1].bounds[0]),
             labels: texts.map(text => `${corpusData.values.s_attrs.text[text[0]].title} (${corpusData.values.s_attrs.text[text[0]].publishing_year})`),
             type: graphtype
           }
@@ -1,5 +1,5 @@
 apifairy
-cqi>=0.1.6
+cqi>=0.1.7
 dnspython==2.2.1
 docker
 eventlet
@@ -11,7 +11,8 @@ Flask-Hashids>=1.0.1
 Flask-HTTPAuth
 Flask-Login
 Flask-Mail
-flask-marshmallow==0.14.0
+Flask-Marshmallow==0.14.0
+Flask-Menu==0.7.2
 Flask-Migrate
 Flask-Paranoid
 Flask-SocketIO