diff --git a/app/corpora/cqi/client.py b/app/corpora/cqi/client.py index 5cc867c1..1f965e15 100644 --- a/app/corpora/cqi/client.py +++ b/app/corpora/cqi/client.py @@ -32,12 +32,14 @@ class CQiClient: def connect(self, username='anonymous', password=''): status = self.api.ctrl_connect(username, password) - self.corpora = CorpusCollection(self) return status def disconnect(self): - del self.corpora return self.api.ctrl_bye() def ping(self): return self.api.ctrl_ping() + + @property + def corpora(self): + return CorpusCollection(client=self) diff --git a/app/corpora/cqi/models/attributes.py b/app/corpora/cqi/models/attributes.py index 20dd359b..ea92d5af 100644 --- a/app/corpora/cqi/models/attributes.py +++ b/app/corpora/cqi/models/attributes.py @@ -1,139 +1,119 @@ -class AttributeCollection: - def __init__(self, client, corpus): - self.client = client - self.corpus = corpus - self.alignment = AlignmentAttributeCollection(client, corpus) - self.positional = PositionalAttributeCollection(client, corpus) - self.structural = StructuralAttributeCollection(client, corpus) +from .ressource import Collection, Model -class Attribute: +class Attribute(Model): """ This is a class representing an attribute. Attributes denote the general category of information. A specific occurence is identified by an Id. - - Attributes: - client (CQiClient): A connected client pointing at the server that this - object is on. - corpus (Corpus): The corpus, this attribute belongs to. - name (str): The name of the Attribute. - size (int): The number of occurences of this attribute within the corpus. """ - def __init__(self, client, corpus, name): - self.client = client - self.corpus = corpus - self.name = name - self._name = '{}.{}'.format(corpus.name, name) - self.size = client.api.cl_attribute_size(self._name) + id_attribute = 'api_name' + + @staticmethod + def _attrs(client, corpus, name): + api_name = '{}.{}'.format(corpus.attrs['api_name'], name) + return {'api_name': api_name, + 'name': name, + 'size': client.api.cl_attribute_size(api_name)} def drop(self): - return self.client.api.cl_drop_attribute(self._name) + return self.client.api.cl_drop_attribute(self.attrs['api_name']) -class AlignmentAttributeCollection: - def __init__(self, client, corpus): - self.client = client +class AttributeCollection(Collection): + model = Attribute + + def __init__(self, client=None, corpus=None): + super(AttributeCollection, self).__init__(client=client) self.corpus = corpus - def get(self, name): - return AlignmentAttribute(self.client, self.corpus, name) + def get(self, attribute_name): + return self.prepare_model(self.model._attrs(self.client, self.corpus, + attribute_name)) def list(self): - return [AlignmentAttribute(self.client, self.corpus, attr) for attr in - self.client.api.corpus_alignment_attributes(self.corpus.name)] + raise NotImplementedError class AlignmentAttribute(Attribute): def cpos_by_ids(self, id_list): - return self.client.api.cl_alg2cpos(self._name, id_list) + return self.client.api.cl_alg2cpos(self.attrs['api_name'], id_list) def ids_by_cpos(self, cpos_list): - return self.client.api.cl_cpos2alg(self._name, cpos_list) + return self.client.api.cl_cpos2alg(self.attrs['api_name'], cpos_list) -class PositionalAttributeCollection: - def __init__(self, client, corpus): - self.client = client - self.corpus = corpus - - def get(self, name): - return PositionalAttribute(self.client, self.corpus, name) +class AlignmentAttributeCollection(AttributeCollection): + model = AlignmentAttribute def list(self): - return [PositionalAttribute(self.client, self.corpus, attr) for attr in - self.client.api.corpus_positional_attributes(self.corpus.name)] + return [self.prepare_model(self.model._attrs(self.client, self.corpus, attr)) # noqa + for attr in self.client.api.corpus_alignment_attributes(self.corpus.attrs['api_name'])] # noqa class PositionalAttribute(Attribute): - def __init__(self, client, corpus, name): - super(PositionalAttribute, self).__init__(client, corpus, name) - self.lexicon_size = client.api.cl_lexicon_size(self._name) + @staticmethod + def _attrs(client, corpus, name): + attrs = super(PositionalAttribute, PositionalAttribute)._attrs(client, corpus, name) # noqa + attrs['lexicon_size'] = client.api.cl_lexicon_size(attrs['api_name']) + return attrs def cpos_by_id(self, id): - return self.client.api.cl_id2cpos(self._name, id) + return self.client.api.cl_id2cpos(self.attrs['api_name'], id) def cpos_by_ids(self, id_list): - return self.client.api.cl_idlist2cpos(self._name, id_list) + return self.client.api.cl_idlist2cpos(self.attrs['api_name'], id_list) def freqs_by_ids(self, id_list): - return self.client.api.cl_id2freq(self._name, id_list) + return self.client.api.cl_id2freq(self.attrs['api_name'], id_list) def ids_by_cpos(self, cpos_list): - return self.client.api.cl_cpos2id(self._name, cpos_list) + return self.client.api.cl_cpos2id(self.attrs['api_name'], cpos_list) def ids_by_regex(self, regex): - return self.client.api.cl_regex2id(self._name, regex) + return self.client.api.cl_regex2id(self.attrs['api_name'], regex) def ids_by_values(self, value_list): - return self.client.api.cl_str2id(self._name, value_list) + return self.client.api.cl_str2id(self.attrs['api_name'], value_list) def values_by_cpos(self, cpos_list): - return self.client.api.cl_cpos2str(self._name, cpos_list) + return self.client.api.cl_cpos2str(self.attrs['api_name'], cpos_list) def values_by_ids(self, id_list): - return self.client.api.cl_id2str(self._name, id_list) + return self.client.api.cl_id2str(self.attrs['api_name'], id_list) -class StructuralAttributeCollection: - def __init__(self, client, corpus): - self.client = client - self.corpus = corpus +class PositionalAttributeCollection(AttributeCollection): + model = PositionalAttribute - def get(self, name): - return StructuralAttribute(self.client, self.corpus, name) - - def list(self, filters={}): - attrs = [StructuralAttribute(self.client, self.corpus, attr) for attr - in self.client.api.corpus_structural_attributes( - self.corpus.name)] - for k, v in filters.items(): - if k == 'part_of': - attrs = list(filter(lambda x: x.name.startswith(v.name + '_'), - attrs)) - return attrs + def list(self): + return [self.prepare_model(self.model._attrs(self.client, self.corpus, attr)) # noqa + for attr in self.client.api.corpus_positional_attributes(self.corpus.attrs['api_name'])] # noqa class StructuralAttribute(Attribute): - def __init__(self, client, corpus, name): - super(StructuralAttribute, self).__init__(client, corpus, name) - self.has_values = \ - client.api.corpus_structural_attribute_has_values(self._name) + @staticmethod + def _attrs(client, corpus, name): + attrs = super(StructuralAttribute, StructuralAttribute)._attrs(client, corpus, name) # noqa + attrs['has_values'] = client.api.corpus_structural_attribute_has_values(attrs['api_name']) # noqa + return attrs def cpos_by_id(self, id): - return self.client.api.cl_struc2cpos(self._name, id) + return self.client.api.cl_struc2cpos(self.attrs['api_name'], id) def ids_by_cpos(self, cpos_list): - return self.client.api.cl_cpos2struc(self._name, cpos_list) + return self.client.api.cl_cpos2struc(self.attrs['api_name'], cpos_list) def lbound_by_cpos(self, cpos_list): - return self.client.api.cl_cpos2lbound(self._name, cpos_list) + return self.client.api.cl_cpos2lbound(self.attrs['api_name'], + cpos_list) def rbound_by_cpos(self, cpos_list): - return self.client.api.cl_cpos2rbound(self._name, cpos_list) + return self.client.api.cl_cpos2rbound(self.attrs['api_name'], + cpos_list) def values_by_ids(self, id_list): - return self.client.api.cl_struc2str(self._name, id_list) + return self.client.api.cl_struc2str(self.attrs['api_name'], id_list) def export(self, first_cpos, last_cpos, context=0, expand_lists=False): first_id, last_id = self.ids_by_cpos([first_cpos, last_cpos]) @@ -150,9 +130,10 @@ class StructuralAttribute(Attribute): else: cpos_list_lbound = first_cpos rc_rbound = \ - self.cpos_by_id(min((last_id + context), (self.size - 1)))[1] + self.cpos_by_id(min((last_id + context), (self.attrs['size'] - 1)))[1] if rc_rbound != last_cpos: - rc_lbound = min((last_cpos + 1), (self.corpus.size - 1)) + rc_lbound = min((last_cpos + 1), + (self.collection.corpus.attrs['size'] - 1)) rc = (rc_lbound, rc_rbound) cpos_list_rbound = rc_rbound else: @@ -164,5 +145,17 @@ class StructuralAttribute(Attribute): 'rc': list(range(rc[0], (rc[1] + 1)))} else: match = {'lc': lc, 'c': c, 'rc': rc} - lookups = self.corpus.lookups_by_cpos(cpos_list) + lookups = self.collection.corpus.lookups_by_cpos(cpos_list) return {'match': match, **lookups} + + +class StructuralAttributeCollection(AttributeCollection): + model = StructuralAttribute + + def list(self, filters={}): + attrs = [self.prepare_model(self.model._attrs(self.client, self.corpus, attr)) # noqa + for attr in self.client.api.corpus_structural_attributes(self.corpus.attrs['api_name'])] # noqa + for k, v in filters.items(): + if k == 'part_of': + attrs = list(filter(lambda x: x.attrs['name'].startswith(v.attrs['name'] + '_'), attrs)) # noqa + return attrs diff --git a/app/corpora/cqi/models/corpora.py b/app/corpora/cqi/models/corpora.py index e81056ab..a12d9b4f 100644 --- a/app/corpora/cqi/models/corpora.py +++ b/app/corpora/cqi/models/corpora.py @@ -1,30 +1,24 @@ -from .attributes import AttributeCollection +from .attributes import (AlignmentAttributeCollection, + PositionalAttributeCollection, + StructuralAttributeCollection) +from .ressource import Collection, Model from .subcorpora import SubcorpusCollection -class CorpusCollection: - def __init__(self, client): - self.client = client +class Corpus(Model): + id_attribute = 'api_name' - def get(self, name): - return Corpus(self.client, name) - - def list(self): - return [Corpus(self.client, corpus) for corpus in - self.client.api.corpus_list_coprora()] - - -class Corpus: - def __init__(self, client, name): - self.client = client - self.name = name - self.size = client.api.cl_attribute_size('{}.word'.format(name)) - # self.info = client.api.corpus_info(name) - self.charset = client.api.corpus_charset(name) - # self.full_name = client.api.corpus_full_name(name) - self.properties = client.api.corpus_properties(name) - self.attributes = AttributeCollection(client, self) - self.subcorpora = SubcorpusCollection(client, self) + @staticmethod + def _attrs(client, name): + api_name = name + return {'api_name': api_name, + 'name': name, + 'size': client.api.cl_attribute_size( + '{}.word'.format(api_name)), + # 'info': client.api.corpus_info(name), + 'charset': client.api.corpus_charset(api_name), + # 'full_name' = client.api.corpus_full_name(name), + 'properties': client.api.corpus_properties(api_name)} def lookups_by_cpos(self, cpos_list): cpos_list = list(set(cpos_list)) @@ -33,35 +27,64 @@ class Corpus: lookups['cpos_lookup'] = {} for cpos in cpos_list: lookups['cpos_lookup'][cpos] = {} - for attr in self.attributes.positional.list(): + for attr in self.positional_attributes.list(): cpos_attr_values = attr.values_by_cpos(cpos_list) for i, cpos in enumerate(cpos_list): - lookups['cpos_lookup'][cpos][attr.name] = cpos_attr_values[i] - for attr in self.attributes.structural.list(): - if attr.has_values: + lookups['cpos_lookup'][cpos][attr.attrs['name']] = cpos_attr_values[i] + for attr in self.structural_attributes.list(): + if attr.attrs['has_values']: continue cpos_attr_ids = attr.ids_by_cpos(cpos_list) for i, cpos in enumerate(cpos_list): if cpos_attr_ids[i] != -1: - lookups['cpos_lookup'][cpos][attr.name] = cpos_attr_ids[i] + lookups['cpos_lookup'][cpos][attr.attrs['name']] = cpos_attr_ids[i] occured_attr_ids = list(filter(lambda x: x != -1, set(cpos_attr_ids))) if not occured_attr_ids: continue subattrs = \ - self.attributes.structural.list(filters={'part_of': attr}) + self.structural_attributes.list(filters={'part_of': attr}) if not subattrs: continue - lookup_name = '{}_lookup'.format(attr.name) + lookup_name = '{}_lookup'.format(attr.attrs['name']) lookups[lookup_name] = {} for attr_id in occured_attr_ids: lookups[lookup_name][attr_id] = {} for subattr in subattrs: subattr_values = subattr.values_by_ids(occured_attr_ids) for i, subattr_value in enumerate(subattr_values): - lookups[lookup_name][occured_attr_ids[i]][subattr.name] = \ + subattr_name = subattr.attrs['name'][(len(attr.attrs['name']) + 1):] + lookups[lookup_name][occured_attr_ids[i]][subattr_name] = \ subattr_value return lookups def query(self, query, subcorpus_name='Results'): - return self.client.api.cqp_query(self.name, subcorpus_name, query) + return self.client.api.cqp_query(self.attrs['api_name'], + subcorpus_name, query) + + @property + def alignment_attributes(self): + return AlignmentAttributeCollection(client=self.client, corpus=self) + + @property + def positional_attributes(self): + return PositionalAttributeCollection(client=self.client, corpus=self) + + @property + def structural_attributes(self): + return StructuralAttributeCollection(client=self.client, corpus=self) + + @property + def subcorpora(self): + return SubcorpusCollection(client=self.client, corpus=self) + + +class CorpusCollection(Collection): + model = Corpus + + def get(self, corpus_name): + return self.prepare_model(self.model._attrs(self.client, corpus_name)) + + def list(self): + return [self.prepare_model(self.model._attrs(self.client, corpus)) + for corpus in self.client.api.corpus_list_coprora()] diff --git a/app/corpora/cqi/models/ressource.py b/app/corpora/cqi/models/ressource.py new file mode 100644 index 00000000..449bc93b --- /dev/null +++ b/app/corpora/cqi/models/ressource.py @@ -0,0 +1,78 @@ +class Model: + """ + A base class for representing a single object on the server. + """ + id_attribute = 'Id' + + def __init__(self, attrs=None, client=None, collection=None): + #: A client pointing at the server that this object is on. + self.client = client + + #: The collection that this model is part of. + self.collection = collection + + #: The raw representation of this object from the API + self.attrs = attrs or {} + + @staticmethod + def _attrs(client, key): + raise NotImplementedError + + def __repr__(self): + return "<{}: {}>".format(self.__class__.__name__, self.id) + + def __eq__(self, other): + return isinstance(other, self.__class__) and self.id == other.id + + def __hash__(self): + return hash("{}:{}".format(self.__class__.__name__, self.id)) + + @property + def id(self): + """ + The ID of the object. + """ + return self.attrs.get(self.id_attribute) + + def reload(self): + """ + Load this object from the server again and update ``attrs`` with the + new data. + """ + new_model = self.collection.get(self.id) + self.attrs = new_model.attrs + + +class Collection: + """ + A base class for representing all objects of a particular type on the + server. + """ + + #: The type of object this collection represents, set by subclasses + model = None + + def __init__(self, client=None): + #: The client pointing at the server that this collection of objects + #: is on. + self.client = client + + def list(self): + raise NotImplementedError + + def get(self, key): + raise NotImplementedError + + def prepare_model(self, attrs): + """ + Create a model from a set of attributes. + """ + if isinstance(attrs, Model): + attrs.client = self.client + attrs.collection = self + return attrs + elif isinstance(attrs, dict): + return self.model(attrs=attrs, client=self.client, collection=self) + else: + raise Exception("Can't create {} from {}".format( + self.model.__name__, attrs)) diff --git a/app/corpora/cqi/models/subcorpora.py b/app/corpora/cqi/models/subcorpora.py index 0e774db0..38d25435 100644 --- a/app/corpora/cqi/models/subcorpora.py +++ b/app/corpora/cqi/models/subcorpora.py @@ -1,54 +1,45 @@ +from .ressource import Collection, Model from ..api.specification import (CONST_FIELD_KEYWORD, CONST_FIELD_MATCH, CONST_FIELD_MATCHEND, CONST_FIELD_TARGET) -class SubcorpusCollection: - def __init__(self, client, corpus): - self.client = client - self.corpus = corpus +class Subcorpus(Model): + id_attribute = 'api_name' - def get(self, name): - return Subcorpus(self.client, self.corpus, name) - - def list(self): - return [Subcorpus(self.client, self.corpus, subcorpus) for subcorpus in - self.client.api.cqp_list_subcorpora(self.corpus.name)] - - -class Subcorpus: - def __init__(self, client, corpus, name): - self.client = client - self.corpus = corpus - self.name = name - self._name = '{}:{}'.format(corpus.name, name) - self.fields = {} - if client.api.cqp_subcorpus_has_field(self._name, CONST_FIELD_MATCH): - self.fields['match'] = CONST_FIELD_MATCH - if client.api.cqp_subcorpus_has_field(self._name, + @staticmethod + def _attrs(client, corpus, name): + api_name = '{}:{}'.format(corpus.attrs['api_name'], name) + fields = {} + if client.api.cqp_subcorpus_has_field(api_name, CONST_FIELD_MATCH): + fields['match'] = CONST_FIELD_MATCH + if client.api.cqp_subcorpus_has_field(api_name, CONST_FIELD_MATCHEND): - self.fields['matchend'] = CONST_FIELD_MATCHEND - if client.api.cqp_subcorpus_has_field(self._name, CONST_FIELD_TARGET): - self.fields['target'] = CONST_FIELD_TARGET - if client.api.cqp_subcorpus_has_field(self._name, CONST_FIELD_KEYWORD): - self.fields['keyword'] = CONST_FIELD_KEYWORD - self.size = client.api.cqp_subcorpus_size(self._name) + fields['matchend'] = CONST_FIELD_MATCHEND + if client.api.cqp_subcorpus_has_field(api_name, CONST_FIELD_TARGET): + fields['target'] = CONST_FIELD_TARGET + if client.api.cqp_subcorpus_has_field(api_name, CONST_FIELD_KEYWORD): + fields['keyword'] = CONST_FIELD_KEYWORD + return {'api_name': api_name, + 'name': name, + 'fields': fields, + 'size': client.api.cqp_subcorpus_size(api_name)} def drop(self): - return self.client.api.cqp_drop_subcorpus(self._name) + return self.client.api.cqp_drop_subcorpus(self.attrs['api_name']) def dump(self, field, first, last): - return self.client.api.cqp_dump_subcorpus(self._name, field, first, - last) + return self.client.api.cqp_dump_subcorpus(self.attrs['api_name'], + field, first, last) def export(self, context=25, cutoff=float('inf'), expand_lists=False, offset=0): - if self.size == 0: + if self.attrs['size'] == 0: return {"matches": []} first_match = max(0, offset) - last_match = min((offset + cutoff - 1), (self.size - 1)) + last_match = min((offset + cutoff - 1), (self.attrs['size'] - 1)) match_boundaries = \ - zip(self.dump(self.fields['match'], first_match, last_match), - self.dump(self.fields['matchend'], first_match, last_match)) + zip(self.dump(self.attrs['fields']['match'], first_match, last_match), # noqa + self.dump(self.attrs['fields']['matchend'], first_match, last_match)) # noqa cpos_list = [] matches = [] for match_start, match_end in match_boundaries: @@ -64,10 +55,11 @@ class Subcorpus: cpos_list_lbound = lc_lbound else: cpos_list_lbound = match_start - rc_lbound = min((match_end + 1), (self.corpus.size - 1)) + rc_lbound = min((match_end + 1), + (self.collection.corpus.attrs['size'] - 1)) if rc_lbound != match_end: rc_rbound = min((match_end + 1 + context), - (self.corpus.size - 1)) + (self.collection.corpus.attrs['size'] - 1)) rc = (rc_lbound, rc_rbound) cpos_list_rbound = rc_rbound else: @@ -81,14 +73,30 @@ class Subcorpus: else: match = {'lc': lc, 'c': c, 'rc': rc} matches.append(match) - lookups = self.corpus.lookups_by_cpos(cpos_list) + lookups = self.collection.corpus.lookups_by_cpos(cpos_list) return {'matches': matches, **lookups} def fdist_1(self, cutoff, field, attribute): - return self.client.api.cqp_fdist_1(self._name, cutoff, + return self.client.api.cqp_fdist_1(self.attrs['api_name'], cutoff, field, attribute._name) def fdist_2(self, cutoff, field_1, attribute_1, field_2, attribute_2): - return self.client.api.cqp_fdist_2(self._name, cutoff, + return self.client.api.cqp_fdist_2(self.attrs['api_name'], cutoff, field_1, attribute_1._name, field_2, attribute_2._name) + + +class SubcorpusCollection(Collection): + model = Subcorpus + + def __init__(self, client=None, corpus=None): + super(SubcorpusCollection, self).__init__(client=client) + self.corpus = corpus + + def get(self, subcorpus_name): + return self.prepare_model(self.model._attrs(self.client, self.corpus, + subcorpus_name)) + + def list(self): + return [self.prepare_model(self.model._attrs(self.client, self.corpus, subcorpus)) # noqa + for subcorpus in self.client.api.cqp_list_subcorpora(self.corpus.attrs['api_name'])] # noqa diff --git a/app/corpora/events.py b/app/corpora/events.py index 73d189f9..cf9c791c 100644 --- a/app/corpora/events.py +++ b/app/corpora/events.py @@ -67,23 +67,23 @@ def corpus_analysis_query(query): socketio.emit('corpus_analysis_query', response, room=request.sid) return response = {'code': 200, 'desc': None, 'msg': 'OK', - 'payload': {**query_status, 'match_count': results.size}} + 'payload': {**query_status, 'match_count': results.attrs['size']}} socketio.emit('corpus_analysis_query', response, room=request.sid) chunk_size = 100 chunk_start = 0 context = 100 progress = 0 client.status = 'running' - while chunk_start <= results.size: + while chunk_start <= results.attrs['size']: if client.status == 'abort': break chunk = results.export(context=context, cutoff=chunk_size, expand_lists=False, offset=chunk_start) chunk['cpos_ranges'] = True - if (results.size == 0): + if (results.attrs['size'] == 0): progress = 100 else: - progress = ((chunk_start + chunk_size) / results.size) * 100 + progress = ((chunk_start + chunk_size) / results.attrs['size']) * 100 progress = min(100, int(math.ceil(progress))) response = {'code': 200, 'desc': None, 'msg': 'OK', 'payload': {'chunk': chunk, 'progress': progress}} @@ -104,12 +104,12 @@ def corpus_analysis_inspect_match(payload): return # Get more context for given match CPOS corpus = client.corpora.get('CORPUS') - s = corpus.attributes.structural.get('s') + s = corpus.structural_attributes.get('s') match_context = s.export(payload['first_cpos'], payload['last_cpos'], context=3, expand_lists=False) match_context['cpos_ranges'] = True - socketio.emit('match_context', - {'payload': match_context}, room=request.sid) + socketio.emit('match_context', {'payload': match_context}, + room=request.sid) def corpus_analysis_session_handler(app, corpus_id, user_id, session_id): diff --git a/app/static/js/nopaque.lists.js b/app/static/js/nopaque.lists.js index 49772278..b5fcb473 100644 --- a/app/static/js/nopaque.lists.js +++ b/app/static/js/nopaque.lists.js @@ -381,7 +381,7 @@ class ResultsList extends List { token = chunk["cpos_lookup"][cpos]; hitCellElement.insertAdjacentHTML("beforeend", `${token["word"]} `); // get text titles of every hit cpos token - textTitles.add(chunk["text_lookup"][token["text"]]["text_title"]); + textTitles.add(chunk["text_lookup"][token["text"]]["title"]); // add button to trigger more context to every match td var inspectBtn = document.createElement("a"); inspectBtn.setAttribute("class", "btn-floating btn-flat waves-effect waves-light grey right inspect disabled");