mirror of
				https://gitlab.ub.uni-bielefeld.de/sfb1288inf/nopaque.git
				synced 2025-11-04 04:12:45 +00:00 
			
		
		
		
	Update cqi package
This commit is contained in:
		@@ -105,9 +105,8 @@ class StructuralAttributeCollection:
 | 
			
		||||
class StructuralAttribute(Attribute):
 | 
			
		||||
    def __init__(self, client, corpus, name):
 | 
			
		||||
        super(StructuralAttribute, self).__init__(client, corpus, name)
 | 
			
		||||
        self.has_values = client.api.corpus_structural_attribute_has_values(
 | 
			
		||||
            self._name
 | 
			
		||||
        )
 | 
			
		||||
        self.has_values = \
 | 
			
		||||
            client.api.corpus_structural_attribute_has_values(self._name)
 | 
			
		||||
 | 
			
		||||
    def cpos_by_id(self, id):
 | 
			
		||||
        return self.client.api.cl_struc2cpos(self._name, id)
 | 
			
		||||
@@ -124,16 +123,34 @@ class StructuralAttribute(Attribute):
 | 
			
		||||
    def values_by_ids(self, id_list):
 | 
			
		||||
        return self.client.api.cl_struc2str(self._name, id_list)
 | 
			
		||||
 | 
			
		||||
    def lrcontext_by_cpos(self, cpos_first, cpos_last=None, context=3):
 | 
			
		||||
        if cpos_last is None:
 | 
			
		||||
            cpos_last = cpos_first
 | 
			
		||||
        id_l, id_r = self.ids_by_cpos([cpos_first, cpos_last])
 | 
			
		||||
        id_lc = max(0, id_l - context)
 | 
			
		||||
        id_rc = min(id_r + context, self.size - 1)
 | 
			
		||||
        ids = {id: list(range(*self.cpos_by_id(id))) for id
 | 
			
		||||
               in range(id_lc, id_rc)}
 | 
			
		||||
        cpos_list = [cpos for cpos_list in ids.values() for cpos in cpos_list]
 | 
			
		||||
    def export(self, first_cpos, last_cpos, context=0, expand_lists=False):
 | 
			
		||||
        first_id, last_id = self.ids_by_cpos([first_cpos, last_cpos])
 | 
			
		||||
        c = (first_cpos, last_cpos)
 | 
			
		||||
        lc = rc = None
 | 
			
		||||
        if context == 0:
 | 
			
		||||
            cpos_list = list(range(first_cpos, (last_cpos + 1)))
 | 
			
		||||
        else:
 | 
			
		||||
            lc_lbound = self.cpos_by_id(max(0, (first_id - context)))[0]
 | 
			
		||||
            if lc_lbound != first_cpos:
 | 
			
		||||
                lc_rbound = max(0, (first_cpos - 1))
 | 
			
		||||
                lc = (lc_lbound, lc_rbound)
 | 
			
		||||
                cpos_list_lbound = lc_lbound
 | 
			
		||||
            else:
 | 
			
		||||
                cpos_list_lbound = first_cpos
 | 
			
		||||
            rc_rbound = \
 | 
			
		||||
                self.cpos_by_id(min((last_id + context), (self.size - 1)))[1]
 | 
			
		||||
            if rc_rbound != last_cpos:
 | 
			
		||||
                rc_lbound = min((last_cpos + 1), (self.size - 1))
 | 
			
		||||
                rc = (rc_lbound, rc_rbound)
 | 
			
		||||
                cpos_list_rbound = rc_rbound
 | 
			
		||||
            else:
 | 
			
		||||
                cpos_list_rbound = last_cpos
 | 
			
		||||
            cpos_list = list(range(cpos_list_lbound, (cpos_list_rbound + 1)))
 | 
			
		||||
        if expand_lists:
 | 
			
		||||
            match = {'lc': list(range(lc[0], (lc[1] + 1))),
 | 
			
		||||
                     'c': list(range(c[0], (c[1] + 1))),
 | 
			
		||||
                     'rc': list(range(rc[0], (rc[1] + 1)))}
 | 
			
		||||
        else:
 | 
			
		||||
            match = {'lc': lc, 'c': c, 'rc': rc}
 | 
			
		||||
        lookups = self.corpus.lookups_by_cpos(cpos_list)
 | 
			
		||||
        return {'ids': ids,
 | 
			
		||||
                'match_cpos_list': list(range(cpos_first, cpos_last)),
 | 
			
		||||
                **lookups}
 | 
			
		||||
        return {'match': match, **lookups}
 | 
			
		||||
 
 | 
			
		||||
@@ -48,8 +48,8 @@ class Corpus:
 | 
			
		||||
                                           set(cpos_attr_ids)))
 | 
			
		||||
            if not occured_attr_ids:
 | 
			
		||||
                continue
 | 
			
		||||
            subattrs = self.attributes.structural.list(
 | 
			
		||||
                filters={'part_of': attr})
 | 
			
		||||
            subattrs = \
 | 
			
		||||
                self.attributes.structural.list(filters={'part_of': attr})
 | 
			
		||||
            if not subattrs:
 | 
			
		||||
                continue
 | 
			
		||||
            lookup_name = '{}_lookup'.format(attr.name)
 | 
			
		||||
 
 | 
			
		||||
@@ -40,32 +40,53 @@ class Subcorpus:
 | 
			
		||||
        return self.client.api.cqp_dump_subcorpus(self._name, field, first,
 | 
			
		||||
                                                  last)
 | 
			
		||||
 | 
			
		||||
    def dump_values(self, context=25, first_result=0,
 | 
			
		||||
                    num_results=float('inf')):
 | 
			
		||||
    def export(self, context=25, cutoff=float('inf'), expand_lists=False,
 | 
			
		||||
               offset=0):
 | 
			
		||||
        if self.size == 0:
 | 
			
		||||
            return {"matches": []}
 | 
			
		||||
        first_result = max(0, first_result)
 | 
			
		||||
        last_result = min((first_result + num_results - 1), (self.size - 1))
 | 
			
		||||
        matches = []
 | 
			
		||||
        match_boundaries = zip(self.dump(self.fields['match'], first_result,
 | 
			
		||||
                                         last_result),
 | 
			
		||||
                               self.dump(self.fields['matchend'], first_result,
 | 
			
		||||
                                         last_result))
 | 
			
		||||
        for match_start, match_end in match_boundaries:
 | 
			
		||||
            left_start = max(0, match_start - context)
 | 
			
		||||
            right_end = min(self.corpus.size, (match_end + 1 + context))
 | 
			
		||||
            matches.append({'lc': list(range(left_start, match_start)),
 | 
			
		||||
                            'hit': list(range(match_start, match_end + 1)),
 | 
			
		||||
                            'rc': list(range(match_end + 1, right_end))})
 | 
			
		||||
        first_match = max(0, offset)
 | 
			
		||||
        last_match = min((offset + cutoff - 1), (self.size - 1))
 | 
			
		||||
        match_boundaries = \
 | 
			
		||||
            zip(self.dump(self.fields['match'], first_match, last_match),
 | 
			
		||||
                self.dump(self.fields['matchend'], first_match, last_match))
 | 
			
		||||
        cpos_list = []
 | 
			
		||||
        for match in matches:
 | 
			
		||||
            cpos_list += match['lc'] + match['hit'] + match['rc']
 | 
			
		||||
        matches = []
 | 
			
		||||
        for match_start, match_end in match_boundaries:
 | 
			
		||||
            c = (match_start, match_end)
 | 
			
		||||
            lc = rc = None
 | 
			
		||||
            if context == 0:
 | 
			
		||||
                cpos_list += list(range(match_start, (match_end + 1)))
 | 
			
		||||
            else:
 | 
			
		||||
                lc_rbound = max(0, (match_start - 1))
 | 
			
		||||
                if lc_rbound != match_start:
 | 
			
		||||
                    lc_lbound = max(0, match_start - context)
 | 
			
		||||
                    lc = (lc_lbound, lc_rbound)
 | 
			
		||||
                    cpos_list_lbound = lc_lbound
 | 
			
		||||
                else:
 | 
			
		||||
                    cpos_list_lbound = match_start
 | 
			
		||||
                rc_lbound = min((match_end + 1), (self.corpus.size - 1))
 | 
			
		||||
                if rc_lbound != match_end:
 | 
			
		||||
                    rc_rbound = min((match_end + context),
 | 
			
		||||
                                    (self.corpus.size - 1))
 | 
			
		||||
                    rc = (rc_lbound, rc_rbound)
 | 
			
		||||
                    cpos_list_rbound = rc_rbound
 | 
			
		||||
                else:
 | 
			
		||||
                    cpos_list_rbound = match_end
 | 
			
		||||
                cpos_list += list(range(cpos_list_lbound,
 | 
			
		||||
                                        (cpos_list_rbound + 1)))
 | 
			
		||||
            if expand_lists:
 | 
			
		||||
                match = {'lc': list(range(lc[0], (lc[1] + 1))),
 | 
			
		||||
                         'c': list(range(c[0], (c[1] + 1))),
 | 
			
		||||
                         'rc': list(range(rc[0], (rc[1] + 1)))}
 | 
			
		||||
            else:
 | 
			
		||||
                match = {'lc': lc, 'c': c, 'rc': rc}
 | 
			
		||||
            matches.append(match)
 | 
			
		||||
        lookups = self.corpus.lookups_by_cpos(cpos_list)
 | 
			
		||||
        return {'matches': matches, **lookups}
 | 
			
		||||
 | 
			
		||||
    def fdist_1(self, cutoff, field, attribute):
 | 
			
		||||
        return self.client.api.cqp_fdist_1(self._name, cutoff, field,
 | 
			
		||||
                                           attribute._name)
 | 
			
		||||
        return self.client.api.cqp_fdist_1(self._name, cutoff,
 | 
			
		||||
                                           field, attribute._name)
 | 
			
		||||
 | 
			
		||||
    def fdist_2(self, cutoff, field_1, attribute_1, field_2, attribute_2):
 | 
			
		||||
        return self.client.api.cqp_fdist_2(self._name, cutoff,
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user