Update cqi package

This commit is contained in:
Patrick Jentsch 2020-03-28 11:56:01 +01:00
parent 97fb8ded9a
commit a51394dddd
3 changed files with 74 additions and 36 deletions

View File

@ -105,9 +105,8 @@ class StructuralAttributeCollection:
class StructuralAttribute(Attribute):
def __init__(self, client, corpus, name):
    """Set up the attribute and record whether it carries values.

    The three arguments are forwarded unchanged to the parent
    initialiser.  ``self._name`` is read afterwards, so it is presumably
    set by the parent class -- TODO confirm against ``Attribute``.
    """
    super(StructuralAttribute, self).__init__(client, corpus, name)
    # Ask the server exactly once; the previous text carried two
    # equivalent copies of this assignment, doubling the API round trip.
    self.has_values = \
        client.api.corpus_structural_attribute_has_values(self._name)
def cpos_by_id(self, id):
    """Return the ``cl_struc2cpos`` result for structure *id*.

    The call is made through ``self.client.api`` using this attribute's
    ``_name``; the API's answer is handed back unmodified.
    """
    return self.client.api.cl_struc2cpos(self._name, id)
@ -124,16 +123,34 @@ class StructuralAttribute(Attribute):
def values_by_ids(self, id_list):
    """Return the ``cl_struc2str`` result for the structures in *id_list*.

    The call is made through ``self.client.api`` using this attribute's
    ``_name``; the API's answer is handed back unmodified.
    """
    return self.client.api.cl_struc2str(self._name, id_list)
def lrcontext_by_cpos(self, cpos_first, cpos_last=None, context=3):
    """Collect the structures surrounding a corpus-position span.

    Args:
        cpos_first: first corpus position of the span.
        cpos_last: last corpus position of the span; defaults to
            *cpos_first* (a single-position span).
        context: number of structures to include on each side of the
            structures that contain the span.

    Returns:
        A dict with
        ``'ids'``: mapping of structure id to the list of corpus
        positions it covers,
        ``'match_cpos_list'``: the positions ``cpos_first..cpos_last``,
        plus every key returned by ``self.corpus.lookups_by_cpos`` for
        all collected positions.
    """
    if cpos_last is None:
        cpos_last = cpos_first
    id_l, id_r = self.ids_by_cpos([cpos_first, cpos_last])
    id_lc = max(0, id_l - context)
    id_rc = min(id_r + context, self.size - 1)
    # Both the id window and the per-structure bounds are treated as
    # inclusive, matching how ``export`` in this class uses
    # ``cpos_by_id`` (it adds 1 to the right bound before ``range``).
    # NOTE(review): confirm cl_struc2cpos bounds are inclusive.
    ids = {}
    for struc_id in range(id_lc, id_rc + 1):
        lbound, rbound = self.cpos_by_id(struc_id)
        ids[struc_id] = list(range(lbound, rbound + 1))
    cpos_list = [cpos for struc_cpos in ids.values() for cpos in struc_cpos]
    lookups = self.corpus.lookups_by_cpos(cpos_list)
    # ``cpos_last + 1`` so that a single-position span is not empty.
    return {'ids': ids,
            'match_cpos_list': list(range(cpos_first, cpos_last + 1)),
            **lookups}
def export(self, first_cpos, last_cpos, context=0, expand_lists=False):
    """Export a match together with its structural left/right context.

    Args:
        first_cpos: first corpus position of the match.
        last_cpos: last corpus position of the match (inclusive).
        context: number of structures of this attribute to include on
            each side of the structures containing the match; ``0``
            exports the match positions only.
        expand_lists: when true, report ``lc``/``c``/``rc`` as lists of
            corpus positions instead of ``(first, last)`` tuples.

    Returns:
        A dict with ``'match'`` (a dict with the keys ``'lc'``, ``'c'``
        and ``'rc'``) plus every key returned by
        ``self.corpus.lookups_by_cpos`` for all exported positions.  A
        missing context side is ``None`` in tuple form and ``[]`` in
        expanded form.
    """
    def expand(bounds):
        # A context side that does not exist has no positions at all.
        if bounds is None:
            return []
        return list(range(bounds[0], bounds[1] + 1))

    first_id, last_id = self.ids_by_cpos([first_cpos, last_cpos])
    c = (first_cpos, last_cpos)
    lc = rc = None
    cpos_list_lbound = first_cpos
    cpos_list_rbound = last_cpos
    if context != 0:
        lc_lbound = self.cpos_by_id(max(0, first_id - context))[0]
        if lc_lbound != first_cpos:
            lc = (lc_lbound, max(0, first_cpos - 1))
            cpos_list_lbound = lc_lbound
        # ``self.size`` bounds structure ids here, so it must not be
        # used to clamp corpus positions further down.
        last_context_id = min(last_id + context, self.size - 1)
        rc_rbound = self.cpos_by_id(last_context_id)[1]
        if rc_rbound != last_cpos:
            # The right context starts directly after the match.
            rc = (last_cpos + 1, rc_rbound)
            cpos_list_rbound = rc_rbound
    cpos_list = list(range(cpos_list_lbound, cpos_list_rbound + 1))
    if expand_lists:
        match = {'lc': expand(lc), 'c': expand(c), 'rc': expand(rc)}
    else:
        match = {'lc': lc, 'c': c, 'rc': rc}
    lookups = self.corpus.lookups_by_cpos(cpos_list)
    return {'match': match, **lookups}

View File

@ -48,8 +48,8 @@ class Corpus:
set(cpos_attr_ids)))
if not occured_attr_ids:
continue
subattrs = self.attributes.structural.list(
filters={'part_of': attr})
subattrs = \
self.attributes.structural.list(filters={'part_of': attr})
if not subattrs:
continue
lookup_name = '{}_lookup'.format(attr.name)

View File

@ -40,32 +40,53 @@ class Subcorpus:
return self.client.api.cqp_dump_subcorpus(self._name, field, first,
last)
def dump_values(self, context=25, first_result=0,
num_results=float('inf')):
def export(self, context=25, cutoff=float('inf'), expand_lists=False,
           offset=0):
    """Export a window of this subcorpus' matches with token context.

    Args:
        context: number of corpus positions to include on each side of
            a match; ``0`` exports the match positions only.
        cutoff: maximum number of matches to export.
        expand_lists: when true, report ``lc``/``c``/``rc`` as lists of
            corpus positions instead of ``(first, last)`` tuples.
        offset: index of the first match to export; negative values are
            treated as ``0``.

    Returns:
        A dict with ``'matches'`` (one dict per match with the keys
        ``'lc'``, ``'c'`` and ``'rc'``) plus every key returned by
        ``self.corpus.lookups_by_cpos`` for all exported positions.  A
        missing context side is ``None`` in tuple form and ``[]`` in
        expanded form.  When there is nothing to export only
        ``{'matches': []}`` is returned.
    """
    def expand(bounds):
        # A context side that does not exist has no positions at all.
        if bounds is None:
            return []
        return list(range(bounds[0], bounds[1] + 1))

    if self.size == 0:
        return {"matches": []}
    first_match = max(0, offset)
    # Derive the end of the window from the clamped start so a negative
    # offset cannot push it below the first match.
    last_match = min((first_match + cutoff - 1), (self.size - 1))
    if last_match < first_match:
        return {"matches": []}
    match_boundaries = \
        zip(self.dump(self.fields['match'], first_match, last_match),
            self.dump(self.fields['matchend'], first_match, last_match))
    last_corpus_cpos = self.corpus.size - 1
    cpos_list = []
    matches = []
    for match_start, match_end in match_boundaries:
        c = (match_start, match_end)
        lc = rc = None
        cpos_list_lbound = match_start
        cpos_list_rbound = match_end
        if context != 0:
            # No left context for a match starting at the corpus start.
            if match_start > 0:
                lc = (max(0, match_start - context), match_start - 1)
                cpos_list_lbound = lc[0]
            # No right context for a match ending at the corpus end.
            if match_end < last_corpus_cpos:
                rc = (match_end + 1,
                      min(match_end + context, last_corpus_cpos))
                cpos_list_rbound = rc[1]
        cpos_list.extend(range(cpos_list_lbound, cpos_list_rbound + 1))
        if expand_lists:
            match = {'lc': expand(lc), 'c': expand(c), 'rc': expand(rc)}
        else:
            match = {'lc': lc, 'c': c, 'rc': rc}
        matches.append(match)
    lookups = self.corpus.lookups_by_cpos(cpos_list)
    return {'matches': matches, **lookups}
def fdist_1(self, cutoff, field, attribute):
    """Return the ``cqp_fdist_1`` result for this subcorpus.

    *cutoff* and *field* are forwarded unchanged; *attribute* is passed
    to the API by its ``_name``.  The API's answer is handed back
    unmodified.
    """
    # Single return: the text previously carried the same call twice,
    # the second copy being unreachable.
    return self.client.api.cqp_fdist_1(self._name, cutoff,
                                       field, attribute._name)
def fdist_2(self, cutoff, field_1, attribute_1, field_2, attribute_2):
return self.client.api.cqp_fdist_2(self._name, cutoff,