mirror of
https://gitlab.ub.uni-bielefeld.de/sfb1288inf/nopaque.git
synced 2025-01-18 05:50:34 +00:00
Update cqi package
This commit is contained in:
parent
97fb8ded9a
commit
a51394dddd
@ -105,9 +105,8 @@ class StructuralAttributeCollection:
|
||||
class StructuralAttribute(Attribute):
|
||||
def __init__(self, client, corpus, name):
    """Initialise the attribute and record its ``has_values`` flag.

    ``client``, ``corpus`` and ``name`` are passed unchanged to the parent
    class initialiser.  ``has_values`` stores the result of a single
    ``corpus_structural_attribute_has_values`` API call.
    """
    super(StructuralAttribute, self).__init__(client, corpus, name)
    # NOTE(review): self._name is presumably set by the parent initialiser
    # -- confirm against the base class.
    self.has_values = \
        client.api.corpus_structural_attribute_has_values(self._name)
|
||||
|
||||
def cpos_by_id(self, id):
    """Return what the ``cl_struc2cpos`` API call yields for ``id``.

    :param id: id of a structure of this attribute (the parameter name
        shadows the builtin but is kept for keyword-argument callers).
    :return: the API result, unmodified.  Other code in this class indexes
        it with ``[0]`` and ``[1]``, so it is presumably a
        ``(first_cpos, last_cpos)`` pair -- confirm against the API.
    """
    return self.client.api.cl_struc2cpos(self._name, id)
|
||||
@ -124,16 +123,34 @@ class StructuralAttribute(Attribute):
|
||||
def values_by_ids(self, id_list):
    """Return what the ``cl_struc2str`` API call yields for ``id_list``.

    :param id_list: structure ids of this attribute.
    :return: the API result, unmodified (presumably one value per id --
        confirm against the API).
    """
    return self.client.api.cl_struc2str(self._name, id_list)
|
||||
|
||||
def lrcontext_by_cpos(self, cpos_first, cpos_last=None, context=3):
    """Look up the structures surrounding a corpus-position span.

    :param cpos_first: first corpus position of the span.
    :param cpos_last: last corpus position; defaults to ``cpos_first``.
    :param context: number of structures to step to the left and right of
        the structures containing the span.
    :return: ``{'ids': {struc_id: [cpos, ...]}, 'match_cpos_list': [...],
        **lookups}`` where ``lookups`` is the result of
        ``self.corpus.lookups_by_cpos`` for all collected positions.
    """
    if cpos_last is None:
        cpos_last = cpos_first
    id_l, id_r = self.ids_by_cpos([cpos_first, cpos_last])
    id_lc = max(0, id_l - context)
    id_rc = min(id_r + context, self.size - 1)
    # NOTE(review): every range below is half-open, so structure ``id_rc``,
    # the second element of each ``cpos_by_id`` pair and ``cpos_last`` are
    # all excluded.  Kept as-is for existing callers; confirm whether the
    # exclusions are intended.
    ids = {struc_id: list(range(*self.cpos_by_id(struc_id)))
           for struc_id in range(id_lc, id_rc)}
    cpos_list = [cpos for struc_cpos in ids.values() for cpos in struc_cpos]
    lookups = self.corpus.lookups_by_cpos(cpos_list)
    return {'ids': ids,
            'match_cpos_list': list(range(cpos_first, cpos_last)),
            **lookups}
|
||||
def export(self, first_cpos, last_cpos, context=0, expand_lists=False):
    """Export a corpus-position span plus neighbouring structures.

    ``context`` is counted in structures of this attribute.  The left
    context starts at the first position of structure
    ``first_id - context``; the right context ends at the last position of
    structure ``last_id + context``.  Both ids are clamped to
    ``0 .. self.size - 1``.

    :param first_cpos: first corpus position of the span (inclusive).
    :param last_cpos: last corpus position of the span (inclusive).
    :param context: number of structures added on each side; ``0`` exports
        the span only.
    :param expand_lists: if true, ``lc``/``c``/``rc`` are full lists of
        corpus positions instead of inclusive ``(first, last)`` pairs.
    :return: ``{'match': {'lc': ..., 'c': ..., 'rc': ...}, **lookups}``
        where ``lookups`` is the result of ``self.corpus.lookups_by_cpos``
        for every exported position.  ``lc`` / ``rc`` are ``None`` when
        there is no context on that side, in both output formats.
    """
    # NOTE(review): assumes ids_by_cpos yields valid structure ids for both
    # positions and cpos_by_id yields an inclusive (first, last) pair --
    # confirm against the API.
    first_id, last_id = self.ids_by_cpos([first_cpos, last_cpos])
    c = (first_cpos, last_cpos)
    lc = rc = None
    if context == 0:
        cpos_list = list(range(first_cpos, last_cpos + 1))
    else:
        lc_lbound = self.cpos_by_id(max(0, first_id - context))[0]
        if lc_lbound != first_cpos:
            lc = (lc_lbound, max(0, first_cpos - 1))
            cpos_list_lbound = lc_lbound
        else:
            cpos_list_lbound = first_cpos
        rc_rbound = \
            self.cpos_by_id(min(last_id + context, self.size - 1))[1]
        if rc_rbound != last_cpos:
            # The right context starts directly after the span.  It must not
            # be clamped with self.size: that is the structure count (used
            # above as the id bound), not a corpus position.
            rc = (last_cpos + 1, rc_rbound)
            cpos_list_rbound = rc_rbound
        else:
            cpos_list_rbound = last_cpos
        cpos_list = list(range(cpos_list_lbound, cpos_list_rbound + 1))
    if expand_lists:
        # lc / rc stay None when absent instead of being subscripted.
        match = {
            'lc': None if lc is None else list(range(lc[0], lc[1] + 1)),
            'c': list(range(c[0], c[1] + 1)),
            'rc': None if rc is None else list(range(rc[0], rc[1] + 1)),
        }
    else:
        match = {'lc': lc, 'c': c, 'rc': rc}
    lookups = self.corpus.lookups_by_cpos(cpos_list)
    return {'match': match, **lookups}
|
||||
|
@ -48,8 +48,8 @@ class Corpus:
|
||||
set(cpos_attr_ids)))
|
||||
if not occured_attr_ids:
|
||||
continue
|
||||
subattrs = self.attributes.structural.list(
|
||||
filters={'part_of': attr})
|
||||
subattrs = \
|
||||
self.attributes.structural.list(filters={'part_of': attr})
|
||||
if not subattrs:
|
||||
continue
|
||||
lookup_name = '{}_lookup'.format(attr.name)
|
||||
|
@ -40,32 +40,53 @@ class Subcorpus:
|
||||
return self.client.api.cqp_dump_subcorpus(self._name, field, first,
|
||||
last)
|
||||
|
||||
def dump_values(self, context=25, first_result=0,
                num_results=float('inf')):
    """Dump matches with token context as expanded corpus-position lists.

    Kept with its previous behaviour for existing callers.

    :param context: number of corpus positions added on each side.
    :param first_result: index of the first match to dump (clamped to 0).
    :param num_results: maximum number of matches to dump.
    :return: ``{'matches': [{'lc': [...], 'hit': [...], 'rc': [...]}, ...],
        **lookups}``; only ``{"matches": []}`` when the subcorpus is empty.
    """
    if self.size == 0:
        return {"matches": []}
    first_result = max(0, first_result)
    last_result = min((first_result + num_results - 1), (self.size - 1))
    match_boundaries = zip(
        self.dump(self.fields['match'], first_result, last_result),
        self.dump(self.fields['matchend'], first_result, last_result))
    matches = []
    cpos_list = []
    for match_start, match_end in match_boundaries:
        left_start = max(0, match_start - context)
        right_end = min(self.corpus.size, (match_end + 1 + context))
        match = {'lc': list(range(left_start, match_start)),
                 'hit': list(range(match_start, match_end + 1)),
                 'rc': list(range(match_end + 1, right_end))}
        matches.append(match)
        cpos_list += match['lc'] + match['hit'] + match['rc']
    lookups = self.corpus.lookups_by_cpos(cpos_list)
    return {'matches': matches, **lookups}

def export(self, context=25, cutoff=float('inf'), expand_lists=False,
           offset=0):
    """Export matches of this subcorpus together with token context.

    :param context: number of corpus positions added on each side of a
        match; ``0`` exports the matches only.
    :param cutoff: maximum number of matches to export.
    :param expand_lists: if true, ``lc``/``c``/``rc`` are full lists of
        corpus positions instead of inclusive ``(first, last)`` pairs.
    :param offset: index of the first match to export (clamped to 0).
    :return: ``{'matches': [{'lc': ..., 'c': ..., 'rc': ...}, ...],
        **lookups}`` where ``lookups`` is the result of
        ``self.corpus.lookups_by_cpos`` for every exported position.
        ``lc`` / ``rc`` are ``None`` when a match touches the corpus start
        or end or when ``context`` is 0.  Only ``{"matches": []}`` is
        returned when there is nothing to export.
    """
    if self.size == 0:
        return {"matches": []}
    first_match = max(0, offset)
    # Count from the clamped start so a negative offset does not shrink the
    # window below ``cutoff`` matches.
    last_match = min((first_match + cutoff - 1), (self.size - 1))
    if first_match > last_match:
        # offset beyond the last match, or cutoff of 0: nothing to dump.
        return {"matches": []}
    match_boundaries = \
        zip(self.dump(self.fields['match'], first_match, last_match),
            self.dump(self.fields['matchend'], first_match, last_match))
    last_corpus_cpos = self.corpus.size - 1
    cpos_list = []
    matches = []
    for match_start, match_end in match_boundaries:
        c = (match_start, match_end)
        lc = rc = None
        if context == 0:
            cpos_list += list(range(match_start, (match_end + 1)))
        else:
            lc_rbound = max(0, (match_start - 1))
            if lc_rbound != match_start:
                lc_lbound = max(0, match_start - context)
                lc = (lc_lbound, lc_rbound)
                cpos_list_lbound = lc_lbound
            else:
                # Match starts at corpus position 0: no left context.
                cpos_list_lbound = match_start
            rc_lbound = min((match_end + 1), last_corpus_cpos)
            if rc_lbound != match_end:
                rc_rbound = min((match_end + context), last_corpus_cpos)
                rc = (rc_lbound, rc_rbound)
                cpos_list_rbound = rc_rbound
            else:
                # Match ends at the last corpus position: no right context.
                cpos_list_rbound = match_end
            cpos_list += list(range(cpos_list_lbound,
                                    (cpos_list_rbound + 1)))
        if expand_lists:
            # lc / rc stay None when absent instead of being subscripted.
            match = {
                'lc': None if lc is None else list(range(lc[0], lc[1] + 1)),
                'c': list(range(c[0], (c[1] + 1))),
                'rc': None if rc is None else list(range(rc[0], rc[1] + 1)),
            }
        else:
            match = {'lc': lc, 'c': c, 'rc': rc}
        matches.append(match)
    lookups = self.corpus.lookups_by_cpos(cpos_list)
    return {'matches': matches, **lookups}
|
||||
|
||||
def fdist_1(self, cutoff, field, attribute):
    """Return the result of the ``cqp_fdist_1`` API call for this subcorpus.

    :param cutoff: passed through to the API unchanged.
    :param field: passed through to the API unchanged.
    :param attribute: attribute object; only its ``_name`` is sent.
    :return: the API result, unmodified.
    """
    return self.client.api.cqp_fdist_1(self._name, cutoff,
                                       field, attribute._name)
|
||||
|
||||
def fdist_2(self, cutoff, field_1, attribute_1, field_2, attribute_2):
|
||||
return self.client.api.cqp_fdist_2(self._name, cutoff,
|
||||
|
Loading…
x
Reference in New Issue
Block a user