mirror of
https://gitlab.ub.uni-bielefeld.de/sfb1288inf/nopaque.git
synced 2024-11-15 01:05:42 +00:00
Update cqi package
This commit is contained in:
parent
97fb8ded9a
commit
a51394dddd
@ -105,9 +105,8 @@ class StructuralAttributeCollection:
|
|||||||
class StructuralAttribute(Attribute):
|
class StructuralAttribute(Attribute):
|
||||||
def __init__(self, client, corpus, name):
|
def __init__(self, client, corpus, name):
|
||||||
super(StructuralAttribute, self).__init__(client, corpus, name)
|
super(StructuralAttribute, self).__init__(client, corpus, name)
|
||||||
self.has_values = client.api.corpus_structural_attribute_has_values(
|
self.has_values = \
|
||||||
self._name
|
client.api.corpus_structural_attribute_has_values(self._name)
|
||||||
)
|
|
||||||
|
|
||||||
def cpos_by_id(self, id):
|
def cpos_by_id(self, id):
|
||||||
return self.client.api.cl_struc2cpos(self._name, id)
|
return self.client.api.cl_struc2cpos(self._name, id)
|
||||||
@ -124,16 +123,34 @@ class StructuralAttribute(Attribute):
|
|||||||
def values_by_ids(self, id_list):
|
def values_by_ids(self, id_list):
|
||||||
return self.client.api.cl_struc2str(self._name, id_list)
|
return self.client.api.cl_struc2str(self._name, id_list)
|
||||||
|
|
||||||
def lrcontext_by_cpos(self, cpos_first, cpos_last=None, context=3):
|
def export(self, first_cpos, last_cpos, context=0, expand_lists=False):
|
||||||
if cpos_last is None:
|
first_id, last_id = self.ids_by_cpos([first_cpos, last_cpos])
|
||||||
cpos_last = cpos_first
|
c = (first_cpos, last_cpos)
|
||||||
id_l, id_r = self.ids_by_cpos([cpos_first, cpos_last])
|
lc = rc = None
|
||||||
id_lc = max(0, id_l - context)
|
if context == 0:
|
||||||
id_rc = min(id_r + context, self.size - 1)
|
cpos_list = list(range(first_cpos, (last_cpos + 1)))
|
||||||
ids = {id: list(range(*self.cpos_by_id(id))) for id
|
else:
|
||||||
in range(id_lc, id_rc)}
|
lc_lbound = self.cpos_by_id(max(0, (first_id - context)))[0]
|
||||||
cpos_list = [cpos for cpos_list in ids.values() for cpos in cpos_list]
|
if lc_lbound != first_cpos:
|
||||||
|
lc_rbound = max(0, (first_cpos - 1))
|
||||||
|
lc = (lc_lbound, lc_rbound)
|
||||||
|
cpos_list_lbound = lc_lbound
|
||||||
|
else:
|
||||||
|
cpos_list_lbound = first_cpos
|
||||||
|
rc_rbound = \
|
||||||
|
self.cpos_by_id(min((last_id + context), (self.size - 1)))[1]
|
||||||
|
if rc_rbound != last_cpos:
|
||||||
|
rc_lbound = min((last_cpos + 1), (self.size - 1))
|
||||||
|
rc = (rc_lbound, rc_rbound)
|
||||||
|
cpos_list_rbound = rc_rbound
|
||||||
|
else:
|
||||||
|
cpos_list_rbound = last_cpos
|
||||||
|
cpos_list = list(range(cpos_list_lbound, (cpos_list_rbound + 1)))
|
||||||
|
if expand_lists:
|
||||||
|
match = {'lc': list(range(lc[0], (lc[1] + 1))),
|
||||||
|
'c': list(range(c[0], (c[1] + 1))),
|
||||||
|
'rc': list(range(rc[0], (rc[1] + 1)))}
|
||||||
|
else:
|
||||||
|
match = {'lc': lc, 'c': c, 'rc': rc}
|
||||||
lookups = self.corpus.lookups_by_cpos(cpos_list)
|
lookups = self.corpus.lookups_by_cpos(cpos_list)
|
||||||
return {'ids': ids,
|
return {'match': match, **lookups}
|
||||||
'match_cpos_list': list(range(cpos_first, cpos_last)),
|
|
||||||
**lookups}
|
|
||||||
|
@ -48,8 +48,8 @@ class Corpus:
|
|||||||
set(cpos_attr_ids)))
|
set(cpos_attr_ids)))
|
||||||
if not occured_attr_ids:
|
if not occured_attr_ids:
|
||||||
continue
|
continue
|
||||||
subattrs = self.attributes.structural.list(
|
subattrs = \
|
||||||
filters={'part_of': attr})
|
self.attributes.structural.list(filters={'part_of': attr})
|
||||||
if not subattrs:
|
if not subattrs:
|
||||||
continue
|
continue
|
||||||
lookup_name = '{}_lookup'.format(attr.name)
|
lookup_name = '{}_lookup'.format(attr.name)
|
||||||
|
@ -40,32 +40,53 @@ class Subcorpus:
|
|||||||
return self.client.api.cqp_dump_subcorpus(self._name, field, first,
|
return self.client.api.cqp_dump_subcorpus(self._name, field, first,
|
||||||
last)
|
last)
|
||||||
|
|
||||||
def dump_values(self, context=25, first_result=0,
|
def export(self, context=25, cutoff=float('inf'), expand_lists=False,
|
||||||
num_results=float('inf')):
|
offset=0):
|
||||||
if self.size == 0:
|
if self.size == 0:
|
||||||
return {"matches": []}
|
return {"matches": []}
|
||||||
first_result = max(0, first_result)
|
first_match = max(0, offset)
|
||||||
last_result = min((first_result + num_results - 1), (self.size - 1))
|
last_match = min((offset + cutoff - 1), (self.size - 1))
|
||||||
matches = []
|
match_boundaries = \
|
||||||
match_boundaries = zip(self.dump(self.fields['match'], first_result,
|
zip(self.dump(self.fields['match'], first_match, last_match),
|
||||||
last_result),
|
self.dump(self.fields['matchend'], first_match, last_match))
|
||||||
self.dump(self.fields['matchend'], first_result,
|
|
||||||
last_result))
|
|
||||||
for match_start, match_end in match_boundaries:
|
|
||||||
left_start = max(0, match_start - context)
|
|
||||||
right_end = min(self.corpus.size, (match_end + 1 + context))
|
|
||||||
matches.append({'lc': list(range(left_start, match_start)),
|
|
||||||
'hit': list(range(match_start, match_end + 1)),
|
|
||||||
'rc': list(range(match_end + 1, right_end))})
|
|
||||||
cpos_list = []
|
cpos_list = []
|
||||||
for match in matches:
|
matches = []
|
||||||
cpos_list += match['lc'] + match['hit'] + match['rc']
|
for match_start, match_end in match_boundaries:
|
||||||
|
c = (match_start, match_end)
|
||||||
|
lc = rc = None
|
||||||
|
if context == 0:
|
||||||
|
cpos_list += list(range(match_start, (match_end + 1)))
|
||||||
|
else:
|
||||||
|
lc_rbound = max(0, (match_start - 1))
|
||||||
|
if lc_rbound != match_start:
|
||||||
|
lc_lbound = max(0, match_start - context)
|
||||||
|
lc = (lc_lbound, lc_rbound)
|
||||||
|
cpos_list_lbound = lc_lbound
|
||||||
|
else:
|
||||||
|
cpos_list_lbound = match_start
|
||||||
|
rc_lbound = min((match_end + 1), (self.corpus.size - 1))
|
||||||
|
if rc_lbound != match_end:
|
||||||
|
rc_rbound = min((match_end + context),
|
||||||
|
(self.corpus.size - 1))
|
||||||
|
rc = (rc_lbound, rc_rbound)
|
||||||
|
cpos_list_rbound = rc_rbound
|
||||||
|
else:
|
||||||
|
cpos_list_rbound = match_end
|
||||||
|
cpos_list += list(range(cpos_list_lbound,
|
||||||
|
(cpos_list_rbound + 1)))
|
||||||
|
if expand_lists:
|
||||||
|
match = {'lc': list(range(lc[0], (lc[1] + 1))),
|
||||||
|
'c': list(range(c[0], (c[1] + 1))),
|
||||||
|
'rc': list(range(rc[0], (rc[1] + 1)))}
|
||||||
|
else:
|
||||||
|
match = {'lc': lc, 'c': c, 'rc': rc}
|
||||||
|
matches.append(match)
|
||||||
lookups = self.corpus.lookups_by_cpos(cpos_list)
|
lookups = self.corpus.lookups_by_cpos(cpos_list)
|
||||||
return {'matches': matches, **lookups}
|
return {'matches': matches, **lookups}
|
||||||
|
|
||||||
def fdist_1(self, cutoff, field, attribute):
|
def fdist_1(self, cutoff, field, attribute):
|
||||||
return self.client.api.cqp_fdist_1(self._name, cutoff, field,
|
return self.client.api.cqp_fdist_1(self._name, cutoff,
|
||||||
attribute._name)
|
field, attribute._name)
|
||||||
|
|
||||||
def fdist_2(self, cutoff, field_1, attribute_1, field_2, attribute_2):
|
def fdist_2(self, cutoff, field_1, attribute_1, field_2, attribute_2):
|
||||||
return self.client.api.cqp_fdist_2(self._name, cutoff,
|
return self.client.api.cqp_fdist_2(self._name, cutoff,
|
||||||
|
Loading…
Reference in New Issue
Block a user