Add sentences to results

This commit is contained in:
Stephan Porada 2019-11-26 11:48:54 +01:00
parent 52b7202017
commit 00d61a7bf5

View File

@ -1,6 +1,7 @@
from .CQiClient import CQiClient from .CQiClient import CQiClient
from .CQi import CONST_FIELD_MATCH, CONST_FIELD_MATCHEND from .CQi import CONST_FIELD_MATCH, CONST_FIELD_MATCHEND
import collections import collections
import re
from app import logger # only works if imported into opaque web app from app import logger # only works if imported into opaque web app
@ -173,7 +174,7 @@ class CQiWrapper(CQiClient):
# Get cpos informations like CORPUS_NAME.word or CORPUS_NAME.lemma for # Get cpos informations like CORPUS_NAME.word or CORPUS_NAME.lemma for
# all cpos entries in all_cpos_list # all cpos entries in all_cpos_list
# Also saves these informations into the ordered_matches dict # Also saves these informations into the ordered_matches dict
all_cpos_infos = self.get_cpos_infos(all_cpos_list) all_cpos_infos, s_list = self.get_cpos_infos(all_cpos_list)
for key in ordered_matches.keys(): for key in ordered_matches.keys():
# loops over cpos in cpos_list which holds all match cpos # loops over cpos in cpos_list which holds all match cpos
# Replaces one cpos with the corresponding cpos information created # Replaces one cpos with the corresponding cpos information created
@ -212,6 +213,13 @@ class CQiWrapper(CQiClient):
except UnboundLocalError: except UnboundLocalError:
logger.warning('Context after cpos list is empty.') logger.warning('Context after cpos list is empty.')
pass pass
sentences = {}
s_list = set(s_list)
for s_id in s_list:
s_start, s_end = self.cl_struc2cpos('CORPUS.s', s_id)
sentence = self.cl_cpos2str('CORPUS.word', range(s_start, s_end + 1))
sentences.update({s_id: re.sub(r' (?=\W)', '', ' '.join(sentence))})
ordered_matches['sentences'] = sentences
return ordered_matches return ordered_matches
def get_cpos_infos(self, all_cpos): def get_cpos_infos(self, all_cpos):
@ -220,6 +228,7 @@ class CQiWrapper(CQiClient):
all cpos entries specified in the parameter all_cpos. all cpos entries specified in the parameter all_cpos.
''' '''
cpos_infos = {} cpos_infos = {}
s_list = []
for key in self.attr_strings.keys(): for key in self.attr_strings.keys():
if key == 'positional_attrs': if key == 'positional_attrs':
for p_attr_key in self.attr_strings[key].keys(): for p_attr_key in self.attr_strings[key].keys():
@ -233,6 +242,8 @@ class CQiWrapper(CQiClient):
has_value = self.corpus_structural_attribute_has_values(self.attr_strings[key][struct_attr_key]) has_value = self.corpus_structural_attribute_has_values(self.attr_strings[key][struct_attr_key])
if has_value: if has_value:
match_strs = self.cl_struc2str(self.attr_strings[key][struct_attr_key], struct_entry) match_strs = self.cl_struc2str(self.attr_strings[key][struct_attr_key], struct_entry)
elif self.attr_strings[key][struct_attr_key] == 'CORPUS.s':
s_list.extend(struct_entry)
else: else:
match_strs = [None for i in struct_entry] match_strs = [None for i in struct_entry]
cpos_infos[struct_attr_key] = zip(struct_entry, match_strs) cpos_infos[struct_attr_key] = zip(struct_entry, match_strs)
@ -245,4 +256,4 @@ class CQiWrapper(CQiClient):
dict_cpos_infos = {} dict_cpos_infos = {}
for info in joined_cpos_infos: for info in joined_cpos_infos:
dict_cpos_infos[info[0]] = dict(zip(attr_key_list, info[1:])) dict_cpos_infos[info[0]] = dict(zip(attr_key_list, info[1:]))
return dict_cpos_infos return dict_cpos_infos, s_list