diff --git a/app/corpora/CQiWrapper/CQiWrapper.py b/app/corpora/CQiWrapper/CQiWrapper.py index 06457ad3..e2c9d996 100644 --- a/app/corpora/CQiWrapper/CQiWrapper.py +++ b/app/corpora/CQiWrapper/CQiWrapper.py @@ -1,6 +1,7 @@ from .CQiClient import CQiClient from .CQi import CONST_FIELD_MATCH, CONST_FIELD_MATCHEND import collections +import re from app import logger # only works if imported into opaque web app @@ -173,7 +174,7 @@ class CQiWrapper(CQiClient): # Get cpos informations like CORPUS_NAME.word or CORPUS_NAME.lemma for # all cpos entries in all_cpos_list # Also saves these informations into the ordered_matches dict - all_cpos_infos = self.get_cpos_infos(all_cpos_list) + all_cpos_infos, s_list = self.get_cpos_infos(all_cpos_list) for key in ordered_matches.keys(): # loops over cpos in cpos_list which holds all match cpos # Replaces one cpos with the corresponding cpos information created @@ -212,6 +213,13 @@ class CQiWrapper(CQiClient): except UnboundLocalError: logger.warning('Context after cpos list is empty.') pass + sentences = {} + s_list = set(s_list) + for s_id in s_list: + s_start, s_end = self.cl_struc2cpos('CORPUS.s', s_id) + sentence = self.cl_cpos2str('CORPUS.word', range(s_start, s_end + 1)) + sentences.update({s_id: re.sub(r' (?=\W)', '', ' '.join(sentence))}) + ordered_matches['sentences'] = sentences return ordered_matches def get_cpos_infos(self, all_cpos): @@ -220,6 +228,7 @@ class CQiWrapper(CQiClient): all cpos entries specified in the parameter all_cpos. ''' cpos_infos = {} + s_list = [] for key in self.attr_strings.keys(): if key == 'positional_attrs': for p_attr_key in self.attr_strings[key].keys(): @@ -233,6 +242,8 @@ class CQiWrapper(CQiClient): has_value = self.corpus_structural_attribute_has_values(self.attr_strings[key][struct_attr_key]) if has_value: match_strs = self.cl_struc2str(self.attr_strings[key][struct_attr_key], struct_entry) + elif self.attr_strings[key][struct_attr_key] == 'CORPUS.s': + s_list.extend(struct_entry) else: match_strs = [None for i in struct_entry] cpos_infos[struct_attr_key] = zip(struct_entry, match_strs) @@ -245,4 +256,4 @@ class CQiWrapper(CQiClient): dict_cpos_infos = {} for info in joined_cpos_infos: dict_cpos_infos[info[0]] = dict(zip(attr_key_list, info[1:])) - return dict_cpos_infos + return dict_cpos_infos, s_list