#!/usr/bin/env python
# -*- coding: utf-8 -*-
import logging
import os
import re
import shutil
import subprocess
from os import path
from xml.etree import ElementTree

from lxml import etree

from utility import delete_folder
from utility import update_config


class XMLProtocol(object):
    """Standard operations on/with the XML protocols.

    Has functions for reading, saving and manipulating an XML protocol.
    All other classes inherit from this one.

    Most manipulation methods operate on ``self.xml_tree``, which is set by
    ``read_xml``. Methods taking ``check_child=True`` additionally need
    ``self.regex_compiled``, which is set by ``compile_regex``.
    """

    def __init__(self):
        super().__init__()
        self.logger = logging.getLogger(__name__)

    def read_protcol(self, file_path):
        """Parse the file at ``file_path`` as XML and return its root element.

        Stores ``file_path``, the file's base name and the parsed tree on the
        instance as ``self.file_path``, ``self.filename`` and ``self.tree``.
        """
        self.file_path = file_path
        self.filename = os.path.basename(self.file_path)
        # remove_blank_text gives better indentation when pretty printing.
        parser = etree.XMLParser(remove_blank_text=True)
        self.tree = etree.parse(file_path, parser)
        root = self.tree.getroot()
        self.logger.info("File successfully parsed as XML.")
        return root

    def read_xml(self, file_path):
        """Parse the file at ``file_path`` as XML into ``self.xml_tree``.

        ``self.xml_tree`` holds the root element of the parsed document.
        """
        # remove_blank_text gives better indentation when pretty printing.
        parser = etree.XMLParser(encoding='utf-8', remove_blank_text=True)
        tree = etree.parse(file_path, parser)
        self.xml_tree = tree.getroot()

    def save_to_file(self, output_path, file_path, subfolder, config_section,
                     config_key, filename_sufix=""):
        """Write ``self.xml_tree`` to a new XML file.

        The file is written to ``output_path/subfolder`` (created when
        missing). The config file "config.ini" is updated with that folder
        under ``config_section``/``config_key``.

        Args:
            output_path: Base output directory.
            file_path: Path of the input file; its base name is reused.
            subfolder: Folder below ``output_path`` to write into.
            config_section: Config section to update.
            config_key: Config key that receives the save folder.
            filename_sufix: When not empty, the extension of the input file
                name is replaced by this suffix.
        """
        base_name = path.basename(file_path)
        if filename_sufix == "":
            self.filename = base_name
        else:
            # splitext instead of [:-4]: do not assume a 3-letter extension.
            self.filename = path.splitext(base_name)[0] + filename_sufix
        save_path = os.path.join(output_path, subfolder)
        # Also creates missing parents and tolerates an existing folder.
        os.makedirs(save_path, exist_ok=True)
        tree = etree.ElementTree(self.xml_tree)
        save_file_path = os.path.join(save_path, self.filename)
        tree.write(save_file_path,
                   pretty_print=True,
                   xml_declaration=True,
                   encoding="utf8",
                   doctype="")
        self.logger.info("New XML saved to:%s", save_file_path)
        update_config.update_config("config.ini",
                                    config_section,
                                    config_key,
                                    save_path)

    def _run_html_beautify(self, target_path, alter_lines, line_width):
        """Run the external ``html-beautify`` tool in place on a file.

        The command is passed as an argument list (no shell), so paths with
        spaces or shell metacharacters are handled safely. Running the tool
        is best effort: a missing or failing tool is logged, not raised.
        """
        executable = shutil.which("html-beautify")
        if executable is None:
            self.logger.warning("html-beautify not found; %s left as is.",
                                target_path)
            return
        command = [executable, "-r", "-q"]
        if alter_lines:
            command += ["-w", str(line_width), "--no-preserve-newlines"]
        command.append(str(target_path))
        try:
            subprocess.run(command, check=False)
        except OSError:
            self.logger.warning("html-beautify could not be run on %s.",
                                target_path, exc_info=True)

    def beautify_xml_part(self, file_path, xpath, alter_lines=False,
                          line_width=80):
        """Beautify part (one element node) of ``self.xml_tree``.

        The element found by ``xpath`` is written to a temporary file in a
        "tmp" folder next to ``file_path``, beautified with ``html-beautify``
        and parsed again. The resulting root element is stored in
        ``self.beautified_part``. The "tmp" folder is removed afterwards,
        even if one of the steps fails.

        Args:
            file_path: Path whose directory hosts the temporary folder.
            xpath: Path expression selecting the element to beautify.
            alter_lines: If true, re-wrap lines to ``line_width`` instead of
                preserving the existing newlines.
            line_width: Maximum line width used when ``alter_lines`` is set.

        Raises:
            ValueError: If ``xpath`` does not match any element.
        """
        tmp_path = os.path.join(os.path.dirname(file_path), "tmp")
        tree = etree.ElementTree(self.xml_tree)
        part = tree.find(xpath)
        if part is None:
            raise ValueError(
                "No element found for XPath: {}".format(xpath))
        # Round trip through lxml's own serializer to get a detached copy.
        # with_tail=False keeps text following the element out of the copy,
        # which would otherwise make the serialized snippet unparsable.
        detached = etree.fromstring(etree.tostring(part, with_tail=False))
        os.makedirs(tmp_path, exist_ok=True)
        try:
            tmp_file_path = os.path.join(tmp_path, "tmp.xml")
            etree.ElementTree(detached).write(tmp_file_path,
                                              pretty_print=True,
                                              xml_declaration=True,
                                              encoding="utf8")
            self._run_html_beautify(tmp_file_path, alter_lines, line_width)
            self.beautified_part = etree.parse(tmp_file_path).getroot()
            update_config.update_config("config.ini", "File paths",
                                        "tmp_path", tmp_path)
        finally:
            delete_folder.delete_folder(tmp_path)

    def beautify_xml(self, file_path, alter_lines=False, line_width=80):
        """Beautify the XML file at ``file_path`` in place.

        Args:
            file_path: File to beautify with ``html-beautify``.
            alter_lines: If true, re-wrap lines to ``line_width`` instead of
                preserving the existing newlines.
            line_width: Maximum line width used when ``alter_lines`` is set.
        """
        self._run_html_beautify(file_path, alter_lines, line_width)

    def _first_child_matches(self, element):
        """Return True if the text of ``element``'s first child matches.

        The text is searched with ``self.regex_compiled``. Elements without
        children, or whose first child has no text, never match.
        """
        if len(element) == 0:
            return False
        child_text = element[0].text
        if child_text is None:
            return False
        return self.regex_compiled.search(child_text) is not None

    def expand_element(self, element_to_expand, expand_attr_key,
                       expand_attr_value, check_child=True):
        """Add an attribute to every element matching a path expression.

        Args:
            element_to_expand: Path expression selecting the elements.
            expand_attr_key: Name of the attribute to set.
            expand_attr_value: Value of the attribute to set.
            check_child: If true (default), an element is only expanded when
                the text of its first child matches the regex given to
                ``compile_regex``. Set to False to expand every selected
                element.
        """
        for element in self.xml_tree.findall(element_to_expand):
            if not check_child or self._first_child_matches(element):
                element.set(expand_attr_key, expand_attr_value)

    def replace_tag_name(self, element_to_replace, tag_name,
                         check_child=True):
        """Rename every element matching a path expression to ``tag_name``.

        Args:
            element_to_replace: Path expression selecting the elements.
            tag_name: New tag name.
            check_child: If true (default), an element is only renamed when
                the text of its first child matches the regex given to
                ``compile_regex``.
        """
        for element in self.xml_tree.findall(element_to_replace):
            if not check_child or self._first_child_matches(element):
                element.tag = tag_name

    def replace_tag_attr(self, element_to_replace, tag_name, attr_key,
                         attr_value, check_child=True):
        """Rename matching elements and add an attribute to them.

        Args:
            element_to_replace: Path expression selecting the elements.
            tag_name: New tag name.
            attr_key: Name of the attribute to set.
            attr_value: Value of the attribute to set.
            check_child: If true (default), an element is only changed when
                the text of its first child matches the regex given to
                ``compile_regex``, like in ``expand_element``.
        """
        for element in self.xml_tree.findall(element_to_replace):
            if not check_child or self._first_child_matches(element):
                element.tag = tag_name
                element.set(attr_key, attr_value)

    def replace_elements(self, elements_to_replace, replacment_elements,
                         keep_parent_text=False):
        """Replace elements identified by a path expression with new ones.

        Elements are replaced pairwise in document order. Nothing is replaced
        (and a warning is logged) when the number of found elements differs
        from the number of replacement elements.

        Args:
            elements_to_replace: Path expression selecting the elements.
            replacment_elements: Sequence of replacement elements.
            keep_parent_text: If true, the text nodes of the parents are
                collected before replacing and assigned as tail text to the
                elements found by the same expression after replacing.
        """
        elements = self.xml_tree.findall(elements_to_replace)
        if len(elements) != len(replacment_elements):
            self.logger.warning(
                "Elements missmatch. There are %s that should be repalced."
                " There are %s present."
                " No elements have been replaced.",
                len(elements), len(replacment_elements))
            return
        elements_text = []
        if keep_parent_text:
            # Must be collected before replacing: the tail text travels with
            # the removed elements.
            parents_text_xpath = (elements_to_replace + "/" + "parent::node()"
                                  + "/" + "text()")
            elements_text = self.xml_tree.xpath(parents_text_xpath)
        for element, replacement_element in zip(elements,
                                                replacment_elements):
            element.getparent().replace(element, replacement_element)
        if keep_parent_text:
            replaced = self.xml_tree.findall(elements_to_replace)
            for element, text in zip(replaced, elements_text):
                element.tail = text

    def compile_regex(self, regex):
        """Compile the regex string for better performance and readability.

        Stores the input as ``self.regex_string`` and the compiled pattern
        (``re.MULTILINE``) as ``self.regex_compiled``.
        """
        self.regex_string = regex
        self.regex_compiled = re.compile(self.regex_string, re.MULTILINE)

    def clean_text(self, regex, xpath, replacement_string="",):
        """Replace regex matches in the text of elements matched by ``xpath``.

        Matches are replaced with nothing by default, or with
        ``replacement_string``. Works with match groups. Elements without
        text are left untouched.
        """
        for element in self.xml_tree.xpath(xpath):
            if element.text is None:
                continue
            element.text = re.sub(regex, replacement_string, element.text)