210 lines
9.3 KiB
Python
Executable File
210 lines
9.3 KiB
Python
Executable File
#!/usr/bin/env python
|
|
# -*- coding: utf-8 -*-
|
|
|
|
import logging
import os
import re
import shutil
import subprocess
from os import path
from xml.etree import ElementTree

from lxml import etree

from utility import delete_folder
from utility import update_config
|
|
|
|
|
|
class XMLProtocol(object):
    """Standard operations on/with the XML protocols.

    Provides functions for reading, saving and manipulating an XML protocol.
    All other classes inherit from this one.

    Most methods operate on ``self.xml_tree`` (set by ``read_xml``) and the
    ``check_child`` variants additionally need ``self.regex_compiled`` (set by
    ``compile_regex``).
    """

    def __init__(self):
        """Create the instance and attach a module-level logger to it."""
        super().__init__()
        self.logger = logging.getLogger(__name__)

    def read_protcol(self, file_path):
        """Parse the file at file_path as XML and return its root element.

        Stores the path in ``self.file_path``, its base name in
        ``self.filename`` and the parsed tree in ``self.tree``.
        """
        self.file_path = file_path
        self.filename = os.path.basename(self.file_path)
        # Dropping ignorable whitespace lets later pretty printing re-indent.
        parser = etree.XMLParser(remove_blank_text=True)
        self.tree = etree.parse(file_path, parser)
        root = self.tree.getroot()
        self.logger.info("File successfully parsed as XML.")
        return root

    def read_xml(self, file_path):
        """Parse the file at file_path as XML into ``self.xml_tree``.

        The root element (not the tree object) is what gets stored.
        """
        # Dropping ignorable whitespace lets later pretty printing re-indent.
        parser = etree.XMLParser(encoding='utf-8', remove_blank_text=True)
        self.xml_tree = etree.parse(file_path, parser).getroot()

    def save_to_file(self, output_path, file_path, subfolder, config_section,
                     config_key, filename_sufix=""):
        """Write ``self.xml_tree`` to a new XML file.

        The file is written to ``output_path/subfolder`` (created if missing,
        including parents). Its name is the base name of file_path; when
        filename_sufix is given, the extension of that base name is replaced
        by filename_sufix. The resulting name is kept in ``self.filename``.
        Afterwards the config file "config.ini" is updated via
        ``update_config.update_config`` with config_section, config_key and
        the folder the file was saved to.
        """
        base_name = os.path.basename(file_path)
        if filename_sufix:
            # splitext removes the real extension whatever its length; the
            # former fixed slice of four characters only fitted ".xml"-like
            # names and mangled everything else.
            base_name = os.path.splitext(base_name)[0] + filename_sufix
        self.filename = base_name

        save_path = os.path.join(output_path, subfolder)
        os.makedirs(save_path, exist_ok=True)
        save_file_path = os.path.join(save_path, self.filename)

        etree.ElementTree(self.xml_tree).write(
            save_file_path,
            pretty_print=True,
            xml_declaration=True,
            encoding="utf8",
            doctype=("<!DOCTYPE dbtplenarprotokoll SYSTEM "
                     "'dbtplenarprotokoll_minimal.dtd'>"))
        self.logger.info("New XML saved to:%s", save_file_path)
        update_config.update_config("config.ini", config_section, config_key,
                                    save_path)

    def _run_html_beautify(self, target_path, alter_lines, line_width):
        """Run the external ``html-beautify`` tool in place on target_path.

        With alter_lines the tool is additionally called with
        ``-w line_width --no-preserve-newlines``. The call is best effort: a
        missing executable or a failing run is logged, never raised.
        """
        executable = shutil.which("html-beautify")
        if executable is None:
            self.logger.warning("html-beautify was not found on PATH.")
            return
        command = [executable, "-r", "-q"]
        if alter_lines:
            command += ["-w", str(line_width), "--no-preserve-newlines"]
        command.append(str(target_path))
        try:
            # An argument list (no shell) keeps paths containing spaces or
            # shell metacharacters intact. The exit status is ignored on
            # purpose, as it was before.
            subprocess.run(command, check=False)
        except OSError as error:
            self.logger.warning("Could not run html-beautify: %s", error)

    def beautify_xml_part(self, file_path, xpath, alter_lines=False,
                          line_width=80):
        """Beautify part (element node) of ``self.xml_tree``.

        The element found by xpath is copied, written pretty printed to
        ``tmp/tmp.xml`` next to file_path, run through ``html-beautify`` and
        parsed again; the resulting root element is stored in
        ``self.beautified_part``. The tmp folder path is written to
        "config.ini" (section "File paths", key "tmp_path") and the folder is
        then handed to ``delete_folder.delete_folder``.

        alter_lines: if true, let html-beautify re-wrap lines at line_width.
        """
        tmp_path = os.path.join(os.path.dirname(file_path), "tmp")
        tmp_file_path = os.path.join(tmp_path, "tmp.xml")

        selected = etree.ElementTree(self.xml_tree).find(xpath)
        # Serialise and re-parse to get a copy that is detached from
        # self.xml_tree. NOTE(review): this deliberately keeps using the
        # standard library serialiser on the lxml element, as before.
        detached = etree.fromstring(ElementTree.tostring(selected))
        self.beautified_part = etree.ElementTree(detached)

        os.makedirs(tmp_path, exist_ok=True)
        try:
            self.beautified_part.write(tmp_file_path,
                                       pretty_print=True,
                                       xml_declaration=True,
                                       encoding="utf8")
            self._run_html_beautify(tmp_file_path, alter_lines, line_width)
            self.beautified_part = etree.parse(tmp_file_path).getroot()
            update_config.update_config("config.ini", "File paths",
                                        "tmp_path", tmp_path)
        finally:
            # Clean up the scratch folder even if one of the steps failed.
            delete_folder.delete_folder(tmp_path)

    def beautify_xml(self, file_path, alter_lines=False, line_width=80):
        """Beautify the XML file at file_path in place with html-beautify.

        alter_lines: if true, let html-beautify re-wrap lines at line_width.
        """
        self._run_html_beautify(file_path, alter_lines, line_width)

    def _first_child_matches(self, element):
        """Return True if the text of element's first child matches the
        compiled regex (``self.regex_compiled``).

        Elements without children and first children without text never
        match.
        """
        if len(element) == 0:
            return False
        child_text = element[0].text
        if child_text is None:
            return False
        return self.regex_compiled.search(child_text) is not None

    def expand_element(self, element_to_expand, expand_attr_key,
                       expand_attr_value, check_child=True):
        """Add an attribute to the elements selected by a path expression.

        Every element of ``self.xml_tree`` found by element_to_expand gets
        the attribute expand_attr_key set to expand_attr_value. By default
        only those elements are expanded whose first child's text matches
        the regex compiled with ``compile_regex``. Set check_child to False
        to skip this check and expand every selected element.
        """
        for element in self.xml_tree.findall(element_to_expand):
            if check_child and not self._first_child_matches(element):
                continue
            element.set(expand_attr_key, expand_attr_value)

    def replace_tag_name(self, element_to_replace, tag_name, check_child=True):
        """Replace the tag name of the elements selected by a path expression.

        Every element of ``self.xml_tree`` found by element_to_replace is
        renamed to tag_name. The check_child test works like in
        ``expand_element``: by default only elements whose first child's text
        matches the compiled regex are renamed.
        """
        for element in self.xml_tree.findall(element_to_replace):
            if check_child and not self._first_child_matches(element):
                continue
            element.tag = tag_name

    def replace_tag_attr(self, element_to_replace, tag_name, attr_key,
                         attr_value, check_child=True):
        """Rename the selected elements and add an attribute to them.

        Every element of ``self.xml_tree`` found by element_to_replace is
        renamed to tag_name and gets attr_key set to attr_value. The
        check_child test works like in ``expand_element``: by default only
        elements whose first child's text matches the compiled regex are
        changed.
        """
        for element in self.xml_tree.findall(element_to_replace):
            if check_child and not self._first_child_matches(element):
                continue
            element.tag = tag_name
            element.set(attr_key, attr_value)

    def replace_elements(self, elements_to_replace, replacment_elements,
                         keep_parent_text=False):
        """Replace the elements selected by a path with new elements.

        The elements found by elements_to_replace are replaced pairwise, in
        order, by replacment_elements. If the two counts differ, a warning is
        logged and nothing is replaced.

        With keep_parent_text the text nodes of the parents of the selected
        elements are collected before replacing; afterwards they are assigned,
        in order, as tail of the elements that the same path finds in the
        changed tree.
        """
        elements = self.xml_tree.findall(elements_to_replace)
        if len(elements) != len(replacment_elements):
            self.logger.warning(
                "Elements missmatch. There are %s that should be repalced."
                " There are %s present. No elements have been replaced.",
                len(elements), len(replacment_elements))
            return

        parent_texts = []
        if keep_parent_text:
            # Must be read before replacing, the old nodes are gone afterwards.
            parent_texts = self.xml_tree.xpath(
                elements_to_replace + "/parent::node()/text()")

        for element, replacement in zip(elements, replacment_elements):
            element.getparent().replace(element, replacement)

        if keep_parent_text:
            replaced = self.xml_tree.findall(elements_to_replace)
            for element, text in zip(replaced, parent_texts):
                element.tail = text

    def compile_regex(self, regex):
        """Compile the input regex string for better performance and
        readability.

        The string is kept in ``self.regex_string`` and the pattern, compiled
        with ``re.MULTILINE``, in ``self.regex_compiled``.
        """
        self.regex_string = regex
        self.regex_compiled = re.compile(self.regex_string, re.MULTILINE)

    def clean_text(self, regex, xpath, replacement_string=""):
        """Replace regex matches in the text of the selected elements.

        For every element of ``self.xml_tree`` matched by xpath, the matches
        of regex in its text are replaced with replacement_string (nothing by
        default). Works with match groups. Elements without text are left
        untouched.
        """
        pattern = re.compile(regex)
        for element in self.xml_tree.xpath(xpath):
            if element.text is None:
                continue
            element.text = pattern.sub(replacement_string, element.text)
|