"""Merge the <text> element of each corpus file in a directory into one file.

Usage: <script> -dir CORPUS_DIR

Every regular file directly inside CORPUS_DIR (except a previously generated
``corpus.vrt``) is parsed as XML, its first ``text`` child is collected, and
all collected elements are written under a single root to
``CORPUS_DIR/corpus.vrt``.
"""
import argparse
import os
import sys
import xml.etree.ElementTree as ET

OUTPUT_FILENAME = 'corpus.vrt'
# NOTE(review): the original root markup literal was an empty string, which
# ET.fromstring() cannot parse. 'corpus' is assumed to be the intended root
# tag -- confirm against the consumer of corpus.vrt.
ROOT_TAG = 'corpus'


def merge_corpus_files(corpus_dir):
    """Collect the ``text`` element of every file in *corpus_dir* and merge them.

    Sub-directories and an existing ``corpus.vrt`` are ignored. Files are
    processed in sorted filename order so the result is deterministic. Files
    whose root has no ``text`` child are skipped with a warning on stderr.

    Args:
        corpus_dir: Directory containing the XML corpus files.

    Returns:
        The path of the written ``corpus.vrt`` file.

    Raises:
        xml.etree.ElementTree.ParseError: If an input file is not
            well-formed XML.
        OSError: If the directory cannot be listed or a file cannot be
            read or written.
    """
    root = ET.Element(ROOT_TAG)
    # os.listdir() returns entries in arbitrary order; sort for stable output.
    for corpus_file in sorted(os.listdir(corpus_dir)):
        corpus_path = os.path.join(corpus_dir, corpus_file)
        if os.path.isdir(corpus_path):
            continue
        # Do not feed the output of an earlier run back into the merge.
        if corpus_file == OUTPUT_FILENAME:
            continue
        text_node = ET.parse(corpus_path).find('text')
        if text_node is None:
            # find() returns None when there is no matching child; inserting
            # None into the tree would raise a TypeError later on.
            print(
                'warning: no <text> element in {}, skipping'.format(corpus_path),
                file=sys.stderr,
            )
            continue
        root.append(text_node)
    output_path = os.path.join(corpus_dir, OUTPUT_FILENAME)
    ET.ElementTree(root).write(output_path)
    return output_path


def main():
    """Parse the command line and merge the corpus files of the given directory."""
    parser = argparse.ArgumentParser()
    parser.add_argument('-dir', dest='corpus_dir', required=True)
    args = parser.parse_args()
    merge_corpus_files(args.corpus_dir)


if __name__ == '__main__':
    main()