Update tei.

This commit is contained in:
Patrick Jentsch 2019-03-13 19:47:14 +01:00
parent 20752241f4
commit 708da35a19

View File

@@ -25,7 +25,7 @@ output_file.write('<?xml version="1.0" encoding="UTF-8"?>\n' +
for input_file in input_files: for input_file in input_files:
tree = ET.parse(os.path.join(sys.argv[1], input_file)) tree = ET.parse(os.path.join(sys.argv[1], input_file))
output_file.write(' <pb n="%s" facs="%s.tif"/>\n' % (input_file.split(".")[0], input_file.split(".")[0])) output_file.write(' <pb n="%s"/>\n' % (input_file.split(".")[0].split("-")[1]))
for para in tree.findall(".//*[@class='ocr_par']"): for para in tree.findall(".//*[@class='ocr_par']"):
output_file.write(' <p>\n') output_file.write(' <p>\n')
for line in para.findall(".//*[@class='ocr_line']"): for line in para.findall(".//*[@class='ocr_line']"):