Update hocrtotei

This commit is contained in:
Patrick Jentsch 2019-04-11 13:03:44 +02:00
parent 3131174676
commit c1cba3ced6

View File

@@ -25,7 +25,7 @@ output_file.write('<?xml version="1.0" encoding="UTF-8"?>\n' +
for input_file in input_files: for input_file in input_files:
tree = ET.parse(os.path.join(sys.argv[1], input_file)) tree = ET.parse(os.path.join(sys.argv[1], input_file))
output_file.write(' <pb n="%s"/>\n' % (input_file.split(".")[0].split("-")[1])) output_file.write(' <pb n="%s"/>\n' % (input_file.split(".")[0]))
for para in tree.findall(".//*[@class='ocr_par']"): for para in tree.findall(".//*[@class='ocr_par']"):
output_file.write(' <p>\n') output_file.write(' <p>\n')
for line in para.findall(".//*[@class='ocr_line']"): for line in para.findall(".//*[@class='ocr_line']"):