mirror of
https://gitlab.ub.uni-bielefeld.de/sfb1288inf/ocr.git
synced 2024-12-27 09:04:18 +00:00
Update hocrtotei
This commit is contained in:
parent
3131174676
commit
c1cba3ced6
@ -25,7 +25,7 @@ output_file.write('<?xml version="1.0" encoding="UTF-8"?>\n' +
|
|||||||
|
|
||||||
for input_file in input_files:
|
for input_file in input_files:
|
||||||
tree = ET.parse(os.path.join(sys.argv[1], input_file))
|
tree = ET.parse(os.path.join(sys.argv[1], input_file))
|
||||||
output_file.write(' <pb n="%s"/>\n' % (input_file.split(".")[0].split("-")[1]))
|
output_file.write(' <pb n="%s"/>\n' % (input_file.split(".")[0]))
|
||||||
for para in tree.findall(".//*[@class='ocr_par']"):
|
for para in tree.findall(".//*[@class='ocr_par']"):
|
||||||
output_file.write(' <p>\n')
|
output_file.write(' <p>\n')
|
||||||
for line in para.findall(".//*[@class='ocr_line']"):
|
for line in para.findall(".//*[@class='ocr_line']"):
|
||||||
|
Loading…
Reference in New Issue
Block a user