mirror of
https://gitlab.ub.uni-bielefeld.de/sfb1288inf/ocr.git
synced 2024-12-26 18:24:17 +00:00
Update hocrtotei
This commit is contained in:
parent
3131174676
commit
c1cba3ced6
@@ -25,7 +25,7 @@ output_file.write('<?xml version="1.0" encoding="UTF-8"?>\n' +
|
||||
|
||||
for input_file in input_files:
|
||||
tree = ET.parse(os.path.join(sys.argv[1], input_file))
|
||||
output_file.write(' <pb n="%s"/>\n' % (input_file.split(".")[0].split("-")[1]))
|
||||
output_file.write(' <pb n="%s"/>\n' % (input_file.split(".")[0]))
|
||||
for para in tree.findall(".//*[@class='ocr_par']"):
|
||||
output_file.write(' <p>\n')
|
||||
for line in para.findall(".//*[@class='ocr_line']"):
|
||||
|
Loading…
Reference in New Issue
Block a user