mirror of
https://gitlab.ub.uni-bielefeld.de/sfb1288inf/ocr.git
synced 2025-01-13 18:10:34 +00:00
Update tei.
This commit is contained in:
parent
20752241f4
commit
708da35a19
@ -25,7 +25,7 @@ output_file.write('<?xml version="1.0" encoding="UTF-8"?>\n' +
|
||||
|
||||
for input_file in input_files:
|
||||
tree = ET.parse(os.path.join(sys.argv[1], input_file))
|
||||
output_file.write(' <pb n="%s" facs="%s.tif"/>\n' % (input_file.split(".")[0], input_file.split(".")[0]))
|
||||
output_file.write(' <pb n="%s"/>\n' % (input_file.split(".")[0].split("-")[1]))
|
||||
for para in tree.findall(".//*[@class='ocr_par']"):
|
||||
output_file.write(' <p>\n')
|
||||
for line in para.findall(".//*[@class='ocr_line']"):
|
||||
|
Loading…
x
Reference in New Issue
Block a user