This commit is contained in:
Patrick Jentsch
2018-10-29 10:38:50 +01:00
parent ce864e205a
commit 132490a929
3 changed files with 219 additions and 441 deletions

View File

@@ -1,4 +1,4 @@
#!/usr/bin/env python3.6
#!/usr/bin/env python3.5
# coding=utf-8
import xml.etree.ElementTree as ET
@@ -6,7 +6,7 @@ from xml.sax.saxutils import escape
import os
import sys
input_files = sorted(os.listdir(sys.argv[1]))
input_files = filter(lambda x: x.endswith(".hocr"), sorted(os.listdir(sys.argv[1])))
output_file = open(sys.argv[2], "w")
output_file.write('<?xml version="1.0" encoding="UTF-8"?>\n' +
@@ -37,6 +37,7 @@ for input_file in input_files:
if not first_word_in_line:
output_file.write('<lb/>\n')
output_file.write(' </p>\n')
output_file.write(' </body>\n' +
' </text>\n' +
'</TEI>')