mirror of
https://gitlab.ub.uni-bielefeld.de/sfb1288inf/ocr.git
synced 2024-12-25 18:04:18 +00:00
Use more descriptive argument names than i and o (now: input and output)
This commit is contained in:
parent
41f70da8eb
commit
e78f667438
@ -9,8 +9,8 @@ import re
|
||||
import xml.etree.ElementTree as ET
|
||||
|
||||
parser = ArgumentParser(description='Convert hOCR to TEI XML.')
|
||||
parser.add_argument('i', metavar='Path to hOCR input file')
|
||||
parser.add_argument('o', metavar='Path to TEI output file')
|
||||
parser.add_argument('input', metavar='Path to hOCR input file')
|
||||
parser.add_argument('output', metavar='Path to TEI output file')
|
||||
args = parser.parse_args()
|
||||
|
||||
tei = ''
|
||||
@ -31,7 +31,7 @@ tei += ' </teiHeader>\n'
|
||||
tei += ' <text>\n'
|
||||
tei += ' <body>\n'
|
||||
# Conversion start
|
||||
hocr = ET.parse(args.i)
|
||||
hocr = ET.parse(args.input)
|
||||
for page in hocr.findall('.//*[@class="ocr_page"]'):
|
||||
page_properties = page.attrib.get('title')
|
||||
facsimile = re.search(r'image \"(.*?)\"', page_properties).group(1)
|
||||
@ -53,5 +53,5 @@ tei += ' </body>\n'
|
||||
tei += ' </text>\n'
|
||||
tei += '</TEI>\n'
|
||||
|
||||
with open(args.o, 'w') as tei_file:
|
||||
with open(args.output, 'w') as tei_file:
|
||||
tei_file.write(tei)
|
||||
|
Loading…
Reference in New Issue
Block a user