Use argparse in hocrtotei

This commit is contained in:
Patrick Jentsch
2019-05-16 14:21:01 +02:00
parent c39edec1ab
commit b81ad4cc67
2 changed files with 27 additions and 21 deletions

11
ocr
View File

@ -307,8 +307,15 @@ class OCRWorkflow(WorkflowRunner):
'''
hocr_to_tei_jobs = []
for index, job in enumerate(self.jobs):
cmd = 'hocrtotei "%s" "%s"' % (
os.path.join(job['output_dir'], 'tmp'),
files = os.listdir(os.path.join(job['output_dir'], 'tmp'))
files = filter(lambda x: x.endswith('.hocr'), files)
files.sort(key=lambda x: int(re.search(r'\d+', x).group(0)))
files = map(
lambda x: '"' + os.path.join(job['output_dir'], 'tmp', x) + '"',
files
)
cmd = 'hocrtotei %s "%s"' % (
' '.join(files),
os.path.join(
job['output_dir'],
os.path.join(job['output_dir'], job['name'] + '.xml')