mirror of
https://gitlab.ub.uni-bielefeld.de/sfb1288inf/ocr.git
synced 2025-07-02 00:20:35 +00:00
Use argparse in hocrtotei
This commit is contained in:
11
ocr
11
ocr
@ -307,8 +307,15 @@ class OCRWorkflow(WorkflowRunner):
|
||||
'''
|
||||
hocr_to_tei_jobs = []
|
||||
for index, job in enumerate(self.jobs):
|
||||
cmd = 'hocrtotei "%s" "%s"' % (
|
||||
os.path.join(job['output_dir'], 'tmp'),
|
||||
files = os.listdir(os.path.join(job['output_dir'], 'tmp'))
|
||||
files = filter(lambda x: x.endswith('.hocr'), files)
|
||||
files.sort(key=lambda x: int(re.search(r'\d+', x).group(0)))
|
||||
files = map(
|
||||
lambda x: '"' + os.path.join(job['output_dir'], 'tmp', x) + '"',
|
||||
files
|
||||
)
|
||||
cmd = 'hocrtotei %s "%s"' % (
|
||||
' '.join(files),
|
||||
os.path.join(
|
||||
job['output_dir'],
|
||||
os.path.join(job['output_dir'], job['name'] + '.xml')
|
||||
|
Reference in New Issue
Block a user