Set relative file paths in hocr

This commit is contained in:
Stephan Porada 2020-06-10 11:48:58 +02:00
parent 018939ae55
commit b77ca5914f

14
ocr
View File

@@ -256,6 +256,19 @@ class OCRPipeline(WorkflowRunner):
os.listdir(input_dir)) os.listdir(input_dir))
files.sort(key=lambda x: int(re.search(r'\d+', x).group(0))) files.sort(key=lambda x: int(re.search(r'\d+', x).group(0)))
files = map(lambda x: os.path.join(input_dir, x), files) files = map(lambda x: os.path.join(input_dir, x), files)
# set relative file paths into hocr
relative_files = map(lambda x: os.path.join('..',
'tiff',
os.path.basename(x).replace('.hocr', '.tif')), # noqa
files)
for file, relative_file in zip(files, relative_files):
with open(file, 'r+') as f:
html = f.read()
html = html.replace(file.replace('.hocr', '.tif'),
relative_file)
f.seek(0)
f.truncate(0) # deletes content of file to write new html
f.write(html)
output_path_base = os.path.join(job.output_dir, 'PoCo') output_path_base = os.path.join(job.output_dir, 'PoCo')
output_path = os.path.join(output_path_base, 'hocr') output_path = os.path.join(output_path_base, 'hocr')
cmd = 'cp "{}" "{}"'.format('" "'.join(files), output_path) cmd = 'cp "{}" "{}"'.format('" "'.join(files), output_path)
@@ -398,7 +411,6 @@ class OCRPipeline(WorkflowRunner):
zip_jobs.append(self.addTask(command=cmd, dependencies=deps, zip_jobs.append(self.addTask(command=cmd, dependencies=deps,
label=lbl)) label=lbl))
# zip PoCo files # zip PoCo files
# TODO: Fix relative paths before?
poco_paths = [] poco_paths = []
poco_names = [] poco_names = []
for i, job in enumerate(self.jobs): for i, job in enumerate(self.jobs):