mirror of
https://gitlab.ub.uni-bielefeld.de/sfb1288inf/ocr.git
synced 2024-12-26 04:24:18 +00:00
Set relative file paths in hocr
This commit is contained in:
parent
018939ae55
commit
b77ca5914f
14
ocr
14
ocr
@ -256,6 +256,19 @@ class OCRPipeline(WorkflowRunner):
|
|||||||
os.listdir(input_dir))
|
os.listdir(input_dir))
|
||||||
files.sort(key=lambda x: int(re.search(r'\d+', x).group(0)))
|
files.sort(key=lambda x: int(re.search(r'\d+', x).group(0)))
|
||||||
files = map(lambda x: os.path.join(input_dir, x), files)
|
files = map(lambda x: os.path.join(input_dir, x), files)
|
||||||
|
# rewrite the image path inside each hOCR file to the relative path ../tiff/<name>.tif
|
||||||
|
relative_files = map(lambda x: os.path.join('..',
|
||||||
|
'tiff',
|
||||||
|
os.path.basename(x).replace('.hocr', '.tif')), # noqa
|
||||||
|
files)
|
||||||
|
for file, relative_file in zip(files, relative_files):
|
||||||
|
with open(file, 'r+') as f:
|
||||||
|
html = f.read()
|
||||||
|
html = html.replace(file.replace('.hocr', '.tif'),
|
||||||
|
relative_file)
|
||||||
|
f.seek(0)
|
||||||
|
f.truncate(0) # deletes content of file to write new html
|
||||||
|
f.write(html)
|
||||||
output_path_base = os.path.join(job.output_dir, 'PoCo')
|
output_path_base = os.path.join(job.output_dir, 'PoCo')
|
||||||
output_path = os.path.join(output_path_base, 'hocr')
|
output_path = os.path.join(output_path_base, 'hocr')
|
||||||
cmd = 'cp "{}" "{}"'.format('" "'.join(files), output_path)
|
cmd = 'cp "{}" "{}"'.format('" "'.join(files), output_path)
|
||||||
@ -398,7 +411,6 @@ class OCRPipeline(WorkflowRunner):
|
|||||||
zip_jobs.append(self.addTask(command=cmd, dependencies=deps,
|
zip_jobs.append(self.addTask(command=cmd, dependencies=deps,
|
||||||
label=lbl))
|
label=lbl))
|
||||||
# zip PoCo files
|
# zip PoCo files
|
||||||
# TODO: Fix relative paths before?
|
|
||||||
poco_paths = []
|
poco_paths = []
|
||||||
poco_names = []
|
poco_names = []
|
||||||
for i, job in enumerate(self.jobs):
|
for i, job in enumerate(self.jobs):
|
||||||
|
Loading…
Reference in New Issue
Block a user