From f51a8c454683252aef22e33391aa4725d018f92c Mon Sep 17 00:00:00 2001
From: Patrick Jentsch
Date: Fri, 14 Jan 2022 10:56:16 +0100
Subject: [PATCH] Change output files file format
---
ocr | 22 ++++++++++++----------
1 file changed, 12 insertions(+), 10 deletions(-)
diff --git a/ocr b/ocr
index 54dcd40..def0e93 100755
--- a/ocr
+++ b/ocr
@@ -488,39 +488,41 @@ class MainWorkflow(WorkflowRunner):
# Remove temporary directory
os.rmdir(job.tmp_dir)
# Track output files
+ relative_input = os.path.relpath(job.file, start=self.input_dir)
+ relative_output_dir = os.path.relpath(job.output_dir, start=self.output_dir) # noqa
for x in os.listdir(os.path.join(job.output_dir, 'images')):
self.output_files.append(
{
- 'directory': os.path.join(os.path.relpath(job.output_dir, start=self.output_dir), 'images'), # noqa
- 'filename': x,
+ 'input': relative_input,
+ 'path': os.path.join(relative_output_dir, 'images', x),
'mimetype': 'image/png'
}
)
self.output_files.append(
{
- 'directory': os.path.relpath(job.output_dir, start=self.output_dir), # noqa
- 'filename': '{}.hocr'.format(job.name),
+ 'input': relative_input,
+ 'path': os.path.join(relative_output_dir, '{}.hocr'.format(job.name)), # noqa
'mimetype': 'application/xhtml+xml'
}
)
self.output_files.append(
{
- 'directory': os.path.relpath(job.output_dir, start=self.output_dir), # noqa
- 'filename': '{}.pdf'.format(job.name),
+ 'input': relative_input,
+ 'path': os.path.join(relative_output_dir, '{}.pdf'.format(job.name)), # noqa
'mimetype': 'application/pdf'
}
)
self.output_files.append(
{
- 'directory': os.path.relpath(job.output_dir, start=self.output_dir), # noqa
- 'filename': '{}.txt'.format(job.name),
+ 'input': relative_input,
+ 'path': os.path.join(relative_output_dir, '{}.txt'.format(job.name)), # noqa
'mimetype': 'text/plain'
}
)
self.output_files.append(
{
- 'directory': os.path.relpath(job.output_dir, start=self.output_dir), # noqa
- 'filename': '{}.xml'.format(job.name),
+ 'input': relative_input,
+ 'path': os.path.join(relative_output_dir, '{}.xml'.format(job.name)), # noqa
'mimetype': 'application/tei+xml'
}
)