From d84db585fac81b48bd1b75ddf8fd6a7b41ecfa1c Mon Sep 17 00:00:00 2001
From: Patrick Jentsch
Date: Mon, 15 Apr 2019 09:47:30 +0200
Subject: [PATCH] Sort files in output.
---
ocr | 29 +++++++++++++++++++++++++++--
1 file changed, 27 insertions(+), 2 deletions(-)
diff --git a/ocr b/ocr
index 9f25402..bc96101 100755
--- a/ocr
+++ b/ocr
@@ -84,7 +84,14 @@ class OCRWorkflow(WorkflowRunner):
create_output_directories_job_number = 0
for job in self.jobs:
create_output_directories_job_number += 1
- cmd = 'mkdir -p "%s"' % (os.path.join(job["output_dir"], "tmp"))
+ cmd = 'mkdir -p "%s" "%s" "%s" "%s" "%s" "%s"' % (
+ os.path.join(job["output_dir"], "tmp", "binarized"),
+ os.path.join(job["output_dir"], "tmp", "hocr"),
+ os.path.join(job["output_dir"], "tmp", "normalized"),
+ os.path.join(job["output_dir"], "tmp", "pdf"),
+ os.path.join(job["output_dir"], "tmp", "tiff"),
+ os.path.join(job["output_dir"], "tmp", "txt")
+ )  # one quoted "%s" per directory: the placeholder count must equal the tuple length
create_output_directories_jobs.append(self.addTask(label="create_output_directories_job_-_%i" % (create_output_directories_job_number), command=cmd))
###
@@ -226,7 +233,25 @@ class OCRWorkflow(WorkflowRunner):
###
cleanup_jobs = []
cleanup_job_counter = 0
- if not self.keepIntermediates:
+ if self.keepIntermediates:
+ for job in self.jobs:
+ cleanup_job_counter += 1
+ cmd = 'mv "%s"/*.bin.png "%s" && mv "%s"/*.hocr "%s" && mv "%s"/*.nrm.png "%s" && mv "%s"/*.pdf "%s" && mv "%s"/*.tif "%s" && mv "%s"/*.txt "%s"' % (  # arguments alternate: source dir, then destination subdir, one pair per mv
+ os.path.join(job["output_dir"], "tmp"),
+ os.path.join(job["output_dir"], "tmp", "binarized"),
+ os.path.join(job["output_dir"], "tmp"),
+ os.path.join(job["output_dir"], "tmp", "hocr"),
+ os.path.join(job["output_dir"], "tmp"),
+ os.path.join(job["output_dir"], "tmp", "normalized"),
+ os.path.join(job["output_dir"], "tmp"),
+ os.path.join(job["output_dir"], "tmp", "pdf"),
+ os.path.join(job["output_dir"], "tmp"),
+ os.path.join(job["output_dir"], "tmp", "tiff"),
+ os.path.join(job["output_dir"], "tmp"),
+ os.path.join(job["output_dir"], "tmp", "txt")
+ )  # NOTE(review): the && chain stops at the first failing mv; a glob with no matches would skip the remaining moves - confirm every file type is always produced
+ cleanup_jobs.append(self.addTask(label="cleanup_job_-_%i" % (cleanup_job_counter), command=cmd, dependencies=hocr_to_tei_jobs + pdf_merge_jobs + txt_merge_jobs))
+ else:
for job in self.jobs:
cleanup_job_counter += 1
cmd = 'rm -r "%s"' % (