From 5e43e09beb39dea98adf26c7286b6b1941f792f2 Mon Sep 17 00:00:00 2001 From: Patrick Jentsch Date: Mon, 15 Apr 2019 10:25:57 +0200 Subject: [PATCH] Update --- ocr | 22 ++++++++++++++-------- 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/ocr b/ocr index 0eaaf83..80f5b95 100755 --- a/ocr +++ b/ocr @@ -84,14 +84,17 @@ class OCRWorkflow(WorkflowRunner): create_output_directories_job_number = 0 for job in self.jobs: create_output_directories_job_number += 1 - cmd = 'mkdir -p "%s" "%s" "%s" "%s" "%s" "%s"' % ( - os.path.join(job["output_dir"], "tmp", "binarized_png"), + cmd = 'mkdir -p "%s" "%s" "%s" "%s"' % ( os.path.join(job["output_dir"], "tmp", "hocr"), - os.path.join(job["output_dir"], "tmp", "normalized_png"), os.path.join(job["output_dir"], "tmp", "pdf"), os.path.join(job["output_dir"], "tmp", "tiff"), os.path.join(job["output_dir"], "tmp", "txt") ) + if not self.skipBinarization: + cmd += ' "%s" "%s"' % ( + os.path.join(job["output_dir"], "tmp", "binarized_png"), + os.path.join(job["output_dir"], "tmp", "normalized_png"), + ) create_output_directories_jobs.append(self.addTask(label="create_output_directories_job_-_%i" % (create_output_directories_job_number), command=cmd)) ### @@ -237,20 +240,23 @@ class OCRWorkflow(WorkflowRunner): if self.keepIntermediates: for job in self.jobs: cleanup_job_counter += 1 - cmd = 'mv "%s"/*.bin.png "%s" && mv "%s"/*.hocr "%s" && mv "%s"/*.nrm.png "%s" && mv "%s"/*.pdf "%s" && mv "%s"/*.tif "%s" && mv "%s"/*.txt "%s"' % ( - os.path.join(job["output_dir"], "tmp"), - os.path.join(job["output_dir"], "tmp", "binarized_png"), + cmd = 'mv "%s"/*.hocr "%s" && mv "%s"/*.pdf "%s" && mv "%s"/*.tif "%s" && mv "%s"/*.txt "%s"' % ( os.path.join(job["output_dir"], "tmp"), os.path.join(job["output_dir"], "tmp", "hocr"), os.path.join(job["output_dir"], "tmp"), - os.path.join(job["output_dir"], "tmp", "normalized_png"), - os.path.join(job["output_dir"], "tmp"), os.path.join(job["output_dir"], "tmp", "pdf"), os.path.join(job["output_dir"], "tmp"), os.path.join(job["output_dir"], "tmp", "tiff"), os.path.join(job["output_dir"], "tmp"), os.path.join(job["output_dir"], "tmp", "txt") ) + if not self.skipBinarization: + cmd += ' && mv "%s"/*.bin.png "%s" && mv "%s"/*.nrm.png "%s"' % ( + os.path.join(job["output_dir"], "tmp"), + os.path.join(job["output_dir"], "tmp", "binarized_png"), + os.path.join(job["output_dir"], "tmp"), + os.path.join(job["output_dir"], "tmp", "normalized_png"), + ) cleanup_jobs.append(self.addTask(label="cleanup_job_-_%i" % (cleanup_job_counter), command=cmd, dependencies=hocr_to_tei_jobs + pdf_merge_jobs + txt_merge_jobs)) else: for job in self.jobs: