From d4218fcd7cfeb3dee3dd4e97464ae25c0f8a2a63 Mon Sep 17 00:00:00 2001
From: Patrick Jentsch
Date: Thu, 11 Apr 2019 13:46:24 +0200
Subject: [PATCH] Aktualisieren ocr

---
 ocr | 17 +++++++++--------
 1 file changed, 9 insertions(+), 8 deletions(-)

diff --git a/ocr b/ocr
index 47d7c10..8ee7cb9 100755
--- a/ocr
+++ b/ocr
@@ -120,14 +120,15 @@ class OCRWorkflow(WorkflowRunner):
         ocropusnlbin_jobs = []
         ocropusnlbin_job_number = 0
         for job in self.jobs:
-            # This list is empty if you don't wait for ocropus_nlbin_jobs to complete
-            for file in os.listdir(os.path.join(job["output_dir"], "tmp", "tiff_files")):
-                ocropusnlbin_job_number += 1
-                cmd = 'ocropus-nlbin -o "%s" "%s"' % (
-                    os.path.join(job["output_dir"], "tmp", "ocropus-nlbin"),
-                    os.path.join(job["output_dir"], "tmp", "tiff_files", file)
-                )
-                ocropusnlbin_jobs.append(self.addTask(label="ocropusnlbin_job_-_%i" % (ocropusnlbin_job_number), command=cmd, dependencies=split_jobs))
+            # One binarization task per job. The tiff_files directory may still be empty while
+            # the tasks are being defined, so a glob is passed instead of listing files here
+            # (assumes the task command is run through a shell -- TODO confirm).
+            ocropusnlbin_job_number += 1
+            cmd = 'ocropus-nlbin -o "%s" "%s"/*' % (
+                os.path.join(job["output_dir"], "tmp", "ocropus-nlbin"),
+                os.path.join(job["output_dir"], "tmp", "tiff_files")
+            )
+            ocropusnlbin_jobs.append(self.addTask(label="ocropusnlbin_job_-_%i" % (ocropusnlbin_job_number), command=cmd, dependencies=split_jobs, nCores=max(1, self.nCores // len(self.jobs))))
 
         ###
         # Task "tesseract_job": perform OCR on binarized images