diff --git a/ocr b/ocr
index 47d7c10..8ee7cb9 100755
--- a/ocr
+++ b/ocr
@@ -120,14 +120,17 @@ class OCRWorkflow(WorkflowRunner):
         ocropusnlbin_jobs = []
         ocropusnlbin_job_number = 0
         for job in self.jobs:
-            # This list is empty if you don't wait for ocropus_nlbin_jobs to complete
-            for file in os.listdir(os.path.join(job["output_dir"], "tmp", "tiff_files")):
-                ocropusnlbin_job_number += 1
-                cmd = 'ocropus-nlbin -o "%s" "%s"' % (
-                    os.path.join(job["output_dir"], "tmp", "ocropus-nlbin"),
-                    os.path.join(job["output_dir"], "tmp", "tiff_files", file)
-                )
-                ocropusnlbin_jobs.append(self.addTask(label="ocropusnlbin_job_-_%i" % (ocropusnlbin_job_number), command=cmd, dependencies=split_jobs))
+            ocropusnlbin_job_number += 1
+            # One task per job. The "/*" glob stays in the command string rather
+            # than calling os.listdir() here, so tiff_files is not read while the
+            # tasks are being defined (presumably expanded at run time - confirm).
+            cmd = 'ocropus-nlbin -o "%s" "%s"/*' % (
+                os.path.join(job["output_dir"], "tmp", "ocropus-nlbin"),
+                os.path.join(job["output_dir"], "tmp", "tiff_files")
+            )
+            # Share the cores between the jobs: floor division keeps nCores an
+            # integer and max() keeps it at 1 or more.
+            ocropusnlbin_jobs.append(self.addTask(label="ocropusnlbin_job_-_%i" % (ocropusnlbin_job_number), command=cmd, dependencies=split_jobs, nCores=max(1, self.nCores // len(self.jobs))))
 
         ###
         # Task "tesseract_job": perform OCR on binarized images