From d4218fcd7cfeb3dee3dd4e97464ae25c0f8a2a63 Mon Sep 17 00:00:00 2001
From: Patrick Jentsch
Date: Thu, 11 Apr 2019 13:46:24 +0200
Subject: [PATCH] Aktualisieren ocr
---
ocr | 14 ++++++--------
1 file changed, 6 insertions(+), 8 deletions(-)
diff --git a/ocr b/ocr
index 47d7c10..8ee7cb9 100755
--- a/ocr
+++ b/ocr
@@ -120,14 +120,12 @@ class OCRWorkflow(WorkflowRunner):
ocropusnlbin_jobs = []
ocropusnlbin_job_number = 0
for job in self.jobs:
- # This list is empty if you don't wait for ocropus_nlbin_jobs to complete
- for file in os.listdir(os.path.join(job["output_dir"], "tmp", "tiff_files")):
- ocropusnlbin_job_number += 1
- cmd = 'ocropus-nlbin -o "%s" "%s"' % (
- os.path.join(job["output_dir"], "tmp", "ocropus-nlbin"),
- os.path.join(job["output_dir"], "tmp", "tiff_files", file)
- )
- ocropusnlbin_jobs.append(self.addTask(label="ocropusnlbin_job_-_%i" % (ocropusnlbin_job_number), command=cmd, dependencies=split_jobs))
+ ocropusnlbin_job_number += 1
+ # One task per job: the "/*" glob stays in the command string, so the TIFF files are matched when the task runs rather than listed while the workflow is being defined.
+ cmd = 'ocropus-nlbin -o "%s" "%s"/*' % (os.path.join(job["output_dir"], "tmp", "ocropus-nlbin"), os.path.join(job["output_dir"], "tmp", "tiff_files"))
+ # Share the available cores evenly between the jobs (at least one each); floor division keeps the core count an integer.
+ cores_per_job = max(1, self.nCores // len(self.jobs))
+ ocropusnlbin_jobs.append(self.addTask(label="ocropusnlbin_job_-_%i" % (ocropusnlbin_job_number), command=cmd, dependencies=split_jobs, nCores=cores_per_job))
###
# Task "tesseract_job": perform OCR on binarized images