From cb68d6de2d0d5f8e1ce9d28f40bd9685ebdbcdd7 Mon Sep 17 00:00:00 2001
From: Stephan Porada <sporada@uni-bielefeld.de>
Date: Wed, 7 Oct 2020 13:46:22 +0200
Subject: [PATCH] Run OCR with one Tesseract thread per page

---
 ocr | 8 +-------
 1 file changed, 1 insertion(+), 7 deletions(-)

diff --git a/ocr b/ocr
index 0ae9f52..0cedc33 100755
--- a/ocr
+++ b/ocr
@@ -203,12 +203,6 @@ class OCRPipeline(WorkflowRunner):
         ' ##################################################
         '''
         ocr_tasks = []
-        '''
-        ' Tesseract runs fastest with four cores. So we run it with either four
-        ' or, if there are less then four cores available for this workflow,
-        ' the available core number.
-        '''
-        n_cores = min(4, self.n_cores)
         for i, job in enumerate(self.jobs):
             input_dir = job.intermediate_dir
             output_dir = job.intermediate_dir
@@ -232,7 +226,7 @@ class OCRPipeline(WorkflowRunner):
                 cmd += ' && '
                 cmd += 'sed -i \'s+{}/++g\' "{}".hocr'.format(input_dir, output_file_base)  # noqa
                 lbl = 'ocr_-_{}-{}'.format(i, j)
-                task = self.addTask(command=cmd, dependencies=deps, label=lbl, nCores=n_cores)  # noqa
+                task = self.addTask(command=cmd, dependencies=deps, label=lbl, env={"OMP_THREAD_LIMIT": "1"})  # noqa
                 ocr_tasks.append(task)
 
         '''