From cb68d6de2d0d5f8e1ce9d28f40bd9685ebdbcdd7 Mon Sep 17 00:00:00 2001 From: Stephan Porada Date: Wed, 7 Oct 2020 13:46:22 +0200 Subject: [PATCH] One thread per page ocr patch --- ocr | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/ocr b/ocr index 0ae9f52..0cedc33 100755 --- a/ocr +++ b/ocr @@ -203,12 +203,6 @@ class OCRPipeline(WorkflowRunner): ' ################################################## ''' ocr_tasks = [] - ''' - ' Tesseract runs fastest with four cores. So we run it with either four - ' or, if there are less then four cores available for this workflow, - ' the available core number. - ''' - n_cores = min(4, self.n_cores) for i, job in enumerate(self.jobs): input_dir = job.intermediate_dir output_dir = job.intermediate_dir @@ -232,7 +226,7 @@ class OCRPipeline(WorkflowRunner): cmd += ' && ' cmd += 'sed -i \'s+{}/++g\' "{}".hocr'.format(input_dir, output_file_base) # noqa lbl = 'ocr_-_{}-{}'.format(i, j) - task = self.addTask(command=cmd, dependencies=deps, label=lbl, nCores=n_cores) # noqa + task = self.addTask(command=cmd, dependencies=deps, label=lbl, env={"OMP_THREAD_LIMIT": "1"}) # noqa ocr_tasks.append(task) '''