One thread per page OCR patch

2025-07-12 06:53:19 +00:00 · 2020-10-07 13:46:22 +02:00
parent 4b84488fe6
commit cb68d6de2d
1 changed files with 1 additions and 7 deletions
--- a/8
+++ b/8
@ -203,12 +203,6 @@ class OCRPipeline(WorkflowRunner):
        ' ##################################################
        '''
        ocr_tasks = []
-        '''
-        ' Tesseract runs fastest with four cores. So we run it with either four
-        ' or, if there are less then four cores available for this workflow,
-        ' the available core number.
-        '''
-        n_cores = min(4, self.n_cores)
        for i, job in enumerate(self.jobs):
            input_dir = job.intermediate_dir
            output_dir = job.intermediate_dir
@ -232,7 +226,7 @@ class OCRPipeline(WorkflowRunner):
                cmd += ' && '
                cmd += 'sed -i \'s+{}/++g\' "{}".hocr'.format(input_dir, output_file_base)  # noqa
                lbl = 'ocr_-_{}-{}'.format(i, j)
-                task = self.addTask(command=cmd, dependencies=deps, label=lbl, nCores=n_cores)  # noqa
+                task = self.addTask(command=cmd, dependencies=deps, label=lbl, env={"OMP_THREAD_LIMIT": "1"})  # noqa
                ocr_tasks.append(task)

        '''