mirror of
https://gitlab.ub.uni-bielefeld.de/sfb1288inf/ocr.git
synced 2024-12-26 06:04:17 +00:00
One thread per page ocr patch
This commit is contained in:
parent
4b84488fe6
commit
cb68d6de2d
8
ocr
8
ocr
@ -203,12 +203,6 @@ class OCRPipeline(WorkflowRunner):
|
||||
' ##################################################
|
||||
'''
|
||||
ocr_tasks = []
|
||||
'''
|
||||
' Tesseract runs fastest with four cores. So we run it with either four
|
||||
' or, if there are fewer than four cores available for this workflow,
|
||||
' the available core number.
|
||||
'''
|
||||
n_cores = min(4, self.n_cores)
|
||||
for i, job in enumerate(self.jobs):
|
||||
input_dir = job.intermediate_dir
|
||||
output_dir = job.intermediate_dir
|
||||
@ -232,7 +226,7 @@ class OCRPipeline(WorkflowRunner):
|
||||
cmd += ' && '
|
||||
cmd += 'sed -i \'s+{}/++g\' "{}".hocr'.format(input_dir, output_file_base) # noqa
|
||||
lbl = 'ocr_-_{}-{}'.format(i, j)
|
||||
task = self.addTask(command=cmd, dependencies=deps, label=lbl, nCores=n_cores) # noqa
|
||||
task = self.addTask(command=cmd, dependencies=deps, label=lbl, env={"OMP_THREAD_LIMIT": "1"}) # noqa
|
||||
ocr_tasks.append(task)
|
||||
|
||||
'''
|
||||
|
Loading…
Reference in New Issue
Block a user