mirror of
https://gitlab.ub.uni-bielefeld.de/sfb1288inf/ocr.git
synced 2025-01-13 18:40:35 +00:00
One thread per page ocr patch
This commit is contained in:
parent
4b84488fe6
commit
cb68d6de2d
8
ocr
8
ocr
@ -203,12 +203,6 @@ class OCRPipeline(WorkflowRunner):
|
|||||||
' ##################################################
|
' ##################################################
|
||||||
'''
|
'''
|
||||||
ocr_tasks = []
|
ocr_tasks = []
|
||||||
'''
|
|
||||||
' Tesseract runs fastest with four cores. So we run it with either four
|
|
||||||
' or, if there are fewer than four cores available for this workflow,
|
|
||||||
' the available core number.
|
|
||||||
'''
|
|
||||||
n_cores = min(4, self.n_cores)
|
|
||||||
for i, job in enumerate(self.jobs):
|
for i, job in enumerate(self.jobs):
|
||||||
input_dir = job.intermediate_dir
|
input_dir = job.intermediate_dir
|
||||||
output_dir = job.intermediate_dir
|
output_dir = job.intermediate_dir
|
||||||
@ -232,7 +226,7 @@ class OCRPipeline(WorkflowRunner):
|
|||||||
cmd += ' && '
|
cmd += ' && '
|
||||||
cmd += 'sed -i \'s+{}/++g\' "{}".hocr'.format(input_dir, output_file_base) # noqa
|
cmd += 'sed -i \'s+{}/++g\' "{}".hocr'.format(input_dir, output_file_base) # noqa
|
||||||
lbl = 'ocr_-_{}-{}'.format(i, j)
|
lbl = 'ocr_-_{}-{}'.format(i, j)
|
||||||
task = self.addTask(command=cmd, dependencies=deps, label=lbl, nCores=n_cores) # noqa
|
task = self.addTask(command=cmd, dependencies=deps, label=lbl, env={"OMP_THREAD_LIMIT": "1"}) # noqa
|
||||||
ocr_tasks.append(task)
|
ocr_tasks.append(task)
|
||||||
|
|
||||||
'''
|
'''
|
||||||
|
Loading…
x
Reference in New Issue
Block a user