mirror of
https://gitlab.ub.uni-bielefeld.de/sfb1288inf/ocr.git
synced 2025-01-13 22:50:34 +00:00
Implement the workaround a bit differently
This commit is contained in:
parent
a533ef76c6
commit
10b473ae37
11
ocr
11
ocr
@ -175,10 +175,15 @@ class OCRWorkflow(WorkflowRunner):
|
|||||||
'''
|
'''
|
||||||
' Tesseract runs fastest with four cores. So we run it with either four
|
' Tesseract runs fastest with four cores. So we run it with either four
|
||||||
' or, if there are fewer than four cores available for this workflow, the
|
' or, if there are fewer than four cores available for this workflow, the
|
||||||
' available core number. The deu_frak language model only supports
|
' available core number.
|
||||||
' single core processing, so we use only one core for this.
|
|
||||||
'''
|
'''
|
||||||
ocr_job_nCores = 1 if self.lang == "deu_frak" else min(4, self.nCores)
|
ocr_job_nCores = min(4, self.nCores)
|
||||||
|
'''
|
||||||
|
' WORKAROUND: Tesseract only uses one core for the deu_frak language
|
||||||
|
' model, so the workflow will also only reserve one in this case.
|
||||||
|
'''
|
||||||
|
if self.lang == "deu_frak":
|
||||||
|
ocr_job_nCores = 1
|
||||||
for job in self.jobs:
|
for job in self.jobs:
|
||||||
for file in filter(lambda x: x.endswith(".tif") if self.skipBinarization else x.endswith(".bin.png"), os.listdir(os.path.join(job["output_dir"], "tmp"))):
|
for file in filter(lambda x: x.endswith(".tif") if self.skipBinarization else x.endswith(".bin.png"), os.listdir(os.path.join(job["output_dir"], "tmp"))):
|
||||||
ocr_job_number += 1
|
ocr_job_number += 1
|
||||||
|
Loading…
x
Reference in New Issue
Block a user