Implement the workaround a bit differently

This commit is contained in:
Patrick Jentsch 2019-04-16 11:38:36 +02:00
parent a533ef76c6
commit 10b473ae37

11
ocr
View File

@ -175,10 +175,15 @@ class OCRWorkflow(WorkflowRunner):
''' '''
' Tesseract runs fastest with four cores. So we run it with either four ' Tesseract runs fastest with four cores. So we run it with either four
' or, if there are fewer than four cores available for this workflow, the ' or, if there are fewer than four cores available for this workflow, the
' available core number. The deu_frak language model only supports ' available core number.
' single core processing, so we use only one core for this.
''' '''
ocr_job_nCores = 1 if self.lang == "deu_frak" else min(4, self.nCores) ocr_job_nCores = min(4, self.nCores)
'''
' WORKAROUND: Tesseract only uses one core for the deu_frak language
' model, so the workflow will also only reserve one in this case.
'''
if self.lang == "deu_frak":
ocr_job_nCores = 1
for job in self.jobs: for job in self.jobs:
for file in filter(lambda x: x.endswith(".tif") if self.skipBinarization else x.endswith(".bin.png"), os.listdir(os.path.join(job["output_dir"], "tmp"))): for file in filter(lambda x: x.endswith(".tif") if self.skipBinarization else x.endswith(".bin.png"), os.listdir(os.path.join(job["output_dir"], "tmp"))):
ocr_job_number += 1 ocr_job_number += 1