mirror of
https://gitlab.ub.uni-bielefeld.de/sfb1288inf/ocr.git
synced 2024-12-26 06:24:19 +00:00
Implement the workaround a bit differently
This commit is contained in:
parent
a533ef76c6
commit
10b473ae37
11
ocr
11
ocr
@ -175,10 +175,15 @@ class OCRWorkflow(WorkflowRunner):
|
||||
'''
|
||||
' Tesseract runs fastest with four cores. So we run it with either four
|
||||
' or, if there are fewer than four cores available for this workflow, the
|
||||
' available core number. The deu_frak language model only supports
|
||||
' single core processing, so we use only one core for this.
|
||||
' available core number.
|
||||
'''
|
||||
ocr_job_nCores = 1 if self.lang == "deu_frak" else min(4, self.nCores)
|
||||
ocr_job_nCores = min(4, self.nCores)
|
||||
'''
|
||||
' WORKAROUND: Tesseract only uses one core for the deu_frak language
|
||||
' model, so the workflow will also only reserve one in this case.
|
||||
'''
|
||||
if self.lang == "deu_frak":
|
||||
ocr_job_nCores = 1
|
||||
for job in self.jobs:
|
||||
for file in filter(lambda x: x.endswith(".tif") if self.skipBinarization else x.endswith(".bin.png"), os.listdir(os.path.join(job["output_dir"], "tmp"))):
|
||||
ocr_job_number += 1
|
||||
|
Loading…
Reference in New Issue
Block a user