mirror of
https://gitlab.ub.uni-bielefeld.de/sfb1288inf/ocr.git
synced 2025-07-02 01:00:33 +00:00
Codestyle
This commit is contained in:
18
ocr
18
ocr
@ -21,7 +21,7 @@ from pyflow import WorkflowRunner
|
||||
''' TODO:
|
||||
' Implement --end-page: Last page to ocr
|
||||
' Implement --memMb: Total amount of memory (RAM) available for this workflow.
|
||||
' Default: 2048 * nCores
|
||||
' Default: 2048 * n_cores
|
||||
' Implement --rotate: Rotate pages from input (90, 180, 270)
|
||||
' Implement --split-pages: Split pages in half after possible rotation
|
||||
' Implement --start-page: First page to ocr
|
||||
@ -123,7 +123,7 @@ class OCRWorkflow(WorkflowRunner):
|
||||
' ##################################################
|
||||
'''
|
||||
split_jobs = []
|
||||
split_job_nCores = min(
|
||||
split_job_n_cores = min(
|
||||
self.n_cores,
|
||||
max(1, int(self.n_cores / len(self.jobs)))
|
||||
)
|
||||
@ -148,7 +148,7 @@ class OCRWorkflow(WorkflowRunner):
|
||||
command=cmd,
|
||||
dependencies='create_output_directories_job_-_%i' % (index),
|
||||
label='split_job_-_%i' % (index),
|
||||
nCores=split_job_nCores
|
||||
nCores=split_job_n_cores
|
||||
)
|
||||
)
|
||||
|
||||
@ -170,7 +170,7 @@ class OCRWorkflow(WorkflowRunner):
|
||||
' four cores available for this workflow, the available core
|
||||
' number.
|
||||
'''
|
||||
binarisation_job_nCores = min(4, self.n_cores)
|
||||
binarisation_job_n_cores = min(4, self.n_cores)
|
||||
for index, job in enumerate(self.jobs):
|
||||
files = os.listdir(os.path.join(job['output_dir'], 'tmp'))
|
||||
files = filter(lambda x: x.endswith('.tif'), files)
|
||||
@ -181,7 +181,7 @@ class OCRWorkflow(WorkflowRunner):
|
||||
)
|
||||
cmd = 'ocropus-nlbin --output "%s" --parallel "%i" %s' % (
|
||||
os.path.join(job['output_dir'], 'tmp'),
|
||||
binarisation_job_nCores,
|
||||
binarisation_job_n_cores,
|
||||
' '.join(files)
|
||||
)
|
||||
binarisation_jobs.append(
|
||||
@ -189,7 +189,7 @@ class OCRWorkflow(WorkflowRunner):
|
||||
command=cmd,
|
||||
dependencies='split_job_-_%i' % (index),
|
||||
label='binarisation_job_-_%i' % (index),
|
||||
nCores=binarisation_job_nCores
|
||||
nCores=binarisation_job_n_cores
|
||||
)
|
||||
)
|
||||
|
||||
@ -249,13 +249,13 @@ class OCRWorkflow(WorkflowRunner):
|
||||
' or, if there are fewer than four cores available for this workflow,
|
||||
' the available core number.
|
||||
'''
|
||||
ocr_job_nCores = min(4, self.n_cores)
|
||||
ocr_job_n_cores = min(4, self.n_cores)
|
||||
'''
|
||||
' WORKAROUND: Tesseract only uses one core for the deu_frak language
|
||||
' model, so the workflow will also only reserve one in this case.
|
||||
'''
|
||||
if self.lang == "deu_frak":
|
||||
ocr_job_nCores = 1
|
||||
ocr_job_n_cores = 1
|
||||
for index, job in enumerate(self.jobs):
|
||||
files = os.listdir(os.path.join(job['output_dir'], 'tmp'))
|
||||
if self.skip_binarisation:
|
||||
@ -293,7 +293,7 @@ class OCRWorkflow(WorkflowRunner):
|
||||
command=cmd,
|
||||
dependencies=ocr_job_dependencies,
|
||||
label='ocr_job_-_%i-%i' % (index, number),
|
||||
nCores=ocr_job_nCores
|
||||
nCores=ocr_job_n_cores
|
||||
)
|
||||
)
|
||||
number += 1
|
||||
|
Reference in New Issue
Block a user