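Better multithreading: split the available cores across jobs and pass the per-job count to Ghostscript (-dNumRenderingThreads) and to the task scheduler (nCores)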

This commit is contained in:
Patrick Jentsch 2020-07-02 11:49:35 +02:00
parent 7322a5bc7c
commit 82285a8e6c

5
ocr
View File

@ -226,6 +226,7 @@ class OCRPipeline(WorkflowRunner):
' ################################################## ' ##################################################
''' '''
combined_pdf_creation_jobs = [] combined_pdf_creation_jobs = []
n_cores = min(self.n_cores, max(1, int(self.n_cores / len(self.jobs))))
for i, job in enumerate(self.jobs): for i, job in enumerate(self.jobs):
input_dir = os.path.join(job.output_dir, 'tmp') input_dir = os.path.join(job.output_dir, 'tmp')
output_dir = job.output_dir output_dir = job.output_dir
@ -237,6 +238,7 @@ class OCRPipeline(WorkflowRunner):
cmd = 'gs' cmd = 'gs'
cmd += ' -dBATCH' cmd += ' -dBATCH'
cmd += ' -dNOPAUSE' cmd += ' -dNOPAUSE'
cmd += ' -dNumRenderingThreads={}'.format(n_cores)
cmd += ' -dPDFSETTINGS=/ebook' cmd += ' -dPDFSETTINGS=/ebook'
cmd += ' -dQUIET' cmd += ' -dQUIET'
cmd += ' -sDEVICE=pdfwrite' cmd += ' -sDEVICE=pdfwrite'
@ -247,7 +249,8 @@ class OCRPipeline(WorkflowRunner):
lbl = 'combined_pdf_creation_-_{}'.format(i) lbl = 'combined_pdf_creation_-_{}'.format(i)
combined_pdf_creation_jobs.append(self.addTask(command=cmd, combined_pdf_creation_jobs.append(self.addTask(command=cmd,
dependencies=deps, dependencies=deps,
label=lbl)) label=lbl,
nCores=n_cores))
''' '''
' ################################################## ' ##################################################