mirror of
				https://gitlab.ub.uni-bielefeld.de/sfb1288inf/ocr.git
				synced 2025-11-04 10:52:45 +00:00 
			
		
		
		
	One thread per page ocr patch
This commit is contained in:
		
							
								
								
									
										8
									
								
								ocr
									
									
									
									
									
								
							
							
						
						
									
										8
									
								
								ocr
									
									
									
									
									
								
							@@ -203,12 +203,6 @@ class OCRPipeline(WorkflowRunner):
 | 
			
		||||
        ' ##################################################
 | 
			
		||||
        '''
 | 
			
		||||
        ocr_tasks = []
 | 
			
		||||
        '''
 | 
			
		||||
        ' Tesseract runs fastest with four cores. So we run it with either four
 | 
			
		||||
        ' or, if there are fewer than four cores available for this workflow,
 | 
			
		||||
        ' the available core number.
 | 
			
		||||
        '''
 | 
			
		||||
        n_cores = min(4, self.n_cores)
 | 
			
		||||
        for i, job in enumerate(self.jobs):
 | 
			
		||||
            input_dir = job.intermediate_dir
 | 
			
		||||
            output_dir = job.intermediate_dir
 | 
			
		||||
@@ -232,7 +226,7 @@ class OCRPipeline(WorkflowRunner):
 | 
			
		||||
                cmd += ' && '
 | 
			
		||||
                cmd += 'sed -i \'s+{}/++g\' "{}".hocr'.format(input_dir, output_file_base)  # noqa
 | 
			
		||||
                lbl = 'ocr_-_{}-{}'.format(i, j)
 | 
			
		||||
                task = self.addTask(command=cmd, dependencies=deps, label=lbl, nCores=n_cores)  # noqa
 | 
			
		||||
                task = self.addTask(command=cmd, dependencies=deps, label=lbl, env={"OMP_THREAD_LIMIT": "1"})  # noqa
 | 
			
		||||
                ocr_tasks.append(task)
 | 
			
		||||
 | 
			
		||||
        '''
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user