mirror of
				https://gitlab.ub.uni-bielefeld.de/sfb1288inf/ocr.git
				synced 2025-10-31 20:03:14 +00:00 
			
		
		
		
	Don't process files in subdirectories
This commit is contained in:
		
							
								
								
									
										5
									
								
								ocr
									
									
									
									
									
								
							
							
						
						
									
										5
									
								
								ocr
									
									
									
									
									
								
							| @@ -289,9 +289,8 @@ def collect_jobs(input_dir, output_dir): | ||||
|     jobs = [] | ||||
|     for file in os.listdir(input_dir): | ||||
|         if os.path.isdir(os.path.join(input_dir, file)): | ||||
|             jobs += collect_jobs(os.path.join(input_dir, file), | ||||
|                                  os.path.join(output_dir, file)) | ||||
|         elif file.lower().endswith('.pdf'): | ||||
|             continue | ||||
|         if file.lower().endswith('.pdf'): | ||||
|             job = OCRPipelineJob(os.path.join(input_dir, file), | ||||
|                                  os.path.join(output_dir, file)) | ||||
|             jobs.append(job) | ||||
|   | ||||
		Reference in New Issue
	
	Block a user