mirror of
				https://gitlab.ub.uni-bielefeld.de/sfb1288inf/ocr.git
				synced 2025-10-31 20:03:14 +00:00 
			
		
		
		
	Add a switch for zip functionality
This commit is contained in:
		
							
								
								
									
										102
									
								
								ocr
									
									
									
									
									
								
							
							
						
						
									
										102
									
								
								ocr
									
									
									
									
									
								
							| @@ -65,6 +65,14 @@ def parse_arguments(): | ||||
|         required=False, | ||||
|         type=int | ||||
|     ) | ||||
|     parser.add_argument( | ||||
|         '--zip', | ||||
|         action='store_true', | ||||
|         default=False, | ||||
|         dest='zip', | ||||
|         help='package result files in zip bundles', | ||||
|         required=False | ||||
|     ) | ||||
|     return parser.parse_args() | ||||
|  | ||||
|  | ||||
| @@ -76,6 +84,7 @@ class OCRWorkflow(WorkflowRunner): | ||||
|         self.lang = args.lang | ||||
|         self.n_cores = args.n_cores | ||||
|         self.output_dir = args.output_dir | ||||
|         self.zip = args.zip | ||||
|  | ||||
|     def workflow(self): | ||||
|         if len(self.jobs) == 0: | ||||
| @@ -386,59 +395,60 @@ class OCRWorkflow(WorkflowRunner): | ||||
|                 ) | ||||
|             ) | ||||
|  | ||||
|         all_zip_jobs = [] | ||||
|         all_zip_job_dependencies = (hocr_to_tei_jobs | ||||
|                                     + pdf_merge_jobs | ||||
|                                     + txt_merge_jobs) | ||||
|         cmd = 'cd "%s" && zip all.zip */*.{pdf,txt,xml} -x "pyflow.data*" && cd -' % ( | ||||
|             self.output_dir | ||||
|         ) | ||||
|         all_zip_jobs.append( | ||||
|             self.addTask( | ||||
|                 command=cmd, | ||||
|                 dependencies=all_zip_job_dependencies, | ||||
|                 label='all_zip_job_-_%i' % (index) | ||||
|         if self.zip: | ||||
|             all_zip_jobs = [] | ||||
|             all_zip_job_dependencies = (hocr_to_tei_jobs | ||||
|                                         + pdf_merge_jobs | ||||
|                                         + txt_merge_jobs) | ||||
|             cmd = 'cd "%s" && zip all.zip */*.{pdf,txt,xml} -x "pyflow.data*" && cd -' % ( | ||||
|                 self.output_dir | ||||
|             ) | ||||
|             all_zip_jobs.append( | ||||
|                 self.addTask( | ||||
|                     command=cmd, | ||||
|                     dependencies=all_zip_job_dependencies, | ||||
|                     label='all_zip_job' | ||||
|                 ) | ||||
|             ) | ||||
|         ) | ||||
|  | ||||
|         pdf_zip_jobs = [] | ||||
|         pdf_zip_job_dependencies = pdf_merge_jobs | ||||
|         cmd = 'cd "%s" && zip pdf.zip */*.pdf -x "pyflow.data*" && cd -' % ( | ||||
|             self.output_dir | ||||
|         ) | ||||
|         pdf_zip_jobs.append( | ||||
|             self.addTask( | ||||
|                 command=cmd, | ||||
|                 dependencies=pdf_zip_job_dependencies, | ||||
|                 label='pdf_zip_job_-_%i' % (index) | ||||
|             pdf_zip_jobs = [] | ||||
|             pdf_zip_job_dependencies = all_zip_jobs | ||||
|             cmd = 'cd "%s" && zip -m pdf.zip */*.pdf -x "pyflow.data*" && cd -' % ( | ||||
|                 self.output_dir | ||||
|             ) | ||||
|             pdf_zip_jobs.append( | ||||
|                 self.addTask( | ||||
|                     command=cmd, | ||||
|                     dependencies=pdf_zip_job_dependencies, | ||||
|                     label='pdf_zip_job' | ||||
|                 ) | ||||
|             ) | ||||
|         ) | ||||
|  | ||||
|         txt_zip_jobs = [] | ||||
|         txt_zip_job_dependencies = txt_merge_jobs | ||||
|         cmd = 'cd "%s" && zip txt.zip */*.txt -x "pyflow.data*" && cd -' % ( | ||||
|             self.output_dir | ||||
|         ) | ||||
|         txt_zip_jobs.append( | ||||
|             self.addTask( | ||||
|                 command=cmd, | ||||
|                 dependencies=txt_zip_job_dependencies, | ||||
|                 label='txt_zip_job_-_%i' % (index) | ||||
|             txt_zip_jobs = [] | ||||
|             txt_zip_job_dependencies = all_zip_jobs | ||||
|             cmd = 'cd "%s" && zip -m txt.zip */*.txt -x "pyflow.data*" && cd -' % ( | ||||
|                 self.output_dir | ||||
|             ) | ||||
|             txt_zip_jobs.append( | ||||
|                 self.addTask( | ||||
|                     command=cmd, | ||||
|                     dependencies=txt_zip_job_dependencies, | ||||
|                     label='txt_zip_job' | ||||
|                 ) | ||||
|             ) | ||||
|         ) | ||||
|  | ||||
|         xml_zip_jobs = [] | ||||
|         xml_zip_job_dependencies = hocr_to_tei_jobs | ||||
|         cmd = 'cd "%s" && zip xml.zip */*.xml -x "pyflow.data*" && cd -' % ( | ||||
|             self.output_dir | ||||
|         ) | ||||
|         xml_zip_jobs.append( | ||||
|             self.addTask( | ||||
|                 command=cmd, | ||||
|                 dependencies=xml_zip_job_dependencies, | ||||
|                 label='xml_zip_job_-_%i' % (index) | ||||
|             xml_zip_jobs = [] | ||||
|             xml_zip_job_dependencies = all_zip_jobs | ||||
|             cmd = 'cd "%s" && zip -m xml.zip */*.xml -x "pyflow.data*" && cd -' % ( | ||||
|                 self.output_dir | ||||
|             ) | ||||
|             xml_zip_jobs.append( | ||||
|                 self.addTask( | ||||
|                     command=cmd, | ||||
|                     dependencies=xml_zip_job_dependencies, | ||||
|                     label='xml_zip_job' | ||||
|                 ) | ||||
|             ) | ||||
|         ) | ||||
|  | ||||
|         ''' | ||||
|         ' ################################################## | ||||
|   | ||||
		Reference in New Issue
	
	Block a user