mirror of
				https://gitlab.ub.uni-bielefeld.de/sfb1288inf/ocr.git
				synced 2025-10-31 19:53:16 +00:00 
			
		
		
		
	Change output files file format
This commit is contained in:
		
							
								
								
									
										22
									
								
								ocr
									
									
									
									
									
								
							
							
						
						
									
										22
									
								
								ocr
									
									
									
									
									
								
							| @@ -488,39 +488,41 @@ class MainWorkflow(WorkflowRunner): | ||||
|             # Remove temporary directory | ||||
|             os.rmdir(job.tmp_dir) | ||||
|             # Track output files | ||||
|             relative_input = os.path.relpath(job.file, start=self.input_dir) | ||||
|             relative_output_dir = os.path.relpath(job.output_dir, start=self.output_dir)  # noqa | ||||
|             for x in os.listdir(os.path.join(job.output_dir, 'images')): | ||||
|                 self.output_files.append( | ||||
|                     { | ||||
|                         'directory': os.path.join(os.path.relpath(job.output_dir, start=self.output_dir), 'images'),  # noqa | ||||
|                         'filename': x, | ||||
|                         'input': relative_input, | ||||
|                         'path': os.path.join(relative_output_dir, 'images', x), | ||||
|                         'mimetype': 'image/png' | ||||
|                     } | ||||
|                 ) | ||||
|             self.output_files.append( | ||||
|                 { | ||||
|                     'directory': os.path.relpath(job.output_dir, start=self.output_dir),  # noqa | ||||
|                     'filename': '{}.hocr'.format(job.name), | ||||
|                     'input': relative_input, | ||||
|                     'path': os.path.join(relative_output_dir, '{}.hocr'.format(job.name)),  # noqa | ||||
|                     'mimetype': 'application/xhtml+xml' | ||||
|                 } | ||||
|             ) | ||||
|             self.output_files.append( | ||||
|                 { | ||||
|                     'directory': os.path.relpath(job.output_dir, start=self.output_dir),  # noqa | ||||
|                     'filename': '{}.pdf'.format(job.name), | ||||
|                     'input': relative_input, | ||||
|                     'filename': os.path.join(relative_output_dir, '{}.pdf'.format(job.name)),  # noqa | ||||
|                     'mimetype': 'application/pdf' | ||||
|                 } | ||||
|             ) | ||||
|             self.output_files.append( | ||||
|                 { | ||||
|                     'directory': os.path.relpath(job.output_dir, start=self.output_dir),  # noqa | ||||
|                     'filename': '{}.txt'.format(job.name), | ||||
|                     'input': relative_input, | ||||
|                     'filename': os.path.join(relative_output_dir, '{}.txt'.format(job.name)),  # noqa | ||||
|                     'mimetype': 'text/plain' | ||||
|                 } | ||||
|             ) | ||||
|             self.output_files.append( | ||||
|                 { | ||||
|                     'directory': os.path.relpath(job.output_dir, start=self.output_dir),  # noqa | ||||
|                     'filename': '{}.xml'.format(job.name), | ||||
|                     'input': relative_input, | ||||
|                     'filename': os.path.join(relative_output_dir, '{}.xml'.format(job.name)),  # noqa | ||||
|                     'mimetype': 'application/tei+xml' | ||||
|                 } | ||||
|             ) | ||||
|   | ||||
		Reference in New Issue
	
	Block a user