mirror of
				https://gitlab.ub.uni-bielefeld.de/sfb1288inf/ocr.git
				synced 2025-10-31 20:03:14 +00:00 
			
		
		
		
	Set relative file paths in hocr
This commit is contained in:
		
							
								
								
									
										14
									
								
								ocr
									
									
									
									
									
								
							
							
						
						
									
										14
									
								
								ocr
									
									
									
									
									
								
							| @@ -256,6 +256,19 @@ class OCRPipeline(WorkflowRunner): | ||||
|                            os.listdir(input_dir)) | ||||
|             files.sort(key=lambda x: int(re.search(r'\d+', x).group(0))) | ||||
|             files = map(lambda x: os.path.join(input_dir, x), files) | ||||
|             # replace the image path embedded in each hocr file with a relative ../tiff/ path | ||||
|             relative_files = map(lambda x: os.path.join('..', | ||||
|                                                         'tiff', | ||||
|                                                         os.path.basename(x).replace('.hocr', '.tif')),  # noqa | ||||
|                                  files) | ||||
|             for file, relative_file in zip(files, relative_files): | ||||
|                 with open(file, 'r+') as f: | ||||
|                     html = f.read() | ||||
|                     html = html.replace(file.replace('.hocr', '.tif'), | ||||
|                                         relative_file) | ||||
|                     f.seek(0) | ||||
|                     f.truncate(0)  # deletes content of file to write new html | ||||
|                     f.write(html) | ||||
|             output_path_base = os.path.join(job.output_dir, 'PoCo') | ||||
|             output_path = os.path.join(output_path_base, 'hocr') | ||||
|             cmd = 'cp "{}" "{}"'.format('" "'.join(files), output_path) | ||||
| @@ -398,7 +411,6 @@ class OCRPipeline(WorkflowRunner): | ||||
|             zip_jobs.append(self.addTask(command=cmd, dependencies=deps, | ||||
|                                          label=lbl)) | ||||
|         # zip PoCo files | ||||
|         # TODO: Fix relative paths before? | ||||
|             poco_paths = [] | ||||
|             poco_names = [] | ||||
|             for i, job in enumerate(self.jobs): | ||||
|   | ||||
		Reference in New Issue
	
	Block a user