mirror of
				https://gitlab.ub.uni-bielefeld.de/sfb1288inf/ocr.git
				synced 2025-10-31 19:13:18 +00:00 
			
		
		
		
	Changed shlex.escape(cmd) calls to shlex().escape(cmd)
This commit is contained in:
		
							
								
								
									
										20
									
								
								ocr_pyflow
									
									
									
									
									
								
							
							
						
						
									
										20
									
								
								ocr_pyflow
									
									
									
									
									
								
							| @@ -115,7 +115,7 @@ class OCRWorkflow(WorkflowRunner): | ||||
|                 os.path.join(job["output_dir"], "tmp", "ocropus-nlbin"), | ||||
|                 os.path.join(job["output_dir"], "tmp", "tesseract"), | ||||
|                 os.path.join(job["output_dir"], "tmp", "tiff_files")) | ||||
|             cmd = shlex.escape(cmd); | ||||
|             cmd = shlex().escape(cmd); | ||||
|             mkdir_jobs.append(self.addTask(label="mkdir_job_-_%i" % (mkdir_job_number), command=cmd)) | ||||
|  | ||||
|  | ||||
| @@ -140,14 +140,14 @@ class OCRWorkflow(WorkflowRunner): | ||||
|                 os.path.join(job["output_dir"], "tmp", "tiff_files", os.path.basename(job["path"]).rsplit(".", 1)[0] + ".pdf"), | ||||
|                 os.path.join(job["output_dir"], "tmp", "tiff_files", os.path.basename(job["path"]).rsplit(".", 1)[0]), | ||||
|                 os.path.join(job["output_dir"], "tmp", "tiff_files", os.path.basename(job["path"]).rsplit(".", 1)[0] + ".pdf")) | ||||
|             cmd = shlex.escape(cmd); | ||||
|             cmd = shlex().escape(cmd); | ||||
|             split_jobs.append(self.addTask(label="split_job_-_%i" % (split_job_number), command=cmd, dependencies=mkdir_jobs)) | ||||
|         for job in self.jobs["pdf"]: | ||||
|             split_job_number += 1 | ||||
|             cmd = "pdftoppm %s %s -tiff -r 300 -tiffcompression lzw -cropbox" % ( | ||||
|                 job["path"], | ||||
|                 os.path.join(job["output_dir"], "tmp", "tiff_files", os.path.basename(job["path"]).rsplit(".", 1)[0])) | ||||
|             cmd = shlex.escape(cmd); | ||||
|             cmd = shlex().escape(cmd); | ||||
|             split_jobs.append(self.addTask(label="split_job_-_%i" % (split_job_number), command=cmd, dependencies=mkdir_jobs)) | ||||
|  | ||||
|  | ||||
| @@ -162,7 +162,7 @@ class OCRWorkflow(WorkflowRunner): | ||||
|             cmd = "ocropus-nlbin -o %s %s" % ( | ||||
|                 os.path.join(job["output_dir"], "tmp", "ocropus-nlbin"), | ||||
|                 os.path.join(job["output_dir"], "tmp", "tiff_files", os.path.basename(job["path"]).rsplit(".", 1)[0] + "-*.tif")) | ||||
|             cmd = shlex.escape(cmd); | ||||
|             cmd = shlex().escape(cmd); | ||||
|             ocropusnlbin_jobs.append(self.addTask(label="ocropusnlbin_job_-_%i" % (ocropusnlbin_job_number), command=cmd, dependencies=split_jobs)) | ||||
|  | ||||
|  | ||||
| @@ -182,7 +182,7 @@ class OCRWorkflow(WorkflowRunner): | ||||
|                     os.path.join(job["output_dir"], "tmp", "tesseract", file.rsplit(".", 2)[0]), | ||||
|                     self.lang, | ||||
|                     "pdf" if self.pdf else "") | ||||
|                 cmd = shlex.escape(cmd); | ||||
|                 cmd = shlex().escape(cmd); | ||||
|                 tesseract_jobs.append(self.addTask(label="tesseract_job_-_%i" % (tesseract_job_number), command=cmd, dependencies=ocropusnlbin_jobs, nCores=min(4, self.nCores))) | ||||
|  | ||||
|  | ||||
| @@ -198,7 +198,7 @@ class OCRWorkflow(WorkflowRunner): | ||||
|                 cmd = "pdftk %s cat output %s" % ( | ||||
|                     os.path.join(job["output_dir"], "tmp", "tesseract", "*.pdf"), | ||||
|                     os.path.join(job["output_dir"], os.path.basename(job["path"].rsplit(".", 1)[0] + ".pdf"))) | ||||
|                 cmd = shlex.escape(cmd); | ||||
|                 cmd = shlex().escape(cmd); | ||||
|                 pdf_merge_jobs.append(self.addTask(label="pdf_merge_job_-_%i" % (pdf_merge_job_number), command=cmd, dependencies=tesseract_jobs)) | ||||
|  | ||||
|  | ||||
| @@ -213,7 +213,7 @@ class OCRWorkflow(WorkflowRunner): | ||||
|                 pdf_to_txt_job_number += 1 | ||||
|                 cmd = "pdftotext -raw %s" % ( | ||||
|                     os.path.join(job["output_dir"], os.path.basename(job["path"].rsplit(".", 1)[0] + ".pdf"))) | ||||
|                 cmd = shlex.escape(cmd); | ||||
|                 cmd = shlex().escape(cmd); | ||||
|                 pdf_merge_jobs.append(self.addTask(label="pdf_to_txt_job_-_%i" % (pdf_to_txt_job_number), command=cmd, dependencies=pdf_merge_jobs)) | ||||
|  | ||||
|  | ||||
| @@ -228,7 +228,7 @@ class OCRWorkflow(WorkflowRunner): | ||||
|             cmd = "mv %s %s" % ( | ||||
|                 os.path.join(job["output_dir"], "tmp", "tesseract", "*.hocr"), | ||||
|                 os.path.join(job["output_dir"], "hocr_files")) | ||||
|             cmd = shlex.escape(cmd); | ||||
|             cmd = shlex().escape(cmd); | ||||
|             move_hocr_jobs.append(self.addTask(label="move_hocr_job_-_%i" % (move_hocr_job_number), command=cmd, dependencies=tesseract_jobs)) | ||||
|  | ||||
|  | ||||
| @@ -243,7 +243,7 @@ class OCRWorkflow(WorkflowRunner): | ||||
|             cmd = "parse_hocr %s %s" % ( | ||||
|                 os.path.join(job["output_dir"], "hocr_files"), | ||||
|                 os.path.join(os.path.join(job["output_dir"], os.path.basename(job["path"]).rsplit(".", 1)[0] + ".xml"))) | ||||
|             cmd = shlex.escape(cmd); | ||||
|             cmd = shlex().escape(cmd); | ||||
|             hocr_to_teip5_jobs.append(self.addTask(label="hocr_to_teip5_job_-_%i" % (hocr_to_teip5_job_number), command=cmd, dependencies=move_hocr_jobs)) | ||||
|  | ||||
|  | ||||
| @@ -258,7 +258,7 @@ class OCRWorkflow(WorkflowRunner): | ||||
|             for job in self.jobs["images"] + self.jobs["pdf"]: | ||||
|                 cleanup_job_counter += 1 | ||||
|                 cmd = "rm -r %s" % (os.path.join(job["output_dir"], "tmp")) | ||||
|                 cmd = shlex.escape(cmd); | ||||
|                 cmd = shlex().escape(cmd); | ||||
|                 cleanup_jobs.append(self.addTask(label="cleanup_job_-_%i" % (cleanup_job_counter), command=cmd)) | ||||
|  | ||||
|  | ||||
|   | ||||
		Reference in New Issue
	
	Block a user