Fixed wrong references: the job loops in OCRWorkflow now iterate over self.jobs instead of jobs

This commit is contained in:
Patrick Jentsch 2019-01-15 11:03:09 +01:00
parent 6708375c37
commit f578d89ccc

View File

@@ -72,7 +72,7 @@ class OCRWorkflow(WorkflowRunner):
### ###
mkdir_jobs = [] mkdir_jobs = []
mkdir_job_number = 0 mkdir_job_number = 0
for job in jobs: for job in self.jobs:
mkdir_job_number += 1 mkdir_job_number += 1
cmd = 'mkdir -p "%s" "%s" "%s" "%s"' % ( cmd = 'mkdir -p "%s" "%s" "%s" "%s"' % (
os.path.join(job["output_dir"], "hocr_files"), os.path.join(job["output_dir"], "hocr_files"),
@@ -88,7 +88,7 @@ class OCRWorkflow(WorkflowRunner):
### ###
split_jobs = [] split_jobs = []
split_job_number = 0 split_job_number = 0
for job in jobs: for job in self.jobs:
split_job_number += 1 split_job_number += 1
if job["basename"].endswith(".tif") or job["basename"].endswith(".tiff"): if job["basename"].endswith(".tif") or job["basename"].endswith(".tiff"):
# TODO: Make the following command work # TODO: Make the following command work
@@ -118,7 +118,7 @@ class OCRWorkflow(WorkflowRunner):
### ###
ocropusnlbin_jobs = [] ocropusnlbin_jobs = []
ocropusnlbin_job_number = 0 ocropusnlbin_job_number = 0
for job in jobs: for job in self.jobs:
ocropusnlbin_job_number += 1 ocropusnlbin_job_number += 1
cmd = 'ocropus-nlbin -o "%s" "%s"' % ( cmd = 'ocropus-nlbin -o "%s" "%s"' % (
os.path.join(job["output_dir"], "tmp", "ocropus-nlbin"), os.path.join(job["output_dir"], "tmp", "ocropus-nlbin"),
@@ -133,7 +133,7 @@ class OCRWorkflow(WorkflowRunner):
self.waitForTasks() self.waitForTasks()
tesseract_jobs = [] tesseract_jobs = []
tesseract_job_number = 0 tesseract_job_number = 0
for job in jobs: for job in self.jobs:
# This list is empty if you don't wait for ocropus_nlbin_jobs to complete # This list is empty if you don't wait for ocropus_nlbin_jobs to complete
for file in filter(lambda x: x.endswith(".bin.png"), os.listdir(os.path.join(job["output_dir"], "tmp", "ocropus-nlbin"))): for file in filter(lambda x: x.endswith(".bin.png"), os.listdir(os.path.join(job["output_dir"], "tmp", "ocropus-nlbin"))):
tesseract_job_number += 1 tesseract_job_number += 1
@@ -150,7 +150,7 @@ class OCRWorkflow(WorkflowRunner):
### ###
hocr_to_teip5_jobs = [] hocr_to_teip5_jobs = []
hocr_to_teip5_job_number = 0 hocr_to_teip5_job_number = 0
for job in jobs: for job in self.jobs:
hocr_to_teip5_job_number += 1 hocr_to_teip5_job_number += 1
cmd = 'parse_hocr "%s" "%s"' % ( cmd = 'parse_hocr "%s" "%s"' % (
os.path.join(job["output_dir"], "tmp", "tesseract"), os.path.join(job["output_dir"], "tmp", "tesseract"),
@@ -164,7 +164,7 @@ class OCRWorkflow(WorkflowRunner):
### ###
move_hocr_jobs = [] move_hocr_jobs = []
move_hocr_job_number = 0 move_hocr_job_number = 0
for job in jobs: for job in self.jobs:
move_hocr_job_number += 1 move_hocr_job_number += 1
cmd = 'mv "%s"/*.hocr "%s"' % ( cmd = 'mv "%s"/*.hocr "%s"' % (
os.path.join(job["output_dir"], "tmp", "tesseract"), os.path.join(job["output_dir"], "tmp", "tesseract"),
@@ -178,7 +178,7 @@ class OCRWorkflow(WorkflowRunner):
### ###
pdf_merge_jobs = [] pdf_merge_jobs = []
pdf_merge_job_number = 0 pdf_merge_job_number = 0
for job in jobs: for job in self.jobs:
pdf_merge_job_number += 1 pdf_merge_job_number += 1
cmd = 'pdftk "%s"/*.pdf cat output "%s"' % ( cmd = 'pdftk "%s"/*.pdf cat output "%s"' % (
os.path.join(job["output_dir"], "tmp", "tesseract"), os.path.join(job["output_dir"], "tmp", "tesseract"),
@@ -192,7 +192,7 @@ class OCRWorkflow(WorkflowRunner):
### ###
pdf_to_txt_jobs = [] pdf_to_txt_jobs = []
pdf_to_txt_job_number = 0 pdf_to_txt_job_number = 0
for job in jobs: for job in self.jobs:
pdf_to_txt_job_number += 1 pdf_to_txt_job_number += 1
cmd = 'pdftotext -raw "%s"' % ( cmd = 'pdftotext -raw "%s"' % (
os.path.join(job["output_dir"], job["basename"].rsplit(".", 1)[0] + ".pdf") os.path.join(job["output_dir"], job["basename"].rsplit(".", 1)[0] + ".pdf")
@@ -206,7 +206,7 @@ class OCRWorkflow(WorkflowRunner):
cleanup_jobs = [] cleanup_jobs = []
cleanup_job_counter = 0 cleanup_job_counter = 0
if not self.keepIntermediates: if not self.keepIntermediates:
for job in jobs: for job in self.jobs:
cleanup_job_counter += 1 cleanup_job_counter += 1
cmd = 'rm -r "%s"' % ( cmd = 'rm -r "%s"' % (
os.path.join(job["output_dir"], "tmp") os.path.join(job["output_dir"], "tmp")