Fixed wrong references

This commit is contained in:
Patrick Jentsch 2019-01-15 11:03:09 +01:00
parent 6708375c37
commit f578d89ccc

View File

@@ -72,7 +72,7 @@ class OCRWorkflow(WorkflowRunner):
###
mkdir_jobs = []
mkdir_job_number = 0
-for job in jobs:
+for job in self.jobs:
mkdir_job_number += 1
cmd = 'mkdir -p "%s" "%s" "%s" "%s"' % (
os.path.join(job["output_dir"], "hocr_files"),
@@ -88,7 +88,7 @@ class OCRWorkflow(WorkflowRunner):
###
split_jobs = []
split_job_number = 0
-for job in jobs:
+for job in self.jobs:
split_job_number += 1
if job["basename"].endswith(".tif") or job["basename"].endswith(".tiff"):
# TODO: Make the following command work
@@ -118,7 +118,7 @@ class OCRWorkflow(WorkflowRunner):
###
ocropusnlbin_jobs = []
ocropusnlbin_job_number = 0
-for job in jobs:
+for job in self.jobs:
ocropusnlbin_job_number += 1
cmd = 'ocropus-nlbin -o "%s" "%s"' % (
os.path.join(job["output_dir"], "tmp", "ocropus-nlbin"),
@@ -133,7 +133,7 @@ class OCRWorkflow(WorkflowRunner):
self.waitForTasks()
tesseract_jobs = []
tesseract_job_number = 0
-for job in jobs:
+for job in self.jobs:
# This list is empty if you don't wait for ocropus_nlbin_jobs to complete
for file in filter(lambda x: x.endswith(".bin.png"), os.listdir(os.path.join(job["output_dir"], "tmp", "ocropus-nlbin"))):
tesseract_job_number += 1
@@ -150,7 +150,7 @@ class OCRWorkflow(WorkflowRunner):
###
hocr_to_teip5_jobs = []
hocr_to_teip5_job_number = 0
-for job in jobs:
+for job in self.jobs:
hocr_to_teip5_job_number += 1
cmd = 'parse_hocr "%s" "%s"' % (
os.path.join(job["output_dir"], "tmp", "tesseract"),
@@ -164,7 +164,7 @@ class OCRWorkflow(WorkflowRunner):
###
move_hocr_jobs = []
move_hocr_job_number = 0
-for job in jobs:
+for job in self.jobs:
move_hocr_job_number += 1
cmd = 'mv "%s"/*.hocr "%s"' % (
os.path.join(job["output_dir"], "tmp", "tesseract"),
@@ -178,7 +178,7 @@ class OCRWorkflow(WorkflowRunner):
###
pdf_merge_jobs = []
pdf_merge_job_number = 0
-for job in jobs:
+for job in self.jobs:
pdf_merge_job_number += 1
cmd = 'pdftk "%s"/*.pdf cat output "%s"' % (
os.path.join(job["output_dir"], "tmp", "tesseract"),
@@ -192,7 +192,7 @@ class OCRWorkflow(WorkflowRunner):
###
pdf_to_txt_jobs = []
pdf_to_txt_job_number = 0
-for job in jobs:
+for job in self.jobs:
pdf_to_txt_job_number += 1
cmd = 'pdftotext -raw "%s"' % (
os.path.join(job["output_dir"], job["basename"].rsplit(".", 1)[0] + ".pdf")
@@ -206,7 +206,7 @@ class OCRWorkflow(WorkflowRunner):
cleanup_jobs = []
cleanup_job_counter = 0
if not self.keepIntermediates:
-for job in jobs:
+for job in self.jobs:
cleanup_job_counter += 1
cmd = 'rm -r "%s"' % (
os.path.join(job["output_dir"], "tmp")