mirror of
https://gitlab.ub.uni-bielefeld.de/sfb1288inf/ocr.git
synced 2025-01-13 16:50:34 +00:00
Fixed wrong references
This commit is contained in:
parent
6708375c37
commit
f578d89ccc
18
ocr_pyflow
18
ocr_pyflow
@ -72,7 +72,7 @@ class OCRWorkflow(WorkflowRunner):
|
||||
###
|
||||
mkdir_jobs = []
|
||||
mkdir_job_number = 0
|
||||
for job in jobs:
|
||||
for job in self.jobs:
|
||||
mkdir_job_number += 1
|
||||
cmd = 'mkdir -p "%s" "%s" "%s" "%s"' % (
|
||||
os.path.join(job["output_dir"], "hocr_files"),
|
||||
@ -88,7 +88,7 @@ class OCRWorkflow(WorkflowRunner):
|
||||
###
|
||||
split_jobs = []
|
||||
split_job_number = 0
|
||||
for job in jobs:
|
||||
for job in self.jobs:
|
||||
split_job_number += 1
|
||||
if job["basename"].endswith(".tif") or job["basename"].endswith(".tiff"):
|
||||
# TODO: Make the following command work
|
||||
@ -118,7 +118,7 @@ class OCRWorkflow(WorkflowRunner):
|
||||
###
|
||||
ocropusnlbin_jobs = []
|
||||
ocropusnlbin_job_number = 0
|
||||
for job in jobs:
|
||||
for job in self.jobs:
|
||||
ocropusnlbin_job_number += 1
|
||||
cmd = 'ocropus-nlbin -o "%s" "%s"' % (
|
||||
os.path.join(job["output_dir"], "tmp", "ocropus-nlbin"),
|
||||
@ -133,7 +133,7 @@ class OCRWorkflow(WorkflowRunner):
|
||||
self.waitForTasks()
|
||||
tesseract_jobs = []
|
||||
tesseract_job_number = 0
|
||||
for job in jobs:
|
||||
for job in self.jobs:
|
||||
# This list is empty if you don't wait for ocropus_nlbin_jobs to complete
|
||||
for file in filter(lambda x: x.endswith(".bin.png"), os.listdir(os.path.join(job["output_dir"], "tmp", "ocropus-nlbin"))):
|
||||
tesseract_job_number += 1
|
||||
@ -150,7 +150,7 @@ class OCRWorkflow(WorkflowRunner):
|
||||
###
|
||||
hocr_to_teip5_jobs = []
|
||||
hocr_to_teip5_job_number = 0
|
||||
for job in jobs:
|
||||
for job in self.jobs:
|
||||
hocr_to_teip5_job_number += 1
|
||||
cmd = 'parse_hocr "%s" "%s"' % (
|
||||
os.path.join(job["output_dir"], "tmp", "tesseract"),
|
||||
@ -164,7 +164,7 @@ class OCRWorkflow(WorkflowRunner):
|
||||
###
|
||||
move_hocr_jobs = []
|
||||
move_hocr_job_number = 0
|
||||
for job in jobs:
|
||||
for job in self.jobs:
|
||||
move_hocr_job_number += 1
|
||||
cmd = 'mv "%s"/*.hocr "%s"' % (
|
||||
os.path.join(job["output_dir"], "tmp", "tesseract"),
|
||||
@ -178,7 +178,7 @@ class OCRWorkflow(WorkflowRunner):
|
||||
###
|
||||
pdf_merge_jobs = []
|
||||
pdf_merge_job_number = 0
|
||||
for job in jobs:
|
||||
for job in self.jobs:
|
||||
pdf_merge_job_number += 1
|
||||
cmd = 'pdftk "%s"/*.pdf cat output "%s"' % (
|
||||
os.path.join(job["output_dir"], "tmp", "tesseract"),
|
||||
@ -192,7 +192,7 @@ class OCRWorkflow(WorkflowRunner):
|
||||
###
|
||||
pdf_to_txt_jobs = []
|
||||
pdf_to_txt_job_number = 0
|
||||
for job in jobs:
|
||||
for job in self.jobs:
|
||||
pdf_to_txt_job_number += 1
|
||||
cmd = 'pdftotext -raw "%s"' % (
|
||||
os.path.join(job["output_dir"], job["basename"].rsplit(".", 1)[0] + ".pdf")
|
||||
@ -206,7 +206,7 @@ class OCRWorkflow(WorkflowRunner):
|
||||
cleanup_jobs = []
|
||||
cleanup_job_counter = 0
|
||||
if not self.keepIntermediates:
|
||||
for job in jobs:
|
||||
for job in self.jobs:
|
||||
cleanup_job_counter += 1
|
||||
cmd = 'rm -r "%s"' % (
|
||||
os.path.join(job["output_dir"], "tmp")
|
||||
|
Loading…
x
Reference in New Issue
Block a user