mirror of
https://gitlab.ub.uni-bielefeld.de/sfb1288inf/ocr.git
synced 2025-01-31 02:19:02 +00:00
Start one ocropus-nlbin job per page instead of one per document
This commit is contained in:
parent
2648f40b92
commit
a947e36997
5
ocr
5
ocr
@ -116,13 +116,16 @@ class OCRWorkflow(WorkflowRunner):
|
|||||||
# Task "ocropus_nlbin_job": binarize tiff files from previous split
|
# Task "ocropus_nlbin_job": binarize tiff files from previous split
|
||||||
# Dependencies: split_jobs
|
# Dependencies: split_jobs
|
||||||
###
|
###
|
||||||
|
self.waitForTasks()
|
||||||
ocropusnlbin_jobs = []
|
ocropusnlbin_jobs = []
|
||||||
ocropusnlbin_job_number = 0
|
ocropusnlbin_job_number = 0
|
||||||
for job in self.jobs:
|
for job in self.jobs:
|
||||||
|
# This list is empty if you don't wait for split_jobs to complete
|
||||||
|
for file in os.listdir(os.path.join(job["output_dir"], "tmp", "tiff_files")):
|
||||||
ocropusnlbin_job_number += 1
|
ocropusnlbin_job_number += 1
|
||||||
cmd = 'ocropus-nlbin -o "%s" "%s"/*' % (
|
cmd = 'ocropus-nlbin -o "%s" "%s"/*' % (
|
||||||
os.path.join(job["output_dir"], "tmp", "ocropus-nlbin"),
|
os.path.join(job["output_dir"], "tmp", "ocropus-nlbin"),
|
||||||
os.path.join(job["output_dir"], "tmp", "tiff_files")
|
os.path.join(job["output_dir"], "tmp", "tiff_files", file)
|
||||||
)
|
)
|
||||||
ocropusnlbin_jobs.append(self.addTask(label="ocropusnlbin_job_-_%i" % (ocropusnlbin_job_number), command=cmd, dependencies=split_jobs))
|
ocropusnlbin_jobs.append(self.addTask(label="ocropusnlbin_job_-_%i" % (ocropusnlbin_job_number), command=cmd, dependencies=split_jobs))
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user