mirror of
https://gitlab.ub.uni-bielefeld.de/sfb1288inf/ocr.git
synced 2024-12-26 19:14:18 +00:00
Aktualisieren ocr
This commit is contained in:
parent
fd7ad08e1e
commit
d4218fcd7c
14
ocr
14
ocr
@ -120,14 +120,12 @@ class OCRWorkflow(WorkflowRunner):
|
||||
ocropusnlbin_jobs = []
|
||||
ocropusnlbin_job_number = 0
|
||||
for job in self.jobs:
|
||||
# This list is empty if you don't wait for ocropus_nlbin_jobs to complete
|
||||
for file in os.listdir(os.path.join(job["output_dir"], "tmp", "tiff_files")):
|
||||
ocropusnlbin_job_number += 1
|
||||
cmd = 'ocropus-nlbin -o "%s" "%s"' % (
|
||||
os.path.join(job["output_dir"], "tmp", "ocropus-nlbin"),
|
||||
os.path.join(job["output_dir"], "tmp", "tiff_files", file)
|
||||
)
|
||||
ocropusnlbin_jobs.append(self.addTask(label="ocropusnlbin_job_-_%i" % (ocropusnlbin_job_number), command=cmd, dependencies=split_jobs))
|
||||
ocropusnlbin_job_number += 1
|
||||
cmd = 'ocropus-nlbin -o "%s" "%s"/*' % (
|
||||
os.path.join(job["output_dir"], "tmp", "ocropus-nlbin"),
|
||||
os.path.join(job["output_dir"], "tmp", "tiff_files")
|
||||
)
|
||||
ocropusnlbin_jobs.append(self.addTask(label="ocropusnlbin_job_-_%i" % (ocropusnlbin_job_number), command=cmd, dependencies=split_jobs, nCores=max(1, self.nCores / length(self.jobs)))
|
||||
|
||||
###
|
||||
# Task "tesseract_job": perform OCR on binarized images
|
||||
|
Loading…
Reference in New Issue
Block a user