mirror of
https://gitlab.ub.uni-bielefeld.de/sfb1288inf/ocr.git
synced 2025-01-13 18:20:34 +00:00
Update
This commit is contained in:
parent
5e11fcae01
commit
5e43e09beb
22
ocr
22
ocr
@ -84,14 +84,17 @@ class OCRWorkflow(WorkflowRunner):
|
|||||||
create_output_directories_job_number = 0
|
create_output_directories_job_number = 0
|
||||||
for job in self.jobs:
|
for job in self.jobs:
|
||||||
create_output_directories_job_number += 1
|
create_output_directories_job_number += 1
|
||||||
cmd = 'mkdir -p "%s" "%s" "%s" "%s" "%s" "%s"' % (
|
cmd = 'mkdir -p "%s" "%s" "%s" "%s"' % (
|
||||||
os.path.join(job["output_dir"], "tmp", "binarized_png"),
|
|
||||||
os.path.join(job["output_dir"], "tmp", "hocr"),
|
os.path.join(job["output_dir"], "tmp", "hocr"),
|
||||||
os.path.join(job["output_dir"], "tmp", "normalized_png"),
|
|
||||||
os.path.join(job["output_dir"], "tmp", "pdf"),
|
os.path.join(job["output_dir"], "tmp", "pdf"),
|
||||||
os.path.join(job["output_dir"], "tmp", "tiff"),
|
os.path.join(job["output_dir"], "tmp", "tiff"),
|
||||||
os.path.join(job["output_dir"], "tmp", "txt")
|
os.path.join(job["output_dir"], "tmp", "txt")
|
||||||
)
|
)
|
||||||
|
if not self.skipBinarization:
|
||||||
|
cmd += ' "%s" "%s"' % (
|
||||||
|
os.path.join(job["output_dir"], "tmp", "binarized_png"),
|
||||||
|
os.path.join(job["output_dir"], "tmp", "normalized_png"),
|
||||||
|
)
|
||||||
create_output_directories_jobs.append(self.addTask(label="create_output_directories_job_-_%i" % (create_output_directories_job_number), command=cmd))
|
create_output_directories_jobs.append(self.addTask(label="create_output_directories_job_-_%i" % (create_output_directories_job_number), command=cmd))
|
||||||
|
|
||||||
###
|
###
|
||||||
@ -237,20 +240,23 @@ class OCRWorkflow(WorkflowRunner):
|
|||||||
if self.keepIntermediates:
|
if self.keepIntermediates:
|
||||||
for job in self.jobs:
|
for job in self.jobs:
|
||||||
cleanup_job_counter += 1
|
cleanup_job_counter += 1
|
||||||
cmd = 'mv "%s"/*.bin.png "%s" && mv "%s"/*.hocr "%s" && mv "%s"/*.nrm.png "%s" && mv "%s"/*.pdf "%s" && mv "%s"/*.tif "%s" && mv "%s"/*.txt "%s"' % (
|
cmd = 'mv "%s"/*.hocr "%s" && mv "%s"/*.pdf "%s" && mv "%s"/*.tif "%s" && mv "%s"/*.txt "%s"' % (
|
||||||
os.path.join(job["output_dir"], "tmp"),
|
|
||||||
os.path.join(job["output_dir"], "tmp", "binarized_png"),
|
|
||||||
os.path.join(job["output_dir"], "tmp"),
|
os.path.join(job["output_dir"], "tmp"),
|
||||||
os.path.join(job["output_dir"], "tmp", "hocr"),
|
os.path.join(job["output_dir"], "tmp", "hocr"),
|
||||||
os.path.join(job["output_dir"], "tmp"),
|
os.path.join(job["output_dir"], "tmp"),
|
||||||
os.path.join(job["output_dir"], "tmp", "normalized_png"),
|
|
||||||
os.path.join(job["output_dir"], "tmp"),
|
|
||||||
os.path.join(job["output_dir"], "tmp", "pdf"),
|
os.path.join(job["output_dir"], "tmp", "pdf"),
|
||||||
os.path.join(job["output_dir"], "tmp"),
|
os.path.join(job["output_dir"], "tmp"),
|
||||||
os.path.join(job["output_dir"], "tmp", "tiff"),
|
os.path.join(job["output_dir"], "tmp", "tiff"),
|
||||||
os.path.join(job["output_dir"], "tmp"),
|
os.path.join(job["output_dir"], "tmp"),
|
||||||
os.path.join(job["output_dir"], "tmp", "txt")
|
os.path.join(job["output_dir"], "tmp", "txt")
|
||||||
)
|
)
|
||||||
|
if not self.skipBinarization:
|
||||||
|
cmd += ' && mv "%s"/*.bin.png "%s" && mv "%s"/*.nrm.png "%s"' % (
|
||||||
|
os.path.join(job["output_dir"], "tmp"),
|
||||||
|
os.path.join(job["output_dir"], "tmp", "binarized_png"),
|
||||||
|
os.path.join(job["output_dir"], "tmp"),
|
||||||
|
os.path.join(job["output_dir"], "tmp", "normalized_png"),
|
||||||
|
)
|
||||||
cleanup_jobs.append(self.addTask(label="cleanup_job_-_%i" % (cleanup_job_counter), command=cmd, dependencies=hocr_to_tei_jobs + pdf_merge_jobs + txt_merge_jobs))
|
cleanup_jobs.append(self.addTask(label="cleanup_job_-_%i" % (cleanup_job_counter), command=cmd, dependencies=hocr_to_tei_jobs + pdf_merge_jobs + txt_merge_jobs))
|
||||||
else:
|
else:
|
||||||
for job in self.jobs:
|
for job in self.jobs:
|
||||||
|
Loading…
x
Reference in New Issue
Block a user