Add ocr to filenames

This commit is contained in:
stephan 2020-02-18 10:16:24 +01:00
parent c1f5252633
commit eb5ccf4e21

10
ocr
View File

@ -75,7 +75,7 @@ def parse_arguments():
default='ocr-result-files', default='ocr-result-files',
dest='zip', dest='zip',
type=str, type=str,
help='package result files in zip bundles', help='package result files in zip bundles and assign a filename prefix',
required=False required=False
) )
return parser.parse_args() return parser.parse_args()
@ -405,7 +405,7 @@ class OCRWorkflow(WorkflowRunner):
all_zip_job_dependencies = (hocr_to_tei_jobs all_zip_job_dependencies = (hocr_to_tei_jobs
+ pdf_merge_jobs + pdf_merge_jobs
+ txt_merge_jobs) + txt_merge_jobs)
cmd = 'cd "%s" && zip "%s"-all-files.zip */*.{pdf,txt,xml} -x "pyflow.data*" && cd -' % ( cmd = 'cd "%s" && zip "%s"-all-ocr-files.zip */*.{pdf,txt,xml} -x "pyflow.data*" && cd -' % (
self.output_dir, self.output_dir,
self.zip self.zip
) )
@ -419,7 +419,7 @@ class OCRWorkflow(WorkflowRunner):
pdf_zip_jobs = [] pdf_zip_jobs = []
pdf_zip_job_dependencies = all_zip_jobs pdf_zip_job_dependencies = all_zip_jobs
cmd = 'cd "%s" && zip -m "%s"-pdf.zip */*.pdf -x "pyflow.data*" && cd -' % ( cmd = 'cd "%s" && zip -m "%s"-ocr-pdf.zip */*.pdf -x "pyflow.data*" && cd -' % (
self.output_dir, self.output_dir,
self.zip self.zip
) )
@ -433,7 +433,7 @@ class OCRWorkflow(WorkflowRunner):
txt_zip_jobs = [] txt_zip_jobs = []
txt_zip_job_dependencies = all_zip_jobs txt_zip_job_dependencies = all_zip_jobs
cmd = 'cd "%s" && zip -m "%s"-txt.zip */*.txt -x "pyflow.data*" && cd -' % ( cmd = 'cd "%s" && zip -m "%s"-ocr-txt.zip */*.txt -x "pyflow.data*" && cd -' % (
self.output_dir, self.output_dir,
self.zip self.zip
) )
@ -447,7 +447,7 @@ class OCRWorkflow(WorkflowRunner):
xml_zip_jobs = [] xml_zip_jobs = []
xml_zip_job_dependencies = all_zip_jobs xml_zip_job_dependencies = all_zip_jobs
cmd = 'cd "%s" && zip -m "%s"-xml.zip */*.xml -x "pyflow.data*" && cd -' % ( cmd = 'cd "%s" && zip -m "%s"-ocr-xml.zip */*.xml -x "pyflow.data*" && cd -' % (
self.output_dir, self.output_dir,
self.zip self.zip
) )