# Patch against the `ocr` script: turns `--zip` from a boolean switch into a
# string option and uses self.zip (presumably the parsed `--zip` value) as a
# prefix for the generated archive names.
#
# Hunk 1 (parse_arguments): removes action='store_true' and default=False,
#   adds default='ocr-result-files' and type=str; dest and help are unchanged.
# Hunks 2-5 (OCRWorkflow): each zip command gains a second "%s" filled from
#   self.zip, which renames the archives:
#     all.zip -> "<zip>"-all-files.zip    pdf.zip -> "<zip>"-pdf.zip
#     txt.zip -> "<zip>"-txt.zip          xml.zip -> "<zip>"-xml.zip
#
# NOTE(review): the default is now a non-empty string, so the option has a
#   value even when it is omitted; code outside this patch that tests it for
#   truthiness would presumably always take the zip branch -- confirm.
# NOTE(review): self.zip is substituted between double quotes without any
#   escaping; if cmd is run through a shell, characters such as `"`, `$` or
#   backticks in the value would be interpreted -- consider shlex.quote.
#   TODO confirm how addTask executes cmd.
# NOTE(review): the help text still describes an on/off switch and does not
#   mention that the value is used as a file-name prefix.
# NOTE(review): `*/*.{pdf,txt,xml}` relies on brace expansion, which plain
#   POSIX sh does not provide -- presumably the task shell is bash; verify.
diff --git a/ocr b/ocr index a25e4c2..0c9eafb 100755 --- a/ocr +++ b/ocr @@ -67,9 +67,9 @@ def parse_arguments(): ) parser.add_argument( '--zip', - action='store_true', - default=False, + default='ocr-result-files', dest='zip', + type=str, help='package result files in zip bundles', required=False ) @@ -400,8 +400,9 @@ class OCRWorkflow(WorkflowRunner): all_zip_job_dependencies = (hocr_to_tei_jobs + pdf_merge_jobs + txt_merge_jobs) - cmd = 'cd "%s" && zip all.zip */*.{pdf,txt,xml} -x "pyflow.data*" && cd -' % ( - self.output_dir + cmd = 'cd "%s" && zip "%s"-all-files.zip */*.{pdf,txt,xml} -x "pyflow.data*" && cd -' % ( + self.output_dir, + self.zip ) all_zip_jobs.append( self.addTask( @@ -413,8 +414,9 @@ class OCRWorkflow(WorkflowRunner): pdf_zip_jobs = [] pdf_zip_job_dependencies = all_zip_jobs - cmd = 'cd "%s" && zip -m pdf.zip */*.pdf -x "pyflow.data*" && cd -' % ( - self.output_dir + cmd = 'cd "%s" && zip -m "%s"-pdf.zip */*.pdf -x "pyflow.data*" && cd -' % ( + self.output_dir, + self.zip ) pdf_zip_jobs.append( self.addTask( @@ -426,8 +428,9 @@ class OCRWorkflow(WorkflowRunner): txt_zip_jobs = [] txt_zip_job_dependencies = all_zip_jobs - cmd = 'cd "%s" && zip -m txt.zip */*.txt -x "pyflow.data*" && cd -' % ( - self.output_dir + cmd = 'cd "%s" && zip -m "%s"-txt.zip */*.txt -x "pyflow.data*" && cd -' % ( + self.output_dir, + self.zip ) txt_zip_jobs.append( self.addTask( @@ -439,8 +442,9 @@ class OCRWorkflow(WorkflowRunner): xml_zip_jobs = [] xml_zip_job_dependencies = all_zip_jobs - cmd = 'cd "%s" && zip -m xml.zip */*.xml -x "pyflow.data*" && cd -' % ( - self.output_dir + cmd = 'cd "%s" && zip -m "%s"-xml.zip */*.xml -x "pyflow.data*" && cd -' % ( + self.output_dir, + self.zip ) xml_zip_jobs.append( self.addTask(