Add a switch for zip functionality

This commit is contained in:
Patrick Jentsch 2020-02-03 15:00:27 +01:00
parent dfc05be7db
commit 6c4a642cb7

30
ocr
View File

@ -65,6 +65,14 @@ def parse_arguments():
required=False, required=False,
type=int type=int
) )
parser.add_argument(
'--zip',
action='store_true',
default=False,
dest='zip',
help='package result files in zip bundles',
required=False
)
return parser.parse_args() return parser.parse_args()
@ -76,6 +84,7 @@ class OCRWorkflow(WorkflowRunner):
self.lang = args.lang self.lang = args.lang
self.n_cores = args.n_cores self.n_cores = args.n_cores
self.output_dir = args.output_dir self.output_dir = args.output_dir
self.zip = args.zip
def workflow(self): def workflow(self):
if len(self.jobs) == 0: if len(self.jobs) == 0:
@ -386,6 +395,7 @@ class OCRWorkflow(WorkflowRunner):
) )
) )
if self.zip:
all_zip_jobs = [] all_zip_jobs = []
all_zip_job_dependencies = (hocr_to_tei_jobs all_zip_job_dependencies = (hocr_to_tei_jobs
+ pdf_merge_jobs + pdf_merge_jobs
@ -397,46 +407,46 @@ class OCRWorkflow(WorkflowRunner):
self.addTask( self.addTask(
command=cmd, command=cmd,
dependencies=all_zip_job_dependencies, dependencies=all_zip_job_dependencies,
label='all_zip_job_-_%i' % (index) label='all_zip_job'
) )
) )
pdf_zip_jobs = [] pdf_zip_jobs = []
pdf_zip_job_dependencies = pdf_merge_jobs pdf_zip_job_dependencies = all_zip_jobs
cmd = 'cd "%s" && zip pdf.zip */*.pdf -x "pyflow.data*" && cd -' % ( cmd = 'cd "%s" && zip -m pdf.zip */*.pdf -x "pyflow.data*" && cd -' % (
self.output_dir self.output_dir
) )
pdf_zip_jobs.append( pdf_zip_jobs.append(
self.addTask( self.addTask(
command=cmd, command=cmd,
dependencies=pdf_zip_job_dependencies, dependencies=pdf_zip_job_dependencies,
label='pdf_zip_job_-_%i' % (index) label='pdf_zip_job'
) )
) )
txt_zip_jobs = [] txt_zip_jobs = []
txt_zip_job_dependencies = txt_merge_jobs txt_zip_job_dependencies = all_zip_jobs
cmd = 'cd "%s" && zip txt.zip */*.txt -x "pyflow.data*" && cd -' % ( cmd = 'cd "%s" && zip -m txt.zip */*.txt -x "pyflow.data*" && cd -' % (
self.output_dir self.output_dir
) )
txt_zip_jobs.append( txt_zip_jobs.append(
self.addTask( self.addTask(
command=cmd, command=cmd,
dependencies=txt_zip_job_dependencies, dependencies=txt_zip_job_dependencies,
label='txt_zip_job_-_%i' % (index) label='txt_zip_job'
) )
) )
xml_zip_jobs = [] xml_zip_jobs = []
xml_zip_job_dependencies = hocr_to_tei_jobs xml_zip_job_dependencies = all_zip_jobs
cmd = 'cd "%s" && zip xml.zip */*.xml -x "pyflow.data*" && cd -' % ( cmd = 'cd "%s" && zip -m xml.zip */*.xml -x "pyflow.data*" && cd -' % (
self.output_dir self.output_dir
) )
xml_zip_jobs.append( xml_zip_jobs.append(
self.addTask( self.addTask(
command=cmd, command=cmd,
dependencies=xml_zip_job_dependencies, dependencies=xml_zip_job_dependencies,
label='xml_zip_job_-_%i' % (index) label='xml_zip_job'
) )
) )