mirror of
https://gitlab.ub.uni-bielefeld.de/sfb1288inf/ocr.git
synced 2025-01-30 21:29:01 +00:00
Add a switch for zip functionality
This commit is contained in:
parent
dfc05be7db
commit
6c4a642cb7
30
ocr
30
ocr
@ -65,6 +65,14 @@ def parse_arguments():
|
|||||||
required=False,
|
required=False,
|
||||||
type=int
|
type=int
|
||||||
)
|
)
|
||||||
|
parser.add_argument(
|
||||||
|
'--zip',
|
||||||
|
action='store_true',
|
||||||
|
default=False,
|
||||||
|
dest='zip',
|
||||||
|
help='package result files in zip bundles',
|
||||||
|
required=False
|
||||||
|
)
|
||||||
return parser.parse_args()
|
return parser.parse_args()
|
||||||
|
|
||||||
|
|
||||||
@ -76,6 +84,7 @@ class OCRWorkflow(WorkflowRunner):
|
|||||||
self.lang = args.lang
|
self.lang = args.lang
|
||||||
self.n_cores = args.n_cores
|
self.n_cores = args.n_cores
|
||||||
self.output_dir = args.output_dir
|
self.output_dir = args.output_dir
|
||||||
|
self.zip = args.zip
|
||||||
|
|
||||||
def workflow(self):
|
def workflow(self):
|
||||||
if len(self.jobs) == 0:
|
if len(self.jobs) == 0:
|
||||||
@ -386,6 +395,7 @@ class OCRWorkflow(WorkflowRunner):
|
|||||||
)
|
)
|
||||||
)
|
)
|
||||||
|
|
||||||
|
if self.zip:
|
||||||
all_zip_jobs = []
|
all_zip_jobs = []
|
||||||
all_zip_job_dependencies = (hocr_to_tei_jobs
|
all_zip_job_dependencies = (hocr_to_tei_jobs
|
||||||
+ pdf_merge_jobs
|
+ pdf_merge_jobs
|
||||||
@ -397,46 +407,46 @@ class OCRWorkflow(WorkflowRunner):
|
|||||||
self.addTask(
|
self.addTask(
|
||||||
command=cmd,
|
command=cmd,
|
||||||
dependencies=all_zip_job_dependencies,
|
dependencies=all_zip_job_dependencies,
|
||||||
label='all_zip_job_-_%i' % (index)
|
label='all_zip_job'
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
|
|
||||||
pdf_zip_jobs = []
|
pdf_zip_jobs = []
|
||||||
pdf_zip_job_dependencies = pdf_merge_jobs
|
pdf_zip_job_dependencies = all_zip_jobs
|
||||||
cmd = 'cd "%s" && zip pdf.zip */*.pdf -x "pyflow.data*" && cd -' % (
|
cmd = 'cd "%s" && zip -m pdf.zip */*.pdf -x "pyflow.data*" && cd -' % (
|
||||||
self.output_dir
|
self.output_dir
|
||||||
)
|
)
|
||||||
pdf_zip_jobs.append(
|
pdf_zip_jobs.append(
|
||||||
self.addTask(
|
self.addTask(
|
||||||
command=cmd,
|
command=cmd,
|
||||||
dependencies=pdf_zip_job_dependencies,
|
dependencies=pdf_zip_job_dependencies,
|
||||||
label='pdf_zip_job_-_%i' % (index)
|
label='pdf_zip_job'
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
|
|
||||||
txt_zip_jobs = []
|
txt_zip_jobs = []
|
||||||
txt_zip_job_dependencies = txt_merge_jobs
|
txt_zip_job_dependencies = all_zip_jobs
|
||||||
cmd = 'cd "%s" && zip txt.zip */*.txt -x "pyflow.data*" && cd -' % (
|
cmd = 'cd "%s" && zip -m txt.zip */*.txt -x "pyflow.data*" && cd -' % (
|
||||||
self.output_dir
|
self.output_dir
|
||||||
)
|
)
|
||||||
txt_zip_jobs.append(
|
txt_zip_jobs.append(
|
||||||
self.addTask(
|
self.addTask(
|
||||||
command=cmd,
|
command=cmd,
|
||||||
dependencies=txt_zip_job_dependencies,
|
dependencies=txt_zip_job_dependencies,
|
||||||
label='txt_zip_job_-_%i' % (index)
|
label='txt_zip_job'
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
|
|
||||||
xml_zip_jobs = []
|
xml_zip_jobs = []
|
||||||
xml_zip_job_dependencies = hocr_to_tei_jobs
|
xml_zip_job_dependencies = all_zip_jobs
|
||||||
cmd = 'cd "%s" && zip xml.zip */*.xml -x "pyflow.data*" && cd -' % (
|
cmd = 'cd "%s" && zip -m xml.zip */*.xml -x "pyflow.data*" && cd -' % (
|
||||||
self.output_dir
|
self.output_dir
|
||||||
)
|
)
|
||||||
xml_zip_jobs.append(
|
xml_zip_jobs.append(
|
||||||
self.addTask(
|
self.addTask(
|
||||||
command=cmd,
|
command=cmd,
|
||||||
dependencies=xml_zip_job_dependencies,
|
dependencies=xml_zip_job_dependencies,
|
||||||
label='xml_zip_job_-_%i' % (index)
|
label='xml_zip_job'
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user