mirror of
https://gitlab.ub.uni-bielefeld.de/sfb1288inf/ocr.git
synced 2024-12-25 18:14:17 +00:00
Add ocr to filenames
This commit is contained in:
parent
c1f5252633
commit
eb5ccf4e21
10
ocr
10
ocr
@ -75,7 +75,7 @@ def parse_arguments():
|
||||
default='ocr-result-files',
|
||||
dest='zip',
|
||||
type=str,
|
||||
help='package result files in zip bundles',
|
||||
help='package result files in zip bundles and assign a filename prefix',
|
||||
required=False
|
||||
)
|
||||
return parser.parse_args()
|
||||
@ -405,7 +405,7 @@ class OCRWorkflow(WorkflowRunner):
|
||||
all_zip_job_dependencies = (hocr_to_tei_jobs
|
||||
+ pdf_merge_jobs
|
||||
+ txt_merge_jobs)
|
||||
cmd = 'cd "%s" && zip "%s"-all-files.zip */*.{pdf,txt,xml} -x "pyflow.data*" && cd -' % (
|
||||
cmd = 'cd "%s" && zip "%s"-all-ocr-files.zip */*.{pdf,txt,xml} -x "pyflow.data*" && cd -' % (
|
||||
self.output_dir,
|
||||
self.zip
|
||||
)
|
||||
@ -419,7 +419,7 @@ class OCRWorkflow(WorkflowRunner):
|
||||
|
||||
pdf_zip_jobs = []
|
||||
pdf_zip_job_dependencies = all_zip_jobs
|
||||
cmd = 'cd "%s" && zip -m "%s"-pdf.zip */*.pdf -x "pyflow.data*" && cd -' % (
|
||||
cmd = 'cd "%s" && zip -m "%s"-ocr-pdf.zip */*.pdf -x "pyflow.data*" && cd -' % (
|
||||
self.output_dir,
|
||||
self.zip
|
||||
)
|
||||
@ -433,7 +433,7 @@ class OCRWorkflow(WorkflowRunner):
|
||||
|
||||
txt_zip_jobs = []
|
||||
txt_zip_job_dependencies = all_zip_jobs
|
||||
cmd = 'cd "%s" && zip -m "%s"-txt.zip */*.txt -x "pyflow.data*" && cd -' % (
|
||||
cmd = 'cd "%s" && zip -m "%s"-ocr-txt.zip */*.txt -x "pyflow.data*" && cd -' % (
|
||||
self.output_dir,
|
||||
self.zip
|
||||
)
|
||||
@ -447,7 +447,7 @@ class OCRWorkflow(WorkflowRunner):
|
||||
|
||||
xml_zip_jobs = []
|
||||
xml_zip_job_dependencies = all_zip_jobs
|
||||
cmd = 'cd "%s" && zip -m "%s"-xml.zip */*.xml -x "pyflow.data*" && cd -' % (
|
||||
cmd = 'cd "%s" && zip -m "%s"-ocr-xml.zip */*.xml -x "pyflow.data*" && cd -' % (
|
||||
self.output_dir,
|
||||
self.zip
|
||||
)
|
||||
|
Loading…
Reference in New Issue
Block a user