mirror of
				https://gitlab.ub.uni-bielefeld.de/sfb1288inf/ocr.git
				synced 2025-11-04 02:32:44 +00:00 
			
		
		
		
	add zip creation of results
This commit is contained in:
		@@ -19,6 +19,7 @@ RUN apt-get update \
 | 
			
		||||
      python2.7 \
 | 
			
		||||
      python3.5 \
 | 
			
		||||
      wget \
 | 
			
		||||
      zip \
 | 
			
		||||
 && rm -rf /var/lib/apt/lists/*
 | 
			
		||||
 | 
			
		||||
ENV OCROPY_VERSION 1.3.3
 | 
			
		||||
 
 | 
			
		||||
							
								
								
									
										56
									
								
								ocr
									
									
									
									
									
								
							
							
						
						
									
										56
									
								
								ocr
									
									
									
									
									
								
							@@ -16,6 +16,7 @@ import os
 | 
			
		||||
import re
 | 
			
		||||
import sys
 | 
			
		||||
from pyflow import WorkflowRunner
 | 
			
		||||
from zipfile import ZipFile
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def parse_arguments():
 | 
			
		||||
@@ -74,6 +75,7 @@ class OCRWorkflow(WorkflowRunner):
 | 
			
		||||
        self.keep_intermediates = args.keep_intermediates
 | 
			
		||||
        self.lang = args.lang
 | 
			
		||||
        self.n_cores = args.n_cores
 | 
			
		||||
        self.output_dir = args.output_dir
 | 
			
		||||
 | 
			
		||||
    def workflow(self):
 | 
			
		||||
        if len(self.jobs) == 0:
 | 
			
		||||
@@ -384,6 +386,60 @@ class OCRWorkflow(WorkflowRunner):
 | 
			
		||||
                )
 | 
			
		||||
            )
 | 
			
		||||
 | 
			
		||||
        all_zip_jobs = []
 | 
			
		||||
        all_zip_job_dependencies = (hocr_to_tei_jobs
 | 
			
		||||
                                    + pdf_merge_jobs
 | 
			
		||||
                                    + txt_merge_jobs)
 | 
			
		||||
        cmd = 'cd "%s" && zip all.zip */*.{pdf,txt,xml} -x "pyflow.data*" && cd -' % (
 | 
			
		||||
            self.output_dir
 | 
			
		||||
        )
 | 
			
		||||
        all_zip_jobs.append(
 | 
			
		||||
            self.addTask(
 | 
			
		||||
                command=cmd,
 | 
			
		||||
                dependencies=all_zip_job_dependencies,
 | 
			
		||||
                label='all_zip_job_-_%i' % (index)
 | 
			
		||||
            )
 | 
			
		||||
        )
 | 
			
		||||
 | 
			
		||||
        pdf_zip_jobs = []
 | 
			
		||||
        pdf_zip_job_dependencies = pdf_merge_jobs
 | 
			
		||||
        cmd = 'cd "%s" && zip pdf.zip */*.pdf -x "pyflow.data*" && cd -' % (
 | 
			
		||||
            self.output_dir
 | 
			
		||||
        )
 | 
			
		||||
        pdf_zip_jobs.append(
 | 
			
		||||
            self.addTask(
 | 
			
		||||
                command=cmd,
 | 
			
		||||
                dependencies=pdf_zip_job_dependencies,
 | 
			
		||||
                label='pdf_zip_job_-_%i' % (index)
 | 
			
		||||
            )
 | 
			
		||||
        )
 | 
			
		||||
 | 
			
		||||
        txt_zip_jobs = []
 | 
			
		||||
        txt_zip_job_dependencies = txt_merge_jobs
 | 
			
		||||
        cmd = 'cd "%s" && zip txt.zip */*.txt -x "pyflow.data*" && cd -' % (
 | 
			
		||||
            self.output_dir
 | 
			
		||||
        )
 | 
			
		||||
        txt_zip_jobs.append(
 | 
			
		||||
            self.addTask(
 | 
			
		||||
                command=cmd,
 | 
			
		||||
                dependencies=txt_zip_job_dependencies,
 | 
			
		||||
                label='txt_zip_job_-_%i' % (index)
 | 
			
		||||
            )
 | 
			
		||||
        )
 | 
			
		||||
 | 
			
		||||
        xml_zip_jobs = []
 | 
			
		||||
        xml_zip_job_dependencies = hocr_to_tei_jobs
 | 
			
		||||
        cmd = 'cd "%s" && zip xml.zip */*.xml -x "pyflow.data*" && cd -' % (
 | 
			
		||||
            self.output_dir
 | 
			
		||||
        )
 | 
			
		||||
        xml_zip_jobs.append(
 | 
			
		||||
            self.addTask(
 | 
			
		||||
                command=cmd,
 | 
			
		||||
                dependencies=xml_zip_job_dependencies,
 | 
			
		||||
                label='xml_zip_job_-_%i' % (index)
 | 
			
		||||
            )
 | 
			
		||||
        )
 | 
			
		||||
 | 
			
		||||
        '''
 | 
			
		||||
        ' ##################################################
 | 
			
		||||
        ' # Cleanup                                        #
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user