mirror of
				https://gitlab.ub.uni-bielefeld.de/sfb1288inf/file-setup.git
				synced 2025-10-31 10:42:45 +00:00 
			
		
		
		
	rework how pdfs are created 2
This commit is contained in:
		| @@ -6,18 +6,21 @@ LABEL maintainer="inf_sfb1288@lists.uni-bielefeld.de" | ||||
|  | ||||
| ENV LANG=C.UTF-8 | ||||
|  | ||||
|  | ||||
| RUN mkdir /usr/share/man/man1/ # needed to install pdftk https://github.com/geerlingguy/ansible-role-java/issues/64 | ||||
| RUN apt-get update \ | ||||
|  && apt-get install -y --no-install-recommends \ | ||||
|       ca-certificates \ | ||||
|       imagemagick \ | ||||
|       python3.7 \ | ||||
|       pdftk \ | ||||
|       zip | ||||
|  | ||||
|  | ||||
| RUN rm -rf /var/lib/apt/lists/* | ||||
|  | ||||
| RUN rm -f /etc/ImageMagick-6/policy.xml | ||||
|  | ||||
| COPY file-setup /usr/local/bin | ||||
| COPY policy.xml /etc/ImageMagick-6 | ||||
|  | ||||
|  | ||||
| ENTRYPOINT ["file-setup"] | ||||
|   | ||||
							
								
								
									
										31
									
								
								file-setup
									
									
									
									
									
								
							
							
						
						
									
										31
									
								
								file-setup
									
									
									
									
									
								
							| @@ -28,22 +28,39 @@ def parse_arguments(): | ||||
| def merge_images(input_dir, output_dir, output_file_base, zip): | ||||
|     try: | ||||
|         os.mkdir(output_dir) | ||||
|         tmp_dir_path = os.path.join(input_dir, 'tmp') | ||||
|         os.mkdir(tmp_dir_path) | ||||
|         tmp_dir = os.path.join(input_dir, 'tmp') | ||||
|         os.mkdir(tmp_dir) | ||||
|     except FileExistsError: | ||||
|         pass | ||||
|     try: | ||||
|         tmp_dir = os.path.join(input_dir, 'tmp') | ||||
|         os.mkdir(tmp_dir) | ||||
|     except FileExistsError: | ||||
|         pass | ||||
|     # Sort filenames into a list ordered with version flag -v | ||||
|     cmd = 'ls -Q -v "{i}"/*.* > "{i}"/file_list.txt'.format(i=input_dir) | ||||
|     subprocess.run(cmd, shell=True) | ||||
|     cmd = ('mogrify -compress LZW -format pdf ', | ||||
|            + '-path @"{o}" "{i}"/file_list.txt').format(i=input_dir, | ||||
|                                                         o=tmp_dir_path) | ||||
|     # Convert all image files into pdf files | ||||
|     cmd = ('mogrify -compress LZW -format pdf ' | ||||
|            + '-path "{o}" @"{i}"/file_list.txt').format(i=input_dir, | ||||
|                                                         o=tmp_dir) | ||||
|     subprocess.run(cmd, shell=True) | ||||
|     # remove image files | ||||
|     cmd = 'xargs rm <{i}/file_list.txt'.format(i=input_dir) | ||||
|     subprocess.run(cmd, shell=True) | ||||
|     # remove file list | ||||
|     cmd = 'rm "{i}"/file_list.txt'.format(i=input_dir) | ||||
|     subprocess.run(cmd, shell=True) | ||||
|     cmd = ('pdftk "{tmp_dir_path}"/*.pdf cat ', | ||||
|            + 'output "{o}"/"{ofb}".pdf').format(i=tmp_dir_path, | ||||
|     # join all pdfs into one pdf | ||||
|     cmd = ('pdftk "{tmp_i}"/*.pdf cat ' | ||||
|            + 'output "{o}"/"{ofb}".pdf').format(tmp_i=tmp_dir, | ||||
|                                                 o=output_dir, | ||||
|                                                 ofb=output_file_base) | ||||
|     subprocess.run(cmd, shell=True) | ||||
|     # remove single pdf files | ||||
|     cmd = 'rm -fr {i}/tmp'.format(i=input_dir) | ||||
|     subprocess.run(cmd, shell=True) | ||||
|     # zip stuff | ||||
|     if zip is not None: | ||||
|         # Remove .zip file extension if provided | ||||
|         if zip.endswith('.zip'): | ||||
|   | ||||
							
								
								
									
										87
									
								
								policy.xml
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										87
									
								
								policy.xml
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,87 @@ | ||||
| <?xml version="1.0" encoding="UTF-8"?> | ||||
| <!DOCTYPE policymap [ | ||||
| <!ELEMENT policymap (policy)+> | ||||
| <!ELEMENT policy (#PCDATA)> | ||||
| <!ATTLIST policy domain (delegate|coder|filter|path|resource) #IMPLIED> | ||||
| <!ATTLIST policy name CDATA #IMPLIED> | ||||
| <!ATTLIST policy rights CDATA #IMPLIED> | ||||
| <!ATTLIST policy pattern CDATA #IMPLIED> | ||||
| <!ATTLIST policy value CDATA #IMPLIED> | ||||
| ]> | ||||
| <!-- | ||||
|   Configure ImageMagick policies. | ||||
|  | ||||
|   Domains include system, delegate, coder, filter, path, or resource. | ||||
|  | ||||
|   Rights include none, read, write, and execute.  Use | to combine them, | ||||
|   for example: "read | write" to permit read from, or write to, a path. | ||||
|  | ||||
|   Use a glob expression as a pattern. | ||||
|  | ||||
|   Suppose we do not want users to process MPEG video images: | ||||
|  | ||||
|     <policy domain="delegate" rights="none" pattern="mpeg:decode" /> | ||||
|  | ||||
|   Here we do not want users reading images from HTTP: | ||||
|  | ||||
|     <policy domain="coder" rights="none" pattern="HTTP" /> | ||||
|  | ||||
|   Let's prevent users from executing any image filters: | ||||
|  | ||||
|     <policy domain="filter" rights="none" pattern="*" /> | ||||
|  | ||||
|   The /repository file system is restricted to read only.  We use a glob | ||||
|   expression to match all paths that start with /repository: | ||||
|  | ||||
|     <policy domain="path" rights="read" pattern="/repository/*" /> | ||||
|  | ||||
|   Let's prevent possible exploits by removing the right to use indirect reads. | ||||
|  | ||||
|     <policy domain="path" rights="none" pattern="@*" /> | ||||
|  | ||||
|   Any large image is cached to disk rather than memory: | ||||
|  | ||||
|     <policy domain="resource" name="area" value="1GB"/> | ||||
|  | ||||
|   Define arguments for the memory, map, area, width, height, and disk resources | ||||
|   with SI prefixes (e.g. 100MB).  In addition, resource policies are maximums | ||||
|   for each instance of ImageMagick (e.g. policy memory limit 1GB, -limit 2GB | ||||
|   exceeds policy maximum so memory limit is 1GB). | ||||
| --> | ||||
| <policymap> | ||||
|   <!-- <policy domain="system" name="shred" value="2"/> --> | ||||
|   <!-- <policy domain="system" name="precision" value="6"/> --> | ||||
|   <!-- <policy domain="system" name="memory-map" value="anonymous"/> --> | ||||
|   <!-- <policy domain="system" name="max-memory-request" value="256MiB"/> --> | ||||
|   <!-- <policy domain="resource" name="temporary-path" value="/tmp"/> --> | ||||
|   <policy domain="resource" name="memory" value="256MiB"/> | ||||
|   <policy domain="resource" name="map" value="512MiB"/> | ||||
|   <policy domain="resource" name="width" value="16KP"/> | ||||
|   <policy domain="resource" name="height" value="16KP"/> | ||||
|   <!-- <policy domain="resource" name="list-length" value="128"/> --> | ||||
|   <policy domain="resource" name="area" value="128MB"/> | ||||
|   <policy domain="resource" name="disk" value="1GiB"/> | ||||
|   <!-- <policy domain="resource" name="file" value="768"/> --> | ||||
|   <!-- <policy domain="resource" name="thread" value="4"/> --> | ||||
|   <!-- <policy domain="resource" name="throttle" value="0"/> --> | ||||
|   <!-- <policy domain="resource" name="time" value="3600"/> --> | ||||
|   <!-- <policy domain="coder" rights="none" pattern="MVG" /> --> | ||||
|   <policy domain="module" rights="read|write" pattern="{PS,PDF,XPS}" /> | ||||
|   <!-- <policy domain="delegate" rights="none" pattern="HTTPS" /> --> | ||||
|   <!-- <policy domain="path" rights="none" pattern="@*" /> --> | ||||
|   <!-- <policy domain="cache" name="memory-map" value="anonymous"/> --> | ||||
|   <!-- <policy domain="cache" name="synchronize" value="True"/> --> | ||||
|   <!-- <policy domain="cache" name="shared-secret" value="passphrase" stealth="true"/> --> | ||||
|   <!-- <policy domain="system" name="pixel-cache-memory" value="anonymous"/> --> | ||||
|   <!-- <policy domain="system" name="shred" value="2"/> --> | ||||
|   <!-- <policy domain="system" name="precision" value="6"/> --> | ||||
|   <!-- not needed, because MVG must be requested explicitly with the mvg: prefix --> | ||||
|   <!-- <policy domain="delegate" rights="none" pattern="MVG" /> --> | ||||
|   <!-- use curl --> | ||||
|   <policy domain="delegate" rights="none" pattern="URL" /> | ||||
|   <policy domain="delegate" rights="none" pattern="HTTPS" /> | ||||
|   <policy domain="delegate" rights="none" pattern="HTTP" /> | ||||
|   <!-- to avoid producing an image that contains password text --> | ||||
|   <!-- <policy domain="path" rights="none" pattern="@*"/> --> | ||||
| </policymap> | ||||
|  | ||||
| @@ -14,6 +14,7 @@ GID = str(os.getgid()) | ||||
| parser = ArgumentParser(add_help=False) | ||||
| parser.add_argument('-i') | ||||
| parser.add_argument('-o') | ||||
| parser.add_argument('-f') | ||||
| args, remaining_args = parser.parse_known_args() | ||||
|  | ||||
| cmd = ['docker', 'run', '--rm', '-it', '-u', '{}:{}'.format(UID, GID)] | ||||
| @@ -25,5 +26,4 @@ if args.i is not None: | ||||
|     remaining_args.insert(0, CONTAINER_INPUT_DIR) | ||||
| cmd.append(CONTAINER_IMAGE) | ||||
| cmd += remaining_args | ||||
|  | ||||
| subprocess.run(cmd) | ||||
|   | ||||
		Reference in New Issue
	
	Block a user