mirror of
				https://gitlab.ub.uni-bielefeld.de/sfb1288inf/file-setup.git
				synced 2025-10-31 10:42:45 +00:00 
			
		
		
		
	Less complex, lower output file size.
This commit is contained in:
		
							
								
								
									
										14
									
								
								Dockerfile
									
									
									
									
									
								
							
							
						
						
									
										14
									
								
								Dockerfile
									
									
									
									
									
								
							| @@ -1,26 +1,24 @@ | ||||
| FROM debian:10-slim | ||||
|  | ||||
|  | ||||
| LABEL maintainer="inf_sfb1288@lists.uni-bielefeld.de" | ||||
| LABEL authors="Patrick Jentsch <p.jentsch@uni-bielefeld.de>, Stephan Porada <sporada@uni-bielefeld.de>" | ||||
|  | ||||
|  | ||||
| ENV LANG=C.UTF-8 | ||||
|  | ||||
| RUN mkdir /usr/share/man/man1/ # needed to install pdftk https://github.com/geerlingguy/ansible-role-java/issues/64 | ||||
|  | ||||
| RUN apt-get update \ | ||||
|  && apt-get install -y --no-install-recommends \ | ||||
|       ca-certificates \ | ||||
|       imagemagick \ | ||||
|       python3.7 \ | ||||
|       pdftk \ | ||||
|       zip | ||||
|       zip \ | ||||
|  && rm -r /var/lib/apt/lists/* | ||||
|  | ||||
|  | ||||
| RUN rm -rf /var/lib/apt/lists/* | ||||
| RUN rm -f /etc/ImageMagick-6/policy.xml | ||||
| RUN cat /etc/ImageMagick-6/policy.xml && rm /etc/ImageMagick-6/policy.xml | ||||
|  | ||||
|  | ||||
| COPY file-setup /usr/local/bin | ||||
| COPY policy.xml /etc/ImageMagick-6 | ||||
|  | ||||
|  | ||||
| ENTRYPOINT ["file-setup"] | ||||
|   | ||||
							
								
								
									
										53
									
								
								file-setup
									
									
									
									
									
								
							
							
						
						
									
										53
									
								
								file-setup
									
									
									
									
									
								
							| @@ -3,20 +3,21 @@ | ||||
|  | ||||
|  | ||||
| """ | ||||
| merge_images | ||||
|  | ||||
| Usage:  For usage instructions run with option --help | ||||
| Author: Stephan Porada <sporada@uni-bielefeld.de> | ||||
| file-setup | ||||
|  | ||||
| Usage: For usage instructions run with option --help | ||||
| Authors: Patrick Jentsch <p.jentsch@uni-bielefeld.de> | ||||
|          Stephan Porada <sporada@uni-bielefeld.de> | ||||
| """ | ||||
|  | ||||
| from argparse import ArgumentParser | ||||
| import os | ||||
| import re | ||||
| import subprocess | ||||
|  | ||||
|  | ||||
| def parse_arguments(): | ||||
|     parser = ArgumentParser(description='Merges images into one .pdf file.') | ||||
|     parser = ArgumentParser(description='Merge images (JPEG, PNG or TIFF) into one PDF file.') | ||||
|     parser.add_argument('-i', '--input-directory', | ||||
|                         help='Input directory', | ||||
|                         required=True) | ||||
| @@ -31,45 +32,37 @@ def parse_arguments(): | ||||
|     return parser.parse_args() | ||||
|  | ||||
|  | ||||
| def natural_sorted(iterable): | ||||
|     """ Sort the given list in the way that humans expect. | ||||
|     """ | ||||
|     convert = lambda text: int(text) if text.isdigit() else text | ||||
|     alphanum_key = lambda key: [convert(c) for c in re.split('([0-9]+)', key)] | ||||
|     return sorted(iterable, key=alphanum_key) | ||||
|  | ||||
|  | ||||
| def merge_images(input_dir, output_dir, output_file_base, zip): | ||||
|     try: | ||||
|         os.mkdir(output_dir) | ||||
|         tmp_dir = os.path.join(input_dir, 'tmp') | ||||
|         os.mkdir(tmp_dir) | ||||
|     except FileExistsError: | ||||
|         pass | ||||
|     try: | ||||
|         tmp_dir = os.path.join(input_dir, 'tmp') | ||||
|         os.mkdir(tmp_dir) | ||||
|     except FileExistsError: | ||||
|         pass | ||||
|     # Sort filenames into a list ordered with version flag -v | ||||
|     cmd = 'ls -Q -v "{i}"/*.* > "{i}"/file_list.txt'.format(i=input_dir) | ||||
|     subprocess.run(cmd, shell=True) | ||||
|     # Convert all image files into pdf files | ||||
|     cmd = ('mogrify -compress LZW -format pdf ' | ||||
|            + '-path "{}" @"{}"/file_list.txt'.format(tmp_dir, input_dir)) | ||||
|     subprocess.run(cmd, shell=True) | ||||
|     # remove file list | ||||
|     cmd = 'rm "{}"/file_list.txt'.format(input_dir) | ||||
|     subprocess.run(cmd, shell=True) | ||||
|     # join all pdfs into one pdf | ||||
|     cmd = ('pdftk "{}"/*.pdf cat '.format(tmp_dir) | ||||
|            + 'output "{}"/"{}".pdf').format(output_dir, output_file_base) | ||||
|     subprocess.run(cmd, shell=True) | ||||
|     # remove single pdf files | ||||
|     cmd = 'rm -r "{}"'.format(tmp_dir) | ||||
|     files = filter(lambda x: x.lower().endswith(('.jpg', '.jpeg', '.png', '.tif', '.tiff')), | ||||
|                    os.listdir(input_dir)) | ||||
|     files = natural_sorted(files) | ||||
|     files = map(lambda x: os.path.join(input_dir, x), files) | ||||
|     output_file = os.path.join(output_dir, '{}.pdf'.format(output_file_base)) | ||||
|     # Convert input files to a single PDF | ||||
|     cmd = 'convert "{}" "{}"'.format('" "'.join(files), output_file) | ||||
|     subprocess.run(cmd, shell=True) | ||||
|     # zip stuff | ||||
|     if zip is not None: | ||||
|         # Remove .zip file extension if provided | ||||
|         if zip.endswith('.zip'): | ||||
|         if zip.lower().endswith('.zip'): | ||||
|             zip = zip[:-4] | ||||
|             zip = zip if zip else 'output' | ||||
|         cmd = 'cd "{}"'.format(output_dir) | ||||
|         cmd += ' && ' | ||||
|         cmd += 'zip' | ||||
|         cmd += ' "{}".zip "{}".pdf'.format(zip, output_file_base) | ||||
|         cmd += ' "{}.zip" "{}.pdf"'.format(zip, output_file_base) | ||||
|         cmd += ' && ' | ||||
|         cmd += 'cd -' | ||||
|         subprocess.run(cmd, shell=True) | ||||
|   | ||||
							
								
								
									
										87
									
								
								policy.xml
									
									
									
									
									
								
							
							
						
						
									
										87
									
								
								policy.xml
									
									
									
									
									
								
							| @@ -1,87 +0,0 @@ | ||||
| <?xml version="1.0" encoding="UTF-8"?> | ||||
| <!DOCTYPE policymap [ | ||||
| <!ELEMENT policymap (policy)+> | ||||
| <!ELEMENT policy (#PCDATA)> | ||||
| <!ATTLIST policy domain (delegate|coder|filter|path|resource) #IMPLIED> | ||||
| <!ATTLIST policy name CDATA #IMPLIED> | ||||
| <!ATTLIST policy rights CDATA #IMPLIED> | ||||
| <!ATTLIST policy pattern CDATA #IMPLIED> | ||||
| <!ATTLIST policy value CDATA #IMPLIED> | ||||
| ]> | ||||
| <!-- | ||||
|   Configure ImageMagick policies. | ||||
|  | ||||
|   Domains include system, delegate, coder, filter, path, or resource. | ||||
|  | ||||
|   Rights include none, read, write, and execute.  Use | to combine them, | ||||
|   for example: "read | write" to permit read from, or write to, a path. | ||||
|  | ||||
|   Use a glob expression as a pattern. | ||||
|  | ||||
|   Suppose we do not want users to process MPEG video images: | ||||
|  | ||||
|     <policy domain="delegate" rights="none" pattern="mpeg:decode" /> | ||||
|  | ||||
|   Here we do not want users reading images from HTTP: | ||||
|  | ||||
|     <policy domain="coder" rights="none" pattern="HTTP" /> | ||||
|  | ||||
|   Let's prevent users from executing any image filters: | ||||
|  | ||||
|     <policy domain="filter" rights="none" pattern="*" /> | ||||
|  | ||||
|   The /repository file system is restricted to read only.  We use a glob | ||||
|   expression to match all paths that start with /repository: | ||||
|  | ||||
|     <policy domain="path" rights="read" pattern="/repository/*" /> | ||||
|  | ||||
|   Let's prevent possible exploits by removing the right to use indirect reads. | ||||
|  | ||||
|     <policy domain="path" rights="none" pattern="@*" /> | ||||
|  | ||||
|   Any large image is cached to disk rather than memory: | ||||
|  | ||||
|     <policy domain="resource" name="area" value="1GB"/> | ||||
|  | ||||
|   Define arguments for the memory, map, area, width, height, and disk resources | ||||
|   with SI prefixes (e.g. 100MB).  In addition, resource policies are maximums | ||||
|   for each instance of ImageMagick (e.g. policy memory limit 1GB, -limit 2GB | ||||
|   exceeds policy maximum so memory limit is 1GB). | ||||
| --> | ||||
| <policymap> | ||||
|   <!-- <policy domain="system" name="shred" value="2"/> --> | ||||
|   <!-- <policy domain="system" name="precision" value="6"/> --> | ||||
|   <!-- <policy domain="system" name="memory-map" value="anonymous"/> --> | ||||
|   <!-- <policy domain="system" name="max-memory-request" value="256MiB"/> --> | ||||
|   <!-- <policy domain="resource" name="temporary-path" value="/tmp"/> --> | ||||
|   <policy domain="resource" name="memory" value="256MiB"/> | ||||
|   <policy domain="resource" name="map" value="512MiB"/> | ||||
|   <policy domain="resource" name="width" value="16KP"/> | ||||
|   <policy domain="resource" name="height" value="16KP"/> | ||||
|   <!-- <policy domain="resource" name="list-length" value="128"/> --> | ||||
|   <policy domain="resource" name="area" value="128MB"/> | ||||
|   <policy domain="resource" name="disk" value="1GiB"/> | ||||
|   <!-- <policy domain="resource" name="file" value="768"/> --> | ||||
|   <!-- <policy domain="resource" name="thread" value="4"/> --> | ||||
|   <!-- <policy domain="resource" name="throttle" value="0"/> --> | ||||
|   <!-- <policy domain="resource" name="time" value="3600"/> --> | ||||
|   <!-- <policy domain="coder" rights="none" pattern="MVG" /> --> | ||||
|   <policy domain="module" rights="read|write" pattern="{PS,PDF,XPS}" /> | ||||
|   <!-- <policy domain="delegate" rights="none" pattern="HTTPS" /> --> | ||||
|   <!-- <policy domain="path" rights="none" pattern="@*" /> --> | ||||
|   <!-- <policy domain="cache" name="memory-map" value="anonymous"/> --> | ||||
|   <!-- <policy domain="cache" name="synchronize" value="True"/> --> | ||||
|   <!-- <policy domain="cache" name="shared-secret" value="passphrase" stealth="true"/> --> | ||||
|   <!-- <policy domain="system" name="pixel-cache-memory" value="anonymous"/> --> | ||||
|   <!-- <policy domain="system" name="shred" value="2"/> --> | ||||
|   <!-- <policy domain="system" name="precision" value="6"/> --> | ||||
|   <!-- not needed due to the need to use explicitly by mvg: --> | ||||
|   <!-- <policy domain="delegate" rights="none" pattern="MVG" /> --> | ||||
|   <!-- use curl --> | ||||
|   <policy domain="delegate" rights="none" pattern="URL" /> | ||||
|   <policy domain="delegate" rights="none" pattern="HTTPS" /> | ||||
|   <policy domain="delegate" rights="none" pattern="HTTP" /> | ||||
|   <!-- in order to avoid to get image with password text --> | ||||
|   <!-- <policy domain="path" rights="none" pattern="@*"/> --> | ||||
| </policymap> | ||||
|  | ||||
| @@ -29,4 +29,5 @@ if args.input_directory is not None: | ||||
|     remaining_args.insert(0, '-i') | ||||
| cmd.append(CONTAINER_IMAGE) | ||||
| cmd += remaining_args | ||||
|  | ||||
| subprocess.run(cmd) | ||||
|   | ||||
		Reference in New Issue
	
	Block a user