From 4e17867248a230e0615fc5652e21d75cd9e24a41 Mon Sep 17 00:00:00 2001 From: Stephan Porada Date: Wed, 3 Jun 2020 10:34:42 +0200 Subject: [PATCH] rework how pdfs are created 2 --- Dockerfile | 7 ++-- file-setup | 31 +++++++++++++---- policy.xml | 87 ++++++++++++++++++++++++++++++++++++++++++++++ wrapper/file-setup | 2 +- 4 files changed, 117 insertions(+), 10 deletions(-) create mode 100644 policy.xml diff --git a/Dockerfile b/Dockerfile index 1cea690..2a18842 100644 --- a/Dockerfile +++ b/Dockerfile @@ -6,18 +6,21 @@ LABEL maintainer="inf_sfb1288@lists.uni-bielefeld.de" ENV LANG=C.UTF-8 - +RUN mkdir /usr/share/man/man1/ # needed to install pdftk https://github.com/geerlingguy/ansible-role-java/issues/64 RUN apt-get update \ && apt-get install -y --no-install-recommends \ + ca-certificates \ imagemagick \ python3.7 \ + pdftk \ zip RUN rm -rf /var/lib/apt/lists/* - +RUN rm -f /etc/ImageMagick-6/policy.xml COPY file-setup /usr/local/bin +COPY policy.xml /etc/ImageMagick-6 ENTRYPOINT ["file-setup"] diff --git a/file-setup b/file-setup index df02006..00975f9 100755 --- a/file-setup +++ b/file-setup @@ -28,22 +28,39 @@ def parse_arguments(): def merge_images(input_dir, output_dir, output_file_base, zip): try: os.mkdir(output_dir) - tmp_dir_path = os.path.join(input_dir, 'tmp') - os.mkdir(tmp_dir_path) + tmp_dir = os.path.join(input_dir, 'tmp') + os.mkdir(tmp_dir) except FileExistsError: pass + try: + tmp_dir = os.path.join(input_dir, 'tmp') + os.mkdir(tmp_dir) + except FileExistsError: + pass + # Sort filenames into a list ordered with version flag -v cmd = 'ls -Q -v "{i}"/*.* > "{i}"/file_list.txt'.format(i=input_dir) subprocess.run(cmd, shell=True) - cmd = ('mogrify -compress LZW -format pdf ', - + '-path @"{o}" "{i}"/file_list.txt').format(i=input_dir, - o=tmp_dir_path) + # Convert all image files into pdf files + cmd = ('mogrify -compress LZW -format pdf ' + + '-path "{o}" @"{i}"/file_list.txt').format(i=input_dir, + o=tmp_dir) subprocess.run(cmd, shell=True) + # remove image files + cmd = 'xargs rm <{i}/file_list.txt'.format(i=input_dir) + subprocess.run(cmd, shell=True) + # remove file list cmd = 'rm "{i}"/file_list.txt'.format(i=input_dir) subprocess.run(cmd, shell=True) - cmd = ('pdftk "{tmp_dir_path}"/*.pdf cat ', - + 'output "{o}"/"{ofb}".pdf').format(i=tmp_dir_path, + # join all pdfs into one pdf + cmd = ('pdftk "{tmp_i}"/*.pdf cat ' + + 'output "{o}"/"{ofb}".pdf').format(tmp_i=tmp_dir, o=output_dir, ofb=output_file_base) + subprocess.run(cmd, shell=True) + # remove single pdf files + cmd = 'rm -fr {i}/tmp'.format(i=input_dir) + subprocess.run(cmd, shell=True) + # zip stuff if zip is not None: # Remove .zip file extension if provided if zip.endswith('.zip'): diff --git a/policy.xml b/policy.xml new file mode 100644 index 0000000..f5ef390 --- /dev/null +++ b/policy.xml @@ -0,0 +1,87 @@ + + + + + + + + +]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/wrapper/file-setup b/wrapper/file-setup index 6735642..8d55e33 100755 --- a/wrapper/file-setup +++ b/wrapper/file-setup @@ -14,6 +14,7 @@ GID = str(os.getgid()) parser = ArgumentParser(add_help=False) parser.add_argument('-i') parser.add_argument('-o') +parser.add_argument('-f') args, remaining_args = parser.parse_known_args() cmd = ['docker', 'run', '--rm', '-it', '-u', '{}:{}'.format(UID, GID)] @@ -25,5 +26,4 @@ if args.i is not None: remaining_args.insert(0, CONTAINER_INPUT_DIR) cmd.append(CONTAINER_IMAGE) cmd += remaining_args - subprocess.run(cmd)