mirror of
https://gitlab.ub.uni-bielefeld.de/sfb1288inf/file-setup.git
synced 2025-01-13 11:40:35 +00:00
Less complex, lower output file size.
This commit is contained in:
parent
09b40c47c5
commit
18e5ee21dc
14
Dockerfile
14
Dockerfile
@ -1,26 +1,24 @@
|
|||||||
FROM debian:10-slim
|
FROM debian:10-slim
|
||||||
|
|
||||||
|
|
||||||
LABEL maintainer="inf_sfb1288@lists.uni-bielefeld.de"
|
LABEL authors="Patrick Jentsch <p.jentsch@uni-bielefeld.de>, Stephan Porada <sporada@uni-bielefeld.de>"
|
||||||
|
|
||||||
|
|
||||||
ENV LANG=C.UTF-8
|
ENV LANG=C.UTF-8
|
||||||
|
|
||||||
RUN mkdir /usr/share/man/man1/ # needed to install pdftk https://github.com/geerlingguy/ansible-role-java/issues/64
|
|
||||||
RUN apt-get update \
|
RUN apt-get update \
|
||||||
&& apt-get install -y --no-install-recommends \
|
&& apt-get install -y --no-install-recommends \
|
||||||
ca-certificates \
|
|
||||||
imagemagick \
|
imagemagick \
|
||||||
python3.7 \
|
python3.7 \
|
||||||
pdftk \
|
zip \
|
||||||
zip
|
&& rm -r /var/lib/apt/lists/*
|
||||||
|
|
||||||
|
|
||||||
RUN rm -rf /var/lib/apt/lists/*
|
RUN cat /etc/ImageMagick-6/policy.xml && rm /etc/ImageMagick-6/policy.xml
|
||||||
RUN rm -f /etc/ImageMagick-6/policy.xml
|
|
||||||
|
|
||||||
COPY file-setup /usr/local/bin
|
COPY file-setup /usr/local/bin
|
||||||
COPY policy.xml /etc/ImageMagick-6
|
|
||||||
|
|
||||||
|
|
||||||
ENTRYPOINT ["file-setup"]
|
ENTRYPOINT ["file-setup"]
|
||||||
|
51
file-setup
51
file-setup
@ -3,20 +3,21 @@
|
|||||||
|
|
||||||
|
|
||||||
"""
|
"""
|
||||||
merge_images
|
file-setup
|
||||||
|
|
||||||
Usage: For usage instructions run with option --help
|
Usage: For usage instructions run with option --help
|
||||||
Author: Stephan Porada <sporada@uni-bielefeld.de>
|
Authors: Patrick Jentsch <p.jentsch@uni-bielefeld.de>
|
||||||
|
Stephan Porada <sporada@uni-bielefeld.de>
|
||||||
"""
|
"""
|
||||||
|
|
||||||
from argparse import ArgumentParser
|
from argparse import ArgumentParser
|
||||||
import os
|
import os
|
||||||
|
import re
|
||||||
import subprocess
|
import subprocess
|
||||||
|
|
||||||
|
|
||||||
def parse_arguments():
|
def parse_arguments():
|
||||||
parser = ArgumentParser(description='Merges images into one .pdf file.')
|
parser = ArgumentParser(description='Merge images (JPEG, PNG or TIFF) into one PDF file.')
|
||||||
parser.add_argument('-i', '--input-directory',
|
parser.add_argument('-i', '--input-directory',
|
||||||
help='Input directory',
|
help='Input directory',
|
||||||
required=True)
|
required=True)
|
||||||
@ -31,45 +32,37 @@ def parse_arguments():
|
|||||||
return parser.parse_args()
|
return parser.parse_args()
|
||||||
|
|
||||||
|
|
||||||
|
def natural_sorted(iterable):
    """Return a new list with the items sorted the way humans expect.

    Runs of ASCII digits (0-9) inside each string are compared by their
    numeric value, everything else is compared as text, so that 'page2'
    is placed before 'page10'.

    Args:
        iterable: Any iterable of strings (a list, a filter object, ...).

    Returns:
        A new list containing the items in natural order.
    """
    def alphanum_key(key):
        # re.split with a capturing group always yields text chunks at even
        # positions and the captured digit runs at odd positions. Converting
        # by position (rather than via str.isdigit) leaves non-ASCII digit
        # characters, e.g. a superscript two, as plain text: str.isdigit
        # accepts them, but int() either rejects them or would put an int
        # into a text slot and make the keys incomparable.
        parts = re.split(r'([0-9]+)', key)
        return [int(part) if index % 2 else part
                for index, part in enumerate(parts)]

    return sorted(iterable, key=alphanum_key)
|
||||||
|
|
||||||
|
|
||||||
def merge_images(input_dir, output_dir, output_file_base, zip):
|
def merge_images(input_dir, output_dir, output_file_base, zip):
|
||||||
try:
|
try:
|
||||||
os.mkdir(output_dir)
|
os.mkdir(output_dir)
|
||||||
tmp_dir = os.path.join(input_dir, 'tmp')
|
|
||||||
os.mkdir(tmp_dir)
|
|
||||||
except FileExistsError:
|
except FileExistsError:
|
||||||
pass
|
pass
|
||||||
try:
|
files = filter(lambda x: x.lower().endswith(('.jpg', '.jpeg', '.png', '.tif', '.tiff')),
|
||||||
tmp_dir = os.path.join(input_dir, 'tmp')
|
os.listdir(input_dir))
|
||||||
os.mkdir(tmp_dir)
|
files = natural_sorted(files)
|
||||||
except FileExistsError:
|
files = map(lambda x: os.path.join(input_dir, x), files)
|
||||||
pass
|
output_file = os.path.join(output_dir, '{}.pdf'.format(output_file_base))
|
||||||
# Sort filenames into a list ordered with version flag -v
|
# Convert input files to a single PDF
|
||||||
cmd = 'ls -Q -v "{i}"/*.* > "{i}"/file_list.txt'.format(i=input_dir)
|
cmd = 'convert "{}" "{}"'.format('" "'.join(files), output_file)
|
||||||
subprocess.run(cmd, shell=True)
|
|
||||||
# Convert all image files into pdf files
|
|
||||||
cmd = ('mogrify -compress LZW -format pdf '
|
|
||||||
+ '-path "{}" @"{}"/file_list.txt'.format(tmp_dir, input_dir))
|
|
||||||
subprocess.run(cmd, shell=True)
|
|
||||||
# remove file list
|
|
||||||
cmd = 'rm "{}"/file_list.txt'.format(input_dir)
|
|
||||||
subprocess.run(cmd, shell=True)
|
|
||||||
# join all pdfs into one pdf
|
|
||||||
cmd = ('pdftk "{}"/*.pdf cat '.format(tmp_dir)
|
|
||||||
+ 'output "{}"/"{}".pdf').format(output_dir, output_file_base)
|
|
||||||
subprocess.run(cmd, shell=True)
|
|
||||||
# remove single pdf files
|
|
||||||
cmd = 'rm -r "{}"'.format(tmp_dir)
|
|
||||||
subprocess.run(cmd, shell=True)
|
subprocess.run(cmd, shell=True)
|
||||||
# zip stuff
|
# zip stuff
|
||||||
if zip is not None:
|
if zip is not None:
|
||||||
# Remove .zip file extension if provided
|
# Remove .zip file extension if provided
|
||||||
if zip.endswith('.zip'):
|
if zip.lower().endswith('.zip'):
|
||||||
zip = zip[:-4]
|
zip = zip[:-4]
|
||||||
zip = zip if zip else 'output'
|
zip = zip if zip else 'output'
|
||||||
cmd = 'cd "{}"'.format(output_dir)
|
cmd = 'cd "{}"'.format(output_dir)
|
||||||
cmd += ' && '
|
cmd += ' && '
|
||||||
cmd += 'zip'
|
cmd += 'zip'
|
||||||
cmd += ' "{}".zip "{}".pdf'.format(zip, output_file_base)
|
cmd += ' "{}.zip" "{}.pdf"'.format(zip, output_file_base)
|
||||||
cmd += ' && '
|
cmd += ' && '
|
||||||
cmd += 'cd -'
|
cmd += 'cd -'
|
||||||
subprocess.run(cmd, shell=True)
|
subprocess.run(cmd, shell=True)
|
||||||
|
87
policy.xml
87
policy.xml
@ -1,87 +0,0 @@
|
|||||||
<?xml version="1.0" encoding="UTF-8"?>
|
|
||||||
<!DOCTYPE policymap [
|
|
||||||
<!ELEMENT policymap (policy)+>
|
|
||||||
<!ELEMENT policy (#PCDATA)>
|
|
||||||
<!ATTLIST policy domain (delegate|coder|filter|path|resource) #IMPLIED>
|
|
||||||
<!ATTLIST policy name CDATA #IMPLIED>
|
|
||||||
<!ATTLIST policy rights CDATA #IMPLIED>
|
|
||||||
<!ATTLIST policy pattern CDATA #IMPLIED>
|
|
||||||
<!ATTLIST policy value CDATA #IMPLIED>
|
|
||||||
]>
|
|
||||||
<!--
|
|
||||||
Configure ImageMagick policies.
|
|
||||||
|
|
||||||
Domains include system, delegate, coder, filter, path, or resource.
|
|
||||||
|
|
||||||
Rights include none, read, write, and execute. Use | to combine them,
|
|
||||||
for example: "read | write" to permit read from, or write to, a path.
|
|
||||||
|
|
||||||
Use a glob expression as a pattern.
|
|
||||||
|
|
||||||
Suppose we do not want users to process MPEG video images:
|
|
||||||
|
|
||||||
<policy domain="delegate" rights="none" pattern="mpeg:decode" />
|
|
||||||
|
|
||||||
Here we do not want users reading images from HTTP:
|
|
||||||
|
|
||||||
<policy domain="coder" rights="none" pattern="HTTP" />
|
|
||||||
|
|
||||||
Lets prevent users from executing any image filters:
|
|
||||||
|
|
||||||
<policy domain="filter" rights="none" pattern="*" />
|
|
||||||
|
|
||||||
The /repository file system is restricted to read only. We use a glob
|
|
||||||
expression to match all paths that start with /repository:
|
|
||||||
|
|
||||||
<policy domain="path" rights="read" pattern="/repository/*" />
|
|
||||||
|
|
||||||
Let's prevent possible exploits by removing the right to use indirect reads.
|
|
||||||
|
|
||||||
<policy domain="path" rights="none" pattern="@*" />
|
|
||||||
|
|
||||||
Any large image is cached to disk rather than memory:
|
|
||||||
|
|
||||||
<policy domain="resource" name="area" value="1GB"/>
|
|
||||||
|
|
||||||
Define arguments for the memory, map, area, width, height, and disk resources
|
|
||||||
with SI prefixes (.e.g 100MB). In addition, resource policies are maximums
|
|
||||||
for each instance of ImageMagick (e.g. policy memory limit 1GB, -limit 2GB
|
|
||||||
exceeds policy maximum so memory limit is 1GB).
|
|
||||||
-->
|
|
||||||
<policymap>
|
|
||||||
<!-- <policy domain="system" name="shred" value="2"/> -->
|
|
||||||
<!-- <policy domain="system" name="precision" value="6"/> -->
|
|
||||||
<!-- <policy domain="system" name="memory-map" value="anonymous"/> -->
|
|
||||||
<!-- <policy domain="system" name="max-memory-request" value="256MiB"/> -->
|
|
||||||
<!-- <policy domain="resource" name="temporary-path" value="/tmp"/> -->
|
|
||||||
<policy domain="resource" name="memory" value="256MiB"/>
|
|
||||||
<policy domain="resource" name="map" value="512MiB"/>
|
|
||||||
<policy domain="resource" name="width" value="16KP"/>
|
|
||||||
<policy domain="resource" name="height" value="16KP"/>
|
|
||||||
<!-- <policy domain="resource" name="list-length" value="128"/> -->
|
|
||||||
<policy domain="resource" name="area" value="128MB"/>
|
|
||||||
<policy domain="resource" name="disk" value="1GiB"/>
|
|
||||||
<!-- <policy domain="resource" name="file" value="768"/> -->
|
|
||||||
<!-- <policy domain="resource" name="thread" value="4"/> -->
|
|
||||||
<!-- <policy domain="resource" name="throttle" value="0"/> -->
|
|
||||||
<!-- <policy domain="resource" name="time" value="3600"/> -->
|
|
||||||
<!-- <policy domain="coder" rights="none" pattern="MVG" /> -->
|
|
||||||
<policy domain="module" rights="read|write" pattern="{PS,PDF,XPS}" />
|
|
||||||
<!-- <policy domain="delegate" rights="none" pattern="HTTPS" /> -->
|
|
||||||
<!-- <policy domain="path" rights="none" pattern="@*" /> -->
|
|
||||||
<!-- <policy domain="cache" name="memory-map" value="anonymous"/> -->
|
|
||||||
<!-- <policy domain="cache" name="synchronize" value="True"/> -->
|
|
||||||
<!-- <policy domain="cache" name="shared-secret" value="passphrase" stealth="true"/> -->
|
|
||||||
<!-- <policy domain="system" name="pixel-cache-memory" value="anonymous"/> -->
|
|
||||||
<!-- <policy domain="system" name="shred" value="2"/> -->
|
|
||||||
<!-- <policy domain="system" name="precision" value="6"/> -->
|
|
||||||
<!-- not needed due to the need to use explicitly by mvg: -->
|
|
||||||
<!-- <policy domain="delegate" rights="none" pattern="MVG" /> -->
|
|
||||||
<!-- use curl -->
|
|
||||||
<policy domain="delegate" rights="none" pattern="URL" />
|
|
||||||
<policy domain="delegate" rights="none" pattern="HTTPS" />
|
|
||||||
<policy domain="delegate" rights="none" pattern="HTTP" />
|
|
||||||
<!-- in order to avoid to get image with password text -->
|
|
||||||
<!-- <policy domain="path" rights="none" pattern="@*"/> -->
|
|
||||||
</policymap>
|
|
||||||
|
|
@ -29,4 +29,5 @@ if args.input_directory is not None:
|
|||||||
remaining_args.insert(0, '-i')
|
remaining_args.insert(0, '-i')
|
||||||
cmd.append(CONTAINER_IMAGE)
|
cmd.append(CONTAINER_IMAGE)
|
||||||
cmd += remaining_args
|
cmd += remaining_args
|
||||||
|
|
||||||
subprocess.run(cmd)
|
subprocess.run(cmd)
|
||||||
|
Loading…
x
Reference in New Issue
Block a user