mirror of
https://gitlab.ub.uni-bielefeld.de/sfb1288inf/file-setup.git
synced 2024-12-25 10:54:18 +00:00
rework how pdfs are created 2
This commit is contained in:
parent
194156d862
commit
4e17867248
@ -6,18 +6,21 @@ LABEL maintainer="inf_sfb1288@lists.uni-bielefeld.de"
|
||||
|
||||
ENV LANG=C.UTF-8
|
||||
|
||||
|
||||
RUN mkdir /usr/share/man/man1/ # needed to install pdftk https://github.com/geerlingguy/ansible-role-java/issues/64
|
||||
RUN apt-get update \
|
||||
&& apt-get install -y --no-install-recommends \
|
||||
ca-certificates \
|
||||
imagemagick \
|
||||
python3.7 \
|
||||
pdftk \
|
||||
zip
|
||||
|
||||
|
||||
RUN rm -rf /var/lib/apt/lists/*
|
||||
|
||||
RUN rm -f /etc/ImageMagick-6/policy.xml
|
||||
|
||||
COPY file-setup /usr/local/bin
|
||||
COPY policy.xml /etc/ImageMagick-6
|
||||
|
||||
|
||||
ENTRYPOINT ["file-setup"]
|
||||
|
31
file-setup
31
file-setup
@ -28,22 +28,39 @@ def parse_arguments():
|
||||
def merge_images(input_dir, output_dir, output_file_base, zip):
|
||||
try:
|
||||
os.mkdir(output_dir)
|
||||
tmp_dir_path = os.path.join(input_dir, 'tmp')
|
||||
os.mkdir(tmp_dir_path)
|
||||
tmp_dir = os.path.join(input_dir, 'tmp')
|
||||
os.mkdir(tmp_dir)
|
||||
except FileExistsError:
|
||||
pass
|
||||
try:
|
||||
tmp_dir = os.path.join(input_dir, 'tmp')
|
||||
os.mkdir(tmp_dir)
|
||||
except FileExistsError:
|
||||
pass
|
||||
# Sort filenames into a list ordered with version flag -v
|
||||
cmd = 'ls -Q -v "{i}"/*.* > "{i}"/file_list.txt'.format(i=input_dir)
|
||||
subprocess.run(cmd, shell=True)
|
||||
cmd = ('mogrify -compress LZW -format pdf ',
|
||||
+ '-path @"{o}" "{i}"/file_list.txt').format(i=input_dir,
|
||||
o=tmp_dir_path)
|
||||
# Convert all image files into pdf files
|
||||
cmd = ('mogrify -compress LZW -format pdf '
|
||||
+ '-path "{o}" @"{i}"/file_list.txt').format(i=input_dir,
|
||||
o=tmp_dir)
|
||||
subprocess.run(cmd, shell=True)
|
||||
# remove image files
|
||||
cmd = 'xargs rm <{i}/file_list.txt'.format(i=input_dir)
|
||||
subprocess.run(cmd, shell=True)
|
||||
# remove file list
|
||||
cmd = 'rm "{i}"/file_list.txt'.format(i=input_dir)
|
||||
subprocess.run(cmd, shell=True)
|
||||
cmd = ('pdftk "{tmp_dir_path}"/*.pdf cat ',
|
||||
+ 'output "{o}"/"{ofb}".pdf').format(i=tmp_dir_path,
|
||||
# join all pdfs into one pdf
|
||||
cmd = ('pdftk "{tmp_i}"/*.pdf cat '
|
||||
+ 'output "{o}"/"{ofb}".pdf').format(tmp_i=tmp_dir,
|
||||
o=output_dir,
|
||||
ofb=output_file_base)
|
||||
subprocess.run(cmd, shell=True)
|
||||
# remove single pdf files
|
||||
cmd = 'rm -fr {i}/tmp'.format(i=input_dir)
|
||||
subprocess.run(cmd, shell=True)
|
||||
# zip stuff
|
||||
if zip is not None:
|
||||
# Remove .zip file extension if provided
|
||||
if zip.endswith('.zip'):
|
||||
|
87
policy.xml
Normal file
87
policy.xml
Normal file
@ -0,0 +1,87 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<!DOCTYPE policymap [
|
||||
<!ELEMENT policymap (policy)+>
|
||||
<!ELEMENT policy (#PCDATA)>
|
||||
<!ATTLIST policy domain (delegate|coder|filter|path|resource) #IMPLIED>
|
||||
<!ATTLIST policy name CDATA #IMPLIED>
|
||||
<!ATTLIST policy rights CDATA #IMPLIED>
|
||||
<!ATTLIST policy pattern CDATA #IMPLIED>
|
||||
<!ATTLIST policy value CDATA #IMPLIED>
|
||||
]>
|
||||
<!--
|
||||
Configure ImageMagick policies.
|
||||
|
||||
Domains include system, delegate, coder, filter, path, or resource.
|
||||
|
||||
Rights include none, read, write, and execute. Use | to combine them,
|
||||
for example: "read | write" to permit read from, or write to, a path.
|
||||
|
||||
Use a glob expression as a pattern.
|
||||
|
||||
Suppose we do not want users to process MPEG video images:
|
||||
|
||||
<policy domain="delegate" rights="none" pattern="mpeg:decode" />
|
||||
|
||||
Here we do not want users reading images from HTTP:
|
||||
|
||||
<policy domain="coder" rights="none" pattern="HTTP" />
|
||||
|
||||
Let's prevent users from executing any image filters:
|
||||
|
||||
<policy domain="filter" rights="none" pattern="*" />
|
||||
|
||||
The /repository file system is restricted to read only. We use a glob
|
||||
expression to match all paths that start with /repository:
|
||||
|
||||
<policy domain="path" rights="read" pattern="/repository/*" />
|
||||
|
||||
Let's prevent possible exploits by removing the right to use indirect reads.
|
||||
|
||||
<policy domain="path" rights="none" pattern="@*" />
|
||||
|
||||
Any large image is cached to disk rather than memory:
|
||||
|
||||
<policy domain="resource" name="area" value="1GB"/>
|
||||
|
||||
Define arguments for the memory, map, area, width, height, and disk resources
|
||||
with SI prefixes (e.g. 100MB). In addition, resource policies are maximums
|
||||
for each instance of ImageMagick (e.g. policy memory limit 1GB, -limit 2GB
|
||||
exceeds policy maximum so memory limit is 1GB).
|
||||
-->
|
||||
<policymap>
|
||||
<!-- <policy domain="system" name="shred" value="2"/> -->
|
||||
<!-- <policy domain="system" name="precision" value="6"/> -->
|
||||
<!-- <policy domain="system" name="memory-map" value="anonymous"/> -->
|
||||
<!-- <policy domain="system" name="max-memory-request" value="256MiB"/> -->
|
||||
<!-- <policy domain="resource" name="temporary-path" value="/tmp"/> -->
|
||||
<policy domain="resource" name="memory" value="256MiB"/>
|
||||
<policy domain="resource" name="map" value="512MiB"/>
|
||||
<policy domain="resource" name="width" value="16KP"/>
|
||||
<policy domain="resource" name="height" value="16KP"/>
|
||||
<!-- <policy domain="resource" name="list-length" value="128"/> -->
|
||||
<policy domain="resource" name="area" value="128MB"/>
|
||||
<policy domain="resource" name="disk" value="1GiB"/>
|
||||
<!-- <policy domain="resource" name="file" value="768"/> -->
|
||||
<!-- <policy domain="resource" name="thread" value="4"/> -->
|
||||
<!-- <policy domain="resource" name="throttle" value="0"/> -->
|
||||
<!-- <policy domain="resource" name="time" value="3600"/> -->
|
||||
<!-- <policy domain="coder" rights="none" pattern="MVG" /> -->
|
||||
<policy domain="module" rights="read|write" pattern="{PS,PDF,XPS}" />
|
||||
<!-- <policy domain="delegate" rights="none" pattern="HTTPS" /> -->
|
||||
<!-- <policy domain="path" rights="none" pattern="@*" /> -->
|
||||
<!-- <policy domain="cache" name="memory-map" value="anonymous"/> -->
|
||||
<!-- <policy domain="cache" name="synchronize" value="True"/> -->
|
||||
<!-- <policy domain="cache" name="shared-secret" value="passphrase" stealth="true"/> -->
|
||||
<!-- <policy domain="system" name="pixel-cache-memory" value="anonymous"/> -->
|
||||
<!-- <policy domain="system" name="shred" value="2"/> -->
|
||||
<!-- <policy domain="system" name="precision" value="6"/> -->
|
||||
<!-- not needed because MVG has to be requested explicitly via the mvg: prefix -->
|
||||
<!-- <policy domain="delegate" rights="none" pattern="MVG" /> -->
|
||||
<!-- use curl -->
|
||||
<policy domain="delegate" rights="none" pattern="URL" />
|
||||
<policy domain="delegate" rights="none" pattern="HTTPS" />
|
||||
<policy domain="delegate" rights="none" pattern="HTTP" />
|
||||
<!-- in order to avoid getting an image that contains password text -->
|
||||
<!-- <policy domain="path" rights="none" pattern="@*"/> -->
|
||||
</policymap>
|
||||
|
@ -14,6 +14,7 @@ GID = str(os.getgid())
|
||||
parser = ArgumentParser(add_help=False)
|
||||
parser.add_argument('-i')
|
||||
parser.add_argument('-o')
|
||||
parser.add_argument('-f')
|
||||
args, remaining_args = parser.parse_known_args()
|
||||
|
||||
cmd = ['docker', 'run', '--rm', '-it', '-u', '{}:{}'.format(UID, GID)]
|
||||
@ -25,5 +26,4 @@ if args.i is not None:
|
||||
remaining_args.insert(0, CONTAINER_INPUT_DIR)
|
||||
cmd.append(CONTAINER_IMAGE)
|
||||
cmd += remaining_args
|
||||
|
||||
subprocess.run(cmd)
|
||||
|
Loading…
Reference in New Issue
Block a user