Remove the foo script and move its functionality into an entrypoint script.

This commit is contained in:
Patrick Jentsch 2020-01-29 11:48:15 +01:00
parent 221650ac20
commit 27900f391b
4 changed files with 28 additions and 38 deletions

View File

@ -36,8 +36,7 @@ RUN echo "yes" | cpan HTML::Entities && \
# Copy the CQP server init file into /root/.
COPY cqpserver.init /root/
# NOTE(review): this hunk lists removed and added instructions together
# without +/- markers. Presumably the foo.sh COPY, the cqpserver ENTRYPOINT
# and the CMD below are the removed side of the commit -- confirm against
# the repository before relying on them.
COPY foo.sh /usr/local/bin/
COPY docker-entrypoint.sh /usr/local/bin/
ENTRYPOINT ["cqpserver"]
CMD ["-I", "/root/cqpserver.init"]
# Start the container through the wrapper script copied above.
ENTRYPOINT ["docker-entrypoint.sh"]

26
docker-entrypoint.sh Executable file
View File

@ -0,0 +1,26 @@
#!/bin/bash
#
# Container entrypoint.
#
# Usage:
#   docker-entrypoint.sh            Start cqpserver with /root/cqpserver.init.
#   docker-entrypoint.sh prepare    Run cwb-encode and cwb-make on the corpus.
#
# Any other first argument prints "Unknown command" to stderr and exits
# with status 1.

# Abort on the first failing command so cwb-make never runs after a failed
# mkdir or cwb-encode.
set -e

if [ $# -eq 0 ]; then
    # exec replaces this shell with cqpserver, so the server receives
    # signals (e.g. on container stop) directly instead of the wrapper.
    exec cqpserver -I /root/cqpserver.init
elif [ "$1" = "prepare" ]; then
    # Input:
    # - Source file: "/root/files/corpus.vrt"
    # Output
    # - Corpus name: "CORPUS"
    # - Corpus data directory: "/corpora/data/corpus"
    # - Corpus registry file: "/usr/local/share/cwb/registry/corpus"
    mkdir -p /corpora/data/corpus
    cwb-encode \
        -c utf8 \
        -d /corpora/data/corpus \
        -f /root/files/corpus.vrt \
        -R /usr/local/share/cwb/registry/corpus \
        -P lemma -P simple_pos -P pos -P ner \
        -S text:0+address+author+booktitle+chapter+editor+institution+journal+pages+publisher+publishing_year+school+title -S s \
        -0 corpus \
        -xsB
    cwb-make \
        -V CORPUS
else
    # Report the failure on stderr and with a non-zero status so callers
    # can detect a mistyped command.
    echo "Unknown command" >&2
    exit 1
fi

14
foo.sh
View File

@ -1,14 +0,0 @@
#!/bin/bash
#
# Run cwb-encode and cwb-make on the corpus source file.
#
# Input:
# - Source file: "/root/files/corpus.vrt"
# Output:
# - Corpus data directory: "/corpora/data/corpus"
# - Corpus registry file: "/usr/local/share/cwb/registry/corpus"

# Abort on the first failing command so cwb-make never runs after a failed
# mkdir or cwb-encode.
set -e

mkdir -p /corpora/data/corpus
cwb-encode \
    -c utf8 \
    -d /corpora/data/corpus \
    -f /root/files/corpus.vrt \
    -R /usr/local/share/cwb/registry/corpus \
    -P lemma -P simple_pos -P pos -P ner \
    -S text:0+address+author+booktitle+chapter+editor+institution+journal+pages+publisher+publishing_year+school+title -S s \
    -0 corpus \
    -xsB
cwb-make \
    -V CORPUS

View File

@ -1,21 +0,0 @@
import argparse
import os
import xml.etree.ElementTree as ET
def _collect_text_nodes(corpus_dir):
    """Return the ``text`` child element of every input file in *corpus_dir*.

    Sub-directories and an existing ``corpus.vrt`` (the output file of this
    script) are skipped. Files are visited in sorted name order so the
    result is deterministic; ``os.listdir`` alone returns entries in
    arbitrary order.

    Raises:
        ValueError: if a parsed file has no ``text`` child under its root.
    """
    text_nodes = []
    for corpus_file in sorted(os.listdir(corpus_dir)):
        path = os.path.join(corpus_dir, corpus_file)
        if os.path.isdir(path) or corpus_file == 'corpus.vrt':
            continue
        text_node = ET.parse(path).find('text')
        if text_node is None:
            # Fail here with the offending file name instead of later with
            # an opaque TypeError when the node is added to the new root.
            raise ValueError('{}: no <text> element found'.format(path))
        text_nodes.append(text_node)
    return text_nodes


def main(argv=None):
    """Merge the ``text`` elements of all files in a directory.

    Parses ``-dir`` from *argv* (``sys.argv[1:]`` when *argv* is ``None``),
    collects one ``text`` element per input file and writes them, in file
    name order, as children of a new ``<corpus>`` root to ``corpus.vrt``
    inside that directory.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('-dir', dest='corpus_dir', required=True)
    args = parser.parse_args(argv)

    root = ET.Element('corpus')
    # extend() keeps the collected order; the former insert(1, ...) put
    # every node after the first at index 1, reversing their order.
    root.extend(_collect_text_nodes(args.corpus_dir))
    # NOTE(review): write() uses its default output encoding, so non-ASCII
    # characters are emitted as character references -- confirm that the
    # consumer of corpus.vrt accepts that.
    ET.ElementTree(root).write(os.path.join(args.corpus_dir, 'corpus.vrt'))


if __name__ == '__main__':
    main()