mirror of
https://gitlab.ub.uni-bielefeld.de/sfb1288inf/cqpserver.git
synced 2024-12-26 11:24:19 +00:00
Remove foo script and move its functionality into an entrypoint script.
This commit is contained in:
parent
221650ac20
commit
27900f391b
@ -36,8 +36,7 @@ RUN echo "yes" | cpan HTML::Entities && \
|
|||||||
|
|
||||||
|
|
||||||
COPY cqpserver.init /root/
|
COPY cqpserver.init /root/
|
||||||
COPY foo.sh /usr/local/bin/
|
COPY docker-entrypoint.sh /usr/local/bin/
|
||||||
|
|
||||||
|
|
||||||
ENTRYPOINT ["cqpserver"]
|
ENTRYPOINT ["docker-entrypoint.sh"]
|
||||||
CMD ["-I", "/root/cqpserver.init"]
|
|
||||||
|
26
docker-entrypoint.sh
Executable file
26
docker-entrypoint.sh
Executable file
@ -0,0 +1,26 @@
|
|||||||
|
#!/bin/bash
#
# Entrypoint script for the cqpserver image.
#
# Usage:
#   docker-entrypoint.sh            Start cqpserver with /root/cqpserver.init.
#   docker-entrypoint.sh prepare    Encode /root/files/corpus.vrt as a CWB corpus.
#
# Any other first argument is rejected with a non-zero exit status.

# Stop at the first failing command so that cwb-make never runs against a
# corpus that cwb-encode could not build.
set -e

if [ $# -eq 0 ]; then
    # exec replaces this shell with cqpserver, so the server itself receives
    # the signals sent to the container's main process (e.g. on stop).
    exec cqpserver -I /root/cqpserver.init
elif [ "$1" = "prepare" ]; then
    # Input:
    # - Source file: "/root/files/corpus.vrt"
    # Output
    # - Corpus name: "CORPUS"
    # - Corpus data directory: "/corpora/data/corpus"
    # - Corpus registry file: "/usr/local/share/cwb/registry/corpus"
    mkdir -p /corpora/data/corpus

    # -P declares positional attributes, -S structural attributes.
    # NOTE(review): per the cwb-encode manual, -0 discards the wrapping
    # <corpus> tags and -xsB enables XML-aware parsing, skips blank lines and
    # strips surrounding blanks -- confirm against the installed CWB version.
    cwb-encode \
        -c utf8 \
        -d /corpora/data/corpus \
        -f /root/files/corpus.vrt \
        -R /usr/local/share/cwb/registry/corpus \
        -P lemma -P simple_pos -P pos -P ner \
        -S text:0+address+author+booktitle+chapter+editor+institution+journal+pages+publisher+publishing_year+school+title -S s \
        -0 corpus \
        -xsB

    # Build the index files for the freshly encoded corpus and validate them.
    cwb-make \
        -V CORPUS
else
    # Report on stderr and fail, so callers can detect the misuse.
    echo "Unknown command" >&2
    exit 1
fi
|
14
foo.sh
14
foo.sh
@ -1,14 +0,0 @@
|
|||||||
#!/bin/bash
#
# Encode the VRT source file into a CWB corpus and build its indexes.
#
# Input:
# - Source file: "/root/files/corpus.vrt"
# Output:
# - Corpus data directory: "/corpora/data/corpus"
# - Corpus registry file: "/usr/local/share/cwb/registry/corpus"

# Stop at the first failing command so that cwb-make never runs against a
# corpus that cwb-encode could not build.
set -e

mkdir -p /corpora/data/corpus

# -P declares positional attributes, -S structural attributes.
cwb-encode \
    -c utf8 \
    -d /corpora/data/corpus \
    -f /root/files/corpus.vrt \
    -R /usr/local/share/cwb/registry/corpus \
    -P lemma -P simple_pos -P pos -P ner \
    -S text:0+address+author+booktitle+chapter+editor+institution+journal+pages+publisher+publishing_year+school+title -S s \
    -0 corpus \
    -xsB

# Build the index files for the freshly encoded corpus and validate them.
cwb-make \
    -V CORPUS
|
|
@ -1,21 +0,0 @@
|
|||||||
"""Merge the <text> element of every corpus file in a directory into corpus.vrt."""
import argparse
import os
import xml.etree.ElementTree as ET

OUTPUT_NAME = 'corpus.vrt'


def build_corpus(corpus_dir):
    """Collect one <text> element per input file and write them to corpus.vrt.

    Every regular file in ``corpus_dir`` except ``corpus.vrt`` itself is
    parsed as XML. The first ``<text>`` child of each document root is
    appended, in sorted file-name order, to a new ``<corpus>`` root, which is
    then written to ``corpus_dir/corpus.vrt``.

    Raises:
        ValueError: if an input file has no ``<text>`` child under its root.
        xml.etree.ElementTree.ParseError: if an input file is not valid XML.
    """
    root = ET.Element('corpus')
    # os.listdir() order is arbitrary; sorting keeps the output reproducible.
    for file_name in sorted(os.listdir(corpus_dir)):
        path = os.path.join(corpus_dir, file_name)
        # Skip sub-directories and the output of a previous run.
        if os.path.isdir(path) or file_name == OUTPUT_NAME:
            continue
        text_node = ET.parse(path).find('text')
        if text_node is None:
            # Fail with the offending file name instead of an opaque
            # TypeError when the node is added to the tree.
            raise ValueError('no <text> element found in {}'.format(path))
        # append() preserves input order; insert(1, ...) would place every
        # node after the second one in front of its predecessors.
        root.append(text_node)
    # NOTE(review): write() defaults to US-ASCII, so non-ASCII characters are
    # emitted as numeric character references -- confirm that the consumer of
    # corpus.vrt handles those.
    ET.ElementTree(root).write(os.path.join(corpus_dir, OUTPUT_NAME))


def main():
    """Parse the command line and build corpus.vrt in the given directory."""
    parser = argparse.ArgumentParser()
    parser.add_argument('-dir', dest='corpus_dir', required=True)
    args = parser.parse_args()
    build_corpus(args.corpus_dir)


if __name__ == '__main__':
    main()
|
|
Loading…
Reference in New Issue
Block a user