#!/bin/bash
#
# Encode the verticalized corpus file into IMS Open Corpus Workbench (CWB)
# binary format and build the indexes needed to query it.
#
# Inputs:  /root/files/corpus.vrt                 (one token per line, XML tags)
# Outputs: /corpora/data/corpus                   (CWB data directory)
#          /usr/local/share/cwb/registry/corpus   (CWB registry entry)

# Abort on the first failing step so cwb-make never indexes a corpus that
# cwb-encode could not write completely.
set -euo pipefail

readonly data_dir=/corpora/data/corpus

mkdir -p "${data_dir}"

# -c  character encoding of the input
# -d  data directory the encoded attribute files are written to
# -f  input file in verticalized text format
# -R  registry file to create for the corpus
# -P  additional positional (per-token) attributes, in input column order
# -S  structural attributes (XML regions); "text:0+..." declares <text>
#     without nesting and lists the XML attributes to keep as annotations
# -0  null attribute: <corpus> tags are recognised and discarded
# -x  XML-aware mode, -s skip empty lines, -B strip surrounding blanks
cwb-encode \
  -c utf8 \
  -d "${data_dir}" \
  -f /root/files/corpus.vrt \
  -R /usr/local/share/cwb/registry/corpus \
  -P lemma -P simple_pos -P pos -P ner \
  -S text:0+author+id+publishing_year+title -S s \
  -0 corpus \
  -xsB

# Build the index and compressed files for the registered corpus; -V makes
# cwb-make validate the files it creates. The corpus is addressed by its
# upper-case CWB name, which corresponds to the lower-case registry file.
cwb-make \
  -V CORPUS