#!/bin/bash
#
# Encode the verticalized corpus file (/root/files/corpus.vrt) into the
# IMS Open Corpus Workbench (CWB) binary format and build its indexes.
#
# Steps:
#   1. Create the CWB data directory.
#   2. Run cwb-encode to write the data files and the registry entry.
#   3. Run cwb-make to index (and validate) the freshly encoded corpus.
#
# NOTE(review): the flag descriptions below follow the CWB manual; confirm
# them against the CWB version installed in this environment.

# Abort on the first failing command so cwb-make never runs on a corpus
# that failed to encode, and treat unset variables as errors.
set -euo pipefail

# Directory holding the encoded corpus data; shared by mkdir and cwb-encode
# so the two can never point at different locations.
readonly DATA_DIR=/corpora/data/corpus

mkdir -p "${DATA_DIR}"

# -c  character encoding of the input
# -d  output directory for the data files
# -f  input file in verticalized text (.vrt) format
# -R  registry file to create for the corpus
# -P  additional positional attributes (one column each, after the word form)
# -S  structural attributes; "text:0+author+..." also declares the listed
#     XML attributes of <text>
# -0  XML element to ignore (here: the enclosing <corpus> tags)
# -x  XML-aware input handling, -s skip empty lines, -B strip blanks
cwb-encode \
  -c utf8 \
  -d "${DATA_DIR}" \
  -f /root/files/corpus.vrt \
  -R /usr/local/share/cwb/registry/corpus \
  -P lemma -P simple_pos -P pos -P ner \
  -S text:0+author+id+publishing_year+title -S s \
  -0 corpus \
  -xsB

# Build index and compressed files for the corpus (CWB corpus IDs are given
# in upper case); -V asks cwb-make to validate the files it creates.
cwb-make \
  -V CORPUS