From ebff8fb7f1dbf265937eb078626187069433cca7 Mon Sep 17 00:00:00 2001 From: Stephan Porada Date: Mon, 14 Oct 2019 14:00:41 +0200 Subject: [PATCH] Add author and title tags. Fix some errors in note.txt --- docs/note.txt | 5 ++++- files/utopien.vrt | 48 +++++++++++++++++++++++------------------------ 2 files changed, 28 insertions(+), 25 deletions(-) diff --git a/docs/note.txt b/docs/note.txt index 8f2ea3f..163bb05 100644 --- a/docs/note.txt +++ b/docs/note.txt @@ -1,7 +1,9 @@ # Enter bash in cqpwebserver -docker exec -it cqperver_cqpserver_1 /bin/bash +docker exec -it cqpserver_cqpserver_1 /bin/bash ## Encode example corpus +mkdir /corpora/data/example + cwb-encode -d /corpora/data/example \ -f /root/files/example.vrt \ -R /usr/local/share/cwb/registry/example \ @@ -11,6 +13,7 @@ cwb-make -V EXAMPLE cwb-describe-corpus EXAMPLE ## Encode utopien corpus +mkdir /corpora/data/utopien cwb-encode -d /corpora/data/utopien \ -f /root/files/utopien.vrt \ -sxB \ diff --git a/files/utopien.vrt b/files/utopien.vrt index 74bdae1..d232827 100644 --- a/files/utopien.vrt +++ b/files/utopien.vrt @@ -1,6 +1,6 @@ - + The the AT |Z5| Project project NN1 |X7+|X2.4| @@ -118560,7 +118560,7 @@ eBooks ebook NN2 |Q4.1/Y2| . PUNC YSTP |PUNC| - + 1984 1984 MC |N1|T1.2|T3| By by II |Z5| @@ -251365,7 +251365,7 @@ as as CSA |Z5| 393 393 MC |N1|T1.2|T3| - + PICTURES picture NN2 |C1|Q4.3|X4.1| OF of IO |Z5| @@ -296907,7 +296907,7 @@ ERNST ernst NP1 |Z99| . PUNC YSTP |PUNC| - + Ernest ernest NP1 |Z1m| gallenbach gallenbach JJ |Z99| @@ -382648,7 +382648,7 @@ home home RL |H4|M6| WILL will VM |T1.1.3| - + KALLOCAIN kallocain VV0 |Z99| BY by II |Z5| @@ -462032,7 +462032,7 @@ PAIPHO paipho NN1 |Z99| Censor censor VV0 |Q4/S7.4-| - + SHEPHERD shepherd NP1 |F4/S2mf| : PUNC YCOL |PUNC| @@ -472856,7 +472856,7 @@ not not XX |Z6| be be VBI |A3+|Z5| - + Every every AT1 |N5.1+| human human JJ |S2mf| @@ -479322,7 +479322,7 @@ curse curse VV0 |Q2.2|A1.4-| Goddamn goddamn UH |Z99| - + The the AT |Z5| Project project NN1 |X7+|X2.4| @@ -534794,7 +534794,7 @@ eBooks ebook NN2 |Q4.1/Y2| . PUNC YSTP |PUNC| - + Neuromancer neuromancer NP1 |Z99| William william NP1 |Z1m| @@ -651295,7 +651295,7 @@ why why RRQ |A2.2| . PUNC YSTP |PUNC| - + Introduction introduction NN1 |T2+|Q4|S1.1.1| 1LITERARY 1literary FO |Z99| @@ -720473,7 +720473,7 @@ and and CC |Z5| even even RR |A13.1| - + The the AT |Z5| Project project NN1 |X7+|X2.4| @@ -906270,7 +906270,7 @@ eBooks ebook NN2 |Q4.1/Y2| . PUNC YSTP |PUNC| - + The the AT |Z5| courage courage NN1 |E5+/S1.2| @@ -922466,7 +922466,7 @@ i0% i0% FO |Z99| i i ZZ1 |Z5| - + FAHRENHEIT fahrenheit NP1 |Z99| 451 451 MC |N1|T1.2|T3| @@ -988288,7 +988288,7 @@ THE the AT |Z5| END end NN1 |M6|T2-|O2| - + The the AT |Z5| Project project NN1 |X7+|X2.4| @@ -1093742,7 +1093742,7 @@ eBooks ebook NN2 |Q4.1/Y2| . PUNC YSTP |PUNC| - + I i PPIS1 |Z8mf| , PUNC YCOM |PUNC| @@ -1188344,7 +1188344,7 @@ end end NN1 |M6|T2-|O2| ---- ---- NN1 |Z99| - + Do do VD0 |A1.1.1|G2.2-|X9.2+| Androids android NN2 |O3| @@ -1279301,7 +1279301,7 @@ coffee coffee NN1 |F2| . PUNC YSTP |PUNC| - + RECORD record VV0 |Q1.2|K3| ONE one PN1 |Z8| @@ -1364770,7 +1364770,7 @@ prevail prevail VVI |X9.2+|S7.1+| . PUNC YSTP |PUNC| - + Project project NP1 |A10+|X7+|X2.6+| Gutenberg gutenberg NP1 |Z99| @@ -1464183,7 +1464183,7 @@ William william NP1 |Z1m| Morris morris NP1 |Z1mf| - + The the AT |Z5| Project project NN1 |X7+|X2.4| @@ -1566447,7 +1566447,7 @@ eBooks ebook NN2 |Q4.1/Y2| . PUNC YSTP |PUNC| - + The the AT |Z5| Project project NN1 |X7+|X2.4| @@ -1588246,7 +1588246,7 @@ Francis francis NP1 |Z1m| Bacon bacon NP1 |F1| - + A a AT1 |Z5| Logic logic NN1 |X2.1|S1.2.6+|N2| @@ -1597349,7 +1597349,7 @@ hand hand NN1 |Z4| maybe maybe RR |A7| - + The the AT |Z5| Project project NN1 |X7+|X2.4| @@ -1817595,7 +1817595,7 @@ LICENSE license NN1 |G1.1/Q1.2|S7.4+| *** *** FO |Z99| - + CONSIDER consider VV0 |X2.1|X2.4|X6| THE the AT |Z5| @@ -1957008,7 +1957008,7 @@ March march NPM1 |T1.3| . PUNC YSTP |PUNC| - + BRAVE brave JJ |E5+| NEW new JJ |T3-|