mirror of
https://gitlab.ub.uni-bielefeld.de/sfb1288inf/nlp.git
synced 2024-12-26 22:14:18 +00:00
Escape XML special characters in token text and lemma
This commit is contained in:
parent
ec2cf1dcff
commit
6769be049a
@ -1,6 +1,7 @@
|
|||||||
#!/usr/bin/env python3.5
|
#!/usr/bin/env python3.5
|
||||||
# coding=utf-8
|
# coding=utf-8
|
||||||
|
|
||||||
|
from xml.sax.saxutils import escape
|
||||||
import argparse
|
import argparse
|
||||||
import os
|
import os
|
||||||
import spacy
|
import spacy
|
||||||
@ -56,8 +57,8 @@ for text in texts:
|
|||||||
# text, lemma, simple_pos, pos, ner
|
# text, lemma, simple_pos, pos, ner
|
||||||
output_file.write(
|
output_file.write(
|
||||||
'{}\t{}\t{}\t{}\t{}\n'.format(
|
'{}\t{}\t{}\t{}\t{}\n'.format(
|
||||||
token.text,
|
escape(token.text),
|
||||||
token.lemma_,
|
escape(token.lemma_),
|
||||||
token.pos_,
|
token.pos_,
|
||||||
token.tag_,
|
token.tag_,
|
||||||
token.ent_type_ if token.ent_type_ != '' else 'NULL'
|
token.ent_type_ if token.ent_type_ != '' else 'NULL'
|
||||||
|
Loading…
Reference in New Issue
Block a user