Escape text and lemma

This commit is contained in:
Patrick Jentsch 2020-02-04 13:12:31 +01:00
parent ec2cf1dcff
commit 6769be049a

View File

@@ -1,6 +1,7 @@
#!/usr/bin/env python3.5 #!/usr/bin/env python3.5
# coding=utf-8 # coding=utf-8
from xml.sax.saxutils import escape
import argparse import argparse
import os import os
import spacy import spacy
@@ -56,8 +57,8 @@ for text in texts:
# text, lemma, simple_pos, pos, ner # text, lemma, simple_pos, pos, ner
output_file.write( output_file.write(
'{}\t{}\t{}\t{}\t{}\n'.format( '{}\t{}\t{}\t{}\t{}\n'.format(
token.text, escape(token.text),
token.lemma_, escape(token.lemma_),
token.pos_, token.pos_,
token.tag_, token.tag_,
token.ent_type_ if token.ent_type_ != '' else 'NULL' token.ent_type_ if token.ent_type_ != '' else 'NULL'