Escape text and lemma

This commit is contained in:
Patrick Jentsch 2020-02-04 13:12:31 +01:00
parent ec2cf1dcff
commit 6769be049a

View File

@@ -1,6 +1,7 @@
#!/usr/bin/env python3.5
# coding=utf-8
+from xml.sax.saxutils import escape
import argparse
import os
import spacy
@@ -56,8 +57,8 @@ for text in texts:
# text, lemma, simple_pos, pos, ner
output_file.write(
'{}\t{}\t{}\t{}\t{}\n'.format(
-token.text,
-token.lemma_,
+escape(token.text),
+escape(token.lemma_),
token.pos_,
token.tag_,
token.ent_type_ if token.ent_type_ != '' else 'NULL'