From 6769be049ad45fae51e96b372d51f0cac7ce3049 Mon Sep 17 00:00:00 2001
From: Patrick Jentsch
Date: Tue, 4 Feb 2020 13:12:31 +0100
Subject: [PATCH] Escape text and lemma
---
spacy_nlp | 5 +++--
1 file changed, 3 insertions(+), 2 deletions(-)
diff --git a/spacy_nlp b/spacy_nlp
index e5d086d..0844ad1 100755
--- a/spacy_nlp
+++ b/spacy_nlp
@@ -1,6 +1,7 @@
#!/usr/bin/env python3.5
# coding=utf-8
+from xml.sax.saxutils import escape
import argparse
import os
import spacy
@@ -56,8 +57,8 @@ for text in texts:
# text, lemma, simple_pos, pos, ner
output_file.write(
'{}\t{}\t{}\t{}\t{}\n'.format(
- token.text,
- token.lemma_,
+ escape(token.text),
+ escape(token.lemma_),
token.pos_,
token.tag_,
token.ent_type_ if token.ent_type_ != '' else 'NULL'