From 6769be049ad45fae51e96b372d51f0cac7ce3049 Mon Sep 17 00:00:00 2001
From: Patrick Jentsch 
Date: Tue, 4 Feb 2020 13:12:31 +0100
Subject: [PATCH] XML-escape token text and lemma before writing output
---
 spacy_nlp | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)
diff --git a/spacy_nlp b/spacy_nlp
index e5d086d..0844ad1 100755
--- a/spacy_nlp
+++ b/spacy_nlp
@@ -1,6 +1,7 @@
 #!/usr/bin/env python3.5
 # coding=utf-8
 
+from xml.sax.saxutils import escape
 import argparse
 import os
 import spacy
@@ -56,8 +57,8 @@ for text in texts:
             # text, lemma, simple_pos, pos, ner
             output_file.write(
                 '{}\t{}\t{}\t{}\t{}\n'.format(
-                    token.text,
-                    token.lemma_,
+                    escape(token.text),
+                    escape(token.lemma_),
                     token.pos_,
                     token.tag_,
                     token.ent_type_ if token.ent_type_ != '' else 'NULL'