mirror of
				https://gitlab.ub.uni-bielefeld.de/sfb1288inf/nlp.git
				synced 2025-10-31 12:52:47 +00:00 
			
		
		
		
	Escape text and lemma
This commit is contained in:
		| @@ -1,6 +1,7 @@ | ||||
| #!/usr/bin/env python3.5 | ||||
| # coding=utf-8 | ||||
|  | ||||
|+from xml.sax.saxutils import escape | ||||
| import argparse | ||||
| import os | ||||
| import spacy | ||||
| @@ -56,8 +57,8 @@ for text in texts: | ||||
|             # text, lemma, simple_pos, pos, ner | ||||
|             output_file.write( | ||||
|                 '{}\t{}\t{}\t{}\t{}\n'.format( | ||||
|-                    token.text, | ||||
|-                    token.lemma_, | ||||
|+                    escape(token.text), | ||||
|+                    escape(token.lemma_), | ||||
|                     token.pos_, | ||||
|                     token.tag_, | ||||
|                     token.ent_type_ if token.ent_type_ != '' else 'NULL' | ||||
|   | ||||
		Reference in New Issue
	
	Block a user