mirror of
https://gitlab.ub.uni-bielefeld.de/sfb1288inf/nlp.git
synced 2024-12-26 08:54:18 +00:00
Escape text and lemma
This commit is contained in:
parent
ec2cf1dcff
commit
6769be049a
@@ -1,6 +1,7 @@
|
||||
#!/usr/bin/env python3.5
|
||||
# coding=utf-8
|
||||
|
||||
+from xml.sax.saxutils import escape
|
||||
import argparse
|
||||
import os
|
||||
import spacy
|
||||
@@ -56,8 +57,8 @@ for text in texts:
|
||||
# text, lemma, simple_pos, pos, ner
|
||||
output_file.write(
|
||||
'{}\t{}\t{}\t{}\t{}\n'.format(
|
||||
-token.text,
|
||||
-token.lemma_,
|
||||
+escape(token.text),
|
||||
+escape(token.lemma_),
|
||||
token.pos_,
|
||||
token.tag_,
|
||||
token.ent_type_ if token.ent_type_ != '' else 'NULL'
|
||||
|
Loading…
Reference in New Issue
Block a user