Remove path from text id.

This commit is contained in:
Patrick Jentsch 2019-03-06 18:31:18 +01:00
parent 2b7e8cd48d
commit 19ac9e8999

View File

@@ -42,7 +42,7 @@ with open(args.input) as input_file:
# Create and open the output file
output_file = open(args.output, "w+")
-output_file.write('<?xml version="1.0" encoding="UTF-8"?>\n<corpus>\n<text id="' + args.input.rsplit(".", 1)[0] + '">\n')
+output_file.write('<?xml version="1.0" encoding="UTF-8"?>\n<corpus>\n<text id="' + os.path.basename(args.input).rsplit(".", 1)[0] + '">\n')
for text in texts:
# Run spacy nlp over the text (partial string if above 1 million chars)
doc = nlp(text)