mirror of
https://gitlab.ub.uni-bielefeld.de/sfb1288inf/nlp.git
synced 2025-01-13 13:50:33 +00:00
Remove path from text id.
This commit is contained in:
parent
2b7e8cd48d
commit
19ac9e8999
@ -42,7 +42,7 @@ with open(args.input) as input_file:
|
||||
|
||||
# Create and open the output file
|
||||
output_file = open(args.output, "w+")
|
||||
output_file.write('<?xml version="1.0" encoding="UTF-8"?>\n<corpus>\n<text id="' + args.input.rsplit(".", 1)[0] + '">\n')
|
||||
output_file.write('<?xml version="1.0" encoding="UTF-8"?>\n<corpus>\n<text id="' + os.path.basename(args.input).rsplit(".", 1)[0] + '">\n')
|
||||
for text in texts:
|
||||
# Run spacy nlp over the text (partial string if above 1 million chars)
|
||||
doc = nlp(text)
|
||||
|
Loading…
x
Reference in New Issue
Block a user