mirror of
https://gitlab.ub.uni-bielefeld.de/sfb1288inf/nlp.git
synced 2025-01-14 12:10:36 +00:00
Remove path from text id.
This commit is contained in:
parent
2b7e8cd48d
commit
19ac9e8999
@@ -42,7 +42,7 @@ with open(args.input) as input_file:
|
|||||||
|
|
||||||
# Create and open the output file
|
# Create and open the output file
|
||||||
output_file = open(args.output, "w+")
|
output_file = open(args.output, "w+")
|
||||||
output_file.write('<?xml version="1.0" encoding="UTF-8"?>\n<corpus>\n<text id="' + args.input.rsplit(".", 1)[0] + '">\n')
|
output_file.write('<?xml version="1.0" encoding="UTF-8"?>\n<corpus>\n<text id="' + os.path.basename(args.input).rsplit(".", 1)[0] + '">\n')
|
||||||
for text in texts:
|
for text in texts:
|
||||||
# Run spacy nlp over the text (partial string if above 1 million chars)
|
# Run spacy nlp over the text (partial string if above 1 million chars)
|
||||||
doc = nlp(text)
|
doc = nlp(text)
|
||||||
|
Loading…
x
Reference in New Issue
Block a user