mirror of
https://gitlab.ub.uni-bielefeld.de/sfb1288inf/nlp.git
synced 2024-12-26 22:54:17 +00:00
Add nlp.max_length option.
This commit is contained in:
parent
2a0662bccc
commit
e578f5a8ff
@ -32,6 +32,10 @@ SPACY_MODELS = {"de": "de_core_news_sm", "en": "en_core_web_sm",
|
||||
|
||||
# Set the language model for spacy
|
||||
nlp = spacy.load(SPACY_MODELS[args.lang])
|
||||
# Set the maximum character length for input documents. According to the spaCy documentation,
|
||||
# every value above 1000000 (1 Million) can cause memory allocation errors.
|
||||
# We are testing with a limit of 10 million characters for now.
|
||||
nlp.max_length = 10000000 # 10 Million character limit
|
||||
|
||||
# Read text from the input file
|
||||
with open(args.input) as input_file:
|
||||
@ -56,4 +60,4 @@ for sent in doc.sents:
|
||||
+ (token.ent_type_ if token.ent_type_ != "" else "NULL") + "\n")
|
||||
output_file.write('</s>\n')
|
||||
output_file.write('</text>\n</corpus>')
|
||||
output_file.close()
|
||||
output_file.close()
|
||||
|
Loading…
Reference in New Issue
Block a user