mirror of
https://gitlab.ub.uni-bielefeld.de/sfb1288inf/nlp.git
synced 2024-12-27 14:14:17 +00:00
Add nlp.max_length option.
This commit is contained in:
parent
2a0662bccc
commit
e578f5a8ff
@ -32,6 +32,10 @@ SPACY_MODELS = {"de": "de_core_news_sm", "en": "en_core_web_sm",
|
|||||||
|
|
||||||
# Set the language model for spacy
|
# Set the language model for spacy
|
||||||
nlp = spacy.load(SPACY_MODELS[args.lang])
|
nlp = spacy.load(SPACY_MODELS[args.lang])
|
||||||
|
# Set the maximum character length for input documents. According to the spaCy
|
||||||
|
# documentation, values above 1000000 (1 million) can cause memory allocation
|
||||||
|
# errors. We are testing it with 10 million for now.
|
||||||
|
nlp.max_length = 10000000 # 10 Million character limit
|
||||||
|
|
||||||
# Read text from the input file
|
# Read text from the input file
|
||||||
with open(args.input) as input_file:
|
with open(args.input) as input_file:
|
||||||
|
Loading…
Reference in New Issue
Block a user