Add nlp.text_length option.

This commit is contained in:
Stephan Porada 2019-03-05 15:01:57 +01:00
parent 2a0662bccc
commit e578f5a8ff

View File

@@ -32,6 +32,10 @@ SPACY_MODELS = {"de": "de_core_news_sm", "en": "en_core_web_sm",
# Set the language model for spacy # Set the language model for spacy
nlp = spacy.load(SPACY_MODELS[args.lang]) nlp = spacy.load(SPACY_MODELS[args.lang])
# Set the maximum character length for input documents. According to the
# documentation, values above 1000000 (1 million) can cause memory allocation
# errors. We are testing with 10 million for now.
nlp.max_length = 10000000 # 10 Million character limit
# Read text from the input file # Read text from the input file
with open(args.input) as input_file: with open(args.input) as input_file: