diff --git a/spacy_nlp b/spacy_nlp index bd3921c..046ae10 100755 --- a/spacy_nlp +++ b/spacy_nlp @@ -32,6 +32,10 @@ SPACY_MODELS = {"de": "de_core_news_sm", "en": "en_core_web_sm", # Set the language model for spacy nlp = spacy.load(SPACY_MODELS[args.lang]) +# Set the maximum character length for input documents. According to the spaCy +# documentation, values above 1,000,000 (1 million) can cause memory allocation +# errors. We are testing with 10 million for now. +nlp.max_length = 10000000 # 10 million character limit # Read text from the input file with open(args.input) as input_file: @@ -56,4 +60,4 @@ for sent in doc.sents: + (token.ent_type_ if token.ent_type_ != "" else "NULL") + "\n") output_file.write('\n') output_file.write('\n') -output_file.close() \ No newline at end of file +output_file.close()