mirror of
				https://gitlab.ub.uni-bielefeld.de/sfb1288inf/nlp.git
				synced 2025-10-31 20:43:14 +00:00 
			
		
		
		
	Add nlp.max_length option.
This commit is contained in:
		| @@ -32,6 +32,10 @@ SPACY_MODELS = {"de": "de_core_news_sm", "en": "en_core_web_sm", | ||||
|  | ||||
| # Set the language model for spacy | ||||
| nlp = spacy.load(SPACY_MODELS[args.lang]) | ||||
| # Set the maximum character length for input documents. According to the spaCy | ||||
| # documentation, values above 1000000 (1 million) can cause memory allocation | ||||
| # errors. We are testing with 10 million for now. | ||||
| nlp.max_length = 10000000  # 10 Million character limit | ||||
|  | ||||
| # Read text from the input file | ||||
| with open(args.input) as input_file: | ||||
| @@ -56,4 +60,4 @@ for sent in doc.sents: | ||||
|                           + (token.ent_type_ if token.ent_type_ != "" else "NULL") + "\n") | ||||
|     output_file.write('</s>\n') | ||||
| output_file.write('</text>\n</corpus>') | ||||
| output_file.close() | ||||
| output_file.close() | ||||
|   | ||||
		Reference in New Issue
	
	Block a user