mirror of
				https://gitlab.ub.uni-bielefeld.de/sfb1288inf/nlp.git
				synced 2025-10-31 13:02:44 +00:00 
			
		
		
		
	Remove id xml attribute from output file
This commit is contained in:
		
							
								
								
									
										14
									
								
								spacy_nlp
									
									
									
									
									
								
							
							
						
						
									
										14
									
								
								spacy_nlp
									
									
									
									
									
								
							| @@ -40,11 +40,9 @@ with open(args.i) as input_file: | ||||
| # Create and open the output file | ||||
| output_file = open(args.o, 'w+') | ||||
|  | ||||
| output_file.write( | ||||
|     '<?xml version="1.0" encoding="UTF-8"?>\n' | ||||
|     '<corpus>\n' | ||||
|     '<text id="{}">\n'.format(os.path.basename(args.i).rsplit(".", 1)[0]) | ||||
| ) | ||||
| output_file.write('<?xml version="1.0" encoding="UTF-8"?>\n' | ||||
|                   '<corpus>\n' | ||||
|                   '<text>\n') | ||||
| for text in texts: | ||||
|     # Run spacy nlp over the text (partial string if above 1 million chars) | ||||
|     doc = nlp(text) | ||||
| @@ -66,9 +64,7 @@ for text in texts: | ||||
|                 ) | ||||
|             ) | ||||
|         output_file.write('</s>\n') | ||||
| output_file.write( | ||||
|     '</text>\n' | ||||
|     '</corpus>' | ||||
| ) | ||||
| output_file.write('</text>\n' | ||||
|                   '</corpus>') | ||||
|  | ||||
| output_file.close() | ||||
|   | ||||
		Reference in New Issue
	
	Block a user