From 5139fd9727cf9291e4d077733bd3435d9752a2dd Mon Sep 17 00:00:00 2001
From: Patrick Jentsch
Date: Tue, 22 Jun 2021 12:46:01 +0200
Subject: [PATCH] Fix encoding being left unset when --check-encoding is not given
---
spacy-nlp | 12 +++++-------
1 file changed, 5 insertions(+), 7 deletions(-)
diff --git a/spacy-nlp b/spacy-nlp
index 7dde3ac..d55bfa0 100755
--- a/spacy-nlp
+++ b/spacy-nlp
@@ -27,14 +27,12 @@ parser.add_argument('-c', '--check-encoding',
help='Check encoding of the input file, UTF-8 is used instead') # noqa
args = parser.parse_args()
-if args.check_encoding:
- with open(args.input, "rb") as text_file:
- if args.check_encoding:
- encoding = chardet.detect(text_file.read())['encoding']
- else:
- encoding = 'utf-8'
-
with open(args.input, "rb") as text_file:
+ if args.check_encoding:
+ encoding = chardet.detect(text_file.read())['encoding']
+ else:
+ encoding = 'utf-8'
+ text_file.seek(0)
text_md5 = hashlib.md5()
for chunk in iter(lambda: text_file.read(128 * text_md5.block_size), b''):
text_md5.update(chunk)