From 5139fd9727cf9291e4d077733bd3435d9752a2dd Mon Sep 17 00:00:00 2001 From: Patrick Jentsch Date: Tue, 22 Jun 2021 12:46:01 +0200 Subject: [PATCH] Fix problem where encoding is not set --- spacy-nlp | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/spacy-nlp b/spacy-nlp index 7dde3ac..d55bfa0 100755 --- a/spacy-nlp +++ b/spacy-nlp @@ -27,14 +27,12 @@ parser.add_argument('-c', '--check-encoding', help='Check encoding of the input file, UTF-8 is used instead') # noqa args = parser.parse_args() -if args.check_encoding: - with open(args.input, "rb") as text_file: - if args.check_encoding: - encoding = chardet.detect(text_file.read())['encoding'] - else: - encoding = 'utf-8' - with open(args.input, "rb") as text_file: + if args.check_encoding: + encoding = chardet.detect(text_file.read())['encoding'] + else: + encoding = 'utf-8' + text_file.seek(0) text_md5 = hashlib.md5() for chunk in iter(lambda: text_file.read(128 * text_md5.block_size), b''): text_md5.update(chunk)