mirror of
https://gitlab.ub.uni-bielefeld.de/sfb1288inf/nlp.git
synced 2024-12-25 19:54:18 +00:00
Fix problem where encoding is not set
This commit is contained in:
parent
fd39246e4b
commit
5139fd9727
12
spacy-nlp
12
spacy-nlp
@ -27,14 +27,12 @@ parser.add_argument('-c', '--check-encoding',
|
||||
help='Check encoding of the input file, UTF-8 is used instead') # noqa
|
||||
args = parser.parse_args()
|
||||
|
||||
if args.check_encoding:
|
||||
with open(args.input, "rb") as text_file:
|
||||
if args.check_encoding:
|
||||
encoding = chardet.detect(text_file.read())['encoding']
|
||||
else:
|
||||
encoding = 'utf-8'
|
||||
|
||||
with open(args.input, "rb") as text_file:
|
||||
if args.check_encoding:
|
||||
encoding = chardet.detect(text_file.read())['encoding']
|
||||
else:
|
||||
encoding = 'utf-8'
|
||||
text_file.seek(0)
|
||||
text_md5 = hashlib.md5()
|
||||
for chunk in iter(lambda: text_file.read(128 * text_md5.block_size), b''):
|
||||
text_md5.update(chunk)
|
||||
|
Loading…
Reference in New Issue
Block a user