mirror of
https://gitlab.ub.uni-bielefeld.de/sfb1288inf/nlp.git
synced 2024-12-26 22:24:17 +00:00
Fix problem where encoding is not set
This commit is contained in:
parent
fd39246e4b
commit
5139fd9727
@ -27,14 +27,12 @@ parser.add_argument('-c', '--check-encoding',
|
|||||||
help='Check encoding of the input file, UTF-8 is used instead') # noqa
|
help='Check encoding of the input file, UTF-8 is used instead') # noqa
|
||||||
args = parser.parse_args()
|
args = parser.parse_args()
|
||||||
|
|
||||||
if args.check_encoding:
|
|
||||||
with open(args.input, "rb") as text_file:
|
with open(args.input, "rb") as text_file:
|
||||||
if args.check_encoding:
|
if args.check_encoding:
|
||||||
encoding = chardet.detect(text_file.read())['encoding']
|
encoding = chardet.detect(text_file.read())['encoding']
|
||||||
else:
|
else:
|
||||||
encoding = 'utf-8'
|
encoding = 'utf-8'
|
||||||
|
text_file.seek(0)
|
||||||
with open(args.input, "rb") as text_file:
|
|
||||||
text_md5 = hashlib.md5()
|
text_md5 = hashlib.md5()
|
||||||
for chunk in iter(lambda: text_file.read(128 * text_md5.block_size), b''):
|
for chunk in iter(lambda: text_file.read(128 * text_md5.block_size), b''):
|
||||||
text_md5.update(chunk)
|
text_md5.update(chunk)
|
||||||
|
Loading…
Reference in New Issue
Block a user