diff --git a/nlp b/nlp index 74cd2b2..482ca7a 100755 --- a/nlp +++ b/nlp @@ -45,7 +45,7 @@ def parse_arguments(): action='store_true', default=False, dest="check_encoding", - help='''if used the nlp process will know hat the encoding of + help='''if used the nlp process will know hat the encoding of the input files is unkown and thus != utf-8. The process will try to determine the encoding of the input files and use this. encoding.''' @@ -60,7 +60,7 @@ class NLPWorkflow(WorkflowRunner): self.n_cores = args.n_cores self.output_dir = args.output_dir self.zip = args.zip - self.check_encoding + self.check_encoding = args.check_encoding def workflow(self): if len(self.jobs) == 0: @@ -95,8 +95,8 @@ class NLPWorkflow(WorkflowRunner): cmd = 'spacy_nlp -l "{}" "{}" "{}" "{}"'.format( self.lang, job['path'], - os.path.join(job['output_dir'], job['name'] + '.vrt', - if self.check_encoding "--check-encoding" else "") + os.path.join(job['output_dir'], job['name'] + '.vrt'), + "--check-encoding" if self.check_encoding else "" ) nlp_jobs.append( self.addTask( diff --git a/spacy_nlp b/spacy_nlp index 1cb4ac4..d65dd81 100755 --- a/spacy_nlp +++ b/spacy_nlp @@ -3,7 +3,7 @@ from xml.sax.saxutils import escape import argparse -import os +import chardet import spacy import textwrap