mirror of
https://gitlab.ub.uni-bielefeld.de/sfb1288inf/nlp.git
synced 2024-12-26 07:54:18 +00:00
Fix last errors
This commit is contained in:
parent
1a3e4a0a02
commit
79043f3dd7
2
nlp
2
nlp
@ -92,7 +92,7 @@ class NLPWorkflow(WorkflowRunner):
|
|||||||
max(1, int(self.n_cores / len(self.jobs)))
|
max(1, int(self.n_cores / len(self.jobs)))
|
||||||
)
|
)
|
||||||
for index, job in enumerate(self.jobs):
|
for index, job in enumerate(self.jobs):
|
||||||
cmd = 'spacy_nlp -l "{}" "{}" "{}" "{}"'.format(
|
cmd = 'spacy_nlp -l "{}" "{}" "{}" {}'.format(
|
||||||
self.lang,
|
self.lang,
|
||||||
job['path'],
|
job['path'],
|
||||||
os.path.join(job['output_dir'], job['name'] + '.vrt'),
|
os.path.join(job['output_dir'], job['name'] + '.vrt'),
|
||||||
|
@ -42,7 +42,7 @@ if args.check_encoding:
|
|||||||
bytes = input_file.read()
|
bytes = input_file.read()
|
||||||
encoding = chardet.detect(bytes)['encoding']
|
encoding = chardet.detect(bytes)['encoding']
|
||||||
else:
|
else:
|
||||||
encoding='utf-8'
|
encoding = 'utf-8'
|
||||||
# Read text from the input file and if necessary split it into parts with a
|
# Read text from the input file and if necessary split it into parts with a
|
||||||
# length of less than 1 million characters.
|
# length of less than 1 million characters.
|
||||||
with open(args.i, encoding=encoding) as input_file:
|
with open(args.i, encoding=encoding) as input_file:
|
||||||
|
Loading…
Reference in New Issue
Block a user