Use language models from repository. Remove workaround for the legacy German Fraktur model.

This commit is contained in:
Patrick Jentsch
2019-07-31 11:13:55 +02:00
parent 1a3d7175fe
commit fa4a798351
3 changed files with 12 additions and 49 deletions

8
ocr
View File

@@ -30,7 +30,7 @@ def parse_arguments():
parser.add_argument(
'-l',
choices=[
-'deu', 'deu_frak', 'eng', 'enm', 'fra', 'frm', 'ita', 'por', 'spa'
+'deu', 'eng', 'enm', 'fra', 'frk', 'frm', 'ita', 'por', 'spa'
],
dest='lang',
required=True
@@ -240,12 +240,6 @@ class OCRWorkflow(WorkflowRunner):
' the available core number.
'''
ocr_job_n_cores = min(4, self.n_cores)
-'''
-' WORKAROUND: Tesseract only uses one core for the deu_frak language
-' model, so the workflow will also only reserve one in this case.
-'''
-if self.lang == "deu_frak":
-ocr_job_n_cores = 1
for index, job in enumerate(self.jobs):
files = os.listdir(os.path.join(job['output_dir'], 'tmp'))
if self.skip_binarisation: