From e1462152fe437c1b8bf5dd4c582d125c71ce8fb0 Mon Sep 17 00:00:00 2001
From: Patrick Jentsch <pjentsch@pjentsch-Laptop.local>
Date: Mon, 20 May 2019 11:10:40 +0200
Subject: [PATCH] Code style: use single quotes, rename nCores locals to n_cores

---
 hocrtotei | 12 +++++++-----
 ocr       | 18 +++++++++---------
 2 files changed, 16 insertions(+), 14 deletions(-)
diff --git a/hocrtotei b/hocrtotei
index efdf179..96a4045 100755
--- a/hocrtotei
+++ b/hocrtotei
@@ -5,7 +5,9 @@ from xml.sax.saxutils import escape
 import argparse
 import xml.etree.ElementTree as ET
 
-parser = argparse.ArgumentParser(description='hocrtotei merges several hOCR files in order of their occurrence on command line to one TEI result file.')
+parser = argparse.ArgumentParser(
+    description='Merges several hOCR files in order of their occurrence on command line to one TEI result file.'
+)
 parser.add_argument(
     'i',
     metavar='hOCR-sourcefile',
@@ -17,7 +19,7 @@ parser.add_argument(
 )
 args = parser.parse_args()
 
-output_file = open(args.o, "w")
+output_file = open(args.o, 'w')
 
 output_file.write(
       '<?xml version="1.0" encoding="UTF-8"?>\n'
@@ -37,11 +39,11 @@ output_file.write(
 for index, input_file in enumerate(args.i):
     tree = ET.parse(input_file)
     output_file.write('            <pb n="%i"/>\n' % (index + 1))
-    for para in tree.findall(".//*[@class='ocr_par']"):
+    for para in tree.findall('.//*[@class="ocr_par"]'):
         output_file.write('            <p>\n')
-        for line in para.findall(".//*[@class='ocr_line']"):
+        for line in para.findall('.//*[@class="ocr_line"]'):
             first_word_in_line = True
-            for word in line.findall(".//*[@class='ocrx_word']"):
+            for word in line.findall('.//*[@class="ocrx_word"]'):
                 if word.text is not None:
                     output_file.write(('                ' if first_word_in_line else ' ') + escape(word.text.strip()))
                     first_word_in_line = False
diff --git a/ocr b/ocr
index 5783263..ffe63aa 100755
--- a/ocr
+++ b/ocr
@@ -21,7 +21,7 @@ from pyflow import WorkflowRunner
 ''' TODO:
 ' Implement --end-page: Last page to ocr
 ' Implement --memMb: Total amount of memory (RAM) available for this workflow.
-'                    Default: 2048 * nCores
+'                    Default: 2048 * n_cores
 ' Implement --rotate: Rotate pages from input (90, 180, 270)
 ' Implement --split-pages: Split pages in half after possible rotation
 ' Implement --start-page: First page to ocr
@@ -123,7 +123,7 @@ class OCRWorkflow(WorkflowRunner):
         ' ##################################################
         '''
         split_jobs = []
-        split_job_nCores = min(
+        split_job_n_cores = min(
             self.n_cores,
             max(1, int(self.n_cores / len(self.jobs)))
         )
@@ -148,7 +148,7 @@ class OCRWorkflow(WorkflowRunner):
                     command=cmd,
                     dependencies='create_output_directories_job_-_%i' % (index),
                     label='split_job_-_%i' % (index),
-                    nCores=split_job_nCores
+                    nCores=split_job_n_cores
                 )
             )
 
@@ -170,7 +170,7 @@ class OCRWorkflow(WorkflowRunner):
             ' four cores available for this workflow, the available core
             ' number.
             '''
-            binarisation_job_nCores = min(4, self.n_cores)
+            binarisation_job_n_cores = min(4, self.n_cores)
             for index, job in enumerate(self.jobs):
                 files = os.listdir(os.path.join(job['output_dir'], 'tmp'))
                 files = filter(lambda x: x.endswith('.tif'), files)
@@ -181,7 +181,7 @@ class OCRWorkflow(WorkflowRunner):
                 )
                 cmd = 'ocropus-nlbin --output "%s" --parallel "%i" %s' % (
                     os.path.join(job['output_dir'], 'tmp'),
-                    binarisation_job_nCores,
+                    binarisation_job_n_cores,
                     ' '.join(files)
                 )
                 binarisation_jobs.append(
@@ -189,7 +189,7 @@ class OCRWorkflow(WorkflowRunner):
                         command=cmd,
                         dependencies='split_job_-_%i' % (index),
                         label='binarisation_job_-_%i' % (index),
-                        nCores=binarisation_job_nCores
+                        nCores=binarisation_job_n_cores
                     )
                 )
 
@@ -249,13 +249,13 @@ class OCRWorkflow(WorkflowRunner):
         ' or, if there are less then four cores available for this workflow,
         ' the available core number.
         '''
-        ocr_job_nCores = min(4, self.n_cores)
+        ocr_job_n_cores = min(4, self.n_cores)
         '''
         ' WORKAROUND: Tesseract only uses one core for the deu_frak language
         ' model, so the workflow will also only reserve one in this case.
         '''
         if self.lang == "deu_frak":
-            ocr_job_nCores = 1
+            ocr_job_n_cores = 1
         for index, job in enumerate(self.jobs):
             files = os.listdir(os.path.join(job['output_dir'], 'tmp'))
             if self.skip_binarisation:
@@ -293,7 +293,7 @@ class OCRWorkflow(WorkflowRunner):
                         command=cmd,
                         dependencies=ocr_job_dependencies,
                         label='ocr_job_-_%i-%i' % (index, number),
-                        nCores=ocr_job_nCores
+                        nCores=ocr_job_n_cores
                     )
                 )
                 number += 1