Fix enumeration in readme

Codestyle update
2026-05-31 23:30:40 +00:00 · 2022-01-18 13:46:52 +01:00 · 2022-01-18 13:45:17 +01:00
2 changed files with 10 additions and 10 deletions
@@ -14,10 +14,10 @@ This software implements a heavily parallelized pipeline to recognize text in PD

 1. Install Docker and Python 3.
 2. Clone this repository: `git clone https://gitlab.ub.uni-bielefeld.de/sfb1288inf/ocr.git`
-2. Build the Docker image: `docker build -t gitlab.ub.uni-bielefeld.de:4567/sfb1288inf/ocr:v0.1.0 ocr`
-2. Add the wrapper script (`wrapper/ocr` relative to this README file) to your `${PATH}`.
-3. Create working directories for the pipeline: `mkdir -p /<my_data_location>/{input,models,output}`.
-4. Place your Tesseract OCR model(s) inside `/<my_data_location>/models`.
+3. Build the Docker image: `docker build -t gitlab.ub.uni-bielefeld.de:4567/sfb1288inf/ocr:v0.1.0 ocr`
+4. Add the wrapper script (`wrapper/ocr` relative to this README file) to your `${PATH}`.
+5. Create working directories for the pipeline: `mkdir -p /<my_data_location>/{input,models,output}`.
+6. Place your Tesseract OCR model(s) inside `/<my_data_location>/models`.

 ## Use the Pipeline

@@ -385,7 +385,6 @@ class MainWorkflow(WorkflowRunner):
        self.input_dir = input_dir
        self.lang = lang
        self.output_dir = output_dir
-        self.output_files = []
        self.binarize = binarize
        self.jobs = []

@@ -528,33 +527,34 @@ class MainWorkflow(WorkflowRunner):
            create_txt_tasks.append(task)

        self.waitForTasks()
+        output_files = []
        for job in self.jobs:
            # Remove temporary directory
            os.rmdir(job.tmp_dir)
            # Track output files
            relative_output_dir = os.path.relpath(job.output_dir, start=self.output_dir)  # noqa
-            self.output_files.append(
+            output_files.append(
                {
                    'description': 'Post correction package (.png and .hocr).',
                    'file': os.path.join(relative_output_dir, '{}.poco.zip'.format(job.name)),  # noqa
                    'mimetype': 'application/zip'
                }
            )
-            self.output_files.append(
+            output_files.append(
                {
                    'description': 'PDF file with text layer.',
                    'file': os.path.join(relative_output_dir, '{}.pdf'.format(job.name)),  # noqa
                    'mimetype': 'application/pdf'
                }
            )
-            self.output_files.append(
+            output_files.append(
                {
                    'description': 'Plain text file.',
                    'file': os.path.join(relative_output_dir, '{}.txt'.format(job.name)),  # noqa
                    'mimetype': 'text/plain'
                }
            )
-            self.output_files.append(
+            output_files.append(
                {
                    'description': 'TEI compliant XML file.',
                    'file': os.path.join(relative_output_dir, '{}.tei.xml'.format(job.name)),  # noqa
@@ -562,7 +562,7 @@ class MainWorkflow(WorkflowRunner):
                }
            )
        with open(os.path.join(self.output_dir, 'output_records.json'), 'w') as f:  # noqa
-            json.dump(self.output_files, f, indent=4)
+            json.dump(output_files, f, indent=4)


 def parse_args():
Author	SHA1	Message	Date
Patrick Jentsch	aeab9b7802	Fix enumeration in readme	2022-01-18 13:46:52 +01:00
Patrick Jentsch	00c4b17018	Codestyle update	2022-01-18 13:45:17 +01:00