From 46bb0efd14b1e1ce18e25d4a5a51d0c250edc422 Mon Sep 17 00:00:00 2001 From: Patrick Jentsch Date: Thu, 16 May 2019 14:59:22 +0200 Subject: [PATCH] Add description to hocrtotei --- hocrtotei | 10 ++++++---- ocr | 8 +------- 2 files changed, 7 insertions(+), 11 deletions(-) diff --git a/hocrtotei b/hocrtotei index f623650..540637d 100755 --- a/hocrtotei +++ b/hocrtotei @@ -5,15 +5,17 @@ from xml.sax.saxutils import escape import argparse import xml.etree.ElementTree as ET -parser = argparse.ArgumentParser() +parser = argparse.ArgumentParser(description='hocrtotei merges several hOCR files in order of their occurrence on command line to one TEI result file.') parser.add_argument( 'i', - help='The input files.', - nargs='*', + metavar='hOCR-sourcefile', + help='Input file in hOCR file format.', + nargs='+' ) parser.add_argument( 'o', - help='The output file.', + metavar='TEI-destfile', + help='Output file.' ) args = parser.parse_args() diff --git a/ocr b/ocr index 8c4d30a..9bf65a1 100755 --- a/ocr +++ b/ocr @@ -29,13 +29,7 @@ from pyflow import WorkflowRunner def parse_arguments(): - parser = argparse.ArgumentParser( - 'Performs OCR of (historical) documents utilizing OCRopus for \ - preprocessing and Tesseract OCR for OCR. Available outputs are HOCR, \ - PDF, shrinked PDF, and simple DTAbf (TEI P5 compliant). Software \ - requirements: imagemagick, ocropus, pdftoppm, pdfunite, \ - poppler-utils, pyflow, python2.7, python3.5, tesseract' - ) + parser = argparse.ArgumentParser(description='Performs OCR of (historical) documents utilizing OCRopus for preprocessing and Tesseract OCR for OCR. Available outputs are hOCR, PDF, TEI compliant XML and raw text. Software requirements: imagemagick, ocropus, pdftoppm, pdfunite, poppler-utils, pyflow, python2.7, python3.5, tesseract') parser.add_argument( '-i', dest='inputDirectory',