diff --git a/hocrtotei b/hocrtotei index f623650..540637d 100755 --- a/hocrtotei +++ b/hocrtotei @@ -5,15 +5,17 @@ from xml.sax.saxutils import escape import argparse import xml.etree.ElementTree as ET -parser = argparse.ArgumentParser() +parser = argparse.ArgumentParser(description='hocrtotei merges several hOCR files in order of their occurrence on command line to one TEI result file.') parser.add_argument( 'i', - help='The input files.', - nargs='*', + metavar='hOCR-sourcefile', + help='Input file in hOCR file format.', + nargs='+' ) parser.add_argument( 'o', - help='The output file.', + metavar='TEI-destfile', + help='Output file.' ) args = parser.parse_args() diff --git a/ocr b/ocr index 8c4d30a..9bf65a1 100755 --- a/ocr +++ b/ocr @@ -29,13 +29,7 @@ from pyflow import WorkflowRunner def parse_arguments(): - parser = argparse.ArgumentParser( - 'Performs OCR of (historical) documents utilizing OCRopus for \ - preprocessing and Tesseract OCR for OCR. Available outputs are HOCR, \ - PDF, shrinked PDF, and simple DTAbf (TEI P5 compliant). Software \ - requirements: imagemagick, ocropus, pdftoppm, pdfunite, \ - poppler-utils, pyflow, python2.7, python3.5, tesseract' - ) + parser = argparse.ArgumentParser(description='Performs OCR of (historical) documents utilizing OCRopus for preprocessing and Tesseract OCR for OCR. Available outputs are hOCR, PDF, TEI compliant XML and raw text. Software requirements: imagemagick, ocropus, pdftoppm, pdfunite, poppler-utils, pyflow, python2.7, python3.5, tesseract') parser.add_argument( '-i', dest='inputDirectory',