Add description to hocrtotei

This commit is contained in:
Patrick Jentsch 2019-05-16 14:59:22 +02:00
parent b81ad4cc67
commit 46bb0efd14
2 changed files with 7 additions and 11 deletions

View File

@@ -5,15 +5,17 @@ from xml.sax.saxutils import escape
import argparse
import xml.etree.ElementTree as ET
parser = argparse.ArgumentParser()
parser = argparse.ArgumentParser(description='hocrtotei merges several hOCR files in order of their occurrence on command line to one TEI result file.')
parser.add_argument(
'i',
help='The input files.',
nargs='*',
metavar='hOCR-sourcefile',
help='Input file in hOCR file format.',
nargs='+'
)
parser.add_argument(
'o',
help='The output file.',
metavar='TEI-destfile',
help='Output file.'
)
args = parser.parse_args()

8
ocr
View File

@@ -29,13 +29,7 @@ from pyflow import WorkflowRunner
def parse_arguments():
parser = argparse.ArgumentParser(
'Performs OCR of (historical) documents utilizing OCRopus for \
preprocessing and Tesseract OCR for OCR. Available outputs are HOCR, \
PDF, shrinked PDF, and simple DTAbf (TEI P5 compliant). Software \
requirements: imagemagick, ocropus, pdftoppm, pdfunite, \
poppler-utils, pyflow, python2.7, python3.5, tesseract'
)
parser = argparse.ArgumentParser(description='Performs OCR of (historical) documents utilizing OCRopus for preprocessing and Tesseract OCR for OCR. Available outputs are hOCR, PDF, TEI compliant XML and raw text. Software requirements: imagemagick, ocropus, pdftoppm, pdfunite, poppler-utils, pyflow, python2.7, python3.5, tesseract')
parser.add_argument(
'-i',
dest='inputDirectory',