Add description to hocrtotei

This commit is contained in:
Patrick Jentsch 2019-05-16 14:59:22 +02:00
parent b81ad4cc67
commit 46bb0efd14
2 changed files with 7 additions and 11 deletions

View File

@ -5,15 +5,17 @@ from xml.sax.saxutils import escape
import argparse import argparse
import xml.etree.ElementTree as ET import xml.etree.ElementTree as ET
parser = argparse.ArgumentParser() parser = argparse.ArgumentParser(description='hocrtotei merges several hOCR files in order of their occurrence on command line to one TEI result file.')
parser.add_argument( parser.add_argument(
'i', 'i',
help='The input files.', metavar='hOCR-sourcefile',
nargs='*', help='Input file in hOCR file format.',
nargs='+'
) )
parser.add_argument( parser.add_argument(
'o', 'o',
help='The output file.', metavar='TEI-destfile',
help='Output file.'
) )
args = parser.parse_args() args = parser.parse_args()

8
ocr
View File

@ -29,13 +29,7 @@ from pyflow import WorkflowRunner
def parse_arguments(): def parse_arguments():
parser = argparse.ArgumentParser( parser = argparse.ArgumentParser(description='Performs OCR of (historical) documents utilizing OCRopus for preprocessing and Tesseract OCR for OCR. Available outputs are hOCR, PDF, TEI compliant XML and raw text. Software requirements: imagemagick, ocropus, pdftoppm, pdfunite, poppler-utils, pyflow, python2.7, python3.5, tesseract')
'Performs OCR of (historical) documents utilizing OCRopus for \
preprocessing and Tesseract OCR for OCR. Available outputs are HOCR, \
PDF, shrinked PDF, and simple DTAbf (TEI P5 compliant). Software \
requirements: imagemagick, ocropus, pdftoppm, pdfunite, \
poppler-utils, pyflow, python2.7, python3.5, tesseract'
)
parser.add_argument( parser.add_argument(
'-i', '-i',
dest='inputDirectory', dest='inputDirectory',