mirror of
https://gitlab.ub.uni-bielefeld.de/sfb1288inf/ocr.git
synced 2025-07-02 01:10:35 +00:00
Add description to hocrtotei
This commit is contained in:
8
ocr
8
ocr
@ -29,13 +29,7 @@ from pyflow import WorkflowRunner
|
||||
|
||||
|
||||
def parse_arguments():
|
||||
parser = argparse.ArgumentParser(
|
||||
'Performs OCR of (historical) documents utilizing OCRopus for \
|
||||
preprocessing and Tesseract OCR for OCR. Available outputs are HOCR, \
|
||||
PDF, shrinked PDF, and simple DTAbf (TEI P5 compliant). Software \
|
||||
requirements: imagemagick, ocropus, pdftoppm, pdfunite, \
|
||||
poppler-utils, pyflow, python2.7, python3.5, tesseract'
|
||||
)
|
||||
parser = argparse.ArgumentParser(description='Performs OCR of (historical) documents utilizing OCRopus for preprocessing and Tesseract OCR for OCR. Available outputs are hOCR, PDF, TEI compliant XML and raw text. Software requirements: imagemagick, ocropus, pdftoppm, pdfunite, poppler-utils, pyflow, python2.7, python3.5, tesseract')
|
||||
parser.add_argument(
|
||||
'-i',
|
||||
dest='inputDirectory',
|
||||
|
Reference in New Issue
Block a user