mirror of
https://gitlab.ub.uni-bielefeld.de/sfb1288inf/ocr.git
synced 2024-12-26 18:04:18 +00:00
Add description to hocrtotei
This commit is contained in:
parent
b81ad4cc67
commit
46bb0efd14
10
hocrtotei
10
hocrtotei
@@ -5,15 +5,17 @@ from xml.sax.saxutils import escape
|
||||
import argparse
|
||||
import xml.etree.ElementTree as ET
|
||||
|
||||
parser = argparse.ArgumentParser()
|
||||
parser = argparse.ArgumentParser(description='hocrtotei merges several hOCR files in order of their occurrence on command line to one TEI result file.')
|
||||
parser.add_argument(
|
||||
'i',
|
||||
help='The input files.',
|
||||
nargs='*',
|
||||
metavar='hOCR-sourcefile',
|
||||
help='Input file in hOCR file format.',
|
||||
nargs='+'
|
||||
)
|
||||
parser.add_argument(
|
||||
'o',
|
||||
help='The output file.',
|
||||
metavar='TEI-destfile',
|
||||
help='Output file.'
|
||||
)
|
||||
args = parser.parse_args()
|
||||
|
||||
|
8
ocr
8
ocr
@@ -29,13 +29,7 @@ from pyflow import WorkflowRunner
|
||||
|
||||
|
||||
def parse_arguments():
|
||||
parser = argparse.ArgumentParser(
|
||||
'Performs OCR of (historical) documents utilizing OCRopus for \
|
||||
preprocessing and Tesseract OCR for OCR. Available outputs are HOCR, \
|
||||
PDF, shrinked PDF, and simple DTAbf (TEI P5 compliant). Software \
|
||||
requirements: imagemagick, ocropus, pdftoppm, pdfunite, \
|
||||
poppler-utils, pyflow, python2.7, python3.5, tesseract'
|
||||
)
|
||||
parser = argparse.ArgumentParser(description='Performs OCR of (historical) documents utilizing OCRopus for preprocessing and Tesseract OCR for OCR. Available outputs are hOCR, PDF, TEI compliant XML and raw text. Software requirements: imagemagick, ocropus, pdftoppm, pdfunite, poppler-utils, pyflow, python2.7, python3.5, tesseract')
|
||||
parser.add_argument(
|
||||
'-i',
|
||||
dest='inputDirectory',
|
||||
|
Loading…
Reference in New Issue
Block a user