mirror of
https://gitlab.ub.uni-bielefeld.de/sfb1288inf/ocr.git
synced 2024-12-27 09:24:19 +00:00
Add description to hocrtotei
This commit is contained in:
parent
b81ad4cc67
commit
46bb0efd14
10
hocrtotei
10
hocrtotei
@ -5,15 +5,17 @@ from xml.sax.saxutils import escape
|
|||||||
import argparse
|
import argparse
|
||||||
import xml.etree.ElementTree as ET
|
import xml.etree.ElementTree as ET
|
||||||
|
|
||||||
parser = argparse.ArgumentParser()
|
parser = argparse.ArgumentParser(description='hocrtotei merges several hOCR files in order of their occurrence on command line to one TEI result file.')
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
'i',
|
'i',
|
||||||
help='The input files.',
|
metavar='hOCR-sourcefile',
|
||||||
nargs='*',
|
help='Input file in hOCR file format.',
|
||||||
|
nargs='+'
|
||||||
)
|
)
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
'o',
|
'o',
|
||||||
help='The output file.',
|
metavar='TEI-destfile',
|
||||||
|
help='Output file.'
|
||||||
)
|
)
|
||||||
args = parser.parse_args()
|
args = parser.parse_args()
|
||||||
|
|
||||||
|
8
ocr
8
ocr
@ -29,13 +29,7 @@ from pyflow import WorkflowRunner
|
|||||||
|
|
||||||
|
|
||||||
def parse_arguments():
|
def parse_arguments():
|
||||||
parser = argparse.ArgumentParser(
|
parser = argparse.ArgumentParser(description='Performs OCR of (historical) documents utilizing OCRopus for preprocessing and Tesseract OCR for OCR. Available outputs are hOCR, PDF, TEI compliant XML and raw text. Software requirements: imagemagick, ocropus, pdftoppm, pdfunite, poppler-utils, pyflow, python2.7, python3.5, tesseract')
|
||||||
'Performs OCR of (historical) documents utilizing OCRopus for \
|
|
||||||
preprocessing and Tesseract OCR for OCR. Available outputs are HOCR, \
|
|
||||||
PDF, shrinked PDF, and simple DTAbf (TEI P5 compliant). Software \
|
|
||||||
requirements: imagemagick, ocropus, pdftoppm, pdfunite, \
|
|
||||||
poppler-utils, pyflow, python2.7, python3.5, tesseract'
|
|
||||||
)
|
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
'-i',
|
'-i',
|
||||||
dest='inputDirectory',
|
dest='inputDirectory',
|
||||||
|
Loading…
Reference in New Issue
Block a user