mirror of
https://gitlab.ub.uni-bielefeld.de/sfb1288inf/ocr.git
synced 2024-12-25 18:34:18 +00:00
Change the format of the tracked output file entries
This commit is contained in:
parent
c640d9743f
commit
f51a8c4546
22
ocr
22
ocr
@ -488,39 +488,41 @@ class MainWorkflow(WorkflowRunner):
|
||||
# Remove temporary directory
|
||||
os.rmdir(job.tmp_dir)
|
||||
# Track output files
|
||||
relative_input = os.path.relpath(job.file, start=self.input_dir)
|
||||
relative_output_dir = os.path.relpath(job.output_dir, start=self.output_dir) # noqa
|
||||
for x in os.listdir(os.path.join(job.output_dir, 'images')):
|
||||
self.output_files.append(
|
||||
{
|
||||
'directory': os.path.join(os.path.relpath(job.output_dir, start=self.output_dir), 'images'), # noqa
|
||||
'filename': x,
|
||||
'input': relative_input,
|
||||
'path': os.path.join(relative_output_dir, 'images', x),
|
||||
'mimetype': 'image/png'
|
||||
}
|
||||
)
|
||||
self.output_files.append(
|
||||
{
|
||||
'directory': os.path.relpath(job.output_dir, start=self.output_dir), # noqa
|
||||
'filename': '{}.hocr'.format(job.name),
|
||||
'input': relative_input,
|
||||
'path': os.path.join(relative_output_dir, '{}.hocr'.format(job.name)), # noqa
|
||||
'mimetype': 'application/xhtml+xml'
|
||||
}
|
||||
)
|
||||
self.output_files.append(
|
||||
{
|
||||
'directory': os.path.relpath(job.output_dir, start=self.output_dir), # noqa
|
||||
'filename': '{}.pdf'.format(job.name),
|
||||
'input': relative_input,
|
||||
'filename': os.path.join(relative_output_dir, '{}.pdf'.format(job.name)), # noqa
|
||||
'mimetype': 'application/pdf'
|
||||
}
|
||||
)
|
||||
self.output_files.append(
|
||||
{
|
||||
'directory': os.path.relpath(job.output_dir, start=self.output_dir), # noqa
|
||||
'filename': '{}.txt'.format(job.name),
|
||||
'input': relative_input,
|
||||
'filename': os.path.join(relative_output_dir, '{}.txt'.format(job.name)), # noqa
|
||||
'mimetype': 'text/plain'
|
||||
}
|
||||
)
|
||||
self.output_files.append(
|
||||
{
|
||||
'directory': os.path.relpath(job.output_dir, start=self.output_dir), # noqa
|
||||
'filename': '{}.xml'.format(job.name),
|
||||
'input': relative_input,
|
||||
'filename': os.path.join(relative_output_dir, '{}.xml'.format(job.name)), # noqa
|
||||
'mimetype': 'application/tei+xml'
|
||||
}
|
||||
)
|
||||
|
Loading…
Reference in New Issue
Block a user