Change the format of tracked output file entries

This commit is contained in:
Patrick Jentsch 2022-01-14 10:56:16 +01:00
parent c640d9743f
commit f51a8c4546

22
ocr
View File

@ -488,39 +488,41 @@ class MainWorkflow(WorkflowRunner):
# Remove temporary directory # Remove temporary directory
os.rmdir(job.tmp_dir) os.rmdir(job.tmp_dir)
# Track output files # Track output files
relative_input = os.path.relpath(job.file, start=self.input_dir)
relative_output_dir = os.path.relpath(job.output_dir, start=self.output_dir) # noqa
for x in os.listdir(os.path.join(job.output_dir, 'images')): for x in os.listdir(os.path.join(job.output_dir, 'images')):
self.output_files.append( self.output_files.append(
{ {
'directory': os.path.join(os.path.relpath(job.output_dir, start=self.output_dir), 'images'), # noqa 'input': relative_input,
'filename': x, 'path': os.path.join(relative_output_dir, 'images', x),
'mimetype': 'image/png' 'mimetype': 'image/png'
} }
) )
self.output_files.append( self.output_files.append(
{ {
'directory': os.path.relpath(job.output_dir, start=self.output_dir), # noqa 'input': relative_input,
'filename': '{}.hocr'.format(job.name), 'path': os.path.join(relative_output_dir, '{}.hocr'.format(job.name)), # noqa
'mimetype': 'application/xhtml+xml' 'mimetype': 'application/xhtml+xml'
} }
) )
self.output_files.append( self.output_files.append(
{ {
'directory': os.path.relpath(job.output_dir, start=self.output_dir), # noqa 'input': relative_input,
'filename': '{}.pdf'.format(job.name), 'filename': os.path.join(relative_output_dir, '{}.pdf'.format(job.name)), # noqa
'mimetype': 'application/pdf' 'mimetype': 'application/pdf'
} }
) )
self.output_files.append( self.output_files.append(
{ {
'directory': os.path.relpath(job.output_dir, start=self.output_dir), # noqa 'input': relative_input,
'filename': '{}.txt'.format(job.name), 'filename': os.path.join(relative_output_dir, '{}.txt'.format(job.name)), # noqa
'mimetype': 'text/plain' 'mimetype': 'text/plain'
} }
) )
self.output_files.append( self.output_files.append(
{ {
'directory': os.path.relpath(job.output_dir, start=self.output_dir), # noqa 'input': relative_input,
'filename': '{}.xml'.format(job.name), 'filename': os.path.join(relative_output_dir, '{}.xml'.format(job.name)), # noqa
'mimetype': 'application/tei+xml' 'mimetype': 'application/tei+xml'
} }
) )