Fix corpus building process for vrt files without entities

This commit is contained in:
Patrick Jentsch 2022-11-25 10:46:46 +01:00
parent aff85f2145
commit f8d4b601f7

View File

@@ -97,7 +97,7 @@ def normalize_vrt_file(input_file, output_file):
multi_line_tag_definition = False
continue
pos_attrs = line.rstrip('\n').split('\t')
-if not has_ent_as_s_attr:
+if not has_ent_as_s_attr and len(pos_attrs) > 4:
if pos_attrs[4].lower() in ['null', 'none']:
if current_ent:
output_vrt += '</ent>\n'