From 29ccfac4f6aa392c76a0e4fc5975ef4039234e7a Mon Sep 17 00:00:00 2001 From: Patrick Jentsch Date: Wed, 11 Aug 2021 16:47:29 +0200 Subject: [PATCH] optimizations --- .../stand_off_data/models.py | 83 ++++++++++++++----- 1 file changed, 60 insertions(+), 23 deletions(-) diff --git a/packages/stand-off-data-py/stand_off_data/models.py b/packages/stand-off-data-py/stand_off_data/models.py index 7d6efc3..0206119 100644 --- a/packages/stand-off-data-py/stand_off_data/models.py +++ b/packages/stand-off-data-py/stand_off_data/models.py @@ -2,12 +2,26 @@ from xml.sax.saxutils import escape class StandOffData: - def __init__(self, attrs): + def __init__(self, attrs={}): self.meta = attrs.get('meta', {}) - self.lookup = {tag_definition.id: tag_definition for tag_definition in - [TagDefinition(x) for x in attrs.get('tags', [])]} - self.annotations = [TagAnnotation(x, self.lookup) for x in - attrs.get('annotations', [])] + self.lookup = {} + for x in attrs.get('tags', []): + self.add_tag_definition(x) + self.annotations = [TagAnnotation(x, self.lookup) + for x in attrs.get('annotations', [])] + + def add_tag_definition(self, attrs): + tag_definition = TagDefinition(attrs) + if tag_definition.id in self.lookup: + raise Exception('Tag id already in use: {}'.format(self.to_dict())) + self.lookup[tag_definition.id] = tag_definition + + def to_dict(self): + return { + 'meta': self.meta, + 'lookup': {k: v.to_dict() for k, v in self.lookup.items()}, + 'annotations': [x.to_dict() for x in self.annotations] + } def to_vrt(self, text): # Devide annotations into CWB's verticalized text format (.vrt) logic @@ -88,21 +102,23 @@ class TagAnnotation: def __init__(self, attrs, lookup): self.lookup = lookup self.tag_id = attrs['tag_id'] - if self.tag_id not in self.lookup: - raise Exception('Unknown tag id: {}'.format(self.tag_id)) self.start = attrs['start'] self.end = attrs['end'] - if self.start >= self.end: - raise Exception('start must be lower then end') self.properties = [ - PropertyAnnotation({**x, 'tag_id': self.tag_id}, self.lookup) + PropertyAnnotation(x, self.lookup[self.tag_id].properties) for x in attrs.get('properties', []) ] + ''' Sanity checks ''' + if self.tag_id not in self.lookup: + raise Exception('Unknown tag: {}'.format(self.to_dict())) + if self.end < self.start: + raise Exception('Annotation end less then start: ' + '{}'.format(self.to_dict())) property_ids = [x.property_id for x in self.properties] - for required_property_id in self.lookup[self.tag_id].required_properties: + for required_property_id, required_property in self.lookup[self.tag_id].required_properties.items(): # noqa if required_property_id not in property_ids: - raise Exception( - 'Missing required property: {}'.format(required_property_id)) + raise Exception('Missing required property: ' + '{}'.format(required_property.to_dict())) @property def name(self): @@ -151,15 +167,15 @@ class PropertyAnnotation: def __init__(self, attrs, lookup): self.lookup = lookup self.property_id = attrs['property_id'] - self.tag_id = attrs['tag_id'] - if self.property_id not in self.lookup[self.tag_id].properties: - raise Exception('Unknown property id: {}'.format(self.property_id)) self.value = attrs['value'] # TODO: Process attrs['possibleValues'] as self.labels (no id?) + ''' Sanity checks ''' + if self.property_id not in self.lookup: + raise Exception('Unknown property: {}'.format(self.to_dict())) @property def name(self): - return self.lookup[self.tag_id].properties[self.property_id].name + return self.lookup[self.property_id].name def to_dict(self): return { @@ -174,18 +190,30 @@ class TagDefinition: self.id = attrs['id'] self.name = attrs['name'] self.description = attrs.get('description', '') - self.properties = { - property_definition.id: property_definition - for property_definition in [ - PropertyDefinition(x) for x in attrs.get('properties', []) - ] - } + self.properties = {} + for x in attrs.get('properties', []): + self.add_property_definition(x) + + def add_property_definition(self, attrs): + property_definition = PropertyDefinition(attrs) + if property_definition.id in self.properties: + raise Exception('Property id already in use: ' + '{}'.format(property_definition.to_dict())) + self.properties[property_definition.id] = property_definition @property def required_properties(self): return {property.id: property for property in self.properties.values() if property.is_required} + def to_dict(self): + return { + 'id': self.id, + 'name': self.name, + 'description': self.description, + 'properties': {k: v.to_dict() for k, v in self.properties.items()} + } + class PropertyDefinition: def __init__(self, attrs): @@ -202,3 +230,12 @@ class PropertyDefinition: @property def has_multiple_values(self): return 'multiple' in self.flags + + def to_dict(self): + return { + 'id': self.id, + 'name': self.name, + 'description': self.description, + 'flags': self.flags, + 'labels': self.labels + }