# NOTE(review): the string below looks like a leftover sample of generator/file
# metadata; nothing in this section reads it. Kept as-is -- confirm before
# removing.
'''
'generator': {
    'name': 'nopaque NLP service',
    'version': '1.0.0',
    'arguments': {
        'check_encoding': args.check_encoding,
        'language': args.language
    }
},
'file': {
    'encoding': encoding,
    'md5': text_md5.hexdigest(),
    'name': os.path.basename(args.input)
}
'''


class StandOffData:
    """Container for tag definitions and the annotations that reference them.

    ``attrs`` is a mapping with two optional keys:

    * ``'tags'``: list of mappings, each passed to :class:`TagDefinition`.
    * ``'annotations'``: list of mappings, each passed to
      :class:`TagAnnotation`.

    Attributes:
        tags: dict mapping a tag id to its :class:`TagDefinition`.
        annotations: list of :class:`TagAnnotation`, in input order.

    Raises:
        KeyError: if a mandatory key is missing from a nested mapping.
        Exception: if an annotation is invalid (see :class:`TagAnnotation`).
    """

    def __init__(self, attrs):
        tag_definitions = [TagDefinition(x) for x in attrs.get('tags', [])]
        self.tags = {
            tag_definition.id: tag_definition
            for tag_definition in tag_definitions
        }
        # Annotations are validated against the tag lookup built above.
        self.annotations = [
            TagAnnotation(x, self.tags) for x in attrs.get('annotations', [])
        ]


class TagAnnotation:
    """A single tag applied to the span ``[start, end)`` of positions.

    Args:
        attrs: mapping with mandatory keys ``'tag_id'``, ``'start'`` and
            ``'end'`` and optional keys ``'description'`` and
            ``'properties'`` (list of mappings, each passed to
            :class:`PropertyAnnotation`).
        tag_lookup: dict mapping tag ids to :class:`TagDefinition` objects.

    Raises:
        KeyError: if a mandatory key is missing from ``attrs``.
        Exception: if the tag id is unknown, if ``start >= end``, if a
            property id is unknown, or if a property that the tag definition
            flags as required is not annotated.

    Ordering: annotations sort by ``start``; when two annotations share the
    same ``start``, the one named ``'token'`` sorts before the others.
    """

    def __init__(self, attrs, tag_lookup):
        self.tag_id = attrs['tag_id']
        self.tag_lookup = tag_lookup
        if self.tag_id not in self.tag_lookup:
            raise Exception('Unknown tag id: {}'.format(self.tag_id))
        tag_definition = self.tag_lookup[self.tag_id]
        self.start = attrs['start']
        self.end = attrs['end']
        if self.start >= self.end:
            raise Exception('start must be lower then end')
        self.description = attrs.get('description', '')
        self.properties = [
            PropertyAnnotation(x, tag_definition.properties)
            for x in attrs.get('properties', [])
        ]
        # Compare ids with ids: self.properties holds PropertyAnnotation
        # objects, so testing an id for membership in that list never matches.
        annotated_property_ids = {
            property_annotation.property_id
            for property_annotation in self.properties
        }
        for required_property_id in tag_definition.required_properties:
            if required_property_id not in annotated_property_ids:
                raise Exception(
                    'Missing required property: {}'.format(required_property_id)
                )

    @property
    def name(self):
        """Name of the tag definition this annotation refers to."""
        return self.tag_lookup[self.tag_id].name

    def __lt__(self, other):
        if self.start == other.start:
            return self.name == 'token' and other.name != 'token'
        return self.start < other.start

    def __le__(self, other):
        if self.start == other.start:
            return self.name == 'token' or other.name != 'token'
        return self.start < other.start

    def __eq__(self, other):
        return self.start == other.start and self.name == other.name

    def __ne__(self, other):
        # Exact negation of __eq__ (the previous `and` form reported
        # same-start/different-name annotations as neither equal nor unequal).
        return not self == other

    def __gt__(self, other):
        if self.start == other.start:
            return self.name != 'token' and other.name == 'token'
        return self.start > other.start

    def __ge__(self, other):
        if self.start == other.start:
            return self.name != 'token' or other.name == 'token'
        return self.start > other.start


class PropertyAnnotation:
    """A property value attached to a :class:`TagAnnotation`.

    Args:
        attrs: mapping with mandatory keys ``'property_id'`` and ``'value'``.
        property_lookup: dict mapping property ids to
            :class:`PropertyDefinition` objects of the annotated tag.

    Raises:
        KeyError: if a mandatory key is missing from ``attrs``.
        Exception: if ``property_id`` is not in ``property_lookup``.
    """

    def __init__(self, attrs, property_lookup):
        # Read from `attrs`; the builtin `property` is not subscriptable.
        self.property_id = attrs['property_id']
        self.property_lookup = property_lookup
        if self.property_id not in self.property_lookup:
            raise Exception('Unknown property id: {}'.format(self.property_id))
        self.value = attrs['value']
        # TODO: Process attrs['possibleValues'] as self.labels (no id?)

    @property
    def name(self):
        """Name of the property definition this annotation refers to."""
        return self.property_lookup[self.property_id].name


class TagDefinition:
    """Definition of a tag and the properties it may carry.

    Args:
        attrs: mapping with mandatory keys ``'id'`` and ``'name'`` and
            optional keys ``'description'`` and ``'properties'`` (list of
            mappings, each passed to :class:`PropertyDefinition`).

    Attributes:
        properties: dict mapping a property id to its
            :class:`PropertyDefinition`.
    """

    def __init__(self, attrs):
        self.id = attrs['id']
        self.name = attrs['name']
        self.description = attrs.get('description', '')
        property_definitions = [
            PropertyDefinition(x) for x in attrs.get('properties', [])
        ]
        self.properties = {
            property_definition.id: property_definition
            for property_definition in property_definitions
        }

    @property
    def required_properties(self):
        """Dict of property id -> definition for properties flagged required."""
        # Iterate the definitions (values); iterating the dict itself yields
        # ids, which have no `is_required` attribute.
        return {
            property_definition.id: property_definition
            for property_definition in self.properties.values()
            if property_definition.is_required
        }


class PropertyDefinition:
    """Definition of a property that can be attached to a tag.

    Args:
        attrs: mapping with mandatory keys ``'id'`` and ``'name'`` and
            optional keys ``'description'``, ``'flags'`` and ``'labels'``.
    """

    def __init__(self, attrs):
        self.id = attrs['id']
        self.name = attrs['name']
        self.description = attrs.get('description', '')
        self.flags = attrs.get('flags', [])
        self.labels = attrs.get('labels', [])

    @property
    def is_required(self):
        """True when ``'required'`` is among the definition's flags."""
        return 'required' in self.flags

    @property
    def has_multiple_values(self):
        """True when ``'multiple'`` is among the definition's flags."""
        return 'multiple' in self.flags