from datetime import datetime
from flask import current_app
from flask_login import UserMixin, AnonymousUserMixin
from itsdangerous import BadSignature, TimedJSONWebSignatureSerializer
from time import sleep
from werkzeug.security import generate_password_hash, check_password_hash
import xml.etree.ElementTree as ET
from . import db, login_manager
import logging
import os
import shutil


class Permission:
    '''
    Defines User permissions as integers by the power of 2. User permissions
    can be evaluated using the bitwise operator &. 3 equals to MANAGE_CORPORA
    and MANAGE_JOBS and so on.
    '''
    MANAGE_CORPORA = 1
    MANAGE_JOBS = 2
    # PERMISSION_NAME = 4
    # PERMISSION_NAME = 8
    ADMIN = 16


class Role(db.Model):
    '''
    Model for the different roles Users can have. Is a one-to-many
    relationship. A Role can be associated with many User rows.
    '''
    __tablename__ = 'roles'
    # Primary key
    id = db.Column(db.Integer, primary_key=True)
    # Fields
    default = db.Column(db.Boolean, default=False, index=True)
    name = db.Column(db.String(64), unique=True)
    permissions = db.Column(db.Integer)
    # Relationships
    users = db.relationship('User', backref='role', lazy='dynamic')

    def to_dict(self):
        '''
        Returns the Role's columns as a plain dictionary.
        '''
        return {'id': self.id,
                'default': self.default,
                'name': self.name,
                'permissions': self.permissions}

    def __init__(self, **kwargs):
        '''
        Creates a Role. A Role created without permissions starts with 0
        (no permissions at all).
        '''
        super(Role, self).__init__(**kwargs)
        if self.permissions is None:
            self.permissions = 0

    def __repr__(self):
        '''
        String representation of the Role. For human readability.
        '''
        return '<Role {}>'.format(self.name)

    def add_permission(self, perm):
        '''
        Add new permission to Role. Input is a Permission.
        '''
        # Bitwise OR instead of addition: adding would corrupt the bit mask
        # if perm combines several bits and only some of them are set.
        if not self.has_permission(perm):
            self.permissions |= perm

    def remove_permission(self, perm):
        '''
        Removes permission from a Role. Input a Permission. Nothing happens
        if the Role does not have the given Permission.
        '''
        if self.has_permission(perm):
            self.permissions &= ~perm

    def reset_permissions(self):
        '''
        Resets permissions to zero. Zero equals no permissions at all.
        '''
        self.permissions = 0

    def has_permission(self, perm):
        '''
        Checks if a Role has a specific Permission. Does this with the bitwise
        operator.
        '''
        return (self.permissions & perm) == perm

    @staticmethod
    def insert_roles():
        '''
        Inserts roles into the database. This has to be executed before Users
        are added to the database. Otherwise Users will not have a Role
        assigned to them. Order of the roles dictionary determines the ID of
        each role. Users have the ID 1 and Administrators have the ID 2.
        '''
        roles = {'User': [Permission.MANAGE_CORPORA, Permission.MANAGE_JOBS],
                 'Administrator': [Permission.MANAGE_CORPORA,
                                   Permission.MANAGE_JOBS,
                                   Permission.ADMIN]}
        default_role = 'User'
        for role_name, role_permissions in roles.items():
            role = Role.query.filter_by(name=role_name).first()
            if role is None:
                role = Role(name=role_name)
            # Existing roles are rebuilt from scratch so that they always
            # match the dictionary above.
            role.reset_permissions()
            for perm in role_permissions:
                role.add_permission(perm)
            role.default = (role.name == default_role)
            db.session.add(role)
        db.session.commit()


class User(UserMixin, db.Model):
    '''
    Model for Users that are registered to Opaque.
    '''
    __tablename__ = 'users'
    # Primary key
    id = db.Column(db.Integer, primary_key=True)
    # Foreign keys
    role_id = db.Column(db.Integer, db.ForeignKey('roles.id'))
    # Fields
    confirmed = db.Column(db.Boolean, default=False)
    email = db.Column(db.String(254), unique=True, index=True)
    last_seen = db.Column(db.DateTime(), default=datetime.utcnow)
    member_since = db.Column(db.DateTime(), default=datetime.utcnow)
    password_hash = db.Column(db.String(128))
    setting_dark_mode = db.Column(db.Boolean, default=False)
    setting_job_status_mail_notifications = db.Column(db.String(16),
                                                      default='end')
    setting_job_status_site_notifications = db.Column(db.String(16),
                                                      default='all')
    username = db.Column(db.String(64), unique=True, index=True)
    # Relationships
    corpora = db.relationship('Corpus', backref='creator', lazy='dynamic',
                              cascade='save-update, merge, delete')
    jobs = db.relationship('Job', backref='creator', lazy='dynamic',
                           cascade='save-update, merge, delete')
    query_results = db.relationship('QueryResult',
                                    backref='creator',
                                    cascade='save-update, merge, delete',
                                    lazy='dynamic')

    @property
    def path(self):
        '''
        Directory of the User below the configured NOPAQUE_DATA_DIR.
        '''
        return os.path.join(current_app.config['NOPAQUE_DATA_DIR'],
                            str(self.id))

    @property
    def password(self):
        '''
        The password is write-only. Reading it raises an AttributeError.
        '''
        raise AttributeError('password is not a readable attribute')

    @password.setter
    def password(self, password):
        # Only the hash is stored, never the plain text password.
        self.password_hash = generate_password_hash(password)

    def to_dict(self):
        '''
        Returns the User as a dictionary, including its role and the
        dictionaries of all of its corpora, jobs and query results.
        '''
        return {'id': self.id,
                'role_id': self.role_id,
                'confirmed': self.confirmed,
                'email': self.email,
                'last_seen': self.last_seen.timestamp(),
                'member_since': self.member_since.timestamp(),
                'settings': {'dark_mode': self.setting_dark_mode,
                             'job_status_mail_notifications':
                                 self.setting_job_status_mail_notifications,
                             'job_status_site_notifications':
                                 self.setting_job_status_site_notifications},
                'username': self.username,
                'corpora': {corpus.id: corpus.to_dict()
                            for corpus in self.corpora},
                'jobs': {job.id: job.to_dict() for job in self.jobs},
                'query_results': {query_result.id: query_result.to_dict()
                                  for query_result in self.query_results},
                'role': self.role.to_dict()}

    def __repr__(self):
        '''
        String representation of the User. For human readability.
        '''
        return '<User {}>'.format(self.username)

    def __init__(self, **kwargs):
        '''
        Creates a User. If no role is given, the User whose email equals the
        configured NOPAQUE_ADMIN gets the Administrator role, everybody else
        gets the default role.
        '''
        super(User, self).__init__(**kwargs)
        if self.role is None:
            if self.email == current_app.config['NOPAQUE_ADMIN']:
                self.role = Role.query.filter_by(name='Administrator').first()
            # Also covers the case that no Administrator role exists.
            if self.role is None:
                self.role = Role.query.filter_by(default=True).first()

    def generate_confirmation_token(self, expiration=3600):
        '''
        Generates a confirmation token for user confirmation via email.
        The expiration is passed on to the serializer. Returns the token as
        a string.
        '''
        s = TimedJSONWebSignatureSerializer(current_app.config['SECRET_KEY'],
                                            expiration)
        return s.dumps({'confirm': self.id}).decode('utf-8')

    def generate_reset_token(self, expiration=3600):
        '''
        Generates a reset token for password reset via email. The expiration
        is passed on to the serializer. Returns the token as a string.
        '''
        s = TimedJSONWebSignatureSerializer(current_app.config['SECRET_KEY'],
                                            expiration)
        return s.dumps({'reset': self.id}).decode('utf-8')

    def confirm(self, token):
        '''
        Confirms User if the given token is valid and not expired. Returns
        True on success, False otherwise. The change is added to the database
        session but not committed.
        '''
        s = TimedJSONWebSignatureSerializer(current_app.config['SECRET_KEY'])
        try:
            data = s.loads(token.encode('utf-8'))
        except BadSignature:
            return False
        # The token has to be issued for exactly this User.
        if data.get('confirm') != self.id:
            return False
        self.confirmed = True
        db.session.add(self)
        return True

    @staticmethod
    def reset_password(token, new_password):
        '''
        Resets password for User if the given token is valid and not expired.
        Returns True on success, False otherwise. The change is added to the
        database session but not committed.
        '''
        s = TimedJSONWebSignatureSerializer(current_app.config['SECRET_KEY'])
        try:
            data = s.loads(token.encode('utf-8'))
        except BadSignature:
            return False
        user = User.query.get(data.get('reset'))
        if user is None:
            return False
        user.password = new_password
        db.session.add(user)
        return True

    def verify_password(self, password):
        '''
        Checks the given password against the stored password hash.
        '''
        return check_password_hash(self.password_hash, password)

    def can(self, perm):
        '''
        Checks if a User with its current role can do something. Checks if the
        associated role actually has the needed Permission.
        '''
        return self.role is not None and self.role.has_permission(perm)

    def is_administrator(self):
        '''
        Checks if User has Admin permissions.
        '''
        return self.can(Permission.ADMIN)

    def delete(self):
        '''
        Delete the user and its corpora and jobs from database and filesystem.
        The deletion is not committed here.
        '''
        shutil.rmtree(self.path, ignore_errors=True)
        db.session.delete(self)


class AnonymousUser(AnonymousUserMixin):
    '''
    Model replaces the default AnonymousUser. An anonymous user has no
    permissions at all.
    '''

    def can(self, permissions):
        return False

    def is_administrator(self):
        return False


class JobInput(db.Model):
    '''
    Class to define JobInputs.
    '''
    __tablename__ = 'job_inputs'
    # Primary key
    id = db.Column(db.Integer, primary_key=True)
    # Foreign keys
    job_id = db.Column(db.Integer, db.ForeignKey('jobs.id'))
    # Fields
    filename = db.Column(db.String(255))

    @property
    def path(self):
        '''
        Path of the input file inside the directory of its job.
        '''
        return os.path.join(self.job.path, self.filename)

    def __repr__(self):
        '''
        String representation of the JobInput. For human readability.
        '''
        return '<JobInput {}>'.format(self.filename)

    def to_dict(self):
        '''
        Returns the JobInput's columns as a plain dictionary.
        '''
        return {'id': self.id,
                'job_id': self.job_id,
                'filename': self.filename}


class JobResult(db.Model):
    '''
    Class to define JobResults.
    '''
    __tablename__ = 'job_results'
    # Primary key
    id = db.Column(db.Integer, primary_key=True)
    # Foreign keys
    job_id = db.Column(db.Integer, db.ForeignKey('jobs.id'))
    # Fields
    filename = db.Column(db.String(255))

    @property
    def path(self):
        '''
        Path of the result file inside the directory of its job.
        '''
        return os.path.join(self.job.path, self.filename)

    def __repr__(self):
        '''
        String representation of the JobResult. For human readability.
        '''
        return '<JobResult {}>'.format(self.filename)

    def to_dict(self):
        '''
        Returns the JobResult's columns as a plain dictionary.
        '''
        return {'id': self.id,
                'job_id': self.job_id,
                'filename': self.filename}


class Job(db.Model):
    '''
    Class to define Jobs.
    '''
    __tablename__ = 'jobs'
    # Primary key
    id = db.Column(db.Integer, primary_key=True)
    # Foreign keys
    user_id = db.Column(db.Integer, db.ForeignKey('users.id'))
    # Fields
    creation_date = db.Column(db.DateTime(), default=datetime.utcnow)
    description = db.Column(db.String(255))
    end_date = db.Column(db.DateTime())
    mem_mb = db.Column(db.Integer)
    n_cores = db.Column(db.Integer)
    service = db.Column(db.String(64))
    # Service specific arguments as string list.
    # Example: ["-l eng", "--binarize"]
    service_args = db.Column(db.String(255))
    service_version = db.Column(db.String(16))
    status = db.Column(db.String(16))
    title = db.Column(db.String(32))
    # Relationships
    inputs = db.relationship('JobInput', backref='job', lazy='dynamic',
                             cascade='save-update, merge, delete')
    results = db.relationship('JobResult', backref='job', lazy='dynamic',
                              cascade='save-update, merge, delete')

    @property
    def path(self):
        '''
        Directory of the job below the 'jobs' directory of its creator.
        '''
        return os.path.join(self.creator.path, 'jobs', str(self.id))

    def __repr__(self):
        '''
        String representation of the Job. For human readability.
        '''
        return '<Job {}>'.format(self.title)

    def delete(self):
        '''
        Delete the job and its inputs and results from the database.
        A job that is neither complete nor failed is set to 'canceling'
        first. This method then blocks, polling once per second, until the
        status read from the database is 'canceled'. The deletion itself is
        not committed here.
        '''
        if self.status not in ['complete', 'failed']:
            self.status = 'canceling'
            db.session.commit()
            while self.status != 'canceled':
                # In case the daemon handled a job in any way
                if self.status != 'canceling':
                    self.status = 'canceling'
                    db.session.commit()
                sleep(1)
                # Reload the row to see status changes made elsewhere.
                db.session.refresh(self)
        shutil.rmtree(self.path, ignore_errors=True)
        db.session.delete(self)

    def restart(self):
        '''
        Restart a job - only if the status is failed. Removes the 'output'
        and 'pyflow.data' directories of the job and sets its status back to
        'submitted'. Raises an Exception for any other status.
        '''
        if self.status != 'failed':
            raise Exception('Could not restart job: status is not "failed"')
        shutil.rmtree(os.path.join(self.path, 'output'), ignore_errors=True)
        shutil.rmtree(os.path.join(self.path, 'pyflow.data'), ignore_errors=True)  # noqa
        self.end_date = None
        self.status = 'submitted'

    def to_dict(self):
        '''
        Returns the Job as a dictionary, including the dictionaries of all of
        its inputs and results. The end date is None for unfinished jobs.
        '''
        return {'id': self.id,
                'user_id': self.user_id,
                'creation_date': self.creation_date.timestamp(),
                'description': self.description,
                'end_date': (self.end_date.timestamp() if self.end_date
                             else None),
                'service': {'args': self.service_args,
                            'name': self.service,
                            'version': self.service_version},
                'status': self.status,
                'title': self.title,
                'inputs': {job_input.id: job_input.to_dict()
                           for job_input in self.inputs},
                'results': {result.id: result.to_dict()
                            for result in self.results}}


class CorpusFile(db.Model):
    '''
    Class to define Files.
    '''
    __tablename__ = 'corpus_files'
    # Primary key
    id = db.Column(db.Integer, primary_key=True)
    # Foreign keys
    corpus_id = db.Column(db.Integer, db.ForeignKey('corpora.id'))
    # Fields
    address = db.Column(db.String(255))
    author = db.Column(db.String(255))
    booktitle = db.Column(db.String(255))
    chapter = db.Column(db.String(255))
    editor = db.Column(db.String(255))
    filename = db.Column(db.String(255))
    institution = db.Column(db.String(255))
    journal = db.Column(db.String(255))
    pages = db.Column(db.String(255))
    publisher = db.Column(db.String(255))
    publishing_year = db.Column(db.Integer)
    school = db.Column(db.String(255))
    title = db.Column(db.String(255))

    @property
    def path(self):
        '''
        Path of the file inside the directory of its corpus.
        '''
        return os.path.join(self.corpus.path, self.filename)

    def delete(self):
        '''
        Delete the file from the filesystem and the database and mark its
        corpus as 'unprepared'. A failing file removal is only logged. The
        deletion is not committed here.
        '''
        try:
            os.remove(self.path)
        except OSError:
            # Best effort: the database row is removed even if the file
            # could not be.
            logging.error('Removing {} led to an OSError!'.format(self.path))
        db.session.delete(self)
        self.corpus.status = 'unprepared'

    def to_dict(self):
        '''
        Returns the CorpusFile's columns as a plain dictionary.
        '''
        return {'id': self.id,
                'corpus_id': self.corpus_id,
                'address': self.address,
                'author': self.author,
                'booktitle': self.booktitle,
                'chapter': self.chapter,
                'editor': self.editor,
                'filename': self.filename,
                'institution': self.institution,
                'journal': self.journal,
                'pages': self.pages,
                'publisher': self.publisher,
                'publishing_year': self.publishing_year,
                'school': self.school,
                'title': self.title}


class Corpus(db.Model):
    '''
    Class to define a corpus.
    '''
    __tablename__ = 'corpora'
    # Primary key
    id = db.Column(db.Integer, primary_key=True)
    # Foreign keys
    user_id = db.Column(db.Integer, db.ForeignKey('users.id'))
    # Fields
    creation_date = db.Column(db.DateTime(), default=datetime.utcnow)
    current_nr_of_tokens = db.Column(db.BigInteger, default=0)
    description = db.Column(db.String(255))
    last_edited_date = db.Column(db.DateTime(), default=datetime.utcnow)
    max_nr_of_tokens = db.Column(db.BigInteger, default=2147483647)
    status = db.Column(db.String(16), default='unprepared')
    title = db.Column(db.String(32))
    archive_file = db.Column(db.String(255))
    # Relationships
    files = db.relationship('CorpusFile', backref='corpus', lazy='dynamic',
                            cascade='save-update, merge, delete')

    @property
    def path(self):
        '''
        Directory of the corpus below the 'corpora' directory of its creator.
        '''
        return os.path.join(self.creator.path, 'corpora', str(self.id))

    def to_dict(self):
        '''
        Returns the Corpus as a dictionary, including the dictionaries of all
        of its files.
        '''
        return {'id': self.id,
                'user_id': self.user_id,
                'creation_date': self.creation_date.timestamp(),
                'description': self.description,
                'status': self.status,
                'last_edited_date': self.last_edited_date.timestamp(),
                'title': self.title,
                'files': {corpus_file.id: corpus_file.to_dict()
                          for corpus_file in self.files}}

    def build(self):
        '''
        Merges all files of the corpus into 'merged/corpus.vrt'. The metadata
        of each file is written as attributes to its text element, both in
        the file itself and in the merged output. Afterwards the corpus
        status is set to 'submitted'. An existing 'merged' directory is
        replaced.
        '''
        output_dir = os.path.join(self.path, 'merged')
        shutil.rmtree(output_dir, ignore_errors=True)
        os.mkdir(output_dir)
        # NOTE(review): the root element was an unparsable '\n' before, which
        # made every build fail. '<corpus>' is a reconstruction - confirm the
        # expected root tag against the consumer of corpus.vrt.
        master_element_tree = ET.ElementTree(
            ET.fromstring('<corpus>\n</corpus>')
        )
        for corpus_file in self.files:
            element_tree = ET.parse(corpus_file.path)
            text_node = element_tree.find('text')
            # Attribute values have to be strings to be serializable, so
            # missing metadata is written as "NULL".
            text_node.set('address', corpus_file.address or "NULL")
            text_node.set('author', corpus_file.author or "NULL")
            text_node.set('booktitle', corpus_file.booktitle or "NULL")
            text_node.set('chapter', corpus_file.chapter or "NULL")
            text_node.set('editor', corpus_file.editor or "NULL")
            text_node.set('institution', corpus_file.institution or "NULL")
            text_node.set('journal', corpus_file.journal or "NULL")
            text_node.set('pages', corpus_file.pages or "NULL")
            text_node.set('publisher', corpus_file.publisher or "NULL")
            text_node.set('publishing_year', str(corpus_file.publishing_year))
            text_node.set('school', corpus_file.school or "NULL")
            text_node.set('title', corpus_file.title or "NULL")
            element_tree.write(corpus_file.path)
            master_element_tree.getroot().insert(1, text_node)
        output_file = os.path.join(output_dir, 'corpus.vrt')
        master_element_tree.write(output_file,
                                  xml_declaration=True,
                                  encoding='utf-8')
        self.last_edited_date = datetime.utcnow()
        self.status = 'submitted'

    def delete(self):
        '''
        Delete the corpus from the filesystem and the database. The deletion
        is not committed here.
        '''
        shutil.rmtree(self.path, ignore_errors=True)
        db.session.delete(self)

    def __repr__(self):
        '''
        String representation of the corpus. For human readability.
        '''
        return '<Corpus {}>'.format(self.title)


class QueryResult(db.Model):
    '''
    Class to define a corpus analysis result.
    '''
    __tablename__ = 'query_results'
    # Primary key
    id = db.Column(db.Integer, primary_key=True)
    # Foreign keys
    user_id = db.Column(db.Integer, db.ForeignKey('users.id'))
    # Fields
    description = db.Column(db.String(255))
    filename = db.Column(db.String(255))
    query_metadata = db.Column(db.JSON())
    title = db.Column(db.String(32))

    @property
    def path(self):
        '''
        Directory of the query result below the 'query_results' directory of
        its creator.
        '''
        return os.path.join(self.creator.path, 'query_results', str(self.id))

    def delete(self):
        '''
        Delete the query result from the filesystem and the database. The
        deletion is not committed here.
        '''
        shutil.rmtree(self.path, ignore_errors=True)
        db.session.delete(self)

    def to_dict(self):
        '''
        Returns the QueryResult's columns as a plain dictionary.
        '''
        return {'id': self.id,
                'user_id': self.user_id,
                'description': self.description,
                'filename': self.filename,
                'query_metadata': self.query_metadata,
                'title': self.title}

    def __repr__(self):
        '''
        String representation of the QueryResult. For human readability.
        '''
        return '<QueryResult {}>'.format(self.title)


# Flask-Login is told to use the application's custom anonymous user by
# setting its class in the login_manager.anonymous_user attribute.
login_manager.anonymous_user = AnonymousUser


@login_manager.user_loader
def load_user(user_id):
    '''
    Callback for Flask-Login. Loads the User with the given id, which is
    converted to an integer first.
    '''
    return User.query.get(int(user_id))