nopaque/web/app/models.py

671 lines
24 KiB
Python
Raw Normal View History

2019-11-05 09:32:42 +00:00
from datetime import datetime, timezone
from time import sleep
import os
import shutil
import xml.etree.ElementTree as ET

from flask import current_app
from flask_login import UserMixin, AnonymousUserMixin
from itsdangerous import BadSignature, TimedJSONWebSignatureSerializer
from werkzeug.security import generate_password_hash, check_password_hash
from werkzeug.utils import secure_filename

from . import db, login_manager
class Permission:
    '''
    Permission flags, each one a distinct power of two.

    Several flags can be combined into a single integer and tested with the
    bitwise operator &. For example 3 is MANAGE_CORPORA combined with
    MANAGE_JOBS.
    '''
    MANAGE_CORPORA = 1 << 0
    MANAGE_JOBS = 1 << 1
    # PERMISSION_NAME = 1 << 2  (unused bit, value 4)
    # PERMISSION_NAME = 1 << 3  (unused bit, value 8)
    ADMIN = 1 << 4
class Role(db.Model):
    '''
    Model for the different roles Users can have. Is a one-to-many
    relationship. A Role can be associated with many User rows.
    '''
    __tablename__ = 'roles'
    # Primary key
    id = db.Column(db.Integer, primary_key=True)
    # Fields
    default = db.Column(db.Boolean, default=False, index=True)
    name = db.Column(db.String(64), unique=True)
    # Bit mask built from the Permission constants
    permissions = db.Column(db.BigInteger)
    # Relationships
    users = db.relationship('User', backref='role', lazy='dynamic')

    def to_dict(self):
        '''
        Returns the column values of the Role as a dictionary.
        '''
        return {'id': self.id,
                'default': self.default,
                'name': self.name,
                'permissions': self.permissions}

    def __init__(self, **kwargs):
        '''
        Creates a Role. If no permissions are given, the Role starts with
        no permissions at all (0).
        '''
        super(Role, self).__init__(**kwargs)
        if self.permissions is None:
            self.permissions = 0

    def __repr__(self):
        '''
        String representation of the Role. For human readability.
        '''
        return '<Role {role_name}>'.format(role_name=self.name)

    def add_permission(self, perm):
        '''
        Add new permission to Role. Input is a Permission (or a combination
        of Permissions).
        '''
        # Bitwise OR instead of addition: adding a combined permission of
        # which some bits are already set must not carry into other bits.
        if not self.has_permission(perm):
            self.permissions |= perm

    def remove_permission(self, perm):
        '''
        Removes permission from a Role. Input a Permission. Nothing happens
        if the Role does not hold every bit of the given permission.
        '''
        if self.has_permission(perm):
            self.permissions &= ~perm

    def reset_permissions(self):
        '''
        Resets permissions to zero. Zero equals no permissions at all.
        '''
        self.permissions = 0

    def has_permission(self, perm):
        '''
        Checks if a Role has a specific Permission. Does this with the bitwise
        operator. Returns True only if every bit of perm is set.
        '''
        return (self.permissions & perm) == perm

    @staticmethod
    def insert_roles():
        '''
        Inserts roles into the database. This has to be executed before Users
        are added to the database. Otherwise Users will not have a Role
        assigned to them. Order of the roles dictionary determines the ID of
        each role. Users have the ID 1 and Administrators have the ID 2.

        Existing roles are looked up by name and get their permissions and
        default flag rewritten, so calling this repeatedly does not create
        additional rows for the same name.
        '''
        roles = {'User': [Permission.MANAGE_CORPORA, Permission.MANAGE_JOBS],
                 'Administrator': [Permission.MANAGE_CORPORA,
                                   Permission.MANAGE_JOBS, Permission.ADMIN]}
        default_role = 'User'
        for r in roles:
            role = Role.query.filter_by(name=r).first()
            if role is None:
                role = Role(name=r)
            role.reset_permissions()
            for perm in roles[r]:
                role.add_permission(perm)
            role.default = (role.name == default_role)
            db.session.add(role)
        db.session.commit()
class User(UserMixin, db.Model):
    '''
    Model for Users that are registered to Opaque.
    '''
    __tablename__ = 'users'
    # Primary key
    id = db.Column(db.Integer, primary_key=True)
    # Foreign keys
    role_id = db.Column(db.Integer, db.ForeignKey('roles.id'))
    # Fields
    confirmed = db.Column(db.Boolean, default=False)
    email = db.Column(db.String(254), unique=True, index=True)
    # last_seen and member_since are stored as naive datetimes in UTC
    last_seen = db.Column(db.DateTime(), default=datetime.utcnow)
    member_since = db.Column(db.DateTime(), default=datetime.utcnow)
    password_hash = db.Column(db.String(128))
    setting_dark_mode = db.Column(db.Boolean, default=False)
    setting_job_status_mail_notifications = db.Column(db.String(16),
                                                      default='end')
    setting_job_status_site_notifications = db.Column(db.String(16),
                                                      default='all')
    username = db.Column(db.String(64), unique=True, index=True)
    # Relationships
    corpora = db.relationship('Corpus', backref='creator', lazy='dynamic',
                              cascade='save-update, merge, delete')
    jobs = db.relationship('Job', backref='creator', lazy='dynamic',
                           cascade='save-update, merge, delete')
    query_results = db.relationship('QueryResult',
                                    backref='creator',
                                    cascade='save-update, merge, delete',
                                    lazy='dynamic')

    def to_dict(self):
        '''
        Returns the User, its settings and all of its corpora, jobs and query
        results as a nested dictionary. Dates are returned as POSIX
        timestamps.
        '''
        # The stored datetimes are naive UTC values (datetime.utcnow). A
        # naive datetime is interpreted as local time by timestamp(), so the
        # UTC timezone is attached first.
        last_seen = self.last_seen.replace(tzinfo=timezone.utc)
        member_since = self.member_since.replace(tzinfo=timezone.utc)
        return {'id': self.id,
                'role_id': self.role_id,
                'confirmed': self.confirmed,
                'email': self.email,
                'last_seen': last_seen.timestamp(),
                'member_since': member_since.timestamp(),
                'username': self.username,
                'settings': {'dark_mode': self.setting_dark_mode,
                             'job_status_mail_notifications':
                                 self.setting_job_status_mail_notifications,
                             'job_status_site_notifications':
                                 self.setting_job_status_site_notifications},
                'corpora': {corpus.id: corpus.to_dict()
                            for corpus in self.corpora},
                'jobs': {job.id: job.to_dict() for job in self.jobs},
                'query_results': {query_result.id: query_result.to_dict()
                                  for query_result in self.query_results}}

    def __repr__(self):
        '''
        String representation of the User. For human readability.
        '''
        return '<User {username}>'.format(username=self.username)

    def __init__(self, **kwargs):
        '''
        Creates a User. If no role is given, the Administrator role is
        assigned when the email equals the configured admin email address,
        otherwise the role flagged as default is assigned.
        '''
        super(User, self).__init__(**kwargs)
        if self.role is None:
            if self.email == current_app.config['ADMIN_EMAIL_ADRESS']:
                self.role = Role.query.filter_by(name='Administrator').first()
            if self.role is None:
                self.role = Role.query.filter_by(default=True).first()

    def generate_confirmation_token(self, expiration=3600):
        '''
        Generates a confirmation token for user confirmation via email.
        The expiration is given in seconds.
        '''
        s = TimedJSONWebSignatureSerializer(current_app.config['SECRET_KEY'],
                                            expiration)
        return s.dumps({'confirm': self.id}).decode('utf-8')

    def generate_reset_token(self, expiration=3600):
        '''
        Generates a reset token for password reset via email.
        The expiration is given in seconds.
        '''
        s = TimedJSONWebSignatureSerializer(current_app.config['SECRET_KEY'],
                                            expiration)
        return s.dumps({'reset': self.id}).decode('utf-8')

    def confirm(self, token):
        '''
        Confirms User if the given token is valid and not expired. Returns
        True on success, False otherwise. The change is added to the session
        but not committed.
        '''
        s = TimedJSONWebSignatureSerializer(current_app.config['SECRET_KEY'])
        try:
            data = s.loads(token.encode('utf-8'))
        except BadSignature:
            return False
        if data.get('confirm') != self.id:
            return False
        self.confirmed = True
        db.session.add(self)
        return True

    @staticmethod
    def reset_password(token, new_password):
        '''
        Resets password for User if the given token is valid and not expired.
        Returns True on success, False otherwise. The change is added to the
        session but not committed.
        '''
        s = TimedJSONWebSignatureSerializer(current_app.config['SECRET_KEY'])
        try:
            data = s.loads(token.encode('utf-8'))
        except BadSignature:
            return False
        user_id = data.get('reset')
        # A validly signed token of another kind (e.g. a confirmation token)
        # carries no 'reset' entry; do not query the database with None.
        if user_id is None:
            return False
        user = User.query.get(user_id)
        if user is None:
            return False
        user.password = new_password
        db.session.add(user)
        return True

    @property
    def password(self):
        '''
        The password is write only. Reading it raises an AttributeError.
        '''
        raise AttributeError('password is not a readable attribute')

    @password.setter
    def password(self, password):
        # Only the hash of the password is stored
        self.password_hash = generate_password_hash(password)

    def verify_password(self, password):
        '''
        Checks the given password against the stored password hash.
        '''
        return check_password_hash(self.password_hash, password)

    def can(self, perm):
        '''
        Checks if a User with its current role can do something. Checks if the
        associated role actually has the needed Permission.
        '''
        return self.role is not None and self.role.has_permission(perm)

    def is_administrator(self):
        '''
        Checks if User has Admin permissions.
        '''
        return self.can(Permission.ADMIN)

    def ping(self):
        '''
        Sets last_seen to the current UTC time. The change is added to the
        session but not committed.
        '''
        self.last_seen = datetime.utcnow()
        db.session.add(self)

    def delete(self):
        '''
        Delete the user and its corpora and jobs from database and filesystem.
        '''
        user_dir = os.path.join(current_app.config['DATA_DIR'],
                                str(self.id))
        # Errors while removing the directory are ignored on purpose
        shutil.rmtree(user_dir, ignore_errors=True)
        db.session.delete(self)
2019-09-09 14:17:59 +00:00
class AnonymousUser(AnonymousUserMixin):
    '''
    Stand-in for a user that is not logged in. Offers the same permission
    checks as User, all of which are answered with False.
    '''

    def is_administrator(self):
        '''
        An anonymous user is never an administrator.
        '''
        return False

    def can(self, permissions):
        '''
        An anonymous user holds no permissions, whatever is asked for.
        '''
        return False
2019-10-16 14:52:05 +00:00
class JobInput(db.Model):
    '''
    Database model for a single input file that belongs to a Job.
    '''
    __tablename__ = 'job_inputs'
    # Primary key
    id = db.Column(db.Integer, primary_key=True)
    # Foreign keys
    job_id = db.Column(db.Integer, db.ForeignKey('jobs.id'))
    # Fields
    dir = db.Column(db.String(255))
    filename = db.Column(db.String(255))

    def to_dict(self):
        '''
        Returns id, job_id and filename of the JobInput as a dictionary.
        '''
        return dict(id=self.id, job_id=self.job_id, filename=self.filename)

    def __repr__(self):
        '''
        Short, human readable representation built from the filename.
        '''
        return f'<JobInput {self.filename}>'
2019-10-17 11:26:20 +00:00
2019-10-16 14:52:05 +00:00
class JobResult(db.Model):
    '''
    Database model for a single result file that belongs to a Job.
    '''
    __tablename__ = 'job_results'
    # Primary key
    id = db.Column(db.Integer, primary_key=True)
    # Foreign keys
    job_id = db.Column(db.Integer, db.ForeignKey('jobs.id'))
    # Fields
    dir = db.Column(db.String(255))
    filename = db.Column(db.String(255))

    def to_dict(self):
        '''
        Returns id, job_id and filename of the JobResult as a dictionary.
        '''
        return dict(id=self.id, job_id=self.job_id, filename=self.filename)

    def __repr__(self):
        '''
        Short, human readable representation built from the filename.
        '''
        return f'<JobResult {self.filename}>'
2019-10-17 11:26:20 +00:00
2019-10-16 14:52:05 +00:00
2019-08-06 09:47:04 +00:00
class Job(db.Model):
    '''
    Class to define Jobs.
    '''
    __tablename__ = 'jobs'
    # Primary key
    id = db.Column(db.Integer, primary_key=True)
    # Foreign keys
    user_id = db.Column(db.Integer, db.ForeignKey('users.id'))
    # Fields
    # creation_date is stored as a naive datetime in UTC
    creation_date = db.Column(db.DateTime(), default=datetime.utcnow)
    description = db.Column(db.String(255))
    end_date = db.Column(db.DateTime())
    mem_mb = db.Column(db.Integer)
    n_cores = db.Column(db.Integer)
    # Shares its name with the imported werkzeug function. Inside of methods
    # the bare name still refers to the function, the column is reached via
    # self.secure_filename.
    secure_filename = db.Column(db.String(32))
    service = db.Column(db.String(64))
    # Service specific arguments as string list.
    # Example: ["-l eng", "--binarize"]
    service_args = db.Column(db.String(255))
    service_version = db.Column(db.String(16))
    status = db.Column(db.String(16))
    title = db.Column(db.String(32))
    # Relationships
    inputs = db.relationship('JobInput', backref='job', lazy='dynamic',
                             cascade='save-update, merge, delete')
    results = db.relationship('JobResult', backref='job', lazy='dynamic',
                              cascade='save-update, merge, delete')
    notification_data = db.relationship('NotificationData',
                                        cascade='save-update, merge, delete',
                                        uselist=False,
                                        back_populates='job')  # One-to-One relationship
    notification_email_data = db.relationship('NotificationEmailData',
                                              cascade='save-update, merge, delete',
                                              back_populates='job')

    def __repr__(self):
        '''
        String representation of the Job. For human readability.
        '''
        return '<Job {job_title}>'.format(job_title=self.title)

    def create_secure_filename(self):
        '''
        Takes the job.title string and creates a secure filename from this.
        The result is stored in self.secure_filename.
        '''
        self.secure_filename = secure_filename(self.title)

    def delete(self):
        '''
        Delete the job and its inputs and results from the database. The job
        directory is removed from the filesystem as well.

        A job that is neither 'complete' nor 'failed' is set to 'canceling'
        first. This method then polls the database once per second and only
        continues as soon as the status is 'canceled'.
        '''
        # NOTE(review): presumably a separate daemon sets the status to
        # 'canceled'; if that never happens this loop does not terminate.
        if self.status not in ['complete', 'failed']:
            self.status = 'canceling'
            db.session.commit()
            while self.status != 'canceled':
                # In case the daemon handled a job in any way
                if self.status != 'canceling':
                    self.status = 'canceling'
                    db.session.commit()
                sleep(1)
                db.session.refresh(self)
        job_dir = os.path.join(current_app.config['DATA_DIR'],
                               str(self.user_id),
                               'jobs',
                               str(self.id))
        # Errors while removing the directory are ignored on purpose
        shutil.rmtree(job_dir, ignore_errors=True)
        db.session.delete(self)

    def restart(self):
        '''
        Restart a job - only if the status is failed

        Removes the 'output' and 'pyflow.data' directories of the job, clears
        the end date and sets the status to 'submitted'. Raises an Exception
        if the status is not 'failed'.
        '''
        if self.status != 'failed':
            raise Exception('Could not restart job: status is not "failed"')
        job_dir = os.path.join(current_app.config['DATA_DIR'],
                               str(self.user_id),
                               'jobs',
                               str(self.id))
        shutil.rmtree(os.path.join(job_dir, 'output'), ignore_errors=True)
        shutil.rmtree(os.path.join(job_dir, 'pyflow.data'), ignore_errors=True)
        self.end_date = None
        self.status = 'submitted'

    def to_dict(self):
        '''
        Returns the Job together with its inputs and results as a nested
        dictionary. Dates are returned as POSIX timestamps, end_date is None
        as long as it is not set.
        '''
        # The stored datetimes are naive UTC values (datetime.utcnow). A
        # naive datetime is interpreted as local time by timestamp(), so the
        # UTC timezone is attached first.
        creation_date = self.creation_date.replace(tzinfo=timezone.utc)
        end_date = (self.end_date.replace(tzinfo=timezone.utc)
                    if self.end_date else None)
        return {'id': self.id,
                'user_id': self.user_id,
                'creation_date': creation_date.timestamp(),
                'description': self.description,
                'end_date': end_date.timestamp() if end_date else None,
                'inputs': {job_input.id: job_input.to_dict()
                           for job_input in self.inputs},
                'mem_mb': self.mem_mb,
                'n_cores': self.n_cores,
                'results': {result.id: result.to_dict()
                            for result in self.results},
                'service': self.service,
                'service_args': self.service_args,
                'service_version': self.service_version,
                'status': self.status,
                'title': self.title}
2019-08-05 14:45:38 +00:00
2020-05-11 14:09:09 +00:00
class NotificationData(db.Model):
    '''
    Class to define notification data used for sending a notification mail with
    nopaque_notify.
    '''
    __tablename__ = 'notification_data'
    # Primary key
    id = db.Column(db.Integer, primary_key=True)
    # Foreign Key
    job_id = db.Column(db.Integer, db.ForeignKey('jobs.id'))
    # relationships
    job = db.relationship('Job', back_populates='notification_data')
    # Fields
    notified_on = db.Column(db.String(16), default=None)

    def __repr__(self):
        '''
        String representation of the NotificationData. For human readability.
        '''
        return '<NotificationData {id}>'.format(id=self.id)

    def to_dict(self):
        '''
        Returns the NotificationData as a dictionary. The 'job' entry holds
        the related Job object itself, not a dictionary.
        '''
        # The column is called notified_on; there is no attribute named
        # 'notified' on this model, reading it raised an AttributeError.
        return {'id': self.id,
                'job_id': self.job_id,
                'job': self.job,
                'notified_on': self.notified_on}
class NotificationEmailData(db.Model):
    '''
    Class to define data that will be used to send a corresponding Notification
    via email.
    '''
    __tablename__ = 'notification_email_data'
    # Primary Key
    id = db.Column(db.Integer, primary_key=True)
    # Foreign Key
    job_id = db.Column(db.Integer, db.ForeignKey('jobs.id'))
    # relationships
    job = db.relationship('Job', back_populates='notification_email_data')
    # Fields
    notify_status = db.Column(db.String(16), default=None)
    creation_date = db.Column(db.DateTime(), default=datetime.utcnow)

    def __repr__(self):
        '''
        String representation of the NotificationEmailData. For human
        readability.
        '''
        # Use the name of this class, not the one of NotificationData
        return '<NotificationEmailData {id}>'.format(id=self.id)

    def to_dict(self):
        '''
        Returns the NotificationEmailData as a dictionary. The 'job' entry
        holds the related Job object and 'creation_date' the datetime object,
        both are returned unconverted.
        '''
        return {'id': self.id,
                'job_id': self.job_id,
                'job': self.job,
                'notify_status': self.notify_status,
                'creation_date': self.creation_date}
2020-05-11 14:09:09 +00:00
2019-10-16 14:52:05 +00:00
class CorpusFile(db.Model):
    '''
    Database model for a single file of a Corpus together with its
    bibliographic metadata.
    '''
    __tablename__ = 'corpus_files'
    # Primary key
    id = db.Column(db.Integer, primary_key=True)
    # Foreign keys
    corpus_id = db.Column(db.Integer, db.ForeignKey('corpora.id'))
    # Fields
    address = db.Column(db.String(255))
    author = db.Column(db.String(255))
    booktitle = db.Column(db.String(255))
    chapter = db.Column(db.String(255))
    dir = db.Column(db.String(255))
    editor = db.Column(db.String(255))
    filename = db.Column(db.String(255))
    institution = db.Column(db.String(255))
    journal = db.Column(db.String(255))
    pages = db.Column(db.String(255))
    publisher = db.Column(db.String(255))
    publishing_year = db.Column(db.Integer)
    school = db.Column(db.String(255))
    title = db.Column(db.String(255))

    def delete(self):
        '''
        Removes the file from the filesystem (a file that cannot be removed
        is ignored), deletes the database entry and marks the corpus the file
        belonged to as 'unprepared'.
        '''
        owning_corpus_dir = os.path.join(current_app.config['DATA_DIR'],
                                         str(self.corpus.user_id),
                                         'corpora',
                                         str(self.corpus_id))
        target = os.path.join(owning_corpus_dir, self.filename)
        try:
            os.remove(target)
        except OSError:
            # Best effort only, a missing file must not stop the deletion
            pass
        db.session.delete(self)
        self.corpus.status = 'unprepared'

    def to_dict(self):
        '''
        Returns the CorpusFile as a dictionary. The dir column is not part of
        the result.
        '''
        exported = ('id', 'corpus_id', 'address', 'author', 'booktitle',
                    'chapter', 'editor', 'filename', 'institution', 'journal',
                    'pages', 'publisher', 'publishing_year', 'school',
                    'title')
        return {field: getattr(self, field) for field in exported}
2019-10-16 14:52:05 +00:00
2019-08-06 10:06:41 +00:00
class Corpus(db.Model):
    '''
    Class to define a corpus.
    '''
    __tablename__ = 'corpora'
    # Primary key
    id = db.Column(db.Integer, primary_key=True)
    # Foreign keys
    user_id = db.Column(db.Integer, db.ForeignKey('users.id'))
    # Fields
    # creation_date and last_edited_date are stored as naive datetimes in UTC
    creation_date = db.Column(db.DateTime(), default=datetime.utcnow)
    current_nr_of_tokens = db.Column(db.BigInteger, default=0)
    description = db.Column(db.String(255))
    last_edited_date = db.Column(db.DateTime(), default=datetime.utcnow)
    max_nr_of_tokens = db.Column(db.BigInteger, default=2147483647)
    status = db.Column(db.String(16))
    title = db.Column(db.String(32))
    # Relationships
    files = db.relationship('CorpusFile', backref='corpus', lazy='dynamic',
                            cascade='save-update, merge, delete')

    def to_dict(self):
        '''
        Returns the Corpus together with its files as a nested dictionary.
        Dates are returned as POSIX timestamps.
        '''
        # The stored datetimes are naive UTC values (datetime.utcnow). A
        # naive datetime is interpreted as local time by timestamp(), so the
        # UTC timezone is attached first.
        creation_date = self.creation_date.replace(tzinfo=timezone.utc)
        last_edited_date = self.last_edited_date.replace(tzinfo=timezone.utc)
        return {'id': self.id,
                'user_id': self.user_id,
                'creation_date': creation_date.timestamp(),
                'description': self.description,
                'status': self.status,
                'last_edited_date': last_edited_date.timestamp(),
                'title': self.title,
                'files': {file.id: file.to_dict() for file in self.files}}

    def build(self):
        '''
        Merges all files of the corpus into <corpus dir>/merged/corpus.vrt.

        The metadata of each CorpusFile is written as attributes to the text
        element of the file. Each file is rewritten in place with these
        attributes. Afterwards last_edited_date is updated and the status is
        set to 'submitted'.
        '''
        corpus_dir = os.path.join(current_app.config['DATA_DIR'],
                                  str(self.user_id),
                                  'corpora',
                                  str(self.id))
        output_dir = os.path.join(corpus_dir, 'merged')
        # Always start with an empty output directory
        shutil.rmtree(output_dir, ignore_errors=True)
        os.mkdir(output_dir)
        master_element_tree = ET.ElementTree(
            ET.fromstring('<corpus>\n</corpus>')
        )
        for corpus_file in self.files:
            corpus_file_path = os.path.join(corpus_dir, corpus_file.filename)
            element_tree = ET.parse(corpus_file_path)
            text_node = element_tree.find('text')
            text_node.set('address', corpus_file.address or "NULL")
            # ElementTree cannot serialize None as attribute value. Fall back
            # to "NULL" like the other fields do, an empty string is kept.
            text_node.set('author', "NULL" if corpus_file.author is None
                          else corpus_file.author)
            text_node.set('booktitle', corpus_file.booktitle or "NULL")
            text_node.set('chapter', corpus_file.chapter or "NULL")
            text_node.set('editor', corpus_file.editor or "NULL")
            text_node.set('institution', corpus_file.institution or "NULL")
            text_node.set('journal', corpus_file.journal or "NULL")
            text_node.set('pages', corpus_file.pages or "NULL")
            text_node.set('publisher', corpus_file.publisher or "NULL")
            text_node.set('publishing_year', str(corpus_file.publishing_year))
            text_node.set('school', corpus_file.school or "NULL")
            text_node.set('title', "NULL" if corpus_file.title is None
                          else corpus_file.title)
            element_tree.write(corpus_file_path)
            # NOTE(review): inserting at index 1 keeps the first text first
            # but places every later text directly behind it, so the texts
            # after the first one end up in reverse order. Confirm that this
            # order is wanted before changing it to an append.
            master_element_tree.getroot().insert(1, text_node)
        output_file = os.path.join(output_dir, 'corpus.vrt')
        master_element_tree.write(output_file,
                                  xml_declaration=True,
                                  encoding='utf-8')
        self.last_edited_date = datetime.utcnow()
        self.status = 'submitted'

    def delete(self):
        '''
        Delete the corpus from the database and its directory from the
        filesystem.
        '''
        corpus_dir = os.path.join(current_app.config['DATA_DIR'],
                                  str(self.user_id),
                                  'corpora',
                                  str(self.id))
        # Errors while removing the directory are ignored on purpose
        shutil.rmtree(corpus_dir, ignore_errors=True)
        db.session.delete(self)

    def __repr__(self):
        '''
        String representation of the corpus. For human readability.
        '''
        return '<Corpus {corpus_title}>'.format(corpus_title=self.title)
2019-11-04 14:06:54 +00:00
2019-08-22 07:35:23 +00:00
class QueryResult(db.Model):
    '''
    Database model for a stored result of a corpus query.
    '''
    __tablename__ = 'query_results'
    # Primary key
    id = db.Column(db.Integer, primary_key=True)
    # Foreign keys
    user_id = db.Column(db.Integer, db.ForeignKey('users.id'))
    # Fields
    description = db.Column(db.String(255))
    filename = db.Column(db.String(255))
    query_metadata = db.Column(db.JSON())
    title = db.Column(db.String(32))

    def delete(self):
        '''
        Removes the directory of the query result from the filesystem
        (errors are ignored) and deletes the database entry.
        '''
        owner_dir = os.path.join(current_app.config['DATA_DIR'],
                                 str(self.user_id))
        result_dir = os.path.join(owner_dir, 'query_results', str(self.id))
        shutil.rmtree(result_dir, ignore_errors=True)
        db.session.delete(self)

    def to_dict(self):
        '''
        Returns the column values of the QueryResult as a dictionary.
        '''
        exported = ('id', 'user_id', 'description', 'filename',
                    'query_metadata', 'title')
        return {field: getattr(self, field) for field in exported}

    def __repr__(self):
        '''
        Short, human readable representation built from the title.
        '''
        return f'<QueryResult {self.title}>'
2019-08-06 09:47:04 +00:00
# Flask-Login is told to use the applications custom anonymous user by setting
# its class in the login_manager.anonymous_user attribute.
login_manager.anonymous_user = AnonymousUser
@login_manager.user_loader
def load_user(user_id):
    '''
    Callback used by Flask-Login to load a User by the id that is stored in
    the session. Returns None if the id is not a valid integer or if no User
    with this id exists.
    '''
    # A malformed id must result in "no user" and not in an exception
    try:
        primary_key = int(user_id)
    except (TypeError, ValueError):
        return None
    return User.query.get(primary_key)