diff --git a/web/app/corpora/tasks.py b/web/app/corpora/tasks.py
index ec7cd817..97d2e378 100644
--- a/web/app/corpora/tasks.py
+++ b/web/app/corpora/tasks.py
@@ -1,10 +1,6 @@
-from datetime import datetime
from .. import db
from ..decorators import background
from ..models import Corpus, CorpusFile
-import xml.etree.ElementTree as ET
-import os
-import shutil
@background
@@ -13,68 +9,26 @@ def build_corpus(corpus_id, *args, **kwargs):
with app.app_context():
corpus = Corpus.query.get(corpus_id)
if corpus is None:
- return
- corpus.status = 'File processing'
- db.session.commit()
- corpus_dir = os.path.join(app.config['NOPAQUE_STORAGE'],
- str(corpus.user_id), 'corpora',
- str(corpus.id))
- output_dir = os.path.join(corpus_dir, 'merged')
- shutil.rmtree(output_dir, ignore_errors=True)
- os.mkdir(output_dir)
- master_element_tree = ET.ElementTree(
- ET.fromstring('<corpus>\n</corpus>'))
- for corpus_file in corpus.files:
- file = os.path.join(corpus_dir, corpus_file.filename)
- element_tree = ET.parse(file)
- text_node = element_tree.find('text')
- text_node.set('address', corpus_file.address or "NULL")
- text_node.set('author', corpus_file.author)
- text_node.set('booktitle', corpus_file.booktitle or "NULL")
- text_node.set('chapter', corpus_file.chapter or "NULL")
- text_node.set('editor', corpus_file.editor or "NULL")
- text_node.set('institution', corpus_file.institution or "NULL")
- text_node.set('journal', corpus_file.journal or "NULL")
- text_node.set('pages', corpus_file.pages or "NULL")
- text_node.set('publisher', corpus_file.publisher or "NULL")
- text_node.set('publishing_year', str(corpus_file.publishing_year))
- text_node.set('school', corpus_file.school or "NULL")
- text_node.set('title', corpus_file.title)
- element_tree.write(file)
- master_element_tree.getroot().insert(1, text_node)
- output_file = os.path.join(output_dir, 'corpus.vrt')
- master_element_tree.write(output_file, xml_declaration=True,
- encoding='utf-8')
- corpus.status = 'submitted'
- corpus.last_edited_date = datetime.utcnow()
+ raise Exception('Corpus {} not found'.format(corpus_id))
+ corpus.build()
db.session.commit()
@background
def delete_corpus(corpus_id, *args, **kwargs):
- app = kwargs['app']
- with app.app_context():
+ with kwargs['app'].app_context():
corpus = Corpus.query.get(corpus_id)
if corpus is None:
- return
- path = os.path.join(app.config['NOPAQUE_STORAGE'], str(corpus.user_id),
- 'corpora', str(corpus.id))
- shutil.rmtree(path, ignore_errors=True)
+ raise Exception('Corpus {} not found'.format(corpus_id))
corpus.delete()
+ db.session.commit()
@background
def delete_corpus_file(corpus_file_id, *args, **kwargs):
- app = kwargs['app']
- with app.app_context():
+ with kwargs['app'].app_context():
corpus_file = CorpusFile.query.get(corpus_file_id)
if corpus_file is None:
- return
- path = os.path.join(app.config['NOPAQUE_STORAGE'], corpus_file.dir,
- corpus_file.filename)
- try:
- os.remove(path)
- except Exception:
- pass
- else:
- corpus_file.delete()
+ raise Exception('Corpus file {} not found'.format(corpus_file_id))
+ corpus_file.delete()
+ db.session.commit()
diff --git a/web/app/decorators.py b/web/app/decorators.py
index daa7012b..4bd2f731 100644
--- a/web/app/decorators.py
+++ b/web/app/decorators.py
@@ -2,7 +2,6 @@ from . import socketio
from flask import abort, current_app, request
from flask_login import current_user
from functools import wraps
-from threading import Thread
def admin_required(f):
@@ -27,8 +26,7 @@ def background(f):
@wraps(f)
def wrapped(*args, **kwargs):
kwargs['app'] = current_app._get_current_object()
- thread = Thread(target=f, args=args, kwargs=kwargs)
- thread.start()
+ thread = socketio.start_background_task(f, *args, **kwargs)
return thread
return wrapped
diff --git a/web/app/jobs/tasks.py b/web/app/jobs/tasks.py
index 0daec416..8ab823dc 100644
--- a/web/app/jobs/tasks.py
+++ b/web/app/jobs/tasks.py
@@ -1,22 +1,23 @@
+from .. import db
from ..decorators import background
from ..models import Job
@background
def delete_job(job_id, *args, **kwargs):
- app = kwargs['app']
- with app.app_context():
+ with kwargs['app'].app_context():
job = Job.query.get(job_id)
if job is None:
- raise Exception('Could not find job with id {}'.format(job_id))
+ raise Exception('Job {} not found'.format(job_id))
job.delete()
+ db.session.commit()
@background
def restart_job(job_id, *args, **kwargs):
- app = kwargs['app']
- with app.app_context():
+ with kwargs['app'].app_context():
job = Job.query.get(job_id)
if job is None:
- raise Exception('Could not find job with id {}'.format(job_id))
+ raise Exception('Job {} not found'.format(job_id))
job.restart()
+ db.session.commit()
diff --git a/web/app/jobs/views.py b/web/app/jobs/views.py
index ffc17dd9..557413cc 100644
--- a/web/app/jobs/views.py
+++ b/web/app/jobs/views.py
@@ -60,10 +60,6 @@ def restart(job_id):
else:
tasks.restart_job(job_id)
flash('Job has been restarted!', 'job')
- job_inputs = [dict(filename=input.filename,
- id=input.id,
- job_id=job.id)
- for input in job.inputs]
return redirect(url_for('jobs.job', job_id=job_id))
diff --git a/web/app/models.py b/web/app/models.py
index 538322d9..d23a25ff 100644
--- a/web/app/models.py
+++ b/web/app/models.py
@@ -5,6 +5,7 @@ from itsdangerous import BadSignature, TimedJSONWebSignatureSerializer
from time import sleep
from werkzeug.security import generate_password_hash, check_password_hash
from werkzeug.utils import secure_filename
+import xml.etree.ElementTree as ET
from . import db, login_manager
import os
import shutil
@@ -246,9 +247,10 @@ class User(UserMixin, db.Model):
'''
Delete the user and its corpora and jobs from database and filesystem.
'''
-
+ user_dir = os.path.join(current_app.config['NOPAQUE_STORAGE'],
+ str(self.id))
+ shutil.rmtree(user_dir, ignore_errors=True)
db.session.delete(self)
- db.session.commit()
class AnonymousUser(AnonymousUserMixin):
@@ -383,7 +385,6 @@ class Job(db.Model):
str(self.id))
shutil.rmtree(job_dir, ignore_errors=True)
db.session.delete(self)
- db.session.commit()
def restart(self):
'''
@@ -400,7 +401,6 @@ class Job(db.Model):
shutil.rmtree(os.path.join(job_dir, 'pyflow.data'), ignore_errors=True)
self.end_date = None
self.status = 'submitted'
- db.session.commit()
def to_dict(self):
return {'id': self.id,
@@ -504,9 +504,17 @@ class CorpusFile(db.Model):
title = db.Column(db.String(255))
def delete(self):
- self.corpus.status = 'unprepared'
+ corpus_file = os.path.join(current_app.config['NOPAQUE_STORAGE'],
+ str(self.corpus.user_id),
+ 'corpora',
+ str(self.corpus_id),
+ self.filename)
+ try:
+ os.remove(corpus_file)
+ except OSError:
+ pass
db.session.delete(self)
- db.session.commit()
+ self.corpus.status = 'unprepared'
def to_dict(self):
return {'id': self.id,
@@ -557,9 +565,49 @@ class Corpus(db.Model):
'title': self.title,
'files': {file.id: file.to_dict() for file in self.files}}
+ def build(self):
+ corpus_dir = os.path.join(current_app.config['NOPAQUE_STORAGE'],
+ str(self.user_id),
+ 'corpora',
+ str(self.id))
+ output_dir = os.path.join(corpus_dir, 'merged')
+ shutil.rmtree(output_dir, ignore_errors=True)
+ os.mkdir(output_dir)
+ master_element_tree = ET.ElementTree(
+ ET.fromstring('<corpus>\n</corpus>')
+ )
+ for corpus_file in self.files:
+ corpus_file_path = os.path.join(corpus_dir, corpus_file.filename)
+ element_tree = ET.parse(corpus_file_path)
+ text_node = element_tree.find('text')
+ text_node.set('address', corpus_file.address or "NULL")
+ text_node.set('author', corpus_file.author)
+ text_node.set('booktitle', corpus_file.booktitle or "NULL")
+ text_node.set('chapter', corpus_file.chapter or "NULL")
+ text_node.set('editor', corpus_file.editor or "NULL")
+ text_node.set('institution', corpus_file.institution or "NULL")
+ text_node.set('journal', corpus_file.journal or "NULL")
+ text_node.set('pages', corpus_file.pages or "NULL")
+ text_node.set('publisher', corpus_file.publisher or "NULL")
+ text_node.set('publishing_year', str(corpus_file.publishing_year))
+ text_node.set('school', corpus_file.school or "NULL")
+ text_node.set('title', corpus_file.title)
+ element_tree.write(corpus_file_path)
+ master_element_tree.getroot().insert(1, text_node)
+ output_file = os.path.join(output_dir, 'corpus.vrt')
+ master_element_tree.write(output_file,
+ xml_declaration=True,
+ encoding='utf-8')
+ self.last_edited_date = datetime.utcnow()
+ self.status = 'submitted'
+
def delete(self):
+ corpus_dir = os.path.join(current_app.config['NOPAQUE_STORAGE'],
+ str(self.user_id),
+ 'corpora',
+ str(self.id))
+ shutil.rmtree(corpus_dir, ignore_errors=True)
db.session.delete(self)
- db.session.commit()
def __repr__(self):
'''
@@ -582,8 +630,10 @@ class Result(db.Model):
cascade='save-update, merge, delete')
def delete(self):
+ result_file_path = os.path.join(current_app.config['NOPAQUE_STORAGE'],
+ self.file[0].dir)
+ shutil.rmtree(result_file_path)
db.session.delete(self)
- db.session.commit()
def __repr__(self):
'''
diff --git a/web/app/profile/tasks.py b/web/app/profile/tasks.py
index a4f61dac..61f737c5 100644
--- a/web/app/profile/tasks.py
+++ b/web/app/profile/tasks.py
@@ -1,16 +1,13 @@
+from .. import db
from ..decorators import background
from ..models import User
-import os
-import shutil
@background
def delete_user(user_id, *args, **kwargs):
- app = kwargs['app']
- with app.app_context():
+ with kwargs['app'].app_context():
user = User.query.get(user_id)
if user is None:
- raise Exception('User {} not found!'.format(user_id))
- path = os.path.join(app.config['NOPAQUE_STORAGE'], str(user.id))
- shutil.rmtree(path, ignore_errors=True)
+ raise Exception('User {} not found'.format(user_id))
user.delete()
+ db.session.commit()
diff --git a/web/app/results/tasks.py b/web/app/results/tasks.py
index 39e8e4be..d139501f 100644
--- a/web/app/results/tasks.py
+++ b/web/app/results/tasks.py
@@ -1,17 +1,13 @@
+from .. import db
from ..decorators import background
from ..models import Result
-import os
-import shutil
@background
def delete_result(result_id, *args, **kwargs):
- app = kwargs['app']
- with app.app_context():
+ with kwargs['app'].app_context():
result = Result.query.get(result_id)
if result is None:
- return
- result_file_path = os.path.join(app.config['NOPAQUE_STORAGE'],
- result.file[0].dir)
- shutil.rmtree(result_file_path)
+ raise Exception('Result {} not found'.format(result_id))
result.delete() # cascades down and also deletes ResultFile
+ db.session.commit()
diff --git a/web/config.py b/web/config.py
index 4b153b16..6c2143ad 100644
--- a/web/config.py
+++ b/web/config.py
@@ -33,8 +33,7 @@ class Config:
os.makedirs('logs', exist_ok=True)
logging.basicConfig(filename='logs/nopaque.log',
format='[%(asctime)s] %(levelname)s in '
- '%(name)s/%(filename)s:%(lineno)d - '
- '%(message)s',
+ '%(pathname)s:%(lineno)d - %(message)s',
datefmt='%Y-%m-%d %H:%M:%S', filemode='w')
''' ### Security enhancements ### '''