From 7f8797d22750b7c54053909f9a6759860b18dd80 Mon Sep 17 00:00:00 2001 From: Patrick Jentsch Date: Fri, 10 Jul 2020 11:36:54 +0200 Subject: [PATCH] Delete files in db model methods. --- web/app/corpora/tasks.py | 64 ++++++-------------------------------- web/app/decorators.py | 4 +-- web/app/jobs/tasks.py | 13 ++++---- web/app/jobs/views.py | 4 --- web/app/models.py | 66 +++++++++++++++++++++++++++++++++++----- web/app/profile/tasks.py | 11 +++---- web/app/results/tasks.py | 12 +++----- web/config.py | 3 +- 8 files changed, 84 insertions(+), 93 deletions(-) diff --git a/web/app/corpora/tasks.py b/web/app/corpora/tasks.py index ec7cd817..97d2e378 100644 --- a/web/app/corpora/tasks.py +++ b/web/app/corpora/tasks.py @@ -1,10 +1,6 @@ -from datetime import datetime from .. import db from ..decorators import background from ..models import Corpus, CorpusFile -import xml.etree.ElementTree as ET -import os -import shutil @background @@ -13,68 +9,26 @@ def build_corpus(corpus_id, *args, **kwargs): with app.app_context(): corpus = Corpus.query.get(corpus_id) if corpus is None: - return - corpus.status = 'File processing' - db.session.commit() - corpus_dir = os.path.join(app.config['NOPAQUE_STORAGE'], - str(corpus.user_id), 'corpora', - str(corpus.id)) - output_dir = os.path.join(corpus_dir, 'merged') - shutil.rmtree(output_dir, ignore_errors=True) - os.mkdir(output_dir) - master_element_tree = ET.ElementTree( - ET.fromstring('\n')) - for corpus_file in corpus.files: - file = os.path.join(corpus_dir, corpus_file.filename) - element_tree = ET.parse(file) - text_node = element_tree.find('text') - text_node.set('address', corpus_file.address or "NULL") - text_node.set('author', corpus_file.author) - text_node.set('booktitle', corpus_file.booktitle or "NULL") - text_node.set('chapter', corpus_file.chapter or "NULL") - text_node.set('editor', corpus_file.editor or "NULL") - text_node.set('institution', corpus_file.institution or "NULL") - text_node.set('journal', corpus_file.journal or "NULL") - text_node.set('pages', corpus_file.pages or "NULL") - text_node.set('publisher', corpus_file.publisher or "NULL") - text_node.set('publishing_year', str(corpus_file.publishing_year)) - text_node.set('school', corpus_file.school or "NULL") - text_node.set('title', corpus_file.title) - element_tree.write(file) - master_element_tree.getroot().insert(1, text_node) - output_file = os.path.join(output_dir, 'corpus.vrt') - master_element_tree.write(output_file, xml_declaration=True, - encoding='utf-8') - corpus.status = 'submitted' - corpus.last_edited_date = datetime.utcnow() + raise Exception('Corpus {} not found'.format(corpus_id)) + corpus.build() db.session.commit() @background def delete_corpus(corpus_id, *args, **kwargs): - app = kwargs['app'] - with app.app_context(): + with kwargs['app'].app_context(): corpus = Corpus.query.get(corpus_id) if corpus is None: - return - path = os.path.join(app.config['NOPAQUE_STORAGE'], str(corpus.user_id), - 'corpora', str(corpus.id)) - shutil.rmtree(path, ignore_errors=True) + raise Exception('Corpus {} not found'.format(corpus_id)) corpus.delete() + db.session.commit() @background def delete_corpus_file(corpus_file_id, *args, **kwargs): - app = kwargs['app'] - with app.app_context(): + with kwargs['app'].app_context(): corpus_file = CorpusFile.query.get(corpus_file_id) if corpus_file is None: - return - path = os.path.join(app.config['NOPAQUE_STORAGE'], corpus_file.dir, - corpus_file.filename) - try: - os.remove(path) - except Exception: - pass - else: - corpus_file.delete() + raise Exception('Corpus file {} not found'.format(corpus_file_id)) + corpus_file.delete() + db.session.commit() diff --git a/web/app/decorators.py b/web/app/decorators.py index daa7012b..4bd2f731 100644 --- a/web/app/decorators.py +++ b/web/app/decorators.py @@ -2,7 +2,6 @@ from . import socketio from flask import abort, current_app, request from flask_login import current_user from functools import wraps -from threading import Thread def admin_required(f): @@ -27,8 +26,7 @@ def background(f): @wraps(f) def wrapped(*args, **kwargs): kwargs['app'] = current_app._get_current_object() - thread = Thread(target=f, args=args, kwargs=kwargs) - thread.start() + thread = socketio.start_background_task(f, *args, **kwargs) return thread return wrapped diff --git a/web/app/jobs/tasks.py b/web/app/jobs/tasks.py index 0daec416..8ab823dc 100644 --- a/web/app/jobs/tasks.py +++ b/web/app/jobs/tasks.py @@ -1,22 +1,23 @@ +from .. import db from ..decorators import background from ..models import Job @background def delete_job(job_id, *args, **kwargs): - app = kwargs['app'] - with app.app_context(): + with kwargs['app'].app_context(): job = Job.query.get(job_id) if job is None: - raise Exception('Could not find job with id {}'.format(job_id)) + raise Exception('Job {} not found'.format(job_id)) job.delete() + db.session.commit() @background def restart_job(job_id, *args, **kwargs): - app = kwargs['app'] - with app.app_context(): + with kwargs['app'].app_context(): job = Job.query.get(job_id) if job is None: - raise Exception('Could not find job with id {}'.format(job_id)) + raise Exception('Job {} not found'.format(job_id)) job.restart() + db.session.commit() diff --git a/web/app/jobs/views.py b/web/app/jobs/views.py index ffc17dd9..557413cc 100644 --- a/web/app/jobs/views.py +++ b/web/app/jobs/views.py @@ -60,10 +60,6 @@ def restart(job_id): else: tasks.restart_job(job_id) flash('Job has been restarted!', 'job') - job_inputs = [dict(filename=input.filename, - id=input.id, - job_id=job.id) - for input in job.inputs] return redirect(url_for('jobs.job', job_id=job_id)) diff --git a/web/app/models.py b/web/app/models.py index 538322d9..d23a25ff 100644 --- a/web/app/models.py +++ b/web/app/models.py @@ -5,6 +5,7 @@ from itsdangerous import BadSignature, TimedJSONWebSignatureSerializer from time import sleep from werkzeug.security import generate_password_hash, check_password_hash from werkzeug.utils import secure_filename +import xml.etree.ElementTree as ET from . import db, login_manager import os import shutil @@ -246,9 +247,10 @@ class User(UserMixin, db.Model): ''' Delete the user and its corpora and jobs from database and filesystem. ''' - + user_dir = os.path.join(current_app.config['NOPAQUE_STORAGE'], + str(self.id)) + shutil.rmtree(user_dir, ignore_errors=True) db.session.delete(self) - db.session.commit() class AnonymousUser(AnonymousUserMixin): @@ -383,7 +385,6 @@ class Job(db.Model): str(self.id)) shutil.rmtree(job_dir, ignore_errors=True) db.session.delete(self) - db.session.commit() def restart(self): ''' @@ -400,7 +401,6 @@ class Job(db.Model): shutil.rmtree(os.path.join(job_dir, 'pyflow.data'), ignore_errors=True) self.end_date = None self.status = 'submitted' - db.session.commit() def to_dict(self): return {'id': self.id, @@ -504,9 +504,17 @@ class CorpusFile(db.Model): title = db.Column(db.String(255)) def delete(self): - self.corpus.status = 'unprepared' + corpus_file = os.path.join(current_app.config['NOPAQUE_STORAGE'], + str(self.corpus.user_id), + 'corpora', + str(self.corpus_id), + self.filename) + try: + os.remove(corpus_file) + except OSError: + pass db.session.delete(self) - db.session.commit() + self.corpus.status = 'unprepared' def to_dict(self): return {'id': self.id, @@ -557,9 +565,49 @@ class Corpus(db.Model): 'title': self.title, 'files': {file.id: file.to_dict() for file in self.files}} + def build(self): + corpus_dir = os.path.join(current_app.config['NOPAQUE_STORAGE'], + str(self.user_id), + 'corpora', + str(self.id)) + output_dir = os.path.join(corpus_dir, 'merged') + shutil.rmtree(output_dir, ignore_errors=True) + os.mkdir(output_dir) + master_element_tree = ET.ElementTree( + ET.fromstring('\n') + ) + for corpus_file in self.files: + corpus_file_path = os.path.join(corpus_dir, corpus_file.filename) + element_tree = ET.parse(corpus_file_path) + text_node = element_tree.find('text') + text_node.set('address', corpus_file.address or "NULL") + text_node.set('author', corpus_file.author) + text_node.set('booktitle', corpus_file.booktitle or "NULL") + text_node.set('chapter', corpus_file.chapter or "NULL") + text_node.set('editor', corpus_file.editor or "NULL") + text_node.set('institution', corpus_file.institution or "NULL") + text_node.set('journal', corpus_file.journal or "NULL") + text_node.set('pages', corpus_file.pages or "NULL") + text_node.set('publisher', corpus_file.publisher or "NULL") + text_node.set('publishing_year', str(corpus_file.publishing_year)) + text_node.set('school', corpus_file.school or "NULL") + text_node.set('title', corpus_file.title) + element_tree.write(corpus_file_path) + master_element_tree.getroot().insert(1, text_node) + output_file = os.path.join(output_dir, 'corpus.vrt') + master_element_tree.write(output_file, + xml_declaration=True, + encoding='utf-8') + self.last_edited_date = datetime.utcnow() + self.status = 'submitted' + def delete(self): + corpus_dir = os.path.join(current_app.config['NOPAQUE_STORAGE'], + str(self.user_id), + 'corpora', + str(self.id)) + shutil.rmtree(corpus_dir, ignore_errors=True) db.session.delete(self) - db.session.commit() def __repr__(self): ''' @@ -582,8 +630,10 @@ class Result(db.Model): cascade='save-update, merge, delete') def delete(self): + result_file_path = os.path.join(current_app.config['NOPAQUE_STORAGE'], + self.file[0].dir) + shutil.rmtree(result_file_path) db.session.delete(self) - db.session.commit() def __repr__(self): ''' diff --git a/web/app/profile/tasks.py b/web/app/profile/tasks.py index a4f61dac..61f737c5 100644 --- a/web/app/profile/tasks.py +++ b/web/app/profile/tasks.py @@ -1,16 +1,13 @@ +from .. import db from ..decorators import background from ..models import User -import os -import shutil @background def delete_user(user_id, *args, **kwargs): - app = kwargs['app'] - with app.app_context(): + with kwargs['app'].app_context(): user = User.query.get(user_id) if user is None: - raise Exception('User {} not found!'.format(user_id)) - path = os.path.join(app.config['NOPAQUE_STORAGE'], str(user.id)) - shutil.rmtree(path, ignore_errors=True) + raise Exception('User {} not found'.format(user_id)) user.delete() + db.session.commit() diff --git a/web/app/results/tasks.py b/web/app/results/tasks.py index 39e8e4be..d139501f 100644 --- a/web/app/results/tasks.py +++ b/web/app/results/tasks.py @@ -1,17 +1,13 @@ +from .. import db from ..decorators import background from ..models import Result -import os -import shutil @background def delete_result(result_id, *args, **kwargs): - app = kwargs['app'] - with app.app_context(): + with kwargs['app'].app_context(): result = Result.query.get(result_id) if result is None: - return - result_file_path = os.path.join(app.config['NOPAQUE_STORAGE'], - result.file[0].dir) - shutil.rmtree(result_file_path) + raise Exception('Result {} not found'.format(result_id)) result.delete() # cascades down and also deletes ResultFile + db.session.commit() diff --git a/web/config.py b/web/config.py index 4b153b16..6c2143ad 100644 --- a/web/config.py +++ b/web/config.py @@ -33,8 +33,7 @@ class Config: os.makedirs('logs', exist_ok=True) logging.basicConfig(filename='logs/nopaque.log', format='[%(asctime)s] %(levelname)s in ' - '%(name)s/%(filename)s:%(lineno)d - ' - '%(message)s', + '%(pathname)s:%(lineno)d - %(message)s', datefmt='%Y-%m-%d %H:%M:%S', filemode='w') ''' ### Security enhancements ### '''