mirror of
				https://gitlab.ub.uni-bielefeld.de/sfb1288inf/nopaque.git
				synced 2025-11-03 20:02:47 +00:00 
			
		
		
		
	Merge branch 'development' of gitlab.ub.uni-bielefeld.de:sfb1288inf/nopaque into development
This commit is contained in:
		@@ -1,10 +1,6 @@
 | 
			
		||||
from datetime import datetime
 | 
			
		||||
from .. import db
 | 
			
		||||
from ..decorators import background
 | 
			
		||||
from ..models import Corpus, CorpusFile
 | 
			
		||||
import xml.etree.ElementTree as ET
 | 
			
		||||
import os
 | 
			
		||||
import shutil
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@background
 | 
			
		||||
@@ -13,68 +9,26 @@ def build_corpus(corpus_id, *args, **kwargs):
 | 
			
		||||
    with app.app_context():
 | 
			
		||||
        corpus = Corpus.query.get(corpus_id)
 | 
			
		||||
        if corpus is None:
 | 
			
		||||
            return
 | 
			
		||||
        corpus.status = 'File processing'
 | 
			
		||||
        db.session.commit()
 | 
			
		||||
        corpus_dir = os.path.join(app.config['NOPAQUE_STORAGE'],
 | 
			
		||||
                                  str(corpus.user_id), 'corpora',
 | 
			
		||||
                                  str(corpus.id))
 | 
			
		||||
        output_dir = os.path.join(corpus_dir, 'merged')
 | 
			
		||||
        shutil.rmtree(output_dir, ignore_errors=True)
 | 
			
		||||
        os.mkdir(output_dir)
 | 
			
		||||
        master_element_tree = ET.ElementTree(
 | 
			
		||||
            ET.fromstring('<corpus>\n</corpus>'))
 | 
			
		||||
        for corpus_file in corpus.files:
 | 
			
		||||
            file = os.path.join(corpus_dir, corpus_file.filename)
 | 
			
		||||
            element_tree = ET.parse(file)
 | 
			
		||||
            text_node = element_tree.find('text')
 | 
			
		||||
            text_node.set('address', corpus_file.address or "NULL")
 | 
			
		||||
            text_node.set('author', corpus_file.author)
 | 
			
		||||
            text_node.set('booktitle', corpus_file.booktitle or "NULL")
 | 
			
		||||
            text_node.set('chapter', corpus_file.chapter or "NULL")
 | 
			
		||||
            text_node.set('editor', corpus_file.editor or "NULL")
 | 
			
		||||
            text_node.set('institution', corpus_file.institution or "NULL")
 | 
			
		||||
            text_node.set('journal', corpus_file.journal or "NULL")
 | 
			
		||||
            text_node.set('pages', corpus_file.pages or "NULL")
 | 
			
		||||
            text_node.set('publisher', corpus_file.publisher or "NULL")
 | 
			
		||||
            text_node.set('publishing_year', str(corpus_file.publishing_year))
 | 
			
		||||
            text_node.set('school', corpus_file.school or "NULL")
 | 
			
		||||
            text_node.set('title', corpus_file.title)
 | 
			
		||||
            element_tree.write(file)
 | 
			
		||||
            master_element_tree.getroot().insert(1, text_node)
 | 
			
		||||
        output_file = os.path.join(output_dir, 'corpus.vrt')
 | 
			
		||||
        master_element_tree.write(output_file, xml_declaration=True,
 | 
			
		||||
                                  encoding='utf-8')
 | 
			
		||||
        corpus.status = 'submitted'
 | 
			
		||||
        corpus.last_edited_date = datetime.utcnow()
 | 
			
		||||
            raise Exception('Corpus {} not found'.format(corpus_id))
 | 
			
		||||
        corpus.build()
 | 
			
		||||
        db.session.commit()
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@background
def delete_corpus(corpus_id, *args, **kwargs):
    '''
    Delete a corpus in the background.

    The ``background`` decorator injects the Flask application object as
    ``kwargs['app']``; all database work happens inside its app context.

    :param corpus_id: primary key of the corpus to delete
    :raises Exception: if no corpus with this id exists
    '''
    with kwargs['app'].app_context():
        corpus = Corpus.query.get(corpus_id)
        if corpus is None:
            raise Exception('Corpus {} not found'.format(corpus_id))
        # Corpus.delete() removes the corpus directory from storage itself,
        # so the path handling must not be repeated here.
        corpus.delete()
        db.session.commit()
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@background
def delete_corpus_file(corpus_file_id, *args, **kwargs):
    '''
    Delete a single corpus file in the background.

    The ``background`` decorator injects the Flask application object as
    ``kwargs['app']``; all database work happens inside its app context.

    :param corpus_file_id: primary key of the corpus file to delete
    :raises Exception: if no corpus file with this id exists
    '''
    with kwargs['app'].app_context():
        corpus_file = CorpusFile.query.get(corpus_file_id)
        if corpus_file is None:
            raise Exception('Corpus file {} not found'.format(corpus_file_id))
        # CorpusFile.delete() removes the file from storage (ignoring a
        # missing file) and deletes the row, so it is called exactly once.
        corpus_file.delete()
        db.session.commit()
 | 
			
		||||
 
 | 
			
		||||
@@ -2,7 +2,6 @@ from . import socketio
 | 
			
		||||
from flask import abort, current_app, request
 | 
			
		||||
from flask_login import current_user
 | 
			
		||||
from functools import wraps
 | 
			
		||||
from threading import Thread
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def admin_required(f):
 | 
			
		||||
@@ -27,8 +26,7 @@ def background(f):
 | 
			
		||||
    @wraps(f)
 | 
			
		||||
    def wrapped(*args, **kwargs):
 | 
			
		||||
        kwargs['app'] = current_app._get_current_object()
 | 
			
		||||
        thread = Thread(target=f, args=args, kwargs=kwargs)
 | 
			
		||||
        thread.start()
 | 
			
		||||
        thread = socketio.start_background_task(f, *args, **kwargs)
 | 
			
		||||
        return thread
 | 
			
		||||
    return wrapped
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
@@ -1,22 +1,23 @@
 | 
			
		||||
from .. import db
 | 
			
		||||
from ..decorators import background
 | 
			
		||||
from ..models import Job
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@background
def delete_job(job_id, *args, **kwargs):
    '''
    Delete a job in the background.

    The ``background`` decorator injects the Flask application object as
    ``kwargs['app']``; all database work happens inside its app context.

    :param job_id: primary key of the job to delete
    :raises Exception: if no job with this id exists
    '''
    with kwargs['app'].app_context():
        job = Job.query.get(job_id)
        if job is None:
            raise Exception('Job {} not found'.format(job_id))
        job.delete()
        db.session.commit()
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@background
def restart_job(job_id, *args, **kwargs):
    '''
    Restart a job in the background.

    The ``background`` decorator injects the Flask application object as
    ``kwargs['app']``; all database work happens inside its app context.

    :param job_id: primary key of the job to restart
    :raises Exception: if no job with this id exists
    '''
    with kwargs['app'].app_context():
        job = Job.query.get(job_id)
        if job is None:
            raise Exception('Job {} not found'.format(job_id))
        job.restart()
        db.session.commit()
 | 
			
		||||
 
 | 
			
		||||
@@ -60,10 +60,6 @@ def restart(job_id):
 | 
			
		||||
    else:
 | 
			
		||||
        tasks.restart_job(job_id)
 | 
			
		||||
        flash('Job has been restarted!', 'job')
 | 
			
		||||
    job_inputs = [dict(filename=input.filename,
 | 
			
		||||
                       id=input.id,
 | 
			
		||||
                       job_id=job.id)
 | 
			
		||||
                  for input in job.inputs]
 | 
			
		||||
    return redirect(url_for('jobs.job', job_id=job_id))
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
@@ -5,6 +5,7 @@ from itsdangerous import BadSignature, TimedJSONWebSignatureSerializer
 | 
			
		||||
from time import sleep
 | 
			
		||||
from werkzeug.security import generate_password_hash, check_password_hash
 | 
			
		||||
from werkzeug.utils import secure_filename
 | 
			
		||||
import xml.etree.ElementTree as ET
 | 
			
		||||
from . import db, login_manager
 | 
			
		||||
import os
 | 
			
		||||
import shutil
 | 
			
		||||
@@ -246,9 +247,10 @@ class User(UserMixin, db.Model):
 | 
			
		||||
        '''
 | 
			
		||||
        Delete the user and its corpora and jobs from database and filesystem.
 | 
			
		||||
        '''
 | 
			
		||||
 | 
			
		||||
        user_dir = os.path.join(current_app.config['NOPAQUE_STORAGE'],
 | 
			
		||||
                                str(self.id))
 | 
			
		||||
        shutil.rmtree(user_dir, ignore_errors=True)
 | 
			
		||||
        db.session.delete(self)
 | 
			
		||||
        db.session.commit()
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class AnonymousUser(AnonymousUserMixin):
 | 
			
		||||
@@ -383,7 +385,6 @@ class Job(db.Model):
 | 
			
		||||
                               str(self.id))
 | 
			
		||||
        shutil.rmtree(job_dir, ignore_errors=True)
 | 
			
		||||
        db.session.delete(self)
 | 
			
		||||
        db.session.commit()
 | 
			
		||||
 | 
			
		||||
    def restart(self):
 | 
			
		||||
        '''
 | 
			
		||||
@@ -400,7 +401,6 @@ class Job(db.Model):
 | 
			
		||||
        shutil.rmtree(os.path.join(job_dir, 'pyflow.data'), ignore_errors=True)
 | 
			
		||||
        self.end_date = None
 | 
			
		||||
        self.status = 'submitted'
 | 
			
		||||
        db.session.commit()
 | 
			
		||||
 | 
			
		||||
    def to_dict(self):
 | 
			
		||||
        return {'id': self.id,
 | 
			
		||||
@@ -504,9 +504,17 @@ class CorpusFile(db.Model):
 | 
			
		||||
    title = db.Column(db.String(255))
 | 
			
		||||
 | 
			
		||||
    def delete(self):
        '''
        Delete the corpus file from the filesystem and the database.

        The owning corpus is marked as 'unprepared' because its set of
        files changes.
        '''
        # Flag the corpus before the row is deleted and committed, so the
        # status change is written by the same commit instead of being
        # assigned afterwards on an already deleted object.
        self.corpus.status = 'unprepared'
        file_path = os.path.join(current_app.config['NOPAQUE_STORAGE'],
                                 str(self.corpus.user_id),
                                 'corpora',
                                 str(self.corpus_id),
                                 self.filename)
        try:
            os.remove(file_path)
        except OSError:
            # Best effort: a file that is already gone (or cannot be
            # removed) must not keep the database row alive.
            pass
        db.session.delete(self)
        db.session.commit()
 | 
			
		||||
 | 
			
		||||
    def to_dict(self):
 | 
			
		||||
        return {'id': self.id,
 | 
			
		||||
@@ -557,9 +565,49 @@ class Corpus(db.Model):
 | 
			
		||||
                'title': self.title,
 | 
			
		||||
                'files': {file.id: file.to_dict() for file in self.files}}
 | 
			
		||||
 | 
			
		||||
    def build(self):
        '''
        Merge all files of this corpus into a single corpus.vrt file.

        Every corpus file is parsed as XML, its <text> element is annotated
        with the metadata stored for that file, the annotated file is
        written back in place and the <text> element is added to a fresh
        <corpus> root. The result is written to
        <corpus directory>/merged/corpus.vrt. Finally the corpus gets the
        current UTC time as last_edited_date and the status 'submitted'.
        The database session is not committed here.
        '''
        corpus_dir = os.path.join(current_app.config['NOPAQUE_STORAGE'],
                                  str(self.user_id),
                                  'corpora',
                                  str(self.id))
        output_dir = os.path.join(corpus_dir, 'merged')
        # Always start from an empty output directory.
        shutil.rmtree(output_dir, ignore_errors=True)
        os.mkdir(output_dir)
        master_element_tree = ET.ElementTree(
            ET.fromstring('<corpus>\n</corpus>')
        )
        corpus_root = master_element_tree.getroot()
        for corpus_file in self.files:
            corpus_file_path = os.path.join(corpus_dir, corpus_file.filename)
            element_tree = ET.parse(corpus_file_path)
            text_node = element_tree.find('text')
            # Missing values become the literal "NULL"; ElementTree cannot
            # serialize None, so author and title need the same fallback
            # as the other optional fields.
            metadata = (
                ('address', corpus_file.address or "NULL"),
                ('author', corpus_file.author or "NULL"),
                ('booktitle', corpus_file.booktitle or "NULL"),
                ('chapter', corpus_file.chapter or "NULL"),
                ('editor', corpus_file.editor or "NULL"),
                ('institution', corpus_file.institution or "NULL"),
                ('journal', corpus_file.journal or "NULL"),
                ('pages', corpus_file.pages or "NULL"),
                ('publisher', corpus_file.publisher or "NULL"),
                ('publishing_year', str(corpus_file.publishing_year)),
                ('school', corpus_file.school or "NULL"),
                ('title', corpus_file.title or "NULL"),
            )
            for name, value in metadata:
                text_node.set(name, value)
            element_tree.write(corpus_file_path)
            # NOTE(review): inserting at index 1 keeps the first file first
            # and places every later file directly after it, so files after
            # the first end up in reverse order - confirm this is intended.
            corpus_root.insert(1, text_node)
        output_file = os.path.join(output_dir, 'corpus.vrt')
        master_element_tree.write(output_file,
                                  xml_declaration=True,
                                  encoding='utf-8')
        self.last_edited_date = datetime.utcnow()
        self.status = 'submitted'
 | 
			
		||||
 | 
			
		||||
    def delete(self):
        '''
        Delete the corpus from the filesystem and the database.
        '''
        storage_root = current_app.config['NOPAQUE_STORAGE']
        path_parts = (str(self.user_id), 'corpora', str(self.id))
        # A corpus directory that is already missing is not an error.
        shutil.rmtree(os.path.join(storage_root, *path_parts),
                      ignore_errors=True)
        db.session.delete(self)
        db.session.commit()
 | 
			
		||||
 | 
			
		||||
    def __repr__(self):
 | 
			
		||||
        '''
 | 
			
		||||
@@ -582,8 +630,10 @@ class Result(db.Model):
 | 
			
		||||
                           cascade='save-update, merge, delete')
 | 
			
		||||
 | 
			
		||||
    def delete(self):
        '''
        Delete the result from the filesystem and the database.

        The directory of the first associated file is removed from storage
        before the database row is deleted.
        '''
        # Without a file record there is no directory to derive, so only
        # the database row is removed.
        if self.file:
            result_file_path = os.path.join(
                current_app.config['NOPAQUE_STORAGE'],
                self.file[0].dir
            )
            # A directory that is already gone must not block deleting the
            # row; the other delete methods ignore such errors as well.
            shutil.rmtree(result_file_path, ignore_errors=True)
        db.session.delete(self)
        db.session.commit()
 | 
			
		||||
 | 
			
		||||
    def __repr__(self):
 | 
			
		||||
        '''
 | 
			
		||||
 
 | 
			
		||||
@@ -1,16 +1,13 @@
 | 
			
		||||
from .. import db
 | 
			
		||||
from ..decorators import background
 | 
			
		||||
from ..models import User
 | 
			
		||||
import os
 | 
			
		||||
import shutil
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@background
def delete_user(user_id, *args, **kwargs):
    '''
    Delete a user in the background.

    The ``background`` decorator injects the Flask application object as
    ``kwargs['app']``; all database work happens inside its app context.

    :param user_id: primary key of the user to delete
    :raises Exception: if no user with this id exists
    '''
    with kwargs['app'].app_context():
        user = User.query.get(user_id)
        if user is None:
            raise Exception('User {} not found'.format(user_id))
        # User.delete() removes the user's storage directory itself, so
        # the path handling must not be repeated here.
        user.delete()
        db.session.commit()
 | 
			
		||||
 
 | 
			
		||||
@@ -1,17 +1,13 @@
 | 
			
		||||
from .. import db
 | 
			
		||||
from ..decorators import background
 | 
			
		||||
from ..models import Result
 | 
			
		||||
import os
 | 
			
		||||
import shutil
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@background
def delete_result(result_id, *args, **kwargs):
    '''
    Delete a result in the background.

    The ``background`` decorator injects the Flask application object as
    ``kwargs['app']``; all database work happens inside its app context.

    :param result_id: primary key of the result to delete
    :raises Exception: if no result with this id exists
    '''
    with kwargs['app'].app_context():
        result = Result.query.get(result_id)
        if result is None:
            raise Exception('Result {} not found'.format(result_id))
        # Result.delete() removes the result directory from storage itself.
        result.delete()  # cascades down and also deletes ResultFile
        db.session.commit()
 | 
			
		||||
 
 | 
			
		||||
@@ -33,8 +33,7 @@ class Config:
 | 
			
		||||
    os.makedirs('logs', exist_ok=True)
 | 
			
		||||
    logging.basicConfig(filename='logs/nopaque.log',
 | 
			
		||||
                        format='[%(asctime)s] %(levelname)s in '
 | 
			
		||||
                               '%(name)s/%(filename)s:%(lineno)d - '
 | 
			
		||||
                               '%(message)s',
 | 
			
		||||
                               '%(pathname)s:%(lineno)d - %(message)s',
 | 
			
		||||
                        datefmt='%Y-%m-%d %H:%M:%S', filemode='w')
 | 
			
		||||
 | 
			
		||||
    ''' ### Security enhancements ### '''
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user