Merge branch 'development' of gitlab.ub.uni-bielefeld.de:sfb1288inf/nopaque into development

This commit is contained in:
Stephan Porada 2020-07-10 11:39:21 +02:00
commit 94701ab3b0
8 changed files with 84 additions and 93 deletions

View File

@ -1,10 +1,6 @@
from datetime import datetime
from .. import db from .. import db
from ..decorators import background from ..decorators import background
from ..models import Corpus, CorpusFile from ..models import Corpus, CorpusFile
import xml.etree.ElementTree as ET
import os
import shutil
@background @background
@ -13,68 +9,26 @@ def build_corpus(corpus_id, *args, **kwargs):
with app.app_context(): with app.app_context():
corpus = Corpus.query.get(corpus_id) corpus = Corpus.query.get(corpus_id)
if corpus is None: if corpus is None:
return raise Exception('Corpus {} not found'.format(corpus_id))
corpus.status = 'File processing' corpus.build()
db.session.commit()
corpus_dir = os.path.join(app.config['NOPAQUE_STORAGE'],
str(corpus.user_id), 'corpora',
str(corpus.id))
output_dir = os.path.join(corpus_dir, 'merged')
shutil.rmtree(output_dir, ignore_errors=True)
os.mkdir(output_dir)
master_element_tree = ET.ElementTree(
ET.fromstring('<corpus>\n</corpus>'))
for corpus_file in corpus.files:
file = os.path.join(corpus_dir, corpus_file.filename)
element_tree = ET.parse(file)
text_node = element_tree.find('text')
text_node.set('address', corpus_file.address or "NULL")
text_node.set('author', corpus_file.author)
text_node.set('booktitle', corpus_file.booktitle or "NULL")
text_node.set('chapter', corpus_file.chapter or "NULL")
text_node.set('editor', corpus_file.editor or "NULL")
text_node.set('institution', corpus_file.institution or "NULL")
text_node.set('journal', corpus_file.journal or "NULL")
text_node.set('pages', corpus_file.pages or "NULL")
text_node.set('publisher', corpus_file.publisher or "NULL")
text_node.set('publishing_year', str(corpus_file.publishing_year))
text_node.set('school', corpus_file.school or "NULL")
text_node.set('title', corpus_file.title)
element_tree.write(file)
master_element_tree.getroot().insert(1, text_node)
output_file = os.path.join(output_dir, 'corpus.vrt')
master_element_tree.write(output_file, xml_declaration=True,
encoding='utf-8')
corpus.status = 'submitted'
corpus.last_edited_date = datetime.utcnow()
db.session.commit() db.session.commit()
@background @background
def delete_corpus(corpus_id, *args, **kwargs): def delete_corpus(corpus_id, *args, **kwargs):
app = kwargs['app'] with kwargs['app'].app_context():
with app.app_context():
corpus = Corpus.query.get(corpus_id) corpus = Corpus.query.get(corpus_id)
if corpus is None: if corpus is None:
return raise Exception('Corpus {} not found'.format(corpus_id))
path = os.path.join(app.config['NOPAQUE_STORAGE'], str(corpus.user_id),
'corpora', str(corpus.id))
shutil.rmtree(path, ignore_errors=True)
corpus.delete() corpus.delete()
db.session.commit()
@background @background
def delete_corpus_file(corpus_file_id, *args, **kwargs): def delete_corpus_file(corpus_file_id, *args, **kwargs):
app = kwargs['app'] with kwargs['app'].app_context():
with app.app_context():
corpus_file = CorpusFile.query.get(corpus_file_id) corpus_file = CorpusFile.query.get(corpus_file_id)
if corpus_file is None: if corpus_file is None:
return raise Exception('Corpus file {} not found'.format(corpus_file_id))
path = os.path.join(app.config['NOPAQUE_STORAGE'], corpus_file.dir, corpus_file.delete()
corpus_file.filename) db.session.commit()
try:
os.remove(path)
except Exception:
pass
else:
corpus_file.delete()

View File

@ -2,7 +2,6 @@ from . import socketio
from flask import abort, current_app, request from flask import abort, current_app, request
from flask_login import current_user from flask_login import current_user
from functools import wraps from functools import wraps
from threading import Thread
def admin_required(f): def admin_required(f):
@ -27,8 +26,7 @@ def background(f):
@wraps(f) @wraps(f)
def wrapped(*args, **kwargs): def wrapped(*args, **kwargs):
kwargs['app'] = current_app._get_current_object() kwargs['app'] = current_app._get_current_object()
thread = Thread(target=f, args=args, kwargs=kwargs) thread = socketio.start_background_task(f, *args, **kwargs)
thread.start()
return thread return thread
return wrapped return wrapped

View File

@ -1,22 +1,23 @@
from .. import db
from ..decorators import background from ..decorators import background
from ..models import Job from ..models import Job
@background @background
def delete_job(job_id, *args, **kwargs): def delete_job(job_id, *args, **kwargs):
app = kwargs['app'] with kwargs['app'].app_context():
with app.app_context():
job = Job.query.get(job_id) job = Job.query.get(job_id)
if job is None: if job is None:
raise Exception('Could not find job with id {}'.format(job_id)) raise Exception('Job {} not found'.format(job_id))
job.delete() job.delete()
db.session.commit()
@background @background
def restart_job(job_id, *args, **kwargs): def restart_job(job_id, *args, **kwargs):
app = kwargs['app'] with kwargs['app'].app_context():
with app.app_context():
job = Job.query.get(job_id) job = Job.query.get(job_id)
if job is None: if job is None:
raise Exception('Could not find job with id {}'.format(job_id)) raise Exception('Job {} not found'.format(job_id))
job.restart() job.restart()
db.session.commit()

View File

@ -60,10 +60,6 @@ def restart(job_id):
else: else:
tasks.restart_job(job_id) tasks.restart_job(job_id)
flash('Job has been restarted!', 'job') flash('Job has been restarted!', 'job')
job_inputs = [dict(filename=input.filename,
id=input.id,
job_id=job.id)
for input in job.inputs]
return redirect(url_for('jobs.job', job_id=job_id)) return redirect(url_for('jobs.job', job_id=job_id))

View File

@ -5,6 +5,7 @@ from itsdangerous import BadSignature, TimedJSONWebSignatureSerializer
from time import sleep from time import sleep
from werkzeug.security import generate_password_hash, check_password_hash from werkzeug.security import generate_password_hash, check_password_hash
from werkzeug.utils import secure_filename from werkzeug.utils import secure_filename
import xml.etree.ElementTree as ET
from . import db, login_manager from . import db, login_manager
import os import os
import shutil import shutil
@ -246,9 +247,10 @@ class User(UserMixin, db.Model):
''' '''
Delete the user and its corpora and jobs from database and filesystem. Delete the user and its corpora and jobs from database and filesystem.
''' '''
user_dir = os.path.join(current_app.config['NOPAQUE_STORAGE'],
str(self.id))
shutil.rmtree(user_dir, ignore_errors=True)
db.session.delete(self) db.session.delete(self)
db.session.commit()
class AnonymousUser(AnonymousUserMixin): class AnonymousUser(AnonymousUserMixin):
@ -383,7 +385,6 @@ class Job(db.Model):
str(self.id)) str(self.id))
shutil.rmtree(job_dir, ignore_errors=True) shutil.rmtree(job_dir, ignore_errors=True)
db.session.delete(self) db.session.delete(self)
db.session.commit()
def restart(self): def restart(self):
''' '''
@ -400,7 +401,6 @@ class Job(db.Model):
shutil.rmtree(os.path.join(job_dir, 'pyflow.data'), ignore_errors=True) shutil.rmtree(os.path.join(job_dir, 'pyflow.data'), ignore_errors=True)
self.end_date = None self.end_date = None
self.status = 'submitted' self.status = 'submitted'
db.session.commit()
def to_dict(self): def to_dict(self):
return {'id': self.id, return {'id': self.id,
@ -504,9 +504,17 @@ class CorpusFile(db.Model):
title = db.Column(db.String(255)) title = db.Column(db.String(255))
def delete(self): def delete(self):
self.corpus.status = 'unprepared' corpus_file = os.path.join(current_app.config['NOPAQUE_STORAGE'],
str(self.corpus.user_id),
'corpora',
str(self.corpus_id),
self.filename)
try:
os.remove(corpus_file)
except OSError:
pass
db.session.delete(self) db.session.delete(self)
db.session.commit() self.corpus.status = 'unprepared'
def to_dict(self): def to_dict(self):
return {'id': self.id, return {'id': self.id,
@ -557,9 +565,49 @@ class Corpus(db.Model):
'title': self.title, 'title': self.title,
'files': {file.id: file.to_dict() for file in self.files}} 'files': {file.id: file.to_dict() for file in self.files}}
def build(self):
corpus_dir = os.path.join(current_app.config['NOPAQUE_STORAGE'],
str(self.user_id),
'corpora',
str(self.id))
output_dir = os.path.join(corpus_dir, 'merged')
shutil.rmtree(output_dir, ignore_errors=True)
os.mkdir(output_dir)
master_element_tree = ET.ElementTree(
ET.fromstring('<corpus>\n</corpus>')
)
for corpus_file in self.files:
corpus_file_path = os.path.join(corpus_dir, corpus_file.filename)
element_tree = ET.parse(corpus_file_path)
text_node = element_tree.find('text')
text_node.set('address', corpus_file.address or "NULL")
text_node.set('author', corpus_file.author)
text_node.set('booktitle', corpus_file.booktitle or "NULL")
text_node.set('chapter', corpus_file.chapter or "NULL")
text_node.set('editor', corpus_file.editor or "NULL")
text_node.set('institution', corpus_file.institution or "NULL")
text_node.set('journal', corpus_file.journal or "NULL")
text_node.set('pages', corpus_file.pages or "NULL")
text_node.set('publisher', corpus_file.publisher or "NULL")
text_node.set('publishing_year', str(corpus_file.publishing_year))
text_node.set('school', corpus_file.school or "NULL")
text_node.set('title', corpus_file.title)
element_tree.write(corpus_file_path)
master_element_tree.getroot().insert(1, text_node)
output_file = os.path.join(output_dir, 'corpus.vrt')
master_element_tree.write(output_file,
xml_declaration=True,
encoding='utf-8')
self.last_edited_date = datetime.utcnow()
self.status = 'submitted'
def delete(self): def delete(self):
corpus_dir = os.path.join(current_app.config['NOPAQUE_STORAGE'],
str(self.user_id),
'corpora',
str(self.id))
shutil.rmtree(corpus_dir, ignore_errors=True)
db.session.delete(self) db.session.delete(self)
db.session.commit()
def __repr__(self): def __repr__(self):
''' '''
@ -582,8 +630,10 @@ class Result(db.Model):
cascade='save-update, merge, delete') cascade='save-update, merge, delete')
def delete(self): def delete(self):
result_file_path = os.path.join(current_app.config['NOPAQUE_STORAGE'],
self.file[0].dir)
shutil.rmtree(result_file_path)
db.session.delete(self) db.session.delete(self)
db.session.commit()
def __repr__(self): def __repr__(self):
''' '''

View File

@ -1,16 +1,13 @@
from .. import db
from ..decorators import background from ..decorators import background
from ..models import User from ..models import User
import os
import shutil
@background @background
def delete_user(user_id, *args, **kwargs): def delete_user(user_id, *args, **kwargs):
app = kwargs['app'] with kwargs['app'].app_context():
with app.app_context():
user = User.query.get(user_id) user = User.query.get(user_id)
if user is None: if user is None:
raise Exception('User {} not found!'.format(user_id)) raise Exception('User {} not found'.format(user_id))
path = os.path.join(app.config['NOPAQUE_STORAGE'], str(user.id))
shutil.rmtree(path, ignore_errors=True)
user.delete() user.delete()
db.session.commit()

View File

@ -1,17 +1,13 @@
from .. import db
from ..decorators import background from ..decorators import background
from ..models import Result from ..models import Result
import os
import shutil
@background @background
def delete_result(result_id, *args, **kwargs): def delete_result(result_id, *args, **kwargs):
app = kwargs['app'] with kwargs['app'].app_context():
with app.app_context():
result = Result.query.get(result_id) result = Result.query.get(result_id)
if result is None: if result is None:
return raise Exception('Result {} not found'.format(result_id))
result_file_path = os.path.join(app.config['NOPAQUE_STORAGE'],
result.file[0].dir)
shutil.rmtree(result_file_path)
result.delete() # cascades down and also deletes ResultFile result.delete() # cascades down and also deletes ResultFile
db.session.commit()

View File

@ -33,8 +33,7 @@ class Config:
os.makedirs('logs', exist_ok=True) os.makedirs('logs', exist_ok=True)
logging.basicConfig(filename='logs/nopaque.log', logging.basicConfig(filename='logs/nopaque.log',
format='[%(asctime)s] %(levelname)s in ' format='[%(asctime)s] %(levelname)s in '
'%(name)s/%(filename)s:%(lineno)d - ' '%(pathname)s:%(lineno)d - %(message)s',
'%(message)s',
datefmt='%Y-%m-%d %H:%M:%S', filemode='w') datefmt='%Y-%m-%d %H:%M:%S', filemode='w')
''' ### Security enhancements ### ''' ''' ### Security enhancements ### '''