mirror of
https://gitlab.ub.uni-bielefeld.de/sfb1288inf/nopaque.git
synced 2025-01-18 22:00:35 +00:00
Merge branch 'development' of gitlab.ub.uni-bielefeld.de:sfb1288inf/nopaque into development
This commit is contained in:
commit
94701ab3b0
@ -1,10 +1,6 @@
|
|||||||
from datetime import datetime
|
|
||||||
from .. import db
|
from .. import db
|
||||||
from ..decorators import background
|
from ..decorators import background
|
||||||
from ..models import Corpus, CorpusFile
|
from ..models import Corpus, CorpusFile
|
||||||
import xml.etree.ElementTree as ET
|
|
||||||
import os
|
|
||||||
import shutil
|
|
||||||
|
|
||||||
|
|
||||||
@background
|
@background
|
||||||
@ -13,68 +9,26 @@ def build_corpus(corpus_id, *args, **kwargs):
|
|||||||
with app.app_context():
|
with app.app_context():
|
||||||
corpus = Corpus.query.get(corpus_id)
|
corpus = Corpus.query.get(corpus_id)
|
||||||
if corpus is None:
|
if corpus is None:
|
||||||
return
|
raise Exception('Corpus {} not found'.format(corpus_id))
|
||||||
corpus.status = 'File processing'
|
corpus.build()
|
||||||
db.session.commit()
|
|
||||||
corpus_dir = os.path.join(app.config['NOPAQUE_STORAGE'],
|
|
||||||
str(corpus.user_id), 'corpora',
|
|
||||||
str(corpus.id))
|
|
||||||
output_dir = os.path.join(corpus_dir, 'merged')
|
|
||||||
shutil.rmtree(output_dir, ignore_errors=True)
|
|
||||||
os.mkdir(output_dir)
|
|
||||||
master_element_tree = ET.ElementTree(
|
|
||||||
ET.fromstring('<corpus>\n</corpus>'))
|
|
||||||
for corpus_file in corpus.files:
|
|
||||||
file = os.path.join(corpus_dir, corpus_file.filename)
|
|
||||||
element_tree = ET.parse(file)
|
|
||||||
text_node = element_tree.find('text')
|
|
||||||
text_node.set('address', corpus_file.address or "NULL")
|
|
||||||
text_node.set('author', corpus_file.author)
|
|
||||||
text_node.set('booktitle', corpus_file.booktitle or "NULL")
|
|
||||||
text_node.set('chapter', corpus_file.chapter or "NULL")
|
|
||||||
text_node.set('editor', corpus_file.editor or "NULL")
|
|
||||||
text_node.set('institution', corpus_file.institution or "NULL")
|
|
||||||
text_node.set('journal', corpus_file.journal or "NULL")
|
|
||||||
text_node.set('pages', corpus_file.pages or "NULL")
|
|
||||||
text_node.set('publisher', corpus_file.publisher or "NULL")
|
|
||||||
text_node.set('publishing_year', str(corpus_file.publishing_year))
|
|
||||||
text_node.set('school', corpus_file.school or "NULL")
|
|
||||||
text_node.set('title', corpus_file.title)
|
|
||||||
element_tree.write(file)
|
|
||||||
master_element_tree.getroot().insert(1, text_node)
|
|
||||||
output_file = os.path.join(output_dir, 'corpus.vrt')
|
|
||||||
master_element_tree.write(output_file, xml_declaration=True,
|
|
||||||
encoding='utf-8')
|
|
||||||
corpus.status = 'submitted'
|
|
||||||
corpus.last_edited_date = datetime.utcnow()
|
|
||||||
db.session.commit()
|
db.session.commit()
|
||||||
|
|
||||||
|
|
||||||
@background
|
@background
|
||||||
def delete_corpus(corpus_id, *args, **kwargs):
|
def delete_corpus(corpus_id, *args, **kwargs):
|
||||||
app = kwargs['app']
|
with kwargs['app'].app_context():
|
||||||
with app.app_context():
|
|
||||||
corpus = Corpus.query.get(corpus_id)
|
corpus = Corpus.query.get(corpus_id)
|
||||||
if corpus is None:
|
if corpus is None:
|
||||||
return
|
raise Exception('Corpus {} not found'.format(corpus_id))
|
||||||
path = os.path.join(app.config['NOPAQUE_STORAGE'], str(corpus.user_id),
|
|
||||||
'corpora', str(corpus.id))
|
|
||||||
shutil.rmtree(path, ignore_errors=True)
|
|
||||||
corpus.delete()
|
corpus.delete()
|
||||||
|
db.session.commit()
|
||||||
|
|
||||||
|
|
||||||
@background
|
@background
|
||||||
def delete_corpus_file(corpus_file_id, *args, **kwargs):
|
def delete_corpus_file(corpus_file_id, *args, **kwargs):
|
||||||
app = kwargs['app']
|
with kwargs['app'].app_context():
|
||||||
with app.app_context():
|
|
||||||
corpus_file = CorpusFile.query.get(corpus_file_id)
|
corpus_file = CorpusFile.query.get(corpus_file_id)
|
||||||
if corpus_file is None:
|
if corpus_file is None:
|
||||||
return
|
raise Exception('Corpus file {} not found'.format(corpus_file_id))
|
||||||
path = os.path.join(app.config['NOPAQUE_STORAGE'], corpus_file.dir,
|
corpus_file.delete()
|
||||||
corpus_file.filename)
|
db.session.commit()
|
||||||
try:
|
|
||||||
os.remove(path)
|
|
||||||
except Exception:
|
|
||||||
pass
|
|
||||||
else:
|
|
||||||
corpus_file.delete()
|
|
||||||
|
@ -2,7 +2,6 @@ from . import socketio
|
|||||||
from flask import abort, current_app, request
|
from flask import abort, current_app, request
|
||||||
from flask_login import current_user
|
from flask_login import current_user
|
||||||
from functools import wraps
|
from functools import wraps
|
||||||
from threading import Thread
|
|
||||||
|
|
||||||
|
|
||||||
def admin_required(f):
|
def admin_required(f):
|
||||||
@ -27,8 +26,7 @@ def background(f):
|
|||||||
@wraps(f)
|
@wraps(f)
|
||||||
def wrapped(*args, **kwargs):
|
def wrapped(*args, **kwargs):
|
||||||
kwargs['app'] = current_app._get_current_object()
|
kwargs['app'] = current_app._get_current_object()
|
||||||
thread = Thread(target=f, args=args, kwargs=kwargs)
|
thread = socketio.start_background_task(f, *args, **kwargs)
|
||||||
thread.start()
|
|
||||||
return thread
|
return thread
|
||||||
return wrapped
|
return wrapped
|
||||||
|
|
||||||
|
@ -1,22 +1,23 @@
|
|||||||
|
from .. import db
|
||||||
from ..decorators import background
|
from ..decorators import background
|
||||||
from ..models import Job
|
from ..models import Job
|
||||||
|
|
||||||
|
|
||||||
@background
|
@background
|
||||||
def delete_job(job_id, *args, **kwargs):
|
def delete_job(job_id, *args, **kwargs):
|
||||||
app = kwargs['app']
|
with kwargs['app'].app_context():
|
||||||
with app.app_context():
|
|
||||||
job = Job.query.get(job_id)
|
job = Job.query.get(job_id)
|
||||||
if job is None:
|
if job is None:
|
||||||
raise Exception('Could not find job with id {}'.format(job_id))
|
raise Exception('Job {} not found'.format(job_id))
|
||||||
job.delete()
|
job.delete()
|
||||||
|
db.session.commit()
|
||||||
|
|
||||||
|
|
||||||
@background
|
@background
|
||||||
def restart_job(job_id, *args, **kwargs):
|
def restart_job(job_id, *args, **kwargs):
|
||||||
app = kwargs['app']
|
with kwargs['app'].app_context():
|
||||||
with app.app_context():
|
|
||||||
job = Job.query.get(job_id)
|
job = Job.query.get(job_id)
|
||||||
if job is None:
|
if job is None:
|
||||||
raise Exception('Could not find job with id {}'.format(job_id))
|
raise Exception('Job {} not found'.format(job_id))
|
||||||
job.restart()
|
job.restart()
|
||||||
|
db.session.commit()
|
||||||
|
@ -60,10 +60,6 @@ def restart(job_id):
|
|||||||
else:
|
else:
|
||||||
tasks.restart_job(job_id)
|
tasks.restart_job(job_id)
|
||||||
flash('Job has been restarted!', 'job')
|
flash('Job has been restarted!', 'job')
|
||||||
job_inputs = [dict(filename=input.filename,
|
|
||||||
id=input.id,
|
|
||||||
job_id=job.id)
|
|
||||||
for input in job.inputs]
|
|
||||||
return redirect(url_for('jobs.job', job_id=job_id))
|
return redirect(url_for('jobs.job', job_id=job_id))
|
||||||
|
|
||||||
|
|
||||||
|
@ -5,6 +5,7 @@ from itsdangerous import BadSignature, TimedJSONWebSignatureSerializer
|
|||||||
from time import sleep
|
from time import sleep
|
||||||
from werkzeug.security import generate_password_hash, check_password_hash
|
from werkzeug.security import generate_password_hash, check_password_hash
|
||||||
from werkzeug.utils import secure_filename
|
from werkzeug.utils import secure_filename
|
||||||
|
import xml.etree.ElementTree as ET
|
||||||
from . import db, login_manager
|
from . import db, login_manager
|
||||||
import os
|
import os
|
||||||
import shutil
|
import shutil
|
||||||
@ -246,9 +247,10 @@ class User(UserMixin, db.Model):
|
|||||||
'''
|
'''
|
||||||
Delete the user and its corpora and jobs from database and filesystem.
|
Delete the user and its corpora and jobs from database and filesystem.
|
||||||
'''
|
'''
|
||||||
|
user_dir = os.path.join(current_app.config['NOPAQUE_STORAGE'],
|
||||||
|
str(self.id))
|
||||||
|
shutil.rmtree(user_dir, ignore_errors=True)
|
||||||
db.session.delete(self)
|
db.session.delete(self)
|
||||||
db.session.commit()
|
|
||||||
|
|
||||||
|
|
||||||
class AnonymousUser(AnonymousUserMixin):
|
class AnonymousUser(AnonymousUserMixin):
|
||||||
@ -383,7 +385,6 @@ class Job(db.Model):
|
|||||||
str(self.id))
|
str(self.id))
|
||||||
shutil.rmtree(job_dir, ignore_errors=True)
|
shutil.rmtree(job_dir, ignore_errors=True)
|
||||||
db.session.delete(self)
|
db.session.delete(self)
|
||||||
db.session.commit()
|
|
||||||
|
|
||||||
def restart(self):
|
def restart(self):
|
||||||
'''
|
'''
|
||||||
@ -400,7 +401,6 @@ class Job(db.Model):
|
|||||||
shutil.rmtree(os.path.join(job_dir, 'pyflow.data'), ignore_errors=True)
|
shutil.rmtree(os.path.join(job_dir, 'pyflow.data'), ignore_errors=True)
|
||||||
self.end_date = None
|
self.end_date = None
|
||||||
self.status = 'submitted'
|
self.status = 'submitted'
|
||||||
db.session.commit()
|
|
||||||
|
|
||||||
def to_dict(self):
|
def to_dict(self):
|
||||||
return {'id': self.id,
|
return {'id': self.id,
|
||||||
@ -504,9 +504,17 @@ class CorpusFile(db.Model):
|
|||||||
title = db.Column(db.String(255))
|
title = db.Column(db.String(255))
|
||||||
|
|
||||||
def delete(self):
|
def delete(self):
|
||||||
self.corpus.status = 'unprepared'
|
corpus_file = os.path.join(current_app.config['NOPAQUE_STORAGE'],
|
||||||
|
str(self.corpus.user_id),
|
||||||
|
'corpora',
|
||||||
|
str(self.corpus_id),
|
||||||
|
self.filename)
|
||||||
|
try:
|
||||||
|
os.remove(corpus_file)
|
||||||
|
except OSError:
|
||||||
|
pass
|
||||||
db.session.delete(self)
|
db.session.delete(self)
|
||||||
db.session.commit()
|
self.corpus.status = 'unprepared'
|
||||||
|
|
||||||
def to_dict(self):
|
def to_dict(self):
|
||||||
return {'id': self.id,
|
return {'id': self.id,
|
||||||
@ -557,9 +565,49 @@ class Corpus(db.Model):
|
|||||||
'title': self.title,
|
'title': self.title,
|
||||||
'files': {file.id: file.to_dict() for file in self.files}}
|
'files': {file.id: file.to_dict() for file in self.files}}
|
||||||
|
|
||||||
|
def build(self):
|
||||||
|
corpus_dir = os.path.join(current_app.config['NOPAQUE_STORAGE'],
|
||||||
|
str(self.user_id),
|
||||||
|
'corpora',
|
||||||
|
str(self.id))
|
||||||
|
output_dir = os.path.join(corpus_dir, 'merged')
|
||||||
|
shutil.rmtree(output_dir, ignore_errors=True)
|
||||||
|
os.mkdir(output_dir)
|
||||||
|
master_element_tree = ET.ElementTree(
|
||||||
|
ET.fromstring('<corpus>\n</corpus>')
|
||||||
|
)
|
||||||
|
for corpus_file in self.files:
|
||||||
|
corpus_file_path = os.path.join(corpus_dir, corpus_file.filename)
|
||||||
|
element_tree = ET.parse(corpus_file_path)
|
||||||
|
text_node = element_tree.find('text')
|
||||||
|
text_node.set('address', corpus_file.address or "NULL")
|
||||||
|
text_node.set('author', corpus_file.author)
|
||||||
|
text_node.set('booktitle', corpus_file.booktitle or "NULL")
|
||||||
|
text_node.set('chapter', corpus_file.chapter or "NULL")
|
||||||
|
text_node.set('editor', corpus_file.editor or "NULL")
|
||||||
|
text_node.set('institution', corpus_file.institution or "NULL")
|
||||||
|
text_node.set('journal', corpus_file.journal or "NULL")
|
||||||
|
text_node.set('pages', corpus_file.pages or "NULL")
|
||||||
|
text_node.set('publisher', corpus_file.publisher or "NULL")
|
||||||
|
text_node.set('publishing_year', str(corpus_file.publishing_year))
|
||||||
|
text_node.set('school', corpus_file.school or "NULL")
|
||||||
|
text_node.set('title', corpus_file.title)
|
||||||
|
element_tree.write(corpus_file_path)
|
||||||
|
master_element_tree.getroot().insert(1, text_node)
|
||||||
|
output_file = os.path.join(output_dir, 'corpus.vrt')
|
||||||
|
master_element_tree.write(output_file,
|
||||||
|
xml_declaration=True,
|
||||||
|
encoding='utf-8')
|
||||||
|
self.last_edited_date = datetime.utcnow()
|
||||||
|
self.status = 'submitted'
|
||||||
|
|
||||||
def delete(self):
|
def delete(self):
|
||||||
|
corpus_dir = os.path.join(current_app.config['NOPAQUE_STORAGE'],
|
||||||
|
str(self.user_id),
|
||||||
|
'corpora',
|
||||||
|
str(self.id))
|
||||||
|
shutil.rmtree(corpus_dir, ignore_errors=True)
|
||||||
db.session.delete(self)
|
db.session.delete(self)
|
||||||
db.session.commit()
|
|
||||||
|
|
||||||
def __repr__(self):
|
def __repr__(self):
|
||||||
'''
|
'''
|
||||||
@ -582,8 +630,10 @@ class Result(db.Model):
|
|||||||
cascade='save-update, merge, delete')
|
cascade='save-update, merge, delete')
|
||||||
|
|
||||||
def delete(self):
|
def delete(self):
|
||||||
|
result_file_path = os.path.join(current_app.config['NOPAQUE_STORAGE'],
|
||||||
|
self.file[0].dir)
|
||||||
|
shutil.rmtree(result_file_path)
|
||||||
db.session.delete(self)
|
db.session.delete(self)
|
||||||
db.session.commit()
|
|
||||||
|
|
||||||
def __repr__(self):
|
def __repr__(self):
|
||||||
'''
|
'''
|
||||||
|
@ -1,16 +1,13 @@
|
|||||||
|
from .. import db
|
||||||
from ..decorators import background
|
from ..decorators import background
|
||||||
from ..models import User
|
from ..models import User
|
||||||
import os
|
|
||||||
import shutil
|
|
||||||
|
|
||||||
|
|
||||||
@background
|
@background
|
||||||
def delete_user(user_id, *args, **kwargs):
|
def delete_user(user_id, *args, **kwargs):
|
||||||
app = kwargs['app']
|
with kwargs['app'].app_context():
|
||||||
with app.app_context():
|
|
||||||
user = User.query.get(user_id)
|
user = User.query.get(user_id)
|
||||||
if user is None:
|
if user is None:
|
||||||
raise Exception('User {} not found!'.format(user_id))
|
raise Exception('User {} not found'.format(user_id))
|
||||||
path = os.path.join(app.config['NOPAQUE_STORAGE'], str(user.id))
|
|
||||||
shutil.rmtree(path, ignore_errors=True)
|
|
||||||
user.delete()
|
user.delete()
|
||||||
|
db.session.commit()
|
||||||
|
@ -1,17 +1,13 @@
|
|||||||
|
from .. import db
|
||||||
from ..decorators import background
|
from ..decorators import background
|
||||||
from ..models import Result
|
from ..models import Result
|
||||||
import os
|
|
||||||
import shutil
|
|
||||||
|
|
||||||
|
|
||||||
@background
|
@background
|
||||||
def delete_result(result_id, *args, **kwargs):
|
def delete_result(result_id, *args, **kwargs):
|
||||||
app = kwargs['app']
|
with kwargs['app'].app_context():
|
||||||
with app.app_context():
|
|
||||||
result = Result.query.get(result_id)
|
result = Result.query.get(result_id)
|
||||||
if result is None:
|
if result is None:
|
||||||
return
|
raise Exception('Result {} not found'.format(result_id))
|
||||||
result_file_path = os.path.join(app.config['NOPAQUE_STORAGE'],
|
|
||||||
result.file[0].dir)
|
|
||||||
shutil.rmtree(result_file_path)
|
|
||||||
result.delete() # cascades down and also deletes ResultFile
|
result.delete() # cascades down and also deletes ResultFile
|
||||||
|
db.session.commit()
|
||||||
|
@ -33,8 +33,7 @@ class Config:
|
|||||||
os.makedirs('logs', exist_ok=True)
|
os.makedirs('logs', exist_ok=True)
|
||||||
logging.basicConfig(filename='logs/nopaque.log',
|
logging.basicConfig(filename='logs/nopaque.log',
|
||||||
format='[%(asctime)s] %(levelname)s in '
|
format='[%(asctime)s] %(levelname)s in '
|
||||||
'%(name)s/%(filename)s:%(lineno)d - '
|
'%(pathname)s:%(lineno)d - %(message)s',
|
||||||
'%(message)s',
|
|
||||||
datefmt='%Y-%m-%d %H:%M:%S', filemode='w')
|
datefmt='%Y-%m-%d %H:%M:%S', filemode='w')
|
||||||
|
|
||||||
''' ### Security enhancements ### '''
|
''' ### Security enhancements ### '''
|
||||||
|
Loading…
x
Reference in New Issue
Block a user