2020-01-09 15:04:52 +00:00
|
|
|
from app import db, logger
|
2019-11-05 15:22:36 +00:00
|
|
|
from app.models import Corpus, CorpusFile
|
2019-10-31 09:25:48 +00:00
|
|
|
from flask import (abort, current_app, flash, redirect, request,
|
2019-10-31 14:29:02 +00:00
|
|
|
render_template, url_for, send_from_directory)
|
2019-10-31 09:25:48 +00:00
|
|
|
from flask_login import current_user, login_required
|
2019-11-15 10:45:04 +00:00
|
|
|
from threading import Thread
|
2019-11-05 15:22:36 +00:00
|
|
|
from werkzeug.utils import secure_filename
|
2019-10-31 09:25:48 +00:00
|
|
|
from . import corpora
|
2019-11-14 08:48:30 +00:00
|
|
|
from .background_functions import (delete_corpus_, delete_corpus_file_,
|
2019-11-14 14:53:43 +00:00
|
|
|
edit_corpus_file_)
|
2019-11-04 08:54:30 +00:00
|
|
|
from .forms import (AddCorpusFileForm, AddCorpusForm, EditCorpusFileForm,
|
|
|
|
QueryDownloadForm, QueryForm)
|
2019-10-31 09:25:48 +00:00
|
|
|
import os
|
|
|
|
|
|
|
|
|
2019-10-31 14:29:02 +00:00
|
|
|
@corpora.route('/add', methods=['GET', 'POST'])
@login_required
def add_corpus():
    """Render the "add corpus" form and create a corpus when it validates.

    On a valid submission a ``Corpus`` with status ``'unprepared'`` is
    committed for the current user, then the directory
    ``<NOPAQUE_STORAGE>/<user id>/corpora/<corpus id>`` is created.  When
    the directory cannot be created an error is flashed, ``delete()`` is
    called on the corpus and the form is rendered again; otherwise the
    user is redirected to the page of the new corpus.
    """
    form = AddCorpusForm()
    if form.validate_on_submit():
        new_corpus = Corpus(creator=current_user,
                            description=form.description.data,
                            status='unprepared',
                            title=form.title.data)
        db.session.add(new_corpus)
        # Commit first: the directory name below is built from
        # new_corpus.id.
        db.session.commit()
        storage_root = current_app.config['NOPAQUE_STORAGE']
        corpus_dir = os.path.join(storage_root, str(new_corpus.user_id),
                                  'corpora', str(new_corpus.id))
        try:
            os.makedirs(corpus_dir)
        except OSError:
            flash('[ERROR]: Could not add corpus!')
            new_corpus.delete()
        else:
            flash('Corpus added!')
            return redirect(url_for('corpora.corpus',
                                    corpus_id=new_corpus.id))
    # Reached on GET, on an invalid form and after a failed directory
    # creation.
    return render_template('corpora/add_corpus.html.j2',
                           add_corpus_form=form,
                           title='Add corpus')
|
2019-10-31 09:25:48 +00:00
|
|
|
|
|
|
|
|
|
|
|
@corpora.route('/<int:corpus_id>')
@login_required
def corpus(corpus_id):
    """Render the detail page of one corpus.

    Responds with 404 when no corpus has the given id and with 403 when
    the current user is neither the creator of the corpus nor an
    administrator.
    """
    requested = Corpus.query.get_or_404(corpus_id)
    is_owner = requested.creator == current_user
    if not is_owner and not current_user.is_administrator():
        abort(403)
    return render_template('corpora/corpus.html.j2',
                           corpus=requested,
                           title='Corpus')
|
2019-10-31 09:25:48 +00:00
|
|
|
|
|
|
|
|
2019-11-14 14:53:43 +00:00
|
|
|
@corpora.route('/<int:corpus_id>/analyse')
@login_required
def analyse_corpus(corpus_id):
    """Render the analysis page of a corpus.

    Responds with 404 when no corpus has the given id and with 403 when
    the current user is neither the creator of the corpus nor an
    administrator.  A corpus whose status is ``'prepared'`` is switched
    to ``'start analysis'`` and committed.  The query form is pre-filled
    from the request arguments ``context`` (default 10),
    ``hits_per_page`` (default 30) and ``query``.
    """
    corpus = Corpus.query.get_or_404(corpus_id)
    # Same access rule as the other corpus routes; it has to run before
    # the status change so that foreign users cannot trigger an analysis.
    if not (corpus.creator == current_user
            or current_user.is_administrator()):
        abort(403)
    if corpus.status == 'prepared':
        corpus.status = 'start analysis'
        db.session.commit()
    query_download_form = QueryDownloadForm()
    query_form = QueryForm(
        context=request.args.get('context', 10),
        hits_per_page=request.args.get('hits_per_page', 30),
        query=request.args.get('query'))
    return render_template('corpora/analyse_corpus.html.j2',
                           corpus_id=corpus_id,
                           query_download_form=query_download_form,
                           query_form=query_form, title='Analyse Corpus')
|
2019-10-31 09:25:48 +00:00
|
|
|
|
|
|
|
|
|
|
|
@corpora.route('/<int:corpus_id>/delete')
@login_required
def delete_corpus(corpus_id):
    """Start the deletion of a corpus and return to the dashboard.

    Responds with 404 when no corpus has the given id and with 403 when
    the current user is neither its creator nor an administrator.  The
    deletion itself is handed to ``delete_corpus_`` on a separate
    thread, so the success message is flashed as soon as that thread has
    been started.
    """
    target = Corpus.query.get_or_404(corpus_id)
    may_delete = (target.creator == current_user
                  or current_user.is_administrator())
    if not may_delete:
        abort(403)
    # Hand the worker the real application object rather than the
    # ``current_app`` proxy.
    app = current_app._get_current_object()
    worker = Thread(target=delete_corpus_, args=(app, target.id))
    worker.start()
    flash('Corpus deleted!')
    return redirect(url_for('main.dashboard'))
|
|
|
|
|
|
|
|
|
2019-10-31 14:29:02 +00:00
|
|
|
@corpora.route('/<int:corpus_id>/files/add', methods=['GET', 'POST'])
@login_required
def add_corpus_file(corpus_id):
    """Render the upload form and register an uploaded corpus file.

    Responds with 404 when no corpus has the given id and with 403 when
    the current user is neither its creator nor an administrator.  On a
    valid submission the sanitised file name is checked against the
    files already attached to the corpus; a duplicate redirects back to
    this form.  Otherwise the upload is stored under
    ``<NOPAQUE_STORAGE>/<user id>/corpora/<corpus id>/``, a
    ``CorpusFile`` row is committed and ``edit_corpus_file_`` is started
    on a separate thread before redirecting to the corpus page.
    """
    corpus = Corpus.query.get_or_404(corpus_id)
    is_owner = corpus.creator == current_user
    if not is_owner and not current_user.is_administrator():
        abort(403)
    form = AddCorpusFileForm()
    if not form.validate_on_submit():
        return render_template('corpora/add_corpus_file.html.j2',
                               add_corpus_file_form=form,
                               corpus=corpus, title='Add corpus file')
    upload = form.file.data
    filename = secure_filename(upload.filename)
    if any(filename == existing.filename for existing in corpus.files):
        flash('File already registered to this corpus.')
        return redirect(url_for('corpora.add_corpus_file',
                                corpus_id=corpus_id))
    # Save the file
    relative_dir = os.path.join(str(corpus.user_id), 'corpora',
                                str(corpus.id))
    upload.save(os.path.join(current_app.config['NOPAQUE_STORAGE'],
                             relative_dir, filename))
    # Every remaining form field is forwarded to the model as a keyword
    # argument named after the field id.
    skipped = ('submit', 'csrf_token', 'file')
    metadata = {field.id: field.data
                for field in form if field.id not in skipped}
    corpus_file = CorpusFile(**metadata,
                             corpus=corpus,
                             dir=relative_dir,
                             filename=filename)
    db.session.add(corpus_file)
    db.session.commit()
    worker = Thread(target=edit_corpus_file_,
                    args=(current_app._get_current_object(),
                          corpus_file.id))
    worker.start()
    flash('Corpus file added!')
    return redirect(url_for('corpora.corpus', corpus_id=corpus_id))
|
2019-10-31 09:25:48 +00:00
|
|
|
|
|
|
|
|
|
|
|
@corpora.route('/<int:corpus_id>/files/<int:corpus_file_id>/delete')
@login_required
def delete_corpus_file(corpus_id, corpus_file_id):
    """Start the deletion of a corpus file and return to its corpus page.

    Responds with 404 when the file does not exist or does not belong to
    the corpus named in the URL, and with 403 when the current user is
    neither the creator of that corpus nor an administrator.  The
    deletion itself is handed to ``delete_corpus_file_`` on a separate
    thread.
    """
    target = CorpusFile.query.get_or_404(corpus_file_id)
    if target.corpus_id != corpus_id:
        abort(404)
    is_owner = target.corpus.creator == current_user
    if not is_owner and not current_user.is_administrator():
        abort(403)
    # Hand the worker the real application object rather than the
    # ``current_app`` proxy.
    app = current_app._get_current_object()
    worker = Thread(target=delete_corpus_file_, args=(app, target.id))
    worker.start()
    flash('Corpus file deleted!')
    return redirect(url_for('corpora.corpus', corpus_id=corpus_id))
|
|
|
|
|
|
|
|
|
|
|
|
@corpora.route('/<int:corpus_id>/files/<int:corpus_file_id>/download')
@login_required
def download_corpus_file(corpus_id, corpus_file_id):
    """Send a stored corpus file to the client as an attachment.

    Responds with 404 when the file does not exist or does not belong to
    the corpus named in the URL, and with 403 when the current user is
    neither the creator of that corpus nor an administrator.  The file
    is read from ``corpus_file.dir`` below the ``NOPAQUE_STORAGE``
    directory.
    """
    requested = CorpusFile.query.get_or_404(corpus_file_id)
    if requested.corpus_id != corpus_id:
        abort(404)
    is_owner = requested.corpus.creator == current_user
    if not is_owner and not current_user.is_administrator():
        abort(403)
    storage_root = current_app.config['NOPAQUE_STORAGE']
    absolute_dir = os.path.join(storage_root, requested.dir)
    return send_from_directory(as_attachment=True,
                               directory=absolute_dir,
                               filename=requested.filename)
|
2019-11-04 08:54:30 +00:00
|
|
|
|
|
|
|
|
2019-11-12 13:02:01 +00:00
|
|
|
@corpora.route('/<int:corpus_id>/files/<int:corpus_file_id>/edit',
               methods=['GET', 'POST'])
@login_required
def edit_corpus_file(corpus_id, corpus_file_id):
    """Render the edit form of a corpus file and store submitted changes.

    Responds with 404 when the file does not exist or does not belong to
    the corpus named in the URL, and with 403 when the current user is
    neither the creator of that corpus nor an administrator.  On a valid
    submission every form field except ``submit``, ``csrf_token`` and
    ``file`` is written to the ``CorpusFile`` row through an UPDATE
    statement, ``edit_corpus_file_`` is started on a separate thread and
    the user is redirected to the corpus page.
    """
    corpus_file = CorpusFile.query.get_or_404(corpus_file_id)
    if corpus_file.corpus_id != corpus_id:
        abort(404)
    is_owner = corpus_file.corpus.creator == current_user
    if not is_owner and not current_user.is_administrator():
        abort(403)
    form = EditCorpusFileForm()
    if not form.validate_on_submit():
        return render_template('corpora/edit_corpus_file.html.j2',
                               edit_corpus_file_form=form,
                               corpus_file=corpus_file,
                               title='Edit corpus file')
    # Column values are taken from the form fields, keyed by field id.
    skipped = ('submit', 'csrf_token', 'file')
    changes = {field.id: field.data
               for field in form if field.id not in skipped}
    statement = (db.update(CorpusFile)
                 .where(CorpusFile.id == corpus_file_id)
                 .values(**changes))
    db.session.execute(statement)
    db.session.commit()
    worker = Thread(target=edit_corpus_file_,
                    args=(current_app._get_current_object(),
                          corpus_file.id))
    worker.start()
    flash('Corpus file edited!')
    return redirect(url_for('corpora.corpus', corpus_id=corpus_id))
|
2019-11-06 09:07:34 +00:00
|
|
|
|
|
|
|
|
|
|
|
@corpora.route('/<int:corpus_id>/prepare')
@login_required
def prepare_corpus(corpus_id):
    """Mark a corpus as ``'preparable'`` and return to its page.

    Responds with 404 when no corpus has the given id and with 403 when
    the current user is neither its creator nor an administrator.  A
    corpus without any files keeps its status and only a hint is
    flashed.
    """
    corpus = Corpus.query.get_or_404(corpus_id)
    is_owner = corpus.creator == current_user
    if not is_owner and not current_user.is_administrator():
        abort(403)
    if not corpus.files.all():
        flash('Can not prepare corpus, please add corpus file(s).')
        return redirect(url_for('corpora.corpus', corpus_id=corpus_id))
    corpus.status = 'preparable'
    db.session.commit()
    flash('Corpus marked for preparation!')
    return redirect(url_for('corpora.corpus', corpus_id=corpus_id))
|