2024-12-16 10:09:54 +01:00
|
|
|
|
from datetime import datetime
|
|
|
|
|
from flask import (
|
|
|
|
|
abort,
|
|
|
|
|
current_app,
|
|
|
|
|
flash,
|
|
|
|
|
Flask,
|
|
|
|
|
jsonify,
|
|
|
|
|
redirect,
|
2024-12-16 11:39:54 +01:00
|
|
|
|
request,
|
2024-12-16 10:09:54 +01:00
|
|
|
|
render_template,
|
|
|
|
|
url_for
|
|
|
|
|
)
|
2023-04-11 11:46:33 +02:00
|
|
|
|
from flask_login import current_user
|
2024-12-16 10:09:54 +01:00
|
|
|
|
from string import punctuation
|
|
|
|
|
from threading import Thread
|
|
|
|
|
import nltk
|
2023-03-10 10:33:11 +01:00
|
|
|
|
from app import db
|
2023-02-15 16:17:25 +01:00
|
|
|
|
from app.models import (
|
|
|
|
|
Corpus,
|
|
|
|
|
CorpusFollowerAssociation,
|
2023-02-23 13:05:04 +01:00
|
|
|
|
CorpusFollowerRole,
|
2023-03-13 13:29:01 +01:00
|
|
|
|
User
|
2023-02-15 16:17:25 +01:00
|
|
|
|
)
|
2022-09-02 13:07:30 +02:00
|
|
|
|
from . import bp
|
2023-04-12 12:45:41 +02:00
|
|
|
|
from .decorators import corpus_follower_permission_required
|
2023-03-10 10:33:11 +01:00
|
|
|
|
from .forms import CreateCorpusForm
|
2023-02-15 10:56:54 +01:00
|
|
|
|
|
2023-02-21 16:23:10 +01:00
|
|
|
|
|
2024-12-16 10:09:54 +01:00
|
|
|
|
|
|
|
|
|
def _delete_corpus(app: Flask, corpus_id: int):
    """Delete the corpus with the given id inside its own app context.

    Used as a ``threading.Thread`` target by the ``delete_corpus`` view, so
    it cannot rely on a request context and pushes an application context
    itself.

    :param app: The Flask application object whose context is pushed.
    :param corpus_id: Primary key of the corpus to delete.
    """
    with app.app_context():
        corpus = Corpus.query.get(corpus_id)
        if corpus is None:
            # The row can already be gone by the time this thread runs
            # (e.g. two delete requests for the same corpus); nothing to do.
            return
        corpus.delete()
        db.session.commit()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _build_corpus(app: Flask, corpus_id: int):
    """Build the corpus with the given id inside its own app context.

    Used as a ``threading.Thread`` target by the ``build_corpus`` view, so
    it cannot rely on a request context and pushes an application context
    itself.

    :param app: The Flask application object whose context is pushed.
    :param corpus_id: Primary key of the corpus to build.
    """
    with app.app_context():
        corpus = Corpus.query.get(corpus_id)
        if corpus is None:
            # The corpus may have been deleted between the request and the
            # start of this thread; there is nothing to build then.
            return
        corpus.build()
        db.session.commit()
|
|
|
|
|
|
|
|
|
|
|
2023-03-13 16:22:42 +01:00
|
|
|
|
@bp.route('')
def corpora():
    """Redirect the blueprint root to the ``corpora`` anchor of the dashboard."""
    dashboard_url = url_for('main.dashboard', _anchor='corpora')
    return redirect(dashboard_url)
|
2023-03-13 16:22:42 +01:00
|
|
|
|
|
|
|
|
|
|
2022-09-02 13:07:30 +02:00
|
|
|
|
@bp.route('/create', methods=['GET', 'POST'])
def create_corpus():
    """Show the corpus creation form and create a corpus on valid submit.

    When the form does not validate (including a plain GET) the form page is
    rendered. On a valid submission the corpus is created for the current
    user, committed, a flash message is queued and the client is redirected
    to the new corpus' URL. An ``OSError`` during creation aborts with 500.
    """
    form = CreateCorpusForm()

    # Guard clause: nothing submitted or invalid input -> (re-)render form.
    if not form.validate_on_submit():
        return render_template(
            'corpora/create.html.j2',
            title='Create corpus',
            form=form
        )

    try:
        new_corpus = Corpus.create(
            title=form.title.data,
            description=form.description.data,
            user=current_user
        )
    except OSError:
        abort(500)
    db.session.commit()

    flash(f'Corpus "{new_corpus.title}" created', 'corpus')
    return redirect(new_corpus.url)
|
2020-10-29 15:20:30 +01:00
|
|
|
|
|
|
|
|
|
|
2023-02-21 16:18:04 +01:00
|
|
|
|
@bp.route('/<hashid:corpus_id>')
def corpus(corpus_id: int):
    """Render the detail page of a corpus.

    The corpus owner and administrators get ``corpora/corpus.html.j2``.
    Users who follow the corpus, or anyone if the corpus is public, get
    ``corpora/public_corpus.html.j2`` together with the corpus' follower
    associations. Everybody else is rejected with 403.

    :param corpus_id: Id of the corpus (decoded by the ``hashid`` converter).
    """
    corpus = Corpus.query.get_or_404(corpus_id)

    cfa = CorpusFollowerAssociation.query.filter_by(
        corpus_id=corpus_id,
        follower_id=current_user.id
    ).first()

    is_owner_or_admin = (
        corpus.user == current_user
        or current_user.is_administrator
    )

    # Determine the follower role the current user acts with: the role of
    # an existing association wins, otherwise fall back to a named role.
    if cfa is not None:
        cfr = cfa.role
    elif is_owner_or_admin:
        cfr = CorpusFollowerRole.query.filter_by(name='Administrator').first()
    else:
        cfr = CorpusFollowerRole.query.filter_by(name='Anonymous').first()

    cfrs = CorpusFollowerRole.query.all()

    # TODO: Better solution for filtering admin
    # NOTE(review): `role_id < 4` presumably excludes administrator
    # accounts (see TODO above) - confirm against the role table.
    users = User.query.filter(
        User.is_public == True,  # SQL expression, not a Python comparison
        User.id != current_user.id,
        User.id != corpus.user.id,
        User.role_id < 4
    ).all()

    if is_owner_or_admin:
        return render_template(
            'corpora/corpus.html.j2',
            title=corpus.title,
            corpus=corpus,
            cfr=cfr,
            cfrs=cfrs,
            users=users
        )

    if (
        current_user.is_following_corpus(corpus)
        or corpus.is_public
    ):
        # Filter on the association's own corpus_id column. Filtering on
        # `Corpus.id` without a join produced a cross join and returned
        # the follower associations of every corpus.
        cfas = CorpusFollowerAssociation.query.filter(
            CorpusFollowerAssociation.corpus_id == corpus_id,
            CorpusFollowerAssociation.follower_id != corpus.user.id
        ).all()
        return render_template(
            'corpora/public_corpus.html.j2',
            title=corpus.title,
            corpus=corpus,
            cfrs=cfrs,
            cfr=cfr,
            cfas=cfas,
            users=users
        )

    abort(403)
|
|
|
|
|
|
2022-11-29 15:28:10 +01:00
|
|
|
|
|
2024-12-16 10:09:54 +01:00
|
|
|
|
@bp.route('/<hashid:corpus_id>', methods=['DELETE'])
def delete_corpus(corpus_id: int):
    """Schedule deletion of a corpus in a background thread.

    Only the corpus owner or an administrator may delete; others get 403.
    The deletion itself runs in ``_delete_corpus`` on a separate thread, so
    the response (202) is sent before the deletion has necessarily finished.

    :param corpus_id: Id of the corpus (decoded by the ``hashid`` converter).
    """
    target_corpus = Corpus.query.get_or_404(corpus_id)

    may_delete = (
        target_corpus.user == current_user
        or current_user.is_administrator
    )
    if not may_delete:
        abort(403)

    # Hand the real application object (not the proxy) to the thread.
    app = current_app._get_current_object()
    worker = Thread(target=_delete_corpus, args=(app, target_corpus.id))
    worker.start()

    return jsonify(f'Corpus "{target_corpus.title}" marked for deletion.'), 202
|
|
|
|
|
|
|
|
|
|
|
2024-12-16 11:39:54 +01:00
|
|
|
|
@bp.route('/<hashid:corpus_id>/build', methods=['POST'])
def build_corpus(corpus_id: int):
    """Schedule a build of a corpus in a background thread.

    Allowed for followers whose role has the ``MANAGE_FILES`` permission,
    for the corpus owner and for administrators; others get 403. A corpus
    without any files cannot be built and yields 409. The build itself runs
    in ``_build_corpus`` on a separate thread; the response status is 202.

    :param corpus_id: Id of the corpus (decoded by the ``hashid`` converter).
    """
    corpus = Corpus.query.get_or_404(corpus_id)

    cfa = CorpusFollowerAssociation.query.filter_by(
        corpus_id=corpus_id,
        follower_id=current_user.id
    ).first()

    if not (
        cfa is not None and cfa.role.has_permission('MANAGE_FILES')
        or corpus.user == current_user
        or current_user.is_administrator
    ):
        abort(403)

    # Only existence matters here, so fetch at most one row instead of
    # loading every file of the corpus.
    if corpus.files.first() is None:
        abort(409)

    thread = Thread(
        target=_build_corpus,
        # Pass the real application object (not the proxy) to the thread.
        args=(current_app._get_current_object(), corpus.id)
    )
    thread.start()

    return jsonify(f'Corpus "{corpus.title}" marked for building.'), 202
|
|
|
|
|
|
|
|
|
|
|
2024-12-16 11:39:54 +01:00
|
|
|
|
@bp.route('/<hashid:corpus_id>/create-share-link', methods=['POST'])
def create_share_link(corpus_id: int):
    """Create a tokenized link that lets its recipient follow a corpus.

    Expects a JSON object with the string fields ``expiration_date``
    (format ``'%b %d, %Y'``) and ``role_name`` (name of an existing
    ``CorpusFollowerRole``). Malformed input yields 400. Allowed for
    followers whose role has the ``MANAGE_FOLLOWERS`` permission, for the
    corpus owner and for administrators; others get 403.

    :param corpus_id: Id of the corpus (decoded by the ``hashid`` converter).
    :returns: JSON response containing the external share URL.
    """
    data = request.json
    # A JSON body that is not an object (list, string, ...) cannot carry
    # the expected fields; reject it instead of failing on item access.
    if not isinstance(data, dict):
        abort(400)

    # `.get` maps a missing key to None, which the type checks reject.
    expiration_date = data.get('expiration_date')
    if not isinstance(expiration_date, str):
        abort(400)

    role_name = data.get('role_name')
    if not isinstance(role_name, str):
        abort(400)

    corpus = Corpus.query.get_or_404(corpus_id)

    cfa = CorpusFollowerAssociation.query.filter_by(
        corpus_id=corpus_id,
        follower_id=current_user.id
    ).first()

    if not (
        cfa is not None and cfa.role.has_permission('MANAGE_FOLLOWERS')
        or corpus.user == current_user
        or current_user.is_administrator
    ):
        abort(403)

    try:
        _expiration_date = datetime.strptime(expiration_date, '%b %d, %Y')
    except ValueError:
        # Client sent a date that does not match the expected format.
        abort(400)

    # The role object is only needed to verify that the name exists.
    cfr = CorpusFollowerRole.query.filter_by(name=role_name).first()
    if cfr is None:
        abort(400)

    token = current_user.generate_follow_corpus_token(
        corpus.hashid,
        role_name,
        _expiration_date
    )

    corpus_share_link = url_for(
        'corpora.follow_corpus',
        corpus_id=corpus_id,
        token=token,
        _external=True
    )

    return jsonify(corpus_share_link)
|
|
|
|
|
|
2023-03-01 16:31:41 +01:00
|
|
|
|
|
2023-04-18 11:32:04 +02:00
|
|
|
|
@bp.route('/<hashid:corpus_id>/analysis')
@corpus_follower_permission_required('VIEW')
def analysis(corpus_id: int):
    """Render the analysis page of a corpus.

    Access is gated by the ``corpus_follower_permission_required('VIEW')``
    decorator; an unknown corpus id yields 404.

    :param corpus_id: Id of the corpus (decoded by the ``hashid`` converter).
    """
    requested_corpus = Corpus.query.get_or_404(corpus_id)
    page_title = f'Analyse Corpus {requested_corpus.title}'
    return render_template(
        'corpora/analysis.html.j2',
        corpus=requested_corpus,
        title=page_title
    )
|
|
|
|
|
|
2024-12-16 11:39:54 +01:00
|
|
|
|
|
2024-12-16 10:09:54 +01:00
|
|
|
|
@bp.route('/<hashid:corpus_id>/analysis/stopwords')
def get_stopwords(corpus_id: int):
    """Return stopword lists for the analysis frontend as JSON.

    The response maps each supported language name to its NLTK stopword
    list and additionally contains a ``punctuation`` list and an empty
    ``user_stopwords`` list.

    :param corpus_id: Part of the URL; not used by this view.
    """
    languages = [
        'german',
        'english',
        'catalan',
        'greek',
        'spanish',
        'french',
        'italian',
        'russian',
        'chinese'
    ]

    # Only hit the NLTK download server when the stopwords corpus is not
    # installed yet, instead of contacting it on every single request.
    try:
        nltk.data.find('corpora/stopwords')
    except LookupError:
        nltk.download('stopwords', quiet=True)

    stopwords = {
        language: nltk.corpus.stopwords.words(language)
        for language in languages
    }
    # ASCII punctuation plus a few typographic characters not covered by
    # `string.punctuation`.
    stopwords['punctuation'] = list(punctuation)
    stopwords['punctuation'] += ['—', '|', '–', '“', '„', '--']
    stopwords['user_stopwords'] = []

    return jsonify(stopwords)
|
|
|
|
|
|
2023-03-09 12:07:16 +01:00
|
|
|
|
|
2023-12-21 12:48:50 +01:00
|
|
|
|
@bp.route('/<hashid:corpus_id>/follow/<token>')
def follow_corpus(corpus_id: int, token: str):
    """Make the current user follow a corpus via a share-link token.

    An unknown corpus id yields 404. If ``follow_corpus_by_token`` returns a
    falsy value the request is rejected with 403. On success the session is
    committed, a flash message is queued and the client is redirected to
    the corpus' URL.

    :param corpus_id: Id of the corpus (decoded by the ``hashid`` converter).
    :param token: Token taken from the share link.
    """
    shared_corpus = Corpus.query.get_or_404(corpus_id)

    token_accepted = current_user.follow_corpus_by_token(token)
    if not token_accepted:
        abort(403)

    db.session.commit()

    flash(f'You are following "{shared_corpus.title}" now', category='corpus')
    return redirect(shared_corpus.url)
|
2024-12-16 11:39:54 +01:00
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@bp.route('/<hashid:corpus_id>/is-public', methods=['PUT'])
def update_is_public(corpus_id):
    """Set the ``is_public`` flag of a corpus from a JSON boolean body.

    A body that is not a JSON boolean yields 400, an unknown corpus id 404.
    Only the corpus owner or an administrator may change the flag; others
    get 403.

    :param corpus_id: Id of the corpus (decoded by the ``hashid`` converter).
    """
    new_value = request.json
    if not isinstance(new_value, bool):
        abort(400)

    target_corpus = Corpus.query.get_or_404(corpus_id)

    is_owner = target_corpus.user == current_user
    if not (is_owner or current_user.is_administrator):
        abort(403)

    target_corpus.is_public = new_value
    db.session.commit()

    visibility = 'public' if new_value else 'private'
    return jsonify(f'Corpus "{target_corpus.title}" is now {visibility}'), 200
|