From 6c1f48eb2f208e1ace79880755eaeaadcc7e517a Mon Sep 17 00:00:00 2001
From: Patrick Jentsch
Date: Mon, 16 Dec 2024 10:09:54 +0100
Subject: [PATCH] Update corpora package
---
app/blueprints/corpora/json_routes.py | 65 +-------
app/blueprints/corpora/routes.py | 211 +++++++++++++++++++++++---
2 files changed, 189 insertions(+), 87 deletions(-)
diff --git a/app/blueprints/corpora/json_routes.py b/app/blueprints/corpora/json_routes.py
index 79283aaf..d41d5297 100644
--- a/app/blueprints/corpora/json_routes.py
+++ b/app/blueprints/corpora/json_routes.py
@@ -1,75 +1,14 @@
from datetime import datetime
-from flask import abort, current_app, request, url_for
+from flask import abort, request, url_for
from flask_login import current_user
-from threading import Thread
from app import db
from app.decorators import content_negotiation
from app.models import Corpus, CorpusFollowerRole
from . import bp
from .decorators import corpus_follower_permission_required, corpus_owner_or_admin_required
-import nltk
-from string import punctuation
-@bp.route('/', methods=['DELETE'])
-@corpus_owner_or_admin_required
-@content_negotiation(produces='application/json')
-def delete_corpus(corpus_id):
- def _delete_corpus(app, corpus_id):
- with app.app_context():
- corpus = Corpus.query.get(corpus_id)
- corpus.delete()
- db.session.commit()
- corpus = Corpus.query.get_or_404(corpus_id)
- thread = Thread(
- target=_delete_corpus,
- args=(current_app._get_current_object(), corpus.id)
- )
- thread.start()
- response_data = {
- 'message': f'Corpus "{corpus.title}" marked for deletion',
- 'category': 'corpus'
- }
- return response_data, 200
-
-
-@bp.route('//build', methods=['POST'])
-@corpus_follower_permission_required('MANAGE_FILES')
-@content_negotiation(produces='application/json')
-def build_corpus(corpus_id):
- def _build_corpus(app, corpus_id):
- with app.app_context():
- corpus = Corpus.query.get(corpus_id)
- corpus.build()
- db.session.commit()
-
- corpus = Corpus.query.get_or_404(corpus_id)
- if len(corpus.files.all()) == 0:
- abort(409)
- thread = Thread(
- target=_build_corpus,
- args=(current_app._get_current_object(), corpus_id)
- )
- thread.start()
- response_data = {
- 'message': f'Corpus "{corpus.title}" marked for building',
- 'category': 'corpus'
- }
- return response_data, 202
-
-@bp.route('/stopwords')
-@content_negotiation(produces='application/json')
-def get_stopwords():
- nltk.download('stopwords', quiet=True)
- languages = ["german", "english", "catalan", "greek", "spanish", "french", "italian", "russian", "chinese"]
- stopwords = {}
- for language in languages:
- stopwords[language] = nltk.corpus.stopwords.words(language)
- stopwords['punctuation'] = list(punctuation) + ['—', '|', '–', '“', '„', '--']
- stopwords['user_stopwords'] = []
- response_data = stopwords
- return response_data, 202
@bp.route('//generate-share-link', methods=['POST'])
@corpus_follower_permission_required('MANAGE_FOLLOWERS')
@@ -102,7 +41,7 @@ def generate_corpus_share_link(corpus_id):
'corpusShareLink': corpus_share_link
}
return response_data, 200
-
+
@bp.route('//is_public', methods=['PUT'])
diff --git a/app/blueprints/corpora/routes.py b/app/blueprints/corpora/routes.py
index 66975ea1..f4ab6e80 100644
--- a/app/blueprints/corpora/routes.py
+++ b/app/blueprints/corpora/routes.py
@@ -1,5 +1,18 @@
-from flask import abort, flash, redirect, render_template, url_for
+from datetime import datetime
+from flask import (
+ abort,
+ current_app,
+ flash,
+ Flask,
+ jsonify,
+ redirect,
+ render_template,
+ url_for
+)
from flask_login import current_user
+from string import punctuation
+from threading import Thread
+import nltk
from app import db
from app.models import (
Corpus,
@@ -12,6 +25,21 @@ from .decorators import corpus_follower_permission_required
from .forms import CreateCorpusForm
+
+def _delete_corpus(app: Flask, corpus_id: int):
+    """Delete a corpus from a worker thread.
+
+    The function is used as a ``Thread`` target, so it pushes its own
+    application context before touching the database.
+
+    :param app: The concrete Flask application object (callers pass
+        ``current_app._get_current_object()``, not the proxy).
+    :param corpus_id: Primary key of the corpus to delete.
+    """
+    with app.app_context():
+        corpus: Corpus = Corpus.query.get(corpus_id)
+        if corpus is None:
+            # The row can be gone by the time this thread runs (e.g. two
+            # DELETE requests in quick succession); nothing left to do.
+            return
+        corpus.delete()
+        db.session.commit()
+
+
+def _build_corpus(app: Flask, corpus_id: int):
+    """Build a corpus from a worker thread.
+
+    The function is used as a ``Thread`` target, so it pushes its own
+    application context before touching the database.
+
+    :param app: The concrete Flask application object (callers pass
+        ``current_app._get_current_object()``, not the proxy).
+    :param corpus_id: Primary key of the corpus to build.
+    """
+    with app.app_context():
+        corpus = Corpus.query.get(corpus_id)
+        if corpus is None:
+            # The corpus may have been deleted between the request that
+            # scheduled the build and this thread running.
+            return
+        corpus.build()
+        db.session.commit()
+
+
@bp.route('')
def corpora():
return redirect(url_for('main.dashboard', _anchor='corpora'))
@@ -20,6 +48,7 @@ def corpora():
@bp.route('/create', methods=['GET', 'POST'])
def create_corpus():
form = CreateCorpusForm()
+
if form.validate_on_submit():
try:
corpus = Corpus.create(
@@ -30,8 +59,10 @@ def create_corpus():
except OSError:
abort(500)
db.session.commit()
+
flash(f'Corpus "{corpus.title}" created', 'corpus')
return redirect(corpus.url)
+
return render_template(
'corpora/create.html.j2',
title='Create corpus',
@@ -40,12 +71,14 @@ def create_corpus():
@bp.route('/')
-def corpus(corpus_id):
+def corpus(corpus_id: int):
corpus = Corpus.query.get_or_404(corpus_id)
- cfrs = CorpusFollowerRole.query.all()
- # TODO: Better solution for filtering admin
- users = User.query.filter(User.is_public == True, User.id != current_user.id, User.id != corpus.user.id, User.role_id < 4).all()
- cfa = CorpusFollowerAssociation.query.filter_by(corpus_id=corpus_id, follower_id=current_user.id).first()
+
+ cfa = CorpusFollowerAssociation.query.filter_by(
+ corpus_id=corpus_id,
+ follower_id=current_user.id
+ ).first()
+
if cfa is None:
if corpus.user == current_user or current_user.is_administrator:
cfr = CorpusFollowerRole.query.filter_by(name='Administrator').first()
@@ -53,7 +86,21 @@ def corpus(corpus_id):
cfr = CorpusFollowerRole.query.filter_by(name='Anonymous').first()
else:
cfr = cfa.role
- if corpus.user == current_user or current_user.is_administrator:
+
+ cfrs = CorpusFollowerRole.query.all()
+
+ # TODO: Better solution for filtering admin
+ users = User.query.filter(
+ User.is_public == True,
+ User.id != current_user.id,
+ User.id != corpus.user.id,
+ User.role_id < 4
+ ).all()
+
+ if (
+ corpus.user == current_user
+ or current_user.is_administrator
+ ):
return render_template(
'corpora/corpus.html.j2',
title=corpus.title,
@@ -62,8 +109,15 @@ def corpus(corpus_id):
cfrs=cfrs,
users=users
)
- if (current_user.is_following_corpus(corpus) or corpus.is_public):
- cfas = CorpusFollowerAssociation.query.filter(Corpus.id == corpus_id, CorpusFollowerAssociation.follower_id != corpus.user.id).all()
+
+ if (
+ current_user.is_following_corpus(corpus)
+ or corpus.is_public
+ ):
+        # Filter on the association's own foreign key. Comparing against
+        # ``Corpus.id`` would pull ``Corpus`` into the query without a join
+        # condition, so followers of every corpus would match.
+        cfas = CorpusFollowerAssociation.query.filter(
+            CorpusFollowerAssociation.corpus_id == corpus_id,
+            CorpusFollowerAssociation.follower_id != corpus.user.id
+        ).all()
return render_template(
'corpora/public_corpus.html.j2',
title=corpus.title,
@@ -73,37 +127,146 @@ def corpus(corpus_id):
cfas=cfas,
users=users
)
+
abort(403)
+@bp.route('/', methods=['DELETE'])
+def delete_corpus(corpus_id: int):
+    """Schedule the deletion of a corpus.
+
+    Responds with 404 if the corpus does not exist and with 403 unless the
+    current user owns the corpus or is an administrator. The deletion
+    itself is handed to a background thread; the response is a JSON
+    message with status 202.
+    """
+    corpus = Corpus.query.get_or_404(corpus_id)
+
+    is_owner_or_admin = (
+        corpus.user == current_user
+        or current_user.is_administrator
+    )
+    if not is_owner_or_admin:
+        abort(403)
+
+    # The worker gets the real application object rather than the
+    # ``current_app`` proxy so it can push its own application context.
+    app = current_app._get_current_object()
+    Thread(target=_delete_corpus, args=(app, corpus.id)).start()
+
+    message = f'Corpus "{corpus.title}" marked for deletion.'
+    return jsonify(message), 202
+
+
+@bp.route('//build', methods=['PATCH'])
+def build_corpus(corpus_id: int):
+    """Schedule a build of a corpus.
+
+    Responds with 404 if the corpus does not exist, with 403 unless the
+    current user is a follower whose role grants ``MANAGE_FILES``, the
+    corpus owner, or an administrator, and with 409 if the corpus has no
+    files. The build itself is handed to a background thread; the response
+    is a JSON message with status 202.
+    """
+    corpus = Corpus.query.get_or_404(corpus_id)
+
+    cfa = CorpusFollowerAssociation.query.filter_by(
+        corpus_id=corpus_id,
+        follower_id=current_user.id
+    ).first()
+
+    may_build = (
+        cfa is not None and cfa.role.has_permission('MANAGE_FILES')
+        or corpus.user == current_user
+        or current_user.is_administrator
+    )
+    if not may_build:
+        abort(403)
+
+    # There is nothing to build from without at least one file.
+    if not corpus.files.all():
+        abort(409)
+
+    # The worker gets the real application object rather than the
+    # ``current_app`` proxy so it can push its own application context.
+    app = current_app._get_current_object()
+    Thread(target=_build_corpus, args=(app, corpus.id)).start()
+
+    message = f'Corpus "{corpus.title}" marked for building.'
+    return jsonify(message), 202
+
+
+@bp.route('//generate-share-link', methods=['POST'])
+def create_share_link(corpus_id: int, expiration_date: str, role_name: str):
+    """Create a link that lets its recipient follow a corpus.
+
+    :param corpus_id: Identifier of the corpus to share.
+    :param expiration_date: Expiry of the link, formatted like
+        ``'Dec 16, 2024'`` (``'%b %d, %Y'``).
+    :param role_name: Name of the ``CorpusFollowerRole`` the link grants.
+    :returns: The absolute follow URL, serialised as JSON.
+
+    Aborts with 400 for non-string arguments, an unparsable date or an
+    unknown role name, with 404 for an unknown corpus, and with 403 unless
+    the current user is a follower whose role grants ``MANAGE_FOLLOWERS``,
+    the corpus owner, or an administrator.
+    """
+    # NOTE(review): the route rule as visible here binds only ``corpus_id``;
+    # confirm how ``expiration_date`` and ``role_name`` reach this view.
+    if not isinstance(expiration_date, str):
+        abort(400)
+
+    if not isinstance(role_name, str):
+        abort(400)
+
+    corpus = Corpus.query.get_or_404(corpus_id)
+
+    cfa = CorpusFollowerAssociation.query.filter_by(
+        corpus_id=corpus_id,
+        follower_id=current_user.id
+    ).first()
+
+    if not (
+        cfa is not None and cfa.role.has_permission('MANAGE_FOLLOWERS')
+        or corpus.user == current_user
+        or current_user.is_administrator
+    ):
+        abort(403)
+
+    try:
+        _expiration_date = datetime.strptime(expiration_date, '%b %d, %Y')
+    except ValueError:
+        # A well-typed but malformed date is a client error, in line with
+        # the other input checks in this view, not a server failure.
+        abort(400)
+
+    cfr = CorpusFollowerRole.query.filter_by(name=role_name).first()
+    if cfr is None:
+        abort(400)
+
+    token = current_user.generate_follow_corpus_token(
+        corpus.hashid,
+        role_name,
+        _expiration_date
+    )
+
+    corpus_share_link = url_for(
+        'corpora.follow_corpus',
+        corpus_id=corpus_id,
+        token=token,
+        _external=True
+    )
+
+    return jsonify(corpus_share_link)
+
@bp.route('//analysis')
@corpus_follower_permission_required('VIEW')
-def analysis(corpus_id):
+def analysis(corpus_id: int):
+    """Render ``corpora/analysis.html.j2`` for the corpus, or 404 if it does not exist."""
corpus = Corpus.query.get_or_404(corpus_id)
+
return render_template(
'corpora/analysis.html.j2',
corpus=corpus,
title=f'Analyse Corpus {corpus.title}'
)
+@bp.route('//analysis/stopwords')
+def get_stopwords(corpus_id: 'int | None' = None):
+    """Return stopword lists as JSON.
+
+    The payload maps each supported language name to its NLTK stopword
+    list and additionally contains a ``punctuation`` list and an empty
+    ``user_stopwords`` list.
+
+    :param corpus_id: Accepted but not used.
+    """
+    # NOTE(review): the rule appears to carry a leading URL variable like
+    # its sibling routes, in which case a view without parameters would
+    # fail on dispatch. Accepting ``corpus_id`` keeps the view callable
+    # either way - confirm against the real rule.
+    languages = [
+        'german',
+        'english',
+        'catalan',
+        'greek',
+        'spanish',
+        'french',
+        'italian',
+        'russian',
+        'chinese'
+    ]
+
+    nltk.download('stopwords', quiet=True)
+    stopwords = {
+        language: nltk.corpus.stopwords.words(language)
+        for language in languages
+    }
+    stopwords['punctuation'] = list(punctuation)
+    stopwords['punctuation'] += ['—', '|', '–', '“', '„', '--']
+    stopwords['user_stopwords'] = []
+
+    return jsonify(stopwords)
+
@bp.route('//follow/')
-def follow_corpus(corpus_id, token):
+def follow_corpus(corpus_id: int, token: str):
+    """Follow a corpus via a share token: 404 if the corpus is unknown, 403 if the token is rejected, otherwise commit, flash and redirect to ``corpus.url``."""
corpus = Corpus.query.get_or_404(corpus_id)
- if current_user.follow_corpus_by_token(token):
- db.session.commit()
- flash(f'You are following "{corpus.title}" now', category='corpus')
- return redirect(url_for('corpora.corpus', corpus_id=corpus_id))
- abort(403)
+ if not current_user.follow_corpus_by_token(token):
+ abort(403)
-@bp.route('/import', methods=['GET', 'POST'])
-def import_corpus():
- abort(503)
+ db.session.commit()
-
-@bp.route('//export')
-@corpus_follower_permission_required('VIEW')
-def export_corpus(corpus_id):
- abort(503)
+ flash(f'You are following "{corpus.title}" now', category='corpus')
+ return redirect(corpus.url)