From 6c1f48eb2f208e1ace79880755eaeaadcc7e517a Mon Sep 17 00:00:00 2001 From: Patrick Jentsch Date: Mon, 16 Dec 2024 10:09:54 +0100 Subject: [PATCH] Update corpora package --- app/blueprints/corpora/json_routes.py | 65 +------- app/blueprints/corpora/routes.py | 211 +++++++++++++++++++++++--- 2 files changed, 189 insertions(+), 87 deletions(-) diff --git a/app/blueprints/corpora/json_routes.py b/app/blueprints/corpora/json_routes.py index 79283aaf..d41d5297 100644 --- a/app/blueprints/corpora/json_routes.py +++ b/app/blueprints/corpora/json_routes.py @@ -1,75 +1,14 @@ from datetime import datetime -from flask import abort, current_app, request, url_for +from flask import abort, request, url_for from flask_login import current_user -from threading import Thread from app import db from app.decorators import content_negotiation from app.models import Corpus, CorpusFollowerRole from . import bp from .decorators import corpus_follower_permission_required, corpus_owner_or_admin_required -import nltk -from string import punctuation -@bp.route('/', methods=['DELETE']) -@corpus_owner_or_admin_required -@content_negotiation(produces='application/json') -def delete_corpus(corpus_id): - def _delete_corpus(app, corpus_id): - with app.app_context(): - corpus = Corpus.query.get(corpus_id) - corpus.delete() - db.session.commit() - corpus = Corpus.query.get_or_404(corpus_id) - thread = Thread( - target=_delete_corpus, - args=(current_app._get_current_object(), corpus.id) - ) - thread.start() - response_data = { - 'message': f'Corpus "{corpus.title}" marked for deletion', - 'category': 'corpus' - } - return response_data, 200 - - -@bp.route('//build', methods=['POST']) -@corpus_follower_permission_required('MANAGE_FILES') -@content_negotiation(produces='application/json') -def build_corpus(corpus_id): - def _build_corpus(app, corpus_id): - with app.app_context(): - corpus = Corpus.query.get(corpus_id) - corpus.build() - db.session.commit() - - corpus = Corpus.query.get_or_404(corpus_id) - if len(corpus.files.all()) == 0: - abort(409) - thread = Thread( - target=_build_corpus, - args=(current_app._get_current_object(), corpus_id) - ) - thread.start() - response_data = { - 'message': f'Corpus "{corpus.title}" marked for building', - 'category': 'corpus' - } - return response_data, 202 - -@bp.route('/stopwords') -@content_negotiation(produces='application/json') -def get_stopwords(): - nltk.download('stopwords', quiet=True) - languages = ["german", "english", "catalan", "greek", "spanish", "french", "italian", "russian", "chinese"] - stopwords = {} - for language in languages: - stopwords[language] = nltk.corpus.stopwords.words(language) - stopwords['punctuation'] = list(punctuation) + ['—', '|', '–', '“', '„', '--'] - stopwords['user_stopwords'] = [] - response_data = stopwords - return response_data, 202 @bp.route('//generate-share-link', methods=['POST']) @corpus_follower_permission_required('MANAGE_FOLLOWERS') @@ -102,7 +41,7 @@ def generate_corpus_share_link(corpus_id): 'corpusShareLink': corpus_share_link } return response_data, 200 - + @bp.route('//is_public', methods=['PUT']) diff --git a/app/blueprints/corpora/routes.py b/app/blueprints/corpora/routes.py index 66975ea1..f4ab6e80 100644 --- a/app/blueprints/corpora/routes.py +++ b/app/blueprints/corpora/routes.py @@ -1,5 +1,18 @@ -from flask import abort, flash, redirect, render_template, url_for +from datetime import datetime +from flask import ( + abort, + current_app, + flash, + Flask, + jsonify, + redirect, + render_template, + url_for +) from flask_login import current_user +from string import punctuation +from threading import Thread +import nltk from app import db from app.models import ( Corpus, @@ -12,6 +25,21 @@ from .decorators import corpus_follower_permission_required from .forms import CreateCorpusForm + +def _delete_corpus(app: Flask, corpus_id: int): + with app.app_context(): + corpus: Corpus = Corpus.query.get(corpus_id) + corpus.delete() + db.session.commit() + + +def _build_corpus(app: Flask, corpus_id: int): + with app.app_context(): + corpus = Corpus.query.get(corpus_id) + corpus.build() + db.session.commit() + + @bp.route('') def corpora(): return redirect(url_for('main.dashboard', _anchor='corpora')) @@ -20,6 +48,7 @@ def corpora(): @bp.route('/create', methods=['GET', 'POST']) def create_corpus(): form = CreateCorpusForm() + if form.validate_on_submit(): try: corpus = Corpus.create( @@ -30,8 +59,10 @@ def create_corpus(): except OSError: abort(500) db.session.commit() + flash(f'Corpus "{corpus.title}" created', 'corpus') return redirect(corpus.url) + return render_template( 'corpora/create.html.j2', title='Create corpus', @@ -40,12 +71,14 @@ def create_corpus(): @bp.route('/') -def corpus(corpus_id): +def corpus(corpus_id: int): corpus = Corpus.query.get_or_404(corpus_id) - cfrs = CorpusFollowerRole.query.all() - # TODO: Better solution for filtering admin - users = User.query.filter(User.is_public == True, User.id != current_user.id, User.id != corpus.user.id, User.role_id < 4).all() - cfa = CorpusFollowerAssociation.query.filter_by(corpus_id=corpus_id, follower_id=current_user.id).first() + + cfa = CorpusFollowerAssociation.query.filter_by( + corpus_id=corpus_id, + follower_id=current_user.id + ).first() + if cfa is None: if corpus.user == current_user or current_user.is_administrator: cfr = CorpusFollowerRole.query.filter_by(name='Administrator').first() @@ -53,7 +86,21 @@ def corpus(corpus_id): cfr = CorpusFollowerRole.query.filter_by(name='Anonymous').first() else: cfr = cfa.role - if corpus.user == current_user or current_user.is_administrator: + + cfrs = CorpusFollowerRole.query.all() + + # TODO: Better solution for filtering admin + users = User.query.filter( + User.is_public == True, + User.id != current_user.id, + User.id != corpus.user.id, + User.role_id < 4 + ).all() + + if ( + corpus.user == current_user + or current_user.is_administrator + ): return render_template( 'corpora/corpus.html.j2', title=corpus.title, @@ -62,8 +109,15 @@ def corpus(corpus_id): cfrs=cfrs, users=users ) - if (current_user.is_following_corpus(corpus) or corpus.is_public): - cfas = CorpusFollowerAssociation.query.filter(Corpus.id == corpus_id, CorpusFollowerAssociation.follower_id != corpus.user.id).all() + + if ( + current_user.is_following_corpus(corpus) + or corpus.is_public + ): + cfas = CorpusFollowerAssociation.query.filter( + Corpus.id == corpus_id, + CorpusFollowerAssociation.follower_id != corpus.user.id + ).all() return render_template( 'corpora/public_corpus.html.j2', title=corpus.title, @@ -73,37 +127,146 @@ def corpus(corpus_id): cfas=cfas, users=users ) + abort(403) +@bp.route('/', methods=['DELETE']) +def delete_corpus(corpus_id: int): + corpus = Corpus.query.get_or_404(corpus_id) + + if not ( + corpus.user == current_user + or current_user.is_administrator + ): + abort(403) + + thread = Thread( + target=_delete_corpus, + args=(current_app._get_current_object(), corpus.id) + ) + thread.start() + + return jsonify(f'Corpus "{corpus.title}" marked for deletion.'), 202 + + +@bp.route('//build', methods=['PATCH']) +def build_corpus(corpus_id: int): + corpus = Corpus.query.get_or_404(corpus_id) + + cfa = CorpusFollowerAssociation.query.filter_by( + corpus_id=corpus_id, + follower_id=current_user.id + ).first() + + if not ( + cfa is not None and cfa.role.has_permission('MANAGE_FILES') + or corpus.user == current_user + or current_user.is_administrator + ): + abort(403) + + if len(corpus.files.all()) == 0: + abort(409) + + thread = Thread( + target=_build_corpus, + args=(current_app._get_current_object(), corpus.id) + ) + thread.start() + + return jsonify(f'Corpus "{corpus.title}" marked for building.'), 202 + + +@bp.route('//generate-share-link', methods=['POST']) +def create_share_link(corpus_id: int, expiration_date: str, role_name: str): + if not isinstance(expiration_date, str): + abort(400) + + if not isinstance(role_name, str): + abort(400) + + corpus = Corpus.query.get_or_404(corpus_id) + + cfa = CorpusFollowerAssociation.query.filter_by( + corpus_id=corpus_id, + follower_id=current_user.id + ).first() + + if not ( + cfa is not None and cfa.role.has_permission('MANAGE_FOLLOWERS') + or corpus.user == current_user + or current_user.is_administrator + ): + abort(403) + + _expiration_date = datetime.strptime(expiration_date, '%b %d, %Y') + + cfr = CorpusFollowerRole.query.filter_by(name=role_name).first() + if cfr is None: + abort(400) + + token = current_user.generate_follow_corpus_token( + corpus.hashid, + role_name, + _expiration_date + ) + + corpus_share_link = url_for( + 'corpora.follow_corpus', + corpus_id=corpus_id, + token=token, + _external=True + ) + + return jsonify(corpus_share_link) + @bp.route('//analysis') @corpus_follower_permission_required('VIEW') -def analysis(corpus_id): +def analysis(corpus_id: int): corpus = Corpus.query.get_or_404(corpus_id) + return render_template( 'corpora/analysis.html.j2', corpus=corpus, title=f'Analyse Corpus {corpus.title}' ) +@bp.route('//analysis/stopwords') +def get_stopwords(): + languages = [ + 'german', + 'english', + 'catalan', + 'greek', + 'spanish', + 'french', + 'italian', + 'russian', + 'chinese' + ] + + nltk.download('stopwords', quiet=True) + stopwords = { + language: nltk.corpus.stopwords.words(language) + for language in languages + } + stopwords['punctuation'] = list(punctuation) + stopwords['punctuation'] += ['—', '|', '–', '“', '„', '--'] + stopwords['user_stopwords'] = [] + + return jsonify(stopwords) + @bp.route('//follow/') -def follow_corpus(corpus_id, token): +def follow_corpus(corpus_id: int, token: str): corpus = Corpus.query.get_or_404(corpus_id) - if current_user.follow_corpus_by_token(token): - db.session.commit() - flash(f'You are following "{corpus.title}" now', category='corpus') - return redirect(url_for('corpora.corpus', corpus_id=corpus_id)) - abort(403) + if not current_user.follow_corpus_by_token(token): + abort(403) -@bp.route('/import', methods=['GET', 'POST']) -def import_corpus(): - abort(503) + db.session.commit() - -@bp.route('//export') -@corpus_follower_permission_required('VIEW') -def export_corpus(corpus_id): - abort(503) + flash(f'You are following "{corpus.title}" now', category='corpus') + return redirect(corpus.url)