mirror of
				https://gitlab.ub.uni-bielefeld.de/sfb1288inf/nopaque.git
				synced 2025-10-22 14:35:27 +00:00 
			
		
		
		
	Update corpora package
This commit is contained in:
		| @@ -1,75 +1,14 @@ | |||||||
| from datetime import datetime | from datetime import datetime | ||||||
| from flask import abort, current_app, request, url_for | from flask import abort, request, url_for | ||||||
| from flask_login import current_user | from flask_login import current_user | ||||||
| from threading import Thread |  | ||||||
| from app import db | from app import db | ||||||
| from app.decorators import content_negotiation | from app.decorators import content_negotiation | ||||||
| from app.models import Corpus, CorpusFollowerRole | from app.models import Corpus, CorpusFollowerRole | ||||||
| from . import bp | from . import bp | ||||||
| from .decorators import corpus_follower_permission_required, corpus_owner_or_admin_required | from .decorators import corpus_follower_permission_required, corpus_owner_or_admin_required | ||||||
| import nltk |  | ||||||
| from string import punctuation |  | ||||||
|  |  | ||||||
|  |  | ||||||
| @bp.route('/<hashid:corpus_id>', methods=['DELETE']) |  | ||||||
| @corpus_owner_or_admin_required |  | ||||||
| @content_negotiation(produces='application/json') |  | ||||||
| def delete_corpus(corpus_id): |  | ||||||
|     def _delete_corpus(app, corpus_id): |  | ||||||
|         with app.app_context(): |  | ||||||
|             corpus = Corpus.query.get(corpus_id) |  | ||||||
|             corpus.delete() |  | ||||||
|             db.session.commit() |  | ||||||
|  |  | ||||||
|     corpus = Corpus.query.get_or_404(corpus_id) |  | ||||||
|     thread = Thread( |  | ||||||
|         target=_delete_corpus, |  | ||||||
|         args=(current_app._get_current_object(), corpus.id) |  | ||||||
|     ) |  | ||||||
|     thread.start() |  | ||||||
|     response_data = { |  | ||||||
|         'message': f'Corpus "{corpus.title}" marked for deletion', |  | ||||||
|         'category': 'corpus' |  | ||||||
|     } |  | ||||||
|     return response_data, 200 |  | ||||||
|  |  | ||||||
|  |  | ||||||
| @bp.route('/<hashid:corpus_id>/build', methods=['POST']) |  | ||||||
| @corpus_follower_permission_required('MANAGE_FILES') |  | ||||||
| @content_negotiation(produces='application/json') |  | ||||||
| def build_corpus(corpus_id): |  | ||||||
|     def _build_corpus(app, corpus_id): |  | ||||||
|         with app.app_context(): |  | ||||||
|             corpus = Corpus.query.get(corpus_id) |  | ||||||
|             corpus.build() |  | ||||||
|             db.session.commit() |  | ||||||
|  |  | ||||||
|     corpus = Corpus.query.get_or_404(corpus_id) |  | ||||||
|     if len(corpus.files.all()) == 0: |  | ||||||
|         abort(409) |  | ||||||
|     thread = Thread( |  | ||||||
|         target=_build_corpus, |  | ||||||
|         args=(current_app._get_current_object(), corpus_id) |  | ||||||
|     ) |  | ||||||
|     thread.start() |  | ||||||
|     response_data = { |  | ||||||
|         'message': f'Corpus "{corpus.title}" marked for building', |  | ||||||
|         'category': 'corpus' |  | ||||||
|     } |  | ||||||
|     return response_data, 202 |  | ||||||
|  |  | ||||||
| @bp.route('/stopwords') |  | ||||||
| @content_negotiation(produces='application/json') |  | ||||||
| def get_stopwords(): |  | ||||||
|     nltk.download('stopwords', quiet=True) |  | ||||||
|     languages = ["german", "english", "catalan", "greek", "spanish", "french", "italian", "russian", "chinese"] |  | ||||||
|     stopwords = {} |  | ||||||
|     for language in languages: |  | ||||||
|         stopwords[language] = nltk.corpus.stopwords.words(language) |  | ||||||
|     stopwords['punctuation'] = list(punctuation) + ['—', '|', '–', '“', '„', '--'] |  | ||||||
|     stopwords['user_stopwords'] = [] |  | ||||||
|     response_data = stopwords |  | ||||||
|     return response_data, 202 |  | ||||||
|  |  | ||||||
| @bp.route('/<hashid:corpus_id>/generate-share-link', methods=['POST']) | @bp.route('/<hashid:corpus_id>/generate-share-link', methods=['POST']) | ||||||
| @corpus_follower_permission_required('MANAGE_FOLLOWERS') | @corpus_follower_permission_required('MANAGE_FOLLOWERS') | ||||||
| @@ -102,7 +41,7 @@ def generate_corpus_share_link(corpus_id): | |||||||
|         'corpusShareLink': corpus_share_link |         'corpusShareLink': corpus_share_link | ||||||
|     } |     } | ||||||
|     return response_data, 200 |     return response_data, 200 | ||||||
|      |  | ||||||
|  |  | ||||||
|  |  | ||||||
| @bp.route('/<hashid:corpus_id>/is_public', methods=['PUT']) | @bp.route('/<hashid:corpus_id>/is_public', methods=['PUT']) | ||||||
|   | |||||||
| @@ -1,5 +1,18 @@ | |||||||
| from flask import abort, flash, redirect, render_template, url_for | from datetime import datetime | ||||||
|  | from flask import ( | ||||||
|  |     abort, | ||||||
|  |     current_app, | ||||||
|  |     flash, | ||||||
|  |     Flask, | ||||||
|  |     jsonify, | ||||||
|  |     redirect, | ||||||
|  |     render_template, | ||||||
|  |     url_for | ||||||
|  | ) | ||||||
| from flask_login import current_user | from flask_login import current_user | ||||||
|  | from string import punctuation | ||||||
|  | from threading import Thread | ||||||
|  | import nltk | ||||||
| from app import db | from app import db | ||||||
| from app.models import ( | from app.models import ( | ||||||
|     Corpus, |     Corpus, | ||||||
| @@ -12,6 +25,21 @@ from .decorators import corpus_follower_permission_required | |||||||
| from .forms import CreateCorpusForm | from .forms import CreateCorpusForm | ||||||
|  |  | ||||||
|  |  | ||||||
|  |  | ||||||
def _delete_corpus(app: Flask, corpus_id: int):
    """Delete the corpus with the given id and commit the change.

    Runs inside ``app``'s application context so it can be used as the
    target of the background thread started by ``delete_corpus``.

    :param app: Flask application whose context is pushed for the work.
    :param corpus_id: Primary key of the corpus to delete.
    """
    with app.app_context():
        corpus: Corpus = Corpus.query.get(corpus_id)
        if corpus is None:
            # The row may already be gone by the time this thread runs
            # (e.g. two DELETE requests for the same corpus); nothing to do.
            return
        corpus.delete()
        db.session.commit()
|  |  | ||||||
|  |  | ||||||
def _build_corpus(app: Flask, corpus_id: int):
    """Build the corpus with the given id and commit the change.

    Runs inside ``app``'s application context so it can be used as the
    target of the background thread started by ``build_corpus``.

    :param app: Flask application whose context is pushed for the work.
    :param corpus_id: Primary key of the corpus to build.
    """
    with app.app_context():
        corpus = Corpus.query.get(corpus_id)
        if corpus is None:
            # The corpus may have been deleted between the request and this
            # thread running; there is nothing left to build.
            return
        corpus.build()
        db.session.commit()
|  |  | ||||||
|  |  | ||||||
| @bp.route('') | @bp.route('') | ||||||
| def corpora(): | def corpora(): | ||||||
|     return redirect(url_for('main.dashboard', _anchor='corpora')) |     return redirect(url_for('main.dashboard', _anchor='corpora')) | ||||||
| @@ -20,6 +48,7 @@ def corpora(): | |||||||
| @bp.route('/create', methods=['GET', 'POST']) | @bp.route('/create', methods=['GET', 'POST']) | ||||||
| def create_corpus(): | def create_corpus(): | ||||||
|     form = CreateCorpusForm() |     form = CreateCorpusForm() | ||||||
|  |  | ||||||
|     if form.validate_on_submit(): |     if form.validate_on_submit(): | ||||||
|         try: |         try: | ||||||
|             corpus = Corpus.create( |             corpus = Corpus.create( | ||||||
| @@ -30,8 +59,10 @@ def create_corpus(): | |||||||
|         except OSError: |         except OSError: | ||||||
|             abort(500) |             abort(500) | ||||||
|         db.session.commit() |         db.session.commit() | ||||||
|  |  | ||||||
|         flash(f'Corpus "{corpus.title}" created', 'corpus') |         flash(f'Corpus "{corpus.title}" created', 'corpus') | ||||||
|         return redirect(corpus.url) |         return redirect(corpus.url) | ||||||
|  |  | ||||||
|     return render_template( |     return render_template( | ||||||
|         'corpora/create.html.j2', |         'corpora/create.html.j2', | ||||||
|         title='Create corpus', |         title='Create corpus', | ||||||
| @@ -40,12 +71,14 @@ def create_corpus(): | |||||||
|  |  | ||||||
|  |  | ||||||
| @bp.route('/<hashid:corpus_id>') | @bp.route('/<hashid:corpus_id>') | ||||||
| def corpus(corpus_id): | def corpus(corpus_id: int): | ||||||
|     corpus = Corpus.query.get_or_404(corpus_id) |     corpus = Corpus.query.get_or_404(corpus_id) | ||||||
|     cfrs = CorpusFollowerRole.query.all() |  | ||||||
|     # TODO: Better solution for filtering admin |     cfa = CorpusFollowerAssociation.query.filter_by( | ||||||
|     users = User.query.filter(User.is_public == True, User.id != current_user.id, User.id != corpus.user.id, User.role_id < 4).all() |         corpus_id=corpus_id, | ||||||
|     cfa = CorpusFollowerAssociation.query.filter_by(corpus_id=corpus_id, follower_id=current_user.id).first() |         follower_id=current_user.id | ||||||
|  |     ).first() | ||||||
|  |  | ||||||
|     if cfa is None: |     if cfa is None: | ||||||
|         if corpus.user == current_user or current_user.is_administrator: |         if corpus.user == current_user or current_user.is_administrator: | ||||||
|             cfr = CorpusFollowerRole.query.filter_by(name='Administrator').first() |             cfr = CorpusFollowerRole.query.filter_by(name='Administrator').first() | ||||||
| @@ -53,7 +86,21 @@ def corpus(corpus_id): | |||||||
|             cfr = CorpusFollowerRole.query.filter_by(name='Anonymous').first() |             cfr = CorpusFollowerRole.query.filter_by(name='Anonymous').first() | ||||||
|     else: |     else: | ||||||
|         cfr = cfa.role |         cfr = cfa.role | ||||||
|     if corpus.user == current_user or current_user.is_administrator: |  | ||||||
|  |     cfrs = CorpusFollowerRole.query.all() | ||||||
|  |  | ||||||
|  |     # TODO: Better solution for filtering admin | ||||||
|  |     users = User.query.filter( | ||||||
|  |         User.is_public == True, | ||||||
|  |         User.id != current_user.id, | ||||||
|  |         User.id != corpus.user.id, | ||||||
|  |         User.role_id < 4 | ||||||
|  |     ).all() | ||||||
|  |  | ||||||
|  |     if ( | ||||||
|  |         corpus.user == current_user | ||||||
|  |         or current_user.is_administrator | ||||||
|  |     ): | ||||||
|         return render_template( |         return render_template( | ||||||
|             'corpora/corpus.html.j2', |             'corpora/corpus.html.j2', | ||||||
|             title=corpus.title, |             title=corpus.title, | ||||||
| @@ -62,8 +109,15 @@ def corpus(corpus_id): | |||||||
|             cfrs=cfrs, |             cfrs=cfrs, | ||||||
|             users=users |             users=users | ||||||
|         ) |         ) | ||||||
|     if (current_user.is_following_corpus(corpus) or corpus.is_public): |  | ||||||
|         cfas = CorpusFollowerAssociation.query.filter(Corpus.id == corpus_id, CorpusFollowerAssociation.follower_id != corpus.user.id).all() |     if ( | ||||||
|  |         current_user.is_following_corpus(corpus) | ||||||
|  |         or corpus.is_public | ||||||
|  |     ): | ||||||
|  |         cfas = CorpusFollowerAssociation.query.filter( | ||||||
|  |             Corpus.id == corpus_id, | ||||||
|  |             CorpusFollowerAssociation.follower_id != corpus.user.id | ||||||
|  |         ).all() | ||||||
|         return render_template( |         return render_template( | ||||||
|             'corpora/public_corpus.html.j2', |             'corpora/public_corpus.html.j2', | ||||||
|             title=corpus.title, |             title=corpus.title, | ||||||
| @@ -73,37 +127,146 @@ def corpus(corpus_id): | |||||||
|             cfas=cfas, |             cfas=cfas, | ||||||
|             users=users |             users=users | ||||||
|         ) |         ) | ||||||
|  |  | ||||||
|     abort(403) |     abort(403) | ||||||
|  |  | ||||||
|  |  | ||||||
@bp.route('/<hashid:corpus_id>', methods=['DELETE'])
def delete_corpus(corpus_id: int):
    """Schedule deletion of a corpus and answer immediately.

    Responds 404 if the corpus does not exist and 403 unless the current
    user owns the corpus or is an administrator. Otherwise the deletion is
    handed to a background thread and a JSON message is returned with
    status 202.
    """
    corpus = Corpus.query.get_or_404(corpus_id)

    is_owner = corpus.user == current_user
    if not (is_owner or current_user.is_administrator):
        abort(403)

    # Hand the real application object (not the context-bound proxy) to the
    # worker thread.
    app = current_app._get_current_object()
    Thread(target=_delete_corpus, args=(app, corpus.id)).start()

    message = f'Corpus "{corpus.title}" marked for deletion.'
    return jsonify(message), 202
|  |  | ||||||
|  |  | ||||||
@bp.route('/<hashid:corpus_id>/build', methods=['PATCH'])
def build_corpus(corpus_id: int):
    """Schedule a corpus build and answer immediately.

    Responds 404 if the corpus does not exist, 403 unless the current user
    owns the corpus, is an administrator, or follows it with a role that has
    the ``MANAGE_FILES`` permission, and 409 if the corpus has no files.
    Otherwise the build is handed to a background thread and a JSON message
    is returned with status 202.
    """
    corpus = Corpus.query.get_or_404(corpus_id)

    cfa = CorpusFollowerAssociation.query.filter_by(
        corpus_id=corpus_id,
        follower_id=current_user.id
    ).first()

    if not (
        cfa is not None and cfa.role.has_permission('MANAGE_FILES')
        or corpus.user == current_user
        or current_user.is_administrator
    ):
        abort(403)

    # Only emptiness matters here, so fetch at most one row instead of
    # loading every file of the corpus into memory.
    if corpus.files.first() is None:
        abort(409)

    thread = Thread(
        target=_build_corpus,
        # Pass the real application object, not the context-bound proxy.
        args=(current_app._get_current_object(), corpus.id)
    )
    thread.start()

    return jsonify(f'Corpus "{corpus.title}" marked for building.'), 202
|  |  | ||||||
|  |  | ||||||
@bp.route('/<hashid:corpus_id>/generate-share-link', methods=['POST'])
def create_share_link(
    corpus_id: int,
    expiration_date: 'str | None' = None,
    role_name: 'str | None' = None
):
    """Create an external link that lets its holder follow the corpus.

    Responds 400 if ``expiration_date`` or ``role_name`` is not a string, if
    the date does not match the ``'%b %d, %Y'`` format, or if no
    ``CorpusFollowerRole`` with that name exists; 404 if the corpus does not
    exist; 403 unless the current user owns the corpus, is an administrator,
    or follows it with a role that has the ``MANAGE_FOLLOWERS`` permission.
    On success the share link is returned as JSON.

    NOTE(review): the URL rule only carries ``corpus_id``, so a plain routed
    request never supplies ``expiration_date`` or ``role_name``. They default
    to ``None`` so that such a request is rejected with 400 instead of
    failing with a ``TypeError``; whatever is meant to provide these values
    still has to be wired up.

    :param corpus_id: Id of the corpus to share.
    :param expiration_date: Expiry of the link, formatted ``'%b %d, %Y'``.
    :param role_name: Name of the follower role granted by the link.
    """
    if not isinstance(expiration_date, str):
        abort(400)

    if not isinstance(role_name, str):
        abort(400)

    corpus = Corpus.query.get_or_404(corpus_id)

    cfa = CorpusFollowerAssociation.query.filter_by(
        corpus_id=corpus_id,
        follower_id=current_user.id
    ).first()

    if not (
        cfa is not None and cfa.role.has_permission('MANAGE_FOLLOWERS')
        or corpus.user == current_user
        or current_user.is_administrator
    ):
        abort(403)

    try:
        _expiration_date = datetime.strptime(expiration_date, '%b %d, %Y')
    except ValueError:
        # A malformed date is a client error, not a server failure.
        abort(400)

    cfr = CorpusFollowerRole.query.filter_by(name=role_name).first()
    if cfr is None:
        abort(400)

    token = current_user.generate_follow_corpus_token(
        corpus.hashid,
        role_name,
        _expiration_date
    )

    corpus_share_link = url_for(
        'corpora.follow_corpus',
        corpus_id=corpus_id,
        token=token,
        _external=True
    )

    return jsonify(corpus_share_link)
|  |  | ||||||
|  |  | ||||||
| @bp.route('/<hashid:corpus_id>/analysis') | @bp.route('/<hashid:corpus_id>/analysis') | ||||||
| @corpus_follower_permission_required('VIEW') | @corpus_follower_permission_required('VIEW') | ||||||
| def analysis(corpus_id): | def analysis(corpus_id: int): | ||||||
|     corpus = Corpus.query.get_or_404(corpus_id) |     corpus = Corpus.query.get_or_404(corpus_id) | ||||||
|  |  | ||||||
|     return render_template( |     return render_template( | ||||||
|         'corpora/analysis.html.j2', |         'corpora/analysis.html.j2', | ||||||
|         corpus=corpus, |         corpus=corpus, | ||||||
|         title=f'Analyse Corpus {corpus.title}' |         title=f'Analyse Corpus {corpus.title}' | ||||||
|     ) |     ) | ||||||
|  |  | ||||||
@bp.route('/<hashid:corpus_id>/analysis/stopwords')
def get_stopwords(corpus_id: 'int | None' = None):
    """Return the stopword lists used by the corpus analysis as JSON.

    The response maps each supported language name to its NLTK stopword
    list, plus a ``punctuation`` list and an empty ``user_stopwords`` list.

    :param corpus_id: Value of the ``corpus_id`` URL variable. It is not used
        by the body, but the view must accept it because the route declares
        it; it defaults to ``None`` so calls without arguments keep working.
    """
    languages = [
        'german',
        'english',
        'catalan',
        'greek',
        'spanish',
        'french',
        'italian',
        'russian',
        'chinese'
    ]

    nltk.download('stopwords', quiet=True)
    stopwords = {
        language: nltk.corpus.stopwords.words(language)
        for language in languages
    }
    stopwords['punctuation'] = list(punctuation)
    stopwords['punctuation'] += ['—', '|', '–', '“', '„', '--']
    stopwords['user_stopwords'] = []

    return jsonify(stopwords)
|  |  | ||||||
|  |  | ||||||
| @bp.route('/<hashid:corpus_id>/follow/<token>') | @bp.route('/<hashid:corpus_id>/follow/<token>') | ||||||
| def follow_corpus(corpus_id, token): | def follow_corpus(corpus_id: int, token: str): | ||||||
|     corpus = Corpus.query.get_or_404(corpus_id) |     corpus = Corpus.query.get_or_404(corpus_id) | ||||||
|     if current_user.follow_corpus_by_token(token): |  | ||||||
|         db.session.commit() |  | ||||||
|         flash(f'You are following "{corpus.title}" now', category='corpus') |  | ||||||
|         return redirect(url_for('corpora.corpus', corpus_id=corpus_id)) |  | ||||||
|     abort(403) |  | ||||||
|  |  | ||||||
|  |     if not current_user.follow_corpus_by_token(token): | ||||||
|  |         abort(403) | ||||||
|  |  | ||||||
| @bp.route('/import', methods=['GET', 'POST']) |     db.session.commit() | ||||||
| def import_corpus(): |  | ||||||
|     abort(503) |  | ||||||
|  |  | ||||||
|  |     flash(f'You are following "{corpus.title}" now', category='corpus') | ||||||
| @bp.route('/<hashid:corpus_id>/export') |     return redirect(corpus.url) | ||||||
| @corpus_follower_permission_required('VIEW') |  | ||||||
| def export_corpus(corpus_id): |  | ||||||
|     abort(503) |  | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user