mirror of
				https://gitlab.ub.uni-bielefeld.de/sfb1288inf/nopaque.git
				synced 2025-11-04 12:22:47 +00:00 
			
		
		
		
	Update corpora package
This commit is contained in:
		@@ -1,75 +1,14 @@
 | 
			
		||||
from datetime import datetime
 | 
			
		||||
from flask import abort, current_app, request, url_for
 | 
			
		||||
from flask import abort, request, url_for
 | 
			
		||||
from flask_login import current_user
 | 
			
		||||
from threading import Thread
 | 
			
		||||
from app import db
 | 
			
		||||
from app.decorators import content_negotiation
 | 
			
		||||
from app.models import Corpus, CorpusFollowerRole
 | 
			
		||||
from . import bp
 | 
			
		||||
from .decorators import corpus_follower_permission_required, corpus_owner_or_admin_required
 | 
			
		||||
import nltk
 | 
			
		||||
from string import punctuation
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@bp.route('/<hashid:corpus_id>', methods=['DELETE'])
@corpus_owner_or_admin_required
@content_negotiation(produces='application/json')
def delete_corpus(corpus_id):
    """Start deleting a corpus in a background thread.

    The corpus is looked up first, so an unknown ``corpus_id`` results in a
    404 before any thread is started. The deletion itself runs in a separate
    thread; the response is returned without waiting for it to finish.

    Args:
        corpus_id: Identifier of the corpus, taken from the URL.

    Returns:
        A ``(body, status)`` tuple: a dict with the keys ``message`` and
        ``category``, and the status code 200.
    """
    def _delete_corpus(app, corpus_id):
        # Run inside the application context of the app object handed over
        # by the request handler.
        with app.app_context():
            corpus = Corpus.query.get(corpus_id)
            if corpus is None:
                # The row can be gone by the time this thread runs (for
                # example when two delete requests race); there is nothing
                # left to do in that case.
                app.logger.warning(
                    'Corpus %s no longer exists, skipping deletion',
                    corpus_id
                )
                return
            corpus.delete()
            db.session.commit()

    corpus = Corpus.query.get_or_404(corpus_id)
    thread = Thread(
        target=_delete_corpus,
        # Pass the real application object, not the context-bound proxy.
        args=(current_app._get_current_object(), corpus.id)
    )
    thread.start()
    response_data = {
        'message': f'Corpus "{corpus.title}" marked for deletion',
        'category': 'corpus'
    }
    return response_data, 200
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@bp.route('/<hashid:corpus_id>/build', methods=['POST'])
@corpus_follower_permission_required('MANAGE_FILES')
@content_negotiation(produces='application/json')
def build_corpus(corpus_id):
    """Start building a corpus in a background thread.

    Aborts with 404 when the corpus does not exist and with 409 when the
    corpus has no files. Otherwise the build is started in a separate
    thread and the response is returned without waiting for it.

    Args:
        corpus_id: Identifier of the corpus, taken from the URL.

    Returns:
        A ``(body, status)`` tuple: a dict with the keys ``message`` and
        ``category``, and the status code 202.
    """
    def _build_corpus(app, corpus_id):
        # Run inside the application context of the app object handed over
        # by the request handler.
        with app.app_context():
            corpus = Corpus.query.get(corpus_id)
            if corpus is None:
                # The corpus may have been removed between the request and
                # the start of this thread; nothing can be built then.
                app.logger.warning(
                    'Corpus %s no longer exists, skipping build',
                    corpus_id
                )
                return
            corpus.build()
            db.session.commit()

    corpus = Corpus.query.get_or_404(corpus_id)
    if len(corpus.files.all()) == 0:
        # A corpus without files cannot be built.
        abort(409)
    thread = Thread(
        target=_build_corpus,
        # Pass the real application object, not the context-bound proxy.
        args=(current_app._get_current_object(), corpus_id)
    )
    thread.start()
    response_data = {
        'message': f'Corpus "{corpus.title}" marked for building',
        'category': 'corpus'
    }
    return response_data, 202
 | 
			
		||||
 | 
			
		||||
@bp.route('/stopwords')
@content_negotiation(produces='application/json')
def get_stopwords():
    """Return stopword lists for a fixed set of languages.

    The NLTK ``stopwords`` resource is requested via ``nltk.download``
    before the word lists are read.

    Returns:
        A ``(body, status)`` tuple. The body maps each language name to its
        NLTK stopword list and additionally contains ``punctuation`` (ASCII
        punctuation plus a few typographic marks) and an empty
        ``user_stopwords`` list. The status code is 202.
    """
    nltk.download('stopwords', quiet=True)
    supported_languages = (
        'german',
        'english',
        'catalan',
        'greek',
        'spanish',
        'french',
        'italian',
        'russian',
        'chinese',
    )
    response_data = {
        name: nltk.corpus.stopwords.words(name)
        for name in supported_languages
    }
    # ASCII punctuation first, followed by the extra typographic marks.
    response_data['punctuation'] = [
        *punctuation, '—', '|', '–', '“', '„', '--'
    ]
    response_data['user_stopwords'] = []
    return response_data, 202
 | 
			
		||||
 | 
			
		||||
@bp.route('/<hashid:corpus_id>/generate-share-link', methods=['POST'])
 | 
			
		||||
@corpus_follower_permission_required('MANAGE_FOLLOWERS')
 | 
			
		||||
@@ -102,7 +41,7 @@ def generate_corpus_share_link(corpus_id):
 | 
			
		||||
        'corpusShareLink': corpus_share_link
 | 
			
		||||
    }
 | 
			
		||||
    return response_data, 200
 | 
			
		||||
    
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@bp.route('/<hashid:corpus_id>/is_public', methods=['PUT'])
 | 
			
		||||
 
 | 
			
		||||
@@ -1,5 +1,18 @@
 | 
			
		||||
from flask import abort, flash, redirect, render_template, url_for
 | 
			
		||||
from datetime import datetime
 | 
			
		||||
from flask import (
 | 
			
		||||
    abort,
 | 
			
		||||
    current_app,
 | 
			
		||||
    flash,
 | 
			
		||||
    Flask,
 | 
			
		||||
    jsonify,
 | 
			
		||||
    redirect,
 | 
			
		||||
    render_template,
 | 
			
		||||
    url_for
 | 
			
		||||
)
 | 
			
		||||
from flask_login import current_user
 | 
			
		||||
from string import punctuation
 | 
			
		||||
from threading import Thread
 | 
			
		||||
import nltk
 | 
			
		||||
from app import db
 | 
			
		||||
from app.models import (
 | 
			
		||||
    Corpus,
 | 
			
		||||
@@ -12,6 +25,21 @@ from .decorators import corpus_follower_permission_required
 | 
			
		||||
from .forms import CreateCorpusForm
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def _delete_corpus(app: Flask, corpus_id: int):
    """Delete a corpus and commit the change; meant to run in a thread.

    Args:
        app: Application object whose application context is entered.
        corpus_id: Primary key of the corpus to delete.
    """
    with app.app_context():
        corpus: Corpus = Corpus.query.get(corpus_id)
        if corpus is None:
            # The row can already be gone when this runs (for example when
            # two delete requests race); there is nothing left to delete.
            app.logger.warning(
                'Corpus %s no longer exists, skipping deletion',
                corpus_id
            )
            return
        corpus.delete()
        db.session.commit()
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def _build_corpus(app: Flask, corpus_id: int):
    """Build a corpus and commit the change; meant to run in a thread.

    Args:
        app: Application object whose application context is entered.
        corpus_id: Primary key of the corpus to build.
    """
    with app.app_context():
        corpus = Corpus.query.get(corpus_id)
        if corpus is None:
            # The corpus may have been removed between the request and the
            # start of this thread; nothing can be built then.
            app.logger.warning(
                'Corpus %s no longer exists, skipping build',
                corpus_id
            )
            return
        corpus.build()
        db.session.commit()
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@bp.route('')
def corpora():
    """Redirect to the ``corpora`` anchor of the ``main.dashboard`` page."""
    dashboard_url = url_for('main.dashboard', _anchor='corpora')
    return redirect(dashboard_url)
 | 
			
		||||
@@ -20,6 +48,7 @@ def corpora():
 | 
			
		||||
@bp.route('/create', methods=['GET', 'POST'])
 | 
			
		||||
def create_corpus():
 | 
			
		||||
    form = CreateCorpusForm()
 | 
			
		||||
 | 
			
		||||
    if form.validate_on_submit():
 | 
			
		||||
        try:
 | 
			
		||||
            corpus = Corpus.create(
 | 
			
		||||
@@ -30,8 +59,10 @@ def create_corpus():
 | 
			
		||||
        except OSError:
 | 
			
		||||
            abort(500)
 | 
			
		||||
        db.session.commit()
 | 
			
		||||
 | 
			
		||||
        flash(f'Corpus "{corpus.title}" created', 'corpus')
 | 
			
		||||
        return redirect(corpus.url)
 | 
			
		||||
 | 
			
		||||
    return render_template(
 | 
			
		||||
        'corpora/create.html.j2',
 | 
			
		||||
        title='Create corpus',
 | 
			
		||||
@@ -40,12 +71,14 @@ def create_corpus():
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@bp.route('/<hashid:corpus_id>')
 | 
			
		||||
def corpus(corpus_id):
 | 
			
		||||
def corpus(corpus_id: int):
 | 
			
		||||
    corpus = Corpus.query.get_or_404(corpus_id)
 | 
			
		||||
    cfrs = CorpusFollowerRole.query.all()
 | 
			
		||||
    # TODO: Better solution for filtering admin
 | 
			
		||||
    users = User.query.filter(User.is_public == True, User.id != current_user.id, User.id != corpus.user.id, User.role_id < 4).all()
 | 
			
		||||
    cfa = CorpusFollowerAssociation.query.filter_by(corpus_id=corpus_id, follower_id=current_user.id).first()
 | 
			
		||||
 | 
			
		||||
    cfa = CorpusFollowerAssociation.query.filter_by(
 | 
			
		||||
        corpus_id=corpus_id,
 | 
			
		||||
        follower_id=current_user.id
 | 
			
		||||
    ).first()
 | 
			
		||||
 | 
			
		||||
    if cfa is None:
 | 
			
		||||
        if corpus.user == current_user or current_user.is_administrator:
 | 
			
		||||
            cfr = CorpusFollowerRole.query.filter_by(name='Administrator').first()
 | 
			
		||||
@@ -53,7 +86,21 @@ def corpus(corpus_id):
 | 
			
		||||
            cfr = CorpusFollowerRole.query.filter_by(name='Anonymous').first()
 | 
			
		||||
    else:
 | 
			
		||||
        cfr = cfa.role
 | 
			
		||||
    if corpus.user == current_user or current_user.is_administrator:
 | 
			
		||||
 | 
			
		||||
    cfrs = CorpusFollowerRole.query.all()
 | 
			
		||||
 | 
			
		||||
    # TODO: Better solution for filtering admin
 | 
			
		||||
    users = User.query.filter(
 | 
			
		||||
        User.is_public == True,
 | 
			
		||||
        User.id != current_user.id,
 | 
			
		||||
        User.id != corpus.user.id,
 | 
			
		||||
        User.role_id < 4
 | 
			
		||||
    ).all()
 | 
			
		||||
 | 
			
		||||
    if (
 | 
			
		||||
        corpus.user == current_user
 | 
			
		||||
        or current_user.is_administrator
 | 
			
		||||
    ):
 | 
			
		||||
        return render_template(
 | 
			
		||||
            'corpora/corpus.html.j2',
 | 
			
		||||
            title=corpus.title,
 | 
			
		||||
@@ -62,8 +109,15 @@ def corpus(corpus_id):
 | 
			
		||||
            cfrs=cfrs,
 | 
			
		||||
            users=users
 | 
			
		||||
        )
 | 
			
		||||
    if (current_user.is_following_corpus(corpus) or corpus.is_public):
 | 
			
		||||
        cfas = CorpusFollowerAssociation.query.filter(Corpus.id == corpus_id, CorpusFollowerAssociation.follower_id != corpus.user.id).all()
 | 
			
		||||
 | 
			
		||||
    if (
 | 
			
		||||
        current_user.is_following_corpus(corpus)
 | 
			
		||||
        or corpus.is_public
 | 
			
		||||
    ):
 | 
			
		||||
        cfas = CorpusFollowerAssociation.query.filter(
 | 
			
		||||
            Corpus.id == corpus_id,
 | 
			
		||||
            CorpusFollowerAssociation.follower_id != corpus.user.id
 | 
			
		||||
        ).all()
 | 
			
		||||
        return render_template(
 | 
			
		||||
            'corpora/public_corpus.html.j2',
 | 
			
		||||
            title=corpus.title,
 | 
			
		||||
@@ -73,37 +127,146 @@ def corpus(corpus_id):
 | 
			
		||||
            cfas=cfas,
 | 
			
		||||
            users=users
 | 
			
		||||
        )
 | 
			
		||||
 | 
			
		||||
    abort(403)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@bp.route('/<hashid:corpus_id>', methods=['DELETE'])
def delete_corpus(corpus_id: int):
    """Start deleting a corpus in a background thread.

    Aborts with 404 when the corpus does not exist and with 403 when the
    current user is neither the owner of the corpus nor an administrator.

    Args:
        corpus_id: Identifier of the corpus, taken from the URL.

    Returns:
        A JSON string response announcing the deletion, with status 202.
    """
    corpus = Corpus.query.get_or_404(corpus_id)

    may_delete = (
        corpus.user == current_user
        or current_user.is_administrator
    )
    if not may_delete:
        abort(403)

    # Hand the real application object (not the proxy) to the worker.
    app = current_app._get_current_object()
    worker = Thread(target=_delete_corpus, args=(app, corpus.id))
    worker.start()

    message = f'Corpus "{corpus.title}" marked for deletion.'
    return jsonify(message), 202
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@bp.route('/<hashid:corpus_id>/build', methods=['PATCH'])
def build_corpus(corpus_id: int):
    """Start building a corpus in a background thread.

    Aborts with 404 when the corpus does not exist, with 403 when the
    current user is not a follower whose role has the ``MANAGE_FILES``
    permission, not the owner and not an administrator, and with 409 when
    the corpus has no files.

    Args:
        corpus_id: Identifier of the corpus, taken from the URL.

    Returns:
        A JSON string response announcing the build, with status 202.
    """
    corpus = Corpus.query.get_or_404(corpus_id)

    # Association between the current user and this corpus, if any.
    cfa = CorpusFollowerAssociation.query.filter_by(
        corpus_id=corpus_id,
        follower_id=current_user.id
    ).first()

    may_build = (
        (cfa is not None and cfa.role.has_permission('MANAGE_FILES'))
        or corpus.user == current_user
        or current_user.is_administrator
    )
    if not may_build:
        abort(403)

    if not corpus.files.all():
        # Nothing to build from.
        abort(409)

    # Hand the real application object (not the proxy) to the worker.
    app = current_app._get_current_object()
    worker = Thread(target=_build_corpus, args=(app, corpus.id))
    worker.start()

    message = f'Corpus "{corpus.title}" marked for building.'
    return jsonify(message), 202
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@bp.route('/<hashid:corpus_id>/generate-share-link', methods=['POST'])
def create_share_link(corpus_id: int, expiration_date: str, role_name: str):
    """Create an external link that lets another user follow a corpus.

    Aborts with 400 when ``expiration_date`` or ``role_name`` is not a
    string, when ``expiration_date`` does not match the ``'%b %d, %Y'``
    format, or when no follower role named ``role_name`` exists. Aborts
    with 404 when the corpus does not exist and with 403 when the current
    user is not a follower whose role has the ``MANAGE_FOLLOWERS``
    permission, not the owner and not an administrator.

    Args:
        corpus_id: Identifier of the corpus, taken from the URL.
        expiration_date: Expiry of the link, formatted like ``Jan 31, 2024``.
        role_name: Name of the follower role encoded into the token.

    Returns:
        A JSON response containing the share link as a string.
    """
    # NOTE(review): the route rule only declares ``corpus_id``; confirm how
    # ``expiration_date`` and ``role_name`` are supplied to this view.
    if not isinstance(expiration_date, str):
        abort(400)

    if not isinstance(role_name, str):
        abort(400)

    corpus = Corpus.query.get_or_404(corpus_id)

    # Association between the current user and this corpus, if any.
    cfa = CorpusFollowerAssociation.query.filter_by(
        corpus_id=corpus_id,
        follower_id=current_user.id
    ).first()

    if not (
        cfa is not None and cfa.role.has_permission('MANAGE_FOLLOWERS')
        or corpus.user == current_user
        or current_user.is_administrator
    ):
        abort(403)

    try:
        _expiration_date = datetime.strptime(expiration_date, '%b %d, %Y')
    except ValueError:
        # A malformed date is a client error, like the other input checks,
        # and must not surface as an internal server error.
        abort(400)

    cfr = CorpusFollowerRole.query.filter_by(name=role_name).first()
    if cfr is None:
        abort(400)

    token = current_user.generate_follow_corpus_token(
        corpus.hashid,
        role_name,
        _expiration_date
    )

    corpus_share_link = url_for(
        'corpora.follow_corpus',
        corpus_id=corpus_id,
        token=token,
        _external=True
    )

    return jsonify(corpus_share_link)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@bp.route('/<hashid:corpus_id>/analysis')
 | 
			
		||||
@corpus_follower_permission_required('VIEW')
 | 
			
		||||
def analysis(corpus_id):
 | 
			
		||||
def analysis(corpus_id: int):
 | 
			
		||||
    corpus = Corpus.query.get_or_404(corpus_id)
 | 
			
		||||
 | 
			
		||||
    return render_template(
 | 
			
		||||
        'corpora/analysis.html.j2',
 | 
			
		||||
        corpus=corpus,
 | 
			
		||||
        title=f'Analyse Corpus {corpus.title}'
 | 
			
		||||
    )
 | 
			
		||||
 | 
			
		||||
@bp.route('/<hashid:corpus_id>/analysis/stopwords')
def get_stopwords(corpus_id=None):
    """Return stopword lists for a fixed set of languages as JSON.

    The NLTK ``stopwords`` resource is requested via ``nltk.download``
    before the word lists are read.

    Args:
        corpus_id: Identifier from the URL. It is accepted because the
            route rule declares it and Flask passes URL variables to the
            view as keyword arguments; the value itself is not used.

    Returns:
        A JSON response mapping each language name to its NLTK stopword
        list, plus ``punctuation`` (ASCII punctuation and a few typographic
        marks) and an empty ``user_stopwords`` list.
    """
    languages = [
        'german',
        'english',
        'catalan',
        'greek',
        'spanish',
        'french',
        'italian',
        'russian',
        'chinese'
    ]

    nltk.download('stopwords', quiet=True)
    stopwords = {
        language: nltk.corpus.stopwords.words(language)
        for language in languages
    }
    stopwords['punctuation'] = list(punctuation)
    stopwords['punctuation'] += ['—', '|', '–', '“', '„', '--']
    stopwords['user_stopwords'] = []

    return jsonify(stopwords)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@bp.route('/<hashid:corpus_id>/follow/<token>')
 | 
			
		||||
def follow_corpus(corpus_id, token):
 | 
			
		||||
def follow_corpus(corpus_id: int, token: str):
 | 
			
		||||
    corpus = Corpus.query.get_or_404(corpus_id)
 | 
			
		||||
    if current_user.follow_corpus_by_token(token):
 | 
			
		||||
        db.session.commit()
 | 
			
		||||
        flash(f'You are following "{corpus.title}" now', category='corpus')
 | 
			
		||||
        return redirect(url_for('corpora.corpus', corpus_id=corpus_id))
 | 
			
		||||
    abort(403)
 | 
			
		||||
 | 
			
		||||
    if not current_user.follow_corpus_by_token(token):
 | 
			
		||||
        abort(403)
 | 
			
		||||
 | 
			
		||||
@bp.route('/import', methods=['GET', 'POST'])
 | 
			
		||||
def import_corpus():
 | 
			
		||||
    abort(503)
 | 
			
		||||
    db.session.commit()
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@bp.route('/<hashid:corpus_id>/export')
 | 
			
		||||
@corpus_follower_permission_required('VIEW')
 | 
			
		||||
def export_corpus(corpus_id):
 | 
			
		||||
    abort(503)
 | 
			
		||||
    flash(f'You are following "{corpus.title}" now', category='corpus')
 | 
			
		||||
    return redirect(corpus.url)
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user