nopaque/app/namespaces/corpora.py
2024-12-09 16:12:49 +01:00

216 lines
6.6 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

from datetime import datetime
from flask import current_app, Flask, url_for
from flask_login import current_user
from flask_socketio import Namespace
from string import punctuation
import nltk
from app import db, hashids, socketio
from app.decorators import socketio_login_required
from app.models import Corpus, CorpusFollowerAssociation, CorpusFollowerRole
def _delete_corpus(app: Flask, corpus_id: int):
    """Delete the corpus with the given id and commit the session.

    Intended to run as a background task, so it pushes its own application
    context from ``app`` before touching the database.

    Args:
        app: The Flask application whose context is used for the DB access.
        corpus_id: Primary key of the corpus to delete.
    """
    with app.app_context():
        corpus = Corpus.query.get(corpus_id)
        if corpus is None:
            # The corpus may already be gone by the time this task runs
            # (e.g. two delete requests in quick succession); nothing to do.
            return
        corpus.delete()
        db.session.commit()
def _build_corpus(app: Flask, corpus_id: int):
    """Build the corpus with the given id and commit the session.

    Intended to run as a background task, so it pushes its own application
    context from ``app`` before touching the database.

    Args:
        app: The Flask application whose context is used for the DB access.
        corpus_id: Primary key of the corpus to build.
    """
    with app.app_context():
        corpus = Corpus.query.get(corpus_id)
        if corpus is None:
            # The corpus may have been removed between the request and the
            # start of this task; there is nothing left to build.
            return
        corpus.build()
        db.session.commit()
class CorporaNamespace(Namespace):
    """Socket.IO namespace with event handlers for corpus management.

    Every handler returns a dict carrying an HTTP-style ``status`` code and
    ``statusText``; successful responses additionally carry a ``body``.
    """

    @socketio_login_required
    def on_delete(self, corpus_hashid: str) -> dict:
        """Schedule the deletion of a corpus in a background task.

        Args:
            corpus_hashid: Hashid string identifying the corpus.

        Returns:
            400 if the argument is not a string or does not decode to a
            single int, 404 if no such corpus exists, 403 if the current
            user is neither the corpus owner nor an administrator, and 202
            once the deletion task has been started.
        """
        if not isinstance(corpus_hashid, str):
            return {'status': 400, 'statusText': 'Bad Request'}
        corpus_id = hashids.decode(corpus_hashid)
        if not isinstance(corpus_id, int):
            return {'status': 400, 'statusText': 'Bad Request'}
        corpus = Corpus.query.get(corpus_id)
        if corpus is None:
            return {'status': 404, 'statusText': 'Not Found'}
        if not (
            corpus.user == current_user
            or current_user.is_administrator
        ):
            return {'status': 403, 'statusText': 'Forbidden'}
        # Hand the real app object (not the context-bound proxy) to the task,
        # which runs outside of this request's application context.
        socketio.start_background_task(
            _delete_corpus,
            current_app._get_current_object(),
            corpus_id
        )
        return {
            'body': f'Corpus "{corpus.title}" marked for deletion',
            'status': 202,
            'statusText': 'Accepted'
        }

    @socketio_login_required
    def on_build(self, corpus_hashid: str) -> dict:
        """Schedule the build of a corpus in a background task.

        Args:
            corpus_hashid: Hashid string identifying the corpus.

        Returns:
            400 if the argument is not a string or does not decode to a
            single int, 404 if no such corpus exists, 403 if the current
            user is not a follower with the ``MANAGE_FILES`` permission, the
            corpus owner, or an administrator, 409 if the corpus has no
            files, and 202 once the build task has been started.
        """
        if not isinstance(corpus_hashid, str):
            return {'status': 400, 'statusText': 'Bad Request'}
        corpus_id = hashids.decode(corpus_hashid)
        if not isinstance(corpus_id, int):
            return {'status': 400, 'statusText': 'Bad Request'}
        corpus = Corpus.query.get(corpus_id)
        if corpus is None:
            return {'status': 404, 'statusText': 'Not Found'}
        cfa = CorpusFollowerAssociation.query.filter_by(
            corpus_id=corpus_id,
            follower_id=current_user.id
        ).first()
        can_manage_files = (
            cfa is not None and cfa.role.has_permission('MANAGE_FILES')
        )
        if not (
            can_manage_files
            or corpus.user == current_user
            or current_user.is_administrator
        ):
            return {'status': 403, 'statusText': 'Forbidden'}
        # Only existence matters here, so avoid loading every file row.
        if corpus.files.first() is None:
            return {'status': 409, 'statusText': 'Conflict'}
        # Hand the real app object (not the context-bound proxy) to the task,
        # which runs outside of this request's application context.
        socketio.start_background_task(
            _build_corpus,
            current_app._get_current_object(),
            corpus_id
        )
        return {
            'body': f'Corpus "{corpus.title}" marked for building',
            'status': 202,
            'statusText': 'Accepted'
        }

    # TODO: Think about where to place this, as this does not belong here...
    @socketio_login_required
    def on_get_stopwords(self) -> dict:
        """Return stopword lists per language plus punctuation tokens.

        Returns:
            A 200 response whose ``body`` maps each supported language name
            to its NLTK stopword list, ``'punctuation'`` to a list of
            punctuation tokens, and ``'user_stopwords'`` to an empty list.
        """
        languages = [
            'german',
            'english',
            'catalan',
            'greek',
            'spanish',
            'french',
            'italian',
            'russian',
            'chinese'
        ]
        # Make sure the NLTK stopwords corpus is available before reading it.
        nltk.download('stopwords', quiet=True)
        stopwords = {
            language: nltk.corpus.stopwords.words(language)
            for language in languages
        }
        stopwords['punctuation'] = list(punctuation)
        # NOTE(review): the empty-string entries below may be non-ASCII
        # punctuation characters that were lost in transit - confirm against
        # the repository before relying on them.
        stopwords['punctuation'] += ['', '|', '', '', '', '--']
        stopwords['user_stopwords'] = []
        return {
            'body': stopwords,
            'status': 200,
            'statusText': 'OK'
        }

    @socketio_login_required
    def on_create_share_link(
        self,
        corpus_hashid: str,
        expiration_date: str,
        role_name: str
    ) -> dict:
        """Create a link that lets another user follow a corpus.

        Args:
            corpus_hashid: Hashid string identifying the corpus.
            expiration_date: Expiration date formatted as ``'%b %d, %Y'``
                (e.g. ``'Jan 31, 2025'``).
            role_name: Name of the ``CorpusFollowerRole`` encoded in the
                token.

        Returns:
            400 if an argument is not a string, the hashid does not decode
            to a single int, the date cannot be parsed, or the role does not
            exist; 404 if no such corpus exists; 403 if the current user is
            not a follower with the ``MANAGE_FOLLOWERS`` permission, the
            corpus owner, or an administrator; otherwise 200 with the
            external share URL as ``body``.
        """
        if not isinstance(corpus_hashid, str):
            return {'status': 400, 'statusText': 'Bad Request'}
        if not isinstance(expiration_date, str):
            return {'status': 400, 'statusText': 'Bad Request'}
        if not isinstance(role_name, str):
            return {'status': 400, 'statusText': 'Bad Request'}
        corpus_id = hashids.decode(corpus_hashid)
        if not isinstance(corpus_id, int):
            return {'status': 400, 'statusText': 'Bad Request'}
        corpus = Corpus.query.get(corpus_id)
        if corpus is None:
            return {'status': 404, 'statusText': 'Not Found'}
        cfa = CorpusFollowerAssociation.query.filter_by(
            corpus_id=corpus_id,
            follower_id=current_user.id
        ).first()
        can_manage_followers = (
            cfa is not None and cfa.role.has_permission('MANAGE_FOLLOWERS')
        )
        if not (
            can_manage_followers
            or corpus.user == current_user
            or current_user.is_administrator
        ):
            return {'status': 403, 'statusText': 'Forbidden'}
        try:
            _expiration_date = datetime.strptime(expiration_date, '%b %d, %Y')
        except ValueError:
            # The date string comes from the client; a malformed value is a
            # bad request, not a server error.
            return {'status': 400, 'statusText': 'Bad Request'}
        cfr = CorpusFollowerRole.query.filter_by(name=role_name).first()
        if cfr is None:
            return {'status': 400, 'statusText': 'Bad Request'}
        token = current_user.generate_follow_corpus_token(
            corpus.hashid,
            role_name,
            _expiration_date
        )
        corpus_share_link = url_for(
            'corpora.follow_corpus',
            corpus_id=corpus_id,
            token=token,
            _external=True
        )
        return {
            'body': corpus_share_link,
            'status': 200,
            'statusText': 'OK'
        }

    @socketio_login_required
    def on_set_is_public(self, corpus_hashid: str, new_value: bool) -> dict:
        """Set the ``is_public`` flag of a corpus and commit the change.

        Args:
            corpus_hashid: Hashid string identifying the corpus.
            new_value: The new value for ``Corpus.is_public``.

        Returns:
            400 if ``corpus_hashid`` is not a string, ``new_value`` is not a
            bool, or the hashid does not decode to a single int; 404 if no
            such corpus exists; 403 if the current user is neither the
            corpus owner nor an administrator; otherwise 200 with a
            confirmation message as ``body``.
        """
        if not isinstance(corpus_hashid, str):
            return {'status': 400, 'statusText': 'Bad Request'}
        if not isinstance(new_value, bool):
            return {'status': 400, 'statusText': 'Bad Request'}
        corpus_id = hashids.decode(corpus_hashid)
        if not isinstance(corpus_id, int):
            return {'status': 400, 'statusText': 'Bad Request'}
        corpus = Corpus.query.get(corpus_id)
        if corpus is None:
            return {'status': 404, 'statusText': 'Not Found'}
        if not (
            corpus.user == current_user
            or current_user.is_administrator
        ):
            return {'status': 403, 'statusText': 'Forbidden'}
        corpus.is_public = new_value
        db.session.commit()
        return {
            'body': f'Corpus "{corpus.title}" is now {"public" if new_value else "private"}',
            'status': 200,
            'statusText': 'OK'
        }