Implement corpora endpoint/socket.io namespace

This commit is contained in:
Patrick Jentsch
2024-12-09 16:12:49 +01:00
parent 93344c9573
commit 328f85ba52
16 changed files with 339 additions and 182 deletions

215
app/namespaces/corpora.py Normal file
View File

@ -0,0 +1,215 @@
from datetime import datetime
from flask import current_app, Flask, url_for
from flask_login import current_user
from flask_socketio import Namespace
from string import punctuation
import nltk
from app import db, hashids, socketio
from app.decorators import socketio_login_required
from app.models import Corpus, CorpusFollowerAssociation, CorpusFollowerRole
def _delete_corpus(app: Flask, corpus_id: int) -> None:
    """Delete the corpus identified by ``corpus_id`` and commit the change.

    Intended to be run as a background task, which is why the Flask
    application object is passed in explicitly and an application context
    is pushed here.

    Args:
        app: The Flask application whose context is used for the database
            work.
        corpus_id: Primary key passed to ``Corpus.query.get``.
    """
    with app.app_context():
        corpus = Corpus.query.get(corpus_id)
        if corpus is None:
            # The corpus vanished between scheduling and execution (for
            # example because a second delete request got there first).
            # There is nothing left to do.
            return
        corpus.delete()
        db.session.commit()
def _build_corpus(app: Flask, corpus_id: int) -> None:
    """Build the corpus identified by ``corpus_id`` and commit the change.

    Intended to be run as a background task, which is why the Flask
    application object is passed in explicitly and an application context
    is pushed here.

    Args:
        app: The Flask application whose context is used for the database
            work.
        corpus_id: Primary key passed to ``Corpus.query.get``.
    """
    with app.app_context():
        corpus = Corpus.query.get(corpus_id)
        if corpus is None:
            # The corpus was removed after the build was requested; calling
            # ``build()`` on ``None`` would raise inside the background task.
            return
        corpus.build()
        db.session.commit()
class CorporaNamespace(Namespace):
    """Socket.IO namespace with event handlers for corpus related actions.

    Every handler answers with a dict that carries an HTTP-like ``status``
    code and ``statusText`` and, where applicable, a ``body``.
    """

    @socketio_login_required
    def on_delete(self, corpus_hashid: str) -> dict:
        """Schedule the deletion of a corpus.

        Only the corpus owner or an administrator is allowed to do this.
        The deletion itself runs in a background task.

        Args:
            corpus_hashid: Hashid string identifying the corpus.

        Returns:
            A 202 response on success; 400 for a malformed hashid, 404 if
            the corpus does not exist, 403 if the user lacks permission.
        """
        if not isinstance(corpus_hashid, str):
            return {'status': 400, 'statusText': 'Bad Request'}

        corpus_id = hashids.decode(corpus_hashid)

        if not isinstance(corpus_id, int):
            return {'status': 400, 'statusText': 'Bad Request'}

        corpus = Corpus.query.get(corpus_id)

        if corpus is None:
            return {'status': 404, 'statusText': 'Not Found'}

        if not (
            corpus.user == current_user
            or current_user.is_administrator
        ):
            return {'status': 403, 'statusText': 'Forbidden'}

        socketio.start_background_task(
            _delete_corpus,
            current_app._get_current_object(),
            corpus_id
        )

        return {
            'body': f'Corpus "{corpus.title}" marked for deletion',
            'status': 202,
            'statusText': 'Accepted'
        }

    @socketio_login_required
    def on_build(self, corpus_hashid: str) -> dict:
        """Schedule the build of a corpus.

        Allowed for followers whose role has the ``MANAGE_FILES``
        permission, the corpus owner and administrators. The build itself
        runs in a background task.

        Args:
            corpus_hashid: Hashid string identifying the corpus.

        Returns:
            A 202 response on success; 400 for a malformed hashid, 404 if
            the corpus does not exist, 403 if the user lacks permission,
            409 if the corpus has no files.
        """
        if not isinstance(corpus_hashid, str):
            return {'status': 400, 'statusText': 'Bad Request'}

        corpus_id = hashids.decode(corpus_hashid)

        if not isinstance(corpus_id, int):
            return {'status': 400, 'statusText': 'Bad Request'}

        corpus = Corpus.query.get(corpus_id)

        if corpus is None:
            return {'status': 404, 'statusText': 'Not Found'}

        cfa = CorpusFollowerAssociation.query.filter_by(
            corpus_id=corpus_id,
            follower_id=current_user.id
        ).first()

        if not (
            cfa is not None and cfa.role.has_permission('MANAGE_FILES')
            or corpus.user == current_user
            or current_user.is_administrator
        ):
            return {'status': 403, 'statusText': 'Forbidden'}

        # A corpus without files cannot be built.
        if not corpus.files.all():
            return {'status': 409, 'statusText': 'Conflict'}

        socketio.start_background_task(
            _build_corpus,
            current_app._get_current_object(),
            corpus_id
        )

        return {
            'body': f'Corpus "{corpus.title}" marked for building',
            'status': 202,
            'statusText': 'Accepted'
        }

    # TODO: Think about where to place this, as this does not belong here...
    @socketio_login_required
    def on_get_stopwords(self) -> dict:
        """Return stopword lists keyed by language plus punctuation.

        Returns:
            A 200 response whose ``body`` maps each language name to its
            NLTK stopword list, ``'punctuation'`` to a list of punctuation
            tokens and ``'user_stopwords'`` to an empty list.
        """
        languages = [
            'german',
            'english',
            'catalan',
            'greek',
            'spanish',
            'french',
            'italian',
            'russian',
            'chinese'
        ]

        nltk.download('stopwords', quiet=True)
        stopwords = {
            language: nltk.corpus.stopwords.words(language)
            for language in languages
        }
        stopwords['punctuation'] = list(punctuation)
        # NOTE(review): the empty strings below look like non-ASCII
        # punctuation characters that were lost somewhere - confirm against
        # the intended list.
        stopwords['punctuation'] += ['', '|', '', '', '', '--']
        stopwords['user_stopwords'] = []

        return {
            'body': stopwords,
            'status': 200,
            'statusText': 'OK'
        }

    @socketio_login_required
    def on_create_share_link(self, corpus_hashid: str, expiration_date: str, role_name: str) -> dict:
        """Create a link that lets another user follow a corpus.

        Allowed for followers whose role has the ``MANAGE_FOLLOWERS``
        permission, the corpus owner and administrators.

        Args:
            corpus_hashid: Hashid string identifying the corpus.
            expiration_date: Expiration date in the format ``'%b %d, %Y'``
                (for example ``'Dec 09, 2024'``).
            role_name: Name of the ``CorpusFollowerRole`` granted by the
                link.

        Returns:
            A 200 response whose ``body`` is the external share URL; 400
            for malformed arguments (including an unparsable date or an
            unknown role), 404 if the corpus does not exist, 403 if the
            user lacks permission.
        """
        if not isinstance(corpus_hashid, str):
            return {'status': 400, 'statusText': 'Bad Request'}

        if not isinstance(expiration_date, str):
            return {'status': 400, 'statusText': 'Bad Request'}

        if not isinstance(role_name, str):
            return {'status': 400, 'statusText': 'Bad Request'}

        corpus_id = hashids.decode(corpus_hashid)

        if not isinstance(corpus_id, int):
            return {'status': 400, 'statusText': 'Bad Request'}

        corpus = Corpus.query.get(corpus_id)

        if corpus is None:
            return {'status': 404, 'statusText': 'Not Found'}

        cfa = CorpusFollowerAssociation.query.filter_by(
            corpus_id=corpus_id,
            follower_id=current_user.id
        ).first()

        if not (
            cfa is not None and cfa.role.has_permission('MANAGE_FOLLOWERS')
            or corpus.user == current_user
            or current_user.is_administrator
        ):
            return {'status': 403, 'statusText': 'Forbidden'}

        # The date string comes from the client; a value that does not
        # match the expected format is a bad request, not a server error.
        try:
            _expiration_date = datetime.strptime(expiration_date, '%b %d, %Y')
        except ValueError:
            return {'status': 400, 'statusText': 'Bad Request'}

        cfr = CorpusFollowerRole.query.filter_by(name=role_name).first()

        if cfr is None:
            return {'status': 400, 'statusText': 'Bad Request'}

        token = current_user.generate_follow_corpus_token(
            corpus.hashid,
            role_name,
            _expiration_date
        )

        corpus_share_link = url_for(
            'corpora.follow_corpus',
            corpus_id=corpus_id,
            token=token,
            _external=True
        )

        return {
            'body': corpus_share_link,
            'status': 200,
            'statusText': 'OK'
        }

    @socketio_login_required
    def on_set_is_public(self, corpus_hashid: str, new_value: bool) -> dict:
        """Set whether a corpus is public.

        Only the corpus owner or an administrator is allowed to do this.

        Args:
            corpus_hashid: Hashid string identifying the corpus.
            new_value: ``True`` to make the corpus public, ``False`` to
                make it private.

        Returns:
            A 200 response on success; 400 for malformed arguments, 404 if
            the corpus does not exist, 403 if the user lacks permission.
        """
        if not isinstance(corpus_hashid, str):
            return {'status': 400, 'statusText': 'Bad Request'}

        if not isinstance(new_value, bool):
            return {'status': 400, 'statusText': 'Bad Request'}

        corpus_id = hashids.decode(corpus_hashid)

        if not isinstance(corpus_id, int):
            return {'status': 400, 'statusText': 'Bad Request'}

        corpus = Corpus.query.get(corpus_id)

        if corpus is None:
            return {'status': 404, 'statusText': 'Not Found'}

        if not (
            corpus.user == current_user
            or current_user.is_administrator
        ):
            return {'status': 403, 'statusText': 'Forbidden'}

        corpus.is_public = new_value
        db.session.commit()

        return {
            'body': f'Corpus "{corpus.title}" is now {"public" if new_value else "private"}',
            'status': 200,
            'statusText': 'OK'
        }

View File

@ -169,6 +169,7 @@ class CQiOverSocketIONamespace(Namespace):
for param in signature(fn).parameters.values():
# Check if the parameter is optional or required
# The following is true for required parameters
if param.default is param.empty:
if param.name not in fn_args:
return {'code': 400, 'msg': 'Bad Request'}

View File

@ -23,6 +23,9 @@ def _restart_job(app: Flask, job_id: int):
class JobsNamespace(Namespace):
@socketio_login_required
def on_delete(self, job_hashid: str) -> dict:
if not isinstance(job_hashid, str):
return {'status': 400, 'statusText': 'Bad Request'}
job_id = hashids.decode(job_hashid)
if not isinstance(job_id, int):
@ -31,7 +34,7 @@ class JobsNamespace(Namespace):
job = Job.query.get(job_id)
if job is None:
return {'status': 404, 'statusText': 'Not found'}
return {'status': 404, 'statusText': 'Not Found'}
if not (
job.user == current_user
@ -53,6 +56,9 @@ class JobsNamespace(Namespace):
@socketio_admin_required
def on_log(self, job_hashid: str) -> dict:
if not isinstance(job_hashid, str):
return {'status': 400, 'statusText': 'Bad Request'}
job_id = hashids.decode(job_hashid)
if not isinstance(job_id, int):
@ -61,7 +67,7 @@ class JobsNamespace(Namespace):
job = Job.query.get(job_id)
if job is None:
return {'status': 404, 'statusText': 'Not found'}
return {'status': 404, 'statusText': 'Not Found'}
if job.status not in [JobStatus.COMPLETED, JobStatus.FAILED]:
return {'status': 409, 'statusText': 'Conflict'}
@ -72,11 +78,14 @@ class JobsNamespace(Namespace):
return {
'body': log,
'status': 200,
'statusText': 'Forbidden'
'statusText': 'OK'
}
socketio_login_required
def on_restart(self, job_hashid: str) -> dict:
if not isinstance(job_hashid, str):
return {'status': 400, 'statusText': 'Bad Request'}
job_id = hashids.decode(job_hashid)
if not isinstance(job_id, int):
@ -85,7 +94,7 @@ class JobsNamespace(Namespace):
job = Job.query.get(job_id)
if job is None:
return {'status': 404, 'statusText': 'Not found'}
return {'status': 404, 'statusText': 'Not Found'}
if not (
job.user == current_user

View File

@ -16,6 +16,9 @@ def _delete_user(app: Flask, user_id: int):
class UsersNamespace(Namespace):
@socketio_login_required
def on_get(self, user_hashid: str) -> dict:
if not isinstance(user_hashid, str):
return {'status': 400, 'statusText': 'Bad Request'}
user_id = hashids.decode(user_hashid)
if not isinstance(user_id, int):
@ -24,7 +27,7 @@ class UsersNamespace(Namespace):
user = User.query.get(user_id)
if user is None:
return {'status': 404, 'statusText': 'Not found'}
return {'status': 404, 'statusText': 'Not Found'}
if not (
user == current_user
@ -43,6 +46,9 @@ class UsersNamespace(Namespace):
@socketio_login_required
def on_subscribe(self, user_hashid: str) -> dict:
if not isinstance(user_hashid, str):
return {'status': 400, 'statusText': 'Bad Request'}
user_id = hashids.decode(user_hashid)
if not isinstance(user_id, int):
@ -51,7 +57,7 @@ class UsersNamespace(Namespace):
user = User.query.get(user_id)
if user is None:
return {'status': 404, 'statusText': 'Not found'}
return {'status': 404, 'statusText': 'Not Found'}
if not (
user == current_user
@ -65,6 +71,9 @@ class UsersNamespace(Namespace):
@socketio_login_required
def on_unsubscribe(self, user_hashid: str) -> dict:
if not isinstance(user_hashid, str):
return {'status': 400, 'statusText': 'Bad Request'}
user_id = hashids.decode(user_hashid)
if not isinstance(user_id, int):
@ -73,7 +82,7 @@ class UsersNamespace(Namespace):
user = User.query.get(user_id)
if user is None:
return {'status': 404, 'statusText': 'Not found'}
return {'status': 404, 'statusText': 'Not Found'}
if not (
user == current_user
@ -87,6 +96,9 @@ class UsersNamespace(Namespace):
@socketio_login_required
def on_delete(self, user_hashid: str) -> dict:
if not isinstance(user_hashid, str):
return {'status': 400, 'statusText': 'Bad Request'}
user_id = hashids.decode(user_hashid)
if not isinstance(user_id, int):
@ -95,7 +107,7 @@ class UsersNamespace(Namespace):
user = User.query.get(user_id)
if user is None:
return {'status': 404, 'statusText': 'Not found'}
return {'status': 404, 'statusText': 'Not Found'}
if not (
user == current_user