Restructure corpora blueprint

This commit is contained in:
Patrick Jentsch 2024-12-16 11:39:54 +01:00
parent 6c1f48eb2f
commit c405061574
8 changed files with 100 additions and 319 deletions

View File

@ -132,9 +132,6 @@ def create_app(config: Config = Config) -> Flask:
# region SocketIO Namespaces
from .namespaces.cqi_over_sio import CQiOverSocketIONamespace
socketio.on_namespace(CQiOverSocketIONamespace('/cqi_over_sio'))
from .namespaces.corpora import CorporaNamespace
socketio.on_namespace(CorporaNamespace('/corpora'))
# endregion SocketIO Namespaces
# region Database event Listeners

View File

@ -16,4 +16,4 @@ def before_request():
pass
from . import cli, files, followers, routes, json_routes
from . import cli, files, followers, routes

View File

@ -1,64 +0,0 @@
from datetime import datetime
from flask import abort, request, url_for
from flask_login import current_user
from app import db
from app.decorators import content_negotiation
from app.models import Corpus, CorpusFollowerRole
from . import bp
from .decorators import corpus_follower_permission_required, corpus_owner_or_admin_required
@bp.route('/<hashid:corpus_id>/generate-share-link', methods=['POST'])
@corpus_follower_permission_required('MANAGE_FOLLOWERS')
@content_negotiation(consumes='application/json', produces='application/json')
def generate_corpus_share_link(corpus_id):
    """Generate a tokenized link that lets its recipient follow a corpus.

    The request body must be a JSON object with two string members:
    ``expiration`` (a date matching the ``%b %d, %Y`` format) and ``role``
    (the name of an existing ``CorpusFollowerRole``).

    Returns:
        A ``(dict, 200)`` tuple whose dict carries ``message``, ``category``
        and ``corpusShareLink``.

    Aborts with 400 when the body is not an object, when a member is missing
    or not a string, when the date cannot be parsed, or when the role does
    not exist; with 404 when the corpus does not exist.
    """
    data = request.json
    if not isinstance(data, dict):
        abort(400)
    expiration = data.get('expiration')
    if not isinstance(expiration, str):
        abort(400)
    role_name = data.get('role')
    if not isinstance(role_name, str):
        abort(400)
    try:
        expiration_date = datetime.strptime(expiration, '%b %d, %Y')
    except ValueError:
        # A string that is not a valid date is a client error; without this
        # guard the exception would surface as a 500 response.
        abort(400)
    cfr = CorpusFollowerRole.query.filter_by(name=role_name).first()
    if cfr is None:
        abort(400)
    corpus = Corpus.query.get_or_404(corpus_id)
    token = current_user.generate_follow_corpus_token(
        corpus.hashid,
        role_name,
        expiration_date
    )
    corpus_share_link = url_for(
        'corpora.follow_corpus',
        corpus_id=corpus_id,
        token=token,
        _external=True
    )
    response_data = {
        'message': 'Corpus share link generated',
        'category': 'corpus',
        'corpusShareLink': corpus_share_link
    }
    return response_data, 200
@bp.route('/<hashid:corpus_id>/is_public', methods=['PUT'])
@corpus_owner_or_admin_required
@content_negotiation(consumes='application/json', produces='application/json')
def update_corpus_is_public(corpus_id):
    """Set the ``is_public`` flag of a corpus from a bare JSON boolean body.

    Aborts with 400 when the body is not a boolean and with 404 when the
    corpus does not exist. On success the change is committed and a
    ``(dict, 200)`` tuple with ``message`` and ``category`` is returned.
    """
    is_public = request.json
    if not isinstance(is_public, bool):
        abort(400)

    corpus = Corpus.query.get_or_404(corpus_id)
    corpus.is_public = is_public
    db.session.commit()

    message = (
        f'Corpus "{corpus.title}" is now'
        f' {"public" if is_public else "private"}'
    )
    return {'message': message, 'category': 'corpus'}, 200

View File

@ -6,6 +6,7 @@ from flask import (
Flask,
jsonify,
redirect,
request,
render_template,
url_for
)
@ -150,7 +151,7 @@ def delete_corpus(corpus_id: int):
return jsonify(f'Corpus "{corpus.title}" marked for deletion.'), 202
@bp.route('/<hashid:corpus_id>/build', methods=['PATCH'])
@bp.route('/<hashid:corpus_id>/build', methods=['POST'])
def build_corpus(corpus_id: int):
corpus = Corpus.query.get_or_404(corpus_id)
@ -178,11 +179,15 @@ def build_corpus(corpus_id: int):
return jsonify(f'Corpus "{corpus.title}" marked for building.'), 202
@bp.route('/<hashid:corpus_id>/generate-share-link', methods=['POST'])
def create_share_link(corpus_id: int, expiration_date: str, role_name: str):
@bp.route('/<hashid:corpus_id>/create-share-link', methods=['POST'])
def create_share_link(corpus_id: int):
data = request.json
expiration_date = data['expiration_date']
if not isinstance(expiration_date, str):
abort(400)
role_name = data['role_name']
if not isinstance(role_name, str):
abort(400)
@ -233,8 +238,9 @@ def analysis(corpus_id: int):
title=f'Analyse Corpus {corpus.title}'
)
@bp.route('/<hashid:corpus_id>/analysis/stopwords')
def get_stopwords():
def get_stopwords(corpus_id: int):
languages = [
'german',
'english',
@ -270,3 +276,24 @@ def follow_corpus(corpus_id: int, token: str):
flash(f'You are following "{corpus.title}" now', category='corpus')
return redirect(corpus.url)
@bp.route('/<hashid:corpus_id>/is-public', methods=['PUT'])
def update_is_public(corpus_id):
    """Switch a corpus between public and private.

    The request body must be a bare JSON boolean. Aborts with 400 when it is
    not, with 404 when the corpus does not exist and with 403 when the
    current user is neither the corpus owner nor an administrator.
    Responds with a JSON message and status 200 after committing the change.
    """
    is_public = request.json
    if not isinstance(is_public, bool):
        abort(400)

    corpus = Corpus.query.get_or_404(corpus_id)

    # Only the owner or an administrator may change the visibility.
    is_owner = corpus.user == current_user
    if not (is_owner or current_user.is_administrator):
        abort(403)

    corpus.is_public = is_public
    db.session.commit()

    message = f'Corpus "{corpus.title}" is now {"public" if is_public else "private"}'
    return jsonify(message), 200

View File

@ -1,215 +0,0 @@
from datetime import datetime
from flask import current_app, Flask, url_for
from flask_login import current_user
from flask_socketio import Namespace
from string import punctuation
import nltk
from app import db, hashids, socketio
from app.decorators import socketio_login_required
from app.models import Corpus, CorpusFollowerAssociation, CorpusFollowerRole
def _delete_corpus(app: Flask, corpus_id: int):
    """Delete the corpus identified by *corpus_id* and commit the session.

    Runs inside an application context pushed from *app*, so it can be used
    outside of a request (e.g. from a background task).
    """
    with app.app_context():
        Corpus.query.get(corpus_id).delete()
        db.session.commit()
def _build_corpus(app: Flask, corpus_id: int):
    """Build the corpus identified by *corpus_id* and commit the session.

    Runs inside an application context pushed from *app*, so it can be used
    outside of a request (e.g. from a background task).
    """
    with app.app_context():
        Corpus.query.get(corpus_id).build()
        db.session.commit()
class CorporaNamespace(Namespace):
    """Socket.IO event handlers for corpus management.

    Every ``on_*`` handler answers with a dict carrying an HTTP-like
    ``status`` and ``statusText`` and, on success, a ``body``.
    """

    @staticmethod
    def _error(status: int, status_text: str) -> dict:
        """Build a body-less error response."""
        return {'status': status, 'statusText': status_text}

    @classmethod
    def _resolve_corpus(cls, corpus_hashid):
        """Decode *corpus_hashid* and load the corpus it refers to.

        Returns a ``(corpus_id, corpus, error)`` triple. On success ``error``
        is ``None``; otherwise ``corpus_id`` and ``corpus`` are ``None`` and
        ``error`` is the response to hand back to the client (400 for a
        malformed hashid, 404 for an unknown corpus).
        """
        if not isinstance(corpus_hashid, str):
            return None, None, cls._error(400, 'Bad Request')
        corpus_id = hashids.decode(corpus_hashid)
        if not isinstance(corpus_id, int):
            return None, None, cls._error(400, 'Bad Request')
        corpus = Corpus.query.get(corpus_id)
        if corpus is None:
            return None, None, cls._error(404, 'Not Found')
        return corpus_id, corpus, None

    @staticmethod
    def _is_owner_or_admin(corpus) -> bool:
        """Tell whether the current user owns *corpus* or is an administrator."""
        return bool(
            corpus.user == current_user
            or current_user.is_administrator
        )

    @classmethod
    def _has_access(cls, corpus_id, corpus, permission: str) -> bool:
        """Tell whether the current user may act on *corpus*.

        Access is granted to followers whose role has *permission*, to the
        corpus owner and to administrators.
        """
        cfa = CorpusFollowerAssociation.query.filter_by(
            corpus_id=corpus_id,
            follower_id=current_user.id
        ).first()
        if cfa is not None and cfa.role.has_permission(permission):
            return True
        return cls._is_owner_or_admin(corpus)

    @socketio_login_required
    def on_delete(self, corpus_hashid: str) -> dict:
        """Schedule the deletion of a corpus (owner or administrator only).

        Answers 202 once the background task is started, otherwise 400, 403
        or 404.
        """
        corpus_id, corpus, error = self._resolve_corpus(corpus_hashid)
        if error is not None:
            return error
        if not self._is_owner_or_admin(corpus):
            return self._error(403, 'Forbidden')
        socketio.start_background_task(
            _delete_corpus,
            current_app._get_current_object(),
            corpus_id
        )
        return {
            'body': f'Corpus "{corpus.title}" marked for deletion',
            'status': 202,
            'statusText': 'Accepted'
        }

    @socketio_login_required
    def on_build(self, corpus_hashid: str) -> dict:
        """Schedule the build of a corpus.

        Requires the ``MANAGE_FILES`` follower permission, ownership or
        administrator rights. Answers 202 once the background task is
        started, 409 when the corpus has no files, otherwise 400, 403 or 404.
        """
        corpus_id, corpus, error = self._resolve_corpus(corpus_hashid)
        if error is not None:
            return error
        if not self._has_access(corpus_id, corpus, 'MANAGE_FILES'):
            return self._error(403, 'Forbidden')
        if not corpus.files.all():
            # There is nothing to build from.
            return self._error(409, 'Conflict')
        socketio.start_background_task(
            _build_corpus,
            current_app._get_current_object(),
            corpus_id
        )
        return {
            'body': f'Corpus "{corpus.title}" marked for building',
            'status': 202,
            'statusText': 'Accepted'
        }

    # TODO: Think about where to place this, as this does not belong here...
    @socketio_login_required
    def on_get_stopwords(self):
        """Return stopword lists keyed by language.

        The body additionally contains a ``punctuation`` list and an empty
        ``user_stopwords`` list.
        """
        languages = [
            'german',
            'english',
            'catalan',
            'greek',
            'spanish',
            'french',
            'italian',
            'russian',
            'chinese'
        ]
        nltk.download('stopwords', quiet=True)
        stopwords = {
            language: nltk.corpus.stopwords.words(language)
            for language in languages
        }
        stopwords['punctuation'] = list(punctuation)
        # NOTE(review): the empty entries below look like characters that
        # were lost to an encoding problem - confirm against the intended
        # list before relying on them.
        stopwords['punctuation'] += ['', '|', '', '', '', '--']
        stopwords['user_stopwords'] = []
        return {
            'body': stopwords,
            'status': 200,
            'statusText': 'OK'
        }

    @socketio_login_required
    def on_create_share_link(self, corpus_hashid: str, expiration_date: str, role_name: str) -> dict:
        """Create a tokenized link that lets its recipient follow a corpus.

        *expiration_date* must match the ``%b %d, %Y`` format and *role_name*
        must name an existing ``CorpusFollowerRole``. Requires the
        ``MANAGE_FOLLOWERS`` follower permission, ownership or administrator
        rights. Answers 200 with the link as body, otherwise 400, 403 or 404.
        """
        if not isinstance(expiration_date, str):
            return self._error(400, 'Bad Request')
        if not isinstance(role_name, str):
            return self._error(400, 'Bad Request')
        corpus_id, corpus, error = self._resolve_corpus(corpus_hashid)
        if error is not None:
            return error
        if not self._has_access(corpus_id, corpus, 'MANAGE_FOLLOWERS'):
            return self._error(403, 'Forbidden')
        try:
            _expiration_date = datetime.strptime(expiration_date, '%b %d, %Y')
        except ValueError:
            # An unparsable date is a client error, not a server fault.
            return self._error(400, 'Bad Request')
        cfr = CorpusFollowerRole.query.filter_by(name=role_name).first()
        if cfr is None:
            return self._error(400, 'Bad Request')
        token = current_user.generate_follow_corpus_token(
            corpus.hashid,
            role_name,
            _expiration_date
        )
        corpus_share_link = url_for(
            'corpora.follow_corpus',
            corpus_id=corpus_id,
            token=token,
            _external=True
        )
        return {
            'body': corpus_share_link,
            'status': 200,
            'statusText': 'OK'
        }

    @socketio_login_required
    def on_set_is_public(self, corpus_hashid: str, new_value: bool) -> dict:
        """Switch a corpus between public and private (owner or admin only).

        Answers 200 after committing the change, otherwise 400, 403 or 404.
        """
        if not isinstance(new_value, bool):
            return self._error(400, 'Bad Request')
        _, corpus, error = self._resolve_corpus(corpus_hashid)
        if error is not None:
            return error
        if not self._is_owner_or_admin(corpus):
            return self._error(403, 'Forbidden')
        corpus.is_public = new_value
        db.session.commit()
        return {
            'body': f'Corpus "{corpus.title}" is now {"public" if new_value else "private"}',
            'status': 200,
            'statusText': 'OK'
        }

View File

@ -5,53 +5,89 @@ nopaque.app.endpoints.Corpora = class Corpora {
this.socket = io('/corpora', {transports: ['websocket'], upgrade: false});
}
async delete(id) {
const response = await this.socket.emitWithAck('delete', id);
async delete(corpusId) {
const options = {
headers: {
Accept: 'application/json'
},
method: 'DELETE'
};
if (response.status != 202) {
throw new Error(`[${response.status}] ${response.statusText}`);
}
const response = await fetch(`/corpora/${corpusId}`, options);
const data = await response.json();
return response.body;
if (!response.ok) {throw new Error(`${data.name}: ${data.description}`);}
return data;
}
async build(id) {
const response = await this.socket.emitWithAck('build', id);
async build(corpusId) {
const options = {
headers: {
Accept: 'application/json'
},
method: 'POST'
};
if (response.status != 202) {
throw new Error(`[${response.status}] ${response.statusText}`);
}
const response = await fetch(`/corpora/${corpusId}/build`, options);
const data = await response.json();
return response.body;
if (!response.ok) {throw new Error(`${data.name}: ${data.description}`);}
return data;
}
async getStopwords() {
const response = await this.socket.emitWithAck('get_stopwords');
async getStopwords(corpusId) {
const options = {
headers: {
Accept: 'application/json'
}
};
if (response.status != 200) {
throw new Error(`[${response.status}] ${response.statusText}`);
}
const response = await fetch(`/corpora/${corpusId}/analysis/stopwords`, options);
const data = await response.json();
return response.body;
if (!response.ok) {throw new Error(`${data.name}: ${data.description}`);}
return data;
}
async createShareLink(id, expirationDate, roleName) {
const response = await this.socket.emitWithAck('create_share_link', id, expirationDate, roleName);
async createShareLink(corpusId, expirationDate, roleName) {
const options = {
body: JSON.stringify({
'expiration_date': expirationDate,
'role_name': roleName
}),
headers: {
Accept: 'application/json',
'Content-Type': 'application/json'
},
method: 'POST'
};
if (response.status != 200) {
throw new Error(`[${response.status}] ${response.statusText}`);
}
const response = await fetch(`/corpora/${corpusId}/create-share-link`, options);
const data = await response.json();
return response.body;
if (!response.ok) {throw new Error(`${data.name}: ${data.description}`);}
return data;
}
async setIsPublic(id, newValue) {
const response = await this.socket.emitWithAck('set_is_public', id, newValue);
async updateIsPublic(corpusId, newValue) {
const options = {
body: JSON.stringify(newValue),
headers: {
Accept: 'application/json',
'Content-Type': 'application/json'
},
method: 'PUT',
};
if (response.status != 200) {
throw new Error(`[${response.status}] ${response.statusText}`);
}
const response = await fetch(`/corpora/${corpusId}/is-public`, options);
const data = await response.json();
return response.body;
if (!response.ok) {throw new Error(`${data.name}: ${data.description}`);}
return data;
}
}

View File

@ -73,7 +73,7 @@ nopaque.corpus_analysis.StaticVisualizationExtension = class StaticVisualization
}
async getStopwords() {
const stopwords = await app.corpora.getStopwords();
const stopwords = await app.corpora.getStopwords(this.app.corpusId);
this.data.originalStopwords = structuredClone(stopwords);
this.data.stopwords = structuredClone(stopwords);
return stopwords;

View File

@ -246,7 +246,7 @@
let publishingModalIsPublicSwitchElement = document.querySelector('#publishing-modal-is-public-switch');
publishingModalIsPublicSwitchElement.addEventListener('change', (event) => {
let newIsPublic = publishingModalIsPublicSwitchElement.checked;
app.corpora.setIsPublic.update({{ corpus.hashid|tojson }}, newIsPublic)
app.corpora.updateIsPublic({{ corpus.hashid|tojson }}, newIsPublic)
.catch((response) => {
publishingModalIsPublicSwitchElement.checked = !newIsPublic;
});