2023-03-10 10:33:11 +01:00
|
|
|
from datetime import datetime
|
2023-04-11 13:30:38 +02:00
|
|
|
from flask import abort, current_app, request, url_for
|
2023-04-11 11:46:33 +02:00
|
|
|
from flask_login import current_user
|
2023-03-10 10:33:11 +01:00
|
|
|
from threading import Thread
|
2023-04-12 12:45:41 +02:00
|
|
|
from app import db
|
2023-03-10 10:33:11 +01:00
|
|
|
from app.decorators import content_negotiation
|
|
|
|
from app.models import Corpus, CorpusFollowerRole
|
|
|
|
from . import bp
|
2023-04-12 12:45:41 +02:00
|
|
|
from .decorators import corpus_follower_permission_required, corpus_owner_or_admin_required
|
2023-07-03 11:06:43 +02:00
|
|
|
import nltk
|
|
|
|
from string import punctuation
|
2023-03-10 10:33:11 +01:00
|
|
|
|
|
|
|
|
|
|
|
@bp.route('/<hashid:corpus_id>', methods=['DELETE'])
@corpus_owner_or_admin_required
@content_negotiation(produces='application/json')
def delete_corpus(corpus_id):
    """Schedule a corpus for deletion and acknowledge the request.

    The deletion itself is handed to a background thread, so the response
    is sent without waiting for it to finish.

    Args:
        corpus_id: Identifier of the corpus, taken from the URL.

    Returns:
        A ``(response_data, 200)`` tuple; ``response_data`` holds a
        ``message`` and a ``category`` entry.

    Aborts with 404 (through ``get_or_404``) when the corpus is not found.
    """
    def _delete_corpus(app, corpus_id):
        # The worker runs outside the request, so it pushes its own
        # application context before using the database session.
        with app.app_context():
            corpus = Corpus.query.get(corpus_id)
            if corpus is None:
                # The row disappeared between the request and this thread
                # running (e.g. a concurrent DELETE): nothing left to do.
                app.logger.warning(
                    'Corpus %s no longer exists, skipping deletion',
                    corpus_id
                )
                return
            corpus.delete()
            db.session.commit()

    corpus = Corpus.query.get_or_404(corpus_id)
    thread = Thread(
        target=_delete_corpus,
        # Hand over the real application object instead of the
        # context-bound ``current_app`` proxy.
        args=(current_app._get_current_object(), corpus.id)
    )
    thread.start()
    response_data = {
        'message': f'Corpus "{corpus.title}" marked for deletion',
        'category': 'corpus'
    }
    return response_data, 200
|
2023-03-10 10:33:11 +01:00
|
|
|
|
|
|
|
|
|
|
|
@bp.route('/<hashid:corpus_id>/build', methods=['POST'])
@corpus_follower_permission_required('MANAGE_FILES')
@content_negotiation(produces='application/json')
def build_corpus(corpus_id):
    """Schedule a corpus build and acknowledge the request.

    The build itself is handed to a background thread, so the response is
    sent without waiting for it to finish.

    Args:
        corpus_id: Identifier of the corpus, taken from the URL.

    Returns:
        A ``(response_data, 202)`` tuple; ``response_data`` holds a
        ``message`` and a ``category`` entry.

    Aborts with 404 (through ``get_or_404``) when the corpus is not found
    and with 409 when the corpus has no files.
    """
    def _build_corpus(app, corpus_id):
        # The worker runs outside the request, so it pushes its own
        # application context before using the database session.
        with app.app_context():
            corpus = Corpus.query.get(corpus_id)
            if corpus is None:
                # The row disappeared between the request and this thread
                # running (e.g. it was deleted meanwhile): nothing to build.
                app.logger.warning(
                    'Corpus %s no longer exists, skipping build',
                    corpus_id
                )
                return
            corpus.build()
            db.session.commit()

    corpus = Corpus.query.get_or_404(corpus_id)
    # A corpus without any files cannot be built.
    if not corpus.files.all():
        abort(409)
    thread = Thread(
        target=_build_corpus,
        # Hand over the real application object instead of the
        # context-bound ``current_app`` proxy.
        args=(current_app._get_current_object(), corpus_id)
    )
    thread.start()
    response_data = {
        'message': f'Corpus "{corpus.title}" marked for building',
        'category': 'corpus'
    }
    return response_data, 202
|
2023-03-10 10:33:11 +01:00
|
|
|
|
2023-07-03 11:06:43 +02:00
|
|
|
@bp.route('/stopwords')
@content_negotiation(produces='application/json')
def get_stopwords():
    """Return stopword lists for a fixed set of languages.

    The payload maps each language name to its NLTK stopword list and adds
    two extra entries: ``punctuation`` (the characters of
    ``string.punctuation`` plus an em dash and a pipe) and an empty
    ``user_stopwords`` list.

    Returns:
        A ``(response_data, 202)`` tuple; ``response_data`` has a single
        ``stopwords`` key holding the mapping described above.
    """
    # Fetch the corpus only when it is not available locally, instead of
    # contacting the NLTK download server on every request.
    try:
        nltk.data.find('corpora/stopwords')
    except LookupError:
        nltk.download('stopwords')
    languages = ["german", "english", "catalan", "greek", "spanish", "french", "italian", "russian", "chinese"]
    stopwords = {
        language: nltk.corpus.stopwords.words(language)
        for language in languages
    }
    stopwords['punctuation'] = list(punctuation) + ['—', '|']
    stopwords['user_stopwords'] = []
    response_data = {
        'stopwords': stopwords
    }
    # NOTE(review): 202 is unusual for a synchronous GET (200 would be the
    # expected status); kept unchanged so existing clients are unaffected.
    return response_data, 202
|
2023-03-10 10:33:11 +01:00
|
|
|
|
2023-06-07 15:13:47 +02:00
|
|
|
# @bp.route('/<hashid:corpus_id>/generate-share-link', methods=['POST'])
|
|
|
|
# @corpus_follower_permission_required('MANAGE_FOLLOWERS')
|
|
|
|
# @content_negotiation(consumes='application/json', produces='application/json')
|
|
|
|
# def generate_corpus_share_link(corpus_id):
|
|
|
|
# data = request.json
|
|
|
|
# if not isinstance(data, dict):
|
|
|
|
# abort(400)
|
|
|
|
# expiration = data.get('expiration')
|
|
|
|
# if not isinstance(expiration, str):
|
|
|
|
# abort(400)
|
|
|
|
# role_name = data.get('role')
|
|
|
|
# if not isinstance(role_name, str):
|
|
|
|
# abort(400)
|
|
|
|
# expiration_date = datetime.strptime(expiration, '%b %d, %Y')
|
|
|
|
# cfr = CorpusFollowerRole.query.filter_by(name=role_name).first()
|
|
|
|
# if cfr is None:
|
|
|
|
# abort(400)
|
|
|
|
# corpus = Corpus.query.get_or_404(corpus_id)
|
|
|
|
# token = current_user.generate_follow_corpus_token(corpus.hashid, role_name, expiration_date)
|
|
|
|
# corpus_share_link = url_for(
|
|
|
|
# 'corpora.follow_corpus',
|
|
|
|
# corpus_id=corpus_id,
|
|
|
|
# token=token,
|
|
|
|
# _external=True
|
|
|
|
# )
|
|
|
|
# response_data = {
|
|
|
|
# 'message': 'Corpus share link generated',
|
|
|
|
# 'category': 'corpus',
|
|
|
|
# 'corpusShareLink': corpus_share_link
|
|
|
|
# }
|
|
|
|
# return response_data, 200
|
2023-03-10 10:33:11 +01:00
|
|
|
|
|
|
|
|
|
|
|
|
2023-06-07 15:13:47 +02:00
|
|
|
# @bp.route('/<hashid:corpus_id>/is_public', methods=['PUT'])
|
|
|
|
# @corpus_owner_or_admin_required
|
|
|
|
# @content_negotiation(consumes='application/json', produces='application/json')
|
|
|
|
# def update_corpus_is_public(corpus_id):
|
|
|
|
# is_public = request.json
|
|
|
|
# if not isinstance(is_public, bool):
|
|
|
|
# abort(400)
|
|
|
|
# corpus = Corpus.query.get_or_404(corpus_id)
|
|
|
|
# corpus.is_public = is_public
|
|
|
|
# db.session.commit()
|
|
|
|
# response_data = {
|
|
|
|
# 'message': (
|
|
|
|
# f'Corpus "{corpus.title}" is now'
|
|
|
|
# f' {"public" if is_public else "private"}'
|
|
|
|
# ),
|
|
|
|
# 'category': 'corpus'
|
|
|
|
# }
|
|
|
|
# return response_data, 200
|