2023-06-29 10:09:28 +00:00
|
|
|
from cqi import CQiClient
|
|
|
|
from cqi.errors import CQiException
|
2023-07-13 10:42:47 +00:00
|
|
|
from cqi.status import CQiStatus
|
2024-11-06 11:27:49 +00:00
|
|
|
from flask import current_app
|
2023-06-29 10:09:28 +00:00
|
|
|
from flask_login import current_user
|
2023-07-13 10:42:47 +00:00
|
|
|
from flask_socketio import Namespace
|
|
|
|
from inspect import signature
|
2023-06-29 10:09:28 +00:00
|
|
|
from threading import Lock
|
2023-09-25 11:28:48 +00:00
|
|
|
from app import db, docker_client, hashids, socketio
|
2024-05-04 12:55:05 +00:00
|
|
|
from app.decorators import socketio_login_required
|
2023-06-29 10:09:28 +00:00
|
|
|
from app.models import Corpus, CorpusStatus
|
2023-07-13 10:42:47 +00:00
|
|
|
from . import extensions
|
2024-11-06 11:27:49 +00:00
|
|
|
from .utils import CQiOverSocketIOSessionManager
|
2023-06-29 10:09:28 +00:00
|
|
|
|
|
|
|
|
|
|
|
'''
|
|
|
|
This package tunnels the Corpus Query interface (CQi) protocol through
Socket.IO (SIO): CQi API calls are sent through an event called "exec".
|
2023-06-29 10:09:28 +00:00
|
|
|
|
|
|
|
Basic concept:
|
2024-09-25 08:45:53 +00:00
|
|
|
1. A client connects to the namespace.
|
2023-07-13 10:42:47 +00:00
|
|
|
2. The client emits the "init" event and provides a corpus id for the corpus
|
|
|
|
that should be analysed in this session.
|
2023-06-29 10:09:28 +00:00
|
|
|
     2.1 The analysis session counter of the corpus is incremented.
     2.2 A CQiClient and a (Mutex) Lock belonging to it are created.
     2.3 Wait until the CQP server is running.
     2.4 Connect the CQiClient to the server.
     2.5 Save the CQiClient, the Lock and the corpus id in the session for
         subsequent use.
|
2024-09-25 08:45:53 +00:00
|
|
|
3. The client emits "exec" events, within which it provides the name of a CQi
|
|
|
|
API function and the corresponding arguments.
|
|
|
|
     3.1 The "exec" event handler executes the function, makes sure that
         the result is serializable and returns the result to the client.
|
|
|
|
4. The client disconnects from the namespace.
     4.1 The analysis session counter of the corpus is decremented.
     4.2 The CQiClient and the (Mutex) Lock belonging to it are torn down.
|
2023-06-29 10:09:28 +00:00
|
|
|
'''
|
|
|
|
|
2024-09-25 15:46:53 +00:00
|
|
|
|
2024-11-06 11:27:49 +00:00
|
|
|
# Names of the CQi API functions that may be requested through the "exec"
# event. Kept as a list, in the original order, for backward compatibility.
CQI_API_FUNCTION_NAMES = [
    # ask_feature_*
    'ask_feature_cl_2_3',
    'ask_feature_cqi_1_0',
    'ask_feature_cqp_2_3',
    # cl_*
    'cl_alg2cpos',
    'cl_attribute_size',
    'cl_cpos2alg',
    'cl_cpos2id',
    'cl_cpos2lbound',
    'cl_cpos2rbound',
    'cl_cpos2str',
    'cl_cpos2struc',
    'cl_drop_attribute',
    'cl_id2cpos',
    'cl_id2freq',
    'cl_id2str',
    'cl_idlist2cpos',
    'cl_lexicon_size',
    'cl_regex2id',
    'cl_str2id',
    'cl_struc2cpos',
    'cl_struc2str',
    # corpus_*
    'corpus_alignment_attributes',
    'corpus_charset',
    'corpus_drop_corpus',
    'corpus_full_name',
    'corpus_info',
    'corpus_list_corpora',
    'corpus_positional_attributes',
    'corpus_properties',
    'corpus_structural_attribute_has_values',
    'corpus_structural_attributes',
    # cqp_*
    'cqp_drop_subcorpus',
    'cqp_dump_subcorpus',
    'cqp_fdist_1',
    'cqp_fdist_2',
    'cqp_list_subcorpora',
    'cqp_query',
    'cqp_subcorpus_has_field',
    'cqp_subcorpus_size',
    # ctrl_*
    'ctrl_bye',
    'ctrl_connect',
    'ctrl_last_general_error',
    'ctrl_ping',
    'ctrl_user_abort',
]
|
2023-06-29 10:09:28 +00:00
|
|
|
|
|
|
|
|
2024-11-06 11:27:49 +00:00
|
|
|
class CQiOverSocketIONamespace(Namespace):
    '''
    Socket.IO namespace that runs one CQi session per connected client.

    The "init" and "exec" handlers answer with a dict that carries an
    HTTP-like ``code`` and ``msg``. "exec" additionally carries a ``payload``
    on success and when the CQi call raised a CQiException.
    '''

    @staticmethod
    def _release_analysis_session(corpus) -> None:
        '''Decrement the analysis session counter of ``corpus`` and commit.'''
        # The new value is built from the class attribute rather than the
        # loaded instance value - presumably so that the database computes
        # it. NOTE(review): confirm against the Corpus model.
        corpus.num_analysis_sessions = Corpus.num_analysis_sessions - 1
        db.session.commit()

    @socketio_login_required
    def on_connect(self) -> None:
        '''Handle a new connection; no per-connection work is done here.'''

    @socketio_login_required
    def on_init(self, corpus_hashid: str) -> dict:
        '''
        Start an analysis session for the corpus identified by a hashid.

        Returns a status dict:
            400 - the hashid does not decode to a single int
            404 - no corpus with that id
            403 - the current user is neither owner, follower nor admin
            424 - the corpus status does not allow an analysis session
            408 - the corpus did not reach RUNNING_ANALYSIS_SESSION in time
            200 - the CQi client, its lock and the corpus id were stored
                  through CQiOverSocketIOSessionManager

        Exceptions raised while looking up the CQP server container or
        creating the client are propagated after the session counter has
        been decremented again.
        '''
        corpus_id = hashids.decode(corpus_hashid)
        if not isinstance(corpus_id, int):
            return {'code': 400, 'msg': 'Bad Request'}

        corpus = Corpus.query.get(corpus_id)
        if corpus is None:
            return {'code': 404, 'msg': 'Not Found'}

        has_access = (
            corpus.user == current_user
            or current_user.is_following_corpus(corpus)
            or current_user.is_administrator
        )
        if not has_access:
            return {'code': 403, 'msg': 'Forbidden'}

        allowed_statuses = (
            CorpusStatus.BUILT,
            CorpusStatus.STARTING_ANALYSIS_SESSION,
            CorpusStatus.RUNNING_ANALYSIS_SESSION,
            CorpusStatus.CANCELING_ANALYSIS_SESSION,
        )
        if corpus.status not in allowed_statuses:
            return {'code': 424, 'msg': 'Failed Dependency'}

        corpus.num_analysis_sessions = Corpus.num_analysis_sessions + 1
        db.session.commit()

        # Poll the corpus status: at most 20 retries, 3 seconds apart.
        retry_counter = 20
        while corpus.status != CorpusStatus.RUNNING_ANALYSIS_SESSION:
            if retry_counter == 0:
                self._release_analysis_session(corpus)
                return {'code': 408, 'msg': 'Request Timeout'}
            socketio.sleep(3)
            retry_counter -= 1
            db.session.refresh(corpus)

        try:
            cqpserver_container = docker_client.containers.get(
                f'nopaque-cqpserver-{corpus_id}'
            )
            networks = cqpserver_container.attrs['NetworkSettings']['Networks']
            network_name = current_app.config['NOPAQUE_DOCKER_NETWORK_NAME']
            cqpserver_ip_address = networks[network_name]['IPAddress']
            cqi_client = CQiClient(cqpserver_ip_address)
        except Exception:
            # Undo the increment from above before propagating the error.
            # on_disconnect returns early (without decrementing) when the
            # session getters raise KeyError, which is presumably the case
            # while nothing has been stored in the session yet.
            self._release_analysis_session(corpus)
            raise
        cqi_client_lock = Lock()

        CQiOverSocketIOSessionManager.setup()
        CQiOverSocketIOSessionManager.set_corpus_id(corpus_id)
        CQiOverSocketIOSessionManager.set_cqi_client(cqi_client)
        CQiOverSocketIOSessionManager.set_cqi_client_lock(cqi_client_lock)

        return {'code': 200, 'msg': 'OK'}

    @socketio_login_required
    def on_exec(self, fn_name: str, fn_args: dict = None) -> dict:
        '''
        Execute a CQi API or CQi extension function for this session.

        fn_name: a name listed in CQI_API_FUNCTION_NAMES or in
            extensions.CQI_EXTENSION_FUNCTION_NAMES.
        fn_args: keyword arguments for the function; omitted or None means
            no arguments.

        Returns a status dict:
            424 - the session holds no CQi client/lock (KeyError on lookup)
            400 - unknown function name, fn_args is not a dict, an argument
                  the function does not accept, a missing required
                  argument, or an argument whose type is not exactly the
                  annotated parameter type
            500 - the call raised BrokenPipeError
            502 - the call raised CQiException (details in ``payload``)
            200 - success; ``payload`` is the return value, with CQiStatus
                  objects converted to a dict of ``code`` and ``msg``
        '''
        try:
            cqi_client = CQiOverSocketIOSessionManager.get_cqi_client()
            cqi_client_lock = CQiOverSocketIOSessionManager.get_cqi_client_lock()
        except KeyError:
            return {'code': 424, 'msg': 'Failed Dependency'}

        if fn_name in CQI_API_FUNCTION_NAMES:
            fn = getattr(cqi_client.api, fn_name)
        elif fn_name in extensions.CQI_EXTENSION_FUNCTION_NAMES:
            fn = getattr(extensions, fn_name)
        else:
            return {'code': 400, 'msg': 'Bad Request'}

        if fn_args is None:
            fn_args = {}
        elif not isinstance(fn_args, dict):
            # fn(**fn_args) below requires a mapping.
            return {'code': 400, 'msg': 'Bad Request'}

        parameters = signature(fn).parameters
        # Reject arguments the function does not declare; they would make
        # fn(**fn_args) raise TypeError.
        if any(arg_name not in parameters for arg_name in fn_args):
            return {'code': 400, 'msg': 'Bad Request'}

        for param in parameters.values():
            if param.name not in fn_args:
                if param.default is param.empty:
                    # A required parameter is missing.
                    return {'code': 400, 'msg': 'Bad Request'}
                continue
            # Exact type match: subclasses of the annotated type (e.g. bool
            # for an int parameter) are rejected as well.
            if type(fn_args[param.name]) is not param.annotation:
                return {'code': 400, 'msg': 'Bad Request'}

        try:
            # The lock is held only for the duration of the call.
            with cqi_client_lock:
                fn_return_value = fn(**fn_args)
        except BrokenPipeError:
            return {'code': 500, 'msg': 'Internal Server Error'}
        except CQiException as e:
            return {
                'code': 502,
                'msg': 'Bad Gateway',
                'payload': {
                    'code': e.code,
                    'desc': e.description,
                    'msg': e.__class__.__name__
                }
            }

        if isinstance(fn_return_value, CQiStatus):
            payload = {
                'code': fn_return_value.code,
                'msg': fn_return_value.__class__.__name__
            }
        else:
            payload = fn_return_value

        return {'code': 200, 'msg': 'OK', 'payload': payload}

    def on_disconnect(self) -> None:
        '''
        Tear down the session and decrement the analysis session counter.

        Does nothing when the session getters raise KeyError.
        '''
        try:
            corpus_id = CQiOverSocketIOSessionManager.get_corpus_id()
            cqi_client = CQiOverSocketIOSessionManager.get_cqi_client()
            cqi_client_lock = CQiOverSocketIOSessionManager.get_cqi_client_lock()
            CQiOverSocketIOSessionManager.teardown()
        except KeyError:
            return

        # "with" releases the lock even if ctrl_bye raises an exception
        # that is not handled below.
        with cqi_client_lock:
            try:
                cqi_client.api.ctrl_bye()
            except (BrokenPipeError, CQiException):
                # Best effort: these two errors are ignored on purpose.
                pass

        corpus = Corpus.query.get(corpus_id)
        if corpus is None:
            return

        self._release_analysis_session(corpus)
|