mirror of
https://gitlab.ub.uni-bielefeld.de/sfb1288inf/nopaque.git
synced 2025-06-15 02:20:40 +00:00
Cleanup in cqi over socketio
This commit is contained in:
@ -16,4 +16,4 @@ def before_request():
|
||||
pass
|
||||
|
||||
|
||||
from . import cli, cqi_over_sio, files, followers, routes, json_routes
|
||||
from . import cli, files, followers, routes, json_routes
|
||||
|
@ -1,113 +1,200 @@
|
||||
from cqi import CQiClient
|
||||
from cqi.errors import CQiException
|
||||
from cqi.status import CQiStatus
|
||||
from flask import session
|
||||
from flask_login import current_user
|
||||
from flask_socketio import ConnectionRefusedError
|
||||
from flask_socketio import Namespace
|
||||
from inspect import signature
|
||||
from threading import Lock
|
||||
from typing import Callable, Dict, List
|
||||
import math
|
||||
from app import db, hashids, socketio
|
||||
from app.decorators import socketio_login_required
|
||||
from app.models import Corpus, CorpusStatus
|
||||
import math
|
||||
from . import extensions
|
||||
|
||||
|
||||
'''
|
||||
This package tunnels the Corpus Query interface (CQi) protocol through
|
||||
Socket.IO (SIO) by wrapping each CQi function in a seperate SIO event.
|
||||
|
||||
This module only handles the SIO connect/disconnect, which handles the setup
|
||||
and teardown of necessary ressources for later use. Each CQi function has a
|
||||
corresponding SIO event. The event handlers are spread across the different
|
||||
modules within this package.
|
||||
Socket.IO (SIO) by tunneling CQi API calls through an event called "exec".
|
||||
|
||||
Basic concept:
|
||||
1. A client connects to the SIO namespace and provides the id of a corpus to be
|
||||
analysed.
|
||||
1. A client connects to the "/cqi_over_sio" namespace.
|
||||
2. The client emits the "init" event and provides a corpus id for the corpus
|
||||
that should be analysed in this session.
|
||||
1.1 The analysis session counter of the corpus is incremented.
|
||||
1.2 A CQiClient and a (Mutex) Lock belonging to it is created.
|
||||
1.3 Wait until the CQP server is running.
|
||||
1.4 Connect the CQiClient to the server.
|
||||
1.5 Save the CQiClient and the Lock in the session for subsequential use.
|
||||
2. A client emits an event and may provide a single json object with necessary
|
||||
arguments for the targeted CQi function.
|
||||
3. A SIO event handler (decorated with cqi_over_socketio) gets executed.
|
||||
- The event handler function defines all arguments. Hence the client
|
||||
is sent as a single json object, the decorator decomposes it to fit
|
||||
the functions signature. This also includes type checking and proper
|
||||
use of the lock (acquire/release) mechanism.
|
||||
1.5 Save the CQiClient, the Lock and the corpus id in the session for
|
||||
subsequential use.
|
||||
2. The client emits the "exec" event provides the name of a CQi API function
|
||||
arguments (optional).
|
||||
- The event "exec" handler will execute the function, make sure that the
|
||||
result is serializable and returns the result back to the client.
|
||||
4. Wait for more events
|
||||
5. The client disconnects from the SIO namespace
|
||||
5. The client disconnects from the "/cqi_over_sio" namespace
|
||||
1.1 The analysis session counter of the corpus is decremented.
|
||||
1.2 The CQiClient and (Mutex) Lock belonging to it are teared down.
|
||||
'''
|
||||
|
||||
|
||||
NAMESPACE = '/cqi_over_sio'
|
||||
CQI_API_FUNCTION_NAMES: List[str] = [
|
||||
'ask_feature_cl_2_3',
|
||||
'ask_feature_cqi_1_0',
|
||||
'ask_feature_cqp_2_3',
|
||||
'cl_alg2cpos',
|
||||
'cl_attribute_size',
|
||||
'cl_cpos2alg',
|
||||
'cl_cpos2id',
|
||||
'cl_cpos2lbound',
|
||||
'cl_cpos2rbound',
|
||||
'cl_cpos2str',
|
||||
'cl_cpos2struc',
|
||||
'cl_drop_attribute',
|
||||
'cl_id2cpos',
|
||||
'cl_id2freq',
|
||||
'cl_id2str',
|
||||
'cl_idlist2cpos',
|
||||
'cl_lexicon_size',
|
||||
'cl_regex2id',
|
||||
'cl_str2id',
|
||||
'cl_struc2cpos',
|
||||
'cl_struc2str',
|
||||
'corpus_alignment_attributes',
|
||||
'corpus_charset',
|
||||
'corpus_drop_corpus',
|
||||
'corpus_full_name',
|
||||
'corpus_info',
|
||||
'corpus_list_corpora',
|
||||
'corpus_positional_attributes',
|
||||
'corpus_properties',
|
||||
'corpus_structural_attribute_has_values',
|
||||
'corpus_structural_attributes',
|
||||
'cqp_drop_subcorpus',
|
||||
'cqp_dump_subcorpus',
|
||||
'cqp_fdist_1',
|
||||
'cqp_fdist_2',
|
||||
'cqp_list_subcorpora',
|
||||
'cqp_query',
|
||||
'cqp_subcorpus_has_field',
|
||||
'cqp_subcorpus_size',
|
||||
'ctrl_bye',
|
||||
'ctrl_connect',
|
||||
'ctrl_last_general_error',
|
||||
'ctrl_ping',
|
||||
'ctrl_user_abort'
|
||||
]
|
||||
|
||||
|
||||
from .cqi import * # noqa
|
||||
|
||||
|
||||
@socketio.on('connect', namespace=NAMESPACE)
|
||||
@socketio_login_required
|
||||
def connect(auth):
|
||||
# the auth variable is used in a hacky way. It contains the corpus id for
|
||||
# which a corpus analysis session should be started.
|
||||
corpus_id = hashids.decode(auth['corpus_id'])
|
||||
corpus = Corpus.query.get(corpus_id)
|
||||
if corpus is None:
|
||||
# return {'code': 404, 'msg': 'Not Found'}
|
||||
raise ConnectionRefusedError('Not Found')
|
||||
if not (corpus.user == current_user
|
||||
or current_user.is_following_corpus(corpus)
|
||||
or current_user.is_administrator()):
|
||||
# return {'code': 403, 'msg': 'Forbidden'}
|
||||
raise ConnectionRefusedError('Forbidden')
|
||||
if corpus.status not in [
|
||||
CorpusStatus.BUILT,
|
||||
CorpusStatus.STARTING_ANALYSIS_SESSION,
|
||||
CorpusStatus.RUNNING_ANALYSIS_SESSION,
|
||||
CorpusStatus.CANCELING_ANALYSIS_SESSION
|
||||
]:
|
||||
# return {'code': 424, 'msg': 'Failed Dependency'}
|
||||
raise ConnectionRefusedError('Failed Dependency')
|
||||
if corpus.num_analysis_sessions is None:
|
||||
corpus.num_analysis_sessions = 0
|
||||
db.session.commit()
|
||||
corpus.num_analysis_sessions = Corpus.num_analysis_sessions + 1
|
||||
db.session.commit()
|
||||
retry_counter = 20
|
||||
while corpus.status != CorpusStatus.RUNNING_ANALYSIS_SESSION:
|
||||
if retry_counter == 0:
|
||||
corpus.num_analysis_sessions = Corpus.num_analysis_sessions - 1
|
||||
db.session.commit()
|
||||
return {'code': 408, 'msg': 'Request Timeout'}
|
||||
socketio.sleep(3)
|
||||
retry_counter -= 1
|
||||
db.session.refresh(corpus)
|
||||
cqi_client = CQiClient(f'cqpserver_{corpus_id}', timeout=math.inf)
|
||||
session['cqi_over_sio'] = {
|
||||
'corpus_id': corpus_id,
|
||||
'cqi_client': cqi_client,
|
||||
'cqi_client_lock': Lock(),
|
||||
}
|
||||
# return {'code': 200, 'msg': 'OK'}
|
||||
|
||||
|
||||
@socketio.on('disconnect', namespace=NAMESPACE)
|
||||
def disconnect():
|
||||
try:
|
||||
cqi_client: CQiClient = session['cqi_over_sio']['cqi_client']
|
||||
cqi_client_lock: Lock = session['cqi_over_sio']['cqi_client_lock']
|
||||
except KeyError:
|
||||
return
|
||||
cqi_client_lock.acquire()
|
||||
try:
|
||||
cqi_client.api.ctrl_bye()
|
||||
except (BrokenPipeError, CQiException):
|
||||
class CQiNamespace(Namespace):
|
||||
@socketio_login_required
|
||||
def on_connect(self):
|
||||
pass
|
||||
cqi_client_lock.release()
|
||||
corpus = Corpus.query.get(session['cqi_over_sio']['corpus_id'])
|
||||
corpus.num_analysis_sessions = Corpus.num_analysis_sessions - 1
|
||||
db.session.commit()
|
||||
session.pop('cqi_over_sio')
|
||||
# return {'code': 200, 'msg': 'OK'}
|
||||
|
||||
@socketio_login_required
|
||||
def on_init(self, db_corpus_hashid: str):
|
||||
db_corpus_id = hashids.decode(db_corpus_hashid)
|
||||
db_corpus = Corpus.query.get(db_corpus_id)
|
||||
if db_corpus is None:
|
||||
return {'code': 404, 'msg': 'Not Found'}
|
||||
if not (db_corpus.user == current_user
|
||||
or current_user.is_following_corpus(db_corpus)
|
||||
or current_user.is_administrator()):
|
||||
return {'code': 403, 'msg': 'Forbidden'}
|
||||
if db_corpus.status not in [
|
||||
CorpusStatus.BUILT,
|
||||
CorpusStatus.STARTING_ANALYSIS_SESSION,
|
||||
CorpusStatus.RUNNING_ANALYSIS_SESSION,
|
||||
CorpusStatus.CANCELING_ANALYSIS_SESSION
|
||||
]:
|
||||
return {'code': 424, 'msg': 'Failed Dependency'}
|
||||
if db_corpus.num_analysis_sessions is None:
|
||||
db_corpus.num_analysis_sessions = 0
|
||||
db.session.commit()
|
||||
db_corpus.num_analysis_sessions = Corpus.num_analysis_sessions + 1
|
||||
db.session.commit()
|
||||
retry_counter = 20
|
||||
while db_corpus.status != CorpusStatus.RUNNING_ANALYSIS_SESSION:
|
||||
if retry_counter == 0:
|
||||
db_corpus.num_analysis_sessions = Corpus.num_analysis_sessions - 1
|
||||
db.session.commit()
|
||||
return {'code': 408, 'msg': 'Request Timeout'}
|
||||
socketio.sleep(3)
|
||||
retry_counter -= 1
|
||||
db.session.refresh(db_corpus)
|
||||
cqi_client = CQiClient(f'cqpserver_{db_corpus_id}', timeout=math.inf)
|
||||
session['cqi_over_sio'] = {}
|
||||
session['cqi_over_sio']['cqi_client'] = cqi_client
|
||||
session['cqi_over_sio']['cqi_client_lock'] = Lock()
|
||||
session['cqi_over_sio']['db_corpus_id'] = db_corpus_id
|
||||
return {'code': 200, 'msg': 'OK'}
|
||||
|
||||
@socketio_login_required
|
||||
def on_exec(self, fn_name: str, fn_args: Dict = {}):
|
||||
try:
|
||||
cqi_client: CQiClient = session['cqi_over_sio']['cqi_client']
|
||||
cqi_client_lock: Lock = session['cqi_over_sio']['cqi_client_lock']
|
||||
except KeyError:
|
||||
return {'code': 424, 'msg': 'Failed Dependency'}
|
||||
if fn_name in CQI_API_FUNCTION_NAMES:
|
||||
fn: Callable = getattr(cqi_client.api, fn_name)
|
||||
elif fn_name in extensions.CQI_EXTENSION_FUNCTION_NAMES:
|
||||
fn: Callable = getattr(extensions, fn_name)
|
||||
else:
|
||||
return {'code': 400, 'msg': 'Bad Request'}
|
||||
for param in signature(fn).parameters.values():
|
||||
if param.default is param.empty:
|
||||
if param.name not in fn_args:
|
||||
return {'code': 400, 'msg': 'Bad Request'}
|
||||
else:
|
||||
if param.name not in fn_args:
|
||||
continue
|
||||
if type(fn_args[param.name]) is not param.annotation:
|
||||
return {'code': 400, 'msg': 'Bad Request'}
|
||||
cqi_client_lock.acquire()
|
||||
try:
|
||||
fn_return_value = fn(**fn_args)
|
||||
except BrokenPipeError as e:
|
||||
return {'code': 500, 'msg': 'Internal Server Error'}
|
||||
except CQiException as e:
|
||||
return {
|
||||
'code': 502,
|
||||
'msg': 'Bad Gateway',
|
||||
'payload': {
|
||||
'code': e.code,
|
||||
'desc': e.description,
|
||||
'msg': e.__class__.__name__
|
||||
}
|
||||
}
|
||||
finally:
|
||||
cqi_client_lock.release()
|
||||
if isinstance(fn_return_value, CQiStatus):
|
||||
payload = {
|
||||
'code': fn_return_value.code,
|
||||
'msg': fn_return_value.__class__.__name__
|
||||
}
|
||||
else:
|
||||
payload = fn_return_value
|
||||
return {'code': 200, 'msg': 'OK', 'payload': payload}
|
||||
|
||||
def on_disconnect(self):
|
||||
try:
|
||||
cqi_client: CQiClient = session['cqi_over_sio']['cqi_client']
|
||||
cqi_client_lock: Lock = session['cqi_over_sio']['cqi_client_lock']
|
||||
db_corpus_id: int = session['cqi_over_sio']['db_corpus_id']
|
||||
except KeyError:
|
||||
return
|
||||
cqi_client_lock.acquire()
|
||||
try:
|
||||
session.pop('cqi_over_sio')
|
||||
except KeyError:
|
||||
pass
|
||||
try:
|
||||
cqi_client.api.ctrl_bye()
|
||||
except (BrokenPipeError, CQiException):
|
||||
pass
|
||||
cqi_client_lock.release()
|
||||
db_corpus = Corpus.query.get(db_corpus_id)
|
||||
if db_corpus is not None:
|
||||
db_corpus.num_analysis_sessions = Corpus.num_analysis_sessions - 1
|
||||
db.session.commit()
|
||||
|
@ -1,110 +0,0 @@
|
||||
from cqi import CQiClient
|
||||
from cqi.errors import CQiException
|
||||
from cqi.status import CQiStatus
|
||||
from flask import session
|
||||
from inspect import signature
|
||||
from threading import Lock
|
||||
from typing import Callable, Dict, List
|
||||
from app import socketio
|
||||
from app.decorators import socketio_login_required
|
||||
from . import NAMESPACE as ns
|
||||
from .extensions import CQI_EXTENSION_FUNCTION_NAMES
|
||||
from . import extensions as extensions_module
|
||||
|
||||
|
||||
CQI_FUNCTION_NAMES: List[str] = [
|
||||
'ask_feature_cl_2_3',
|
||||
'ask_feature_cqi_1_0',
|
||||
'ask_feature_cqp_2_3',
|
||||
'cl_alg2cpos',
|
||||
'cl_attribute_size',
|
||||
'cl_cpos2alg',
|
||||
'cl_cpos2id',
|
||||
'cl_cpos2lbound',
|
||||
'cl_cpos2rbound',
|
||||
'cl_cpos2str',
|
||||
'cl_cpos2struc',
|
||||
'cl_drop_attribute',
|
||||
'cl_id2cpos',
|
||||
'cl_id2freq',
|
||||
'cl_id2str',
|
||||
'cl_idlist2cpos',
|
||||
'cl_lexicon_size',
|
||||
'cl_regex2id',
|
||||
'cl_str2id',
|
||||
'cl_struc2cpos',
|
||||
'cl_struc2str',
|
||||
'corpus_alignment_attributes',
|
||||
'corpus_charset',
|
||||
'corpus_drop_corpus',
|
||||
'corpus_full_name',
|
||||
'corpus_info',
|
||||
'corpus_list_corpora',
|
||||
'corpus_positional_attributes',
|
||||
'corpus_properties',
|
||||
'corpus_structural_attribute_has_values',
|
||||
'corpus_structural_attributes',
|
||||
'cqp_drop_subcorpus',
|
||||
'cqp_dump_subcorpus',
|
||||
'cqp_fdist_1',
|
||||
'cqp_fdist_2',
|
||||
'cqp_list_subcorpora',
|
||||
'cqp_query',
|
||||
'cqp_subcorpus_has_field',
|
||||
'cqp_subcorpus_size',
|
||||
'ctrl_bye',
|
||||
'ctrl_connect',
|
||||
'ctrl_last_general_error',
|
||||
'ctrl_ping',
|
||||
'ctrl_user_abort'
|
||||
]
|
||||
|
||||
|
||||
@socketio.on('cqi', namespace=ns)
|
||||
@socketio_login_required
|
||||
def cqi_over_sio(fn_name: str, fn_args: Dict = {}):
|
||||
try:
|
||||
cqi_client: CQiClient = session['cqi_over_sio']['cqi_client']
|
||||
cqi_client_lock: Lock = session['cqi_over_sio']['cqi_client_lock']
|
||||
except KeyError:
|
||||
return {'code': 424, 'msg': 'Failed Dependency'}
|
||||
if fn_name in CQI_FUNCTION_NAMES:
|
||||
fn: Callable = getattr(cqi_client.api, fn_name)
|
||||
elif fn_name in CQI_EXTENSION_FUNCTION_NAMES:
|
||||
fn: Callable = getattr(extensions_module, fn_name)
|
||||
else:
|
||||
return {'code': 400, 'msg': 'Bad Request'}
|
||||
for param in signature(fn).parameters.values():
|
||||
if param.default is param.empty:
|
||||
if param.name not in fn_args:
|
||||
return {'code': 400, 'msg': 'Bad Request'}
|
||||
else:
|
||||
if param.name not in fn_args:
|
||||
continue
|
||||
if type(fn_args[param.name]) is not param.annotation:
|
||||
return {'code': 400, 'msg': 'Bad Request'}
|
||||
cqi_client_lock.acquire()
|
||||
try:
|
||||
fn_return_value = fn(**fn_args)
|
||||
except BrokenPipeError as e:
|
||||
return {'code': 500, 'msg': 'Internal Server Error'}
|
||||
except CQiException as e:
|
||||
return {
|
||||
'code': 502,
|
||||
'msg': 'Bad Gateway',
|
||||
'payload': {
|
||||
'code': e.code,
|
||||
'desc': e.description,
|
||||
'msg': e.__class__.__name__
|
||||
}
|
||||
}
|
||||
finally:
|
||||
cqi_client_lock.release()
|
||||
if isinstance(fn_return_value, CQiStatus):
|
||||
payload = {
|
||||
'code': fn_return_value.code,
|
||||
'msg': fn_return_value.__class__.__name__
|
||||
}
|
||||
else:
|
||||
payload = fn_return_value
|
||||
return {'code': 200, 'msg': 'OK', 'payload': payload}
|
@ -28,8 +28,9 @@ CQI_EXTENSION_FUNCTION_NAMES: List[str] = [
|
||||
|
||||
|
||||
def ext_corpus_update_db(corpus: str) -> CQiStatusOk:
|
||||
db_corpus = Corpus.query.get(session['cqi_over_sio']['corpus_id'])
|
||||
cqi_client: CQiClient = session['cqi_over_sio']['cqi_client']
|
||||
db_corpus_id: int = session['cqi_over_sio']['db_corpus_id']
|
||||
db_corpus: Corpus = Corpus.query.get(db_corpus_id)
|
||||
cqi_corpus: CQiCorpus = cqi_client.corpora.get(corpus)
|
||||
db_corpus.num_tokens = cqi_corpus.size
|
||||
db.session.commit()
|
||||
@ -37,10 +38,11 @@ def ext_corpus_update_db(corpus: str) -> CQiStatusOk:
|
||||
|
||||
|
||||
def ext_corpus_static_data(corpus: str) -> Dict:
|
||||
db_corpus = Corpus.query.get(session['cqi_over_sio']['corpus_id'])
|
||||
static_corpus_data_file = os.path.join(db_corpus.path, 'cwb', 'static.json.gz')
|
||||
if os.path.exists(static_corpus_data_file):
|
||||
with open(static_corpus_data_file, 'rb') as f:
|
||||
db_corpus_id: int = session['cqi_over_sio']['db_corpus_id']
|
||||
db_corpus: Corpus = Corpus.query.get(db_corpus_id)
|
||||
cache_file_path: str = os.path.join(db_corpus.path, 'cwb', 'static.json.gz')
|
||||
if os.path.exists(cache_file_path):
|
||||
with open(cache_file_path, 'rb') as f:
|
||||
return f.read()
|
||||
cqi_client: CQiClient = session['cqi_over_sio']['cqi_client']
|
||||
cqi_corpus: CQiCorpus = cqi_client.corpora.get(corpus)
|
||||
@ -189,10 +191,10 @@ def ext_corpus_static_data(corpus: str) -> Dict:
|
||||
} for s_attr_id_idx, s_attr_id in enumerate(range(0, s_attr.size))
|
||||
}
|
||||
del sub_s_attr_values
|
||||
with gzip.open(static_corpus_data_file, 'wt') as f:
|
||||
with gzip.open(cache_file_path, 'wt') as f:
|
||||
json.dump(static_corpus_data, f)
|
||||
del static_corpus_data
|
||||
with open(static_corpus_data_file, 'rb') as f:
|
||||
with open(cache_file_path, 'rb') as f:
|
||||
return f.read()
|
||||
|
||||
|
||||
|
Reference in New Issue
Block a user