nopaque/app/corpora/events.py

111 lines
4.4 KiB
Python
Raw Normal View History

2019-11-12 13:23:54 +01:00
from app import db, logger, socketio
from app.events import connected_sessions
from app.models import Corpus
from flask import current_app, request
from flask_login import current_user, login_required
2019-11-18 14:24:31 +01:00
from .CQiWrapper.CQiWrapper import CQiWrapper
2019-11-11 14:20:44 +01:00
# Maps each corpus id to the list of Socket.IO session ids (sid) that
# currently have an analysis session open for that corpus.
# {<corpus_id>: [<sid>, ...], ...}
analysis_sessions = {}
2019-11-11 14:20:44 +01:00
# Maps each Socket.IO session id (sid) to the CQi client that serves it.
# {<sid>: <CQi client>, ...}
analysis_clients = {}
@socketio.on('init_corpus_analysis')
@login_required
def init_corpus_analysis(corpus_id):
    """Open a CQi analysis session for the requesting Socket.IO client.

    Every reply is sent on the 'init_corpus_analysis' event to the caller's
    own room only:

    * '[ERROR 404]: Not Found' if no corpus with ``corpus_id`` exists,
    * '[ERROR 403]: Forbidden' if the current user is neither the corpus
      creator nor an administrator,
    * 'Ready' once the CQi client for this session is connected.

    :param corpus_id: primary key of the Corpus to analyse.
    """
    sid = request.sid
    corpus = Corpus.query.get(corpus_id)
    if corpus is None:
        socketio.emit('init_corpus_analysis', '[ERROR 404]: Not Found',
                      room=sid)
        return
    if not (corpus.creator == current_user
            or current_user.is_administrator()):
        socketio.emit('init_corpus_analysis', '[ERROR 403]: Forbidden',
                      room=sid)
        return
    # Record this session under its corpus id.
    analysis_sessions.setdefault(corpus_id, []).append(sid)
    # Poll the database until the corpus reports the 'analysing' status.
    # Stop waiting as soon as the client is gone, otherwise a disconnected
    # session would keep this loop alive forever. Sleeping *before* the
    # refresh lets the loop condition see the fresh status immediately.
    while corpus.status != 'analysing' and sid in connected_sessions:
        socketio.sleep(3)
        db.session.refresh(corpus)
    if sid in connected_sessions:
        # The module imports CQiWrapper, not CQiClient; the old name raised a
        # NameError here. NOTE(review): assumes CQiWrapper accepts the same
        # ``host`` keyword and offers ``ctrl_connect`` - confirm.
        analysis_client = CQiWrapper(
            host='{}_analysis_container{}'.format(corpus.creator.username,
                                                  corpus.id))
        analysis_client.ctrl_connect('opaque', 'opaque')
        # Publish the client only after it is connected so that query
        # handlers never pick up a half-initialised client.
        analysis_clients[sid] = analysis_client
        socketio.emit('init_corpus_analysis', 'Ready', room=sid)
    # The observer performs the cleanup once the session disconnects; if the
    # client already left while we were waiting it cleans up right away.
    socketio.start_background_task(observe_corpus_analysis_connection,
                                   current_app._get_current_object(),
                                   corpus_id, sid)
2019-11-14 15:34:07 +01:00
@socketio.on('query')
@login_required
def recv_query(message):
    """Run a query on the caller's CQi client and send back the matches.

    Every reply is sent on the 'query' event to the caller's own room only:

    * '[ERROR 424]: Failed Dependency' if no CQi client is registered for
      this session,
    * '[ERROR 400]: Bad Request' if ``message`` lacks 'query' or a numeric
      'hits_per_page',
    * '[ERROR 500]: Internal Server Error' if the result subcorpus lacks the
      match or matchend field,
    * otherwise a list with one ``{'pos_list': [...], 'word_list': [...]}``
      dictionary per match (an empty list if nothing matched).

    :param message: mapping with the keys 'query' (handed to ``cqp_query``)
        and 'hits_per_page' (maximum number of matches to return).
    """
    sid = request.sid
    analysis_client = analysis_clients.get(sid)
    if analysis_client is None:
        socketio.emit('query', '[ERROR 424]: Failed Dependency', room=sid)
        return
    # The message comes straight from the browser, so validate it instead of
    # letting a KeyError/ValueError escape from the handler.
    try:
        query = message['query']
        hits_per_page = int(message['hits_per_page'])
    except (KeyError, TypeError, ValueError):
        socketio.emit('query', '[ERROR 400]: Bad Request', room=sid)
        return
    # This module imports no names for the CQi field identifiers, so they are
    # spelled out here. NOTE(review): 0x10 / 0x11 are the MATCH / MATCHEND
    # values of the CQi specification - replace them with the project's own
    # constants if the CQi package exports them.
    field_match = 0x10
    field_matchend = 0x11

    # Prepare and execute the query.
    corpus_name = 'CORPUS'
    query_subcorpus = 'Results'
    analysis_client.cqp_query(corpus_name, query_subcorpus, query)

    # Evaluate the query results.
    match_corpus = '{}:{}'.format(corpus_name, query_subcorpus)
    # Number of matches to return. The old code subtracted one from
    # hits_per_page although the value is used as a count, so it returned one
    # hit too few and treated hits_per_page == 1 as "no matches".
    match_num = min(hits_per_page,
                    analysis_client.cqp_subcorpus_size(match_corpus))
    if match_num <= 0:
        # Answer with an empty result instead of calling exit(), which raised
        # SystemExit inside the server and left the client without a reply.
        socketio.emit('query', [], room=sid)
        return
    if not (analysis_client.cqp_subcorpus_has_field(match_corpus,
                                                    field_match)
            and analysis_client.cqp_subcorpus_has_field(match_corpus,
                                                        field_matchend)):
        socketio.emit('query', '[ERROR 500]: Internal Server Error',
                      room=sid)
        return
    match_starts = analysis_client.cqp_dump_subcorpus(
        match_corpus, field_match, 0, match_num - 1)
    match_ends = analysis_client.cqp_dump_subcorpus(
        match_corpus, field_matchend, 0, match_num - 1)
    # One list of corpus positions per match, both boundaries included.
    cpos_per_match = [list(range(match_start, match_end + 1))
                      for match_start, match_end
                      in zip(match_starts, match_ends)]
    # Look every distinct position up only once, even if matches overlap.
    cpos_list = sorted({cpos for cpos_run in cpos_per_match
                        for cpos in cpos_run})
    pos_list = analysis_client.cl_cpos2str('{}.pos'.format(corpus_name),
                                           cpos_list)
    word_list = analysis_client.cl_cpos2str('{}.word'.format(corpus_name),
                                            cpos_list)
    token_by_cpos = {cpos: (pos, word)
                     for cpos, pos, word
                     in zip(cpos_list, pos_list, word_list)}
    matches = [
        {'pos_list': [token_by_cpos[cpos][0] for cpos in cpos_run],
         'word_list': [token_by_cpos[cpos][1] for cpos in cpos_run]}
        for cpos_run in cpos_per_match
    ]
    logger.warning(matches)
    socketio.emit('query', matches, room=sid)
def observe_corpus_analysis_connection(app, corpus_id, session_id):
    """Wait for a Socket.IO session to disconnect, then tear it down.

    Runs inside ``app``'s application context. It polls
    ``connected_sessions`` every three seconds; once ``session_id`` is no
    longer listed, it says goodbye on the session's CQi client (if any),
    removes the session from ``analysis_sessions`` and, when that was the
    last session of the corpus, sets the corpus status to 'stop analysis'.

    :param app: Flask application object providing the app context.
    :param corpus_id: primary key of the analysed Corpus.
    :param session_id: Socket.IO session id (sid) to observe.
    """
    with app.app_context():
        while session_id in connected_sessions:
            socketio.sleep(3)
        try:
            analysis_client = analysis_clients.pop(session_id, None)
            if analysis_client is not None:
                analysis_client.ctrl_bye()
        finally:
            # Run the bookkeeping even if ctrl_bye() fails; otherwise the
            # session entry would leak and the corpus would never be stopped.
            sessions = analysis_sessions.get(corpus_id)
            if sessions is not None and session_id in sessions:
                sessions.remove(session_id)
            if not sessions:
                # Only stop the analysis once the last session has left, so
                # other sessions on the same corpus keep working.
                analysis_sessions.pop(corpus_id, None)
                corpus = Corpus.query.get(corpus_id)
                # The corpus may have been deleted in the meantime.
                if corpus is not None:
                    corpus.status = 'stop analysis'
                    db.session.commit()