diff --git a/app/corpora/events.py b/app/corpora/events.py index 0da86efc..1298a847 100644 --- a/app/corpora/events.py +++ b/app/corpora/events.py @@ -47,15 +47,44 @@ def corpus_analysis(message): room=request.sid) return """ Prepare and execute a query """ - logger.warning('Payload: {}'.format(message)) - corpus_name = 'CORPUS' - query = message['query'] - client.select_corpus(corpus_name) - client.query_subcorpus(query) - results = client.show_query_results( - result_len=int(message['hits_per_page']), - context_len=int(message['context'])) - socketio.emit('corpus_analysis', results, room=request.sid) + corpus = 'CORPUS' + query = (message['query']) + query_subcorpus = 'Results' + client.cqp_query(corpus, query_subcorpus, query) + + data = {'matches': [], 'cpos_lookup': {}, 'text_loopup': {}} + + """ Evaluate query results """ + match_corpus = '{}:{}'.format(corpus, query_subcorpus) + match_num = min(int(message['hits_per_page']), client.cqp_subcorpus_size(match_corpus)) + match_boundaries = zip(client.cqp_dump_subcorpus(match_corpus, + 0x10, + 0, match_num - 1), + client.cqp_dump_subcorpus(match_corpus, + 0x11, + 0, match_num - 1)) + context = 15 + corpus_len = 10000 + for match_start, match_end in match_boundaries: + data['matches'].append({'lc': list(range(max(0, match_start - int(message['context'])), match_start)), + 'hit': list(range(match_start, match_end + 1)), + 'rc': list(range(match_end + 1, min(corpus_len, match_end + 1 + int(message['context']))))}) + cpos_list = [] + for match in data['matches']: + cpos_list += match['lc'] + match['hit'] + match['rc'] + cpos_list = list(set(cpos_list)) + pos_list = client.cl_cpos2str('{}.pos'.format(corpus), cpos_list) + simple_pos_list = client.cl_cpos2str('{}.simple_pos'.format(corpus), cpos_list) + text_id_list = client.cl_cpos2struc('{}.text_title'.format(corpus), cpos_list) + word_list = client.cl_cpos2str('{}.word'.format(corpus), cpos_list) + for cpos, pos, simple_pos, text_id, word in zip(cpos_list, pos_list, simple_pos_list, text_id_list, word_list): + data['cpos_lookup'][cpos] = {'pos': pos, 'simple_pos': simple_pos, 'text_id': text_id, 'word': word} + text_author_list = client.cl_struc2str('{}.text_author'.format(corpus), text_id_list) + text_publishing_year_list = client.cl_struc2str('{}.text_publishing_year'.format(corpus), text_id_list) + text_title_list = client.cl_struc2str('{}.text_title'.format(corpus), text_id_list) + for text_id, text_author, text_publishing_year, text_title in zip(text_id_list, text_author_list, text_publishing_year_list, text_title_list): + data['text_loopup'][text_id] = {'author': text_author, 'publishing_year': text_publishing_year, 'title': text_title} + socketio.emit('corpus_analysis', data, room=request.sid) def corpus_analysis_session_handler(app, corpus_id, session_id): diff --git a/app/templates/corpora/analyse_corpus.html.j2 b/app/templates/corpora/analyse_corpus.html.j2 index c60fa662..da81332b 100644 --- a/app/templates/corpora/analyse_corpus.html.j2 +++ b/app/templates/corpora/analyse_corpus.html.j2 @@ -42,6 +42,7 @@