Merge branch 'development' of gitlab.ub.uni-bielefeld.de:sfb1288inf/opaque into development

This commit is contained in:
Stephan Porada
2019-11-18 14:26:39 +01:00
27 changed files with 749 additions and 637 deletions

View File

@ -3,7 +3,7 @@ from app.models import Corpus, CorpusFile
def delete_corpus_(app, corpus_id):
    """Delete the corpus with the given id inside an application context.

    Args:
        app: Application object; its ``app_context()`` is entered for the
            lookup and the deletion.
        corpus_id: Identifier handed to ``Corpus.query.get``.

    Raises:
        Exception: If no corpus is found for ``corpus_id``.
    """
    with app.app_context():
        # One primary-key lookup is sufficient; a preceding
        # ``filter_by(id=...).first()`` would only be overwritten here.
        corpus = Corpus.query.get(corpus_id)
        if corpus is None:
            raise Exception('Corpus {} not found!'.format(corpus_id))
        corpus.delete()
@ -11,7 +11,7 @@ def delete_corpus_(app, corpus_id):
def delete_corpus_file_(app, corpus_file_id):
    """Delete the corpus file with the given id inside an application context.

    Args:
        app: Application object; its ``app_context()`` is entered for the
            lookup and the deletion.
        corpus_file_id: Identifier handed to ``CorpusFile.query.get``.

    Raises:
        Exception: If no corpus file is found for ``corpus_file_id``.
    """
    with app.app_context():
        # One primary-key lookup is sufficient; a preceding
        # ``filter_by(id=...).first()`` would only be overwritten here.
        corpus_file = CorpusFile.query.get(corpus_file_id)
        if corpus_file is None:
            raise Exception('Corpus file {} not found!'.format(corpus_file_id))
        corpus_file.delete()
@ -19,7 +19,7 @@ def delete_corpus_file_(app, corpus_file_id):
def edit_corpus_file_(app, corpus_file_id):
    """Call ``insert_metadata()`` on the corpus file with the given id.

    Args:
        app: Application object; its ``app_context()`` is entered for the
            lookup and the metadata update.
        corpus_file_id: Identifier handed to ``CorpusFile.query.get``.

    Raises:
        Exception: If no corpus file is found for ``corpus_file_id``.
    """
    with app.app_context():
        # One primary-key lookup is sufficient; a preceding
        # ``filter_by(id=...).first()`` would only be overwritten here.
        corpus_file = CorpusFile.query.get(corpus_file_id)
        if corpus_file is None:
            raise Exception('Corpus file {} not found!'.format(corpus_file_id))
        corpus_file.insert_metadata()

View File

@ -21,68 +21,90 @@ analysis_clients = {}
# Socket.IO handler for the 'init_corpus_analysis' event; login is required.
# NOTE(review): this span appears to contain both superseded and replacement
# lines of a change, with change markers and indentation lost: two consecutive
# `corpus = ...` lookups, an `if not (...)` next to an `elif not (...)`,
# `str(corpus_id)` and plain `corpus_id` used as session keys, and two
# `start_background_task` calls. Confirm which lines are current before
# relying on the control flow.
@socketio.on('init_corpus_analysis')
@login_required
def init_corpus_analysis(corpus_id):
corpus = Corpus.query.filter_by(id=corpus_id).first()
corpus = Corpus.query.get(corpus_id)
# Unknown corpus: send a 404-style message to the requesting session only.
if corpus is None:
socketio.emit('init_corpus_analysis', '[ERROR 404]: Not Found',
room=request.sid)
# Only the corpus creator or an administrator may proceed; everyone else
# receives a 403-style message.
if not (corpus.creator == current_user or current_user.is_administrator()):
elif not (corpus.creator == current_user
or current_user.is_administrator()):
socketio.emit('init_corpus_analysis', '[ERROR 403]: Forbidden',
room=request.sid)
# Register this client's session id under the corpus in analysis_sessions.
if str(corpus_id) not in analysis_sessions:
analysis_sessions[str(corpus_id)] = [request.sid]
socketio.start_background_task(observe_corpus_analysis_connection,
current_app._get_current_object(),
corpus_id, request.sid)
else:
if corpus_id not in analysis_sessions:
analysis_sessions[corpus_id] = [request.sid]
else:
analysis_sessions[corpus_id].append(request.sid)
# Poll every 3 seconds until the corpus status becomes 'analysing'.
while corpus.status != 'analysing':
db.session.refresh(corpus)
socketio.sleep(3)
# Create a CQi client for this session; the host name is built from the
# creator's username and the corpus id.
analysis_clients[request.sid] = CQiClient(
host='{}_analysis_container{}'.format(corpus.creator.username,
corpus.id))
analysis_clients[request.sid].ctrl_connect('opaque', 'opaque')
socketio.emit('init_corpus_analysis', 'Ready', room=request.sid)
# Hand the real app object (not the proxy) to the background observer task.
socketio.start_background_task(observe_corpus_analysis_connection,
current_app._get_current_object(),
corpus_id, request.sid)
# Socket.IO handler that runs a query for the calling session and emits the
# matches back to that session; login is required.
# NOTE(review): this span appears to contain both superseded and replacement
# lines of a change, with change markers and indentation lost (two
# `@socketio.on` decorators, two `analysis_client = ...` lookups, two result
# flows emitting 'query_results' and 'query'). Confirm which lines are current.
# NOTE(review): `corpus` is read below (cqp_query, match_corpus, cl_cpos2str)
# but only `corpus_name` is assigned in the visible handler — verify.
# NOTE(review): `exit()` is called on the "no matches"/"error" paths; confirm
# that terminating is intended inside an event handler.
@socketio.on('query_event')
@socketio.on('query')
@login_required
def recv_query(message):
logger.warning(message)
analysis_client = analysis_clients[request.sid]
analysis_client.connect()
# No client registered for this session: report a 424-style message and stop.
analysis_client = analysis_clients.get(request.sid)
if analysis_client is None:
socketio.emit('query', '[ERROR 424]: Failed Dependency',
room=request.sid)
return
""" Prepare and execute a query """
corpus_name = 'CORPUS'
query = message['query']
result_subcorpus_name = 'Results'
analysis_client.set_corpus_name(corpus_name)
logger.warning('Corpus name has been set.')
analysis_client.create_attribute_strings()
logger.warning('Attribute Strings have been created.')
analysis_client.query_subcorpus(result_subcorpus_name, query)
logger.warning('Subcorpus from query has been created.')
subcorpora = analysis_client.show_subcorpora()
logger.warning('Known subcorpora: {}'.format(subcorpora))
matches = analysis_client.show_results(result_start_count=1,
result_max_count=3)
logger.warning('Match data: {}'.format(matches))
socketio.emit('query_results', matches, room=request.sid)
query_subcorpus = 'Results'
analysis_client.cqp_query(corpus, query_subcorpus, query)
""" Evaluate query results """
match_corpus = '{}:{}'.format(corpus, query_subcorpus)
# Cap the number of matches at hits_per_page - 1 or the subcorpus size,
# whichever is smaller.
match_num = min(int(message['hits_per_page']) - 1,
analysis_client.cqp_subcorpus_size(match_corpus))
if match_num == 0:
print('No matches found.')
exit()
# Both the match-start and match-end fields must exist in the result subcorpus.
if not analysis_client.cqp_subcorpus_has_field(match_corpus, CONST_FIELD_MATCH):
print('Error.')
exit()
if not analysis_client.cqp_subcorpus_has_field(match_corpus, CONST_FIELD_MATCHEND):
print('Error')
exit()
# Pair each dumped match start with its dumped match end.
match_boundaries = zip(analysis_client.cqp_dump_subcorpus(match_corpus, CONST_FIELD_MATCH, 0, match_num - 1),
analysis_client.cqp_dump_subcorpus(match_corpus, CONST_FIELD_MATCHEND, 0, match_num - 1))
matches = []
# Expand every (start, end) pair into the inclusive list of positions.
for match_start, match_end in match_boundaries:
matches.append({'cpos_list': list(range(match_start, match_end + 1))})
# Collect the distinct positions of all matches so that each position is
# looked up once.
cpos_list = []
for match in matches:
cpos_list = cpos_list + match['cpos_list']
cpos_list = list(set(cpos_list))
pos_list = analysis_client.cl_cpos2str('{}.pos'.format(corpus), cpos_list)
word_list = analysis_client.cl_cpos2str('{}.word'.format(corpus), cpos_list)
# Lookup table: position -> {'pos': ..., 'word': ...}.
foo = {}
for cpos, pos, word in zip(cpos_list, pos_list, word_list):
foo[cpos] = {'pos': pos, 'word': word}
# Replace each match's position list with parallel 'pos_list'/'word_list'.
for match in matches:
match['pos_list'] = [foo[cpos]['pos'] for cpos in match['cpos_list']]
match['word_list'] = [foo[cpos]['word'] for cpos in match['cpos_list']]
match.pop('cpos_list', None)
logger.warning(matches)
socketio.emit('query', matches, room=request.sid)
# Background task started by the 'init_corpus_analysis' handler: waits for the
# corpus to reach status 'analysing', registers an analysis client for the
# session, then idles while the session is connected and cleans up afterwards.
# NOTE(review): this span appears to contain both superseded and replacement
# lines of a change, with change markers and indentation lost (teardown via
# `disconnect()` and via `ctrl_bye()`, `str(corpus_id)` and plain `corpus_id`
# session keys). Confirm which lines are current.
# NOTE(review): `connected_sessions` is not defined in the visible part of the
# file — presumably maintained by connect/disconnect handlers; verify.
def observe_corpus_analysis_connection(app, corpus_id, session_id):
with app.app_context():
corpus = Corpus.query.filter_by(id=corpus_id).first()
# Poll every 3 seconds until the corpus status becomes 'analysing'.
while corpus.status != 'analysing':
db.session.refresh(corpus)
socketio.sleep(3)
analysis_client = CQiWrapper(host='{}_analysis_container{}'.format(corpus.creator.username, corpus.id), port=4877, password='opaque', username='opaque')
analysis_clients[session_id] = analysis_client
# Tell only the initiating session that the analysis client is ready.
socketio.emit('init_corpus_analysis', 'Ready', room=session_id)
# Idle in 3-second steps for as long as the session is listed as connected.
# The string literal below holds a disabled ping-based liveness check.
while session_id in connected_sessions:
'''
try:
analysis_client.ctrl_ping()
except Exception as err:
logger.warning('[Exception]: {}'.format(err))
break
else:
socketio.sleep(3)
'''
socketio.sleep(3)
# Teardown: drop the client and remove the session from the corpus' list.
analysis_client.disconnect()
analysis_clients.pop(session_id, None)
analysis_sessions[str(corpus_id)].remove(session_id)
if not analysis_sessions[str(corpus_id)]:
analysis_sessions.pop(str(corpus_id), None)
analysis_client = analysis_clients.pop(session_id, None)
if analysis_client is not None:
analysis_client.ctrl_bye()
analysis_sessions[corpus_id].remove(session_id)
# When no session is left for this corpus, forget the corpus entry and set
# the corpus status to 'stop analysis'.
# NOTE(review): indentation is lost, so whether the status update is inside
# this `if` cannot be read from here — confirm.
if not analysis_sessions[corpus_id]:
analysis_sessions.pop(corpus_id, None)
corpus = Corpus.query.get(corpus_id)
corpus.status = 'stop analysis'
db.session.commit()

View File

@ -1,16 +1,16 @@
from app import db, logger
from app import db
from app.models import Corpus, CorpusFile
from flask import (abort, current_app, flash, redirect, request,
render_template, url_for, send_from_directory)
from flask_login import current_user, login_required
from threading import Thread
from werkzeug.utils import secure_filename
from . import corpora
from .background_functions import (delete_corpus_, delete_corpus_file_,
edit_corpus_file_)
edit_corpus_file_)
from .forms import (AddCorpusFileForm, AddCorpusForm, EditCorpusFileForm,
QueryDownloadForm, QueryForm)
import os
import threading
@corpora.route('/add', methods=['GET', 'POST'])
@ -48,31 +48,21 @@ def corpus(corpus_id):
title='Corpus')
@corpora.route('/<int:corpus_id>/analyse')
@login_required
def analyse_corpus(corpus_id):
    """Render the analysis page for a corpus and request its analysis start.

    Args:
        corpus_id: Integer id taken from the URL; a missing corpus results
            in a 404 via ``get_or_404``.

    Returns:
        The rendered ``corpora/analyse_corpus.html.j2`` template.
    """
    corpus = Corpus.query.get_or_404(corpus_id)
    # A corpus in state 'prepared' is flagged so that its analysis is started.
    if corpus.status == 'prepared':
        corpus.status = 'start analysis'
        db.session.commit()
    query_download_form = QueryDownloadForm()
    # Pre-fill the query form from the URL parameters, with defaults of
    # 10 for the context and 30 for the hits per page.
    query_form = QueryForm(context=request.args.get('context', 10),
                           hits_per_page=request.args.get('hits_per_page', 30),
                           query=request.args.get('query'))
    return render_template('corpora/analyse_corpus.html.j2',
                           corpus_id=corpus_id,
                           query_download_form=query_download_form,
                           query_form=query_form, title='Analyse Corpus')
@corpora.route('/<int:corpus_id>/delete')
@ -81,9 +71,8 @@ def delete_corpus(corpus_id):
corpus = Corpus.query.get_or_404(corpus_id)
if not (corpus.creator == current_user or current_user.is_administrator()):
abort(403)
thread = threading.Thread(target=delete_corpus_,
args=(current_app._get_current_object(),
corpus.id))
thread = Thread(target=delete_corpus_,
args=(current_app._get_current_object(), corpus.id))
thread.start()
flash('Corpus deleted!')
return redirect(url_for('main.dashboard'))
@ -102,23 +91,21 @@ def add_corpus_file(corpus_id):
for corpus_file in corpus.files:
if filename == corpus_file.filename:
flash('File already registered to this corpus.')
return redirect(url_for('corpora.corpus', corpus_id=corpus_id))
return redirect(url_for('corpora.add_corpus_file',
corpus_id=corpus_id))
# Save the file
dir = os.path.join(str(corpus.user_id), 'corpora', str(corpus.id))
file.save(os.path.join(current_app.config['OPAQUE_STORAGE_DIRECTORY'],
dir, filename))
corpus_file = CorpusFile(author=add_corpus_file_form.author.data,
corpus=corpus,
dir=dir,
filename=filename,
corpus=corpus, dir=dir, filename=filename,
publishing_year=add_corpus_file_form.publishing_year.data,
title=add_corpus_file_form.title.data)
db.session.add(corpus_file)
db.session.commit()
thread = threading.Thread(target=edit_corpus_file_,
args=(current_app._get_current_object(),
corpus_file.id))
thread = Thread(target=edit_corpus_file_,
args=(current_app._get_current_object(),
corpus_file.id))
thread.start()
flash('Corpus file added!')
return redirect(url_for('corpora.corpus', corpus_id=corpus_id))
@ -136,9 +123,8 @@ def delete_corpus_file(corpus_id, corpus_file_id):
if not (corpus_file.corpus.creator == current_user
or current_user.is_administrator()):
abort(403)
thread = threading.Thread(target=delete_corpus_file_,
args=(current_app._get_current_object(),
corpus_file.id))
thread = Thread(target=delete_corpus_file_,
args=(current_app._get_current_object(), corpus_file.id))
thread.start()
flash('Corpus file deleted!')
return redirect(url_for('corpora.corpus', corpus_id=corpus_id))
@ -175,9 +161,9 @@ def edit_corpus_file(corpus_id, corpus_file_id):
corpus_file.publishing_year = edit_corpus_file_form.publishing_year.data
corpus_file.title = edit_corpus_file_form.title.data
db.session.commit()
thread = threading.Thread(target=edit_corpus_file_,
args=(current_app._get_current_object(),
corpus_file.id))
thread = Thread(target=edit_corpus_file_,
args=(current_app._get_current_object(),
corpus_file.id))
thread.start()
flash('Corpus file edited!')
return redirect(url_for('corpora.corpus', corpus_id=corpus_id))