mirror of
https://gitlab.ub.uni-bielefeld.de/sfb1288inf/nopaque.git
synced 2025-06-12 17:10:41 +00:00
Merge branch 'development' of gitlab.ub.uni-bielefeld.de:sfb1288inf/opaque into development
This commit is contained in:
@ -3,7 +3,7 @@ from app.models import Corpus, CorpusFile
|
||||
|
||||
def delete_corpus_(app, corpus_id):
|
||||
with app.app_context():
|
||||
corpus = Corpus.query.filter_by(id=corpus_id).first()
|
||||
corpus = Corpus.query.get(corpus_id)
|
||||
if corpus is None:
|
||||
raise Exception('Corpus {} not found!'.format(corpus_id))
|
||||
corpus.delete()
|
||||
@ -11,7 +11,7 @@ def delete_corpus_(app, corpus_id):
|
||||
|
||||
def delete_corpus_file_(app, corpus_file_id):
|
||||
with app.app_context():
|
||||
corpus_file = CorpusFile.query.filter_by(id=corpus_file_id).first()
|
||||
corpus_file = CorpusFile.query.get(corpus_file_id)
|
||||
if corpus_file is None:
|
||||
raise Exception('Corpus file {} not found!'.format(corpus_file_id))
|
||||
corpus_file.delete()
|
||||
@ -19,7 +19,7 @@ def delete_corpus_file_(app, corpus_file_id):
|
||||
|
||||
def edit_corpus_file_(app, corpus_file_id):
|
||||
with app.app_context():
|
||||
corpus_file = CorpusFile.query.filter_by(id=corpus_file_id).first()
|
||||
corpus_file = CorpusFile.query.get(corpus_file_id)
|
||||
if corpus_file is None:
|
||||
raise Exception('Corpus file {} not found!'.format(corpus_file_id))
|
||||
corpus_file.insert_metadata()
|
||||
|
@ -21,68 +21,90 @@ analysis_clients = {}
|
||||
@socketio.on('init_corpus_analysis')
|
||||
@login_required
|
||||
def init_corpus_analysis(corpus_id):
|
||||
corpus = Corpus.query.filter_by(id=corpus_id).first()
|
||||
corpus = Corpus.query.get(corpus_id)
|
||||
if corpus is None:
|
||||
socketio.emit('init_corpus_analysis', '[ERROR 404]: Not Found',
|
||||
room=request.sid)
|
||||
if not (corpus.creator == current_user or current_user.is_administrator()):
|
||||
elif not (corpus.creator == current_user
|
||||
or current_user.is_administrator()):
|
||||
socketio.emit('init_corpus_analysis', '[ERROR 403]: Forbidden',
|
||||
room=request.sid)
|
||||
if str(corpus_id) not in analysis_sessions:
|
||||
analysis_sessions[str(corpus_id)] = [request.sid]
|
||||
socketio.start_background_task(observe_corpus_analysis_connection,
|
||||
current_app._get_current_object(),
|
||||
corpus_id, request.sid)
|
||||
else:
|
||||
if corpus_id not in analysis_sessions:
|
||||
analysis_sessions[corpus_id] = [request.sid]
|
||||
else:
|
||||
analysis_sessions[corpus_id].append(request.sid)
|
||||
while corpus.status != 'analysing':
|
||||
db.session.refresh(corpus)
|
||||
socketio.sleep(3)
|
||||
analysis_clients[request.sid] = CQiClient(
|
||||
host='{}_analysis_container{}'.format(corpus.creator.username,
|
||||
corpus.id))
|
||||
analysis_clients[request.sid].ctrl_connect('opaque', 'opaque')
|
||||
socketio.emit('init_corpus_analysis', 'Ready', room=request.sid)
|
||||
socketio.start_background_task(observe_corpus_analysis_connection,
|
||||
current_app._get_current_object(),
|
||||
corpus_id, request.sid)
|
||||
|
||||
|
||||
@socketio.on('query_event')
|
||||
@socketio.on('query')
|
||||
@login_required
|
||||
def recv_query(message):
|
||||
logger.warning(message)
|
||||
analysis_client = analysis_clients[request.sid]
|
||||
analysis_client.connect()
|
||||
analysis_client = analysis_clients.get(request.sid)
|
||||
if analysis_client is None:
|
||||
socketio.emit('query', '[ERROR 424]: Failed Dependency',
|
||||
room=request.sid)
|
||||
return
|
||||
""" Prepare and execute a query """
|
||||
corpus_name = 'CORPUS'
|
||||
query = message['query']
|
||||
result_subcorpus_name = 'Results'
|
||||
analysis_client.set_corpus_name(corpus_name)
|
||||
logger.warning('Corpus name has been set.')
|
||||
analysis_client.create_attribute_strings()
|
||||
logger.warning('Attribute Strings have been created.')
|
||||
analysis_client.query_subcorpus(result_subcorpus_name, query)
|
||||
logger.warning('Subcorpus from query has been created.')
|
||||
subcorpora = analysis_client.show_subcorpora()
|
||||
logger.warning('Known subcorpora: {}'.format(subcorpora))
|
||||
matches = analysis_client.show_results(result_start_count=1,
|
||||
result_max_count=3)
|
||||
logger.warning('Match data: {}'.format(matches))
|
||||
socketio.emit('query_results', matches, room=request.sid)
|
||||
query_subcorpus = 'Results'
|
||||
analysis_client.cqp_query(corpus, query_subcorpus, query)
|
||||
""" Evaluate query results """
|
||||
match_corpus = '{}:{}'.format(corpus, query_subcorpus)
|
||||
match_num = min(int(message['hits_per_page']) - 1,
|
||||
analysis_client.cqp_subcorpus_size(match_corpus))
|
||||
if match_num == 0:
|
||||
print('No matches found.')
|
||||
exit()
|
||||
if not analysis_client.cqp_subcorpus_has_field(match_corpus, CONST_FIELD_MATCH):
|
||||
print('Error.')
|
||||
exit()
|
||||
if not analysis_client.cqp_subcorpus_has_field(match_corpus, CONST_FIELD_MATCHEND):
|
||||
print('Error')
|
||||
exit()
|
||||
match_boundaries = zip(analysis_client.cqp_dump_subcorpus(match_corpus, CONST_FIELD_MATCH, 0, match_num - 1),
|
||||
analysis_client.cqp_dump_subcorpus(match_corpus, CONST_FIELD_MATCHEND, 0, match_num - 1))
|
||||
matches = []
|
||||
for match_start, match_end in match_boundaries:
|
||||
matches.append({'cpos_list': list(range(match_start, match_end + 1))})
|
||||
cpos_list = []
|
||||
for match in matches:
|
||||
cpos_list = cpos_list + match['cpos_list']
|
||||
cpos_list = list(set(cpos_list))
|
||||
pos_list = analysis_client.cl_cpos2str('{}.pos'.format(corpus), cpos_list)
|
||||
word_list = analysis_client.cl_cpos2str('{}.word'.format(corpus), cpos_list)
|
||||
foo = {}
|
||||
for cpos, pos, word in zip(cpos_list, pos_list, word_list):
|
||||
foo[cpos] = {'pos': pos, 'word': word}
|
||||
for match in matches:
|
||||
match['pos_list'] = [foo[cpos]['pos'] for cpos in match['cpos_list']]
|
||||
match['word_list'] = [foo[cpos]['word'] for cpos in match['cpos_list']]
|
||||
match.pop('cpos_list', None)
|
||||
logger.warning(matches)
|
||||
socketio.emit('query', matches, room=request.sid)
|
||||
|
||||
|
||||
def observe_corpus_analysis_connection(app, corpus_id, session_id):
|
||||
with app.app_context():
|
||||
corpus = Corpus.query.filter_by(id=corpus_id).first()
|
||||
while corpus.status != 'analysing':
|
||||
db.session.refresh(corpus)
|
||||
socketio.sleep(3)
|
||||
analysis_client = CQiWrapper(host='{}_analysis_container{}'.format(corpus.creator.username, corpus.id), port=4877, password='opaque', username='opaque')
|
||||
analysis_clients[session_id] = analysis_client
|
||||
socketio.emit('init_corpus_analysis', 'Ready', room=session_id)
|
||||
while session_id in connected_sessions:
|
||||
'''
|
||||
try:
|
||||
analysis_client.ctrl_ping()
|
||||
except Exception as err:
|
||||
logger.warning('[Exception]: {}'.format(err))
|
||||
break
|
||||
else:
|
||||
socketio.sleep(3)
|
||||
'''
|
||||
socketio.sleep(3)
|
||||
analysis_client.disconnect()
|
||||
analysis_clients.pop(session_id, None)
|
||||
analysis_sessions[str(corpus_id)].remove(session_id)
|
||||
if not analysis_sessions[str(corpus_id)]:
|
||||
analysis_sessions.pop(str(corpus_id), None)
|
||||
analysis_client = analysis_clients.pop(session_id, None)
|
||||
if analysis_client is not None:
|
||||
analysis_client.ctrl_bye()
|
||||
analysis_sessions[corpus_id].remove(session_id)
|
||||
if not analysis_sessions[corpus_id]:
|
||||
analysis_sessions.pop(corpus_id, None)
|
||||
corpus = Corpus.query.get(corpus_id)
|
||||
corpus.status = 'stop analysis'
|
||||
db.session.commit()
|
||||
|
@ -1,16 +1,16 @@
|
||||
from app import db, logger
|
||||
from app import db
|
||||
from app.models import Corpus, CorpusFile
|
||||
from flask import (abort, current_app, flash, redirect, request,
|
||||
render_template, url_for, send_from_directory)
|
||||
from flask_login import current_user, login_required
|
||||
from threading import Thread
|
||||
from werkzeug.utils import secure_filename
|
||||
from . import corpora
|
||||
from .background_functions import (delete_corpus_, delete_corpus_file_,
|
||||
edit_corpus_file_)
|
||||
edit_corpus_file_)
|
||||
from .forms import (AddCorpusFileForm, AddCorpusForm, EditCorpusFileForm,
|
||||
QueryDownloadForm, QueryForm)
|
||||
import os
|
||||
import threading
|
||||
|
||||
|
||||
@corpora.route('/add', methods=['GET', 'POST'])
|
||||
@ -48,31 +48,21 @@ def corpus(corpus_id):
|
||||
title='Corpus')
|
||||
|
||||
|
||||
@corpora.route('/<int:corpus_id>/analysis', methods=['GET', 'POST'])
|
||||
@corpora.route('/<int:corpus_id>/analyse')
|
||||
@login_required
|
||||
def corpus_analysis(corpus_id):
|
||||
def analyse_corpus(corpus_id):
|
||||
corpus = Corpus.query.get_or_404(corpus_id)
|
||||
if corpus.status == 'prepared':
|
||||
corpus.status = 'start analysis'
|
||||
db.session.commit()
|
||||
query = request.args.get('query')
|
||||
logger.warning('Query first: {}'.format(query))
|
||||
hits_per_page = request.args.get('hits_per_page', 30)
|
||||
context = request.args.get('context', 10)
|
||||
dl_form = QueryDownloadForm()
|
||||
form = QueryForm(hits_per_page=hits_per_page, context=context, query=query)
|
||||
if form.validate_on_submit():
|
||||
flash('Query has been sent!')
|
||||
query = form.query.data
|
||||
hits_per_page = form.hits_per_page.data
|
||||
context = form.context.data
|
||||
return redirect(url_for('corpora.corpus_analysis', corpus_id=corpus_id,
|
||||
query=query, hits_per_page=hits_per_page,
|
||||
context=context))
|
||||
return render_template('corpora/corpus_analysis.html.j2',
|
||||
query_download_form = QueryDownloadForm()
|
||||
query_form = QueryForm(context=request.args.get('context', 10),
|
||||
hits_per_page=request.args.get('hits_per_page', 30),
|
||||
query=request.args.get('query'))
|
||||
return render_template('corpora/analyse_corpus.html.j2',
|
||||
corpus_id=corpus_id,
|
||||
form=form, dl_form=dl_form,
|
||||
title='Corpus: {}'.format(corpus.title))
|
||||
query_download_form=query_download_form,
|
||||
query_form=query_form, title='Analyse Corpus')
|
||||
|
||||
|
||||
@corpora.route('/<int:corpus_id>/delete')
|
||||
@ -81,9 +71,8 @@ def delete_corpus(corpus_id):
|
||||
corpus = Corpus.query.get_or_404(corpus_id)
|
||||
if not (corpus.creator == current_user or current_user.is_administrator()):
|
||||
abort(403)
|
||||
thread = threading.Thread(target=delete_corpus_,
|
||||
args=(current_app._get_current_object(),
|
||||
corpus.id))
|
||||
thread = Thread(target=delete_corpus_,
|
||||
args=(current_app._get_current_object(), corpus.id))
|
||||
thread.start()
|
||||
flash('Corpus deleted!')
|
||||
return redirect(url_for('main.dashboard'))
|
||||
@ -102,23 +91,21 @@ def add_corpus_file(corpus_id):
|
||||
for corpus_file in corpus.files:
|
||||
if filename == corpus_file.filename:
|
||||
flash('File already registered to this corpus.')
|
||||
return redirect(url_for('corpora.corpus', corpus_id=corpus_id))
|
||||
return redirect(url_for('corpora.add_corpus_file',
|
||||
corpus_id=corpus_id))
|
||||
# Save the file
|
||||
dir = os.path.join(str(corpus.user_id), 'corpora', str(corpus.id))
|
||||
file.save(os.path.join(current_app.config['OPAQUE_STORAGE_DIRECTORY'],
|
||||
dir, filename))
|
||||
|
||||
corpus_file = CorpusFile(author=add_corpus_file_form.author.data,
|
||||
corpus=corpus,
|
||||
dir=dir,
|
||||
filename=filename,
|
||||
corpus=corpus, dir=dir, filename=filename,
|
||||
publishing_year=add_corpus_file_form.publishing_year.data,
|
||||
title=add_corpus_file_form.title.data)
|
||||
db.session.add(corpus_file)
|
||||
db.session.commit()
|
||||
thread = threading.Thread(target=edit_corpus_file_,
|
||||
args=(current_app._get_current_object(),
|
||||
corpus_file.id))
|
||||
thread = Thread(target=edit_corpus_file_,
|
||||
args=(current_app._get_current_object(),
|
||||
corpus_file.id))
|
||||
thread.start()
|
||||
flash('Corpus file added!')
|
||||
return redirect(url_for('corpora.corpus', corpus_id=corpus_id))
|
||||
@ -136,9 +123,8 @@ def delete_corpus_file(corpus_id, corpus_file_id):
|
||||
if not (corpus_file.corpus.creator == current_user
|
||||
or current_user.is_administrator()):
|
||||
abort(403)
|
||||
thread = threading.Thread(target=delete_corpus_file_,
|
||||
args=(current_app._get_current_object(),
|
||||
corpus_file.id))
|
||||
thread = Thread(target=delete_corpus_file_,
|
||||
args=(current_app._get_current_object(), corpus_file.id))
|
||||
thread.start()
|
||||
flash('Corpus file deleted!')
|
||||
return redirect(url_for('corpora.corpus', corpus_id=corpus_id))
|
||||
@ -175,9 +161,9 @@ def edit_corpus_file(corpus_id, corpus_file_id):
|
||||
corpus_file.publishing_year = edit_corpus_file_form.publishing_year.data
|
||||
corpus_file.title = edit_corpus_file_form.title.data
|
||||
db.session.commit()
|
||||
thread = threading.Thread(target=edit_corpus_file_,
|
||||
args=(current_app._get_current_object(),
|
||||
corpus_file.id))
|
||||
thread = Thread(target=edit_corpus_file_,
|
||||
args=(current_app._get_current_object(),
|
||||
corpus_file.id))
|
||||
thread.start()
|
||||
flash('Corpus file edited!')
|
||||
return redirect(url_for('corpora.corpus', corpus_id=corpus_id))
|
||||
|
Reference in New Issue
Block a user