Big Corpus analysis update

This commit is contained in:
Patrick Jentsch
2021-11-16 15:23:57 +01:00
parent c1436c2a5d
commit f6c2292e03
47 changed files with 2549 additions and 1840 deletions

View File

@ -2,4 +2,4 @@ from flask import Blueprint
bp = Blueprint('corpora', __name__)
from . import events, routes # noqa
from . import cqi_over_socketio, routes # noqa

View File

@ -0,0 +1,108 @@
from app import db, socketio
from app.decorators import socketio_login_required
from app.models import Corpus
from flask import session
from flask_login import current_user
from flask_socketio import ConnectionRefusedError
from threading import Lock
import cqi
'''
This package tunnels the Corpus Query interface (CQi) protocol through
Socket.IO (SIO) by wrapping each CQi function in a separate SIO event.
This module only handles the SIO connect/disconnect events, which take care of
the setup and teardown of the resources needed later on. Each CQi function has
a corresponding SIO event. The event handlers are spread across the different
modules within this package.
Basic concept:
1. A client connects to the SIO namespace and provides the id of a corpus to be
analysed.
1.1 The analysis session counter of the corpus is incremented.
1.2 A CQiClient and a (Mutex) Lock belonging to it is created.
1.3 Wait until the CQP server is running.
1.4 Connect the CQiClient to the server.
1.5 Save the CQiClient and the Lock in the session for subsequent use.
2. A client emits an event and may provide a single json object with necessary
arguments for the targeted CQi function.
3. A SIO event handler (decorated with cqi_over_socketio) gets executed.
- The event handler function defines all arguments. Since the client
sends them as a single json object, the decorator decomposes it to fit
the function's signature. This also includes type checking and proper
use of the lock (acquire/release) mechanism.
4. Wait for more events
5. The client disconnects from the SIO namespace
5.1 The analysis session counter of the corpus is decremented.
5.2 The CQiClient and the (Mutex) Lock belonging to it are torn down.
'''
NAMESPACE = '/corpora/corpus/corpus_analysis'
# Import all CQi over Socket.IO event handlers
from .cqi_corpora_corpus_subcorpora import * # noqa
from .cqi_corpora_corpus_structural_attributes import * # noqa
from .cqi_corpora_corpus_positional_attributes import * # noqa
from .cqi_corpora_corpus_alignment_attributes import * # noqa
from .cqi_corpora_corpus import * # noqa
from .cqi_corpora import * # noqa
from .cqi import * # noqa
@socketio.on('connect', namespace=NAMESPACE)
@socketio_login_required
def connect(auth):
    """Set up a corpus analysis session for a connecting Socket.IO client.

    The ``auth`` payload is expected to be a dict with a ``corpus_id`` key. On
    success the corpus' analysis session counter is incremented and a
    ``CQiClient`` together with its ``Lock`` is stored in ``session['d']``.

    Raises:
        ConnectionRefusedError: ``auth`` is malformed ('Bad Request'), the
            corpus does not exist ('Not Found'), the user is neither its
            creator nor an administrator ('Forbidden'), the corpus status
            does not allow an analysis ('Failed Dependency') or the corpus
            did not reach the 'analysing' status in time ('Request Timeout').
    """
    # The auth variable is used in a hacky way. It contains the corpus id for
    # which a corpus analysis session should be started.
    if not isinstance(auth, dict) or 'corpus_id' not in auth:
        raise ConnectionRefusedError('Bad Request')
    corpus_id = auth['corpus_id']
    corpus = Corpus.query.get(corpus_id)
    if corpus is None:
        raise ConnectionRefusedError('Not Found')
    if not (corpus.creator == current_user or current_user.is_administrator()):
        raise ConnectionRefusedError('Forbidden')
    if corpus.status not in ['prepared', 'start analysis', 'analysing', 'stop analysis']:  # noqa
        raise ConnectionRefusedError('Failed Dependency')
    if corpus.num_analysis_sessions is None:
        corpus.num_analysis_sessions = 0
        db.session.commit()
    # The class level column is used on the right hand side, presumably so
    # that the increment is computed by the database - TODO confirm.
    corpus.num_analysis_sessions = Corpus.num_analysis_sessions + 1
    db.session.commit()
    retry_counter = 20
    while corpus.status != 'analysing':
        if retry_counter == 0:
            # Undo the increment from above before giving up.
            corpus.num_analysis_sessions = Corpus.num_analysis_sessions - 1
            db.session.commit()
            # Returning a dict here would not reject the connection and
            # would leave the client connected without any session state,
            # so refuse it like every other failure above.
            raise ConnectionRefusedError('Request Timeout')
        socketio.sleep(3)
        retry_counter -= 1
        db.session.refresh(corpus)
    cqi_client = cqi.CQiClient('cqpserver_{}'.format(corpus_id))
    session['d'] = {
        'corpus_id': corpus_id,
        'cqi_client': cqi_client,
        'cqi_client_lock': Lock(),
    }
@socketio.on('disconnect', namespace=NAMESPACE)
def disconnect():
    """Tear down the corpus analysis session of a disconnecting client.

    Disconnects the stored ``CQiClient`` (best effort), decrements the
    analysis session counter of the corpus and removes the session data.
    Does nothing when no session data was stored for this client.
    """
    # No session data exists if the connect handler bailed out before
    # storing it, so there is nothing to tear down in that case.
    session_data = session.pop('d', None)
    if session_data is None:
        return
    # The context manager releases the lock even if disconnect() raises
    # something other than the two expected exceptions.
    with session_data['cqi_client_lock']:
        try:
            session_data['cqi_client'].disconnect()
        except (cqi.errors.CQiException, BrokenPipeError):
            # Best effort: the CQP server may already be gone.
            pass
    corpus = Corpus.query.get(session_data['corpus_id'])
    if corpus is not None:
        corpus.num_analysis_sessions = Corpus.num_analysis_sessions - 1
        db.session.commit()

View File

@ -0,0 +1,43 @@
from app import socketio
from app.decorators import socketio_login_required
from socket import gaierror
from . import NAMESPACE as ns
from .utils import cqi_over_socketio
import cqi
@socketio.on('cqi.connect', namespace=ns)
@socketio_login_required
@cqi_over_socketio
def cqi_connect(cqi_client: cqi.CQiClient):
    """Connect ``cqi_client`` and answer with the resulting CQi status.

    A ``socket.gaierror`` raised while connecting is answered with a 500
    response whose payload carries the first two arguments of the error.
    """
    try:
        status = cqi_client.connect()
    except gaierror as error:
        return {
            'code': 500,
            'msg': 'Internal Server Error',
            'payload': {'code': error.args[0], 'desc': error.args[1]},
        }
    status_payload = {
        'code': status,
        'msg': cqi.api.specification.lookup[status],
    }
    return {'code': 200, 'msg': 'OK', 'payload': status_payload}
@socketio.on('cqi.disconnect', namespace=ns)
@socketio_login_required
@cqi_over_socketio
def cqi_disconnect(cqi_client: cqi.CQiClient):
    """Disconnect ``cqi_client`` and answer with the resulting CQi status."""
    status = cqi_client.disconnect()
    status_payload = {
        'code': status,
        'msg': cqi.api.specification.lookup[status],
    }
    return {'code': 200, 'msg': 'OK', 'payload': status_payload}
@socketio.on('cqi.ping', namespace=ns)
@socketio_login_required
@cqi_over_socketio
def cqi_ping(cqi_client: cqi.CQiClient):
    """Ping through ``cqi_client`` and answer with the resulting CQi status."""
    status = cqi_client.ping()
    status_payload = {
        'code': status,
        'msg': cqi.api.specification.lookup[status],
    }
    return {'code': 200, 'msg': 'OK', 'payload': status_payload}

View File

@ -0,0 +1,22 @@
from app import socketio
from app.decorators import socketio_login_required
from . import NAMESPACE as ns
from .utils import cqi_over_socketio
import cqi
@socketio.on('cqi.corpora.get', namespace=ns)
@socketio_login_required
@cqi_over_socketio
def cqi_corpora_get(cqi_client: cqi.CQiClient, corpus_name: str):
    """Answer with a copy of the attributes of the corpus ``corpus_name``."""
    corpus_attrs = dict(cqi_client.corpora.get(corpus_name).attrs)
    return {'code': 200, 'msg': 'OK', 'payload': corpus_attrs}
@socketio.on('cqi.corpora.list', namespace=ns)
@socketio_login_required
@cqi_over_socketio
def cqi_corpora_list(cqi_client: cqi.CQiClient):
    """Answer with the attributes of every corpus listed by the client."""
    all_attrs = []
    for listed_corpus in cqi_client.corpora.list():
        all_attrs.append(dict(listed_corpus.attrs))
    return {'code': 200, 'msg': 'OK', 'payload': all_attrs}

View File

@ -0,0 +1,85 @@
from app import db, socketio
from app.decorators import socketio_login_required
from app.models import Corpus
from flask import session
from . import NAMESPACE as ns
from .utils import cqi_over_socketio, lookups_by_cpos
import cqi
import math
@socketio.on('cqi.corpora.corpus.drop', namespace=ns)
@socketio_login_required
@cqi_over_socketio
def cqi_corpora_corpus_drop(cqi_client: cqi.CQiClient, corpus_name: str):
    """Drop the corpus ``corpus_name`` and answer with the CQi status."""
    status = cqi_client.corpora.get(corpus_name).drop()
    status_payload = {
        'code': status,
        'msg': cqi.api.specification.lookup[status],
    }
    return {'code': 200, 'msg': 'OK', 'payload': status_payload}
@socketio.on('cqi.corpora.corpus.query', namespace=ns)
@socketio_login_required
@cqi_over_socketio
def cqi_corpora_corpus_query(cqi_client: cqi.CQiClient, corpus_name: str, subcorpus_name: str, query: str):  # noqa
    """Run ``query`` on a corpus, passing ``subcorpus_name`` along with it.

    Answers with the CQi status returned by the query call.
    """
    target_corpus = cqi_client.corpora.get(corpus_name)
    status = target_corpus.query(subcorpus_name, query)
    status_payload = {
        'code': status,
        'msg': cqi.api.specification.lookup[status],
    }
    return {'code': 200, 'msg': 'OK', 'payload': status_payload}
###############################################################################
# nopaque specific CQi extensions #
###############################################################################
@socketio.on('cqi.corpora.corpus.update_db', namespace=ns)
@socketio_login_required
@cqi_over_socketio
def cqi_corpora_corpus_update_db(cqi_client: cqi.CQiClient, corpus_name: str):
    """Store the token count reported by the CQP server in the database.

    The database row is looked up via the corpus id kept in the session.
    Answers 404 if that row no longer exists, 200 otherwise.
    """
    corpus = Corpus.query.get(session['d']['corpus_id'])
    if corpus is None:
        return {'code': 404, 'msg': 'Not Found'}
    # NOTE(review): corpus_name is required by the signature but the size is
    # read from the hard coded 'CORPUS' - confirm that this is intended.
    corpus.num_tokens = cqi_client.corpora.get('CORPUS').attrs['size']
    db.session.commit()
    # Acknowledge like every other handler in this package does.
    return {'code': 200, 'msg': 'OK'}
@socketio.on('cqi.corpora.corpus.paginate', namespace=ns)
@socketio_login_required
@cqi_over_socketio
def cqi_corpora_corpus_paginate(cqi_client: cqi.CQiClient, corpus_name: str, page: int = 1, per_page: int = 20):  # noqa
    """Answer with one page of corpus positions and their lookups.

    Answers 416 when ``page`` or ``per_page`` is smaller than 1 or when
    ``page`` lies beyond the last page of a non-empty corpus.
    """
    cqi_corpus = cqi_client.corpora.get(corpus_name)
    # Sanity checks
    if per_page < 1 or page < 1:
        return {'code': 416, 'msg': 'Range Not Satisfiable'}
    total = cqi_corpus.attrs['size']
    pages = math.ceil(total / per_page)
    if total > 0 and page > pages:
        return {'code': 416, 'msg': 'Range Not Satisfiable'}
    page_start = (page - 1) * per_page
    page_stop = min(total, page_start + per_page)
    cpos_list = list(range(page_start, page_stop))
    lookups = lookups_by_cpos(cqi_corpus, cpos_list)
    # The page number is 1 indexed and None if there are no pages at all.
    current_page = page if pages > 0 else None
    has_prev = current_page > 1 if current_page else False
    has_next = current_page < pages if current_page else False
    payload = {
        # the items for the current page
        'items': [cpos_list],
        # the lookups for the items
        'lookups': lookups,
        # the total number of items
        'total': total,
        # the number of items to be displayed on a page
        'per_page': per_page,
        # the total number of pages
        'pages': pages,
        # the current page number (1 indexed)
        'page': current_page,
        # True if a previous page exists
        'has_prev': has_prev,
        # True if a next page exists
        'has_next': has_next,
        # number of the previous page
        'prev_num': current_page - 1 if has_prev else None,
        # number of the next page
        'next_num': current_page + 1 if has_next else None,
    }
    return {'code': 200, 'msg': 'OK', 'payload': payload}

View File

@ -0,0 +1,24 @@
from app import socketio
from app.decorators import socketio_login_required
from . import NAMESPACE as ns
from .utils import cqi_over_socketio
import cqi
@socketio.on('cqi.corpora.corpus.alignment_attributes.get', namespace=ns)
@socketio_login_required
@cqi_over_socketio
def cqi_corpora_corpus_alignment_attributes_get(cqi_client: cqi.CQiClient, corpus_name: str, alignment_attribute_name: str):  # noqa
    """Answer with a copy of the attributes of one alignment attribute."""
    alignment_attributes = cqi_client.corpora.get(corpus_name).alignment_attributes  # noqa
    attribute = alignment_attributes.get(alignment_attribute_name)
    return {'code': 200, 'msg': 'OK', 'payload': dict(attribute.attrs)}
@socketio.on('cqi.corpora.corpus.alignment_attributes.list', namespace=ns)
@socketio_login_required
@cqi_over_socketio
def cqi_corpora_corpus_alignment_attributes_list(cqi_client: cqi.CQiClient, corpus_name: str):  # noqa
    """Answer with the attributes of all alignment attributes of a corpus."""
    target_corpus = cqi_client.corpora.get(corpus_name)
    all_attrs = []
    for attribute in target_corpus.alignment_attributes.list():
        all_attrs.append(dict(attribute.attrs))
    return {'code': 200, 'msg': 'OK', 'payload': all_attrs}

View File

@ -0,0 +1,24 @@
from app import socketio
from app.decorators import socketio_login_required
from . import NAMESPACE as ns
from .utils import cqi_over_socketio
import cqi
@socketio.on('cqi.corpora.corpus.positional_attributes.get', namespace=ns)
@socketio_login_required
@cqi_over_socketio
def cqi_corpora_corpus_positional_attributes_get(cqi_client: cqi.CQiClient, corpus_name: str, positional_attribute_name: str):  # noqa
    """Answer with a copy of the attributes of one positional attribute."""
    positional_attributes = cqi_client.corpora.get(corpus_name).positional_attributes  # noqa
    attribute = positional_attributes.get(positional_attribute_name)
    return {'code': 200, 'msg': 'OK', 'payload': dict(attribute.attrs)}
@socketio.on('cqi.corpora.corpus.positional_attributes.list', namespace=ns)
@socketio_login_required
@cqi_over_socketio
def cqi_corpora_corpus_positional_attributes_list(cqi_client: cqi.CQiClient, corpus_name: str):  # noqa
    """Answer with the attributes of all positional attributes of a corpus."""
    target_corpus = cqi_client.corpora.get(corpus_name)
    all_attrs = []
    for attribute in target_corpus.positional_attributes.list():
        all_attrs.append(dict(attribute.attrs))
    return {'code': 200, 'msg': 'OK', 'payload': all_attrs}

View File

@ -0,0 +1,24 @@
from app import socketio
from app.decorators import socketio_login_required
from . import NAMESPACE as ns
from .utils import cqi_over_socketio
import cqi
@socketio.on('cqi.corpora.corpus.structural_attributes.get', namespace=ns)
@socketio_login_required
@cqi_over_socketio
def cqi_corpora_corpus_structural_attributes_get(cqi_client: cqi.CQiClient, corpus_name: str, structural_attribute_name: str):  # noqa
    """Answer with a copy of the attributes of one structural attribute."""
    structural_attributes = cqi_client.corpora.get(corpus_name).structural_attributes  # noqa
    attribute = structural_attributes.get(structural_attribute_name)
    return {'code': 200, 'msg': 'OK', 'payload': dict(attribute.attrs)}
@socketio.on('cqi.corpora.corpus.structural_attributes.list', namespace=ns)
@socketio_login_required
@cqi_over_socketio
def cqi_corpora_corpus_structural_attributes_list(cqi_client: cqi.CQiClient, corpus_name: str):  # noqa
    """Answer with the attributes of all structural attributes of a corpus."""
    target_corpus = cqi_client.corpora.get(corpus_name)
    all_attrs = []
    for attribute in target_corpus.structural_attributes.list():
        all_attrs.append(dict(attribute.attrs))
    return {'code': 200, 'msg': 'OK', 'payload': all_attrs}

View File

@ -0,0 +1,123 @@
from app import socketio
from app.decorators import socketio_login_required
from app.models import Corpus
from flask import session
from . import NAMESPACE as ns
from .utils import cqi_over_socketio, export_subcorpus
import cqi
import json
import math
import os
@socketio.on('cqi.corpora.corpus.subcorpora.get', namespace=ns)
@socketio_login_required
@cqi_over_socketio
def cqi_corpora_corpus_subcorpora_get(cqi_client: cqi.CQiClient, corpus_name: str, subcorpus_name: str):  # noqa
    """Answer with a copy of the attributes of one subcorpus."""
    subcorpora = cqi_client.corpora.get(corpus_name).subcorpora
    subcorpus = subcorpora.get(subcorpus_name)
    return {'code': 200, 'msg': 'OK', 'payload': dict(subcorpus.attrs)}
@socketio.on('cqi.corpora.corpus.subcorpora.list', namespace=ns)
@socketio_login_required
@cqi_over_socketio
def cqi_corpora_corpus_subcorpora_list(cqi_client: cqi.CQiClient, corpus_name: str):  # noqa
    """Answer with the attributes of all subcorpora of a corpus."""
    target_corpus = cqi_client.corpora.get(corpus_name)
    all_attrs = []
    for subcorpus in target_corpus.subcorpora.list():
        all_attrs.append(dict(subcorpus.attrs))
    return {'code': 200, 'msg': 'OK', 'payload': all_attrs}
@socketio.on('cqi.corpora.corpus.subcorpora.subcorpus.drop', namespace=ns)
@socketio_login_required
@cqi_over_socketio
def cqi_corpora_corpus_subcorpora_subcorpus_drop(cqi_client: cqi.CQiClient, corpus_name: str, subcorpus_name: str):  # noqa
    """Drop one subcorpus and answer with the resulting CQi status."""
    subcorpora = cqi_client.corpora.get(corpus_name).subcorpora
    status = subcorpora.get(subcorpus_name).drop()
    status_payload = {
        'code': status,
        'msg': cqi.api.specification.lookup[status],
    }
    return {'code': 200, 'msg': 'OK', 'payload': status_payload}
@socketio.on('cqi.corpora.corpus.subcorpora.subcorpus.dump', namespace=ns)
@socketio_login_required
@cqi_over_socketio
def cqi_corpora_corpus_subcorpora_subcorpus_dump(cqi_client: cqi.CQiClient):
    """Placeholder handler: always answers 501 Not Implemented."""
    not_implemented = {'code': 501, 'msg': 'Not Implemented'}
    return not_implemented
@socketio.on('cqi.corpora.corpus.subcorpora.subcorpus.fdist_1', namespace=ns)
@socketio_login_required
@cqi_over_socketio
def cqi_corpora_corpus_subcorpora_subcorpus_fdist_1(cqi_client: cqi.CQiClient):
    """Placeholder handler: always answers 501 Not Implemented."""
    not_implemented = {'code': 501, 'msg': 'Not Implemented'}
    return not_implemented
@socketio.on('cqi.corpora.corpus.subcorpora.subcorpus.fdist_2', namespace=ns)
@socketio_login_required
@cqi_over_socketio
def cqi_corpora_corpus_subcorpora_subcorpus_fdist_2(cqi_client: cqi.CQiClient):
    """Placeholder handler: always answers 501 Not Implemented."""
    not_implemented = {'code': 501, 'msg': 'Not Implemented'}
    return not_implemented
###############################################################################
# nopaque specific CQi extensions #
###############################################################################
@socketio.on('cqi.corpora.corpus.subcorpora.subcorpus.paginate', namespace=ns)
@socketio_login_required
@cqi_over_socketio
def cqi_corpora_corpus_subcorpora_subcorpus_paginate(cqi_client: cqi.CQiClient, corpus_name: str, subcorpus_name: str, context: int = 50, page: int = 1, per_page: int = 20):  # noqa
    """Answer with one page of subcorpus matches and their lookups.

    Answers 416 when ``page`` or ``per_page`` is smaller than 1 or when
    ``page`` lies beyond the last page of a non-empty subcorpus.
    """
    cqi_corpus = cqi_client.corpora.get(corpus_name)
    cqi_subcorpus = cqi_corpus.subcorpora.get(subcorpus_name)
    # Sanity checks
    if per_page < 1 or page < 1:
        return {'code': 416, 'msg': 'Range Not Satisfiable'}
    total = cqi_subcorpus.attrs['size']
    pages = math.ceil(total / per_page)
    if total > 0 and page > pages:
        return {'code': 416, 'msg': 'Range Not Satisfiable'}
    export = export_subcorpus(
        cqi_subcorpus,
        context=context,
        cutoff=per_page,
        offset=(page - 1) * per_page,
    )
    # Everything left in the export after removing the matches is used as
    # the lookups of the page.
    matches = export.pop('matches')
    # The page number is 1 indexed and None if there are no pages at all.
    current_page = page if pages > 0 else None
    has_prev = current_page > 1 if current_page else False
    has_next = current_page < pages if current_page else False
    payload = {
        # the items for the current page
        'items': matches,
        # the lookups for the items
        'lookups': export,
        # the total number of items matching the query
        'total': total,
        # the number of items to be displayed on a page
        'per_page': per_page,
        # the total number of pages
        'pages': pages,
        # the current page number (1 indexed)
        'page': current_page,
        # True if a previous page exists
        'has_prev': has_prev,
        # True if a next page exists
        'has_next': has_next,
        # number of the previous page
        'prev_num': current_page - 1 if has_prev else None,
        # number of the next page
        'next_num': current_page + 1 if has_next else None,
    }
    return {'code': 200, 'msg': 'OK', 'payload': payload}
@socketio.on('cqi.corpora.corpus.subcorpora.subcorpus.export', namespace=ns)
@socketio_login_required
@cqi_over_socketio
def cqi_corpora_corpus_subcorpora_subcorpus_export(cqi_client: cqi.CQiClient, corpus_name: str, subcorpus_name: str, context: int = 50):  # noqa
    """Export a subcorpus as ``<subcorpus_name>.json`` into the corpus path.

    Answers 400 if ``subcorpus_name`` is not a plain file name component,
    404 if the database row of the corpus no longer exists and 200 once the
    file has been written.
    """
    # subcorpus_name is supplied by the client and becomes part of a file
    # path, so anything that could leave the corpus directory is refused.
    if (
        not subcorpus_name
        or '\\' in subcorpus_name
        or '\x00' in subcorpus_name
        or os.path.basename(subcorpus_name) != subcorpus_name
    ):
        return {'code': 400, 'msg': 'Bad Request'}
    cqi_corpus = cqi_client.corpora.get(corpus_name)
    cqi_subcorpus = cqi_corpus.subcorpora.get(subcorpus_name)
    cqi_subcorpus_export = export_subcorpus(cqi_subcorpus, context=context)
    corpus = Corpus.query.get(session['d']['corpus_id'])
    if corpus is None:
        return {'code': 404, 'msg': 'Not Found'}
    file_path = os.path.join(corpus.path, f'{subcorpus_name}.json')
    with open(file_path, 'w', encoding='utf-8') as file:
        json.dump(cqi_subcorpus_export, file)
    return {'code': 200, 'msg': 'OK'}

View File

@ -0,0 +1,129 @@
from flask import session
from functools import wraps
from inspect import signature
import cqi
def cqi_over_socketio(f):
    """Decorate a Socket.IO event handler that wraps a CQi function.

    The wrapper
      - answers 424 if no analysis session data is stored in the session,
      - injects the session's ``CQiClient`` into every parameter annotated
        with ``cqi.CQiClient``,
      - fills the remaining parameters from the single json object sent by
        the client and answers 400 if a required one is missing, if a value
        is not of exactly the annotated type or if the payload is not an
        object,
      - calls ``f`` while holding the client's lock and converts a
        ``BrokenPipeError`` or ``CQiException`` into a 500 response.
    """
    @wraps(f)
    def wrapped(*args):
        if 'd' not in session:
            return {'code': 424, 'msg': 'Failed Dependency'}
        # A client may emit the event without any payload; treat that like
        # an empty object so that only missing required args are rejected.
        client_args = args[0] if args else {}
        if not isinstance(client_args, dict):
            return {'code': 400, 'msg': 'Bad Request'}
        f_args = {}
        # Check for missing args and if all provided args are of the right type
        for param in signature(f).parameters.values():
            if param.annotation == cqi.CQiClient:
                f_args[param.name] = session['d']['cqi_client']
                continue
            if param.name not in client_args:
                if param.default is param.empty:
                    # A required argument is missing
                    return {'code': 400, 'msg': 'Bad Request'}
                # An optional argument is missing, f's default is used
                continue
            arg = client_args[param.name]
            # The exact type comparison is kept on purpose: isinstance()
            # would also accept a bool where an int is expected.
            if type(arg) is not param.annotation:
                return {'code': 400, 'msg': 'Bad Request'}
            f_args[param.name] = arg
        with session['d']['cqi_client_lock']:
            try:
                return f(**f_args)
            except BrokenPipeError:
                # Previously this path fell through to an unbound return
                # value; answer with an error response instead.
                return {'code': 500, 'msg': 'Internal Server Error'}
            except cqi.errors.CQiException as e:
                return {
                    'code': 500,
                    'msg': 'Internal Server Error',
                    'payload': {
                        'code': e.code,
                        'desc': e.description,
                        'msg': e.name
                    }
                }
    return wrapped
def lookups_by_cpos(corpus, cpos_list):
    """Collect attribute lookups for the given corpus positions.

    Returns a dict with a ``'cpos_lookup'`` entry that maps every corpus
    position to its positional attribute values and to the ids of the
    structural attributes (those without values) it lies in. For each such
    structural attribute that occurs and has sub attributes, a
    ``'<name>_lookup'`` entry maps the occurring ids to the sub attribute
    values, keyed by the sub attribute name without the ``'<name>_'`` prefix.
    """
    cpos_lookup = {cpos: {} for cpos in cpos_list}
    lookups = {'cpos_lookup': cpos_lookup}
    for p_attr in corpus.positional_attributes.list():
        p_attr_values = p_attr.values_by_cpos(cpos_list)
        for position, cpos in enumerate(cpos_list):
            cpos_lookup[cpos][p_attr.attrs['name']] = p_attr_values[position]
    for s_attr in corpus.structural_attributes.list():
        # Sub attributes are identifiable by attrs['has_values'] == True and
        # are handled together with the attribute they are a part of.
        if s_attr.attrs['has_values']:
            continue
        s_attr_ids = s_attr.ids_by_cpos(cpos_list)
        for position, cpos in enumerate(cpos_list):
            # -1 is skipped: no id is recorded for this position
            if s_attr_ids[position] != -1:
                cpos_lookup[cpos][s_attr.attrs['name']] = s_attr_ids[position]
        seen_ids = [s_id for s_id in set(s_attr_ids) if s_id != -1]
        if not seen_ids:
            continue
        sub_attrs = corpus.structural_attributes.list(
            filters={'part_of': s_attr})
        if not sub_attrs:
            continue
        id_lookup = {s_id: {} for s_id in seen_ids}
        lookups[f'{s_attr.attrs["name"]}_lookup'] = id_lookup
        for sub_attr in sub_attrs:
            # Strip the '<parent name>_' prefix from the sub attribute name
            short_name = sub_attr.attrs['name'][(len(s_attr.attrs['name']) + 1):]  # noqa
            sub_attr_values = sub_attr.values_by_ids(seen_ids)
            for position, value in enumerate(sub_attr_values):
                id_lookup[seen_ids[position]][short_name] = value
    return lookups
def export_subcorpus(subcorpus, context=25, cutoff=float('inf'), offset=0):
    """Export matches of a subcorpus together with the lookups they need.

    Args:
        subcorpus: The subcorpus (query result) to export.
        context: Passed to the left and right context bound computation
            below; 0 disables the contexts.
        cutoff: Maximum number of matches to export.
        offset: Index of the first match to export; negative values are
            treated as 0.

    Returns:
        A dict with a ``'matches'`` list (each match has a 1-based ``num``,
        the match span ``c`` and the context spans ``lc``/``rc`` or None)
        merged with the lookups of ``lookups_by_cpos`` for all corpus
        positions involved. Only ``{'matches': []}`` is returned when the
        requested range contains no match.
    """
    # Derive every bound from the clamped offset so that a negative offset
    # cannot shift the last match or the match numbering.
    first_match = max(0, offset)
    last_match = min(first_match + cutoff - 1, subcorpus.attrs['size'] - 1)
    # Covers an empty subcorpus, an offset beyond the last match and a
    # cutoff smaller than 1 without asking the server for an invalid range.
    if first_match > last_match:
        return {'matches': []}
    corpus = subcorpus.collection.corpus
    last_cpos = corpus.attrs['size'] - 1
    match_boundaries = zip(
        subcorpus.dump(
            subcorpus.attrs['fields']['match'], first_match, last_match),
        subcorpus.dump(
            subcorpus.attrs['fields']['matchend'], first_match, last_match)
    )
    cpos_set = set()
    matches = []
    for match_num, (match_start, match_end) in enumerate(match_boundaries, first_match + 1):  # noqa
        c = (match_start, match_end)
        if match_start == 0 or context == 0:
            # No left context: the match starts the corpus or none is wanted
            lc = None
            cpos_list_lbound = match_start
        else:
            lc_lbound = max(0, (match_start - 1 - context))
            lc_rbound = match_start - 1
            lc = (lc_lbound, lc_rbound)
            cpos_list_lbound = lc_lbound
        if match_end == last_cpos or context == 0:
            # No right context: the match ends the corpus or none is wanted
            rc = None
            cpos_list_rbound = match_end
        else:
            rc_lbound = match_end + 1
            rc_rbound = min(match_end + 1 + context, last_cpos)
            rc = (rc_lbound, rc_rbound)
            cpos_list_rbound = rc_rbound
        matches.append({'num': match_num, 'lc': lc, 'c': c, 'rc': rc})
        cpos_set.update(range(cpos_list_lbound, cpos_list_rbound + 1))
    lookups = lookups_by_cpos(corpus, list(cpos_set))
    return {'matches': matches, **lookups}

View File

@ -1,304 +0,0 @@
from datetime import datetime
from flask import current_app, request
from flask_login import current_user
from socket import gaierror
from werkzeug.utils import secure_filename
from .. import db, socketio
from ..decorators import socketio_login_required
from ..events.socketio import sessions as socketio_sessions
from ..models import Corpus
import cqi
import math
import os
import shutil
'''
' A dictionary mapping each corpus id to the list of Socket.IO session ids
' (sid) associated with it. {<corpus_id>: [<sid>, ...], ...}
'''
corpus_analysis_sessions = {}
'''
' A dictionary containing Socket.IO session id - CQi client pairs.
' {<sid>: CQiClient, ...}
'''
corpus_analysis_clients = {}
@socketio.on('corpus_analysis_init')
@socketio_login_required
def init_corpus_analysis(corpus_id):
    """Validate an analysis request and start its session handler.

    Emits a 404, 403 or 424 response on 'corpus_analysis_init' when the
    corpus is unknown, not accessible by the current user or not in a
    status that allows an analysis. Otherwise a 'prepared' corpus is set to
    'start analysis' and ``corpus_analysis_session_handler`` is started as
    a background task.
    """
    corpus = Corpus.query.get(corpus_id)
    if corpus is None:
        rejection = (404, 'Not Found')
    elif not (corpus.creator == current_user or current_user.is_administrator()):  # noqa
        rejection = (403, 'Forbidden')
    elif corpus.status not in ['prepared', 'start analysis', 'analysing']:
        rejection = (424, 'Failed Dependency')
    else:
        rejection = None
    if rejection is not None:
        code, msg = rejection
        response = {'code': code, 'desc': None, 'msg': msg}
        socketio.emit('corpus_analysis_init', response, room=request.sid)
        return
    if corpus.status == 'prepared':
        corpus.status = 'start analysis'
        db.session.commit()
    socketio.start_background_task(
        corpus_analysis_session_handler,
        current_app._get_current_object(),
        corpus_id,
        current_user.id,
        request.sid,
    )
def corpus_analysis_session_handler(app, corpus_id, user_id, session_id):
    """Set up, observe and tear down one corpus analysis session.

    Runs inside ``app.app_context()``. Setup waits for the corpus to reach
    the 'analysing' status and connects a CQi client, reporting the outcome
    on the 'corpus_analysis_init' event. The session is then observed until
    ``session_id`` is no longer in ``socketio_sessions`` and finally torn
    down.
    """
    def emit_init(response):
        socketio.emit('corpus_analysis_init', response, room=session_id)

    with app.app_context():
        corpus = Corpus.query.get(corpus_id)

        def stop_analysis():
            corpus.status = 'stop analysis'
            db.session.commit()

        # Setup analysis session
        retries_left = 15
        while corpus.status != 'analysing':
            db.session.refresh(corpus)
            retries_left -= 1
            if retries_left == 0:
                emit_init({'code': 408, 'desc': None, 'msg': 'Request Timeout'})  # noqa
                stop_analysis()
                return
            socketio.sleep(3)
        client = cqi.CQiClient('cqpserver_{}'.format(corpus_id))
        try:
            connect_status = client.connect()
            payload = {'code': connect_status, 'msg': cqi.api.specification.lookup[connect_status]}  # noqa
        except cqi.errors.CQiException as e:
            handle_cqi_exception('corpus_analysis_init', e, session_id)
            stop_analysis()
            return
        except gaierror:
            emit_init({'code': 500, 'desc': None, 'msg': 'Internal Server Error'})  # noqa
            stop_analysis()
            return
        corpus_analysis_clients[session_id] = client
        corpus_analysis_sessions.setdefault(corpus_id, []).append(session_id)
        client.status = 'ready'
        emit_init({'code': 200, 'desc': None, 'msg': 'OK', 'payload': payload})
        # Observe analysis session
        while session_id in socketio_sessions:
            socketio.sleep(3)
        # Teardown analysis session
        if client.status == 'running':
            # Ask the running handler to abort and wait until it is done
            client.status = 'abort'
            while client.status != 'ready':
                socketio.sleep(0.3)
        try:
            client.disconnect()
        except cqi.errors.CQiException:
            pass
        corpus_analysis_clients.pop(session_id, None)
        remaining_sessions = corpus_analysis_sessions[corpus_id]
        remaining_sessions.remove(session_id)
        # NOTE(review): nesting reconstructed - the analysis is presumably
        # only stopped once the last session of the corpus is gone.
        if not remaining_sessions:
            corpus_analysis_sessions.pop(corpus_id, None)
            corpus.status = 'stop analysis'
            db.session.commit()
@socketio.on('corpus_analysis_meta_data')
@socketio_login_required
def corpus_analysis_get_meta_data(corpus_id):
    """Collect corpus meta data from the database and the CQP server.

    The result is emitted on the 'corpus_analysis_meta_data' event. A 404
    response is emitted if the corpus is unknown, a 424 response if no CQi
    client exists for the requesting session and a 500 response if the CQP
    server reports an error.
    """
    # get meta data from db
    db_corpus = Corpus.query.get(corpus_id)
    if db_corpus is None:
        response = {'code': 404, 'desc': None, 'msg': 'Not Found'}
        socketio.emit('corpus_analysis_meta_data', response, room=request.sid)
        return
    metadata = {}
    metadata['corpus_name'] = db_corpus.title
    metadata['corpus_description'] = db_corpus.description
    metadata['corpus_creation_date'] = db_corpus.creation_date.isoformat() + 'Z'  # noqa
    metadata['corpus_last_edited_date'] = \
        db_corpus.last_edited_date.isoformat() + 'Z'
    client = corpus_analysis_clients.get(request.sid)
    if client is None:
        response = {'code': 424, 'desc': None, 'msg': 'Failed Dependency'}
        socketio.emit('corpus_analysis_meta_data', response, room=request.sid)
        return
    # check if client is busy or not
    if client.status == 'running':
        client.status = 'abort'
        while client.status != 'ready':
            socketio.sleep(0.3)
    # get meta data from corpus in cqp server
    client.status = 'running'
    try:
        cwb_corpus = client.corpora.get('CORPUS')
        metadata['corpus_properties'] = cwb_corpus.attrs['properties']
        metadata['corpus_size_tokens'] = cwb_corpus.attrs['size']
        text_attr = cwb_corpus.structural_attributes.get('text')
        struct_attrs = cwb_corpus.structural_attributes.list(
            filters={'part_of': text_attr})
        text_ids = range(0, (text_attr.attrs['size']))
        texts_metadata = {text_id: {} for text_id in text_ids}
        prefix_length = len(text_attr.attrs['name']) + 1
        for struct_attr in struct_attrs:
            # Fetch the values once per attribute instead of once per text
            values = struct_attr.values_by_ids(
                list(range(struct_attr.attrs['size'])))
            key = struct_attr.attrs['name'][prefix_length:]
            for text_id in text_ids:
                texts_metadata[text_id][key] = values[text_id]
        metadata['corpus_all_texts'] = texts_metadata
        metadata['corpus_analysis_date'] = datetime.utcnow().isoformat() + 'Z'
        metadata['corpus_cqi_py_protocol_version'] = client.api.version
        metadata['corpus_cqi_py_package_version'] = cqi.__version__
        # TODO: make this dynamically
        metadata['corpus_cqpserver_version'] = 'CQPserver v3.4.22'
        # write some metadata to the db
        db_corpus.current_nr_of_tokens = metadata['corpus_size_tokens']
        db.session.commit()
        # emit data
        payload = metadata
        response = {'code': 200, 'desc': None, 'msg': 'OK', 'payload': payload}
        socketio.emit('corpus_analysis_meta_data', response, room=request.sid)
    except cqi.errors.CQiException as e:
        payload = {'code': e.code, 'desc': e.description, 'msg': e.name}
        response = {'code': 500, 'desc': None, 'msg': 'Internal Server Error',
                    'payload': payload}
        socketio.emit('corpus_analysis_meta_data', response, room=request.sid)
    finally:
        # Always hand the client back, otherwise later requests would wait
        # forever for the 'ready' status after an unexpected error.
        client.status = 'ready'
@socketio.on('corpus_analysis_query')
@socketio_login_required
def corpus_analysis_query(query):
    """Run a query and stream its results to the client in chunks.

    The query status and match count are emitted on 'corpus_analysis_query';
    the result chunks (100 matches each, with a context of 50) together with
    a progress percentage are emitted on 'corpus_analysis_query_results'.
    Streaming stops early when the client status is set to 'abort'.
    """
    client = corpus_analysis_clients.get(request.sid)
    if client is None:
        response = {'code': 424, 'desc': None, 'msg': 'Failed Dependency'}
        socketio.emit('corpus_analysis_query', response, room=request.sid)
        return
    if client.status == 'running':
        client.status = 'abort'
        while client.status != 'ready':
            socketio.sleep(0.3)
    client.status = 'running'
    try:
        corpus = client.corpora.get('CORPUS')
        query_status = corpus.query(query)
        results = corpus.subcorpora.get('Results')
    except cqi.errors.CQiException as e:
        client.status = 'ready'
        handle_cqi_exception('corpus_analysis_query', e, request.sid)
        return
    match_count = results.attrs['size']
    payload = {'status': query_status,
               'msg': cqi.api.specification.lookup[query_status],
               'match_count': match_count}
    response = {'code': 200, 'desc': None, 'msg': 'OK', 'payload': payload}
    socketio.emit('corpus_analysis_query', response, room=request.sid)
    chunk_size = 100
    chunk_start = 0
    context = 50
    # At least one chunk is always sent (as before, also for an empty
    # result). The loop ends once the last match was exported instead of
    # requesting one more chunk starting at offset == match_count.
    while True:
        if client.status == 'abort':
            break
        try:
            chunk = results.export(context=context, cutoff=chunk_size, offset=chunk_start)  # noqa
        except cqi.errors.CQiException as e:
            handle_cqi_exception('corpus_analysis_query', e, request.sid)
            break
        chunk_start += chunk_size
        if match_count == 0:
            progress = 100
        else:
            progress = (chunk_start / match_count) * 100
            progress = min(100, int(math.ceil(progress)))
        payload = {'chunk': chunk, 'progress': progress}
        response = {'code': 200, 'desc': None, 'msg': 'OK', 'payload': payload}
        socketio.emit('corpus_analysis_query_results', response, room=request.sid)  # noqa
        if chunk_start >= match_count:
            break
    client.status = 'ready'
@socketio.on('corpus_analysis_get_match_with_full_context')
@socketio_login_required
def corpus_analysis_get_match_with_full_context(payload):
    """Send the requested matches with sentence context, one at a time.

    ``payload`` must provide 'type', 'data_indexes', 'first_cpos' and
    'last_cpos'. For every (index, first cpos, last cpos) triple the 's'
    structural attribute is exported with a context of 10 and emitted on
    the 'corpus_analysis_get_match_with_full_context' event together with
    the progress in percent.
    """
    # Not named "type" to avoid shadowing the builtin
    match_type = payload['type']
    data_indexes = payload['data_indexes']
    first_cpos = payload['first_cpos']
    last_cpos = payload['last_cpos']
    client = corpus_analysis_clients.get(request.sid)
    if client is None:
        response = {'code': 424, 'desc': 'No client found for this session',
                    'msg': 'Failed Dependency'}
        socketio.emit('corpus_analysis_get_match_with_full_context', response,
                      room=request.sid)
        return
    if client.status == 'running':
        client.status = 'abort'
        while client.status != 'ready':
            socketio.sleep(0.3)
    client.status = 'running'
    try:
        corpus = client.corpora.get('CORPUS')
        s = corpus.structural_attributes.get('s')
    except cqi.errors.CQiException as e:
        # Hand the client back before returning, otherwise every later
        # request would wait forever for the 'ready' status.
        client.status = 'ready'
        handle_cqi_exception('corpus_analysis_get_match_with_full_context', e, request.sid)  # noqa
        return
    # Send data one match at a time.
    for i, (f_cpos, l_cpos) in enumerate(zip(first_cpos, last_cpos), 1):
        if i > len(data_indexes):
            # The original zip() also stopped at the shortest input
            break
        if client.status == 'abort':
            break
        try:
            tmp = s.export(f_cpos, l_cpos, context=10)
        except cqi.errors.CQiException as e:
            handle_cqi_exception('corpus_analysis_get_match_with_full_context', e, request.sid)  # noqa
            break
        matches = [tmp['matches'][0]]
        # Two separate dicts: a chained assignment would make both names
        # refer to one shared dict and mix the two lookups.
        cpos_lookup = dict(tmp['cpos_lookup'])
        text_lookup = dict(tmp['text_lookup'])
        progress = i / len(data_indexes) * 100
        payload = {'matches': matches, 'progress': progress,
                   'cpos_lookup': cpos_lookup, 'text_lookup': text_lookup}
        response = {'code': 200, 'desc': None, 'msg': 'OK', 'payload': payload,
                    'type': match_type, 'data_indexes': data_indexes}
        socketio.emit('corpus_analysis_get_match_with_full_context',
                      response, room=request.sid)
    client.status = 'ready'
@socketio.on('export_corpus')
@socketio_login_required
def export_corpus(corpus_id):
    """Build a zip archive of a corpus and notify the requesting client.

    Emits a 404, 403 or 412 response on 'export_corpus' if the corpus is
    unknown, not accessible by the current user or not in the 'prepared'
    status. On success 'export_corpus_<corpus id>' is emitted.
    """
    corpus = Corpus.query.get(corpus_id)
    if corpus is None:
        response = {'code': 404, 'msg': 'Not found'}
        socketio.emit('export_corpus', response, room=request.sid)
        return
    # Same access rule as in init_corpus_analysis: without it every logged
    # in user could export any corpus by guessing its id.
    if not (corpus.creator == current_user or current_user.is_administrator()):
        response = {'code': 403, 'msg': 'Forbidden'}
        socketio.emit('export_corpus', response, room=request.sid)
        return
    if corpus.status != 'prepared':
        response = {'code': 412, 'msg': 'Precondition Failed'}
        socketio.emit('export_corpus', response, room=request.sid)
        return
    # delete old corpus archive if it exists/has been build before
    if corpus.archive_file is not None and os.path.isfile(corpus.archive_file):
        os.remove(corpus.archive_file)
    archive_file_base_name = '[corpus]_' + secure_filename(corpus.title)
    corpus.archive_file = archive_file_base_name + '.zip'
    db.session.commit()
    shutil.make_archive(
        os.path.join(corpus.creator.path, 'corpora', archive_file_base_name),
        'zip',
        corpus.path
    )
    socketio.emit('export_corpus_{}'.format(corpus.id), room=request.sid)
def handle_cqi_exception(event, exception, room):
    '''
    Report a CQi exception to a client as an "Internal Server Error" response.

    The code, description and name of the exception are forwarded in the
    payload of the response, which is emitted as the given event to the given
    room.
    '''
    cqi_error = {
        'code': exception.code,
        'desc': exception.description,
        'msg': exception.name,
    }
    response = {
        'code': 500,
        'desc': None,
        'msg': 'Internal Server Error',
        'payload': cqi_error,
    }
    socketio.emit(event, response, room=room)

View File

@ -1,8 +1,8 @@
from flask_wtf import FlaskForm
from werkzeug.utils import secure_filename
from wtforms import (BooleanField, FileField, StringField, SubmitField,
ValidationError, IntegerField, SelectField)
from wtforms.validators import DataRequired, Length, NumberRange
from wtforms import (FileField, StringField, SubmitField,
ValidationError, IntegerField)
from wtforms.validators import DataRequired, Length
class AddCorpusFileForm(FlaskForm):
@ -91,76 +91,3 @@ class ImportCorpusForm(FlaskForm):
raise ValidationError('File does not have an approved extension: '
'.zip')
field.data.filename = secure_filename(field.data.filename)
class QueryForm(FlaskForm):
    '''
    Form to submit a query to the server, where it is executed via cqi-py.
    '''
    query = StringField(
        'Query',
        validators=[DataRequired(), Length(min=1, max=1024)]
    )
    submit = SubmitField('Search')
class DisplayOptionsForm(FlaskForm):
    '''
    Form with the options that control how matches are presented to the user.
    '''
    expert_mode = BooleanField('Expert mode')
    # Both select fields offer a placeholder followed by the sizes 10 to 50.
    result_context = SelectField(
        'Result context',
        choices=[('', 'Choose your option')]
        + [(size, size) for size in ('10', '20', '30', '40', '50')]
    )
    results_per_page = SelectField(
        'Results per page',
        choices=[('', 'Choose your option')]
        + [(size, size) for size in ('10', '20', '30', '40', '50')]
    )
class InspectDisplayOptionsForm(FlaskForm):
    '''
    Form for the inspect modal, where the user can adjust how the currently
    inspected match is presented.
    '''
    expert_mode_inspect = BooleanField('Expert mode')
    highlight_sentences = BooleanField('Split sentences')
    context_sentences = IntegerField(
        'Context sentences',
        default=3,
        validators=[NumberRange(min=0, max=10)]
    )
class QueryDownloadForm(FlaskForm):
    '''
    Form to choose the file format in which the analysis results are
    downloaded. WIP.
    '''
    file_type = SelectField(
        'File type',
        choices=[
            ('', 'Choose file type'),
            ('csv', 'csv'),
            ('json', 'json'),
            ('excel', 'excel'),
            ('html', 'html-table'),
        ],
        validators=[DataRequired()]
    )
class AddQueryResultForm(FlaskForm):
    '''
    Form used to import a single query result json file.
    '''
    description = StringField(
        'Description',
        validators=[DataRequired(), Length(min=1, max=255)]
    )
    file = FileField('File', validators=[DataRequired()])
    title = StringField(
        'Title',
        validators=[DataRequired(), Length(min=1, max=32)]
    )
    submit = SubmitField()

    def validate_file(self, field):
        '''
        Accept only files with a '.json' extension (case-insensitive) and
        replace the filename of the upload with its secured version.
        '''
        upload = field.data
        if upload.filename.lower().endswith('.json'):
            upload.filename = secure_filename(upload.filename)
            return
        raise ValidationError(
            'File does not have an approved extension: .json'
        )

View File

@ -0,0 +1,21 @@
from flask_wtf import FlaskForm
from werkzeug.utils import secure_filename
from wtforms import FileField, StringField, SubmitField, ValidationError
from wtforms.validators import DataRequired, Length
class AddQueryResultForm(FlaskForm):
    '''
    Form used to import a single query result json file.
    '''
    description = StringField(
        'Description',
        validators=[DataRequired(), Length(min=1, max=255)]
    )
    file = FileField('File', validators=[DataRequired()])
    title = StringField(
        'Title',
        validators=[DataRequired(), Length(min=1, max=32)]
    )
    submit = SubmitField()

    def validate_file(self, field):
        '''
        Accept only files with a '.json' extension (case-insensitive) and
        replace the filename of the upload with its secured version.
        '''
        upload = field.data
        if upload.filename.lower().endswith('.json'):
            upload.filename = secure_filename(upload.filename)
            return
        raise ValidationError(
            'File does not have an approved extension: .json'
        )

View File

@ -0,0 +1,134 @@
from flask import (abort, current_app, flash, make_response, redirect, request,
render_template, url_for, send_from_directory)
from flask_login import current_user, login_required
from . import bp
from . import tasks
from .forms import (AddQueryResultForm, DisplayOptionsForm,
InspectDisplayOptionsForm)
from .. import db
from ..models import QueryResult
import json
import os
@bp.route('/result/add', methods=['GET', 'POST'])
@login_required
def add_query_result():
    '''
    View to import a query result from an uploaded json file.

    A request without a submitted form renders the upload page. A submitted
    form is answered with a json body: the validation errors (400), or a
    redirect url (201 on success, 500 if the storage directory could not be
    created). An upload that is not a json object containing the keys
    'matches' and 'cpos_lookup' is rejected with a 400 response shaped like
    the form errors, and everything stored for it is removed again.
    '''
    # NOTE: This view is currently switched off; everything below the abort
    # is unreachable until the abort is removed.
    abort(503)
    form = AddQueryResultForm(prefix='add-query-result-form')
    if not form.is_submitted():
        return render_template(
            'corpora/query_results/add_query_result.html.j2',
            form=form,
            title='Add query result'
        )
    if not form.validate():
        return make_response(form.errors, 400)
    query_result = QueryResult(creator=current_user,
                               description=form.description.data,
                               filename=form.file.data.filename,
                               title=form.title.data)
    db.session.add(query_result)
    db.session.flush()
    db.session.refresh(query_result)
    # Resolve the paths once, while the object is still part of the session,
    # so they remain usable for the cleanup after a rollback.
    query_result_path = query_result.path
    query_result_dir = os.path.dirname(query_result_path)
    try:
        os.makedirs(query_result_dir)
    except OSError:
        current_app.logger.error(
            'Make dir {} led to an OSError!'.format(query_result_dir)
        )
        db.session.rollback()
        flash('Internal Server Error', 'error')
        return make_response(
            {'redirect_url': url_for('.add_query_result')}, 500)
    # save the uploaded file
    form.file.data.save(query_result_path)
    # parse json from file
    try:
        with open(query_result_path, 'r') as file:
            query_result_file_content = json.load(file)
    except ValueError:
        # json.JSONDecodeError and UnicodeDecodeError are both ValueErrors.
        query_result_file_content = None
    # TODO: Validate the content against the json schema in
    # app/static/json_schema/nopaque_cqi_py_results_schema.json
    is_valid = (isinstance(query_result_file_content, dict)
                and 'matches' in query_result_file_content
                and 'cpos_lookup' in query_result_file_content)
    if not is_valid:
        db.session.rollback()
        try:
            # The directory was freshly created above, so after removing the
            # upload it is empty and can be removed as well.
            os.remove(query_result_path)
            os.rmdir(query_result_dir)
        except OSError as e:
            current_app.logger.error(
                f'Could not clean up rejected query result upload: {e}'
            )
        return make_response(
            {'file': ['Uploaded file is not a valid query result file.']},
            400
        )
    # Only the metadata is stored in the database; the matches and the cpos
    # lookup stay in the file.
    del query_result_file_content['matches']
    del query_result_file_content['cpos_lookup']
    query_result.query_metadata = query_result_file_content
    db.session.commit()
    flash('Query result added!', 'result')
    return make_response({'redirect_url': url_for('.query_result', query_result_id=query_result.id)}, 201)  # noqa
@bp.route('/result/<int:query_result_id>')
@login_required
def query_result(query_result_id):
    '''
    View showing a single query result. Only its creator and administrators
    may access it (403 otherwise); unknown ids lead to a 404.
    '''
    # NOTE: This view is currently switched off.
    abort(503)
    result = QueryResult.query.get_or_404(query_result_id)
    may_access = (result.creator == current_user
                  or current_user.is_administrator())
    if not may_access:
        abort(403)
    return render_template(
        'corpora/query_results/query_result.html.j2',
        query_result=result,
        title='Query result'
    )
@bp.route('/result/<int:query_result_id>/inspect')
@login_required
def inspect_query_result(query_result_id):
    '''
    View to inspect an imported result file in a corpus analysis like
    interface. Only the creator of the query result and administrators may
    access it (403 otherwise); unknown ids lead to a 404.
    '''
    # NOTE: This view is currently switched off.
    abort(503)
    result = QueryResult.query.get_or_404(query_result_id)
    metadata = result.query_metadata
    may_access = (result.creator == current_user
                  or current_user.is_administrator())
    if not may_access:
        abort(403)
    # The display options can be preset via the query string.
    display_options_form = DisplayOptionsForm(
        prefix='display-options-form',
        results_per_page=request.args.get('results_per_page', 30),
        result_context=request.args.get('context', 20)
    )
    inspect_display_options_form = InspectDisplayOptionsForm(
        prefix='inspect-display-options-form'
    )
    with open(result.path, 'r') as result_file:
        file_content = json.load(result_file)
    template_context = {
        'query_result': result,
        'display_options_form': display_options_form,
        'inspect_display_options_form': inspect_display_options_form,
        'query_result_file_content': file_content,
        'query_metadata': metadata,
        'title': 'Inspect query result',
    }
    return render_template('corpora/query_results/inspect.html.j2',
                           **template_context)
@bp.route('/result/<int:query_result_id>/delete')
@login_required
def delete_query_result(query_result_id):
    '''
    View to delete a query result. Only its creator and administrators may
    delete it (403 otherwise); unknown ids lead to a 404. The deletion itself
    is handed over to tasks.delete_query_result, afterwards the user is
    redirected to the corpus analysis service page.
    '''
    # NOTE: This view is currently switched off.
    abort(503)
    query_result = QueryResult.query.get_or_404(query_result_id)
    if not (query_result.creator == current_user
            or current_user.is_administrator()):
        abort(403)
    # Show the title to the user, not the string representation of the
    # database object.
    flash('Query result "{}" has been marked for deletion!'.format(query_result.title), 'result')  # noqa
    tasks.delete_query_result(query_result_id)
    return redirect(url_for('services.service', service="corpus_analysis"))
@bp.route('/result/<int:query_result_id>/download')
@login_required
def download_query_result(query_result_id):
    '''
    View to download the file of a query result as an attachment. Only its
    creator and administrators may download it (403 otherwise); unknown ids
    lead to a 404.
    '''
    # NOTE: This view is currently switched off.
    abort(503)
    result = QueryResult.query.get_or_404(query_result_id)
    may_access = (result.creator == current_user
                  or current_user.is_administrator())
    if not may_access:
        abort(403)
    result_dir = os.path.dirname(result.path)
    return send_from_directory(
        as_attachment=True,
        directory=result_dir,
        filename=result.filename
    )

View File

@ -1,16 +1,12 @@
from flask import (abort, current_app, flash, make_response, redirect, request,
from flask import (abort, current_app, flash, make_response, redirect,
render_template, url_for, send_from_directory)
from flask_login import current_user, login_required
from . import bp
from . import tasks
from .forms import (AddCorpusFileForm, AddCorpusForm, AddQueryResultForm,
EditCorpusFileForm, QueryDownloadForm, QueryForm,
DisplayOptionsForm, InspectDisplayOptionsForm,
from .forms import (AddCorpusFileForm, AddCorpusForm, EditCorpusFileForm,
ImportCorpusForm)
from jsonschema import validate
from .. import db
from ..models import Corpus, CorpusFile, QueryResult
import json
from ..models import Corpus, CorpusFile
import os
import shutil
import glob
@ -22,21 +18,22 @@ from .import_corpus import check_zip_contents
@bp.route('/add', methods=['GET', 'POST'])
@login_required
def add_corpus():
form = AddCorpusForm()
form = AddCorpusForm(prefix='add-corpus-form')
if form.validate_on_submit():
corpus = Corpus(creator=current_user,
description=form.description.data,
title=form.title.data)
corpus = Corpus(
creator=current_user,
description=form.description.data,
title=form.title.data
)
db.session.add(corpus)
db.session.flush()
db.session.refresh(corpus)
try:
os.makedirs(corpus.path)
except OSError:
current_app.logger.error(
'Make dir {} led to an OSError!'.format(corpus.path)
)
except OSError as e:
current_app.logger.error(f'Could not add corpus: {e}')
db.session.rollback()
flash('Internal Server Error', 'error')
abort(500)
else:
db.session.commit()
@ -49,22 +46,23 @@ def add_corpus():
@bp.route('/import', methods=['GET', 'POST'])
@login_required
def import_corpus():
abort(503)
form = ImportCorpusForm()
if form.is_submitted():
if not form.validate():
return make_response(form.errors, 400)
corpus = Corpus(creator=current_user,
description=form.description.data,
title=form.title.data)
corpus = Corpus(
creator=current_user,
description=form.description.data,
title=form.title.data
)
db.session.add(corpus)
db.session.flush()
db.session.refresh(corpus)
try:
os.makedirs(corpus.path)
except OSError:
current_app.logger.error(
'Make dir {} led to an OSError!'.format(corpus.path)
)
except OSError as e:
current_app.logger.error(f'Could not import corpus: {e}')
db.session.rollback()
flash('Internal Server Error', 'error')
return make_response(
@ -128,9 +126,21 @@ def corpus(corpus_id):
corpus_files=corpus_files, title='Corpus')
@bp.route('/<int:corpus_id>/analyse')
@login_required
def analyse_corpus(corpus_id):
    '''
    View rendering the analysis page of a corpus; unknown ids lead to a 404.
    '''
    corpus = Corpus.query.get_or_404(corpus_id)
    page_title = f'Analyse Corpus {corpus.title}'
    return render_template('corpora/analyse_corpus.html.j2',
                           corpus=corpus,
                           title=page_title)
@bp.route('/<int:corpus_id>/download')
@login_required
def download_corpus(corpus_id):
abort(503)
corpus = Corpus.query.get_or_404(corpus_id)
if not (corpus.creator == current_user or current_user.is_administrator()):
abort(403)
@ -142,31 +152,6 @@ def download_corpus(corpus_id):
)
@bp.route('/<int:corpus_id>/analyse')
@login_required
def analyse_corpus(corpus_id):
    '''
    View rendering the analysis page of a corpus together with the forms used
    on it; unknown ids lead to a 404. The query and the display options can
    be preset via the query string.
    '''
    corpus = Corpus.query.get_or_404(corpus_id)
    forms = {}
    forms['display_options_form'] = DisplayOptionsForm(
        prefix='display-options-form',
        result_context=request.args.get('context', 20),
        results_per_page=request.args.get('results_per_page', 30)
    )
    forms['query_form'] = QueryForm(
        prefix='query-form',
        query=request.args.get('query')
    )
    forms['query_download_form'] = QueryDownloadForm(
        prefix='query-download-form'
    )
    forms['inspect_display_options_form'] = InspectDisplayOptionsForm(
        prefix='inspect-display-options-form'
    )
    return render_template('corpora/analyse_corpus.html.j2',
                           corpus=corpus,
                           title='Corpus analysis',
                           **forms)
@bp.route('/<int:corpus_id>/delete')
@login_required
def delete_corpus(corpus_id):
@ -190,20 +175,22 @@ def add_corpus_file(corpus_id):
return make_response(form.errors, 400)
# Save the file
form.file.data.save(os.path.join(corpus.path, form.file.data.filename))
corpus_file = CorpusFile(address=form.address.data,
author=form.author.data,
booktitle=form.booktitle.data,
chapter=form.chapter.data,
corpus=corpus,
editor=form.editor.data,
filename=form.file.data.filename,
institution=form.institution.data,
journal=form.journal.data,
pages=form.pages.data,
publisher=form.publisher.data,
publishing_year=form.publishing_year.data,
school=form.school.data,
title=form.title.data)
corpus_file = CorpusFile(
address=form.address.data,
author=form.author.data,
booktitle=form.booktitle.data,
chapter=form.chapter.data,
corpus=corpus,
editor=form.editor.data,
filename=form.file.data.filename,
institution=form.institution.data,
journal=form.journal.data,
pages=form.pages.data,
publisher=form.publisher.data,
publishing_year=form.publishing_year.data,
school=form.school.data,
title=form.title.data
)
db.session.add(corpus_file)
corpus.status = 'unprepared'
db.session.commit()
@ -298,122 +285,3 @@ def prepare_corpus(corpus_id):
else:
flash('Can not build corpus "{}": No corpus file(s)!'.format(corpus.title), 'error') # noqa
return redirect(url_for('.corpus', corpus_id=corpus_id))
# Following are view functions to add, view etc. exported results.
@bp.route('/result/add', methods=['GET', 'POST'])
@login_required
def add_query_result():
    '''
    View to import a query result from an uploaded json file.

    A request without a submitted form renders the upload page. A submitted
    form is answered with a json body: the validation errors (400), or a
    redirect url (201 on success, 500 if the storage directory could not be
    created). An upload that is not a json object containing the keys
    'matches' and 'cpos_lookup' is rejected with a 400 response shaped like
    the form errors, and everything stored for it is removed again.
    '''
    form = AddQueryResultForm(prefix='add-query-result-form')
    if not form.is_submitted():
        return render_template(
            'corpora/query_results/add_query_result.html.j2',
            form=form,
            title='Add query result'
        )
    if not form.validate():
        return make_response(form.errors, 400)
    query_result = QueryResult(creator=current_user,
                               description=form.description.data,
                               filename=form.file.data.filename,
                               title=form.title.data)
    db.session.add(query_result)
    db.session.flush()
    db.session.refresh(query_result)
    # Resolve the paths once, while the object is still part of the session,
    # so they remain usable for the cleanup after a rollback.
    query_result_path = query_result.path
    query_result_dir = os.path.dirname(query_result_path)
    try:
        os.makedirs(query_result_dir)
    except OSError:
        current_app.logger.error(
            'Make dir {} led to an OSError!'.format(query_result_dir)
        )
        db.session.rollback()
        flash('Internal Server Error', 'error')
        return make_response(
            {'redirect_url': url_for('.add_query_result')}, 500)
    # save the uploaded file
    form.file.data.save(query_result_path)
    # parse json from file
    try:
        with open(query_result_path, 'r') as file:
            query_result_file_content = json.load(file)
    except ValueError:
        # json.JSONDecodeError and UnicodeDecodeError are both ValueErrors.
        query_result_file_content = None
    # TODO: Validate the content against the json schema in
    # app/static/json_schema/nopaque_cqi_py_results_schema.json
    is_valid = (isinstance(query_result_file_content, dict)
                and 'matches' in query_result_file_content
                and 'cpos_lookup' in query_result_file_content)
    if not is_valid:
        db.session.rollback()
        try:
            # The directory was freshly created above, so after removing the
            # upload it is empty and can be removed as well.
            os.remove(query_result_path)
            os.rmdir(query_result_dir)
        except OSError as e:
            current_app.logger.error(
                f'Could not clean up rejected query result upload: {e}'
            )
        return make_response(
            {'file': ['Uploaded file is not a valid query result file.']},
            400
        )
    # Only the metadata is stored in the database; the matches and the cpos
    # lookup stay in the file.
    del query_result_file_content['matches']
    del query_result_file_content['cpos_lookup']
    query_result.query_metadata = query_result_file_content
    db.session.commit()
    flash('Query result added!', 'result')
    return make_response({'redirect_url': url_for('.query_result', query_result_id=query_result.id)}, 201)  # noqa
@bp.route('/result/<int:query_result_id>')
@login_required
def query_result(query_result_id):
    '''
    View showing a single query result. Only its creator and administrators
    may access it (403 otherwise); unknown ids lead to a 404.
    '''
    result = QueryResult.query.get_or_404(query_result_id)
    may_access = (result.creator == current_user
                  or current_user.is_administrator())
    if not may_access:
        abort(403)
    return render_template(
        'corpora/query_results/query_result.html.j2',
        query_result=result,
        title='Query result'
    )
@bp.route('/result/<int:query_result_id>/inspect')
@login_required
def inspect_query_result(query_result_id):
    '''
    View to inspect an imported result file in a corpus analysis like
    interface. Only the creator of the query result and administrators may
    access it (403 otherwise); unknown ids lead to a 404.
    '''
    result = QueryResult.query.get_or_404(query_result_id)
    metadata = result.query_metadata
    may_access = (result.creator == current_user
                  or current_user.is_administrator())
    if not may_access:
        abort(403)
    # The display options can be preset via the query string.
    display_options_form = DisplayOptionsForm(
        prefix='display-options-form',
        results_per_page=request.args.get('results_per_page', 30),
        result_context=request.args.get('context', 20)
    )
    inspect_display_options_form = InspectDisplayOptionsForm(
        prefix='inspect-display-options-form'
    )
    with open(result.path, 'r') as result_file:
        file_content = json.load(result_file)
    template_context = {
        'query_result': result,
        'display_options_form': display_options_form,
        'inspect_display_options_form': inspect_display_options_form,
        'query_result_file_content': file_content,
        'query_metadata': metadata,
        'title': 'Inspect query result',
    }
    return render_template('corpora/query_results/inspect.html.j2',
                           **template_context)
@bp.route('/result/<int:query_result_id>/delete')
@login_required
def delete_query_result(query_result_id):
    '''
    View to delete a query result. Only its creator and administrators may
    delete it (403 otherwise); unknown ids lead to a 404. The deletion itself
    is handed over to tasks.delete_query_result, afterwards the user is
    redirected to the corpus analysis service page.
    '''
    query_result = QueryResult.query.get_or_404(query_result_id)
    if not (query_result.creator == current_user
            or current_user.is_administrator()):
        abort(403)
    # Show the title to the user, not the string representation of the
    # database object.
    flash('Query result "{}" has been marked for deletion!'.format(query_result.title), 'result')  # noqa
    tasks.delete_query_result(query_result_id)
    return redirect(url_for('services.service', service="corpus_analysis"))
@bp.route('/result/<int:query_result_id>/download')
@login_required
def download_query_result(query_result_id):
    '''
    View to download the file of a query result as an attachment. Only its
    creator and administrators may download it (403 otherwise); unknown ids
    lead to a 404.
    '''
    result = QueryResult.query.get_or_404(query_result_id)
    may_access = (result.creator == current_user
                  or current_user.is_administrator())
    if not may_access:
        abort(403)
    result_dir = os.path.dirname(result.path)
    return send_from_directory(
        as_attachment=True,
        directory=result_dir,
        filename=result.filename
    )