Mirror of https://gitlab.ub.uni-bielefeld.de/sfb1288inf/nopaque.git
Big Corpus analysis update
app/corpora/__init__.py
@@ -2,4 +2,4 @@ from flask import Blueprint
|
||||
|
||||
|
||||
bp = Blueprint('corpora', __name__)
|
||||
from . import events, routes # noqa
|
||||
from . import cqi_over_socketio, routes # noqa
|
||||
|
app/corpora/cqi_over_socketio/__init__.py (new file, 108 lines)
@@ -0,0 +1,108 @@
|
||||
from app import db, socketio
|
||||
from app.decorators import socketio_login_required
|
||||
from app.models import Corpus
|
||||
from flask import session
|
||||
from flask_login import current_user
|
||||
from flask_socketio import ConnectionRefusedError
|
||||
from threading import Lock
|
||||
import cqi
|
||||
|
||||
|
||||
'''
This package tunnels the Corpus Query Interface (CQi) protocol through
Socket.IO (SIO) by wrapping each CQi function in a separate SIO event.

This module only handles the SIO connect/disconnect events, which take care of
the setup and teardown of the resources needed later on. Each CQi function has
a corresponding SIO event. The event handlers are spread across the different
modules within this package.

Basic concept (a minimal client-side sketch follows this file):
1. A client connects to the SIO namespace and provides the id of the corpus to
   be analysed.
   1.1 The analysis session counter of the corpus is incremented.
   1.2 A CQiClient and a (mutex) Lock belonging to it are created.
   1.3 Wait until the CQP server is running.
   1.4 Connect the CQiClient to the server.
   1.5 Save the CQiClient and the Lock in the session for subsequent use.
2. The client emits an event and may provide a single JSON object with the
   necessary arguments for the targeted CQi function.
3. The SIO event handler (decorated with cqi_over_socketio) gets executed.
   - The event handler function declares all of its arguments. Since the
     client sends them as a single JSON object, the decorator decomposes that
     object to fit the function's signature. This also includes type checking
     and proper use of the lock (acquire/release) mechanism.
4. Wait for more events.
5. The client disconnects from the SIO namespace.
   5.1 The analysis session counter of the corpus is decremented.
   5.2 The CQiClient and the (mutex) Lock belonging to it are torn down.
'''
|
||||
|
||||
|
||||
NAMESPACE = '/corpora/corpus/corpus_analysis'
|
||||
|
||||
|
||||
# Import all CQi over Socket.IO event handlers
|
||||
from .cqi_corpora_corpus_subcorpora import * # noqa
|
||||
from .cqi_corpora_corpus_structural_attributes import * # noqa
|
||||
from .cqi_corpora_corpus_positional_attributes import * # noqa
|
||||
from .cqi_corpora_corpus_alignment_attributes import * # noqa
|
||||
from .cqi_corpora_corpus import * # noqa
|
||||
from .cqi_corpora import * # noqa
|
||||
from .cqi import * # noqa
|
||||
|
||||
|
||||
@socketio.on('connect', namespace=NAMESPACE)
|
||||
@socketio_login_required
|
||||
def connect(auth):
|
||||
# the auth variable is used in a hacky way. It contains the corpus id for
|
||||
# which a corpus analysis session should be started.
|
||||
corpus_id = auth['corpus_id']
|
||||
corpus = Corpus.query.get(corpus_id)
|
||||
if corpus is None:
|
||||
# return {'code': 404, 'msg': 'Not Found'}
|
||||
raise ConnectionRefusedError('Not Found')
|
||||
if not (corpus.creator == current_user or current_user.is_administrator()):
|
||||
# return {'code': 403, 'msg': 'Forbidden'}
|
||||
raise ConnectionRefusedError('Forbidden')
|
||||
if corpus.status not in ['prepared', 'start analysis', 'analysing', 'stop analysis']:
|
||||
# return {'code': 424, 'msg': 'Failed Dependency'}
|
||||
raise ConnectionRefusedError('Failed Dependency')
|
||||
if corpus.num_analysis_sessions is None:
|
||||
corpus.num_analysis_sessions = 0
|
||||
db.session.commit()
|
||||
corpus.num_analysis_sessions = Corpus.num_analysis_sessions + 1
|
||||
db.session.commit()
|
||||
retry_counter = 20
|
||||
while corpus.status != 'analysing':
|
||||
if retry_counter == 0:
|
||||
corpus.num_analysis_sessions = Corpus.num_analysis_sessions - 1
|
||||
db.session.commit()
|
||||
return {'code': 408, 'msg': 'Request Timeout'}
|
||||
socketio.sleep(3)
|
||||
retry_counter -= 1
|
||||
db.session.refresh(corpus)
|
||||
cqi_client = cqi.CQiClient('cqpserver_{}'.format(corpus_id))
|
||||
session['d'] = {
|
||||
'corpus_id': corpus_id,
|
||||
'cqi_client': cqi_client,
|
||||
'cqi_client_lock': Lock(),
|
||||
}
|
||||
# return {'code': 200, 'msg': 'OK'}
|
||||
|
||||
|
||||
@socketio.on('disconnect', namespace=NAMESPACE)
|
||||
def disconnect():
|
||||
session['d']['cqi_client_lock'].acquire()
|
||||
try:
|
||||
session['d']['cqi_client'].disconnect()
|
||||
except cqi.errors.CQiException:
|
||||
pass
|
||||
except BrokenPipeError:
|
||||
pass
|
||||
session['d']['cqi_client_lock'].release()
|
||||
corpus = Corpus.query.get(session['d']['corpus_id'])
|
||||
corpus.num_analysis_sessions = Corpus.num_analysis_sessions - 1
|
||||
db.session.commit()
|
||||
session.pop('d')
|
||||
# return {'code': 200, 'msg': 'OK'}
|
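A minimal client-side sketch of the "Basic concept" described in the package
docstring above. It is not part of this commit and makes several illustrative
assumptions: a recent python-socketio client (one that supports the auth
argument of connect), a locally running nopaque instance at
http://localhost:5000, an already authenticated HTTP session and an existing
corpus with id 1.

import socketio

NAMESPACE = '/corpora/corpus/corpus_analysis'

sio = socketio.Client()
# The handlers are guarded by @socketio_login_required, so the underlying HTTP
# session must already be authenticated, e.g. via a valid session cookie
# (placeholder value below).
sio.connect(
    'http://localhost:5000',
    namespaces=[NAMESPACE],
    auth={'corpus_id': 1},  # step 1: the corpus to be analysed
    headers={'Cookie': 'session=<session-cookie>'}
)
# steps 2 and 3: emit a CQi event with its arguments as a single JSON object
# and wait for the acknowledgement returned by the decorated handler
response = sio.call('cqi.connect', {}, namespace=NAMESPACE)
print(response)  # e.g. {'code': 200, 'msg': 'OK', 'payload': {...}}
response = sio.call(
    'cqi.corpora.corpus.paginate',
    {'corpus_name': 'CORPUS', 'page': 1, 'per_page': 20},
    namespace=NAMESPACE
)
# step 5: disconnecting tears down the server-side CQiClient and its lock
sio.disconnect()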
app/corpora/cqi_over_socketio/cqi.py (new file, 43 lines)
@@ -0,0 +1,43 @@
|
||||
from app import socketio
|
||||
from app.decorators import socketio_login_required
|
||||
from socket import gaierror
|
||||
from . import NAMESPACE as ns
|
||||
from .utils import cqi_over_socketio
|
||||
import cqi
|
||||
|
||||
|
||||
@socketio.on('cqi.connect', namespace=ns)
|
||||
@socketio_login_required
|
||||
@cqi_over_socketio
|
||||
def cqi_connect(cqi_client: cqi.CQiClient):
|
||||
try:
|
||||
cqi_status = cqi_client.connect()
|
||||
except gaierror as e:
|
||||
return {
|
||||
'code': 500,
|
||||
'msg': 'Internal Server Error',
|
||||
'payload': {'code': e.args[0], 'desc': e.args[1]}
|
||||
}
|
||||
payload = {'code': cqi_status,
|
||||
'msg': cqi.api.specification.lookup[cqi_status]}
|
||||
return {'code': 200, 'msg': 'OK', 'payload': payload}
|
||||
|
||||
|
||||
@socketio.on('cqi.disconnect', namespace=ns)
|
||||
@socketio_login_required
|
||||
@cqi_over_socketio
|
||||
def cqi_disconnect(cqi_client: cqi.CQiClient):
|
||||
cqi_status = cqi_client.disconnect()
|
||||
payload = {'code': cqi_status,
|
||||
'msg': cqi.api.specification.lookup[cqi_status]}
|
||||
return {'code': 200, 'msg': 'OK', 'payload': payload}
|
||||
|
||||
|
||||
@socketio.on('cqi.ping', namespace=ns)
|
||||
@socketio_login_required
|
||||
@cqi_over_socketio
|
||||
def cqi_ping(cqi_client: cqi.CQiClient):
|
||||
cqi_status = cqi_client.ping()
|
||||
payload = {'code': cqi_status,
|
||||
'msg': cqi.api.specification.lookup[cqi_status]}
|
||||
return {'code': 200, 'msg': 'OK', 'payload': payload}
|
app/corpora/cqi_over_socketio/cqi_corpora.py (new file, 22 lines)
@@ -0,0 +1,22 @@
|
||||
from app import socketio
|
||||
from app.decorators import socketio_login_required
|
||||
from . import NAMESPACE as ns
|
||||
from .utils import cqi_over_socketio
|
||||
import cqi
|
||||
|
||||
|
||||
@socketio.on('cqi.corpora.get', namespace=ns)
|
||||
@socketio_login_required
|
||||
@cqi_over_socketio
|
||||
def cqi_corpora_get(cqi_client: cqi.CQiClient, corpus_name: str):
|
||||
cqi_corpus = cqi_client.corpora.get(corpus_name)
|
||||
payload = {**cqi_corpus.attrs}
|
||||
return {'code': 200, 'msg': 'OK', 'payload': payload}
|
||||
|
||||
|
||||
@socketio.on('cqi.corpora.list', namespace=ns)
|
||||
@socketio_login_required
|
||||
@cqi_over_socketio
|
||||
def cqi_corpora_list(cqi_client: cqi.CQiClient):
|
||||
payload = [{**x.attrs} for x in cqi_client.corpora.list()]
|
||||
return {'code': 200, 'msg': 'OK', 'payload': payload}
|
app/corpora/cqi_over_socketio/cqi_corpora_corpus.py (new file, 85 lines)
@@ -0,0 +1,85 @@
|
||||
from app import db, socketio
|
||||
from app.decorators import socketio_login_required
|
||||
from app.models import Corpus
|
||||
from flask import session
|
||||
from . import NAMESPACE as ns
|
||||
from .utils import cqi_over_socketio, lookups_by_cpos
|
||||
import cqi
|
||||
import math
|
||||
|
||||
|
||||
@socketio.on('cqi.corpora.corpus.drop', namespace=ns)
|
||||
@socketio_login_required
|
||||
@cqi_over_socketio
|
||||
def cqi_corpora_corpus_drop(cqi_client: cqi.CQiClient, corpus_name: str):
|
||||
cqi_corpus = cqi_client.corpora.get(corpus_name)
|
||||
cqi_status = cqi_corpus.drop()
|
||||
payload = {'code': cqi_status,
|
||||
'msg': cqi.api.specification.lookup[cqi_status]}
|
||||
return {'code': 200, 'msg': 'OK', 'payload': payload}
|
||||
|
||||
|
||||
@socketio.on('cqi.corpora.corpus.query', namespace=ns)
|
||||
@socketio_login_required
|
||||
@cqi_over_socketio
|
||||
def cqi_corpora_corpus_query(cqi_client: cqi.CQiClient, corpus_name: str, subcorpus_name: str, query: str): # noqa
|
||||
cqi_corpus = cqi_client.corpora.get(corpus_name)
|
||||
cqi_status = cqi_corpus.query(subcorpus_name, query)
|
||||
payload = {'code': cqi_status,
|
||||
'msg': cqi.api.specification.lookup[cqi_status]}
|
||||
return {'code': 200, 'msg': 'OK', 'payload': payload}
|
||||
|
||||
|
||||
###############################################################################
|
||||
# nopaque specific CQi extensions #
|
||||
###############################################################################
|
||||
@socketio.on('cqi.corpora.corpus.update_db', namespace=ns)
|
||||
@socketio_login_required
|
||||
@cqi_over_socketio
|
||||
def cqi_corpora_corpus_update_db(cqi_client: cqi.CQiClient, corpus_name: str):
|
||||
corpus = Corpus.query.get(session['d']['corpus_id'])
|
||||
corpus.num_tokens = cqi_client.corpora.get('CORPUS').attrs['size']
|
||||
db.session.commit()
|
||||
|
||||
|
||||
@socketio.on('cqi.corpora.corpus.paginate', namespace=ns)
|
||||
@socketio_login_required
|
||||
@cqi_over_socketio
|
||||
def cqi_corpora_corpus_paginate(cqi_client: cqi.CQiClient, corpus_name: str, page: int = 1, per_page: int = 20): # noqa
|
||||
cqi_corpus = cqi_client.corpora.get(corpus_name)
|
||||
# Sanity checks
|
||||
if (
|
||||
per_page < 1
|
||||
or page < 1
|
||||
or (
|
||||
cqi_corpus.attrs['size'] > 0
|
||||
and page > math.ceil(cqi_corpus.attrs['size'] / per_page)
|
||||
)
|
||||
):
|
||||
return {'code': 416, 'msg': 'Range Not Satisfiable'}
|
||||
first_cpos = (page - 1) * per_page
|
||||
last_cpos = min(cqi_corpus.attrs['size'], first_cpos + per_page)
|
||||
cpos_list = [*range(first_cpos, last_cpos)]
|
||||
lookups = lookups_by_cpos(cqi_corpus, cpos_list)
|
||||
payload = {}
|
||||
# the items for the current page
|
||||
payload['items'] = [cpos_list]
|
||||
# the lookups for the items
|
||||
payload['lookups'] = lookups
|
||||
# the total number of items matching the query
|
||||
payload['total'] = cqi_corpus.attrs['size']
|
||||
# the number of items to be displayed on a page.
|
||||
payload['per_page'] = per_page
|
||||
# The total number of pages
|
||||
payload['pages'] = math.ceil(payload['total'] / payload['per_page'])
|
||||
# the current page number (1 indexed)
|
||||
payload['page'] = page if payload['pages'] > 0 else None
|
||||
# True if a previous page exists
|
||||
payload['has_prev'] = payload['page'] > 1 if payload['page'] else False
|
||||
# True if a next page exists.
|
||||
payload['has_next'] = payload['page'] < payload['pages'] if payload['page'] else False # noqa
|
||||
# Number of the previous page.
|
||||
payload['prev_num'] = payload['page'] - 1 if payload['has_prev'] else None
|
||||
# Number of the next page
|
||||
payload['next_num'] = payload['page'] + 1 if payload['has_next'] else None
|
||||
return {'code': 200, 'msg': 'OK', 'payload': payload}
|
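To make the pagination bookkeeping in cqi_corpora_corpus_paginate above
concrete, here is a small worked example; the numbers are illustrative and not
part of the commit.

# illustrative numbers: a corpus of 45 tokens paginated with per_page=20, page=2
# (math is imported at the top of this module)
total, per_page, page = 45, 20, 2
pages = math.ceil(total / per_page)            # 3
first_cpos = (page - 1) * per_page             # 20
last_cpos = min(total, first_cpos + per_page)  # 40, i.e. cpos 20..39 on page 2
has_prev, has_next = page > 1, page < pages    # True, True
prev_num, next_num = page - 1, page + 1        # 1, 3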
app/corpora/cqi_over_socketio/cqi_corpora_corpus_alignment_attributes.py (new file, 24 lines)
@@ -0,0 +1,24 @@
|
||||
from app import socketio
|
||||
from app.decorators import socketio_login_required
|
||||
from . import NAMESPACE as ns
|
||||
from .utils import cqi_over_socketio
|
||||
import cqi
|
||||
|
||||
|
||||
@socketio.on('cqi.corpora.corpus.alignment_attributes.get', namespace=ns)
|
||||
@socketio_login_required
|
||||
@cqi_over_socketio
|
||||
def cqi_corpora_corpus_alignment_attributes_get(cqi_client: cqi.CQiClient, corpus_name: str, alignment_attribute_name: str): # noqa
|
||||
cqi_corpus = cqi_client.corpora.get(corpus_name)
|
||||
cqi_alignment_attribute = cqi_corpus.alignment_attributes.get(alignment_attribute_name) # noqa
|
||||
payload = {**cqi_alignment_attribute.attrs}
|
||||
return {'code': 200, 'msg': 'OK', 'payload': payload}
|
||||
|
||||
|
||||
@socketio.on('cqi.corpora.corpus.alignment_attributes.list', namespace=ns)
|
||||
@socketio_login_required
|
||||
@cqi_over_socketio
|
||||
def cqi_corpora_corpus_alignment_attributes_list(cqi_client: cqi.CQiClient, corpus_name: str): # noqa
|
||||
cqi_corpus = cqi_client.corpora.get(corpus_name)
|
||||
payload = [{**x.attrs} for x in cqi_corpus.alignment_attributes.list()]
|
||||
return {'code': 200, 'msg': 'OK', 'payload': payload}
|
app/corpora/cqi_over_socketio/cqi_corpora_corpus_positional_attributes.py (new file, 24 lines)
@@ -0,0 +1,24 @@
|
||||
from app import socketio
|
||||
from app.decorators import socketio_login_required
|
||||
from . import NAMESPACE as ns
|
||||
from .utils import cqi_over_socketio
|
||||
import cqi
|
||||
|
||||
|
||||
@socketio.on('cqi.corpora.corpus.positional_attributes.get', namespace=ns)
|
||||
@socketio_login_required
|
||||
@cqi_over_socketio
|
||||
def cqi_corpora_corpus_positional_attributes_get(cqi_client: cqi.CQiClient, corpus_name: str, positional_attribute_name: str): # noqa
|
||||
cqi_corpus = cqi_client.corpora.get(corpus_name)
|
||||
cqi_positional_attribute = cqi_corpus.positional_attributes.get(positional_attribute_name) # noqa
|
||||
payload = {**cqi_positional_attribute.attrs}
|
||||
return {'code': 200, 'msg': 'OK', 'payload': payload}
|
||||
|
||||
|
||||
@socketio.on('cqi.corpora.corpus.positional_attributes.list', namespace=ns)
|
||||
@socketio_login_required
|
||||
@cqi_over_socketio
|
||||
def cqi_corpora_corpus_positional_attributes_list(cqi_client: cqi.CQiClient, corpus_name: str): # noqa
|
||||
cqi_corpus = cqi_client.corpora.get(corpus_name)
|
||||
payload = [{**x.attrs} for x in cqi_corpus.positional_attributes.list()]
|
||||
return {'code': 200, 'msg': 'OK', 'payload': payload}
|
app/corpora/cqi_over_socketio/cqi_corpora_corpus_structural_attributes.py (new file, 24 lines)
@@ -0,0 +1,24 @@
|
||||
from app import socketio
|
||||
from app.decorators import socketio_login_required
|
||||
from . import NAMESPACE as ns
|
||||
from .utils import cqi_over_socketio
|
||||
import cqi
|
||||
|
||||
|
||||
@socketio.on('cqi.corpora.corpus.structural_attributes.get', namespace=ns)
|
||||
@socketio_login_required
|
||||
@cqi_over_socketio
|
||||
def cqi_corpora_corpus_structural_attributes_get(cqi_client: cqi.CQiClient, corpus_name: str, structural_attribute_name: str): # noqa
|
||||
cqi_corpus = cqi_client.corpora.get(corpus_name)
|
||||
cqi_structural_attribute = cqi_corpus.structural_attributes.get(structural_attribute_name) # noqa
|
||||
payload = {**cqi_structural_attribute.attrs}
|
||||
return {'code': 200, 'msg': 'OK', 'payload': payload}
|
||||
|
||||
|
||||
@socketio.on('cqi.corpora.corpus.structural_attributes.list', namespace=ns)
|
||||
@socketio_login_required
|
||||
@cqi_over_socketio
|
||||
def cqi_corpora_corpus_structural_attributes_list(cqi_client: cqi.CQiClient, corpus_name: str): # noqa
|
||||
cqi_corpus = cqi_client.corpora.get(corpus_name)
|
||||
payload = [{**x.attrs} for x in cqi_corpus.structural_attributes.list()]
|
||||
return {'code': 200, 'msg': 'OK', 'payload': payload}
|
app/corpora/cqi_over_socketio/cqi_corpora_corpus_subcorpora.py (new file, 123 lines)
@@ -0,0 +1,123 @@
|
||||
from app import socketio
|
||||
from app.decorators import socketio_login_required
|
||||
from app.models import Corpus
|
||||
from flask import session
|
||||
from . import NAMESPACE as ns
|
||||
from .utils import cqi_over_socketio, export_subcorpus
|
||||
import cqi
|
||||
import json
|
||||
import math
|
||||
import os
|
||||
|
||||
|
||||
@socketio.on('cqi.corpora.corpus.subcorpora.get', namespace=ns)
|
||||
@socketio_login_required
|
||||
@cqi_over_socketio
|
||||
def cqi_corpora_corpus_subcorpora_get(cqi_client: cqi.CQiClient, corpus_name: str, subcorpus_name: str): # noqa
|
||||
cqi_corpus = cqi_client.corpora.get(corpus_name)
|
||||
cqi_subcorpus = cqi_corpus.subcorpora.get(subcorpus_name)
|
||||
payload = {**cqi_subcorpus.attrs}
|
||||
return {'code': 200, 'msg': 'OK', 'payload': payload}
|
||||
|
||||
|
||||
@socketio.on('cqi.corpora.corpus.subcorpora.list', namespace=ns)
|
||||
@socketio_login_required
|
||||
@cqi_over_socketio
|
||||
def cqi_corpora_corpus_subcorpora_list(cqi_client: cqi.CQiClient, corpus_name: str): # noqa
|
||||
cqi_corpus = cqi_client.corpora.get(corpus_name)
|
||||
payload = [{**x.attrs} for x in cqi_corpus.subcorpora.list()]
|
||||
return {'code': 200, 'msg': 'OK', 'payload': payload}
|
||||
|
||||
|
||||
@socketio.on('cqi.corpora.corpus.subcorpora.subcorpus.drop', namespace=ns)
|
||||
@socketio_login_required
|
||||
@cqi_over_socketio
|
||||
def cqi_corpora_corpus_subcorpora_subcorpus_drop(cqi_client: cqi.CQiClient, corpus_name: str, subcorpus_name: str): # noqa
|
||||
cqi_corpus = cqi_client.corpora.get(corpus_name)
|
||||
cqi_subcorpus = cqi_corpus.subcorpora.get(subcorpus_name)
|
||||
cqi_status = cqi_subcorpus.drop()
|
||||
payload = {'code': cqi_status,
|
||||
'msg': cqi.api.specification.lookup[cqi_status]}
|
||||
return {'code': 200, 'msg': 'OK', 'payload': payload}
|
||||
|
||||
|
||||
@socketio.on('cqi.corpora.corpus.subcorpora.subcorpus.dump', namespace=ns)
|
||||
@socketio_login_required
|
||||
@cqi_over_socketio
|
||||
def cqi_corpora_corpus_subcorpora_subcorpus_dump(cqi_client: cqi.CQiClient):
|
||||
return {'code': 501, 'msg': 'Not Implemented'}
|
||||
|
||||
|
||||
@socketio.on('cqi.corpora.corpus.subcorpora.subcorpus.fdist_1', namespace=ns)
|
||||
@socketio_login_required
|
||||
@cqi_over_socketio
|
||||
def cqi_corpora_corpus_subcorpora_subcorpus_fdist_1(cqi_client: cqi.CQiClient):
|
||||
return {'code': 501, 'msg': 'Not Implemented'}
|
||||
|
||||
|
||||
@socketio.on('cqi.corpora.corpus.subcorpora.subcorpus.fdist_2', namespace=ns)
|
||||
@socketio_login_required
|
||||
@cqi_over_socketio
|
||||
def cqi_corpora_corpus_subcorpora_subcorpus_fdist_2(cqi_client: cqi.CQiClient):
|
||||
return {'code': 501, 'msg': 'Not Implemented'}
|
||||
|
||||
|
||||
###############################################################################
|
||||
# nopaque specific CQi extensions #
|
||||
###############################################################################
|
||||
@socketio.on('cqi.corpora.corpus.subcorpora.subcorpus.paginate', namespace=ns)
|
||||
@socketio_login_required
|
||||
@cqi_over_socketio
|
||||
def cqi_corpora_corpus_subcorpora_subcorpus_paginate(cqi_client: cqi.CQiClient, corpus_name: str, subcorpus_name: str, context: int = 50, page: int = 1, per_page: int = 20): # noqa
|
||||
cqi_corpus = cqi_client.corpora.get(corpus_name)
|
||||
cqi_subcorpus = cqi_corpus.subcorpora.get(subcorpus_name)
|
||||
# Sanity checks
|
||||
if (
|
||||
per_page < 1
|
||||
or page < 1
|
||||
or (
|
||||
cqi_subcorpus.attrs['size'] > 0
|
||||
and page > math.ceil(cqi_subcorpus.attrs['size'] / per_page)
|
||||
)
|
||||
):
|
||||
return {'code': 416, 'msg': 'Range Not Satisfiable'}
|
||||
offset = (page - 1) * per_page
|
||||
cutoff = per_page
|
||||
cqi_results_export = export_subcorpus(
|
||||
cqi_subcorpus, context=context, cutoff=cutoff, offset=offset)
|
||||
payload = {}
|
||||
# the items for the current page
|
||||
payload['items'] = cqi_results_export.pop('matches')
|
||||
# the lookups for the items
|
||||
payload['lookups'] = cqi_results_export
|
||||
# the total number of items matching the query
|
||||
payload['total'] = cqi_subcorpus.attrs['size']
|
||||
# the number of items to be displayed on a page.
|
||||
payload['per_page'] = per_page
|
||||
# The total number of pages
|
||||
payload['pages'] = math.ceil(payload['total'] / payload['per_page'])
|
||||
# the current page number (1 indexed)
|
||||
payload['page'] = page if payload['pages'] > 0 else None
|
||||
# True if a previous page exists
|
||||
payload['has_prev'] = payload['page'] > 1 if payload['page'] else False
|
||||
# True if a next page exists.
|
||||
payload['has_next'] = payload['page'] < payload['pages'] if payload['page'] else False # noqa
|
||||
# Number of the previous page.
|
||||
payload['prev_num'] = payload['page'] - 1 if payload['has_prev'] else None
|
||||
# Number of the next page
|
||||
payload['next_num'] = payload['page'] + 1 if payload['has_next'] else None
|
||||
return {'code': 200, 'msg': 'OK', 'payload': payload}
|
||||
|
||||
|
||||
@socketio.on('cqi.corpora.corpus.subcorpora.subcorpus.export', namespace=ns)
|
||||
@socketio_login_required
|
||||
@cqi_over_socketio
|
||||
def cqi_corpora_corpus_subcorpora_subcorpus_export(cqi_client: cqi.CQiClient, corpus_name: str, subcorpus_name: str, context: int = 50): # noqa
|
||||
cqi_corpus = cqi_client.corpora.get(corpus_name)
|
||||
cqi_subcorpus = cqi_corpus.subcorpora.get(subcorpus_name)
|
||||
cqi_subcorpus_export = export_subcorpus(cqi_subcorpus, context=context)
|
||||
corpus = Corpus.query.get(session['d']['corpus_id'])
|
||||
file_path = os.path.join(corpus.path, f'{subcorpus_name}.json')
|
||||
with open(file_path, 'w') as file:
|
||||
json.dump(cqi_subcorpus_export, file)
|
||||
return {'code': 200, 'msg': 'OK'}
|
app/corpora/cqi_over_socketio/utils.py (new file, 129 lines)
@@ -0,0 +1,129 @@
|
||||
from flask import session
|
||||
from functools import wraps
|
||||
from inspect import signature
|
||||
import cqi
|
||||
|
||||
|
||||
def cqi_over_socketio(f):
|
||||
@wraps(f)
|
||||
def wrapped(*args):
|
||||
if 'd' not in session:
|
||||
return {'code': 424, 'msg': 'Failed Dependency'}
|
||||
f_args = {}
|
||||
# Check for missing args and if all provided args are of the right type
|
||||
for param in signature(f).parameters.values():
|
||||
if param.annotation == cqi.CQiClient:
|
||||
f_args[param.name] = session['d']['cqi_client']
|
||||
continue
|
||||
if param.default is param.empty:
|
||||
# args
|
||||
if param.name not in args[0]:
|
||||
return {'code': 400, 'msg': 'Bad Request'}
|
||||
arg = args[0][param.name]
|
||||
if type(arg) is not param.annotation:
|
||||
return {'code': 400, 'msg': 'Bad Request'}
|
||||
f_args[param.name] = arg
|
||||
else:
|
||||
# kwargs
|
||||
if param.name not in args[0]:
|
||||
continue
|
||||
arg = args[0][param.name]
|
||||
if type(arg) is not param.annotation:
|
||||
return {'code': 400, 'msg': 'Bad Request'}
|
||||
f_args[param.name] = arg
|
||||
session['d']['cqi_client_lock'].acquire()
|
||||
try:
|
||||
return_value = f(**f_args)
|
||||
except BrokenPipeError:
    # Note: without setting a return value here, the `return return_value`
    # below would raise a NameError; fall back to a generic error response.
    return_value = {'code': 500, 'msg': 'Internal Server Error'}
|
||||
except cqi.errors.CQiException as e:
|
||||
return_value = {
|
||||
'code': 500,
|
||||
'msg': 'Internal Server Error',
|
||||
'payload': {
|
||||
'code': e.code,
|
||||
'desc': e.description,
|
||||
'msg': e.name
|
||||
}
|
||||
}
|
||||
finally:
|
||||
session['d']['cqi_client_lock'].release()
|
||||
return return_value
|
||||
return wrapped
|
||||
|
||||
|
||||
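To illustrate how the decorator above maps the client's single JSON object
onto a handler's signature, here is a hypothetical handler; the event name and
parameters are made up, and it would be registered like the real handlers in
this package (socketio, socketio_login_required and ns imported as in cqi.py).

# sketch only, not part of this commit
@socketio.on('cqi.example', namespace=ns)
@socketio_login_required
@cqi_over_socketio
def cqi_example(cqi_client: cqi.CQiClient, corpus_name: str, cutoff: int = 10):
    # For a client emit of ('cqi.example', {'corpus_name': 'CORPUS', 'cutoff': 5}):
    #  - cqi_client is injected from session['d']['cqi_client'] (CQiClient annotation)
    #  - corpus_name has no default, so it must be present and of type str,
    #    otherwise {'code': 400, 'msg': 'Bad Request'} is returned
    #  - cutoff has a default, so it is optional but type checked when present
    #  - the body runs with session['d']['cqi_client_lock'] held
    return {'code': 200, 'msg': 'OK',
            'payload': {'corpus_name': corpus_name, 'cutoff': cutoff}}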
def lookups_by_cpos(corpus, cpos_list):
|
||||
lookups = {}
|
||||
lookups['cpos_lookup'] = {cpos: {} for cpos in cpos_list}
|
||||
for attr in corpus.positional_attributes.list():
|
||||
cpos_attr_values = attr.values_by_cpos(cpos_list)
|
||||
for i, cpos in enumerate(cpos_list):
|
||||
lookups['cpos_lookup'][cpos][attr.attrs['name']] = \
|
||||
cpos_attr_values[i]
|
||||
for attr in corpus.structural_attributes.list():
|
||||
# We only want to iterate over attributes that are not subattributes; these
# are identifiable by attr.attrs['has_values'] == False
|
||||
if attr.attrs['has_values']:
|
||||
continue
|
||||
cpos_attr_ids = attr.ids_by_cpos(cpos_list)
|
||||
for i, cpos in enumerate(cpos_list):
|
||||
if cpos_attr_ids[i] == -1:
|
||||
continue
|
||||
lookups['cpos_lookup'][cpos][attr.attrs['name']] = cpos_attr_ids[i]
|
||||
occured_attr_ids = [x for x in set(cpos_attr_ids) if x != -1]
|
||||
if not occured_attr_ids:
|
||||
continue
|
||||
subattrs = corpus.structural_attributes.list(filters={'part_of': attr})
|
||||
if not subattrs:
|
||||
continue
|
||||
lookup_name = f'{attr.attrs["name"]}_lookup'
|
||||
lookups[lookup_name] = {}
|
||||
for attr_id in occured_attr_ids:
|
||||
lookups[lookup_name][attr_id] = {}
|
||||
for subattr in subattrs:
|
||||
subattr_name = subattr.attrs['name'][(len(attr.attrs['name']) + 1):] # noqa
|
||||
for i, subattr_value in enumerate(subattr.values_by_ids(occured_attr_ids)): # noqa
|
||||
lookups[lookup_name][occured_attr_ids[i]][subattr_name] = subattr_value # noqa
|
||||
return lookups
|
||||
|
||||
|
||||
def export_subcorpus(subcorpus, context=25, cutoff=float('inf'), offset=0):
|
||||
if subcorpus.attrs['size'] == 0:
|
||||
return {"matches": []}
|
||||
first_match = max(0, offset)
|
||||
last_match = min((offset + cutoff - 1), (subcorpus.attrs['size'] - 1))
|
||||
match_boundaries = zip(
|
||||
subcorpus.dump(
|
||||
subcorpus.attrs['fields']['match'], first_match, last_match),
|
||||
subcorpus.dump(
|
||||
subcorpus.attrs['fields']['matchend'], first_match, last_match)
|
||||
)
|
||||
cpos_set = set()
|
||||
matches = []
|
||||
match_num = offset + 1
|
||||
for match_start, match_end in match_boundaries:
|
||||
c = (match_start, match_end)
|
||||
if match_start == 0 or context == 0:
|
||||
lc = None
|
||||
cpos_list_lbound = match_start
|
||||
else:
|
||||
lc_lbound = max(0, (match_start - 1 - context))
|
||||
lc_rbound = match_start - 1
|
||||
lc = (lc_lbound, lc_rbound)
|
||||
cpos_list_lbound = lc_lbound
|
||||
if (match_end == (subcorpus.collection.corpus.attrs['size'] - 1)
|
||||
or context == 0):
|
||||
rc = None
|
||||
cpos_list_rbound = match_end
|
||||
else:
|
||||
rc_lbound = match_end + 1
|
||||
rc_rbound = min(match_end + 1 + context,
|
||||
subcorpus.collection.corpus.attrs['size'] - 1)
|
||||
rc = (rc_lbound, rc_rbound)
|
||||
cpos_list_rbound = rc_rbound
|
||||
match = {'num': match_num, 'lc': lc, 'c': c, 'rc': rc}
|
||||
matches.append(match)
|
||||
cpos_set.update(range(cpos_list_lbound, cpos_list_rbound + 1))
|
||||
match_num += 1
|
||||
lookups = lookups_by_cpos(subcorpus.collection.corpus, list(cpos_set))
|
||||
return {'matches': matches, **lookups}
|
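For orientation, the structure returned by export_subcorpus (which uses
lookups_by_cpos) roughly looks as follows; attribute names such as word, lemma
and text depend on the corpus and are purely illustrative.

{
    'matches': [
        # one entry per match: cpos bounds of left context, match, right context
        {'num': 1, 'lc': (0, 24), 'c': (25, 26), 'rc': (27, 51)},
    ],
    'cpos_lookup': {
        # per cpos: positional attribute values and structural attribute ids
        25: {'word': 'example', 'lemma': 'example', 'text': 0, 's': 3},
    },
    'text_lookup': {
        # per structural attribute id: its sub-attribute values
        0: {'title': 'A title', 'author': 'An author'},
    },
}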
@@ -1,304 +0,0 @@ (file deleted)
|
||||
from datetime import datetime
|
||||
from flask import current_app, request
|
||||
from flask_login import current_user
|
||||
from socket import gaierror
|
||||
from werkzeug.utils import secure_filename
|
||||
from .. import db, socketio
|
||||
from ..decorators import socketio_login_required
|
||||
from ..events.socketio import sessions as socketio_sessions
|
||||
from ..models import Corpus
|
||||
import cqi
|
||||
import math
|
||||
import os
|
||||
import shutil
|
||||
|
||||
|
||||
'''
' A dictionary mapping corpus ids to lists of associated Socket.IO session
' ids (sid). {<corpus_id>: [<sid>, ...], ...}
'''
|
||||
corpus_analysis_sessions = {}
|
||||
'''
|
||||
' A dictionary containing Socket.IO session id - CQi client pairs.
|
||||
' {<sid>: CQiClient, ...}
|
||||
'''
|
||||
corpus_analysis_clients = {}
|
||||
|
||||
|
||||
@socketio.on('corpus_analysis_init')
|
||||
@socketio_login_required
|
||||
def init_corpus_analysis(corpus_id):
|
||||
corpus = Corpus.query.get(corpus_id)
|
||||
if corpus is None:
|
||||
response = {'code': 404, 'desc': None, 'msg': 'Not Found'}
|
||||
socketio.emit('corpus_analysis_init', response, room=request.sid)
|
||||
return
|
||||
if not (corpus.creator == current_user or current_user.is_administrator()):
|
||||
response = {'code': 403, 'desc': None, 'msg': 'Forbidden'}
|
||||
socketio.emit('corpus_analysis_init', response, room=request.sid)
|
||||
return
|
||||
if corpus.status not in ['prepared', 'start analysis', 'analysing']:
|
||||
response = {'code': 424, 'desc': None, 'msg': 'Failed Dependency'}
|
||||
socketio.emit('corpus_analysis_init', response, room=request.sid)
|
||||
return
|
||||
if corpus.status == 'prepared':
|
||||
corpus.status = 'start analysis'
|
||||
db.session.commit()
|
||||
socketio.start_background_task(corpus_analysis_session_handler,
|
||||
current_app._get_current_object(),
|
||||
corpus_id, current_user.id, request.sid)
|
||||
|
||||
|
||||
def corpus_analysis_session_handler(app, corpus_id, user_id, session_id):
|
||||
with app.app_context():
|
||||
''' Setup analysis session '''
|
||||
corpus = Corpus.query.get(corpus_id)
|
||||
retry_counter = 15
|
||||
while corpus.status != 'analysing':
|
||||
db.session.refresh(corpus)
|
||||
retry_counter -= 1
|
||||
if retry_counter == 0:
|
||||
response = {'code': 408, 'desc': None, 'msg': 'Request Timeout'} # noqa
|
||||
socketio.emit('corpus_analysis_init', response, room=session_id) # noqa
|
||||
corpus.status = 'stop analysis'
|
||||
db.session.commit()
|
||||
return
|
||||
socketio.sleep(3)
|
||||
client = cqi.CQiClient('cqpserver_{}'.format(corpus_id))
|
||||
try:
|
||||
connect_status = client.connect()
|
||||
payload = {'code': connect_status, 'msg': cqi.api.specification.lookup[connect_status]} # noqa
|
||||
except cqi.errors.CQiException as e:
|
||||
handle_cqi_exception('corpus_analysis_init', e, session_id)
|
||||
corpus.status = 'stop analysis'
|
||||
db.session.commit()
|
||||
return
|
||||
except gaierror:
|
||||
response = {'code': 500, 'desc': None, 'msg': 'Internal Server Error'} # noqa
|
||||
socketio.emit('corpus_analysis_init', response, room=session_id)
|
||||
corpus.status = 'stop analysis'
|
||||
db.session.commit()
|
||||
return
|
||||
corpus_analysis_clients[session_id] = client
|
||||
if corpus_id in corpus_analysis_sessions:
|
||||
corpus_analysis_sessions[corpus_id].append(session_id)
|
||||
else:
|
||||
corpus_analysis_sessions[corpus_id] = [session_id]
|
||||
client.status = 'ready'
|
||||
response = {'code': 200, 'desc': None, 'msg': 'OK', 'payload': payload}
|
||||
socketio.emit('corpus_analysis_init', response, room=session_id)
|
||||
''' Observe analysis session '''
|
||||
while session_id in socketio_sessions:
|
||||
socketio.sleep(3)
|
||||
''' Teardown analysis session '''
|
||||
if client.status == 'running':
|
||||
client.status = 'abort'
|
||||
while client.status != 'ready':
|
||||
socketio.sleep(0.3)
|
||||
try:
|
||||
client.disconnect()
|
||||
except cqi.errors.CQiException:
|
||||
pass
|
||||
corpus_analysis_clients.pop(session_id, None)
|
||||
corpus_analysis_sessions[corpus_id].remove(session_id)
|
||||
if not corpus_analysis_sessions[corpus_id]:
|
||||
corpus_analysis_sessions.pop(corpus_id, None)
|
||||
corpus.status = 'stop analysis'
|
||||
db.session.commit()
|
||||
|
||||
|
||||
@socketio.on('corpus_analysis_meta_data')
|
||||
@socketio_login_required
|
||||
def corpus_analysis_get_meta_data(corpus_id):
|
||||
# get meta data from db
|
||||
db_corpus = Corpus.query.get(corpus_id)
|
||||
metadata = {}
|
||||
metadata['corpus_name'] = db_corpus.title
|
||||
metadata['corpus_description'] = db_corpus.description
|
||||
metadata['corpus_creation_date'] = db_corpus.creation_date.isoformat() + 'Z'
|
||||
metadata['corpus_last_edited_date'] = \
|
||||
db_corpus.last_edited_date.isoformat() + 'Z'
|
||||
client = corpus_analysis_clients.get(request.sid)
|
||||
if client is None:
|
||||
response = {'code': 424, 'desc': None, 'msg': 'Failed Dependency'}
|
||||
socketio.emit('corpus_analysis_meta_data', response, room=request.sid)
|
||||
return
|
||||
# check if client is busy or not
|
||||
if client.status == 'running':
|
||||
client.status = 'abort'
|
||||
while client.status != 'ready':
|
||||
socketio.sleep(0.3)
|
||||
# get meta data from corpus in cqp server
|
||||
client.status = 'running'
|
||||
try:
|
||||
cwb_corpus = client.corpora.get('CORPUS')
|
||||
metadata['corpus_properties'] = cwb_corpus.attrs['properties']
|
||||
metadata['corpus_size_tokens'] = cwb_corpus.attrs['size']
|
||||
|
||||
text_attr = cwb_corpus.structural_attributes.get('text')
|
||||
struct_attrs = cwb_corpus.structural_attributes.list(
|
||||
filters={'part_of': text_attr})
|
||||
text_ids = range(0, (text_attr.attrs['size']))
|
||||
texts_metadata = {}
|
||||
for text_id in text_ids:
|
||||
texts_metadata[text_id] = {}
|
||||
for struct_attr in struct_attrs:
|
||||
texts_metadata[text_id][struct_attr.attrs['name'][(len(text_attr.attrs['name']) + 1):]] = struct_attr.values_by_ids(list(range(struct_attr.attrs['size'])))[text_id] # noqa
|
||||
metadata['corpus_all_texts'] = texts_metadata
|
||||
metadata['corpus_analysis_date'] = datetime.utcnow().isoformat() + 'Z'
|
||||
metadata['corpus_cqi_py_protocol_version'] = client.api.version
|
||||
metadata['corpus_cqi_py_package_version'] = cqi.__version__
|
||||
# TODO: make this dynamically
|
||||
metadata['corpus_cqpserver_version'] = 'CQPserver v3.4.22'
|
||||
|
||||
# write some metadata to the db
|
||||
db_corpus.current_nr_of_tokens = metadata['corpus_size_tokens']
|
||||
db.session.commit()
|
||||
|
||||
# emit data
|
||||
payload = metadata
|
||||
response = {'code': 200, 'desc': None, 'msg': 'OK', 'payload': payload}
|
||||
socketio.emit('corpus_analysis_meta_data', response, room=request.sid)
|
||||
except cqi.errors.CQiException as e:
|
||||
payload = {'code': e.code, 'desc': e.description, 'msg': e.name}
|
||||
response = {'code': 500, 'desc': None, 'msg': 'Internal Server Error',
|
||||
'payload': payload}
|
||||
socketio.emit('corpus_analysis_meta_data', response, room=request.sid)
|
||||
client.status = 'ready'
|
||||
|
||||
|
||||
@socketio.on('corpus_analysis_query')
|
||||
@socketio_login_required
|
||||
def corpus_analysis_query(query):
|
||||
client = corpus_analysis_clients.get(request.sid)
|
||||
if client is None:
|
||||
response = {'code': 424, 'desc': None, 'msg': 'Failed Dependency'}
|
||||
socketio.emit('corpus_analysis_query', response, room=request.sid)
|
||||
return
|
||||
if client.status == 'running':
|
||||
client.status = 'abort'
|
||||
while client.status != 'ready':
|
||||
socketio.sleep(0.3)
|
||||
client.status = 'running'
|
||||
try:
|
||||
corpus = client.corpora.get('CORPUS')
|
||||
query_status = corpus.query(query)
|
||||
results = corpus.subcorpora.get('Results')
|
||||
except cqi.errors.CQiException as e:
|
||||
client.status = 'ready'
|
||||
handle_cqi_exception('corpus_analysis_query', e, request.sid)
|
||||
return
|
||||
payload = {'status': query_status,
|
||||
'msg': cqi.api.specification.lookup[query_status],
|
||||
'match_count': results.attrs['size']}
|
||||
response = {'code': 200, 'desc': None, 'msg': 'OK', 'payload': payload}
|
||||
socketio.emit('corpus_analysis_query', response, room=request.sid)
|
||||
chunk_size = 100
|
||||
chunk_start = 0
|
||||
context = 50
|
||||
progress = 0
|
||||
while chunk_start <= results.attrs['size']:
|
||||
if client.status == 'abort':
|
||||
break
|
||||
try:
|
||||
chunk = results.export(context=context, cutoff=chunk_size, offset=chunk_start) # noqa
|
||||
except cqi.errors.CQiException as e:
|
||||
handle_cqi_exception('corpus_analysis_query', e, request.sid)
|
||||
break
|
||||
if (results.attrs['size'] == 0):
|
||||
progress = 100
|
||||
else:
|
||||
progress = ((chunk_start + chunk_size) / results.attrs['size']) * 100 # noqa
|
||||
progress = min(100, int(math.ceil(progress)))
|
||||
payload = {'chunk': chunk, 'progress': progress}
|
||||
response = {'code': 200, 'desc': None, 'msg': 'OK', 'payload': payload}
|
||||
socketio.emit('corpus_analysis_query_results', response, room=request.sid) # noqa
|
||||
chunk_start += chunk_size
|
||||
client.status = 'ready'
|
||||
|
||||
|
||||
@socketio.on('corpus_analysis_get_match_with_full_context')
|
||||
@socketio_login_required
|
||||
def corpus_analysis_get_match_with_full_context(payload):
|
||||
type = payload['type']
|
||||
data_indexes = payload['data_indexes']
|
||||
first_cpos = payload['first_cpos']
|
||||
last_cpos = payload['last_cpos']
|
||||
client = corpus_analysis_clients.get(request.sid)
|
||||
if client is None:
|
||||
response = {'code': 424, 'desc': 'No client found for this session',
|
||||
'msg': 'Failed Dependency'}
|
||||
socketio.emit('corpus_analysis_get_match_with_full_context', response,
|
||||
room=request.sid)
|
||||
return
|
||||
if client.status == 'running':
|
||||
client.status = 'abort'
|
||||
while client.status != 'ready':
|
||||
socketio.sleep(0.3)
|
||||
client.status = 'running'
|
||||
try:
|
||||
corpus = client.corpora.get('CORPUS')
|
||||
s = corpus.structural_attributes.get('s')
|
||||
except cqi.errors.CQiException as e:
|
||||
handle_cqi_exception('corpus_analysis_get_match_with_full_context', e, request.sid) # noqa
|
||||
return
|
||||
i = 0
|
||||
# Send data one match at a time.
|
||||
for index, f_cpos, l_cpos in zip(data_indexes, first_cpos, last_cpos):
|
||||
if client.status == 'abort':
|
||||
break
|
||||
i += 1
|
||||
matches = []
|
||||
cpos_lookup = text_lookup = {}
|
||||
try:
|
||||
tmp = s.export(f_cpos, l_cpos, context=10)
|
||||
except cqi.errors.CQiException as e:
|
||||
handle_cqi_exception('corpus_analysis_get_match_with_full_context', e, request.sid) # noqa
|
||||
break
|
||||
matches.append(tmp['matches'][0])
|
||||
cpos_lookup.update(tmp['cpos_lookup'])
|
||||
text_lookup.update(tmp['text_lookup'])
|
||||
progress = i / len(data_indexes) * 100
|
||||
payload = {'matches': matches, 'progress': progress,
|
||||
'cpos_lookup': cpos_lookup, 'text_lookup': text_lookup}
|
||||
response = {'code': 200, 'desc': None, 'msg': 'OK', 'payload': payload,
|
||||
'type': type, 'data_indexes': data_indexes}
|
||||
socketio.emit('corpus_analysis_get_match_with_full_context',
|
||||
response, room=request.sid)
|
||||
client.status = 'ready'
|
||||
|
||||
|
||||
@socketio.on('export_corpus')
|
||||
@socketio_login_required
|
||||
def export_corpus(corpus_id):
|
||||
corpus = Corpus.query.get(corpus_id)
|
||||
if corpus is None:
|
||||
response = {'code': 404, 'msg': 'Not found'}
|
||||
socketio.emit('export_corpus', response, room=request.sid)
|
||||
return
|
||||
if corpus.status != 'prepared':
|
||||
response = {'code': 412, 'msg': 'Precondition Failed'}
|
||||
socketio.emit('export_corpus', response, room=request.sid)
|
||||
return
|
||||
# delete old corpus archive if it exists/has been build before
|
||||
if corpus.archive_file is not None and os.path.isfile(corpus.archive_file):
|
||||
os.remove(corpus.archive_file)
|
||||
archive_file_base_name = '[corpus]_' + secure_filename(corpus.title)
|
||||
corpus.archive_file = archive_file_base_name + '.zip'
|
||||
db.session.commit()
|
||||
shutil.make_archive(
|
||||
os.path.join(corpus.creator.path, 'corpora', archive_file_base_name),
|
||||
'zip',
|
||||
corpus.path
|
||||
)
|
||||
socketio.emit('export_corpus_{}'.format(corpus.id), room=request.sid)
|
||||
|
||||
|
||||
def handle_cqi_exception(event, exception, room):
|
||||
response = {'code': 500,
|
||||
'desc': None,
|
||||
'msg': 'Internal Server Error',
|
||||
'payload': {'code': exception.code,
|
||||
'desc': exception.description,
|
||||
'msg': exception.name}}
|
||||
socketio.emit(event, response, room=room)
|
app/corpora/forms.py
@@ -1,8 +1,8 @@
|
||||
from flask_wtf import FlaskForm
|
||||
from werkzeug.utils import secure_filename
|
||||
from wtforms import (BooleanField, FileField, StringField, SubmitField,
|
||||
ValidationError, IntegerField, SelectField)
|
||||
from wtforms.validators import DataRequired, Length, NumberRange
|
||||
from wtforms import (FileField, StringField, SubmitField,
|
||||
ValidationError, IntegerField)
|
||||
from wtforms.validators import DataRequired, Length
|
||||
|
||||
|
||||
class AddCorpusFileForm(FlaskForm):
|
||||
@@ -91,76 +91,3 @@ class ImportCorpusForm(FlaskForm):
|
||||
raise ValidationError('File does not have an approved extension: '
|
||||
'.zip')
|
||||
field.data.filename = secure_filename(field.data.filename)
|
||||
|
||||
|
||||
class QueryForm(FlaskForm):
|
||||
'''
|
||||
Form to submit a query to the server which is executed via cqi-py.
|
||||
'''
|
||||
query = StringField('Query',
|
||||
validators=[DataRequired(), Length(1, 1024)])
|
||||
submit = SubmitField('Search')
|
||||
|
||||
|
||||
class DisplayOptionsForm(FlaskForm):
|
||||
'''
|
||||
Form to alter how the matches are represented to the user by the user.
|
||||
'''
|
||||
expert_mode = BooleanField('Expert mode')
|
||||
result_context = SelectField('Result context',
|
||||
choices=[('', 'Choose your option'),
|
||||
('10', '10'),
|
||||
('20', '20'),
|
||||
('30', '30'),
|
||||
('40', '40'),
|
||||
('50', '50')])
|
||||
results_per_page = SelectField('Results per page',
|
||||
choices=[('', 'Choose your option'),
|
||||
('10', '10'),
|
||||
('20', '20'),
|
||||
('30', '30'),
|
||||
('40', '40'),
|
||||
('50', '50')])
|
||||
|
||||
|
||||
class InspectDisplayOptionsForm(FlaskForm):
|
||||
'''
|
||||
Form for the inspect modal where the user can interact with how the current
|
||||
match is being represented to him.
|
||||
'''
|
||||
expert_mode_inspect = BooleanField('Expert mode')
|
||||
highlight_sentences = BooleanField('Split sentences')
|
||||
context_sentences = IntegerField('Context sentences',
|
||||
validators=[NumberRange(min=0, max=10)],
|
||||
default=3)
|
||||
|
||||
|
||||
class QueryDownloadForm(FlaskForm):
|
||||
'''
|
||||
Form to choose in what file format the analysis results are being
|
||||
downloaded. WIP.
|
||||
'''
|
||||
file_type = SelectField('File type',
|
||||
choices=[('', 'Choose file type'),
|
||||
('csv', 'csv'),
|
||||
('json', 'json'),
|
||||
('excel', 'excel'),
|
||||
('html', 'html-table')],
|
||||
validators=[DataRequired()])
|
||||
|
||||
|
||||
class AddQueryResultForm(FlaskForm):
|
||||
'''
|
||||
Form used to import one result json file.
|
||||
'''
|
||||
description = StringField('Description',
|
||||
validators=[DataRequired(), Length(1, 255)])
|
||||
file = FileField('File', validators=[DataRequired()])
|
||||
title = StringField('Title', validators=[DataRequired(), Length(1, 32)])
|
||||
submit = SubmitField()
|
||||
|
||||
def validate_file(self, field):
|
||||
if not field.data.filename.lower().endswith('.json'):
|
||||
raise ValidationError('File does not have an approved extension: '
|
||||
'.json')
|
||||
field.data.filename = secure_filename(field.data.filename)
|
||||
|
app/corpora/query_results_forms.py (new file, 21 lines)
@@ -0,0 +1,21 @@
|
||||
from flask_wtf import FlaskForm
|
||||
from werkzeug.utils import secure_filename
|
||||
from wtforms import FileField, StringField, SubmitField, ValidationError
|
||||
from wtforms.validators import DataRequired, Length
|
||||
|
||||
|
||||
class AddQueryResultForm(FlaskForm):
|
||||
'''
|
||||
Form used to import one result json file.
|
||||
'''
|
||||
description = StringField('Description',
|
||||
validators=[DataRequired(), Length(1, 255)])
|
||||
file = FileField('File', validators=[DataRequired()])
|
||||
title = StringField('Title', validators=[DataRequired(), Length(1, 32)])
|
||||
submit = SubmitField()
|
||||
|
||||
def validate_file(self, field):
|
||||
if not field.data.filename.lower().endswith('.json'):
|
||||
raise ValidationError('File does not have an approved extension: '
|
||||
'.json')
|
||||
field.data.filename = secure_filename(field.data.filename)
|
app/corpora/query_results_routes.py (new file, 134 lines)
@@ -0,0 +1,134 @@
|
||||
from flask import (abort, current_app, flash, make_response, redirect, request,
|
||||
render_template, url_for, send_from_directory)
|
||||
from flask_login import current_user, login_required
|
||||
from . import bp
|
||||
from . import tasks
|
||||
from .forms import (AddQueryResultForm, DisplayOptionsForm,
|
||||
InspectDisplayOptionsForm)
|
||||
from .. import db
|
||||
from ..models import QueryResult
|
||||
import json
|
||||
import os
|
||||
|
||||
|
||||
@bp.route('/result/add', methods=['GET', 'POST'])
|
||||
@login_required
|
||||
def add_query_result():
|
||||
'''
|
||||
View to import a result as a json file.
|
||||
'''
|
||||
abort(503)
|
||||
form = AddQueryResultForm(prefix='add-query-result-form')
|
||||
if form.is_submitted():
|
||||
if not form.validate():
|
||||
return make_response(form.errors, 400)
|
||||
query_result = QueryResult(creator=current_user,
|
||||
description=form.description.data,
|
||||
filename=form.file.data.filename,
|
||||
title=form.title.data)
|
||||
db.session.add(query_result)
|
||||
db.session.flush()
|
||||
db.session.refresh(query_result)
|
||||
try:
|
||||
os.makedirs(os.path.dirname(query_result.path))
|
||||
except OSError:
|
||||
current_app.logger.error(
|
||||
'Make dir {} led to an OSError!'.format(query_result.path)
|
||||
)
|
||||
db.session.rollback()
|
||||
flash('Internal Server Error', 'error')
|
||||
return make_response(
|
||||
{'redirect_url': url_for('.add_query_result')}, 500)
|
||||
# save the uploaded file
|
||||
form.file.data.save(query_result.path)
|
||||
# parse json from file
|
||||
with open(query_result.path, 'r') as file:
|
||||
query_result_file_content = json.load(file)
|
||||
# parse json schema
|
||||
# with open('app/static/json_schema/nopaque_cqi_py_results_schema.json', 'r') as file: # noqa
|
||||
# schema = json.load(file)
|
||||
# try:
|
||||
# # validate imported json file
|
||||
# validate(instance=query_result_file_content, schema=schema)
|
||||
# except Exception:
|
||||
# tasks.delete_query_result(query_result.id)
|
||||
# flash('Uploaded file is invalid', 'result')
|
||||
# return make_response(
|
||||
# {'redirect_url': url_for('.add_query_result')}, 201)
|
||||
query_result_file_content.pop('matches')
|
||||
query_result_file_content.pop('cpos_lookup')
|
||||
query_result.query_metadata = query_result_file_content
|
||||
db.session.commit()
|
||||
flash('Query result added!', 'result')
|
||||
return make_response({'redirect_url': url_for('.query_result', query_result_id=query_result.id)}, 201) # noqa
|
||||
return render_template('corpora/query_results/add_query_result.html.j2',
|
||||
form=form, title='Add query result')
|
||||
|
||||
|
||||
@bp.route('/result/<int:query_result_id>')
|
||||
@login_required
|
||||
def query_result(query_result_id):
|
||||
abort(503)
|
||||
query_result = QueryResult.query.get_or_404(query_result_id)
|
||||
if not (query_result.creator == current_user
|
||||
or current_user.is_administrator()):
|
||||
abort(403)
|
||||
return render_template('corpora/query_results/query_result.html.j2',
|
||||
query_result=query_result, title='Query result')
|
||||
|
||||
|
||||
@bp.route('/result/<int:query_result_id>/inspect')
|
||||
@login_required
|
||||
def inspect_query_result(query_result_id):
|
||||
'''
|
||||
View to inspect an imported result file in a corpus-analysis-like interface
|
||||
'''
|
||||
abort(503)
|
||||
query_result = QueryResult.query.get_or_404(query_result_id)
|
||||
query_metadata = query_result.query_metadata
|
||||
if not (query_result.creator == current_user
|
||||
or current_user.is_administrator()):
|
||||
abort(403)
|
||||
display_options_form = DisplayOptionsForm(
|
||||
prefix='display-options-form',
|
||||
results_per_page=request.args.get('results_per_page', 30),
|
||||
result_context=request.args.get('context', 20)
|
||||
)
|
||||
inspect_display_options_form = InspectDisplayOptionsForm(
|
||||
prefix='inspect-display-options-form'
|
||||
)
|
||||
with open(query_result.path, 'r') as query_result_file:
|
||||
query_result_file_content = json.load(query_result_file)
|
||||
return render_template('corpora/query_results/inspect.html.j2',
|
||||
query_result=query_result,
|
||||
display_options_form=display_options_form,
|
||||
inspect_display_options_form=inspect_display_options_form, # noqa
|
||||
query_result_file_content=query_result_file_content,
|
||||
query_metadata=query_metadata,
|
||||
title='Inspect query result')
|
||||
|
||||
|
||||
@bp.route('/result/<int:query_result_id>/delete')
|
||||
@login_required
|
||||
def delete_query_result(query_result_id):
|
||||
abort(503)
|
||||
query_result = QueryResult.query.get_or_404(query_result_id)
|
||||
if not (query_result.creator == current_user
|
||||
or current_user.is_administrator()):
|
||||
abort(403)
|
||||
flash('Query result "{}" has been marked for deletion!'.format(query_result), 'result') # noqa
|
||||
tasks.delete_query_result(query_result_id)
|
||||
return redirect(url_for('services.service', service="corpus_analysis"))
|
||||
|
||||
|
||||
@bp.route('/result/<int:query_result_id>/download')
|
||||
@login_required
|
||||
def download_query_result(query_result_id):
|
||||
abort(503)
|
||||
query_result = QueryResult.query.get_or_404(query_result_id)
|
||||
if not (query_result.creator == current_user
|
||||
or current_user.is_administrator()):
|
||||
abort(403)
|
||||
return send_from_directory(as_attachment=True,
|
||||
directory=os.path.dirname(query_result.path),
|
||||
filename=query_result.filename)
|
app/corpora/routes.py
@@ -1,16 +1,12 @@
|
||||
from flask import (abort, current_app, flash, make_response, redirect, request,
|
||||
from flask import (abort, current_app, flash, make_response, redirect,
|
||||
render_template, url_for, send_from_directory)
|
||||
from flask_login import current_user, login_required
|
||||
from . import bp
|
||||
from . import tasks
|
||||
from .forms import (AddCorpusFileForm, AddCorpusForm, AddQueryResultForm,
|
||||
EditCorpusFileForm, QueryDownloadForm, QueryForm,
|
||||
DisplayOptionsForm, InspectDisplayOptionsForm,
|
||||
from .forms import (AddCorpusFileForm, AddCorpusForm, EditCorpusFileForm,
|
||||
ImportCorpusForm)
|
||||
from jsonschema import validate
|
||||
from .. import db
|
||||
from ..models import Corpus, CorpusFile, QueryResult
|
||||
import json
|
||||
from ..models import Corpus, CorpusFile
|
||||
import os
|
||||
import shutil
|
||||
import glob
|
||||
@@ -22,21 +18,22 @@ from .import_corpus import check_zip_contents
|
||||
@bp.route('/add', methods=['GET', 'POST'])
|
||||
@login_required
|
||||
def add_corpus():
|
||||
form = AddCorpusForm()
|
||||
form = AddCorpusForm(prefix='add-corpus-form')
|
||||
if form.validate_on_submit():
|
||||
corpus = Corpus(creator=current_user,
|
||||
description=form.description.data,
|
||||
title=form.title.data)
|
||||
corpus = Corpus(
|
||||
creator=current_user,
|
||||
description=form.description.data,
|
||||
title=form.title.data
|
||||
)
|
||||
db.session.add(corpus)
|
||||
db.session.flush()
|
||||
db.session.refresh(corpus)
|
||||
try:
|
||||
os.makedirs(corpus.path)
|
||||
except OSError:
|
||||
current_app.logger.error(
|
||||
'Make dir {} led to an OSError!'.format(corpus.path)
|
||||
)
|
||||
except OSError as e:
|
||||
current_app.logger.error(f'Could not add corpus: {e}')
|
||||
db.session.rollback()
|
||||
flash('Internal Server Error', 'error')
|
||||
abort(500)
|
||||
else:
|
||||
db.session.commit()
|
||||
@@ -49,22 +46,23 @@ def add_corpus():
|
||||
@bp.route('/import', methods=['GET', 'POST'])
|
||||
@login_required
|
||||
def import_corpus():
|
||||
abort(503)
|
||||
form = ImportCorpusForm()
|
||||
if form.is_submitted():
|
||||
if not form.validate():
|
||||
return make_response(form.errors, 400)
|
||||
corpus = Corpus(creator=current_user,
|
||||
description=form.description.data,
|
||||
title=form.title.data)
|
||||
corpus = Corpus(
|
||||
creator=current_user,
|
||||
description=form.description.data,
|
||||
title=form.title.data
|
||||
)
|
||||
db.session.add(corpus)
|
||||
db.session.flush()
|
||||
db.session.refresh(corpus)
|
||||
try:
|
||||
os.makedirs(corpus.path)
|
||||
except OSError:
|
||||
current_app.logger.error(
|
||||
'Make dir {} led to an OSError!'.format(corpus.path)
|
||||
)
|
||||
except OSError as e:
|
||||
current_app.logger.error(f'Could not import corpus: {e}')
|
||||
db.session.rollback()
|
||||
flash('Internal Server Error', 'error')
|
||||
return make_response(
|
||||
@@ -128,9 +126,21 @@ def corpus(corpus_id):
|
||||
corpus_files=corpus_files, title='Corpus')
|
||||
|
||||
|
||||
@bp.route('/<int:corpus_id>/analyse')
|
||||
@login_required
|
||||
def analyse_corpus(corpus_id):
|
||||
corpus = Corpus.query.get_or_404(corpus_id)
|
||||
return render_template(
|
||||
'corpora/analyse_corpus.html.j2',
|
||||
corpus=corpus,
|
||||
title=f'Analyse Corpus {corpus.title}'
|
||||
)
|
||||
|
||||
|
||||
@bp.route('/<int:corpus_id>/download')
|
||||
@login_required
|
||||
def download_corpus(corpus_id):
|
||||
abort(503)
|
||||
corpus = Corpus.query.get_or_404(corpus_id)
|
||||
if not (corpus.creator == current_user or current_user.is_administrator()):
|
||||
abort(403)
|
||||
@@ -142,31 +152,6 @@ def download_corpus(corpus_id):
|
||||
)
|
||||
|
||||
|
||||
@bp.route('/<int:corpus_id>/analyse')
|
||||
@login_required
|
||||
def analyse_corpus(corpus_id):
|
||||
corpus = Corpus.query.get_or_404(corpus_id)
|
||||
display_options_form = DisplayOptionsForm(
|
||||
prefix='display-options-form',
|
||||
result_context=request.args.get('context', 20),
|
||||
results_per_page=request.args.get('results_per_page', 30)
|
||||
)
|
||||
query_form = QueryForm(prefix='query-form',
|
||||
query=request.args.get('query'))
|
||||
query_download_form = QueryDownloadForm(prefix='query-download-form')
|
||||
inspect_display_options_form = InspectDisplayOptionsForm(
|
||||
prefix='inspect-display-options-form')
|
||||
return render_template(
|
||||
'corpora/analyse_corpus.html.j2',
|
||||
corpus=corpus,
|
||||
display_options_form=display_options_form,
|
||||
inspect_display_options_form=inspect_display_options_form,
|
||||
query_form=query_form,
|
||||
query_download_form=query_download_form,
|
||||
title='Corpus analysis'
|
||||
)
|
||||
|
||||
|
||||
@bp.route('/<int:corpus_id>/delete')
|
||||
@login_required
|
||||
def delete_corpus(corpus_id):
|
||||
@@ -190,20 +175,22 @@ def add_corpus_file(corpus_id):
|
||||
return make_response(form.errors, 400)
|
||||
# Save the file
|
||||
form.file.data.save(os.path.join(corpus.path, form.file.data.filename))
|
||||
corpus_file = CorpusFile(address=form.address.data,
|
||||
author=form.author.data,
|
||||
booktitle=form.booktitle.data,
|
||||
chapter=form.chapter.data,
|
||||
corpus=corpus,
|
||||
editor=form.editor.data,
|
||||
filename=form.file.data.filename,
|
||||
institution=form.institution.data,
|
||||
journal=form.journal.data,
|
||||
pages=form.pages.data,
|
||||
publisher=form.publisher.data,
|
||||
publishing_year=form.publishing_year.data,
|
||||
school=form.school.data,
|
||||
title=form.title.data)
|
||||
corpus_file = CorpusFile(
|
||||
address=form.address.data,
|
||||
author=form.author.data,
|
||||
booktitle=form.booktitle.data,
|
||||
chapter=form.chapter.data,
|
||||
corpus=corpus,
|
||||
editor=form.editor.data,
|
||||
filename=form.file.data.filename,
|
||||
institution=form.institution.data,
|
||||
journal=form.journal.data,
|
||||
pages=form.pages.data,
|
||||
publisher=form.publisher.data,
|
||||
publishing_year=form.publishing_year.data,
|
||||
school=form.school.data,
|
||||
title=form.title.data
|
||||
)
|
||||
db.session.add(corpus_file)
|
||||
corpus.status = 'unprepared'
|
||||
db.session.commit()
|
||||
@@ -298,122 +285,3 @@ def prepare_corpus(corpus_id):
|
||||
else:
|
||||
flash('Can not build corpus "{}": No corpus file(s)!'.format(corpus.title), 'error') # noqa
|
||||
return redirect(url_for('.corpus', corpus_id=corpus_id))
|
||||
|
||||
|
||||
# Following are view functions to add, view etc. exported results.
|
||||
@bp.route('/result/add', methods=['GET', 'POST'])
|
||||
@login_required
|
||||
def add_query_result():
|
||||
'''
|
||||
View to import a result as a json file.
|
||||
'''
|
||||
form = AddQueryResultForm(prefix='add-query-result-form')
|
||||
if form.is_submitted():
|
||||
if not form.validate():
|
||||
return make_response(form.errors, 400)
|
||||
query_result = QueryResult(creator=current_user,
|
||||
description=form.description.data,
|
||||
filename=form.file.data.filename,
|
||||
title=form.title.data)
|
||||
db.session.add(query_result)
|
||||
db.session.flush()
|
||||
db.session.refresh(query_result)
|
||||
try:
|
||||
os.makedirs(os.path.dirname(query_result.path))
|
||||
except OSError:
|
||||
current_app.logger.error(
|
||||
'Make dir {} led to an OSError!'.format(query_result.path)
|
||||
)
|
||||
db.session.rollback()
|
||||
flash('Internal Server Error', 'error')
|
||||
return make_response(
|
||||
{'redirect_url': url_for('.add_query_result')}, 500)
|
||||
# save the uploaded file
|
||||
form.file.data.save(query_result.path)
|
||||
# parse json from file
|
||||
with open(query_result.path, 'r') as file:
|
||||
query_result_file_content = json.load(file)
|
||||
# parse json schema
|
||||
# with open('app/static/json_schema/nopaque_cqi_py_results_schema.json', 'r') as file: # noqa
|
||||
# schema = json.load(file)
|
||||
# try:
|
||||
# # validate imported json file
|
||||
# validate(instance=query_result_file_content, schema=schema)
|
||||
# except Exception:
|
||||
# tasks.delete_query_result(query_result.id)
|
||||
# flash('Uploaded file is invalid', 'result')
|
||||
# return make_response(
|
||||
# {'redirect_url': url_for('.add_query_result')}, 201)
|
||||
query_result_file_content.pop('matches')
|
||||
query_result_file_content.pop('cpos_lookup')
|
||||
query_result.query_metadata = query_result_file_content
|
||||
db.session.commit()
|
||||
flash('Query result added!', 'result')
|
||||
return make_response({'redirect_url': url_for('.query_result', query_result_id=query_result.id)}, 201) # noqa
|
||||
return render_template('corpora/query_results/add_query_result.html.j2',
|
||||
form=form, title='Add query result')
|
||||
|
||||
|
||||
@bp.route('/result/<int:query_result_id>')
|
||||
@login_required
|
||||
def query_result(query_result_id):
|
||||
query_result = QueryResult.query.get_or_404(query_result_id)
|
||||
if not (query_result.creator == current_user
|
||||
or current_user.is_administrator()):
|
||||
abort(403)
|
||||
return render_template('corpora/query_results/query_result.html.j2',
|
||||
query_result=query_result, title='Query result')
|
||||
|
||||
|
||||
@bp.route('/result/<int:query_result_id>/inspect')
|
||||
@login_required
|
||||
def inspect_query_result(query_result_id):
|
||||
'''
|
||||
View to inspect imported result file in a corpus analysis like interface
|
||||
'''
|
||||
query_result = QueryResult.query.get_or_404(query_result_id)
|
||||
query_metadata = query_result.query_metadata
|
||||
if not (query_result.creator == current_user
|
||||
or current_user.is_administrator()):
|
||||
abort(403)
|
||||
display_options_form = DisplayOptionsForm(
|
||||
prefix='display-options-form',
|
||||
results_per_page=request.args.get('results_per_page', 30),
|
||||
result_context=request.args.get('context', 20)
|
||||
)
|
||||
inspect_display_options_form = InspectDisplayOptionsForm(
|
||||
prefix='inspect-display-options-form'
|
||||
)
|
||||
with open(query_result.path, 'r') as query_result_file:
|
||||
query_result_file_content = json.load(query_result_file)
|
||||
return render_template('corpora/query_results/inspect.html.j2',
|
||||
query_result=query_result,
|
||||
display_options_form=display_options_form,
|
||||
inspect_display_options_form=inspect_display_options_form, # noqa
|
||||
query_result_file_content=query_result_file_content,
|
||||
query_metadata=query_metadata,
|
||||
title='Inspect query result')
|
||||
|
||||
|
||||
@bp.route('/result/<int:query_result_id>/delete')
|
||||
@login_required
|
||||
def delete_query_result(query_result_id):
|
||||
query_result = QueryResult.query.get_or_404(query_result_id)
|
||||
if not (query_result.creator == current_user
|
||||
or current_user.is_administrator()):
|
||||
abort(403)
|
||||
flash('Query result "{}" has been marked for deletion!'.format(query_result), 'result') # noqa
|
||||
tasks.delete_query_result(query_result_id)
|
||||
return redirect(url_for('services.service', service="corpus_analysis"))
|
||||
|
||||
|
||||
@bp.route('/result/<int:query_result_id>/download')
|
||||
@login_required
|
||||
def download_query_result(query_result_id):
|
||||
query_result = QueryResult.query.get_or_404(query_result_id)
|
||||
if not (query_result.creator == current_user
|
||||
or current_user.is_administrator()):
|
||||
abort(403)
|
||||
return send_from_directory(as_attachment=True,
|
||||
directory=os.path.dirname(query_result.path),
|
||||
filename=query_result.filename)