mirror of
				https://gitlab.ub.uni-bielefeld.de/sfb1288inf/nopaque.git
				synced 2025-11-03 20:02:47 +00:00 
			
		
		
		
	Big Corpus analysis update
This commit is contained in:
		@@ -2,4 +2,4 @@ from flask import Blueprint
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
bp = Blueprint('corpora', __name__)
 | 
			
		||||
from . import events, routes  # noqa
 | 
			
		||||
from . import cqi_over_socketio, routes  # noqa
 | 
			
		||||
 
 | 
			
		||||
							
								
								
									
										108
									
								
								app/corpora/cqi_over_socketio/__init__.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										108
									
								
								app/corpora/cqi_over_socketio/__init__.py
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,108 @@
 | 
			
		||||
from app import db, socketio
 | 
			
		||||
from app.decorators import socketio_login_required
 | 
			
		||||
from app.models import Corpus
 | 
			
		||||
from flask import session
 | 
			
		||||
from flask_login import current_user
 | 
			
		||||
from flask_socketio import ConnectionRefusedError
 | 
			
		||||
from threading import Lock
 | 
			
		||||
import cqi
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
'''
 | 
			
		||||
This package tunnels the Corpus Query interface (CQi) protocol through
 | 
			
		||||
Socket.IO (SIO) by wrapping each CQi function in a separate SIO event.
 | 
			
		||||
 | 
			
		||||
This module only handles the SIO connect/disconnect, which handles the setup
 | 
			
		||||
and teardown of necessary resources for later use. Each CQi function has a
 | 
			
		||||
corresponding SIO event. The event handlers are spread across the different
 | 
			
		||||
modules within this package.
 | 
			
		||||
 | 
			
		||||
Basic concept:
 | 
			
		||||
1. A client connects to the SIO namespace and provides the id of a corpus to be
 | 
			
		||||
   analysed.
 | 
			
		||||
     1.1 The analysis session counter of the corpus is incremented.
 | 
			
		||||
     1.2 A CQiClient and a (Mutex) Lock belonging to it is created.
 | 
			
		||||
     1.3 Wait until the CQP server is running.
 | 
			
		||||
     1.4 Connect the CQiClient to the server.
 | 
			
		||||
     1.5 Save the CQiClient and the Lock in the session for subsequent use.
 | 
			
		||||
2. A client emits an event and may provide a single json object with necessary
 | 
			
		||||
   arguments for the targeted CQi function.
 | 
			
		||||
3. A SIO event handler (decorated with cqi_over_socketio) gets executed.
 | 
			
		||||
     - The event handler function defines all arguments. Since the payload
 | 
			
		||||
       is sent as a single json object, the decorator decomposes it to fit
 | 
			
		||||
       the function's signature. This also includes type checking and proper
 | 
			
		||||
       use of the lock (acquire/release) mechanism.
 | 
			
		||||
4. Wait for more events
 | 
			
		||||
5. The client disconnects from the SIO namespace
 | 
			
		||||
     5.1 The analysis session counter of the corpus is decremented.
 | 
			
		||||
     5.2 The CQiClient and (Mutex) Lock belonging to it are torn down.
 | 
			
		||||
'''
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
NAMESPACE = '/corpora/corpus/corpus_analysis'
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
# Import all CQi over Socket.IO event handlers
 | 
			
		||||
from .cqi_corpora_corpus_subcorpora import *  # noqa
 | 
			
		||||
from .cqi_corpora_corpus_structural_attributes import *  # noqa
 | 
			
		||||
from .cqi_corpora_corpus_positional_attributes import *  # noqa
 | 
			
		||||
from .cqi_corpora_corpus_alignment_attributes import *  # noqa
 | 
			
		||||
from .cqi_corpora_corpus import *  # noqa
 | 
			
		||||
from .cqi_corpora import *  # noqa
 | 
			
		||||
from .cqi import *  # noqa
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@socketio.on('connect', namespace=NAMESPACE)
@socketio_login_required
def connect(auth):
    '''
    Start a corpus analysis session for the connecting client.

    The auth payload is used in a hacky way: it transports the id of the
    corpus for which an analysis session should be started
    ({'corpus_id': ...}).

    The connection is refused (ConnectionRefusedError) if
      - the auth payload is missing or does not contain 'corpus_id',
      - the corpus does not exist ('Not Found'),
      - the current user is neither the creator nor an administrator
        ('Forbidden'),
      - the corpus status does not allow an analysis ('Failed Dependency'),
      - the corpus does not reach the status 'analysing' within
        20 retries of 3 seconds each ('Request Timeout').

    On success the corpus id, a CQiClient and a Lock guarding it are stored
    in the session under the key 'd'.
    '''
    if not isinstance(auth, dict) or 'corpus_id' not in auth:
        raise ConnectionRefusedError('Bad Request')
    corpus_id = auth['corpus_id']
    corpus = Corpus.query.get(corpus_id)
    if corpus is None:
        raise ConnectionRefusedError('Not Found')
    if not (corpus.creator == current_user or current_user.is_administrator()):
        raise ConnectionRefusedError('Forbidden')
    if corpus.status not in ['prepared', 'start analysis', 'analysing', 'stop analysis']:  # noqa
        raise ConnectionRefusedError('Failed Dependency')
    if corpus.num_analysis_sessions is None:
        corpus.num_analysis_sessions = 0
        db.session.commit()
    # NOTE(review): the column expression (Corpus.num_analysis_sessions + 1)
    # is assigned on purpose, presumably so that the database computes the
    # increment itself - confirm against the SQLAlchemy documentation.
    corpus.num_analysis_sessions = Corpus.num_analysis_sessions + 1
    db.session.commit()
    retry_counter = 20
    while corpus.status != 'analysing':
        if retry_counter == 0:
            # Undo the session counter increment from above
            corpus.num_analysis_sessions = Corpus.num_analysis_sessions - 1
            db.session.commit()
            # Returning a dict from a connect handler does not reject the
            # connection, the client would end up connected without any
            # session data. Refuse it explicitly like the other error cases.
            raise ConnectionRefusedError('Request Timeout')
        socketio.sleep(3)
        retry_counter -= 1
        # Reload the corpus to see status changes made elsewhere
        db.session.refresh(corpus)
    cqi_client = cqi.CQiClient('cqpserver_{}'.format(corpus_id))
    session['d'] = {
        'corpus_id': corpus_id,
        'cqi_client': cqi_client,
        'cqi_client_lock': Lock(),
    }
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@socketio.on('disconnect', namespace=NAMESPACE)
def disconnect():
    '''
    Tear down the corpus analysis session of the disconnecting client.

    Disconnects the CQiClient stored in the session (errors raised while
    doing so are ignored), decrements the analysis session counter of the
    corpus and removes the session data stored under the key 'd'.
    Does nothing if no session data is present.
    '''
    session_data = session.get('d')
    if session_data is None:
        # No analysis session was set up for this client, nothing to tear
        # down.
        return
    # The with statement guarantees that the lock is released, even if an
    # unexpected exception is raised while disconnecting.
    with session_data['cqi_client_lock']:
        try:
            session_data['cqi_client'].disconnect()
        except (cqi.errors.CQiException, BrokenPipeError):
            # Best effort: a failing CQi disconnect must not prevent the
            # session counter update below.
            pass
    corpus = Corpus.query.get(session_data['corpus_id'])
    if corpus is not None:
        corpus.num_analysis_sessions = Corpus.num_analysis_sessions - 1
        db.session.commit()
    session.pop('d', None)
 | 
			
		||||
							
								
								
									
										43
									
								
								app/corpora/cqi_over_socketio/cqi.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										43
									
								
								app/corpora/cqi_over_socketio/cqi.py
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,43 @@
 | 
			
		||||
from app import socketio
 | 
			
		||||
from app.decorators import socketio_login_required
 | 
			
		||||
from socket import gaierror
 | 
			
		||||
from . import NAMESPACE as ns
 | 
			
		||||
from .utils import cqi_over_socketio
 | 
			
		||||
import cqi
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@socketio.on('cqi.connect', namespace=ns)
@socketio_login_required
@cqi_over_socketio
def cqi_connect(cqi_client: cqi.CQiClient):
    '''
    Handle the 'cqi.connect' event: connect the CQiClient.

    Returns a 200 response whose payload holds the CQi status code and its
    name from the CQi specification lookup. A socket.gaierror raised while
    connecting is answered with a 500 response carrying the first two
    exception arguments as 'code' and 'desc'.
    '''
    try:
        status_code = cqi_client.connect()
    except gaierror as error:
        error_payload = {'code': error.args[0], 'desc': error.args[1]}
        response = {'code': 500, 'msg': 'Internal Server Error'}
        response['payload'] = error_payload
        return response
    status_name = cqi.api.specification.lookup[status_code]
    return {
        'code': 200,
        'msg': 'OK',
        'payload': {'code': status_code, 'msg': status_name}
    }
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@socketio.on('cqi.disconnect', namespace=ns)
@socketio_login_required
@cqi_over_socketio
def cqi_disconnect(cqi_client: cqi.CQiClient):
    '''
    Handle the 'cqi.disconnect' event: disconnect the CQiClient.

    Returns a 200 response whose payload holds the CQi status code and its
    name from the CQi specification lookup.
    '''
    status_code = cqi_client.disconnect()
    status_name = cqi.api.specification.lookup[status_code]
    return {
        'code': 200,
        'msg': 'OK',
        'payload': {'code': status_code, 'msg': status_name}
    }
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@socketio.on('cqi.ping', namespace=ns)
@socketio_login_required
@cqi_over_socketio
def cqi_ping(cqi_client: cqi.CQiClient):
    '''
    Handle the 'cqi.ping' event: ping via the CQiClient.

    Returns a 200 response whose payload holds the CQi status code and its
    name from the CQi specification lookup.
    '''
    status_code = cqi_client.ping()
    status_name = cqi.api.specification.lookup[status_code]
    return {
        'code': 200,
        'msg': 'OK',
        'payload': {'code': status_code, 'msg': status_name}
    }
 | 
			
		||||
							
								
								
									
										22
									
								
								app/corpora/cqi_over_socketio/cqi_corpora.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										22
									
								
								app/corpora/cqi_over_socketio/cqi_corpora.py
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,22 @@
 | 
			
		||||
from app import socketio
 | 
			
		||||
from app.decorators import socketio_login_required
 | 
			
		||||
from . import NAMESPACE as ns
 | 
			
		||||
from .utils import cqi_over_socketio
 | 
			
		||||
import cqi
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@socketio.on('cqi.corpora.get', namespace=ns)
@socketio_login_required
@cqi_over_socketio
def cqi_corpora_get(cqi_client: cqi.CQiClient, corpus_name: str):
    '''
    Handle the 'cqi.corpora.get' event.

    Looks up the corpus named corpus_name and returns a 200 response whose
    payload is a copy of the corpus attributes (attrs).
    '''
    corpus_attrs = cqi_client.corpora.get(corpus_name).attrs
    return {'code': 200, 'msg': 'OK', 'payload': {**corpus_attrs}}
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@socketio.on('cqi.corpora.list', namespace=ns)
@socketio_login_required
@cqi_over_socketio
def cqi_corpora_list(cqi_client: cqi.CQiClient):
    '''
    Handle the 'cqi.corpora.list' event.

    Returns a 200 response whose payload is a list with a copy of the
    attributes (attrs) of every corpus listed by the CQiClient.
    '''
    attrs_of_corpora = []
    for cqi_corpus in cqi_client.corpora.list():
        attrs_of_corpora.append({**cqi_corpus.attrs})
    return {'code': 200, 'msg': 'OK', 'payload': attrs_of_corpora}
 | 
			
		||||
							
								
								
									
										85
									
								
								app/corpora/cqi_over_socketio/cqi_corpora_corpus.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										85
									
								
								app/corpora/cqi_over_socketio/cqi_corpora_corpus.py
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,85 @@
 | 
			
		||||
from app import db, socketio
 | 
			
		||||
from app.decorators import socketio_login_required
 | 
			
		||||
from app.models import Corpus
 | 
			
		||||
from flask import session
 | 
			
		||||
from . import NAMESPACE as ns
 | 
			
		||||
from .utils import cqi_over_socketio, lookups_by_cpos
 | 
			
		||||
import cqi
 | 
			
		||||
import math
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@socketio.on('cqi.corpora.corpus.drop', namespace=ns)
@socketio_login_required
@cqi_over_socketio
def cqi_corpora_corpus_drop(cqi_client: cqi.CQiClient, corpus_name: str):
    '''
    Handle the 'cqi.corpora.corpus.drop' event.

    Calls drop() on the corpus named corpus_name and returns a 200 response
    whose payload holds the CQi status code and its name from the CQi
    specification lookup.
    '''
    status_code = cqi_client.corpora.get(corpus_name).drop()
    status_name = cqi.api.specification.lookup[status_code]
    return {
        'code': 200,
        'msg': 'OK',
        'payload': {'code': status_code, 'msg': status_name}
    }
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@socketio.on('cqi.corpora.corpus.query', namespace=ns)
@socketio_login_required
@cqi_over_socketio
def cqi_corpora_corpus_query(cqi_client: cqi.CQiClient, corpus_name: str, subcorpus_name: str, query: str):  # noqa
    '''
    Handle the 'cqi.corpora.corpus.query' event.

    Runs query on the corpus named corpus_name, passing subcorpus_name as
    the name for the result, and returns a 200 response whose payload holds
    the CQi status code and its name from the CQi specification lookup.
    '''
    target_corpus = cqi_client.corpora.get(corpus_name)
    status_code = target_corpus.query(subcorpus_name, query)
    status_name = cqi.api.specification.lookup[status_code]
    return {
        'code': 200,
        'msg': 'OK',
        'payload': {'code': status_code, 'msg': status_name}
    }
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
###############################################################################
 | 
			
		||||
# nopaque specific CQi extensions                                             #
 | 
			
		||||
###############################################################################
 | 
			
		||||
@socketio.on('cqi.corpora.corpus.update_db', namespace=ns)
@socketio_login_required
@cqi_over_socketio
def cqi_corpora_corpus_update_db(cqi_client: cqi.CQiClient, corpus_name: str):
    '''
    Handle the 'cqi.corpora.corpus.update_db' event.

    Stores the size of the CQi corpus named corpus_name as num_tokens of the
    database corpus that belongs to the current analysis session and commits
    the change.

    Returns a 200 response on success or a 404 response if the database
    corpus of the session does not exist (anymore).
    '''
    corpus = Corpus.query.get(session['d']['corpus_id'])
    if corpus is None:
        return {'code': 404, 'msg': 'Not Found'}
    # Use the requested corpus instead of a hard-coded name, so that the
    # required corpus_name argument is actually honoured.
    corpus.num_tokens = cqi_client.corpora.get(corpus_name).attrs['size']
    db.session.commit()
    # Answer like every other handler, so the client can check the outcome
    return {'code': 200, 'msg': 'OK'}
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@socketio.on('cqi.corpora.corpus.paginate', namespace=ns)
@socketio_login_required
@cqi_over_socketio
def cqi_corpora_corpus_paginate(cqi_client: cqi.CQiClient, corpus_name: str, page: int = 1, per_page: int = 20):  # noqa
    '''
    Handle the 'cqi.corpora.corpus.paginate' event.

    Returns one page of corpus positions of the corpus named corpus_name
    together with their lookups and pagination metadata. page is 1 indexed.
    A 416 response is returned if per_page or page is smaller than 1 or if
    page lies behind the last page of a non empty corpus.
    '''
    cqi_corpus = cqi_client.corpora.get(corpus_name)
    # Sanity checks
    if per_page < 1 or page < 1:
        return {'code': 416, 'msg': 'Range Not Satisfiable'}
    corpus_size = cqi_corpus.attrs['size']
    if corpus_size > 0 and page > math.ceil(corpus_size / per_page):
        return {'code': 416, 'msg': 'Range Not Satisfiable'}
    # Corpus positions covered by the requested page
    first_cpos = (page - 1) * per_page
    last_cpos = min(corpus_size, first_cpos + per_page)
    cpos_list = list(range(first_cpos, last_cpos))
    lookups = lookups_by_cpos(cqi_corpus, cpos_list)
    # Pagination metadata
    total_pages = math.ceil(corpus_size / per_page)
    current_page = page if total_pages > 0 else None
    has_prev = bool(current_page) and current_page > 1
    has_next = bool(current_page) and current_page < total_pages
    payload = {
        # the items for the current page
        'items': [cpos_list],
        # the lookups for the items
        'lookups': lookups,
        # the total number of items
        'total': corpus_size,
        # the number of items to be displayed on a page
        'per_page': per_page,
        # the total number of pages
        'pages': total_pages,
        # the current page number (1 indexed)
        'page': current_page,
        # True if a previous page exists
        'has_prev': has_prev,
        # True if a next page exists
        'has_next': has_next,
        # number of the previous page
        'prev_num': current_page - 1 if has_prev else None,
        # number of the next page
        'next_num': current_page + 1 if has_next else None
    }
    return {'code': 200, 'msg': 'OK', 'payload': payload}
 | 
			
		||||
@@ -0,0 +1,24 @@
 | 
			
		||||
from app import socketio
 | 
			
		||||
from app.decorators import socketio_login_required
 | 
			
		||||
from . import NAMESPACE as ns
 | 
			
		||||
from .utils import cqi_over_socketio
 | 
			
		||||
import cqi
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@socketio.on('cqi.corpora.corpus.alignment_attributes.get', namespace=ns)
@socketio_login_required
@cqi_over_socketio
def cqi_corpora_corpus_alignment_attributes_get(cqi_client: cqi.CQiClient, corpus_name: str, alignment_attribute_name: str):  # noqa
    '''
    Handle the 'cqi.corpora.corpus.alignment_attributes.get' event.

    Returns a 200 response whose payload is a copy of the attributes (attrs)
    of the alignment attribute alignment_attribute_name of the corpus named
    corpus_name.
    '''
    alignment_attributes = cqi_client.corpora.get(corpus_name).alignment_attributes  # noqa
    attribute_attrs = alignment_attributes.get(alignment_attribute_name).attrs
    return {'code': 200, 'msg': 'OK', 'payload': {**attribute_attrs}}
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@socketio.on('cqi.corpora.corpus.alignment_attributes.list', namespace=ns)
@socketio_login_required
@cqi_over_socketio
def cqi_corpora_corpus_alignment_attributes_list(cqi_client: cqi.CQiClient, corpus_name: str):  # noqa
    '''
    Handle the 'cqi.corpora.corpus.alignment_attributes.list' event.

    Returns a 200 response whose payload is a list with a copy of the
    attributes (attrs) of every alignment attribute of the corpus named
    corpus_name.
    '''
    cqi_corpus = cqi_client.corpora.get(corpus_name)
    attrs_of_attributes = []
    for alignment_attribute in cqi_corpus.alignment_attributes.list():
        attrs_of_attributes.append({**alignment_attribute.attrs})
    return {'code': 200, 'msg': 'OK', 'payload': attrs_of_attributes}
 | 
			
		||||
@@ -0,0 +1,24 @@
 | 
			
		||||
from app import socketio
 | 
			
		||||
from app.decorators import socketio_login_required
 | 
			
		||||
from . import NAMESPACE as ns
 | 
			
		||||
from .utils import cqi_over_socketio
 | 
			
		||||
import cqi
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@socketio.on('cqi.corpora.corpus.positional_attributes.get', namespace=ns)
@socketio_login_required
@cqi_over_socketio
def cqi_corpora_corpus_positional_attributes_get(cqi_client: cqi.CQiClient, corpus_name: str, positional_attribute_name: str):  # noqa
    '''
    Handle the 'cqi.corpora.corpus.positional_attributes.get' event.

    Returns a 200 response whose payload is a copy of the attributes (attrs)
    of the positional attribute positional_attribute_name of the corpus
    named corpus_name.
    '''
    positional_attributes = cqi_client.corpora.get(corpus_name).positional_attributes  # noqa
    attribute_attrs = positional_attributes.get(positional_attribute_name).attrs  # noqa
    return {'code': 200, 'msg': 'OK', 'payload': {**attribute_attrs}}
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@socketio.on('cqi.corpora.corpus.positional_attributes.list', namespace=ns)
@socketio_login_required
@cqi_over_socketio
def cqi_corpora_corpus_positional_attributes_list(cqi_client: cqi.CQiClient, corpus_name: str):  # noqa
    '''
    Handle the 'cqi.corpora.corpus.positional_attributes.list' event.

    Returns a 200 response whose payload is a list with a copy of the
    attributes (attrs) of every positional attribute of the corpus named
    corpus_name.
    '''
    cqi_corpus = cqi_client.corpora.get(corpus_name)
    attrs_of_attributes = []
    for positional_attribute in cqi_corpus.positional_attributes.list():
        attrs_of_attributes.append({**positional_attribute.attrs})
    return {'code': 200, 'msg': 'OK', 'payload': attrs_of_attributes}
 | 
			
		||||
@@ -0,0 +1,24 @@
 | 
			
		||||
from app import socketio
 | 
			
		||||
from app.decorators import socketio_login_required
 | 
			
		||||
from . import NAMESPACE as ns
 | 
			
		||||
from .utils import cqi_over_socketio
 | 
			
		||||
import cqi
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@socketio.on('cqi.corpora.corpus.structural_attributes.get', namespace=ns)
@socketio_login_required
@cqi_over_socketio
def cqi_corpora_corpus_structural_attributes_get(cqi_client: cqi.CQiClient, corpus_name: str, structural_attribute_name: str):  # noqa
    '''
    Handle the 'cqi.corpora.corpus.structural_attributes.get' event.

    Returns a 200 response whose payload is a copy of the attributes (attrs)
    of the structural attribute structural_attribute_name of the corpus
    named corpus_name.
    '''
    structural_attributes = cqi_client.corpora.get(corpus_name).structural_attributes  # noqa
    attribute_attrs = structural_attributes.get(structural_attribute_name).attrs  # noqa
    return {'code': 200, 'msg': 'OK', 'payload': {**attribute_attrs}}
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@socketio.on('cqi.corpora.corpus.structural_attributes.list', namespace=ns)
@socketio_login_required
@cqi_over_socketio
def cqi_corpora_corpus_structural_attributes_list(cqi_client: cqi.CQiClient, corpus_name: str):  # noqa
    '''
    Handle the 'cqi.corpora.corpus.structural_attributes.list' event.

    Returns a 200 response whose payload is a list with a copy of the
    attributes (attrs) of every structural attribute of the corpus named
    corpus_name.
    '''
    cqi_corpus = cqi_client.corpora.get(corpus_name)
    attrs_of_attributes = []
    for structural_attribute in cqi_corpus.structural_attributes.list():
        attrs_of_attributes.append({**structural_attribute.attrs})
    return {'code': 200, 'msg': 'OK', 'payload': attrs_of_attributes}
 | 
			
		||||
							
								
								
									
										123
									
								
								app/corpora/cqi_over_socketio/cqi_corpora_corpus_subcorpora.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										123
									
								
								app/corpora/cqi_over_socketio/cqi_corpora_corpus_subcorpora.py
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,123 @@
 | 
			
		||||
from app import socketio
 | 
			
		||||
from app.decorators import socketio_login_required
 | 
			
		||||
from app.models import Corpus
 | 
			
		||||
from flask import session
 | 
			
		||||
from . import NAMESPACE as ns
 | 
			
		||||
from .utils import cqi_over_socketio, export_subcorpus
 | 
			
		||||
import cqi
 | 
			
		||||
import json
 | 
			
		||||
import math
 | 
			
		||||
import os
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@socketio.on('cqi.corpora.corpus.subcorpora.get', namespace=ns)
@socketio_login_required
@cqi_over_socketio
def cqi_corpora_corpus_subcorpora_get(cqi_client: cqi.CQiClient, corpus_name: str, subcorpus_name: str):  # noqa
    '''
    Handle the 'cqi.corpora.corpus.subcorpora.get' event.

    Returns a 200 response whose payload is a copy of the attributes (attrs)
    of the subcorpus subcorpus_name of the corpus named corpus_name.
    '''
    subcorpora = cqi_client.corpora.get(corpus_name).subcorpora
    subcorpus_attrs = subcorpora.get(subcorpus_name).attrs
    return {'code': 200, 'msg': 'OK', 'payload': {**subcorpus_attrs}}
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@socketio.on('cqi.corpora.corpus.subcorpora.list', namespace=ns)
@socketio_login_required
@cqi_over_socketio
def cqi_corpora_corpus_subcorpora_list(cqi_client: cqi.CQiClient, corpus_name: str):  # noqa
    '''
    Handle the 'cqi.corpora.corpus.subcorpora.list' event.

    Returns a 200 response whose payload is a list with a copy of the
    attributes (attrs) of every subcorpus of the corpus named corpus_name.
    '''
    cqi_corpus = cqi_client.corpora.get(corpus_name)
    attrs_of_subcorpora = []
    for cqi_subcorpus in cqi_corpus.subcorpora.list():
        attrs_of_subcorpora.append({**cqi_subcorpus.attrs})
    return {'code': 200, 'msg': 'OK', 'payload': attrs_of_subcorpora}
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@socketio.on('cqi.corpora.corpus.subcorpora.subcorpus.drop', namespace=ns)
@socketio_login_required
@cqi_over_socketio
def cqi_corpora_corpus_subcorpora_subcorpus_drop(cqi_client: cqi.CQiClient, corpus_name: str, subcorpus_name: str):  # noqa
    '''
    Handle the 'cqi.corpora.corpus.subcorpora.subcorpus.drop' event.

    Calls drop() on the subcorpus subcorpus_name of the corpus named
    corpus_name and returns a 200 response whose payload holds the CQi
    status code and its name from the CQi specification lookup.
    '''
    subcorpora = cqi_client.corpora.get(corpus_name).subcorpora
    status_code = subcorpora.get(subcorpus_name).drop()
    status_name = cqi.api.specification.lookup[status_code]
    return {
        'code': 200,
        'msg': 'OK',
        'payload': {'code': status_code, 'msg': status_name}
    }
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@socketio.on('cqi.corpora.corpus.subcorpora.subcorpus.dump', namespace=ns)
@socketio_login_required
@cqi_over_socketio
def cqi_corpora_corpus_subcorpora_subcorpus_dump(cqi_client: cqi.CQiClient):
    '''
    Handle the 'cqi.corpora.corpus.subcorpora.subcorpus.dump' event.

    Placeholder: always answers with a 501 response.
    '''
    not_implemented = {'code': 501, 'msg': 'Not Implemented'}
    return not_implemented
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@socketio.on('cqi.corpora.corpus.subcorpora.subcorpus.fdist_1', namespace=ns)
@socketio_login_required
@cqi_over_socketio
def cqi_corpora_corpus_subcorpora_subcorpus_fdist_1(cqi_client: cqi.CQiClient):
    '''
    Handle the 'cqi.corpora.corpus.subcorpora.subcorpus.fdist_1' event.

    Placeholder: always answers with a 501 response.
    '''
    not_implemented = {'code': 501, 'msg': 'Not Implemented'}
    return not_implemented
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@socketio.on('cqi.corpora.corpus.subcorpora.subcorpus.fdist_2', namespace=ns)
@socketio_login_required
@cqi_over_socketio
def cqi_corpora_corpus_subcorpora_subcorpus_fdist_2(cqi_client: cqi.CQiClient):
    '''
    Handle the 'cqi.corpora.corpus.subcorpora.subcorpus.fdist_2' event.

    Placeholder: always answers with a 501 response.
    '''
    not_implemented = {'code': 501, 'msg': 'Not Implemented'}
    return not_implemented
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
###############################################################################
 | 
			
		||||
# nopaque specific CQi extensions                                             #
 | 
			
		||||
###############################################################################
 | 
			
		||||
@socketio.on('cqi.corpora.corpus.subcorpora.subcorpus.paginate', namespace=ns)
@socketio_login_required
@cqi_over_socketio
def cqi_corpora_corpus_subcorpora_subcorpus_paginate(cqi_client: cqi.CQiClient, corpus_name: str, subcorpus_name: str, context: int = 50, page: int = 1, per_page: int = 20):  # noqa
    '''
    Handle the 'cqi.corpora.corpus.subcorpora.subcorpus.paginate' event.

    Exports one page of the subcorpus subcorpus_name of the corpus named
    corpus_name via export_subcorpus (context is passed through, the page
    is selected with offset and cutoff) and returns it together with
    pagination metadata. page is 1 indexed. A 416 response is returned if
    per_page or page is smaller than 1 or if page lies behind the last page
    of a non empty subcorpus.
    '''
    cqi_corpus = cqi_client.corpora.get(corpus_name)
    cqi_subcorpus = cqi_corpus.subcorpora.get(subcorpus_name)
    # Sanity checks
    if per_page < 1 or page < 1:
        return {'code': 416, 'msg': 'Range Not Satisfiable'}
    subcorpus_size = cqi_subcorpus.attrs['size']
    if subcorpus_size > 0 and page > math.ceil(subcorpus_size / per_page):
        return {'code': 416, 'msg': 'Range Not Satisfiable'}
    page_export = export_subcorpus(
        cqi_subcorpus,
        context=context,
        cutoff=per_page,
        offset=(page - 1) * per_page
    )
    # The matches are the items, everything left in the export are lookups
    matches = page_export.pop('matches')
    # Pagination metadata
    total_pages = math.ceil(subcorpus_size / per_page)
    current_page = page if total_pages > 0 else None
    has_prev = bool(current_page) and current_page > 1
    has_next = bool(current_page) and current_page < total_pages
    payload = {
        # the items for the current page
        'items': matches,
        # the lookups for the items
        'lookups': page_export,
        # the total number of items matching the query
        'total': subcorpus_size,
        # the number of items to be displayed on a page
        'per_page': per_page,
        # the total number of pages
        'pages': total_pages,
        # the current page number (1 indexed)
        'page': current_page,
        # True if a previous page exists
        'has_prev': has_prev,
        # True if a next page exists
        'has_next': has_next,
        # number of the previous page
        'prev_num': current_page - 1 if has_prev else None,
        # number of the next page
        'next_num': current_page + 1 if has_next else None
    }
    return {'code': 200, 'msg': 'OK', 'payload': payload}
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@socketio.on('cqi.corpora.corpus.subcorpora.subcorpus.export', namespace=ns)
@socketio_login_required
@cqi_over_socketio
def cqi_corpora_corpus_subcorpora_subcorpus_export(cqi_client: cqi.CQiClient, corpus_name: str, subcorpus_name: str, context: int = 50):  # noqa
    '''
    Handle the 'cqi.corpora.corpus.subcorpora.subcorpus.export' event.

    Exports the subcorpus subcorpus_name of the corpus named corpus_name via
    export_subcorpus (context is passed through) and writes the result as
    json to '<subcorpus_name>.json' inside the path of the database corpus
    that belongs to the current analysis session.

    Returns a 200 response on success or a 400 response if subcorpus_name
    is not usable as a plain file name.
    '''
    # subcorpus_name is supplied by the client and becomes part of a file
    # path. Reject everything that is not a plain file name, so that the
    # export can not be written outside of the corpus directory.
    if (
        subcorpus_name in ('', '.', '..')
        or os.path.basename(subcorpus_name) != subcorpus_name
    ):
        return {'code': 400, 'msg': 'Bad Request'}
    cqi_corpus = cqi_client.corpora.get(corpus_name)
    cqi_subcorpus = cqi_corpus.subcorpora.get(subcorpus_name)
    cqi_subcorpus_export = export_subcorpus(cqi_subcorpus, context=context)
    corpus = Corpus.query.get(session['d']['corpus_id'])
    file_path = os.path.join(corpus.path, f'{subcorpus_name}.json')
    with open(file_path, 'w') as file:
        json.dump(cqi_subcorpus_export, file)
    return {'code': 200, 'msg': 'OK'}
 | 
			
		||||
							
								
								
									
										129
									
								
								app/corpora/cqi_over_socketio/utils.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										129
									
								
								app/corpora/cqi_over_socketio/utils.py
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,129 @@
 | 
			
		||||
from flask import session
 | 
			
		||||
from functools import wraps
 | 
			
		||||
from inspect import signature
 | 
			
		||||
import cqi
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def cqi_over_socketio(f):
    '''
    Decorate a Socket.IO event handler that wraps a CQi function.

    The client may send a single json object (dict) holding the arguments.
    The decorator decomposes it to fit the signature of f:
      - A parameter annotated with cqi.CQiClient receives the CQiClient
        stored in the session.
      - A parameter without a default value is required, a parameter with a
        default value is optional.
      - The type of every provided argument must be exactly the annotated
        type of its parameter.
    f is executed while holding the lock that belongs to the CQiClient.

    The wrapped handler returns
      - a 424 response if no analysis session data is stored in the session,
      - a 400 response if a required argument is missing or an argument is
        of the wrong type,
      - a 500 response if f raises a BrokenPipeError or a CQiException (the
        latter includes code, description and name of the exception),
      - the return value of f otherwise.
    '''
    # The signature of f does not change, inspect it only once
    parameters = tuple(signature(f).parameters.values())

    @wraps(f)
    def wrapped(*args):
        if 'd' not in session:
            return {'code': 424, 'msg': 'Failed Dependency'}
        # The arguments are optional for the client, treat a missing (or non
        # dict) payload like an empty one instead of failing on args[0].
        if args and isinstance(args[0], dict):
            client_args = args[0]
        else:
            client_args = {}
        f_args = {}
        # Check for missing args and if all provided args are of the right type
        for param in parameters:
            if param.annotation == cqi.CQiClient:
                f_args[param.name] = session['d']['cqi_client']
                continue
            if param.name not in client_args:
                if param.default is param.empty:
                    # A required argument is missing
                    return {'code': 400, 'msg': 'Bad Request'}
                # An optional argument is missing, f uses its default value
                continue
            arg = client_args[param.name]
            # The exact type is required on purpose, e.g. a bool must not
            # be accepted where an int is expected.
            if type(arg) is not param.annotation:
                return {'code': 400, 'msg': 'Bad Request'}
            f_args[param.name] = arg
        # The with statement releases the lock in every case
        with session['d']['cqi_client_lock']:
            try:
                return f(**f_args)
            except BrokenPipeError:
                # Answer with an error instead of failing on an unbound
                # return value.
                return {'code': 500, 'msg': 'Internal Server Error'}
            except cqi.errors.CQiException as e:
                return {
                    'code': 500,
                    'msg': 'Internal Server Error',
                    'payload': {
                        'code': e.code,
                        'desc': e.description,
                        'msg': e.name
                    }
                }
    return wrapped
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def lookups_by_cpos(corpus, cpos_list):
    '''
    Build lookup tables for a list of corpus positions (cpos).

    The result always contains 'cpos_lookup', which maps every cpos to a
    dict holding the value of each positional attribute and the id of each
    structural attribute region that covers it (an id of -1 is skipped).
    For every structural attribute without values that occurs at least once
    and has subattributes, an additional '<name>_lookup' table maps each
    occurring region id to the values of its subattributes. Subattribute
    names are stored without the '<name>_' prefix.
    '''
    cpos_lookup = {cpos: {} for cpos in cpos_list}
    lookups = {'cpos_lookup': cpos_lookup}
    for p_attr in corpus.positional_attributes.list():
        p_attr_values = p_attr.values_by_cpos(cpos_list)
        for cpos, p_attr_value in zip(cpos_list, p_attr_values):
            cpos_lookup[cpos][p_attr.attrs['name']] = p_attr_value
    for s_attr in corpus.structural_attributes.list():
        # Subattributes (has_values == True) are resolved via their parent.
        if s_attr.attrs['has_values']:
            continue
        s_attr_ids = s_attr.ids_by_cpos(cpos_list)
        for cpos, s_attr_id in zip(cpos_list, s_attr_ids):
            if s_attr_id != -1:
                cpos_lookup[cpos][s_attr.attrs['name']] = s_attr_id
        seen_ids = [s_id for s_id in set(s_attr_ids) if s_id != -1]
        if not seen_ids:
            continue
        sub_attrs = corpus.structural_attributes.list(
            filters={'part_of': s_attr})
        if not sub_attrs:
            continue
        id_lookup = {s_id: {} for s_id in seen_ids}
        lookups[f'{s_attr.attrs["name"]}_lookup'] = id_lookup
        # Length of the '<parent name>_' prefix carried by subattributes.
        prefix_length = len(s_attr.attrs['name']) + 1
        for sub_attr in sub_attrs:
            short_name = sub_attr.attrs['name'][prefix_length:]
            sub_attr_values = sub_attr.values_by_ids(seen_ids)
            for s_id, sub_attr_value in zip(seen_ids, sub_attr_values):
                id_lookup[s_id][short_name] = sub_attr_value
    return lookups
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def export_subcorpus(subcorpus, context=25, cutoff=float('inf'), offset=0):
    '''
    Export a window of matches of a subcorpus together with lookup tables.

    subcorpus: object exposing attrs['size'], attrs['fields'], dump() and
               collection.corpus (the corpus the subcorpus belongs to).
    context:   size of the left/right context; 0 disables context.
    cutoff:    maximum number of matches to export.
    offset:    index of the first match to export; negative values are
               treated as 0.

    Returns a dict with a 'matches' list, each entry holding the running
    match number ('num') and the inclusive cpos bounds of the left context
    ('lc'), the match itself ('c') and the right context ('rc'), merged
    with the tables produced by lookups_by_cpos. If the requested window
    contains no matches, only {"matches": []} is returned.
    '''
    match_count = subcorpus.attrs['size']
    # Clamp once so that the dumped range and the match numbering agree.
    offset = max(0, offset)
    first_match = offset
    last_match = min((offset + cutoff - 1), (match_count - 1))
    # Covers an empty subcorpus, an offset past the last match and a
    # non-positive cutoff; none of them may reach dump() with an inverted
    # range.
    if match_count == 0 or first_match > last_match:
        return {"matches": []}
    fields = subcorpus.attrs['fields']
    match_boundaries = zip(
        subcorpus.dump(fields['match'], first_match, last_match),
        subcorpus.dump(fields['matchend'], first_match, last_match)
    )
    corpus = subcorpus.collection.corpus
    last_cpos = corpus.attrs['size'] - 1
    cpos_set = set()
    matches = []
    numbered_boundaries = enumerate(match_boundaries, start=offset + 1)
    for match_num, (match_start, match_end) in numbered_boundaries:
        c = (match_start, match_end)
        # NOTE(review): both context windows span up to context + 1
        # positions; kept as is because consumers may rely on it.
        if match_start == 0 or context == 0:
            lc = None
            cpos_list_lbound = match_start
        else:
            lc = (max(0, (match_start - 1 - context)), match_start - 1)
            cpos_list_lbound = lc[0]
        if match_end == last_cpos or context == 0:
            rc = None
            cpos_list_rbound = match_end
        else:
            rc = (match_end + 1, min(match_end + 1 + context, last_cpos))
            cpos_list_rbound = rc[1]
        matches.append({'num': match_num, 'lc': lc, 'c': c, 'rc': rc})
        cpos_set.update(range(cpos_list_lbound, cpos_list_rbound + 1))
    lookups = lookups_by_cpos(corpus, list(cpos_set))
    return {'matches': matches, **lookups}
 | 
			
		||||
@@ -1,304 +0,0 @@
 | 
			
		||||
from datetime import datetime
 | 
			
		||||
from flask import current_app, request
 | 
			
		||||
from flask_login import current_user
 | 
			
		||||
from socket import gaierror
 | 
			
		||||
from werkzeug.utils import secure_filename
 | 
			
		||||
from .. import db, socketio
 | 
			
		||||
from ..decorators import socketio_login_required
 | 
			
		||||
from ..events.socketio import sessions as socketio_sessions
 | 
			
		||||
from ..models import Corpus
 | 
			
		||||
import cqi
 | 
			
		||||
import math
 | 
			
		||||
import os
 | 
			
		||||
import shutil
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
# Maps each corpus id to the list of Socket.IO session ids (sid) that are
# currently analysing that corpus: {<corpus_id>: [<sid>, ...], ...}
corpus_analysis_sessions = {}
# Maps each Socket.IO session id (sid) to its CQi client:
# {<sid>: CQiClient, ...}
corpus_analysis_clients = {}
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@socketio.on('corpus_analysis_init')
@socketio_login_required
def init_corpus_analysis(corpus_id):
    '''
    Validate an analysis request and start the session background task.

    Emits an error response on 'corpus_analysis_init' to the requesting
    session if the corpus does not exist (404), the user is neither its
    creator nor an administrator (403) or the corpus status does not allow
    an analysis (424). Otherwise a 'prepared' corpus is switched to
    'start analysis' and corpus_analysis_session_handler is started as a
    background task.
    '''
    corpus = Corpus.query.get(corpus_id)
    if corpus is None:
        error = {'code': 404, 'desc': None, 'msg': 'Not Found'}
    elif not (corpus.creator == current_user
              or current_user.is_administrator()):
        error = {'code': 403, 'desc': None, 'msg': 'Forbidden'}
    elif corpus.status not in ['prepared', 'start analysis', 'analysing']:
        error = {'code': 424, 'desc': None, 'msg': 'Failed Dependency'}
    else:
        error = None
    if error is not None:
        socketio.emit('corpus_analysis_init', error, room=request.sid)
        return
    if corpus.status == 'prepared':
        corpus.status = 'start analysis'
        db.session.commit()
    socketio.start_background_task(
        corpus_analysis_session_handler,
        current_app._get_current_object(),
        corpus_id,
        current_user.id,
        request.sid
    )
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def corpus_analysis_session_handler(app, corpus_id, user_id, session_id):
    '''
    Background task that sets up, observes and tears down one analysis
    session.

    Setup: polls the corpus status (every 3 seconds, giving up with a 408
    response once the retry counter reaches 0) until it is 'analysing',
    then connects a CQi client to 'cqpserver_<corpus_id>' and registers it
    in corpus_analysis_clients / corpus_analysis_sessions.
    Observe: sleeps as long as session_id is listed in socketio_sessions.
    Teardown: aborts a running client operation, disconnects the client,
    unregisters the session and, if it was the last one for this corpus,
    sets the corpus status to 'stop analysis'.
    '''
    with app.app_context():
        # Setup analysis session
        corpus = Corpus.query.get(corpus_id)
        attempts_left = 15
        while corpus.status != 'analysing':
            db.session.refresh(corpus)
            attempts_left -= 1
            if attempts_left == 0:
                timeout_response = {
                    'code': 408,
                    'desc': None,
                    'msg': 'Request Timeout'
                }
                socketio.emit('corpus_analysis_init', timeout_response,
                              room=session_id)
                corpus.status = 'stop analysis'
                db.session.commit()
                return
            socketio.sleep(3)
        client = cqi.CQiClient(f'cqpserver_{corpus_id}')
        try:
            connect_status = client.connect()
            payload = {
                'code': connect_status,
                'msg': cqi.api.specification.lookup[connect_status]
            }
        except cqi.errors.CQiException as e:
            handle_cqi_exception('corpus_analysis_init', e, session_id)
            corpus.status = 'stop analysis'
            db.session.commit()
            return
        except gaierror:
            error_response = {
                'code': 500,
                'desc': None,
                'msg': 'Internal Server Error'
            }
            socketio.emit('corpus_analysis_init', error_response,
                          room=session_id)
            corpus.status = 'stop analysis'
            db.session.commit()
            return
        corpus_analysis_clients[session_id] = client
        corpus_analysis_sessions.setdefault(corpus_id, []).append(session_id)
        client.status = 'ready'
        ok_response = {
            'code': 200,
            'desc': None,
            'msg': 'OK',
            'payload': payload
        }
        socketio.emit('corpus_analysis_init', ok_response, room=session_id)
        # Observe analysis session
        while session_id in socketio_sessions:
            socketio.sleep(3)
        # Teardown analysis session
        if client.status == 'running':
            # Ask the running event handler to stop and wait until it did.
            client.status = 'abort'
            while client.status != 'ready':
                socketio.sleep(0.3)
        try:
            client.disconnect()
        except cqi.errors.CQiException:
            pass
        corpus_analysis_clients.pop(session_id, None)
        remaining_sessions = corpus_analysis_sessions[corpus_id]
        remaining_sessions.remove(session_id)
        if not remaining_sessions:
            corpus_analysis_sessions.pop(corpus_id, None)
            corpus.status = 'stop analysis'
            db.session.commit()
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@socketio.on('corpus_analysis_meta_data')
@socketio_login_required
def corpus_analysis_get_meta_data(corpus_id):
    '''
    Collect meta data of a corpus and emit it on 'corpus_analysis_meta_data'.

    The meta data combines values stored in the database (title,
    description, dates) with values read from the CQP server through the
    CQi client of the requesting session (properties, token count and the
    subattribute values of every text). The token count is also written
    back to the database.

    Emits 404 if the corpus does not exist, 424 if the session has no CQi
    client and 500 (with the CQi error as payload) if the CQP server
    reports an error.
    '''
    db_corpus = Corpus.query.get(corpus_id)
    if db_corpus is None:
        response = {'code': 404, 'desc': None, 'msg': 'Not Found'}
        socketio.emit('corpus_analysis_meta_data', response, room=request.sid)
        return
    # get meta data from db
    metadata = {}
    metadata['corpus_name'] = db_corpus.title
    metadata['corpus_description'] = db_corpus.description
    metadata['corpus_creation_date'] = \
        db_corpus.creation_date.isoformat() + 'Z'
    metadata['corpus_last_edited_date'] = \
        db_corpus.last_edited_date.isoformat() + 'Z'
    client = corpus_analysis_clients.get(request.sid)
    if client is None:
        response = {'code': 424, 'desc': None, 'msg': 'Failed Dependency'}
        socketio.emit('corpus_analysis_meta_data', response, room=request.sid)
        return
    # check if client is busy or not
    if client.status == 'running':
        client.status = 'abort'
        while client.status != 'ready':
            socketio.sleep(0.3)
    # get meta data from corpus in cqp server
    client.status = 'running'
    try:
        cwb_corpus = client.corpora.get('CORPUS')
        metadata['corpus_properties'] = cwb_corpus.attrs['properties']
        metadata['corpus_size_tokens'] = cwb_corpus.attrs['size']

        text_attr = cwb_corpus.structural_attributes.get('text')
        struct_attrs = cwb_corpus.structural_attributes.list(
            filters={'part_of': text_attr})
        text_count = text_attr.attrs['size']
        texts_metadata = {text_id: {} for text_id in range(text_count)}
        if text_count:
            prefix_length = len(text_attr.attrs['name']) + 1
            for struct_attr in struct_attrs:
                short_name = struct_attr.attrs['name'][prefix_length:]
                # Fetch all values of this attribute once instead of once
                # per text.
                values = struct_attr.values_by_ids(
                    list(range(struct_attr.attrs['size'])))
                for text_id in range(text_count):
                    texts_metadata[text_id][short_name] = values[text_id]
        metadata['corpus_all_texts'] = texts_metadata
        metadata['corpus_analysis_date'] = datetime.utcnow().isoformat() + 'Z'
        metadata['corpus_cqi_py_protocol_version'] = client.api.version
        metadata['corpus_cqi_py_package_version'] = cqi.__version__
        # TODO: make this dynamically
        metadata['corpus_cqpserver_version'] = 'CQPserver v3.4.22'

        # write some metadata to the db
        db_corpus.current_nr_of_tokens = metadata['corpus_size_tokens']
        db.session.commit()

        # emit data
        response = {'code': 200, 'desc': None, 'msg': 'OK',
                    'payload': metadata}
        socketio.emit('corpus_analysis_meta_data', response, room=request.sid)
    except cqi.errors.CQiException as e:
        payload = {'code': e.code, 'desc': e.description, 'msg': e.name}
        response = {'code': 500, 'desc': None, 'msg': 'Internal Server Error',
                    'payload': payload}
        socketio.emit('corpus_analysis_meta_data', response, room=request.sid)
    finally:
        # Always release the client; other handlers wait for 'ready'.
        client.status = 'ready'
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@socketio.on('corpus_analysis_query')
@socketio_login_required
def corpus_analysis_query(query):
    '''
    Execute a query on the corpus and stream the results in chunks.

    First emits the query status and match count on 'corpus_analysis_query'.
    Afterwards the matches are exported in chunks of 100 (context 50) and
    each chunk is emitted with a progress percentage on
    'corpus_analysis_query_results'. An empty result still produces exactly
    one chunk with progress 100. Streaming stops early if another handler
    sets the client status to 'abort' or the CQP server reports an error.

    Emits 424 if the session has no CQi client.
    '''
    client = corpus_analysis_clients.get(request.sid)
    if client is None:
        response = {'code': 424, 'desc': None, 'msg': 'Failed Dependency'}
        socketio.emit('corpus_analysis_query', response, room=request.sid)
        return
    if client.status == 'running':
        client.status = 'abort'
        while client.status != 'ready':
            socketio.sleep(0.3)
    client.status = 'running'
    try:
        try:
            corpus = client.corpora.get('CORPUS')
            query_status = corpus.query(query)
            results = corpus.subcorpora.get('Results')
        except cqi.errors.CQiException as e:
            handle_cqi_exception('corpus_analysis_query', e, request.sid)
            return
        match_count = results.attrs['size']
        payload = {'status': query_status,
                   'msg': cqi.api.specification.lookup[query_status],
                   'match_count': match_count}
        response = {'code': 200, 'desc': None, 'msg': 'OK', 'payload': payload}
        socketio.emit('corpus_analysis_query', response, room=request.sid)
        chunk_size = 100
        chunk_start = 0
        context = 50
        while True:
            if client.status == 'abort':
                break
            try:
                chunk = results.export(context=context, cutoff=chunk_size,
                                       offset=chunk_start)
            except cqi.errors.CQiException as e:
                handle_cqi_exception('corpus_analysis_query', e, request.sid)
                break
            if match_count == 0:
                progress = 100
            else:
                progress = (chunk_start + chunk_size) / match_count * 100
                progress = min(100, int(math.ceil(progress)))
            payload = {'chunk': chunk, 'progress': progress}
            response = {'code': 200, 'desc': None, 'msg': 'OK',
                        'payload': payload}
            socketio.emit('corpus_analysis_query_results', response,
                          room=request.sid)
            chunk_start += chunk_size
            # Stop once every match was sent; requesting a chunk that
            # starts at match_count would ask for matches that don't exist.
            if chunk_start >= match_count:
                break
    finally:
        # Always release the client; other handlers wait for 'ready'.
        client.status = 'ready'
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@socketio.on('corpus_analysis_get_match_with_full_context')
@socketio_login_required
def corpus_analysis_get_match_with_full_context(payload):
    '''
    Emit the given matches one at a time with a sentence based context.

    payload: dict with the keys 'type', 'data_indexes', 'first_cpos' and
             'last_cpos'; the last three are iterated in parallel, one
             entry per match.

    For every match the 's' structural attribute is exported with a context
    of 10 and the result is emitted on
    'corpus_analysis_get_match_with_full_context' together with a progress
    percentage. 'type' and 'data_indexes' are echoed in every response.
    Emission stops early if another handler sets the client status to
    'abort' or the CQP server reports an error.

    Emits 424 if the session has no CQi client.
    '''
    match_type = payload['type']
    data_indexes = payload['data_indexes']
    first_cpos = payload['first_cpos']
    last_cpos = payload['last_cpos']
    client = corpus_analysis_clients.get(request.sid)
    if client is None:
        response = {'code': 424, 'desc': 'No client found for this session',
                    'msg': 'Failed Dependency'}
        socketio.emit('corpus_analysis_get_match_with_full_context', response,
                      room=request.sid)
        return
    if client.status == 'running':
        client.status = 'abort'
        while client.status != 'ready':
            socketio.sleep(0.3)
    client.status = 'running'
    try:
        try:
            corpus = client.corpora.get('CORPUS')
            s = corpus.structural_attributes.get('s')
        except cqi.errors.CQiException as e:
            handle_cqi_exception(
                'corpus_analysis_get_match_with_full_context', e, request.sid)
            return
        cpos_bounds = zip(first_cpos, last_cpos)
        # Send data one match at a time.
        for i, (f_cpos, l_cpos) in enumerate(cpos_bounds, start=1):
            if client.status == 'abort':
                break
            try:
                tmp = s.export(f_cpos, l_cpos, context=10)
            except cqi.errors.CQiException as e:
                handle_cqi_exception(
                    'corpus_analysis_get_match_with_full_context', e,
                    request.sid)
                break
            # Separate copies: the two lookups must not share one dict.
            cpos_lookup = dict(tmp['cpos_lookup'])
            text_lookup = dict(tmp['text_lookup'])
            progress = i / len(data_indexes) * 100
            match_payload = {'matches': [tmp['matches'][0]],
                             'progress': progress,
                             'cpos_lookup': cpos_lookup,
                             'text_lookup': text_lookup}
            response = {'code': 200, 'desc': None, 'msg': 'OK',
                        'payload': match_payload, 'type': match_type,
                        'data_indexes': data_indexes}
            socketio.emit('corpus_analysis_get_match_with_full_context',
                          response, room=request.sid)
    finally:
        # Always release the client; a client stuck in 'running' would make
        # every later handler wait forever for 'ready'.
        client.status = 'ready'
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@socketio.on('export_corpus')
@socketio_login_required
def export_corpus(corpus_id):
    '''
    Pack a corpus into a zip archive and notify the requesting session.

    Emits an error response on 'export_corpus' if the corpus does not exist
    (404), the user is neither its creator nor an administrator (403) or
    the corpus status is not 'prepared' (412). On success the archive is
    written to '<creator path>/corpora/[corpus]_<secure title>.zip', the
    archive file name is stored on the corpus and 'export_corpus_<id>' is
    emitted.
    '''
    corpus = Corpus.query.get(corpus_id)
    if corpus is None:
        response = {'code': 404, 'msg': 'Not found'}
        socketio.emit('export_corpus', response, room=request.sid)
        return
    # Same ownership rule as for starting an analysis: only the creator or
    # an administrator may export a corpus.
    if not (corpus.creator == current_user or current_user.is_administrator()):
        response = {'code': 403, 'msg': 'Forbidden'}
        socketio.emit('export_corpus', response, room=request.sid)
        return
    if corpus.status != 'prepared':
        response = {'code': 412, 'msg': 'Precondition Failed'}
        socketio.emit('export_corpus', response, room=request.sid)
        return
    # delete old corpus archive if it exists/has been build before
    # NOTE(review): archive_file is stored below as a bare file name, so
    # this check resolves it against the working directory - confirm that
    # this is the intended location.
    if corpus.archive_file is not None and os.path.isfile(corpus.archive_file):
        os.remove(corpus.archive_file)
    archive_file_base_name = '[corpus]_' + secure_filename(corpus.title)
    corpus.archive_file = archive_file_base_name + '.zip'
    db.session.commit()
    shutil.make_archive(
        os.path.join(corpus.creator.path, 'corpora', archive_file_base_name),
        'zip',
        corpus.path
    )
    socketio.emit('export_corpus_{}'.format(corpus.id), room=request.sid)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def handle_cqi_exception(event, exception, room):
    '''
    Emit a 500 response describing a CQi exception.

    The response is emitted as the given event to the given room; its
    payload carries the code, description and name of the exception.
    '''
    cqi_error = {
        'code': exception.code,
        'desc': exception.description,
        'msg': exception.name
    }
    response = {
        'code': 500,
        'desc': None,
        'msg': 'Internal Server Error',
        'payload': cqi_error
    }
    socketio.emit(event, response, room=room)
 | 
			
		||||
@@ -1,8 +1,8 @@
 | 
			
		||||
from flask_wtf import FlaskForm
 | 
			
		||||
from werkzeug.utils import secure_filename
 | 
			
		||||
from wtforms import (BooleanField, FileField, StringField, SubmitField,
 | 
			
		||||
                     ValidationError, IntegerField, SelectField)
 | 
			
		||||
from wtforms.validators import DataRequired, Length, NumberRange
 | 
			
		||||
from wtforms import (FileField, StringField, SubmitField,
 | 
			
		||||
                     ValidationError, IntegerField)
 | 
			
		||||
from wtforms.validators import DataRequired, Length
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class AddCorpusFileForm(FlaskForm):
 | 
			
		||||
@@ -91,76 +91,3 @@ class ImportCorpusForm(FlaskForm):
 | 
			
		||||
            raise ValidationError('File does not have an approved extension: '
 | 
			
		||||
                                  '.zip')
 | 
			
		||||
        field.data.filename = secure_filename(field.data.filename)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class QueryForm(FlaskForm):
    '''
    Form that submits a query to the server, where it is executed via
    cqi-py.
    '''
    query = StringField(
        'Query',
        validators=[DataRequired(), Length(1, 1024)]
    )
    submit = SubmitField('Search')
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class DisplayOptionsForm(FlaskForm):
    '''
    Form that lets the user change how the matches are presented.
    '''
    expert_mode = BooleanField('Expert mode')
    # Both select fields offer a placeholder followed by 10, 20, ..., 50.
    result_context = SelectField(
        'Result context',
        choices=[('', 'Choose your option')]
        + [(str(n), str(n)) for n in range(10, 51, 10)]
    )
    results_per_page = SelectField(
        'Results per page',
        choices=[('', 'Choose your option')]
        + [(str(n), str(n)) for n in range(10, 51, 10)]
    )
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class InspectDisplayOptionsForm(FlaskForm):
    '''
    Form of the inspect modal that lets the user change how the currently
    inspected match is presented.
    '''
    expert_mode_inspect = BooleanField('Expert mode')
    highlight_sentences = BooleanField('Split sentences')
    context_sentences = IntegerField(
        'Context sentences',
        validators=[NumberRange(min=0, max=10)],
        default=3
    )
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class QueryDownloadForm(FlaskForm):
    '''
    Form to select the file format in which the analysis results are
    downloaded. WIP.
    '''
    file_type = SelectField(
        'File type',
        choices=[
            ('', 'Choose file type'),
            ('csv', 'csv'),
            ('json', 'json'),
            ('excel', 'excel'),
            ('html', 'html-table')
        ],
        validators=[DataRequired()]
    )
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class AddQueryResultForm(FlaskForm):
    '''
    Form to import a single result json file.
    '''
    description = StringField(
        'Description',
        validators=[DataRequired(), Length(1, 255)]
    )
    file = FileField('File', validators=[DataRequired()])
    title = StringField('Title', validators=[DataRequired(), Length(1, 32)])
    submit = SubmitField()

    def validate_file(self, field):
        '''
        Accept only '.json' files (case-insensitive) and replace the
        uploaded file name with its secure_filename() version.
        '''
        uploaded_name = field.data.filename
        if not uploaded_name.lower().endswith('.json'):
            raise ValidationError('File does not have an approved extension: '
                                  '.json')
        field.data.filename = secure_filename(uploaded_name)
 | 
			
		||||
 
 | 
			
		||||
							
								
								
									
										21
									
								
								app/corpora/query_results_forms.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										21
									
								
								app/corpora/query_results_forms.py
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,21 @@
 | 
			
		||||
from flask_wtf import FlaskForm
 | 
			
		||||
from werkzeug.utils import secure_filename
 | 
			
		||||
from wtforms import FileField, StringField, SubmitField, ValidationError
 | 
			
		||||
from wtforms.validators import DataRequired, Length
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class AddQueryResultForm(FlaskForm):
    '''
    Form to import a single result json file.
    '''
    description = StringField(
        'Description',
        validators=[DataRequired(), Length(1, 255)]
    )
    file = FileField('File', validators=[DataRequired()])
    title = StringField('Title', validators=[DataRequired(), Length(1, 32)])
    submit = SubmitField()

    def validate_file(self, field):
        '''
        Accept only '.json' files (case-insensitive) and replace the
        uploaded file name with its secure_filename() version.
        '''
        uploaded_name = field.data.filename
        if not uploaded_name.lower().endswith('.json'):
            raise ValidationError('File does not have an approved extension: '
                                  '.json')
        field.data.filename = secure_filename(uploaded_name)
 | 
			
		||||
							
								
								
									
										134
									
								
								app/corpora/query_results_routes.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										134
									
								
								app/corpora/query_results_routes.py
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,134 @@
 | 
			
		||||
from flask import (abort, current_app, flash, make_response, redirect, request,
 | 
			
		||||
                   render_template, url_for, send_from_directory)
 | 
			
		||||
from flask_login import current_user, login_required
 | 
			
		||||
from . import bp
 | 
			
		||||
from . import tasks
 | 
			
		||||
from .forms import (AddQueryResultForm, DisplayOptionsForm,
 | 
			
		||||
                    InspectDisplayOptionsForm)
 | 
			
		||||
from .. import db
 | 
			
		||||
from ..models import QueryResult
 | 
			
		||||
import json
 | 
			
		||||
import os
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@bp.route('/result/add', methods=['GET', 'POST'])
@login_required
def add_query_result():
    '''
    View to import a query result from an uploaded json file.

    NOTE: the view calls abort(503) as its first statement, so the import
    logic below is currently not reached.

    For a submitted and valid form a QueryResult is created, the uploaded
    file is saved to its path and the file content without the 'matches'
    and 'cpos_lookup' entries is stored as query metadata. Otherwise the
    upload page is rendered.
    '''
    abort(503)
    form = AddQueryResultForm(prefix='add-query-result-form')
    if not form.is_submitted():
        return render_template(
            'corpora/query_results/add_query_result.html.j2',
            form=form,
            title='Add query result'
        )
    if not form.validate():
        return make_response(form.errors, 400)
    new_result = QueryResult(
        creator=current_user,
        description=form.description.data,
        filename=form.file.data.filename,
        title=form.title.data
    )
    db.session.add(new_result)
    # Flush and refresh before the path of the new result is used below.
    db.session.flush()
    db.session.refresh(new_result)
    try:
        os.makedirs(os.path.dirname(new_result.path))
    except OSError:
        current_app.logger.error(
            f'Make dir {new_result.path} led to an OSError!'
        )
        db.session.rollback()
        flash('Internal Server Error', 'error')
        return make_response(
            {'redirect_url': url_for('.add_query_result')}, 500)
    # save the uploaded file
    form.file.data.save(new_result.path)
    # parse json from file
    with open(new_result.path, 'r') as result_file:
        result_content = json.load(result_file)
    # TODO: validate result_content against the json schema in
    # app/static/json_schema/nopaque_cqi_py_results_schema.json and, if it
    # is invalid, delete the query result and redirect back to this view.
    result_content.pop('matches')
    result_content.pop('cpos_lookup')
    new_result.query_metadata = result_content
    db.session.commit()
    flash('Query result added!', 'result')
    redirect_url = url_for('.query_result', query_result_id=new_result.id)
    return make_response({'redirect_url': redirect_url}, 201)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@bp.route('/result/<int:query_result_id>')
@login_required
def query_result(query_result_id):
    '''
    View to show a single imported query result.

    NOTE: abort(503) is the very first statement, so this view currently
    always answers with "503 Service Unavailable". The statements below it are
    kept in place but are not executed while that call is present.
    '''
    abort(503)
    result = QueryResult.query.get_or_404(query_result_id)
    is_creator = result.creator == current_user
    # Only the creator or an administrator may look at a query result.
    if not is_creator and not current_user.is_administrator():
        abort(403)
    return render_template(
        'corpora/query_results/query_result.html.j2',
        query_result=result,
        title='Query result'
    )
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@bp.route('/result/<int:query_result_id>/inspect')
@login_required
def inspect_query_result(query_result_id):
    '''
    View to inspect imported result file in a corpus analysis like interface

    NOTE: abort(503) is the very first statement, so this view currently
    always answers with "503 Service Unavailable". The statements below it are
    kept in place but are not executed while that call is present.

    Without that call the view answers 404 for an unknown id and 403 if the
    current user is neither the creator of the query result nor an
    administrator. Otherwise the stored result file is parsed and handed to
    the template together with the display option forms.
    '''
    abort(503)
    query_result = QueryResult.query.get_or_404(query_result_id)
    # Authorize before any data of the query result is read.
    if not (query_result.creator == current_user
            or current_user.is_administrator()):
        abort(403)
    display_options_form = DisplayOptionsForm(
        prefix='display-options-form',
        results_per_page=request.args.get('results_per_page', 30),
        result_context=request.args.get('context', 20)
    )
    inspect_display_options_form = InspectDisplayOptionsForm(
        prefix='inspect-display-options-form'
    )
    # JSON is read as UTF-8 explicitly instead of relying on the locale
    # dependent default encoding of open().
    with open(query_result.path, 'r', encoding='utf-8') as query_result_file:
        query_result_file_content = json.load(query_result_file)
    return render_template(
        'corpora/query_results/inspect.html.j2',
        query_result=query_result,
        display_options_form=display_options_form,
        inspect_display_options_form=inspect_display_options_form,
        query_result_file_content=query_result_file_content,
        query_metadata=query_result.query_metadata,
        title='Inspect query result'
    )
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@bp.route('/result/<int:query_result_id>/delete')
@login_required
def delete_query_result(query_result_id):
    '''
    View to request the deletion of a query result.

    NOTE: abort(503) is the very first statement, so this view currently
    always answers with "503 Service Unavailable". The statements below it are
    kept in place but are not executed while that call is present.
    '''
    abort(503)
    result = QueryResult.query.get_or_404(query_result_id)
    may_delete = (result.creator == current_user
                  or current_user.is_administrator())
    if not may_delete:
        abort(403)
    message = 'Query result "{}" has been marked for deletion!'.format(result)
    flash(message, 'result')
    # The actual deletion is delegated to the tasks module.
    tasks.delete_query_result(query_result_id)
    target = url_for('services.service', service="corpus_analysis")
    return redirect(target)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@bp.route('/result/<int:query_result_id>/download')
@login_required
def download_query_result(query_result_id):
    '''
    View to download the stored file of a query result as an attachment.

    NOTE: abort(503) is the very first statement, so this view currently
    always answers with "503 Service Unavailable". The statements below it are
    kept in place but are not executed while that call is present.
    '''
    abort(503)
    result = QueryResult.query.get_or_404(query_result_id)
    is_creator = result.creator == current_user
    # Only the creator or an administrator may download the file.
    if not is_creator and not current_user.is_administrator():
        abort(403)
    return send_from_directory(
        as_attachment=True,
        directory=os.path.dirname(result.path),
        filename=result.filename
    )
 | 
			
		||||
@@ -1,16 +1,12 @@
 | 
			
		||||
from flask import (abort, current_app, flash, make_response, redirect, request,
 | 
			
		||||
from flask import (abort, current_app, flash, make_response, redirect,
 | 
			
		||||
                   render_template, url_for, send_from_directory)
 | 
			
		||||
from flask_login import current_user, login_required
 | 
			
		||||
from . import bp
 | 
			
		||||
from . import tasks
 | 
			
		||||
from .forms import (AddCorpusFileForm, AddCorpusForm, AddQueryResultForm,
 | 
			
		||||
                    EditCorpusFileForm, QueryDownloadForm, QueryForm,
 | 
			
		||||
                    DisplayOptionsForm, InspectDisplayOptionsForm,
 | 
			
		||||
from .forms import (AddCorpusFileForm, AddCorpusForm, EditCorpusFileForm,
 | 
			
		||||
                    ImportCorpusForm)
 | 
			
		||||
from jsonschema import validate
 | 
			
		||||
from .. import db
 | 
			
		||||
from ..models import Corpus, CorpusFile, QueryResult
 | 
			
		||||
import json
 | 
			
		||||
from ..models import Corpus, CorpusFile
 | 
			
		||||
import os
 | 
			
		||||
import shutil
 | 
			
		||||
import glob
 | 
			
		||||
@@ -22,21 +18,22 @@ from .import_corpus import check_zip_contents
 | 
			
		||||
@bp.route('/add', methods=['GET', 'POST'])
 | 
			
		||||
@login_required
 | 
			
		||||
def add_corpus():
 | 
			
		||||
    form = AddCorpusForm()
 | 
			
		||||
    form = AddCorpusForm(prefix='add-corpus-form')
 | 
			
		||||
    if form.validate_on_submit():
 | 
			
		||||
        corpus = Corpus(creator=current_user,
 | 
			
		||||
                        description=form.description.data,
 | 
			
		||||
                        title=form.title.data)
 | 
			
		||||
        corpus = Corpus(
 | 
			
		||||
            creator=current_user,
 | 
			
		||||
            description=form.description.data,
 | 
			
		||||
            title=form.title.data
 | 
			
		||||
        )
 | 
			
		||||
        db.session.add(corpus)
 | 
			
		||||
        db.session.flush()
 | 
			
		||||
        db.session.refresh(corpus)
 | 
			
		||||
        try:
 | 
			
		||||
            os.makedirs(corpus.path)
 | 
			
		||||
        except OSError:
 | 
			
		||||
            current_app.logger.error(
 | 
			
		||||
                'Make dir {} led to an OSError!'.format(corpus.path)
 | 
			
		||||
            )
 | 
			
		||||
        except OSError as e:
 | 
			
		||||
            current_app.logger.error(f'Could not add corpus: {e}')
 | 
			
		||||
            db.session.rollback()
 | 
			
		||||
            flash('Internal Server Error', 'error')
 | 
			
		||||
            abort(500)
 | 
			
		||||
        else:
 | 
			
		||||
            db.session.commit()
 | 
			
		||||
@@ -49,22 +46,23 @@ def add_corpus():
 | 
			
		||||
@bp.route('/import', methods=['GET', 'POST'])
 | 
			
		||||
@login_required
 | 
			
		||||
def import_corpus():
 | 
			
		||||
    abort(503)
 | 
			
		||||
    form = ImportCorpusForm()
 | 
			
		||||
    if form.is_submitted():
 | 
			
		||||
        if not form.validate():
 | 
			
		||||
            return make_response(form.errors, 400)
 | 
			
		||||
        corpus = Corpus(creator=current_user,
 | 
			
		||||
                        description=form.description.data,
 | 
			
		||||
                        title=form.title.data)
 | 
			
		||||
        corpus = Corpus(
 | 
			
		||||
            creator=current_user,
 | 
			
		||||
            description=form.description.data,
 | 
			
		||||
            title=form.title.data
 | 
			
		||||
        )
 | 
			
		||||
        db.session.add(corpus)
 | 
			
		||||
        db.session.flush()
 | 
			
		||||
        db.session.refresh(corpus)
 | 
			
		||||
        try:
 | 
			
		||||
            os.makedirs(corpus.path)
 | 
			
		||||
        except OSError:
 | 
			
		||||
            current_app.logger.error(
 | 
			
		||||
                'Make dir {} led to an OSError!'.format(corpus.path)
 | 
			
		||||
            )
 | 
			
		||||
        except OSError as e:
 | 
			
		||||
            current_app.logger.error(f'Could not import corpus: {e}')
 | 
			
		||||
            db.session.rollback()
 | 
			
		||||
            flash('Internal Server Error', 'error')
 | 
			
		||||
            return make_response(
 | 
			
		||||
@@ -128,9 +126,21 @@ def corpus(corpus_id):
 | 
			
		||||
                           corpus_files=corpus_files, title='Corpus')
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@bp.route('/<int:corpus_id>/analyse')
@login_required
def analyse_corpus(corpus_id):
    '''
    View to render the analysis page of a corpus.

    Answers 404 if no corpus with the given id exists and 403 if the current
    user is neither the creator of the corpus nor an administrator.
    '''
    corpus = Corpus.query.get_or_404(corpus_id)
    # Same access rule as the other corpus views (e.g. download_corpus):
    # without it any logged in user could open the page of a foreign corpus.
    if not (corpus.creator == current_user or current_user.is_administrator()):
        abort(403)
    return render_template(
        'corpora/analyse_corpus.html.j2',
        corpus=corpus,
        title=f'Analyse Corpus {corpus.title}'
    )
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@bp.route('/<int:corpus_id>/download')
 | 
			
		||||
@login_required
 | 
			
		||||
def download_corpus(corpus_id):
 | 
			
		||||
    abort(503)
 | 
			
		||||
    corpus = Corpus.query.get_or_404(corpus_id)
 | 
			
		||||
    if not (corpus.creator == current_user or current_user.is_administrator()):
 | 
			
		||||
        abort(403)
 | 
			
		||||
@@ -142,31 +152,6 @@ def download_corpus(corpus_id):
 | 
			
		||||
    )
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@bp.route('/<int:corpus_id>/analyse')
@login_required
def analyse_corpus(corpus_id):
    '''
    View to render the corpus analysis page including its forms.

    Answers 404 if no corpus with the given id exists and 403 if the current
    user is neither the creator of the corpus nor an administrator. The
    "context", "results_per_page" and "query" URL parameters are used to
    prefill the display options form and the query form.
    '''
    corpus = Corpus.query.get_or_404(corpus_id)
    # Same access rule as the other corpus views (e.g. download_corpus):
    # without it any logged in user could open the page of a foreign corpus.
    if not (corpus.creator == current_user or current_user.is_administrator()):
        abort(403)
    display_options_form = DisplayOptionsForm(
        prefix='display-options-form',
        result_context=request.args.get('context', 20),
        results_per_page=request.args.get('results_per_page', 30)
    )
    query_form = QueryForm(
        prefix='query-form',
        query=request.args.get('query')
    )
    query_download_form = QueryDownloadForm(prefix='query-download-form')
    inspect_display_options_form = InspectDisplayOptionsForm(
        prefix='inspect-display-options-form'
    )
    return render_template(
        'corpora/analyse_corpus.html.j2',
        corpus=corpus,
        display_options_form=display_options_form,
        inspect_display_options_form=inspect_display_options_form,
        query_form=query_form,
        query_download_form=query_download_form,
        title='Corpus analysis'
    )
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@bp.route('/<int:corpus_id>/delete')
 | 
			
		||||
@login_required
 | 
			
		||||
def delete_corpus(corpus_id):
 | 
			
		||||
@@ -190,20 +175,22 @@ def add_corpus_file(corpus_id):
 | 
			
		||||
            return make_response(form.errors, 400)
 | 
			
		||||
        # Save the file
 | 
			
		||||
        form.file.data.save(os.path.join(corpus.path, form.file.data.filename))
 | 
			
		||||
        corpus_file = CorpusFile(address=form.address.data,
 | 
			
		||||
                                 author=form.author.data,
 | 
			
		||||
                                 booktitle=form.booktitle.data,
 | 
			
		||||
                                 chapter=form.chapter.data,
 | 
			
		||||
                                 corpus=corpus,
 | 
			
		||||
                                 editor=form.editor.data,
 | 
			
		||||
                                 filename=form.file.data.filename,
 | 
			
		||||
                                 institution=form.institution.data,
 | 
			
		||||
                                 journal=form.journal.data,
 | 
			
		||||
                                 pages=form.pages.data,
 | 
			
		||||
                                 publisher=form.publisher.data,
 | 
			
		||||
                                 publishing_year=form.publishing_year.data,
 | 
			
		||||
                                 school=form.school.data,
 | 
			
		||||
                                 title=form.title.data)
 | 
			
		||||
        corpus_file = CorpusFile(
 | 
			
		||||
            address=form.address.data,
 | 
			
		||||
            author=form.author.data,
 | 
			
		||||
            booktitle=form.booktitle.data,
 | 
			
		||||
            chapter=form.chapter.data,
 | 
			
		||||
            corpus=corpus,
 | 
			
		||||
            editor=form.editor.data,
 | 
			
		||||
            filename=form.file.data.filename,
 | 
			
		||||
            institution=form.institution.data,
 | 
			
		||||
            journal=form.journal.data,
 | 
			
		||||
            pages=form.pages.data,
 | 
			
		||||
            publisher=form.publisher.data,
 | 
			
		||||
            publishing_year=form.publishing_year.data,
 | 
			
		||||
            school=form.school.data,
 | 
			
		||||
            title=form.title.data
 | 
			
		||||
        )
 | 
			
		||||
        db.session.add(corpus_file)
 | 
			
		||||
        corpus.status = 'unprepared'
 | 
			
		||||
        db.session.commit()
 | 
			
		||||
@@ -298,122 +285,3 @@ def prepare_corpus(corpus_id):
 | 
			
		||||
    else:
 | 
			
		||||
        flash('Can not build corpus "{}": No corpus file(s)!'.format(corpus.title), 'error')  # noqa
 | 
			
		||||
    return redirect(url_for('.corpus', corpus_id=corpus_id))
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
# Following are view functions to add, view etc. exported results.
 | 
			
		||||
@bp.route('/result/add', methods=['GET', 'POST'])
@login_required
def add_query_result():
    '''
    View to import a result as a json file.

    A request without a submitted form renders the upload page. A submitted
    form that does not validate is answered with the form errors and status
    400. On success the uploaded file is stored at query_result.path, its
    content without the 'matches' and 'cpos_lookup' entries is saved as
    query_result.query_metadata and a 201 response carrying the URL of the
    new query result is returned.

    If the directory can not be created or the uploaded file is not a JSON
    object, the database session is rolled back and a 500 response carrying a
    redirect URL back to this view is returned.
    '''
    form = AddQueryResultForm(prefix='add-query-result-form')
    if not form.is_submitted():
        return render_template(
            'corpora/query_results/add_query_result.html.j2',
            form=form,
            title='Add query result'
        )
    if not form.validate():
        return make_response(form.errors, 400)
    query_result = QueryResult(
        creator=current_user,
        description=form.description.data,
        filename=form.file.data.filename,
        title=form.title.data
    )
    db.session.add(query_result)
    # flush + refresh so that the database generated values, which
    # query_result.path presumably depends on, are available
    db.session.flush()
    db.session.refresh(query_result)
    # Resolved before any rollback, afterwards the instance is not usable.
    query_result_dir = os.path.dirname(query_result.path)
    try:
        os.makedirs(query_result_dir)
    except OSError:
        # Log the directory that could not be created (not the file path).
        current_app.logger.error(
            'Make dir {} led to an OSError!'.format(query_result_dir)
        )
        db.session.rollback()
        flash('Internal Server Error', 'error')
        return make_response(
            {'redirect_url': url_for('.add_query_result')}, 500)
    # save the uploaded file
    form.file.data.save(query_result.path)
    # parse json from file, JSON is read as UTF-8 explicitly instead of
    # relying on the locale dependent default encoding of open()
    try:
        with open(query_result.path, 'r', encoding='utf-8') as file:
            query_result_file_content = json.load(file)
        if not isinstance(query_result_file_content, dict):
            raise ValueError('Top level JSON value is not an object')
    except ValueError as e:
        # json.JSONDecodeError and UnicodeDecodeError are ValueErrors, too.
        # Previously such an upload ended in an unhandled exception and left
        # the uploaded file behind. The status code stays 500, the response
        # has the same shape as the one of the OSError branch above.
        current_app.logger.error(
            'Could not add query result: {}'.format(e)
        )
        db.session.rollback()
        # The directory has just been created for this upload only
        # (os.makedirs would have failed if it already existed).
        shutil.rmtree(query_result_dir, ignore_errors=True)
        flash('Uploaded file is invalid', 'error')
        return make_response(
            {'redirect_url': url_for('.add_query_result')}, 500)
    # The bulky entries are not stored as metadata. A default is given so
    # that a file lacking one of them does not raise a KeyError.
    query_result_file_content.pop('matches', None)
    query_result_file_content.pop('cpos_lookup', None)
    query_result.query_metadata = query_result_file_content
    db.session.commit()
    flash('Query result added!', 'result')
    return make_response(
        {'redirect_url': url_for('.query_result',
                                 query_result_id=query_result.id)},
        201
    )
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@bp.route('/result/<int:query_result_id>')
@login_required
def query_result(query_result_id):
    '''
    View to show a single imported query result.

    Answers 404 for an unknown id and 403 if the current user is neither the
    creator of the query result nor an administrator.
    '''
    result = QueryResult.query.get_or_404(query_result_id)
    is_creator = result.creator == current_user
    if not is_creator and not current_user.is_administrator():
        abort(403)
    return render_template(
        'corpora/query_results/query_result.html.j2',
        query_result=result,
        title='Query result'
    )
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@bp.route('/result/<int:query_result_id>/inspect')
@login_required
def inspect_query_result(query_result_id):
    '''
    View to inspect imported result file in a corpus analysis like interface

    Answers 404 for an unknown id and 403 if the current user is neither the
    creator of the query result nor an administrator. Otherwise the stored
    result file is parsed and handed to the template together with the
    display option forms, which are prefilled from the "results_per_page" and
    "context" URL parameters.
    '''
    query_result = QueryResult.query.get_or_404(query_result_id)
    # Authorize before any data of the query result is read.
    if not (query_result.creator == current_user
            or current_user.is_administrator()):
        abort(403)
    display_options_form = DisplayOptionsForm(
        prefix='display-options-form',
        results_per_page=request.args.get('results_per_page', 30),
        result_context=request.args.get('context', 20)
    )
    inspect_display_options_form = InspectDisplayOptionsForm(
        prefix='inspect-display-options-form'
    )
    # JSON is read as UTF-8 explicitly instead of relying on the locale
    # dependent default encoding of open().
    with open(query_result.path, 'r', encoding='utf-8') as query_result_file:
        query_result_file_content = json.load(query_result_file)
    return render_template(
        'corpora/query_results/inspect.html.j2',
        query_result=query_result,
        display_options_form=display_options_form,
        inspect_display_options_form=inspect_display_options_form,
        query_result_file_content=query_result_file_content,
        query_metadata=query_result.query_metadata,
        title='Inspect query result'
    )
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@bp.route('/result/<int:query_result_id>/delete')
@login_required
def delete_query_result(query_result_id):
    '''
    View to request the deletion of a query result.

    Answers 404 for an unknown id and 403 if the current user is neither the
    creator of the query result nor an administrator. Otherwise a message is
    flashed, the id is handed to tasks.delete_query_result and the user is
    redirected to the corpus analysis service page.
    '''
    result = QueryResult.query.get_or_404(query_result_id)
    may_delete = (result.creator == current_user
                  or current_user.is_administrator())
    if not may_delete:
        abort(403)
    message = 'Query result "{}" has been marked for deletion!'.format(result)
    flash(message, 'result')
    tasks.delete_query_result(query_result_id)
    target = url_for('services.service', service="corpus_analysis")
    return redirect(target)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@bp.route('/result/<int:query_result_id>/download')
@login_required
def download_query_result(query_result_id):
    '''
    View to download the stored file of a query result as an attachment.

    Answers 404 for an unknown id and 403 if the current user is neither the
    creator of the query result nor an administrator.
    '''
    result = QueryResult.query.get_or_404(query_result_id)
    is_creator = result.creator == current_user
    if not is_creator and not current_user.is_administrator():
        abort(403)
    return send_from_directory(
        as_attachment=True,
        directory=os.path.dirname(result.path),
        filename=result.filename
    )
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user