Change directory structure (move ./nopaque/* to ./)

This commit is contained in:
Patrick Jentsch
2021-07-20 15:07:42 +02:00
parent ff39d8d650
commit d6ab379418
231 changed files with 26 additions and 23 deletions

5
app/corpora/__init__.py Normal file
View File

@ -0,0 +1,5 @@
from flask import Blueprint
corpora = Blueprint('corpora', __name__)
from . import events, views # noqa

307
app/corpora/events.py Normal file
View File

@ -0,0 +1,307 @@
from datetime import datetime
from flask import current_app, request
from flask_login import current_user
from socket import gaierror
from .. import db, socketio
from ..decorators import socketio_login_required
from ..events import socketio_sessions
from ..models import Corpus
import cqi
import math
import os
import shutil
'''
' A dictionary containing lists of, with corpus ids associated, Socket.IO
' session ids (sid). {<corpus_id>: [<sid>, ...], ...}
'''
corpus_analysis_sessions = {}
'''
' A dictionary containing Socket.IO session id - CQi client pairs.
' {<sid>: CQiClient, ...}
'''
corpus_analysis_clients = {}
@socketio.on('corpus_analysis_init')
@socketio_login_required
def init_corpus_analysis(corpus_id):
corpus = Corpus.query.get(corpus_id)
if corpus is None:
response = {'code': 404, 'desc': None, 'msg': 'Not Found'}
socketio.emit('corpus_analysis_init', response, room=request.sid)
return
if not (corpus.creator == current_user or current_user.is_administrator()): # noqa
response = {'code': 403, 'desc': None, 'msg': 'Forbidden'}
socketio.emit('corpus_analysis_init', response, room=request.sid)
return
if corpus.status not in ['prepared', 'start analysis', 'analysing']:
response = {'code': 424, 'desc': 'Corpus status is not "prepared", "start analysis" or "analying"', 'msg': 'Failed Dependency'} # noqa
socketio.emit('corpus_analysis_init', response, room=request.sid)
return
if corpus.status == 'prepared':
corpus.status = 'start analysis'
db.session.commit()
event = 'user_{}_patch'.format(current_user.id)
jsonpatch = [{'op': 'replace', 'path': '/corpora/{}/status'.format(corpus.id), 'value': corpus.status}] # noqa
room = 'user_{}'.format(corpus.user_id)
socketio.emit(event, jsonpatch, room=room)
socketio.start_background_task(corpus_analysis_session_handler,
current_app._get_current_object(),
corpus_id, current_user.id, request.sid)
def corpus_analysis_session_handler(app, corpus_id, user_id, session_id):
with app.app_context():
''' Setup analysis session '''
corpus = Corpus.query.get(corpus_id)
retry_counter = 15
while corpus.status != 'analysing':
db.session.refresh(corpus)
retry_counter -= 1
if retry_counter == 0:
response = {'code': 408, 'desc': 'Corpus analysis session took to long to start', 'msg': 'Request Timeout'} # noqa
socketio.emit('corpus_analysis_init', response, room=request.sid) # noqa
socketio.sleep(3)
client = cqi.CQiClient('cqpserver_{}'.format(corpus_id))
try:
connect_status = client.connect()
payload = {'code': connect_status, 'msg': cqi.api.specification.lookup[connect_status]} # noqa
except cqi.errors.CQiException as e:
handle_cqi_exception('corpus_analysis_init', e, session_id)
return
except gaierror:
response = {'code': 500, 'desc': None, 'msg': 'Internal Server Error'} # noqa
socketio.emit('corpus_analysis_init', response, room=session_id)
return
corpus_analysis_clients[session_id] = client
if corpus_id in corpus_analysis_sessions:
corpus_analysis_sessions[corpus_id].append(session_id)
else:
corpus_analysis_sessions[corpus_id] = [session_id]
client.status = 'ready'
response = {'code': 200, 'desc': None, 'msg': 'OK', 'payload': payload}
socketio.emit('corpus_analysis_init', response, room=session_id)
''' Observe analysis session '''
while session_id in socketio_sessions:
socketio.sleep(3)
''' Teardown analysis session '''
if client.status == 'running':
client.status = 'abort'
while client.status != 'ready':
socketio.sleep(0.1)
try:
client.disconnect()
except cqi.errors.CQiException:
pass
corpus_analysis_clients.pop(session_id, None)
corpus_analysis_sessions[corpus_id].remove(session_id)
if not corpus_analysis_sessions[corpus_id]:
corpus_analysis_sessions.pop(corpus_id, None)
corpus.status = 'stop analysis'
db.session.commit()
event = 'user_{}_patch'.format(corpus.user_id)
jsonpatch = [{'op': 'replace', 'path': '/corpora/{}/status'.format(corpus.id), 'value': corpus.status}] # noqa
room = 'user_{}'.format(corpus.user_id)
socketio.emit(event, jsonpatch, room=room)
@socketio.on('corpus_analysis_meta_data')
@socketio_login_required
def corpus_analysis_get_meta_data(corpus_id):
# get meta data from db
db_corpus = Corpus.query.get(corpus_id)
metadata = {}
metadata['corpus_name'] = db_corpus.title
metadata['corpus_description'] = db_corpus.description
metadata['corpus_creation_date'] = db_corpus.creation_date.isoformat()
metadata['corpus_last_edited_date'] = \
db_corpus.last_edited_date.isoformat()
client = corpus_analysis_clients.get(request.sid)
if client is None:
response = {'code': 424, 'desc': 'No client found for this session',
'msg': 'Failed Dependency'}
socketio.emit('corpus_analysis_meta_data', response, room=request.sid)
return
# check if client is busy or not
if client.status == 'running':
client.status = 'abort'
while client.status != 'ready':
socketio.sleep(0.1)
# get meta data from corpus in cqp server
client.status = 'running'
try:
client_corpus = client.corpora.get('CORPUS')
metadata['corpus_properties'] = client_corpus.attrs['properties']
metadata['corpus_size_tokens'] = client_corpus.attrs['size']
text_attr = client_corpus.structural_attributes.get('text')
struct_attrs = client_corpus.structural_attributes.list(
filters={'part_of': text_attr})
text_ids = range(0, (text_attr.attrs['size']))
texts_metadata = {}
for text_id in text_ids:
texts_metadata[text_id] = {}
for struct_attr in struct_attrs:
texts_metadata[text_id][struct_attr.attrs['name'][(len(text_attr.attrs['name']) + 1):]] = struct_attr.values_by_ids(list(range(struct_attr.attrs['size'])))[text_id] # noqa
metadata['corpus_all_texts'] = texts_metadata
metadata['corpus_analysis_date'] = datetime.utcnow().isoformat()
metadata['corpus_cqi_py_protocol_version'] = client.api.version
metadata['corpus_cqi_py_package_version'] = cqi.__version__
# TODO: make this dynamically
metadata['corpus_cqpserver_version'] = 'CQPserver v3.4.22'
# write some metadata to the db
db_corpus.current_nr_of_tokens = metadata['corpus_size_tokens']
db.session.commit()
event = 'user_{}_patch'.format(db_corpus.user_id)
jsonpatch = [{'op': 'replace', 'path': '/corpora/{}/current_nr_of_tokens'.format(db_corpus.id), 'value': db_corpus.current_nr_of_tokens}] # noqa
room = 'user_{}'.format(db_corpus.user_id)
socketio.emit(event, jsonpatch, room=room)
# emit data
payload = metadata
response = {'code': 200, 'desc': 'Corpus meta data', 'msg': 'OK',
'payload': payload}
socketio.emit('corpus_analysis_meta_data', response, room=request.sid)
except cqi.errors.CQiException as e:
payload = {'code': e.code, 'desc': e.description, 'msg': e.name}
response = {'code': 500, 'desc': None, 'msg': 'Internal Server Error',
'payload': payload}
socketio.emit('corpus_analysis_meta_data', response, room=request.sid)
client.status = 'ready'
@socketio.on('corpus_analysis_query')
@socketio_login_required
def corpus_analysis_query(query):
client = corpus_analysis_clients.get(request.sid)
if client is None:
response = {'code': 424, 'desc': 'No client found for this session',
'msg': 'Failed Dependency'}
socketio.emit('corpus_analysis_query', response, room=request.sid)
return
if client.status == 'running':
client.status = 'abort'
while client.status != 'ready':
socketio.sleep(0.1)
client.status = 'running'
try:
corpus = client.corpora.get('CORPUS')
query_status = corpus.query(query)
results = corpus.subcorpora.get('Results')
except cqi.errors.CQiException as e:
client.status = 'ready'
handle_cqi_exception('corpus_analysis_query', e, request.sid)
return
payload = {'status': query_status,
'msg': cqi.api.specification.lookup[query_status],
'match_count': results.attrs['size']}
response = {'code': 200, 'desc': None, 'msg': 'OK', 'payload': payload}
socketio.emit('corpus_analysis_query', response, room=request.sid)
chunk_size = 100
chunk_start = 0
context = 50
progress = 0
while chunk_start <= results.attrs['size']:
if client.status == 'abort':
break
try:
chunk = results.export(context=context, cutoff=chunk_size, offset=chunk_start) # noqa
except cqi.errors.CQiException as e:
handle_cqi_exception('corpus_analysis_query', e, request.sid)
break
if (results.attrs['size'] == 0):
progress = 100
else:
progress = ((chunk_start + chunk_size) / results.attrs['size']) * 100 # noqa
progress = min(100, int(math.ceil(progress)))
payload = {'chunk': chunk, 'progress': progress}
response = {'code': 200, 'desc': None, 'msg': 'OK', 'payload': payload}
socketio.emit('corpus_analysis_query_results', response, room=request.sid) # noqa
chunk_start += chunk_size
client.status = 'ready'
@socketio.on('corpus_analysis_get_match_with_full_context')
@socketio_login_required
def corpus_analysis_get_match_with_full_context(payload):
type = payload['type']
data_indexes = payload['data_indexes']
first_cpos = payload['first_cpos']
last_cpos = payload['last_cpos']
client = corpus_analysis_clients.get(request.sid)
if client is None:
response = {'code': 424, 'desc': 'No client found for this session',
'msg': 'Failed Dependency'}
socketio.emit('corpus_analysis_get_match_with_full_context', response,
room=request.sid)
return
if client.status == 'running':
client.status = 'abort'
while client.status != 'ready':
socketio.sleep(0.1)
client.status = 'running'
try:
corpus = client.corpora.get('CORPUS')
s = corpus.structural_attributes.get('s')
except cqi.errors.CQiException as e:
handle_cqi_exception('corpus_analysis_get_match_with_full_context', e, request.sid) # noqa
return
i = 0
# Send data one match at a time.
for index, f_cpos, l_cpos in zip(data_indexes, first_cpos, last_cpos):
i += 1
matches = []
cpos_lookup = text_lookup = {}
try:
tmp = s.export(f_cpos, l_cpos, context=10)
except cqi.errors.CQiException as e:
handle_cqi_exception('corpus_analysis_get_match_with_full_context', e, request.sid) # noqa
break
matches.append(tmp['matches'][0])
cpos_lookup.update(tmp['cpos_lookup'])
text_lookup.update(tmp['text_lookup'])
progress = i / len(data_indexes) * 100
payload = {'matches': matches, 'progress': progress,
'cpos_lookup': cpos_lookup, 'text_lookup': text_lookup}
response = {'code': 200, 'desc': None, 'msg': 'OK', 'payload': payload,
'type': type, 'data_indexes': data_indexes}
socketio.emit('corpus_analysis_get_match_with_full_context',
response, room=request.sid)
client.status = 'ready'
@socketio.on('export_corpus')
@socketio_login_required
def export_corpus(corpus_id):
corpus = Corpus.query.get(corpus_id)
if corpus is None:
response = {'code': 404, 'msg': 'Not found'}
socketio.emit('export_corpus', response, room=request.sid)
return
if corpus.status != 'prepared':
response = {'code': 412, 'msg': 'Precondition Failed'}
socketio.emit('export_corpus', response, room=request.sid)
return
# delete old corpus archive if it exists/has been build before
if corpus.archive_file is not None and os.path.isfile(corpus.archive_file):
os.remove(corpus.archive_file)
zip_name = corpus.title
zip_path = os.path.join(current_user.path, 'corpora', zip_name)
corpus.archive_file = os.path.join(corpus.path, zip_name) + '.zip'
db.session.commit()
shutil.make_archive(zip_path, 'zip', corpus.path)
shutil.move(zip_path + '.zip', corpus.archive_file)
socketio.emit('export_corpus_' + str(corpus.id), room=request.sid)
def handle_cqi_exception(event, exception, room):
response = {'code': 500,
'desc': None,
'msg': 'Internal Server Error',
'payload': {'code': exception.code,
'desc': exception.description,
'msg': exception.name}}
socketio.emit(event, response, room=room)

363
app/corpora/events2.py Normal file
View File

@ -0,0 +1,363 @@
from datetime import datetime
from flask import current_app, request
from flask_login import current_user
from socket import gaierror
from .. import db, socketio
from ..decorators import socketio_login_required
from ..events import socketio_sessions
from ..models import Corpus, User
import cqi
import math
import os
import shutil
import logging
'''
' A dictionary containing lists of, with corpus ids associated, Socket.IO
' session ids (sid). {<corpus_id>: [<sid>, ...], ...}
'''
corpus_analysis_sessions = {}
'''
' A dictionary containing Socket.IO session id - CQi client pairs.
' {<sid>: CQiClient, ...}
'''
corpus_analysis_clients = {}
@socketio.on('corpus_analysis_init')
@socketio_login_required
def init_corpus_analysis(corpus_id):
corpus = Corpus.query.get(corpus_id)
if corpus is None:
response = {'code': 404, 'desc': None, 'msg': 'Not Found'}
socketio.emit('corpus_analysis_init', response, room=request.sid)
return
if not (corpus.creator == current_user or current_user.is_administrator()): # noqa
response = {'code': 403, 'desc': None, 'msg': 'Forbidden'}
socketio.emit('corpus_analysis_init', response, room=request.sid)
return
if corpus.status not in ['prepared', 'start analysis', 'analysing']:
response = {'code': 424, 'desc': 'Corpus status is not "prepared", "start analysis" or "analying"', 'msg': 'Failed Dependency'} # noqa
socketio.emit('corpus_analysis_init', response, room=request.sid)
return
if corpus.status == 'prepared':
corpus.status = 'start analysis'
db.session.commit()
event = 'user_{}_patch'.format(current_user.id)
jsonpatch = [{'op': 'replace', 'path': '/corpora/{}/status'.format(corpus.id), 'value': corpus.status}] # noqa
room = 'user_{}'.format(corpus.user_id)
socketio.emit(event, jsonpatch, room=room)
socketio.start_background_task(corpus_analysis_session_handler,
current_app._get_current_object(),
corpus_id, current_user.id, request.sid)
def corpus_analysis_session_handler(app, corpus_id, user_id, session_id):
with app.app_context():
''' Setup analysis session '''
corpus = Corpus.query.get(corpus_id)
retry_counter = 15
while corpus.status != 'analysing':
db.session.refresh(corpus)
retry_counter -= 1
if retry_counter == 0:
response = {'code': 408, 'desc': 'Corpus analysis session took to long to start', 'msg': 'Request Timeout'} # noqa
socketio.emit('corpus_analysis_init', response, room=request.sid) # noqa
socketio.sleep(3)
client = cqi.CQiClient('cqpserver_{}'.format(corpus_id))
try:
connect_status = client.connect()
payload = {'code': connect_status, 'msg': cqi.api.specification.lookup[connect_status]} # noqa
except cqi.errors.CQiException as e:
payload = {'code': e.code, 'desc': e.description, 'msg': e.name}
response = {'code': 500, 'desc': None,
'msg': 'Internal Server Error', 'payload': payload}
socketio.emit('corpus_analysis_init', response, room=session_id)
return
except gaierror:
response = {'code': 500, 'desc': None,
'msg': 'Internal Server Error'}
socketio.emit('corpus_analysis_init', response, room=session_id)
return
corpus_analysis_clients[session_id] = client
if corpus_id in corpus_analysis_sessions:
corpus_analysis_sessions[corpus_id].append(session_id)
else:
corpus_analysis_sessions[corpus_id] = [session_id]
client.status = 'ready'
response = {'code': 200, 'desc': None, 'msg': 'OK', 'payload': payload}
socketio.emit('corpus_analysis_init', response, room=session_id)
''' Observe analysis session '''
while session_id in socketio_sessions:
socketio.sleep(3)
''' Teardown analysis session '''
if client.status == 'running':
client.status = 'abort'
while client.status != 'ready':
socketio.sleep(0.1)
try:
client.disconnect()
except cqi.errors.CQiException:
pass
corpus_analysis_clients.pop(session_id, None)
corpus_analysis_sessions[corpus_id].remove(session_id)
if not corpus_analysis_sessions[corpus_id]:
corpus_analysis_sessions.pop(corpus_id, None)
corpus.status = 'stop analysis'
db.session.commit()
event = 'user_{}_patch'.format(corpus.user_id)
jsonpatch = [{'op': 'replace', 'path': '/corpora/{}/status'.format(corpus.id), 'value': corpus.status}] # noqa
room = 'user_{}'.format(corpus.user_id)
socketio.emit(event, jsonpatch, room=room)
@socketio.on('corpus_analysis_meta_data')
@socketio_login_required
def corpus_analysis_get_meta_data(corpus_id):
# get meta data from db
db_corpus = Corpus.query.get(corpus_id)
metadata = {}
metadata['corpus_name'] = db_corpus.title
metadata['corpus_description'] = db_corpus.description
metadata['corpus_creation_date'] = db_corpus.creation_date.isoformat()
metadata['corpus_last_edited_date'] = \
db_corpus.last_edited_date.isoformat()
client = corpus_analysis_clients.get(request.sid)
if client is None:
response = {'code': 424, 'desc': 'No client found for this session',
'msg': 'Failed Dependency'}
socketio.emit('corpus_analysis_meta_data', response, room=request.sid)
return
# check if client is busy or not
if client.status == 'running':
client.status = 'abort'
while client.status != 'ready':
socketio.sleep(0.1)
# get meta data from corpus in cqp server
client.status = 'running'
try:
client_corpus = client.corpora.get('CORPUS')
metadata['corpus_properties'] = client_corpus.attrs['properties']
metadata['corpus_size_tokens'] = client_corpus.attrs['size']
text_attr = client_corpus.structural_attributes.get('text')
struct_attrs = client_corpus.structural_attributes.list(
filters={'part_of': text_attr})
text_ids = range(0, (text_attr.attrs['size']))
texts_metadata = {}
for text_id in text_ids:
texts_metadata[text_id] = {}
for struct_attr in struct_attrs:
texts_metadata[text_id][struct_attr.attrs['name'][(len(text_attr.attrs['name']) + 1):]] = struct_attr.values_by_ids(list(range(struct_attr.attrs['size'])))[text_id] # noqa
metadata['corpus_all_texts'] = texts_metadata
metadata['corpus_analysis_date'] = datetime.utcnow().isoformat()
metadata['corpus_cqi_py_protocol_version'] = client.api.version
metadata['corpus_cqi_py_package_version'] = cqi.__version__
# TODO: make this dynamically
metadata['corpus_cqpserver_version'] = 'CQPserver v3.4.22'
# write some metadata to the db
db_corpus.current_nr_of_tokens = metadata['corpus_size_tokens']
db.session.commit()
event = 'user_{}_patch'.format(db_corpus.user_id)
jsonpatch = [{'op': 'replace', 'path': '/corpora/{}/current_nr_of_tokens'.format(db_corpus.id), 'value': db_corpus.current_nr_of_tokens}] # noqa
room = 'user_{}'.format(db_corpus.user_id)
socketio.emit(event, jsonpatch, room=room)
# emit data
payload = metadata
response = {'code': 200, 'desc': 'Corpus meta data', 'msg': 'OK',
'payload': payload}
socketio.emit('corpus_analysis_meta_data', response, room=request.sid)
except cqi.errors.CQiException as e:
payload = {'code': e.code, 'desc': e.description, 'msg': e.name}
response = {'code': 500, 'desc': None, 'msg': 'Internal Server Error',
'payload': payload}
socketio.emit('corpus_analysis_meta_data', response, room=request.sid)
client.status = 'ready'
@socketio.on('corpus_analysis_query')
@socketio_login_required
def corpus_analysis_query(query):
client = corpus_analysis_clients.get(request.sid)
if client is None:
response = {'code': 424, 'desc': 'No client found for this session',
'msg': 'Failed Dependency'}
socketio.emit('corpus_analysis_query', response, room=request.sid)
return
if client.status == 'running':
client.status = 'abort'
while client.status != 'ready':
socketio.sleep(0.1)
client.status = 'running'
try:
corpus = client.corpora.get('CORPUS')
query_status = corpus.query(query)
results = corpus.subcorpora.get('Results')
except cqi.errors.CQiException as e:
client.status = 'ready'
payload = {'code': e.code, 'desc': e.description, 'msg': e.name}
response = {'code': 500, 'desc': None, 'msg': 'Internal Server Error',
'payload': payload}
socketio.emit('corpus_analysis_query', response, room=request.sid)
return
payload = {'status': query_status,
'msg': cqi.api.specification.lookup[query_status],
'match_count': results.attrs['size']}
response = {'code': 200, 'desc': None, 'msg': 'OK', 'payload': payload}
socketio.emit('corpus_analysis_query', response, room=request.sid)
chunk_size = 100
chunk_start = 0
context = 50
progress = 0
while chunk_start <= results.attrs['size']:
if client.status == 'abort':
break
try:
chunk = results.export(context=context, cutoff=chunk_size, offset=chunk_start) # noqa
except cqi.errors.CQiException as e:
client.status = 'ready'
payload = {'code': e.code, 'desc': e.description, 'msg': e.name}
response = {'code': 500, 'desc': None, 'msg': 'Internal Server Error',
'payload': payload}
socketio.emit('corpus_analysis_query', response, room=request.sid)
return
if (results.attrs['size'] == 0):
progress = 100
else:
progress = ((chunk_start + chunk_size) / results.attrs['size']) * 100 # noqa
progress = min(100, int(math.ceil(progress)))
response = {'code': 200, 'desc': None, 'msg': 'OK',
'payload': {'chunk': chunk, 'progress': progress}}
socketio.emit('corpus_analysis_query_results', response,
room=request.sid)
chunk_start += chunk_size
client.status = 'ready'
@socketio.on('corpus_analysis_inspect_match')
@socketio_login_required
def corpus_analysis_inspect_match(payload):
client = corpus_analysis_clients.get(request.sid)
if client is None:
response = {'code': 424, 'desc': 'No client found for this session',
'msg': 'Failed Dependency'}
socketio.emit('corpus_analysis_inspect_match', response, room=request.sid) # noqa
return
match_id = payload['match_id']
if client.status == 'running':
client.status = 'abort'
while client.status != 'ready':
socketio.sleep(0.1)
client.status = 'running'
try:
corpus = client.corpora.get('CORPUS')
results = corpus.subcorpora.get('Results')
except cqi.errors.CQiException as e:
client.status = 'ready'
payload = {'code': e.code, 'desc': e.description, 'msg': e.name}
response = {'code': 500, 'desc': None, 'msg': 'Internal Server Error',
'payload': payload}
socketio.emit('corpus_analysis_inspect_match', response, room=request.sid) # noqa
return
context = 200
try:
payload = results.export(context=context, cutoff=1, offset=match_id)
except cqi.errors.CQiException as e:
client.status = 'ready'
payload = {'code': e.code, 'desc': e.description, 'msg': e.name}
response = {'code': 500, 'desc': None, 'msg': 'Internal Server Error',
'payload': payload}
socketio.emit('corpus_analysis_inspect_match', response, room=request.sid) # noqa
return
response = {'code': 200, 'desc': None, 'msg': 'OK', 'payload': payload}
socketio.emit('corpus_analysis_inspect_match', response, room=request.sid)
client.status = 'ready'
@socketio.on('corpus_analysis_get_match_with_full_context')
@socketio_login_required
def corpus_analysis_get_match_with_full_context(payload):
type = payload['type']
data_indexes = payload['data_indexes']
first_cpos = payload['first_cpos']
last_cpos = payload['last_cpos']
client = corpus_analysis_clients.get(request.sid)
if client is None:
response = {'code': 424, 'desc': 'No client found for this session',
'msg': 'Failed Dependency'}
socketio.emit('corpus_analysis_get_match_with_full_context', response,
room=request.sid)
return
if client.status == 'running':
client.status = 'abort'
while client.status != 'ready':
socketio.sleep(0.1)
client.status = 'running'
try:
corpus = client.corpora.get('CORPUS')
s = corpus.structural_attributes.get('s')
payload = {}
payload['matches'] = []
payload['cpos_lookup'] = {}
payload['text_lookup'] = {}
payload['progress'] = 0
i = 0
# Send data one match at a time.
for index, f_cpos, l_cpos in zip(data_indexes, first_cpos, last_cpos):
i += 1
tmp_match = s.export(f_cpos, l_cpos, context=10)
payload['matches'].append(tmp_match['matches'][0])
payload['cpos_lookup'].update(tmp_match['cpos_lookup'])
payload['text_lookup'].update(tmp_match['text_lookup'])
payload['progress'] = i/len(data_indexes)*100
response = {'code': 200,
'desc': None,
'msg': 'OK',
'payload': payload,
'type': type,
'data_indexes': data_indexes}
socketio.emit('corpus_analysis_get_match_with_full_context',
response, room=request.sid)
payload['matches'] = []
payload['cpos_lookup'] = {}
payload['text_lookup'] = {}
except cqi.errors.CQiException as e:
payload = {'code': e.code, 'desc': e.description, 'msg': e.name}
response = {'code': 500,
'desc': None,
'msg': 'Internal Server Error',
'payload': payload,
'type': type,
'data_indexes': data_indexes}
socketio.emit('corpus_analysis_get_match_with_full_context',
response,
room=request.sid)
client.status = 'ready'
@socketio.on('export_corpus')
@socketio_login_required
def export_corpus(corpus_id):
corpus = Corpus.query.get(corpus_id)
if corpus is None:
response = {'code': 404, 'msg': 'Not found'}
socketio.emit('export_corpus', response, room=request.sid)
return
if corpus.status != 'prepared':
response = {'code': 412, 'msg': 'Precondition Failed'}
socketio.emit('export_corpus', response, room=request.sid)
return
# delete old corpus archive if it exists/has been build before
if corpus.archive_file is not None and os.path.isfile(corpus.archive_file):
os.remove(corpus.archive_file)
zip_name = corpus.title
zip_path = os.path.join(current_user.path, 'corpora', zip_name)
corpus.archive_file = os.path.join(corpus.path, zip_name) + '.zip'
db.session.commit()
shutil.make_archive(zip_path, 'zip', corpus.path)
shutil.move(zip_path + '.zip', corpus.archive_file)
socketio.emit('export_corpus_' + str(corpus.id), room=request.sid)

166
app/corpora/forms.py Normal file
View File

@ -0,0 +1,166 @@
from flask_wtf import FlaskForm
from werkzeug.utils import secure_filename
from wtforms import (BooleanField, FileField, StringField, SubmitField,
ValidationError, IntegerField, SelectField)
from wtforms.validators import DataRequired, Length, NumberRange
class AddCorpusFileForm(FlaskForm):
'''
Form to add a .vrt corpus file to the current corpus.
'''
# Required fields
author = StringField('Author', validators=[DataRequired(), Length(1, 255)])
file = FileField('File', validators=[DataRequired()])
publishing_year = IntegerField('Publishing year',
validators=[DataRequired()])
title = StringField('Title', validators=[DataRequired(), Length(1, 255)])
# Optional fields
address = StringField('Adress', validators=[Length(0, 255)])
booktitle = StringField('Booktitle', validators=[Length(0, 255)])
chapter = StringField('Chapter', validators=[Length(0, 255)])
editor = StringField('Editor', validators=[Length(0, 255)])
institution = StringField('Institution', validators=[Length(0, 255)])
journal = StringField('Journal', validators=[Length(0, 255)])
pages = StringField('Pages', validators=[Length(0, 255)])
publisher = StringField('Publisher', validators=[Length(0, 255)])
school = StringField('School', validators=[Length(0, 255)])
submit = SubmitField()
def __init__(self, corpus, *args, **kwargs):
super().__init__(*args, **kwargs)
self.corpus = corpus
def validate_file(self, field):
if not field.data.filename.lower().endswith('.vrt'):
raise ValidationError('File does not have an approved extension: '
'.vrt')
field.data.filename = secure_filename(field.data.filename)
for corpus_file in self.corpus.files:
if field.data.filename == corpus_file.filename:
raise ValidationError('File already registered to corpus.')
class EditCorpusFileForm(FlaskForm):
'''
Form to edit meta data of one corpus file.
'''
# Required fields
author = StringField('Author', validators=[DataRequired(), Length(1, 255)])
publishing_year = IntegerField('Publishing year',
validators=[DataRequired()])
title = StringField('Title', validators=[DataRequired(), Length(1, 255)])
# Optional fields
address = StringField('Adress', validators=[Length(0, 255)])
booktitle = StringField('Booktitle', validators=[Length(0, 255)])
chapter = StringField('Chapter', validators=[Length(0, 255)])
editor = StringField('Editor', validators=[Length(0, 255)])
institution = StringField('Institution', validators=[Length(0, 255)])
journal = StringField('Journal', validators=[Length(0, 255)])
pages = StringField('Pages', validators=[Length(0, 255)])
publisher = StringField('Publisher', validators=[Length(0, 255)])
school = StringField('School', validators=[Length(0, 255)])
submit = SubmitField()
class AddCorpusForm(FlaskForm):
'''
Form to add a a new corpus.
'''
description = StringField('Description',
validators=[DataRequired(), Length(1, 255)])
submit = SubmitField()
title = StringField('Title', validators=[DataRequired(), Length(1, 32)])
class ImportCorpusForm(FlaskForm):
'''
Form to import a corpus.
'''
description = StringField('Description',
validators=[DataRequired(), Length(1, 255)])
file = FileField('File', validators=[DataRequired()])
submit = SubmitField()
title = StringField('Title', validators=[DataRequired(), Length(1, 32)])
def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
def validate_file(self, field):
if not field.data.filename.lower().endswith('.zip'):
raise ValidationError('File does not have an approved extension: '
'.zip')
field.data.filename = secure_filename(field.data.filename)
class QueryForm(FlaskForm):
'''
Form to submit a query to the server which is executed via cqi-py.
'''
query = StringField('Query',
validators=[DataRequired(), Length(1, 1024)])
submit = SubmitField('Search')
class DisplayOptionsForm(FlaskForm):
'''
Form to alter how the matches are represented to the user by the user.
'''
expert_mode = BooleanField('Expert mode')
result_context = SelectField('Result context',
choices=[('', 'Choose your option'),
('10', '10'),
('20', '20'),
('30', '30'),
('40', '40'),
('50', '50')])
results_per_page = SelectField('Results per page',
choices=[('', 'Choose your option'),
('10', '10'),
('20', '20'),
('30', '30'),
('40', '40'),
('50', '50')])
class InspectDisplayOptionsForm(FlaskForm):
'''
Form for the inspect modal where the user can interact with how the current
match is being represented to him.
'''
expert_mode_inspect = BooleanField('Expert mode')
highlight_sentences = BooleanField('Split sentences')
context_sentences = IntegerField('Context sentences',
validators=[NumberRange(min=0, max=10)],
default=3)
class QueryDownloadForm(FlaskForm):
'''
Form to choose in what file format the analysis results are being
downloaded. WIP.
'''
file_type = SelectField('File type',
choices=[('', 'Choose file type'),
('csv', 'csv'),
('json', 'json'),
('excel', 'excel'),
('html', 'html-table')],
validators=[DataRequired()])
class AddQueryResultForm(FlaskForm):
'''
Form used to import one result json file.
'''
description = StringField('Description',
validators=[DataRequired(), Length(1, 255)])
file = FileField('File', validators=[DataRequired()])
title = StringField('Title', validators=[DataRequired(), Length(1, 32)])
submit = SubmitField()
def validate_file(self, field):
if not field.data.filename.lower().endswith('.json'):
raise ValidationError('File does not have an approved extension: '
'.json')
field.data.filename = secure_filename(field.data.filename)

View File

@ -0,0 +1,89 @@
check_zip_contents = ['data/',
'merged/',
'registry/',
'registry/corpus',
'data/corpus/',
'data/corpus/text_editor.avs',
'data/corpus/pos.lexicon',
'data/corpus/simple_pos.huf',
'data/corpus/word.huf',
'data/corpus/text_booktitle.avs',
'data/corpus/word.lexicon.srt',
'data/corpus/word.lexicon.idx',
'data/corpus/simple_pos.crx',
'data/corpus/text_pages.rng',
'data/corpus/simple_pos.crc',
'data/corpus/ner.lexicon',
'data/corpus/lemma.huf',
'data/corpus/text_title.rng',
'data/corpus/text_chapter.avx',
'data/corpus/lemma.lexicon.srt',
'data/corpus/lemma.lexicon.idx',
'data/corpus/text_school.rng',
'data/corpus/text_journal.avs',
'data/corpus/simple_pos.lexicon',
'data/corpus/pos.huf',
'data/corpus/text_editor.avx',
'data/corpus/lemma.crc',
'data/corpus/lemma.lexicon',
'data/corpus/pos.hcd',
'data/corpus/text_title.avx',
'data/corpus/text_institution.avs',
'data/corpus/text_address.avx',
'data/corpus/lemma.corpus.cnt',
'data/corpus/word.crx',
'data/corpus/simple_pos.hcd',
'data/corpus/simple_pos.huf.syn',
'data/corpus/simple_pos.lexicon.srt',
'data/corpus/text_author.avx',
'data/corpus/text_publisher.avs',
'data/corpus/text_chapter.avs',
'data/corpus/ner.corpus.cnt',
'data/corpus/pos.huf.syn',
'data/corpus/text_booktitle.rng',
'data/corpus/lemma.huf.syn',
'data/corpus/pos.corpus.cnt',
'data/corpus/word.lexicon',
'data/corpus/text_publishing_year.avs',
'data/corpus/lemma.hcd',
'data/corpus/text_school.avs',
'data/corpus/text_journal.rng',
'data/corpus/word.corpus.cnt',
'data/corpus/text_school.avx',
'data/corpus/text_journal.avx',
'data/corpus/pos.lexicon.srt',
'data/corpus/text_title.avs',
'data/corpus/word.hcd',
'data/corpus/text_chapter.rng',
'data/corpus/text_address.rng',
'data/corpus/ner.hcd',
'data/corpus/text_publisher.avx',
'data/corpus/text_institution.rng',
'data/corpus/lemma.crx',
'data/corpus/pos.crc',
'data/corpus/text_author.rng',
'data/corpus/text_address.avs',
'data/corpus/pos.lexicon.idx',
'data/corpus/ner.huf',
'data/corpus/ner.huf.syn',
'data/corpus/text_pages.avs',
'data/corpus/text_publishing_year.avx',
'data/corpus/ner.lexicon.idx',
'data/corpus/text.rng',
'data/corpus/word.crc',
'data/corpus/ner.crc',
'data/corpus/text_publisher.rng',
'data/corpus/text_editor.rng',
'data/corpus/text_author.avs',
'data/corpus/s.rng',
'data/corpus/text_publishing_year.rng',
'data/corpus/simple_pos.corpus.cnt',
'data/corpus/simple_pos.lexicon.idx',
'data/corpus/word.huf.syn',
'data/corpus/ner.lexicon.srt',
'data/corpus/text_pages.avx',
'data/corpus/text_booktitle.avx',
'data/corpus/pos.crx',
'data/corpus/ner.crx',
'data/corpus/text_institution.avx',
'merged/corpus.vrt']

62
app/corpora/tasks.py Normal file
View File

@ -0,0 +1,62 @@
from .. import db, socketio
from ..decorators import background
from ..models import Corpus, CorpusFile, QueryResult
@background
def build_corpus(corpus_id, *args, **kwargs):
app = kwargs['app']
with app.app_context():
corpus = Corpus.query.get(corpus_id)
if corpus is None:
raise Exception('Corpus {} not found'.format(corpus_id))
corpus.build()
db.session.commit()
event = 'user_{}_patch'.format(corpus.user_id)
jsonpatch = [{'op': 'replace', 'path': '/corpora/{}/last_edited_date'.format(corpus.id), 'value': corpus.last_edited_date.timestamp()}, # noqa
{'op': 'replace', 'path': '/corpora/{}/status'.format(corpus.id), 'value': corpus.status}] # noqa
room = 'user_{}'.format(corpus.user_id)
socketio.emit(event, jsonpatch, room=room)
@background
def delete_corpus(corpus_id, *args, **kwargs):
with kwargs['app'].app_context():
corpus = Corpus.query.get(corpus_id)
if corpus is None:
raise Exception('Corpus {} not found'.format(corpus_id))
event = 'user_{}_patch'.format(corpus.user_id)
jsonpatch = [{'op': 'remove', 'path': '/corpora/{}'.format(corpus.id)}]
room = 'user_{}'.format(corpus.user_id)
corpus.delete()
db.session.commit()
socketio.emit(event, jsonpatch, room=room)
@background
def delete_corpus_file(corpus_file_id, *args, **kwargs):
with kwargs['app'].app_context():
corpus_file = CorpusFile.query.get(corpus_file_id)
if corpus_file is None:
raise Exception('Corpus file {} not found'.format(corpus_file_id))
event = 'user_{}_patch'.format(corpus_file.corpus.user_id)
jsonpatch = [{'op': 'remove', 'path': '/corpora/{}/files/{}'.format(corpus_file.corpus_id, corpus_file.id)}, # noqa
{'op': 'replace', 'path': '/corpora/{}/status'.format(corpus_file.corpus_id), 'value': corpus_file.corpus.status}] # noqa
room = 'user_{}'.format(corpus_file.corpus.user_id)
corpus_file.delete()
db.session.commit()
socketio.emit(event, jsonpatch, room=room)
@background
def delete_query_result(query_result_id, *args, **kwargs):
with kwargs['app'].app_context():
query_result = QueryResult.query.get(query_result_id)
if query_result is None:
raise Exception('QueryResult {} not found'.format(query_result_id))
event = 'user_{}_patch'.format(query_result.user_id)
jsonpatch = [{'op': 'remove', 'path': '/query_results/{}'.format(query_result.id)}] # noqa
room = 'user_{}'.format(query_result.user_id)
query_result.delete()
db.session.commit()
socketio.emit(event, jsonpatch, room=room)

431
app/corpora/views.py Normal file
View File

@ -0,0 +1,431 @@
from flask import (abort, flash, make_response, redirect, request,
render_template, url_for, send_from_directory)
from flask_login import current_user, login_required
from . import corpora
from . import tasks
from .forms import (AddCorpusFileForm, AddCorpusForm, AddQueryResultForm,
EditCorpusFileForm, QueryDownloadForm, QueryForm,
DisplayOptionsForm, InspectDisplayOptionsForm,
ImportCorpusForm)
from jsonschema import validate
from .. import db, socketio
from ..models import Corpus, CorpusFile, QueryResult
import json
import logging
import os
import shutil
import glob
import xml.etree.ElementTree as ET
from zipfile import ZipFile
from .import_corpus import check_zip_contents
@corpora.route('/add', methods=['GET', 'POST'])
@login_required
def add_corpus():
form = AddCorpusForm()
if form.validate_on_submit():
corpus = Corpus(creator=current_user,
description=form.description.data,
title=form.title.data)
db.session.add(corpus)
db.session.flush()
db.session.refresh(corpus)
try:
os.makedirs(corpus.path)
except OSError:
logging.error('Make dir {} led to an OSError!'.format(corpus.path))
db.session.rollback()
abort(500)
else:
db.session.commit()
flash('Corpus "{}" added!'.format(corpus.title), 'corpus')
event = 'user_{}_patch'.format(corpus.user_id)
jsonpatch = [{'op': 'add', 'path': '/corpora/{}'.format(corpus.id), 'value': corpus.to_dict()}] # noqa
room = 'user_{}'.format(corpus.user_id)
socketio.emit(event, jsonpatch, room=room)
return redirect(url_for('.corpus', corpus_id=corpus.id))
return render_template('corpora/add_corpus.html.j2', form=form,
title='Add corpus')
@corpora.route('/import', methods=['GET', 'POST'])
@login_required
def import_corpus():
form = ImportCorpusForm()
if form.is_submitted():
if not form.validate():
return make_response(form.errors, 400)
corpus = Corpus(creator=current_user,
description=form.description.data,
title=form.title.data)
db.session.add(corpus)
db.session.flush()
db.session.refresh(corpus)
try:
os.makedirs(corpus.path)
except OSError:
logging.error('Make dir {} led to an OSError!'.format(corpus.path))
db.session.rollback()
flash('Internal Server Error', 'error')
return make_response(
{'redirect_url': url_for('.import_corpus')}, 500)
# Upload zip
archive_file = os.path.join(corpus.path, form.file.data.filename)
form.file.data.save(archive_file)
# Some checks to verify it is a valid exported corpus
with ZipFile(archive_file, 'r') as zip:
contents = zip.namelist()
if set(check_zip_contents).issubset(contents):
# Unzip
shutil.unpack_archive(archive_file, corpus.path)
# Register vrt files to corpus
vrts = glob.glob(corpus.path + '/*.vrt')
for file in vrts:
element_tree = ET.parse(file)
text_node = element_tree.find('text')
corpus_file = CorpusFile(
address=text_node.get('address', 'NULL'),
author=text_node.get('author', 'NULL'),
booktitle=text_node.get('booktitle', 'NULL'),
chapter=text_node.get('chapter', 'NULL'),
corpus=corpus,
editor=text_node.get('editor', 'NULL'),
filename=os.path.basename(file),
institution=text_node.get('institution', 'NULL'),
journal=text_node.get('journal', 'NULL'),
pages=text_node.get('pages', 'NULL'),
publisher=text_node.get('publisher', 'NULL'),
publishing_year=text_node.get('publishing_year', ''),
school=text_node.get('school', 'NULL'),
title=text_node.get('title', 'NULL')
)
db.session.add(corpus_file)
# finish import and redirect to imported corpus
corpus.status = 'prepared'
db.session.commit()
os.remove(archive_file)
flash('Corpus "{}" imported!'.format(corpus.title), 'corpus')
event = 'user_{}_patch'.format(corpus.user_id)
jsonpatch = [{'op': 'add', 'path': '/corpora/{}'.format(corpus.id), 'value': corpus.to_dict()}] # noqa
room = 'user_{}'.format(corpus.user_id)
socketio.emit(event, jsonpatch, room=room)
return make_response(
{'redirect_url': url_for('.corpus', corpus_id=corpus.id)}, 201)
else:
# If imported zip is not valid delete corpus and give feedback
flash('Can not import corpus "{}" not imported: Invalid archive file!', 'error') # noqa
tasks.delete_corpus(corpus.id)
return make_response(
{'redirect_url': url_for('.import_corpus')}, 201)
return render_template('corpora/import_corpus.html.j2', form=form,
title='Import Corpus')
@corpora.route('/<int:corpus_id>')
@login_required
def corpus(corpus_id):
corpus = Corpus.query.get_or_404(corpus_id)
if not (corpus.creator == current_user or current_user.is_administrator()):
abort(403)
corpus_files = [corpus_file.to_dict() for corpus_file in corpus.files]
return render_template('corpora/corpus.html.j2', corpus=corpus,
corpus_files=corpus_files, title='Corpus')
@corpora.route('/<int:corpus_id>/download')
@login_required
def download_corpus(corpus_id):
corpus = Corpus.query.get_or_404(corpus_id)
if not (corpus.creator == current_user or current_user.is_administrator()):
abort(403)
# TODO: Check what happens here
dir = os.path.dirname(corpus.archive_file)
filename = os.path.basename(corpus.archive_file)
return send_from_directory(as_attachment=True, directory=dir,
filename=filename, mimetype='zip')
@corpora.route('/<int:corpus_id>/analyse')
@login_required
def analyse_corpus(corpus_id):
corpus = Corpus.query.get_or_404(corpus_id)
display_options_form = DisplayOptionsForm(
prefix='display-options-form',
result_context=request.args.get('context', 20),
results_per_page=request.args.get('results_per_page', 30)
)
query_form = QueryForm(prefix='query-form',
query=request.args.get('query'))
query_download_form = QueryDownloadForm(prefix='query-download-form')
inspect_display_options_form = InspectDisplayOptionsForm(
prefix='inspect-display-options-form')
return render_template(
'corpora/analyse_corpus.html.j2',
corpus=corpus,
display_options_form=display_options_form,
inspect_display_options_form=inspect_display_options_form,
query_form=query_form,
query_download_form=query_download_form,
title='Corpus analysis'
)
@corpora.route('/<int:corpus_id>/delete')
@login_required
def delete_corpus(corpus_id):
corpus = Corpus.query.get_or_404(corpus_id)
if not (corpus.creator == current_user or current_user.is_administrator()):
abort(403)
flash('Corpus "{}" marked for deletion!'.format(corpus.title), 'corpus')
tasks.delete_corpus(corpus_id)
return redirect(url_for('main.dashboard'))
@corpora.route('/<int:corpus_id>/files/add', methods=['GET', 'POST'])
@login_required
def add_corpus_file(corpus_id):
corpus = Corpus.query.get_or_404(corpus_id)
if not (corpus.creator == current_user or current_user.is_administrator()):
abort(403)
form = AddCorpusFileForm(corpus, prefix='add-corpus-file-form')
if form.is_submitted():
if not form.validate():
return make_response(form.errors, 400)
# Save the file
form.file.data.save(os.path.join(corpus.path, form.file.data.filename))
corpus_file = CorpusFile(address=form.address.data,
author=form.author.data,
booktitle=form.booktitle.data,
chapter=form.chapter.data,
corpus=corpus,
editor=form.editor.data,
filename=form.file.data.filename,
institution=form.institution.data,
journal=form.journal.data,
pages=form.pages.data,
publisher=form.publisher.data,
publishing_year=form.publishing_year.data,
school=form.school.data,
title=form.title.data)
db.session.add(corpus_file)
corpus.status = 'unprepared'
db.session.commit()
flash('Corpus file "{}" added!'.format(corpus_file.filename), 'corpus')
event = 'user_{}_patch'.format(corpus.user_id)
jsonpatch = [{'op': 'replace', 'path': '/corpora/{}/status'.format(corpus.id), 'value': corpus.status}, # noqa
{'op': 'add', 'path': '/corpora/{}/files/{}'.format(corpus.id, corpus_file.id), 'value': corpus_file.to_dict()}] # noqa
room = 'user_{}'.format(corpus.user_id)
socketio.emit(event, jsonpatch, room=room)
return make_response({'redirect_url': url_for('.corpus', corpus_id=corpus.id)}, 201) # noqa
return render_template('corpora/add_corpus_file.html.j2', corpus=corpus,
form=form, title='Add corpus file')
@corpora.route('/<int:corpus_id>/files/<int:corpus_file_id>/delete')
@login_required
def delete_corpus_file(corpus_id, corpus_file_id):
corpus_file = CorpusFile.query.get_or_404(corpus_file_id)
if not corpus_file.corpus_id == corpus_id:
abort(404)
if not (corpus_file.corpus.creator == current_user
or current_user.is_administrator()):
abort(403)
flash('Corpus file "{}" marked for deletion!'.format(corpus_file.filename), 'corpus') # noqa
tasks.delete_corpus_file(corpus_file_id)
return redirect(url_for('.corpus', corpus_id=corpus_id))
@corpora.route('/<int:corpus_id>/files/<int:corpus_file_id>/download')
@login_required
def download_corpus_file(corpus_id, corpus_file_id):
corpus_file = CorpusFile.query.get_or_404(corpus_file_id)
if not corpus_file.corpus_id == corpus_id:
abort(404)
if not (corpus_file.corpus.creator == current_user
or current_user.is_administrator()):
abort(403)
return send_from_directory(as_attachment=True,
directory=os.path.dirname(corpus_file.path),
filename=corpus_file.filename)
@corpora.route('/<int:corpus_id>/files/<int:corpus_file_id>',
methods=['GET', 'POST'])
@login_required
def corpus_file(corpus_id, corpus_file_id):
corpus = Corpus.query.get_or_404(corpus_id)
if not (corpus.creator == current_user or current_user.is_administrator()):
abort(403)
corpus_file = CorpusFile.query.get_or_404(corpus_file_id)
if corpus_file.corpus != corpus:
abort(404)
form = EditCorpusFileForm(prefix='edit-corpus-file-form')
if form.validate_on_submit():
corpus_file.address = form.address.data
corpus_file.author = form.author.data
corpus_file.booktitle = form.booktitle.data
corpus_file.chapter = form.chapter.data
corpus_file.editor = form.editor.data
corpus_file.institution = form.institution.data
corpus_file.journal = form.journal.data
corpus_file.pages = form.pages.data
corpus_file.publisher = form.publisher.data
corpus_file.publishing_year = form.publishing_year.data
corpus_file.school = form.school.data
corpus_file.title = form.title.data
corpus.status = 'unprepared'
db.session.commit()
flash('Corpus file "{}" edited!'.format(corpus_file.filename), 'corpus') # noqa
return redirect(url_for('.corpus', corpus_id=corpus_id))
# If no form is submitted or valid, fill out fields with current values
form.address.data = corpus_file.address
form.author.data = corpus_file.author
form.booktitle.data = corpus_file.booktitle
form.chapter.data = corpus_file.chapter
form.editor.data = corpus_file.editor
form.institution.data = corpus_file.institution
form.journal.data = corpus_file.journal
form.pages.data = corpus_file.pages
form.publisher.data = corpus_file.publisher
form.publishing_year.data = corpus_file.publishing_year
form.school.data = corpus_file.school
form.title.data = corpus_file.title
return render_template('corpora/corpus_file.html.j2', corpus=corpus,
corpus_file=corpus_file, form=form,
title='Edit corpus file')
@corpora.route('/<int:corpus_id>/prepare')
@login_required
def prepare_corpus(corpus_id):
corpus = Corpus.query.get_or_404(corpus_id)
if not (corpus.creator == current_user or current_user.is_administrator()):
abort(403)
if corpus.files.all():
tasks.build_corpus(corpus_id)
flash('Corpus "{}" has been marked to get build!'.format(corpus.title), 'corpus') # noqa
else:
flash('Can not build corpus "{}": No corpus file(s)!'.format(corpus.title), 'error') # noqa
return redirect(url_for('.corpus', corpus_id=corpus_id))
# Following are view functions to add, view etc. exported results.
@corpora.route('/result/add', methods=['GET', 'POST'])
@login_required
def add_query_result():
'''
View to import a result as a json file.
'''
form = AddQueryResultForm(prefix='add-query-result-form')
if form.is_submitted():
if not form.validate():
return make_response(form.errors, 400)
query_result = QueryResult(creator=current_user,
description=form.description.data,
filename=form.file.data.filename,
title=form.title.data)
db.session.add(query_result)
db.session.flush()
db.session.refresh(query_result)
try:
os.makedirs(os.path.dirname(query_result.path))
except OSError:
logging.error('Make dir {} led to an OSError!'.format(query_result.path)) # noqa
db.session.rollback()
flash('Internal Server Error', 'error')
return make_response(
{'redirect_url': url_for('.add_query_result')}, 500)
# save the uploaded file
form.file.data.save(query_result.path)
# parse json from file
with open(query_result.path, 'r') as file:
query_result_file_content = json.load(file)
# parse json schema
# with open('app/static/json_schema/nopaque_cqi_py_results_schema.json', 'r') as file: # noqa
# schema = json.load(file)
# try:
# # validate imported json file
# validate(instance=query_result_file_content, schema=schema)
# except Exception:
# tasks.delete_query_result(query_result.id)
# flash('Uploaded file is invalid', 'result')
# return make_response(
# {'redirect_url': url_for('.add_query_result')}, 201)
query_result_file_content.pop('matches')
query_result_file_content.pop('cpos_lookup')
query_result.query_metadata = query_result_file_content
db.session.commit()
event = 'user_{}_patch'.format(query_result.user_id)
jsonpatch = [{'op': 'add', 'path': '/query_results/{}'.format(query_result.id), 'value': query_result.to_dict()}] # noqa
room = 'user_{}'.format(query_result.user_id)
socketio.emit(event, jsonpatch, room=room)
flash('Query result added!', 'result')
return make_response({'redirect_url': url_for('.query_result', query_result_id=query_result.id)}, 201) # noqa
return render_template('corpora/query_results/add_query_result.html.j2',
form=form, title='Add query result')
@corpora.route('/result/<int:query_result_id>')
@login_required
def query_result(query_result_id):
query_result = QueryResult.query.get_or_404(query_result_id)
if not (query_result.creator == current_user
or current_user.is_administrator()):
abort(403)
return render_template('corpora/query_results/query_result.html.j2',
query_result=query_result, title='Query result')
@corpora.route('/result/<int:query_result_id>/inspect')
@login_required
def inspect_query_result(query_result_id):
'''
View to inspect imported result file in a corpus analysis like interface
'''
query_result = QueryResult.query.get_or_404(query_result_id)
query_metadata = query_result.query_metadata
if not (query_result.creator == current_user
or current_user.is_administrator()):
abort(403)
display_options_form = DisplayOptionsForm(
prefix='display-options-form',
results_per_page=request.args.get('results_per_page', 30),
result_context=request.args.get('context', 20)
)
inspect_display_options_form = InspectDisplayOptionsForm(
prefix='inspect-display-options-form'
)
with open(query_result.path, 'r') as query_result_file:
query_result_file_content = json.load(query_result_file)
return render_template('corpora/query_results/inspect.html.j2',
query_result=query_result,
display_options_form=display_options_form,
inspect_display_options_form=inspect_display_options_form, # noqa
query_result_file_content=query_result_file_content,
query_metadata=query_metadata,
title='Inspect query result')
@corpora.route('/result/<int:query_result_id>/delete')
@login_required
def delete_query_result(query_result_id):
query_result = QueryResult.query.get_or_404(query_result_id)
if not (query_result.creator == current_user
or current_user.is_administrator()):
abort(403)
flash('Query result "{}" has been marked for deletion!'.format(query_result), 'result') # noqa
tasks.delete_query_result(query_result_id)
return redirect(url_for('services.service', service="corpus_analysis"))
@corpora.route('/result/<int:query_result_id>/download')
@login_required
def download_query_result(query_result_id):
query_result = QueryResult.query.get_or_404(query_result_id)
if not (query_result.creator == current_user
or current_user.is_administrator()):
abort(403)
return send_from_directory(as_attachment=True,
directory=os.path.dirname(query_result.path),
filename=query_result.filename)