compatibility fixes and add reimplementations

This commit is contained in:
Patrick Jentsch
2020-03-28 19:29:19 +01:00
parent 970d7024e0
commit e3fde2d5c9
12 changed files with 499 additions and 44 deletions

View File

@ -3,3 +3,4 @@ from flask import Blueprint
corpora = Blueprint('corpora', __name__)
from . import events, views # noqa
from . import pj_events, pj_views # noqa

View File

@ -59,14 +59,14 @@ class Subcorpus:
else:
lc_rbound = max(0, (match_start - 1))
if lc_rbound != match_start:
lc_lbound = max(0, match_start - context)
lc_lbound = max(0, (match_start - 1 - context))
lc = (lc_lbound, lc_rbound)
cpos_list_lbound = lc_lbound
else:
cpos_list_lbound = match_start
rc_lbound = min((match_end + 1), (self.corpus.size - 1))
if rc_lbound != match_end:
rc_rbound = min((match_end + context),
rc_rbound = min((match_end + 1 + context),
(self.corpus.size - 1))
rc = (rc_lbound, rc_rbound)
cpos_list_rbound = rc_rbound

View File

@ -72,9 +72,8 @@ def corpus_analysis_query(query):
logger.warning('Aborting due to status being "abort".')
break
else:
chunk = results.dump_values(context=context,
first_result=chunk_start,
num_results=chunk_size)
chunk = results.export(context=context, cutoff=chunk_size,
expand_lists=True, offset=chunk_start)
if (results.size == 0):
progress = 100
else:

View File

@ -15,7 +15,8 @@ class AddCorpusFileForm(FlaskForm):
journal = StringField('Journal', validators=[Length(0, 255)])
pages = StringField('Pages', validators=[Length(0, 255)])
publisher = StringField('Publisher', validators=[Length(0, 255)])
publishing_year = IntegerField('Publishing year', validators=[DataRequired()])
publishing_year = IntegerField('Publishing year',
validators=[DataRequired()])
school = StringField('School', validators=[Length(0, 255)])
submit = SubmitField()
title = StringField('Title', validators=[DataRequired(), Length(1, 255)])
@ -43,7 +44,8 @@ class EditCorpusFileForm(FlaskForm):
journal = StringField('Journal', validators=[Length(0, 255)])
pages = StringField('Pages', validators=[Length(0, 255)])
publisher = StringField('Publisher', validators=[Length(0, 255)])
publishing_year = IntegerField('Publishing year', validators=[DataRequired()])
publishing_year = IntegerField('Publishing year',
validators=[DataRequired()])
school = StringField('School', validators=[Length(0, 255)])
submit = SubmitField()
title = StringField('Title', validators=[DataRequired(), Length(1, 255)])

112
app/corpora/pj_events.py Normal file
View File

@ -0,0 +1,112 @@
from flask import current_app, request
from flask_login import current_user
from .cqi import CQiClient
from .. import db, socketio
from ..decorators import socketio_login_required
from ..events import connected_sessions
from ..models import Corpus, User
import math
# Registry of active analysis sessions, grouped by corpus:
# maps a corpus id to the list of Socket.IO session ids (sid) attached to it.
# {<corpus_id>: [<sid>, ...], ...}
pj_corpus_analysis_sessions = {}

# Registry of CQi clients, one per Socket.IO session:
# maps a Socket.IO session id (sid) to its CQiClient instance.
# {<sid>: CQiClient, ...}
pj_corpus_analysis_clients = {}
@socketio.on('pj_corpus_analysis_init')
@socketio_login_required
def pj_init_corpus_analysis(corpus_id):
    """Handle the 'pj_corpus_analysis_init' Socket.IO event.

    Spawns ``pj_corpus_analysis_session_handler`` as a background task for
    the requesting user and Socket.IO session. Any response to the client is
    emitted by the handler, not by this function.

    :param corpus_id: id of the corpus the client wants to analyse.
    """
    # The background task runs outside of this request, so hand it the real
    # application object instead of the ``current_app`` proxy.
    app = current_app._get_current_object()
    handler_args = (app, corpus_id, current_user.id, request.sid)
    socketio.start_background_task(pj_corpus_analysis_session_handler,
                                   *handler_args)
@socketio.on('pj_corpus_analysis_query')
@socketio_login_required
def pj_corpus_analysis_query(query):
    """Handle the 'pj_corpus_analysis_query' Socket.IO event.

    Runs ``query`` against the 'CORPUS' entry of the CQi client registered
    for the requesting session and streams the matches back in chunks.

    Emitted events (all sent to the requesting session only):

    * 'pj_corpus_analysis_query' -- one status message: code 404 when no
      client is registered for the session, code 1 with the exception text
      when the query fails, or code 200 with the number of matches.
    * 'pj_corpus_analysis_query_results' -- one message per exported chunk,
      carrying the chunk and a progress percentage (0-100).

    :param query: the query passed on to ``corpus.query``.
    """
    sid = request.sid
    client = pj_corpus_analysis_clients.get(sid)
    if client is None:
        # NOTE(review): code 404 is paired with the reason phrase of HTTP
        # 424 ('Failed Dependency'); left untouched because the client side
        # may rely on either value -- confirm and align.
        response = {'code': 404, 'msg': 'Failed Dependency'}
        socketio.emit('pj_corpus_analysis_query', response, room=sid)
        return
    corpus = client.corpora.get('CORPUS')
    try:
        results = corpus.query(query)
    except Exception as e:
        response = {'code': 1, 'msg': str(e)}
        socketio.emit('pj_corpus_analysis_query', response, room=sid)
        return
    response = {'code': 200, 'msg': 'OK',
                'data': {'num_matches': results.size}}
    socketio.emit('pj_corpus_analysis_query', response, room=sid)
    # The first chunk is kept small; every following chunk uses 250 matches.
    chunk_size = 100
    chunk_start = 0
    context = 100
    # Do-while loop: always export and emit once, so that an empty result
    # still yields a message with progress 100. Stop as soon as the next
    # offset reaches the number of matches -- a `chunk_start <= results.size`
    # condition would request one more chunk starting past the last match
    # whenever the offset lands exactly on results.size.
    while True:
        chunk = results.export(context=context, offset=chunk_start,
                               cutoff=chunk_size)
        if results.size == 0:
            progress = 100
        else:
            progress = ((chunk_start + chunk_size) / results.size) * 100
            progress = min(100, int(math.ceil(progress)))
        socketio.emit('pj_corpus_analysis_query_results',
                      {'chunk': chunk, 'progress': progress},
                      room=sid)
        chunk_start += chunk_size
        chunk_size = 250
        if chunk_start >= results.size:
            break
def pj_corpus_analysis_session_handler(app, corpus_id, user_id, session_id):
    """Set up, observe and tear down one corpus analysis session.

    Intended to run as a background task. All status messages are emitted as
    'pj_corpus_analysis_init' events to ``session_id``:

    * 404 -- no corpus with ``corpus_id`` exists.
    * 403 -- the user is neither the corpus creator nor an administrator.
    * 500 -- connecting the CQi client failed.
    * 200 -- the client is connected and registered for the session.

    :param app: application object whose app context is pushed for the task.
    :param corpus_id: id of the corpus to analyse.
    :param user_id: id of the user who requested the session.
    :param session_id: Socket.IO session id (sid) of the requesting client.
    """
    def reply(code, msg):
        # Send a status message for the init event to the requesting session.
        socketio.emit('pj_corpus_analysis_init',
                      {'code': code, 'msg': msg},
                      room=session_id)

    with app.app_context():
        # --- Setup analysis session ---
        corpus = Corpus.query.get(corpus_id)
        user = User.query.get(user_id)
        if corpus is None:
            reply(404, 'Not Found')
            return
        is_creator = corpus.creator == user
        if not (is_creator or user.is_administrator()):
            reply(403, 'Forbidden')
            return
        # Poll the database every 3 seconds until the corpus reports the
        # status 'analysing'.
        while corpus.status != 'analysing':
            db.session.refresh(corpus)
            socketio.sleep(3)
        client = CQiClient('pj_corpus_{}_analysis'.format(corpus_id))
        try:
            client.connect()
        except Exception:
            reply(500, 'Internal Server Error')
            return
        pj_corpus_analysis_clients[session_id] = client
        pj_corpus_analysis_sessions.setdefault(corpus_id, []).append(
            session_id)
        reply(200, 'OK')

        # --- Observe analysis session ---
        # Idle for as long as the Socket.IO session is listed as connected.
        while session_id in connected_sessions:
            socketio.sleep(3)

        # --- Teardown analysis session ---
        try:
            client.disconnect()
        except Exception:
            # Best effort: a failing disconnect must not prevent the cleanup.
            pass
        pj_corpus_analysis_clients.pop(session_id, None)
        corpus_sessions = pj_corpus_analysis_sessions[corpus_id]
        corpus_sessions.remove(session_id)
        if not corpus_sessions:
            # Last session of this corpus is gone: drop the registry entry
            # and request the analysis to be stopped.
            pj_corpus_analysis_sessions.pop(corpus_id, None)
            corpus.status = 'stop analysis'
            db.session.commit()

35
app/corpora/pj_forms.py Normal file
View File

@ -0,0 +1,35 @@
from flask_wtf import FlaskForm
from wtforms import BooleanField, StringField, SubmitField, SelectField
class PJQueryForm(FlaskForm):
    """Form for entering and submitting a corpus query."""

    query = StringField(label='Query')
    submit = SubmitField(label='Send query')
def _pj_step_choices():
    """Return the select choices: a placeholder plus '10' to '100' by tens."""
    steps = [str(value) for value in range(10, 101, 10)]
    return [('', 'Choose your option')] + [(step, step) for step in steps]


class PJDisplayOptionsForm(FlaskForm):
    """Form holding the display options of the corpus analysis page.

    Both select fields offer an empty placeholder entry followed by the
    values '10' through '100' in steps of ten.
    """

    expert_mode = BooleanField('Expert mode')
    # Each field gets its own freshly built list of choices.
    result_context = SelectField('Result context',
                                 choices=_pj_step_choices())
    results_per_page = SelectField('Results per page',
                                   choices=_pj_step_choices())

25
app/corpora/pj_views.py Normal file
View File

@ -0,0 +1,25 @@
from flask import abort, render_template, request
from flask_login import current_user, login_required
from . import corpora
from .pj_forms import PJDisplayOptionsForm, PJQueryForm
from .. import db
from ..models import Corpus
@corpora.route('/<int:corpus_id>/pj_analyse')
@login_required
def pj_analyse_corpus(corpus_id):
    """Render the corpus analysis page and request the analysis to start.

    Responds with 404 if no corpus with ``corpus_id`` exists and with 403 if
    the current user is neither the creator of the corpus nor an
    administrator. A corpus in status 'prepared' is switched to
    'start analysis' before the page is rendered.

    Query string parameters used to pre-fill the forms: ``context``
    (default 20), ``results_per_page`` (default 30) and ``query``.

    :param corpus_id: id of the corpus to analyse.
    :returns: the rendered 'corpora/pj_analyse_corpus.html.j2' template.
    """
    corpus = Corpus.query.get_or_404(corpus_id)
    # Only the creator or an administrator may trigger an analysis; without
    # this check any logged-in user could change the status of a foreign
    # corpus just by opening this URL.
    if not (corpus.creator == current_user
            or current_user.is_administrator()):
        abort(403)
    if corpus.status == 'prepared':
        corpus.status = 'start analysis'
        db.session.commit()
    display_options_form = PJDisplayOptionsForm(
        prefix='display-options-form',
        result_context=request.args.get('context', 20),
        results_per_page=request.args.get('results_per_page', 30))
    query_form = PJQueryForm(prefix='query-form',
                             query=request.args.get('query'))
    return render_template('corpora/pj_analyse_corpus.html.j2',
                           corpus_id=corpus_id,
                           display_options_form=display_options_form,
                           query_form=query_form, title='Corpus analysis')