Compare commits

...

6 Commits

12 changed files with 280 additions and 88 deletions

View File

@ -62,13 +62,7 @@ CQI_FUNCTION_NAMES: List[str] = [
@socketio.on('cqi', namespace=ns)
@socketio_login_required
def cqi_over_sio(fn_data):
try:
fn_name: str = fn_data['fn_name']
except KeyError:
return {'code': 400, 'msg': 'Bad Request'}
fn_name: str = fn_data['fn_name']
fn_args: Dict = fn_data.get('fn_args', {})
def cqi_over_sio(fn_name: str, fn_args: Dict = {}):
try:
cqi_client: CQiClient = session['cqi_over_sio']['cqi_client']
cqi_client_lock: Lock = session['cqi_over_sio']['cqi_client_lock']
@ -77,7 +71,6 @@ def cqi_over_sio(fn_data):
if fn_name in CQI_FUNCTION_NAMES:
fn: Callable = getattr(cqi_client.api, fn_name)
elif fn_name in CQI_EXTENSION_FUNCTION_NAMES:
fn_args['cqi_client'] = cqi_client
fn: Callable = getattr(extensions_module, fn_name)
else:
return {'code': 400, 'msg': 'Bad Request'}
@ -92,14 +85,14 @@ def cqi_over_sio(fn_data):
return {'code': 400, 'msg': 'Bad Request'}
cqi_client_lock.acquire()
try:
return_value = fn(**fn_args)
fn_return_value = fn(**fn_args)
except BrokenPipeError:
return_value = {
fn_return_value = {
'code': 500,
'msg': 'Internal Server Error'
}
except CQiException as e:
return_value = {
return {
'code': 502,
'msg': 'Bad Gateway',
'payload': {
@ -110,11 +103,11 @@ def cqi_over_sio(fn_data):
}
finally:
cqi_client_lock.release()
if isinstance(return_value, CQiStatus):
if isinstance(fn_return_value, CQiStatus):
payload = {
'code': return_value.code,
'msg': return_value.__class__.__name__
'code': fn_return_value.code,
'msg': fn_return_value.__class__.__name__
}
else:
payload = return_value
payload = fn_return_value
return {'code': 200, 'msg': 'OK', 'payload': payload}

View File

@ -22,20 +22,22 @@ CQI_EXTENSION_FUNCTION_NAMES: List[str] = [
]
def ext_corpus_update_db(corpus: str):
    """Copy the size of a CQi corpus into the database corpus' token count.

    The database corpus is looked up through the ``corpus_id`` stored in the
    ``cqi_over_sio`` session entry; the CQi client is taken from the same
    session entry.

    Args:
        corpus: Name handed to ``cqi_client.corpora.get`` to fetch the CQi
            corpus.

    Returns:
        A ``StatusOk`` instance after the database session was committed.
    """
    sio_session = session['cqi_over_sio']
    db_corpus = Corpus.query.get(sio_session['corpus_id'])
    cqi_client: CQiClient = sio_session['cqi_client']
    db_corpus.num_tokens = cqi_client.corpora.get(corpus).size
    db.session.commit()
    return StatusOk()
def ext_corpus_static_data(cqi_client: CQiClient, corpus: str) -> Dict:
def ext_corpus_static_data(corpus: str) -> Dict:
db_corpus = Corpus.query.get(session['cqi_over_sio']['corpus_id'])
static_corpus_data_file = os.path.join(db_corpus.path, 'cwb', 'static.json')
if os.path.exists(static_corpus_data_file):
with open(static_corpus_data_file, 'r') as f:
return json.load(f)
cqi_client: CQiClient = session['cqi_over_sio']['cqi_client']
cqi_corpus = cqi_client.corpora.get(corpus)
##########################################################################
# A faster way to get cpos boundaries for smaller s_attrs #
@ -97,7 +99,24 @@ def ext_corpus_static_data(cqi_client: CQiClient, corpus: str) -> Dict:
static_corpus_data['corpus']['counts'][s_attr.name] = s_attr.size
static_corpus_data['s_attrs'][s_attr.name] = {'lexicon': {}, 'values': None}
static_corpus_data['values']['s_attrs'][s_attr.name] = {}
if s_attr.name in ['s', 'ent']:
cqi_corpus.query('Last', f'<{s_attr.name}> []* </{s_attr.name}>;')
cqi_subcorpus = cqi_corpus.subcorpora.get('Last')
first_match = 0
last_match = cqi_subcorpus.size - 1
match_boundaries = zip(
range(first_match, last_match + 1),
cqi_subcorpus.dump(cqi_subcorpus.fields['match'], first_match, last_match),
cqi_subcorpus.dump(cqi_subcorpus.fields['matchend'], first_match, last_match)
)
for id, lbound, rbound in match_boundaries:
static_corpus_data['s_attrs'][s_attr.name]['lexicon'][id] = {}
static_corpus_data['s_attrs'][s_attr.name]['lexicon'][id]['bounds'] = [lbound, rbound]
static_corpus_data['s_attrs'][s_attr.name]['lexicon'][id]['counts'] = {}
static_corpus_data['s_attrs'][s_attr.name]['lexicon'][id]['counts']['token'] = rbound - lbound + 1
cqi_subcorpus.drop()
for id in range(0, s_attr.size):
if s_attr.name not in ['s', 'ent']:
static_corpus_data['s_attrs'][s_attr.name]['lexicon'][id] = {}
lbound, rbound = s_attr.cpos_by_id(id)
static_corpus_data['s_attrs'][s_attr.name]['lexicon'][id]['bounds'] = [lbound, rbound]
@ -137,11 +156,11 @@ def ext_corpus_static_data(cqi_client: CQiClient, corpus: str) -> Dict:
def ext_corpus_paginate_corpus(
cqi_client: CQiClient,
corpus: str,
page: int = 1,
per_page: int = 20
) -> Dict:
cqi_client: CQiClient = session['cqi_over_sio']['cqi_client']
cqi_corpus = cqi_client.corpora.get(corpus)
# Sanity checks
if (
@ -182,13 +201,13 @@ def ext_corpus_paginate_corpus(
def ext_cqp_paginate_subcorpus(
cqi_client: CQiClient,
subcorpus: str,
context: int = 50,
page: int = 1,
per_page: int = 20
) -> Dict:
corpus_name, subcorpus_name = subcorpus.split(':', 1)
cqi_client: CQiClient = session['cqi_over_sio']['cqi_client']
cqi_corpus = cqi_client.corpora.get(corpus_name)
cqi_subcorpus = cqi_corpus.subcorpora.get(subcorpus_name)
# Sanity checks
@ -230,12 +249,12 @@ def ext_cqp_paginate_subcorpus(
def ext_cqp_partial_export_subcorpus(
cqi_client: CQiClient,
subcorpus: str,
match_id_list: list,
context: int = 50
) -> Dict:
corpus_name, subcorpus_name = subcorpus.split(':', 1)
cqi_client: CQiClient = session['cqi_over_sio']['cqi_client']
cqi_corpus = cqi_client.corpora.get(corpus_name)
cqi_subcorpus = cqi_corpus.subcorpora.get(subcorpus_name)
cqi_subcorpus_partial_export = partial_export_subcorpus(cqi_subcorpus, match_id_list, context=context)
@ -243,11 +262,11 @@ def ext_cqp_partial_export_subcorpus(
def ext_cqp_export_subcorpus(
cqi_client: CQiClient,
subcorpus: str,
context: int = 50
) -> Dict:
corpus_name, subcorpus_name = subcorpus.split(':', 1)
cqi_client: CQiClient = session['cqi_over_sio']['cqi_client']
cqi_corpus = cqi_client.corpora.get(corpus_name)
cqi_subcorpus = cqi_corpus.subcorpora.get(subcorpus_name)
cqi_subcorpus_export = export_subcorpus(cqi_subcorpus, context=context)

View File

@ -7,6 +7,8 @@ from app.decorators import content_negotiation
from app.models import Corpus, CorpusFollowerRole
from . import bp
from .decorators import corpus_follower_permission_required, corpus_owner_or_admin_required
import nltk
from string import punctuation
@bp.route('/<hashid:corpus_id>', methods=['DELETE'])
@ -56,6 +58,27 @@ def build_corpus(corpus_id):
}
return response_data, 202
@bp.route('/stopwords')
@content_negotiation(produces='application/json')
def get_stopwords():
    """Return the stopword lists used by the corpus analysis frontend.

    The payload maps every supported language to its NLTK stopword list and
    adds two extra entries: ``punctuation`` (``string.punctuation`` plus the
    empty string and ``|``) and an initially empty ``user_stopwords`` list.

    Returns:
        A ``(response_data, status)`` tuple where ``response_data`` is
        ``{'stopwords': {name: [word, ...], ...}}`` and ``status`` is 200.
    """
    # NOTE(review): an earlier draft read a single ``language`` from the
    # request body; for now all languages are always returned.
    try:
        # Only fetch the corpus when it is missing locally; calling
        # nltk.download() unconditionally would run on every request.
        nltk.data.find('corpora/stopwords')
    except LookupError:
        nltk.download('stopwords')
    languages = [
        "german",
        "english",
        "catalan",
        "greek",
        "spanish",
        "french",
        "italian",
        "russian",
        "chinese",
    ]
    stopwords = {
        language: nltk.corpus.stopwords.words(language)
        for language in languages
    }
    stopwords['punctuation'] = list(punctuation) + ['', '|']
    stopwords['user_stopwords'] = []
    # A synchronous read is answered with 200 (OK); 202 (Accepted) would
    # signal that processing is still pending.
    return {'stopwords': stopwords}, 200
# @bp.route('/<hashid:corpus_id>/generate-share-link', methods=['POST'])
# @corpus_follower_permission_required('MANAGE_FOLLOWERS')

View File

@ -42,7 +42,6 @@ def job_log(job_id):
with open(os.path.join(job.path, 'pipeline_data', 'logs', 'pyflow_log.txt')) as log_file:
log = log_file.read()
response_data = {
'message': '',
'jobLog': log
}
return response_data, 200

View File

@ -1,6 +1,8 @@
class CorpusAnalysisApp {
constructor(corpusId) {
this.data = {};
this.data = {
promises: {getStopwords: []}
};
// HTML elements
this.elements = {
@ -22,6 +24,49 @@ class CorpusAnalysisApp {
};
}
// getStopwords(language) {
// if (language in this.data.promises.getStopwords) {
// console.log('Stopwords already loaded');
// return this.data.promises.getStopwords[language];
// }
// this.data.promises.getStopwords[language] = new Promise((resolve, reject) => {
// Requests.corpora.entity.getStopwords(language)
// .then((response) => {
// response.json()
// .then((json) => {
// let stopwords = json.stopwords;
// resolve(stopwords);
// })
// .catch((error) => {
// reject(error);
// });
// });
// });
// return this.data.promises.getStopwords[language];
// }
getStopwords() {
if (this.data.promises.getStopwords.length !== 0) {
console.log('Stopwords already loaded');
return this.data.promises.getStopwords;
}
this.data.promises.getStopwords = new Promise((resolve, reject) => {
Requests.corpora.entity.getStopwords()
.then((response) => {
response.json()
.then((json) => {
let stopwords = json.stopwords;
resolve(stopwords);
})
.catch((error) => {
reject(error);
});
});
});
return this.data.promises.getStopwords;
}
init() {
this.disableActionElements();
this.elements.m.initModal.open();
@ -55,14 +100,13 @@ class CorpusAnalysisApp {
this.elements.m.initModal.close();
},
(cqiError) => {
let errorString = `${cqiError.code}: ${cqiError.constructor.name}`;
let errorsElement = this.elements.initModal.querySelector('.errors');
let progressElement = this.elements.initModal.querySelector('.progress');
errorsElement.innerText = JSON.stringify(cqiError);
errorsElement.innerText = errorString;
errorsElement.classList.remove('hide');
app.flash(errorString, 'error');
progressElement.classList.add('hide');
if ('payload' in cqiError && 'code' in cqiError.payload && 'msg' in cqiError.payload) {
app.flash(`${cqiError.payload.code}: ${cqiError.payload.msg}`, 'error');
}
}
);
@ -155,9 +199,28 @@ class CorpusAnalysisApp {
type: 'pie'
}
];
let config = {responsive: true};
let graphLayout = {
showlegend: true,
height: 486,
margin: {
l: 10,
r: 10,
b: 10,
t: 10
},
legend: {
"orientation": "h",
font: {
size: 10
}
}
};
let config = {
responsive: true,
displaylogo: false
};
Plotly.newPlot(textProportionsGraphicElement, graphData, config);
Plotly.newPlot(textProportionsGraphicElement, graphData, graphLayout, config);
}
renderFrequenciesGraphic(corpusData) {
@ -165,41 +228,105 @@ class CorpusAnalysisApp {
let frequenciesTokenCategoryDropdownListElement = document.querySelector("#frequencies-token-category-dropdown");
let frequenciesGraphicElement = document.querySelector('#frequencies-graphic');
let texts = Object.entries(corpusData.s_attrs.text.lexicon);
let graphtype = document.querySelector('.frequencies-graph-mode-button.disabled').dataset.graphType;
let graphModeButtons = document.querySelectorAll('.frequencies-graph-mode-button');
frequenciesTokenCategoryDropdownListElement.addEventListener('click', (event) => {
frequenciesTokenCategoryDropdownElement.firstChild.textContent = event.target.innerHTML;
this.renderFrequenciesGraphic(corpusData);
});
graphModeButtons.forEach(graphModeButton => {
graphModeButton.addEventListener('click', (event) => {
graphModeButtons.forEach(btn => {
btn.classList.remove('disabled');
});
event.target.closest('.frequencies-graph-mode-button').classList.add('disabled');
this.renderFrequenciesGraphic(corpusData);
});
});
let tokenCategory = frequenciesTokenCategoryDropdownElement.firstChild.textContent.toLowerCase();
let graphData = this.createFrequenciesGraphData(tokenCategory, texts, corpusData);
this.createFrequenciesGraphData(tokenCategory, texts, corpusData, graphtype)
.then(graphData => {
let graphLayout = {
barmode: 'stack',
type: 'bar'
barmode: graphtype === 'bar' ? 'stack' : '',
margin: {
t: 20,
l: 50
},
yaxis: {
showticklabels: graphtype === 'markers' ? false : true
},
};
let config = {
responsive: true,
modeBarButtonsToRemove: ['zoom2d', 'select2d', 'lasso2d', 'zoomIn2d', 'zoomOut2d', 'autoScale2d', 'resetScale2d'],
displaylogo: false
};
let config = {responsive: true};
Plotly.newPlot(frequenciesGraphicElement, graphData, graphLayout, config);
});
}
createFrequenciesGraphData(category, texts, corpusData) {
createFrequenciesGraphData(category, texts, corpusData, graphtype) {
return new Promise((resolve, reject) => {
this.getStopwords()
.then(stopwords => {
this.renderStopwordSettingsModal(stopwords);
let stopwordList = [];
Object.values(stopwords).forEach(stopwordItems => {
stopwordItems.forEach(stopword => {
stopwordList.push(stopword);
});
});
let graphData = [];
let sortedData = Object.entries(corpusData.corpus.freqs[category]).sort((a, b) => b[1] - a[1]).slice(0, 5);
for (let item of sortedData) {
let filteredData = Object.entries(corpusData.corpus.freqs[category])
.sort((a, b) => b[1] - a[1])
.filter(item => !stopwordList.includes(corpusData.values.p_attrs[category][item[0]].toLowerCase()))
.slice(0, 5);
if (graphtype !== 'markers') {
for (let item of filteredData) {
let data = {
x: texts.map(text => `${corpusData.values.s_attrs.text[text[0]].title} (${corpusData.values.s_attrs.text[text[0]].publishing_year})`),
y: texts.map(text => text[1].freqs[category][item[0]]),
y: texts.map(text => text[1].freqs[category][item[0]] || 0),
name: corpusData.values.p_attrs[category][item[0]],
type: 'bar'
type: graphtype
};
graphData.push(data);
}
return graphData;
} else {
for (let item of filteredData) {
let size = texts.map(text => text[1].freqs[category][item[0]] || 0);
let data = {
x: texts.map(text => `${corpusData.values.s_attrs.text[text[0]].title} (${corpusData.values.s_attrs.text[text[0]].publishing_year})`),
y: texts.map(text => corpusData.values.p_attrs[category][item[0]]),
name: corpusData.values.p_attrs[category][item[0]],
text: texts.map(text => `${corpusData.values.p_attrs[category][item[0]]}<br>${text[1].freqs[category][item[0]] || 0}`),
mode: 'markers',
marker: {
size: size,
// sizeref: 2.0 * Math.max(...size) / (80**2),
// sizemode: 'area',
sizeref: 0.2
}
};
graphData.push(data);
}
}
resolve(graphData);
})
.catch(error => {
reject(error);
});
});
}
renderStopwordSettingsModal(stopwords) {
let stopwordInputField = document.querySelector('.stopword-input-field');
}
renderBoundsGraphic(corpusData) {
let boundsGraphicElement = document.querySelector('#bounds-graphic');
@ -232,7 +359,11 @@ class CorpusAnalysisApp {
}
};
let config = {responsive: true};
let config = {
responsive: true,
modeBarButtonsToRemove: ['zoom2d', 'select2d', 'lasso2d', 'zoomIn2d', 'zoomOut2d', 'autoScale2d', 'resetScale2d'],
displaylogo: false
};
Plotly.newPlot(boundsGraphicElement, graphData, graphLayout, config);
}

View File

@ -68,13 +68,11 @@ class CorpusAnalysisConcordance {
this.elements.progress.classList.add('hide');
this.app.enableActionElements();
},
(cqiStatus) => {
// TODDO: CHECK THIS!
this.elements.error.innerText = JSON.stringify(cqiStatus);
(cqiError) => {
let errorString = `${cqiError.code}: ${cqiError.constructor.name}`;
this.elements.error.innerText = errorString;
this.elements.error.classList.remove('hide');
if ('payload' in cqiStatus && 'code' in cqiStatus.payload && 'msg' in cqiStatus.payload) {
app.flash(`${cqiStatus.payload.code}: ${cqiStatus.payload.msg}`, 'error');
}
app.flash(errorString, 'error');
this.elements.progress.classList.add('hide');
this.app.enableActionElements();
}
@ -313,8 +311,9 @@ class CorpusAnalysisConcordance {
this.clearSubcorpusPagination();
}
},
(cQiError) => {
app.flash(`${cQiError.payload.code}: ${cQiError.payload.msg}`, 'error');
(cqiError) => {
let errorString = `${cqiError.code}: ${cqiError.constructor.name}`;
app.flash(errorString, 'error');
}
);
});

View File

@ -45,11 +45,10 @@ class CorpusAnalysisReader {
this.app.enableActionElements();
},
(cqiError) => {
this.elements.error.innerText = JSON.stringify(error);
let errorString = `${cqiError.code}: ${cqiError.constructor.name}`;
this.elements.error.innerText = errorString;
this.elements.error.classList.remove('hide');
if ('payload' in error && 'code' in error.payload && 'msg' in error.payload) {
app.flash(`${error.payload.code}: ${error.payload.msg}`, 'error');
}
app.flash(errorString, 'error');
this.elements.progress.classList.add('hide');
this.app.enableActionElements();
}

View File

@ -22,9 +22,11 @@ Requests.JSONfetch = (input, init={}) => {
response.json()
.then(
(json) => {
let message = json.message || json;
let message = json.message;
let category = json.category || 'message';
if (message) {
app.flash(message, category);
}
},
(error) => {
app.flash(`[${response.status}]: ${response.statusText}`, 'error');

View File

@ -31,6 +31,14 @@ Requests.corpora.entity.generateShareLink = (corpusId, role, expiration) => {
return Requests.JSONfetch(input, init);
};
/**
 * Request the stopword lists via GET /corpora/stopwords.
 *
 * @returns {Promise} Whatever Requests.JSONfetch returns for the request.
 */
Requests.corpora.entity.getStopwords = () => {
  const init = {method: 'GET'};
  return Requests.JSONfetch('/corpora/stopwords', init);
};
Requests.corpora.entity.isPublic = {};
Requests.corpora.entity.isPublic.update = (corpusId, isPublic) => {
@ -43,4 +51,3 @@ Requests.corpora.entity.isPublic.update = (corpusId, isPublic) => {
};

View File

@ -22,21 +22,20 @@ cqi.api.APIClient = class APIClient {
/**
* @param {string} fn_name
* @param {object} [fn_args={}]
* @returns {Promise<cqi.status.StatusConnectOk>}
* @returns {Promise}
*/
#request(fn_name, fn_args = {}) {
return new Promise((resolve, reject) => {
this.socket.timeout(this.timeout).emit('cqi', {fn_name: fn_name, fn_args: fn_args}, (timeoutError, response) => {
if (timeoutError) {
reject(timeoutError);
}
// this.socket.timeout(this.timeout).emit('cqi', {fn_name: fn_name, fn_args: fn_args}, (timeoutError, response) => {
// if (timeoutError) {
// reject(timeoutError);
// }
this.socket.emit('cqi', fn_name, fn_args, (response) => {
if (response.code === 200) {
resolve(response.payload);
}
if (response.code === 500) {
} else if (response.code === 500) {
reject(new Error(`[${response.code}] ${response.msg}`));
}
if (response.code === 502) {
} else if (response.code === 502) {
reject(new cqi.errors.lookup[response.payload.code]());
}
});

View File

@ -98,19 +98,19 @@
</div>
</div>
<div class="row">
<div class="col s6">
<div class="col s4">
<div class="card hoverable">
<div class="card-content">
<span class="card-title">Proportions</span>
<p>of texts within the corpus</p>
<div id="text-proportions-graphic"></div>
<div id="text-proportions-graphic" style="width:100"></div>
</div>
</div>
</div>
<div class="col s6">
<div class="col s8">
<div class="card hoverable">
<div class="card-content">
<span class="card-title"><a class="dropdown-trigger btn" data-target="frequencies-token-category-dropdown">Word<i class="material-icons right">arrow_drop_down</i></a> Frequencies</span>
<span class="card-title">Frequencies</span>
<ul id="frequencies-token-category-dropdown" class="dropdown-content">
<li><a data-token-category="word">Word</a></li>
<li><a data-token-category="lemma">Lemma</a></li>
@ -119,6 +119,11 @@
</ul>
<p>within the texts of the 5 most frequent words in the corpus</p>
<div id="frequencies-graphic"></div>
<a class="dropdown-trigger btn" data-target="frequencies-token-category-dropdown">Word<i class="material-icons right">arrow_drop_down</i></a>
<a class="btn disabled frequencies-graph-mode-button" data-graph-type="bar"><i class="material-icons">equalizer</i></a>
<a class="btn frequencies-graph-mode-button" data-graph-type="scatter"><i class="material-icons">show_chart</i></a>
<a class="btn frequencies-graph-mode-button" data-graph-type="markers"><i class="material-icons">bubble_chart</i></a>
<a class="btn-flat modal-trigger" href="#frequencies-stopwords-setting-modal"><i class="material-icons grey-text text-darken-2">settings</i></a>
</div>
</div>
</div>
@ -161,6 +166,21 @@
</div>
</div>
<div class="modal" id="frequencies-stopwords-setting-modal">
<div class="modal-content">
<h4>Settings</h4>
<p>Here you can change the stopword-lists. Add your own stopwords or change the already existing below.</p>
<div class="chips chips-placeholder stopword-input-field"></div>
<div class="row">
<div class="input-field col s3">
<select class="stopword-language-selection"></select>
<label>Stopword language select</label>
</div>
</div>
</div>
</div>
{% for extension in extensions %}
{{ extension.modals }}
{% endfor %}

View File

@ -20,6 +20,7 @@ Flask-WTF
hiredis
MarkupSafe==2.0.1
marshmallow-sqlalchemy==0.29.0
nltk
psycopg2
PyJWT
pyScss