mirror of
https://gitlab.ub.uni-bielefeld.de/sfb1288inf/nopaque.git
synced 2025-01-24 00:30:35 +00:00
Add export options to subcorpora
This commit is contained in:
parent
8b8df68781
commit
14820643ed
@ -1,13 +1,9 @@
|
||||
from flask import session
|
||||
import cqi
|
||||
import json
|
||||
import math
|
||||
import os
|
||||
from app import socketio
|
||||
from app.decorators import socketio_login_required
|
||||
from app.models import Corpus
|
||||
from . import NAMESPACE as ns
|
||||
from .utils import cqi_over_socketio, export_subcorpus
|
||||
from .utils import cqi_over_socketio, export_subcorpus, partial_export_subcorpus
|
||||
|
||||
|
||||
@socketio.on('cqi.corpora.corpus.subcorpora.get', namespace=ns)
|
||||
@ -109,6 +105,16 @@ def cqi_corpora_corpus_subcorpora_subcorpus_paginate(cqi_client: cqi.CQiClient,
|
||||
return {'code': 200, 'msg': 'OK', 'payload': payload}
|
||||
|
||||
|
||||
@socketio.on('cqi.corpora.corpus.subcorpora.subcorpus.partial_export', namespace=ns)
@socketio_login_required
@cqi_over_socketio
def cqi_corpora_corpus_subcorpora_subcorpus_partial_export(cqi_client: cqi.CQiClient, corpus_name: str, subcorpus_name: str, match_id_list: list, context: int = 50):  # noqa
    """Socket.IO handler: export a selection of matches of one subcorpus.

    Resolves ``corpus_name`` and ``subcorpus_name`` through ``cqi_client``
    and passes the subcorpus, ``match_id_list`` and ``context`` on to
    ``partial_export_subcorpus``.

    NOTE(review): ``cqi_client`` is presumably injected by the
    ``cqi_over_socketio`` decorator -- confirm against its definition.

    Returns:
        A response dict with ``code`` 200, ``msg`` ``'OK'`` and the export
        result under ``payload``.
    """
    selected_subcorpus = (
        cqi_client.corpora.get(corpus_name).subcorpora.get(subcorpus_name)
    )
    export_payload = partial_export_subcorpus(
        selected_subcorpus,
        match_id_list,
        context=context
    )
    return {'code': 200, 'msg': 'OK', 'payload': export_payload}
|
||||
|
||||
|
||||
@socketio.on('cqi.corpora.corpus.subcorpora.subcorpus.export', namespace=ns)
|
||||
@socketio_login_required
|
||||
@cqi_over_socketio
|
||||
@ -116,8 +122,4 @@ def cqi_corpora_corpus_subcorpora_subcorpus_export(cqi_client: cqi.CQiClient, co
|
||||
cqi_corpus = cqi_client.corpora.get(corpus_name)
|
||||
cqi_subcorpus = cqi_corpus.subcorpora.get(subcorpus_name)
|
||||
cqi_subcorpus_export = export_subcorpus(cqi_subcorpus, context=context)
|
||||
corpus = Corpus.query.get(session['d']['corpus_id'])
|
||||
file_path = os.path.join(corpus.path, f'{subcorpus_name}.json')
|
||||
with open(file_path, 'w') as file:
|
||||
json.dump(cqi_subcorpus_export, file)
|
||||
return {'code': 200, 'msg': 'OK'}
|
||||
return {'code': 200, 'msg': 'OK', 'payload': cqi_subcorpus_export}
|
||||
|
@ -68,7 +68,7 @@ def lookups_by_cpos(corpus, cpos_list):
|
||||
cpos_attr_values[i]
|
||||
for attr in corpus.structural_attributes.list():
|
||||
# We only want to iterate over non subattributes, identifiable by
|
||||
# attr.attrs['has_values']==False
|
||||
# attr.attrs['has_values'] == False
|
||||
if attr.attrs['has_values']:
|
||||
continue
|
||||
cpos_attr_ids = attr.ids_by_cpos(cpos_list)
|
||||
@ -93,43 +93,86 @@ def lookups_by_cpos(corpus, cpos_list):
|
||||
return lookups
|
||||
|
||||
|
||||
def partial_export_subcorpus(subcorpus, match_id_list, context=25):
    """Export selected matches of a subcorpus together with their context.

    Args:
        subcorpus: CQi subcorpus object; its ``attrs``, ``dump`` and
            ``collection.corpus`` members are used.
        match_id_list: Iterable of match ids. Ids outside
            ``0 .. subcorpus.attrs['size'] - 1`` are silently ignored.
        context: Maximum number of corpus positions to include on each side
            of a match. ``0`` disables the context entirely.

    Returns:
        ``{'matches': []}`` if the subcorpus is empty or no id in
        ``match_id_list`` addresses a match. Otherwise a dict with a
        ``'matches'`` list (one ``{'num', 'lc', 'c', 'rc'}`` dict per valid
        id, where ``lc``/``c``/``rc`` are inclusive ``(first, last)`` corpus
        position pairs and ``lc``/``rc`` are ``None`` when there is no
        context on that side) merged with the result of ``lookups_by_cpos``
        for every corpus position covered.
    """
    subcorpus_size = subcorpus.attrs['size']
    if subcorpus_size == 0:
        return {"matches": []}
    fields = subcorpus.attrs['fields']
    match_boundaries = [
        (
            match_id,
            subcorpus.dump(fields['match'], match_id, match_id)[0],
            subcorpus.dump(fields['matchend'], match_id, match_id)[0]
        )
        for match_id in match_id_list
        # Ignore ids that do not address a match of this subcorpus
        if 0 <= match_id < subcorpus_size
    ]
    if not match_boundaries:
        # Nothing to export: mirror the empty-subcorpus result instead of
        # running the lookups with an empty corpus position list.
        return {"matches": []}
    corpus = subcorpus.collection.corpus
    last_cpos = corpus.attrs['size'] - 1
    cpos_set = set()
    matches = []
    for match_num, match_start, match_end in match_boundaries:
        c = (match_start, match_end)
        if match_start == 0 or context == 0:
            # No left context requested or the match starts the corpus
            lc = None
            cpos_list_lbound = match_start
        else:
            lc_lbound = max(0, match_start - context)
            lc = (lc_lbound, match_start - 1)
            cpos_list_lbound = lc_lbound
        if match_end == last_cpos or context == 0:
            # No right context requested or the match ends the corpus
            rc = None
            cpos_list_rbound = match_end
        else:
            rc_rbound = min(match_end + context, last_cpos)
            rc = (match_end + 1, rc_rbound)
            cpos_list_rbound = rc_rbound
        matches.append({'num': match_num, 'lc': lc, 'c': c, 'rc': rc})
        cpos_set.update(range(cpos_list_lbound, cpos_list_rbound + 1))
    lookups = lookups_by_cpos(corpus, list(cpos_set))
    return {'matches': matches, **lookups}
|
||||
|
||||
|
||||
def export_subcorpus(subcorpus, context=25, cutoff=float('inf'), offset=0):
|
||||
if subcorpus.attrs['size'] == 0:
|
||||
return {"matches": []}
|
||||
first_match = max(0, offset)
|
||||
last_match = min((offset + cutoff - 1), (subcorpus.attrs['size'] - 1))
|
||||
match_boundaries = zip(
|
||||
subcorpus.dump(
|
||||
subcorpus.attrs['fields']['match'], first_match, last_match),
|
||||
subcorpus.dump(
|
||||
subcorpus.attrs['fields']['matchend'], first_match, last_match)
|
||||
list(range(first_match, last_match + 1)),
|
||||
subcorpus.dump(subcorpus.attrs['fields']['match'], first_match, last_match),
|
||||
subcorpus.dump(subcorpus.attrs['fields']['matchend'], first_match, last_match)
|
||||
)
|
||||
cpos_set = set()
|
||||
matches = []
|
||||
match_num = offset + 1
|
||||
for match_start, match_end in match_boundaries:
|
||||
for match_num, match_start, match_end in match_boundaries:
|
||||
c = (match_start, match_end)
|
||||
if match_start == 0 or context == 0:
|
||||
lc = None
|
||||
cpos_list_lbound = match_start
|
||||
else:
|
||||
lc_lbound = max(0, (match_start - 1 - context))
|
||||
lc_lbound = max(0, (match_start - context))
|
||||
lc_rbound = match_start - 1
|
||||
lc = (lc_lbound, lc_rbound)
|
||||
cpos_list_lbound = lc_lbound
|
||||
if (match_end == (subcorpus.collection.corpus.attrs['size'] - 1)
|
||||
or context == 0):
|
||||
if match_end == (subcorpus.collection.corpus.attrs['size'] - 1) or context == 0:
|
||||
rc = None
|
||||
cpos_list_rbound = match_end
|
||||
else:
|
||||
rc_lbound = match_end + 1
|
||||
rc_rbound = min(match_end + 1 + context,
|
||||
subcorpus.collection.corpus.attrs['size'] - 1)
|
||||
rc_rbound = min(
|
||||
(match_end + context),
|
||||
(subcorpus.collection.corpus.attrs['size'] - 1)
|
||||
)
|
||||
rc = (rc_lbound, rc_rbound)
|
||||
cpos_list_rbound = rc_rbound
|
||||
match = {'num': match_num, 'lc': lc, 'c': c, 'rc': rc}
|
||||
matches.append(match)
|
||||
cpos_set.update(range(cpos_list_lbound, cpos_list_rbound + 1))
|
||||
match_num += 1
|
||||
lookups = lookups_by_cpos(subcorpus.collection.corpus, list(cpos_set))
|
||||
return {'matches': matches, **lookups}
|
||||
|
@ -401,6 +401,25 @@ class CQiSubcorpus {
|
||||
});
|
||||
}
|
||||
|
||||
partial_export(matchIdList, context=50) {
|
||||
return new Promise((resolve, reject) => {
|
||||
const args = {
|
||||
corpus_name: this.corpus.name,
|
||||
subcorpus_name: this.name,
|
||||
match_id_list: matchIdList,
|
||||
context: context
|
||||
};
|
||||
|
||||
this.socket.emit('cqi.corpora.corpus.subcorpora.subcorpus.partial_export', args, response => {
|
||||
if (response.code === 200) {
|
||||
resolve(response.payload);
|
||||
} else {
|
||||
reject(response);
|
||||
}
|
||||
});
|
||||
});
|
||||
}
|
||||
|
||||
fdst_1(cutoff, field, attribute) {
|
||||
return new Promise((resolve, reject) => {
|
||||
const args = {
|
||||
|
@ -47,6 +47,8 @@ class CorpusAnalysisConcordance {
|
||||
this.data.corpus.o.query(subcorpusName, query)
|
||||
.then(cQiStatus => {
|
||||
subcorpus.q = query;
|
||||
subcorpus.selectedItems = new Set();
|
||||
if (subcorpusName !== 'Last') {this.data.subcorpora.Last = subcorpus;}
|
||||
return this.data.corpus.o.subcorpora.get(subcorpusName);
|
||||
})
|
||||
.then(cQiSubcorpus => {
|
||||
@ -56,8 +58,6 @@ class CorpusAnalysisConcordance {
|
||||
.then(
|
||||
paginatedSubcorpus => {
|
||||
subcorpus.p = paginatedSubcorpus;
|
||||
subcorpus.selectedItems = {};
|
||||
if (subcorpusName !== 'Last') {this.data.subcorpora.Last = subcorpus;}
|
||||
this.data.subcorpora[subcorpusName] = subcorpus;
|
||||
this.settings.selectedSubcorpus = subcorpusName;
|
||||
this.renderSubcorpusList();
|
||||
@ -154,48 +154,140 @@ class CorpusAnalysisConcordance {
|
||||
renderSubcorpusActions() {
|
||||
this.clearSubcorpusActions();
|
||||
this.elements.subcorpusActions.innerHTML += `
|
||||
<a class="btn-floating btn-small tooltipped waves-effect waves-light corpus-analysis-action download-subcorpus-selection-trigger" data-tooltip="Download subcorpus selection">
|
||||
<i class="material-icons">playlist_add_check</i>
|
||||
<a class="btn-floating btn-small tooltipped waves-effect waves-light corpus-analysis-action subcorpus-export-trigger" data-tooltip="Export subcorpus">
|
||||
<i class="material-icons">export</i>
|
||||
</a>
|
||||
<a class="btn-floating btn-small tooltipped waves-effect waves-light corpus-analysis-action download-subcorpus-trigger" data-tooltip="Download subcorpus">
|
||||
<i class="material-icons">file_download</i>
|
||||
</a>
|
||||
<a class="btn-floating btn-small red tooltipped waves-effect waves-light corpus-analysis-action delete-subcorpus-trigger" data-tooltip="Delete subcorpus">
|
||||
<a class="btn-floating btn-small red tooltipped waves-effect waves-light corpus-analysis-action subcorpus-delete-trigger" data-tooltip="Delete subcorpus">
|
||||
<i class="material-icons">delete</i>
|
||||
</a>
|
||||
`.trim();
|
||||
M.Tooltip.init(this.elements.subcorpusActions.querySelectorAll('.tooltipped'));
|
||||
this.elements.subcorpusActions.querySelector('.download-subcorpus-trigger').addEventListener('click', event => {
|
||||
event.preventDefault();
|
||||
app.flash('This feature is currently not available', 'error');
|
||||
});
|
||||
this.elements.subcorpusActions.querySelector('.download-subcorpus-selection-trigger').addEventListener('click', event => {
|
||||
this.elements.subcorpusActions.querySelector('.subcorpus-export-trigger').addEventListener('click', event => {
|
||||
event.preventDefault();
|
||||
let subcorpus = this.data.subcorpora[this.settings.selectedSubcorpus];
|
||||
if (JSON.stringify(subcorpus.selectedItems) === '{}') {app.flash('No items selected', 'error'); return;}
|
||||
let csvContent = 'sep=,\r\n';
|
||||
csvContent += '"#Match","Text title","Left context","KWIC","Right context"\r\n';
|
||||
for (let selectedItem of Object.values(subcorpus.selectedItems)) {
|
||||
csvContent += `"${selectedItem.num}",`;
|
||||
csvContent += `"${selectedItem.textTitle.replace('"', '""')}",`;
|
||||
csvContent += `"${selectedItem.leftContext.replace('"', '""')}",`;
|
||||
csvContent += `"${selectedItem.kwic.replace('"', '""')}",`;
|
||||
csvContent += `"${selectedItem.rightContext.replace('"', '""')}"\r\n`;
|
||||
}
|
||||
// Create a blob
|
||||
let blob = new Blob([csvContent], {type: 'text/csv;charset=utf-8;'});
|
||||
let url = URL.createObjectURL(blob);
|
||||
|
||||
// Create a link to download it
|
||||
let pom = document.createElement('a');
|
||||
pom.href = url;
|
||||
pom.setAttribute('download', 'export.csv');
|
||||
pom.click();
|
||||
// console.log(csvContent);
|
||||
// let encodedUri = encodeURI(csvContent);
|
||||
// window.open(encodedUri);
|
||||
let modalElementId = Utils.generateElementId('export-subcorpus-modal-');
|
||||
let exportFormatSelectElementId = Utils.generateElementId('export-format-select-');
|
||||
let exportSelectedMatchesOnlyCheckboxElementId = Utils.generateElementId('export-selected-matches-only-checkbox-');
|
||||
let exportFileNameInputElementId = Utils.generateElementId('export-file-name-input-');
|
||||
let modalElement = Utils.HTMLToElement(
|
||||
`
|
||||
<div class="modal" id="${modalElementId}">
|
||||
<div class="modal-content">
|
||||
<h4>Export subcorpus "${subcorpus.o.name}"</h4>
|
||||
<br>
|
||||
<div class="row">
|
||||
<div class="input-field col s3">
|
||||
<select id="${exportFormatSelectElementId}">
|
||||
<option value="csv" selected>CSV</option>
|
||||
<option value="json">JSON</option>
|
||||
</select>
|
||||
<label>Export format</label>
|
||||
</div>
|
||||
<div class="input-field col s9">
|
||||
<input id="${exportFileNameInputElementId}" type="text" class="validate" value="export">
|
||||
<label class="active" for="${exportFileNameInputElementId}">Export filename without filename extension (.csv/.json/...)</label>
|
||||
</div>
|
||||
<p class="col s12">
|
||||
<label>
|
||||
<input id="${exportSelectedMatchesOnlyCheckboxElementId}" type="checkbox" ${subcorpus.selectedItems.size === 0 ? '' : 'checked'}>
|
||||
<span>Export selected matches only</span>
|
||||
</label>
|
||||
</p>
|
||||
</div>
|
||||
</div>
|
||||
<div class="modal-footer">
|
||||
<a class="btn-flat modal-close waves-effect waves-light">Cancel</a>
|
||||
<a class="action-button btn modal-close waves-effect waves-light" data-action="export">Export</a>
|
||||
</div>
|
||||
</div>
|
||||
`
|
||||
);
|
||||
document.querySelector('#modals').appendChild(modalElement);
|
||||
let exportFormatSelectElement = modalElement.querySelector(`#${exportFormatSelectElementId}`);
|
||||
let exportFormatSelect = M.FormSelect.init(exportFormatSelectElement);
|
||||
let exportSelectedMatchesOnlyCheckboxElement = modalElement.querySelector(`#${exportSelectedMatchesOnlyCheckboxElementId}`);
|
||||
let exportFileNameInputElement = modalElement.querySelector(`#${exportFileNameInputElementId}`);
|
||||
let exportButton = modalElement.querySelector('.action-button[data-action="export"]');
|
||||
let modal = M.Modal.init(
|
||||
modalElement,
|
||||
{
|
||||
dismissible: false,
|
||||
onCloseEnd: () => {
|
||||
exportFormatSelect.destroy();
|
||||
modal.destroy();
|
||||
modalElement.remove();
|
||||
}
|
||||
}
|
||||
);
|
||||
exportButton.addEventListener('click', event => {
  event.preventDefault();
  this.app.disableActionElements();
  this.elements.progress.classList.remove('hide');

  // Undo the UI changes made above; must run on every exit path so the
  // page does not stay locked after an aborted or failed export.
  const finishExport = () => {
    this.elements.progress.classList.add('hide');
    this.app.enableActionElements();
  };
  // Quote a value as a CSV field and double EVERY embedded double quote.
  // (String.prototype.replace with a string pattern only replaces the
  // first occurrence, hence the global regular expression.)
  const toCsvField = value => `"${String(value).replace(/"/g, '""')}"`;
  // Collect the word forms of the inclusive cpos range [range[0], range[1]].
  const wordsInRange = (data, range) => {
    let words = [];
    for (let cpos = range[0]; cpos <= range[1]; cpos++) {words.push(data.cpos_lookup[cpos].word);}
    return words.join(' ');
  };

  let exportFormat = exportFormatSelectElement.value;
  let exportFileName = exportFileNameInputElement.value;
  let exportFileNameExtension = exportFormat === 'csv' ? 'csv' : 'json';
  let exportFileNameWithExtension = `${exportFileName}.${exportFileNameExtension}`;
  let exportSelectedMatchesOnly = exportSelectedMatchesOnlyCheckboxElement.checked;
  let promise;
  if (exportSelectedMatchesOnly) {
    if (subcorpus.selectedItems.size === 0) {
      finishExport();
      app.flash('No matches selected', 'error');
      return;
    }
    promise = subcorpus.o.partial_export([...subcorpus.selectedItems], 50);
  } else {
    promise = subcorpus.o.export(50);
  }
  promise
    .then(data => {
      let blob;
      if (exportFormat === 'csv') {
        let rows = [
          'sep=,',
          '"#Match","Text title","Left context","KWIC","Right context"'
        ];
        for (let match of data.matches) {
          // A match may span several texts; list each title once.
          let textIds = new Set();
          for (let cpos = match.c[0]; cpos <= match.c[1]; cpos++) {
            textIds.add(data.cpos_lookup[cpos].text);
          }
          let textTitles = [...textIds].map(x => data.text_lookup[x].title).join(', ');
          rows.push(
            [
              toCsvField(match.num),
              toCsvField(textTitles),
              // lc/rc are null when the match has no context on that side;
              // the field is then left empty instead of dereferencing null.
              match.lc === null ? '' : toCsvField(wordsInRange(data, match.lc)),
              toCsvField(wordsInRange(data, match.c)),
              match.rc === null ? '' : toCsvField(wordsInRange(data, match.rc))
            ].join(',')
          );
        }
        blob = new Blob([rows.join('\r\n')], {type: 'text/csv;charset=utf-8;'});
      } else {
        blob = new Blob([JSON.stringify(data, null, 2)], {type: 'application/json;charset=utf-8;'});
      }
      // Trigger the download through a temporary link element.
      let url = URL.createObjectURL(blob);
      let pom = document.createElement('a');
      pom.href = url;
      pom.setAttribute('download', exportFileNameWithExtension);
      pom.click();
    })
    .catch(error => {
      // Covers a rejected export request as well as errors raised while
      // building the file.
      app.flash('Export failed', 'error');
    })
    .then(finishExport);
});
|
||||
modal.open();
|
||||
});
|
||||
this.elements.subcorpusActions.querySelector('.delete-subcorpus-trigger').addEventListener('click', event => {
|
||||
this.elements.subcorpusActions.querySelector('.subcorpus-delete-trigger').addEventListener('click', event => {
|
||||
event.preventDefault();
|
||||
let subcorpus = this.data.subcorpora[this.settings.selectedSubcorpus];
|
||||
subcorpus.o.drop().then(
|
||||
@ -296,18 +388,12 @@ class CorpusAnalysisConcordance {
|
||||
event.preventDefault();
|
||||
let itemElement = selectTriggerElement.closest('.item');
|
||||
let itemId = parseInt(itemElement.dataset.id);
|
||||
if (itemId in subcorpus.selectedItems) {
|
||||
delete subcorpus.selectedItems[itemId];
|
||||
if (subcorpus.selectedItems.has(itemId)) {
|
||||
subcorpus.selectedItems.delete(itemId);
|
||||
selectTriggerElement.classList.remove('green');
|
||||
selectTriggerElement.querySelector('i').textContent = 'add';
|
||||
} else {
|
||||
subcorpus.selectedItems[itemId] = {
|
||||
num: itemId,
|
||||
textTitle: itemElement.querySelector('.text-title').textContent,
|
||||
leftContext: [...itemElement.querySelectorAll('.left-context .p-attr')].map(x => x.textContent).join(' '),
|
||||
kwic: [...itemElement.querySelectorAll('.kwic .p-attr')].map(x => x.textContent).join(' '),
|
||||
rightContext: [...itemElement.querySelectorAll('.right-context .p-attr')].map(x => x.textContent).join(' ')
|
||||
};
|
||||
subcorpus.selectedItems.add(itemId);
|
||||
selectTriggerElement.classList.add('green');
|
||||
selectTriggerElement.querySelector('i').textContent = 'check';
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user