mirror of
				https://gitlab.ub.uni-bielefeld.de/sfb1288inf/nopaque.git
				synced 2025-10-31 02:32:45 +00:00 
			
		
		
		
	Add export options to subcorpora
This commit is contained in:
		| @@ -1,13 +1,9 @@ | ||||
| from flask import session | ||||
| import cqi | ||||
| import json | ||||
| import math | ||||
| import os | ||||
| from app import socketio | ||||
| from app.decorators import socketio_login_required | ||||
| from app.models import Corpus | ||||
| from . import NAMESPACE as ns | ||||
| from .utils import cqi_over_socketio, export_subcorpus | ||||
| from .utils import cqi_over_socketio, export_subcorpus, partial_export_subcorpus | ||||
|  | ||||
|  | ||||
| @socketio.on('cqi.corpora.corpus.subcorpora.get', namespace=ns) | ||||
| @@ -109,6 +105,16 @@ def cqi_corpora_corpus_subcorpora_subcorpus_paginate(cqi_client: cqi.CQiClient, | ||||
|     return {'code': 200, 'msg': 'OK', 'payload': payload} | ||||
|  | ||||
|  | ||||
@socketio.on('cqi.corpora.corpus.subcorpora.subcorpus.partial_export', namespace=ns)
@socketio_login_required
@cqi_over_socketio
def cqi_corpora_corpus_subcorpora_subcorpus_partial_export(cqi_client: cqi.CQiClient, corpus_name: str, subcorpus_name: str, match_id_list: list, context: int = 50):  # noqa
    """Socket.IO handler: export only the listed matches of a subcorpus.

    Looks up ``subcorpus_name`` inside ``corpus_name`` through
    ``cqi_client`` and delegates to ``partial_export_subcorpus``.

    NOTE(review): ``cqi_client`` is presumably injected by the
    ``cqi_over_socketio`` decorator -- confirm against its definition.

    :param corpus_name: name of the corpus to look up.
    :param subcorpus_name: name of the subcorpus within that corpus.
    :param match_id_list: IDs of the matches to export.
    :param context: context size forwarded to ``partial_export_subcorpus``.
    :returns: ``{'code': 200, 'msg': 'OK', 'payload': <export data>}``.
    """
    subcorpus = cqi_client.corpora.get(corpus_name).subcorpora.get(subcorpus_name)
    response = {'code': 200, 'msg': 'OK'}
    response['payload'] = partial_export_subcorpus(subcorpus, match_id_list, context=context)
    return response
|  | ||||
|  | ||||
| @socketio.on('cqi.corpora.corpus.subcorpora.subcorpus.export', namespace=ns) | ||||
| @socketio_login_required | ||||
| @cqi_over_socketio | ||||
| @@ -116,8 +122,4 @@ def cqi_corpora_corpus_subcorpora_subcorpus_export(cqi_client: cqi.CQiClient, co | ||||
|     cqi_corpus = cqi_client.corpora.get(corpus_name) | ||||
|     cqi_subcorpus = cqi_corpus.subcorpora.get(subcorpus_name) | ||||
|     cqi_subcorpus_export = export_subcorpus(cqi_subcorpus, context=context) | ||||
|     corpus = Corpus.query.get(session['d']['corpus_id']) | ||||
|     file_path = os.path.join(corpus.path, f'{subcorpus_name}.json') | ||||
|     with open(file_path, 'w') as file: | ||||
|         json.dump(cqi_subcorpus_export, file) | ||||
|     return {'code': 200, 'msg': 'OK'} | ||||
|     return {'code': 200, 'msg': 'OK', 'payload': cqi_subcorpus_export} | ||||
|   | ||||
| @@ -93,43 +93,86 @@ def lookups_by_cpos(corpus, cpos_list): | ||||
|     return lookups | ||||
|  | ||||
|  | ||||
def partial_export_subcorpus(subcorpus, match_id_list, context=25):
    """Export selected matches of a subcorpus with surrounding context.

    For every valid ID in ``match_id_list`` the match boundaries are read
    from the subcorpus and extended by up to ``context`` corpus positions
    on each side, clamped to the bounds of the underlying corpus.

    :param subcorpus: CQi subcorpus object; must provide ``attrs['size']``,
        ``attrs['fields']``, ``dump(field, first, last)`` and
        ``collection.corpus``.
    :param match_id_list: iterable of match IDs to export. Entries that are
        not integers or that fall outside ``0 .. size - 1`` are skipped.
    :param context: number of corpus positions of left/right context. Zero
        or a negative value disables context (``lc``/``rc`` are ``None``).
    :returns: ``{'matches': [...], **lookups}`` where each match is
        ``{'num': match_id, 'lc': (start, end) | None, 'c': (start, end),
        'rc': (start, end) | None}`` and ``lookups`` is whatever
        ``lookups_by_cpos`` returns for all exported corpus positions.
        An empty subcorpus yields ``{'matches': []}`` without lookups.
    """
    subcorpus_size = subcorpus.attrs['size']
    if subcorpus_size == 0:
        return {"matches": []}

    # Loop invariants, resolved once instead of per match.
    match_field = subcorpus.attrs['fields']['match']
    matchend_field = subcorpus.attrs['fields']['matchend']
    corpus = subcorpus.collection.corpus
    last_cpos = corpus.attrs['size'] - 1
    # A negative context would yield inverted (start > end) ranges and drop
    # the match's own positions from the lookups, so treat it like 0.
    use_context = context > 0

    matches = []
    cpos_set = set()
    for match_id in match_id_list:
        # IDs come from the client; ignore anything that is not a valid
        # index instead of failing the whole export.
        if not isinstance(match_id, int) or not 0 <= match_id < subcorpus_size:
            continue
        match_start = subcorpus.dump(match_field, match_id, match_id)[0]
        match_end = subcorpus.dump(matchend_field, match_id, match_id)[0]

        if use_context and match_start > 0:
            lc = (max(0, match_start - context), match_start - 1)
            first_cpos = lc[0]
        else:
            # No tokens to the left (or context disabled).
            lc = None
            first_cpos = match_start

        if use_context and match_end < last_cpos:
            rc = (match_end + 1, min(match_end + context, last_cpos))
            final_cpos = rc[1]
        else:
            # No tokens to the right (or context disabled).
            rc = None
            final_cpos = match_end

        matches.append(
            {'num': match_id, 'lc': lc, 'c': (match_start, match_end), 'rc': rc}
        )
        cpos_set.update(range(first_cpos, final_cpos + 1))

    lookups = lookups_by_cpos(corpus, list(cpos_set))
    return {'matches': matches, **lookups}
|  | ||||
|  | ||||
| def export_subcorpus(subcorpus, context=25, cutoff=float('inf'), offset=0): | ||||
|     if subcorpus.attrs['size'] == 0: | ||||
|         return {"matches": []} | ||||
|     first_match = max(0, offset) | ||||
|     last_match = min((offset + cutoff - 1), (subcorpus.attrs['size'] - 1)) | ||||
|     match_boundaries = zip( | ||||
|         subcorpus.dump( | ||||
|             subcorpus.attrs['fields']['match'], first_match, last_match), | ||||
|         subcorpus.dump( | ||||
|             subcorpus.attrs['fields']['matchend'], first_match, last_match) | ||||
|         list(range(first_match, last_match + 1)), | ||||
|         subcorpus.dump(subcorpus.attrs['fields']['match'], first_match, last_match), | ||||
|         subcorpus.dump(subcorpus.attrs['fields']['matchend'], first_match, last_match) | ||||
|     ) | ||||
|     cpos_set = set() | ||||
|     matches = [] | ||||
|     match_num = offset + 1 | ||||
|     for match_start, match_end in match_boundaries: | ||||
|     for match_num, match_start, match_end in match_boundaries: | ||||
|         c = (match_start, match_end) | ||||
|         if match_start == 0 or context == 0: | ||||
|             lc = None | ||||
|             cpos_list_lbound = match_start | ||||
|         else: | ||||
|             lc_lbound = max(0, (match_start - 1 - context)) | ||||
|             lc_lbound = max(0, (match_start - context)) | ||||
|             lc_rbound = match_start - 1 | ||||
|             lc = (lc_lbound, lc_rbound) | ||||
|             cpos_list_lbound = lc_lbound | ||||
|         if (match_end == (subcorpus.collection.corpus.attrs['size'] - 1) | ||||
|                 or context == 0): | ||||
|         if match_end == (subcorpus.collection.corpus.attrs['size'] - 1) or context == 0: | ||||
|             rc = None | ||||
|             cpos_list_rbound = match_end | ||||
|         else: | ||||
|             rc_lbound = match_end + 1 | ||||
|             rc_rbound = min(match_end + 1 + context, | ||||
|                             subcorpus.collection.corpus.attrs['size'] - 1) | ||||
|             rc_rbound = min( | ||||
|                 (match_end + context), | ||||
|                 (subcorpus.collection.corpus.attrs['size'] - 1) | ||||
|             ) | ||||
|             rc = (rc_lbound, rc_rbound) | ||||
|             cpos_list_rbound = rc_rbound | ||||
|         match = {'num': match_num, 'lc': lc, 'c': c, 'rc': rc} | ||||
|         matches.append(match) | ||||
|         cpos_set.update(range(cpos_list_lbound, cpos_list_rbound + 1)) | ||||
|         match_num += 1 | ||||
|     lookups = lookups_by_cpos(subcorpus.collection.corpus, list(cpos_set)) | ||||
|     return {'matches': matches, **lookups} | ||||
|   | ||||
| @@ -401,6 +401,25 @@ class CQiSubcorpus { | ||||
|     }); | ||||
|   } | ||||
|  | ||||
|   partial_export(matchIdList, context=50) { | ||||
|     return new Promise((resolve, reject) => { | ||||
|       const args = { | ||||
|         corpus_name: this.corpus.name, | ||||
|         subcorpus_name: this.name, | ||||
|         match_id_list: matchIdList, | ||||
|         context: context | ||||
|       }; | ||||
|  | ||||
|       this.socket.emit('cqi.corpora.corpus.subcorpora.subcorpus.partial_export', args, response => { | ||||
|         if (response.code === 200) { | ||||
|           resolve(response.payload); | ||||
|         } else { | ||||
|           reject(response); | ||||
|         } | ||||
|       }); | ||||
|     }); | ||||
|   } | ||||
|  | ||||
|   fdst_1(cutoff, field, attribute) { | ||||
|     return new Promise((resolve, reject) => { | ||||
|       const args = { | ||||
|   | ||||
| @@ -47,6 +47,8 @@ class CorpusAnalysisConcordance { | ||||
|       this.data.corpus.o.query(subcorpusName, query) | ||||
|         .then(cQiStatus => { | ||||
|           subcorpus.q = query; | ||||
|           subcorpus.selectedItems = new Set(); | ||||
|           if (subcorpusName !== 'Last') {this.data.subcorpora.Last = subcorpus;} | ||||
|           return this.data.corpus.o.subcorpora.get(subcorpusName); | ||||
|         }) | ||||
|         .then(cQiSubcorpus => { | ||||
| @@ -56,8 +58,6 @@ class CorpusAnalysisConcordance { | ||||
|         .then( | ||||
|           paginatedSubcorpus => { | ||||
|             subcorpus.p = paginatedSubcorpus; | ||||
|             subcorpus.selectedItems = {}; | ||||
|             if (subcorpusName !== 'Last') {this.data.subcorpora.Last = subcorpus;} | ||||
|             this.data.subcorpora[subcorpusName] = subcorpus; | ||||
|             this.settings.selectedSubcorpus = subcorpusName; | ||||
|             this.renderSubcorpusList(); | ||||
| @@ -154,48 +154,140 @@ class CorpusAnalysisConcordance { | ||||
|   renderSubcorpusActions() { | ||||
|     this.clearSubcorpusActions(); | ||||
|     this.elements.subcorpusActions.innerHTML += ` | ||||
|       <a class="btn-floating btn-small tooltipped waves-effect waves-light corpus-analysis-action download-subcorpus-selection-trigger" data-tooltip="Download subcorpus selection"> | ||||
|         <i class="material-icons">playlist_add_check</i> | ||||
|       <a class="btn-floating btn-small tooltipped waves-effect waves-light corpus-analysis-action subcorpus-export-trigger" data-tooltip="Export subcorpus"> | ||||
|         <i class="material-icons">export</i> | ||||
|       </a> | ||||
|       <a class="btn-floating btn-small tooltipped waves-effect waves-light corpus-analysis-action download-subcorpus-trigger" data-tooltip="Download subcorpus"> | ||||
|         <i class="material-icons">file_download</i> | ||||
|       </a> | ||||
|       <a class="btn-floating btn-small red tooltipped waves-effect waves-light corpus-analysis-action delete-subcorpus-trigger" data-tooltip="Delete subcorpus"> | ||||
|       <a class="btn-floating btn-small red tooltipped waves-effect waves-light corpus-analysis-action subcorpus-delete-trigger" data-tooltip="Delete subcorpus"> | ||||
|         <i class="material-icons">delete</i> | ||||
|       </a> | ||||
|     `.trim(); | ||||
|     M.Tooltip.init(this.elements.subcorpusActions.querySelectorAll('.tooltipped')); | ||||
|     this.elements.subcorpusActions.querySelector('.download-subcorpus-trigger').addEventListener('click', event => { | ||||
|       event.preventDefault(); | ||||
|       app.flash('This feature is currently not available', 'error'); | ||||
|     }); | ||||
|     this.elements.subcorpusActions.querySelector('.download-subcorpus-selection-trigger').addEventListener('click', event => { | ||||
|     this.elements.subcorpusActions.querySelector('.subcorpus-export-trigger').addEventListener('click', event => { | ||||
|       event.preventDefault(); | ||||
|       let subcorpus = this.data.subcorpora[this.settings.selectedSubcorpus]; | ||||
|       if (JSON.stringify(subcorpus.selectedItems) === '{}') {app.flash('No items selected', 'error'); return;} | ||||
|       let csvContent = 'sep=,\r\n'; | ||||
|       csvContent += '"#Match","Text title","Left context","KWIC","Right context"\r\n'; | ||||
|       for (let selectedItem of Object.values(subcorpus.selectedItems)) { | ||||
|         csvContent += `"${selectedItem.num}",`; | ||||
|         csvContent += `"${selectedItem.textTitle.replace('"', '""')}",`; | ||||
|         csvContent += `"${selectedItem.leftContext.replace('"', '""')}",`; | ||||
|         csvContent += `"${selectedItem.kwic.replace('"', '""')}",`; | ||||
|         csvContent += `"${selectedItem.rightContext.replace('"', '""')}"\r\n`; | ||||
|       let modalElementId = Utils.generateElementId('export-subcorpus-modal-'); | ||||
|       let exportFormatSelectElementId = Utils.generateElementId('export-format-select-'); | ||||
|       let exportSelectedMatchesOnlyCheckboxElementId = Utils.generateElementId('export-selected-matches-only-checkbox-'); | ||||
|       let exportFileNameInputElementId = Utils.generateElementId('export-file-name-input-'); | ||||
|       let modalElement = Utils.HTMLToElement( | ||||
|         ` | ||||
|           <div class="modal" id="${modalElementId}"> | ||||
|             <div class="modal-content"> | ||||
|               <h4>Export subcorpus "${subcorpus.o.name}"</h4> | ||||
|               <br> | ||||
|               <div class="row"> | ||||
|                 <div class="input-field col s3"> | ||||
|                   <select id="${exportFormatSelectElementId}"> | ||||
|                     <option value="csv" selected>CSV</option> | ||||
|                     <option value="json">JSON</option> | ||||
|                   </select> | ||||
|                   <label>Export format</label> | ||||
|                 </div> | ||||
|                 <div class="input-field col s9"> | ||||
|                   <input id="${exportFileNameInputElementId}" type="text" class="validate" value="export"> | ||||
|                   <label class="active" for="${exportFileNameInputElementId}">Export filename without filename extension (.csv/.json/...)</label> | ||||
|                 </div> | ||||
|                 <p class="col s12"> | ||||
|                   <label> | ||||
|                     <input id="${exportSelectedMatchesOnlyCheckboxElementId}" type="checkbox" ${subcorpus.selectedItems.size === 0 ? '' : 'checked'}> | ||||
|                     <span>Export selected matches only</span> | ||||
|                   </label> | ||||
|                 </p> | ||||
|               </div> | ||||
|             </div> | ||||
|             <div class="modal-footer"> | ||||
|               <a class="btn-flat modal-close waves-effect waves-light">Cancel</a> | ||||
|               <a class="action-button btn modal-close waves-effect waves-light" data-action="export">Export</a> | ||||
|             </div> | ||||
|           </div> | ||||
|         ` | ||||
|       ); | ||||
|       document.querySelector('#modals').appendChild(modalElement); | ||||
|       let exportFormatSelectElement = modalElement.querySelector(`#${exportFormatSelectElementId}`); | ||||
|       let exportFormatSelect = M.FormSelect.init(exportFormatSelectElement); | ||||
|       let exportSelectedMatchesOnlyCheckboxElement = modalElement.querySelector(`#${exportSelectedMatchesOnlyCheckboxElementId}`); | ||||
|       let exportFileNameInputElement = modalElement.querySelector(`#${exportFileNameInputElementId}`); | ||||
|       let exportButton = modalElement.querySelector('.action-button[data-action="export"]'); | ||||
|       let modal = M.Modal.init( | ||||
|         modalElement, | ||||
|         { | ||||
|           dismissible: false, | ||||
|           onCloseEnd: () => { | ||||
|             exportFormatSelect.destroy(); | ||||
|             modal.destroy(); | ||||
|             modalElement.remove(); | ||||
|           } | ||||
|         } | ||||
|       ); | ||||
|       exportButton.addEventListener('click', event => { | ||||
|         event.preventDefault(); | ||||
|         this.app.disableActionElements(); | ||||
|         this.elements.progress.classList.remove('hide'); | ||||
|         let exportFormat = exportFormatSelectElement.value; | ||||
|         let exportFileName = exportFileNameInputElement.value; | ||||
|         let exportFileNameExtension = exportFormat === 'csv' ? 'csv' : 'json'; | ||||
|         let exportFileNameWithExtension = `${exportFileName}.${exportFileNameExtension}`; | ||||
|         let exportSelectedMatchesOnly = exportSelectedMatchesOnlyCheckboxElement.checked; | ||||
|         let promise; | ||||
|         if (exportSelectedMatchesOnly) { | ||||
|           if (subcorpus.selectedItems.size === 0) { | ||||
|             this.elements.progress.classList.add('hide'); | ||||
|             this.app.enableActionElements(); | ||||
|             app.flash('No matches selected', 'error'); | ||||
|             return; | ||||
|           } | ||||
|           promise = subcorpus.o.partial_export([...subcorpus.selectedItems], 50); | ||||
|         } else { | ||||
|           promise = subcorpus.o.export(50); | ||||
|         } | ||||
|         promise.then( | ||||
|           data => { | ||||
|             let blob; | ||||
|             if (exportFormat === 'csv') { | ||||
|               let csvContent = 'sep=,\r\n'; | ||||
|               csvContent += '"#Match","Text title","Left context","KWIC","Right context"'; | ||||
|               for (let match of data.matches) { | ||||
|                 csvContent += '\r\n'; | ||||
|                 csvContent += `"${match.num}"`; | ||||
|                 csvContent += ','; | ||||
|                 let textIds = new Set(); | ||||
|                 for (let cpos = match.c[0]; cpos <= match.c[1]; cpos++) { | ||||
|                   textIds.add(data.cpos_lookup[cpos].text); | ||||
|                 } | ||||
|                 csvContent += '"' + [...textIds].map(x => data.text_lookup[x].title.replace('"', '""')).join(', ') + '"'; | ||||
|                 csvContent += ','; | ||||
|                 if (match.lc !== null) { | ||||
|                   let lc_cpos_list = []; | ||||
|                   for (let cpos = match.lc[0]; cpos <= match.lc[1]; cpos++) {lc_cpos_list.push(cpos);} | ||||
|                   csvContent += '"' + lc_cpos_list.map(x => data.cpos_lookup[x].word.replace('"', '""')).join(' ') + '"'; | ||||
|                 } | ||||
|                 csvContent += ','; | ||||
|                 let c_cpos_list = []; | ||||
|                 for (let cpos = match.c[0]; cpos <= match.c[1]; cpos++) {c_cpos_list.push(cpos);} | ||||
|                 csvContent += '"' + c_cpos_list.map(x => data.cpos_lookup[x].word.replace('"', '""')).join(' ') + '"'; | ||||
|                 csvContent += ','; | ||||
|                 let rc_cpos_list = []; | ||||
|                 for (let cpos = match.rc[0]; cpos <= match.rc[1]; cpos++) {rc_cpos_list.push(cpos);} | ||||
|                 if (match.rc !== null) { | ||||
|                   csvContent += '"' + rc_cpos_list.map(x => data.cpos_lookup[x].word.replace('"', '""')).join(' ') + '"'; | ||||
|                 } | ||||
|               } | ||||
|               blob = new Blob([csvContent], {type: 'text/csv;charset=utf-8;'}); | ||||
|             } else { | ||||
|               blob = new Blob([JSON.stringify(data, null, 2)], {type: 'application/json;charset=utf-8;'}); | ||||
|             } | ||||
|       // Create a blob | ||||
|       let blob = new Blob([csvContent], {type: 'text/csv;charset=utf-8;'}); | ||||
|             let url = URL.createObjectURL(blob); | ||||
|  | ||||
|       // Create a link to download it | ||||
|             let pom = document.createElement('a'); | ||||
|             pom.href = url; | ||||
|       pom.setAttribute('download', 'export.csv'); | ||||
|             pom.setAttribute('download', exportFileNameWithExtension); | ||||
|             pom.click(); | ||||
|       // console.log(csvContent); | ||||
|       // let encodedUri = encodeURI(csvContent); | ||||
|       // window.open(encodedUri); | ||||
|             this.elements.progress.classList.add('hide'); | ||||
|             this.app.enableActionElements(); | ||||
|         }); | ||||
|     this.elements.subcorpusActions.querySelector('.delete-subcorpus-trigger').addEventListener('click', event => { | ||||
|       }); | ||||
|       modal.open(); | ||||
|     }); | ||||
|     this.elements.subcorpusActions.querySelector('.subcorpus-delete-trigger').addEventListener('click', event => { | ||||
|       event.preventDefault(); | ||||
|       let subcorpus = this.data.subcorpora[this.settings.selectedSubcorpus]; | ||||
|       subcorpus.o.drop().then( | ||||
| @@ -296,18 +388,12 @@ class CorpusAnalysisConcordance { | ||||
|         event.preventDefault(); | ||||
|         let itemElement = selectTriggerElement.closest('.item'); | ||||
|         let itemId = parseInt(itemElement.dataset.id); | ||||
|         if (itemId in subcorpus.selectedItems) { | ||||
|           delete subcorpus.selectedItems[itemId]; | ||||
|         if (subcorpus.selectedItems.has(itemId)) { | ||||
|           subcorpus.selectedItems.delete(itemId); | ||||
|           selectTriggerElement.classList.remove('green'); | ||||
|           selectTriggerElement.querySelector('i').textContent = 'add'; | ||||
|         } else { | ||||
|           subcorpus.selectedItems[itemId] = { | ||||
|             num: itemId, | ||||
|             textTitle: itemElement.querySelector('.text-title').textContent, | ||||
|             leftContext: [...itemElement.querySelectorAll('.left-context .p-attr')].map(x => x.textContent).join(' '), | ||||
|             kwic: [...itemElement.querySelectorAll('.kwic .p-attr')].map(x => x.textContent).join(' '), | ||||
|             rightContext: [...itemElement.querySelectorAll('.right-context .p-attr')].map(x => x.textContent).join(' ') | ||||
|           }; | ||||
|           subcorpus.selectedItems.add(itemId); | ||||
|           selectTriggerElement.classList.add('green'); | ||||
|           selectTriggerElement.querySelector('i').textContent = 'check'; | ||||
|         } | ||||
|   | ||||
		Reference in New Issue
	
	Block a user