Big Corpus analysis update

This commit is contained in:
Patrick Jentsch
2021-11-16 15:23:57 +01:00
parent c1436c2a5d
commit f6c2292e03
47 changed files with 2549 additions and 1840 deletions

View File

@ -0,0 +1,439 @@
/**
 * Entry point of the CQi-over-Socket.IO client API.
 *
 * Opens a Socket.IO connection to the corpus analysis namespace and offers
 * the session level commands. Every command returns a Promise that resolves
 * with the `payload` of the server response when the response `code` is 200
 * and rejects with the complete response object otherwise.
 */
class CQiClient {
  /**
   * @param corpusId Value sent to the server as `corpus_id` in the Socket.IO
   *   auth data.
   */
  constructor(corpusId) {
    const options = {
      auth: {corpus_id: corpusId},
      transports: ['websocket'],
      upgrade: false
    };
    this.socket = io('/corpora/corpus/corpus_analysis', options);
    this.connected = false;
    this.corpora = new CQiCorpusCollection(this.socket);
  }

  /** Emits `cqi.connect`; marks the client as connected on success. */
  connect() {
    return new Promise((resolve, reject) => {
      const onReply = reply => {
        if (reply.code !== 200) {
          reject(reply);
          return;
        }
        this.connected = true;
        resolve(reply.payload);
      };
      this.socket.emit('cqi.connect', onReply);
    });
  }

  /** Emits `cqi.disconnect`; marks the client as disconnected on success. */
  disconnect() {
    return new Promise((resolve, reject) => {
      const onReply = reply => {
        if (reply.code !== 200) {
          reject(reply);
          return;
        }
        this.connected = false;
        resolve(reply.payload);
      };
      this.socket.emit('cqi.disconnect', onReply);
    });
  }

  /** Emits `cqi.ping`; does not touch the `connected` flag. */
  ping() {
    return new Promise((resolve, reject) => {
      const onReply = reply => {
        if (reply.code !== 200) {
          reject(reply);
          return;
        }
        resolve(reply.payload);
      };
      this.socket.emit('cqi.ping', onReply);
    });
  }
}
/**
 * Access to the corpora offered by the server.
 *
 * Both methods return a Promise that rejects with the complete server
 * response when its `code` is not 200.
 */
class CQiCorpusCollection {
  /**
   * @param socket Socket.IO socket used to emit the CQi events.
   */
  constructor(socket) {
    this.socket = socket;
  }

  /**
   * Requests a single corpus by name.
   *
   * @param corpusName Sent to the server as `corpus_name`.
   * @returns Promise resolving with a CQiCorpus built from the response payload.
   */
  get(corpusName) {
    return new Promise((resolve, reject) => {
      let args = {corpus_name: corpusName};
      this.socket.emit('cqi.corpora.get', args, response => {
        if (response.code === 200) {
          resolve(new CQiCorpus(this.socket, response.payload));
        } else {
          reject(response);
        }
      });
    });
  }

  /**
   * Requests all corpora.
   *
   * @returns Promise resolving with one CQiCorpus per payload entry.
   */
  list() {
    return new Promise((resolve, reject) => {
      this.socket.emit('cqi.corpora.list', response => {
        if (response.code === 200) {
          // Entries describe corpora, so they are wrapped exactly like in
          // get(). CQiSubcorpus expects (socket, corpus, attrs) and would
          // throw when called with only two arguments.
          resolve(response.payload.map(x => new CQiCorpus(this.socket, x)));
        } else {
          reject(response);
        }
      });
    });
  }
}
/**
 * A corpus on the server together with its attribute and subcorpus
 * collections.
 *
 * All request methods except updateDb() return a Promise that resolves with
 * the response payload when the response `code` is 200 and rejects with the
 * complete response otherwise.
 */
class CQiCorpus {
  /**
   * @param socket Socket.IO socket used to emit the CQi events.
   * @param attrs Object providing `charset`, `name`, `properties` and `size`.
   */
  constructor(socket, attrs) {
    const {charset, name, properties, size} = attrs;
    this.socket = socket;
    this.charset = charset;
    this.name = name;
    this.properties = properties;
    this.size = size;
    this.alignmentAttributes = new CQiAlignmentAttributeCollection(socket, this);
    this.positionalAttributes = new CQiPositionalAttributeCollection(socket, this);
    this.structuralAttributes = new CQiStructuralAttributeCollection(socket, this);
    this.subcorpora = new CQiSubcorpusCollection(socket, this);
  }

  /** Emits the drop event for this corpus. */
  drop() {
    const request = {corpus_name: this.name};
    return new Promise((resolve, reject) => {
      this.socket.emit('cqi.corpora.corpus.drop', request, reply => {
        if (reply.code !== 200) {
          reject(reply);
          return;
        }
        resolve(reply.payload);
      });
    });
  }

  /**
   * Emits a query event for this corpus.
   *
   * @param subcorpus_name Sent as `subcorpus_name`.
   * @param queryString Sent as `query`.
   */
  query(subcorpus_name, queryString) {
    const request = {
      corpus_name: this.name,
      subcorpus_name: subcorpus_name,
      query: queryString
    };
    return new Promise((resolve, reject) => {
      this.socket.emit('cqi.corpora.corpus.query', request, reply => {
        if (reply.code !== 200) {
          reject(reply);
          return;
        }
        resolve(reply.payload);
      });
    });
  }

  // nopaque specific CQi extension
  /**
   * Requests one page of this corpus.
   *
   * @param page Sent as `page` (default 1).
   * @param perPage Sent as `per_page` (default 20).
   */
  paginate(page=1, perPage=20) {
    const request = {corpus_name: this.name, page: page, per_page: perPage};
    return new Promise((resolve, reject) => {
      this.socket.emit('cqi.corpora.corpus.paginate', request, reply => {
        if (reply.code !== 200) {
          reject(reply);
          return;
        }
        resolve(reply.payload);
      });
    });
  }

  /** Emits the update_db event without waiting for any acknowledgement. */
  updateDb() {
    this.socket.emit('cqi.corpora.corpus.update_db', {corpus_name: this.name});
  }
}
/**
 * Access to the alignment attributes of one corpus.
 *
 * Both methods return a Promise that rejects with the complete server
 * response when its `code` is not 200.
 */
class CQiAlignmentAttributeCollection {
  /**
   * @param socket Socket.IO socket used to emit the CQi events.
   * @param corpus Owning corpus; its `name` is sent as `corpus_name`.
   */
  constructor(socket, corpus) {
    this.corpus = corpus;
    this.socket = socket;
  }

  /**
   * Requests a single alignment attribute.
   *
   * @param alignmentAttributeName Sent as `alignment_attribute_name`.
   * @returns Promise resolving with a CQiAlignmentAttribute.
   */
  get(alignmentAttributeName) {
    return new Promise((resolve, reject) => {
      let args = {
        corpus_name: this.corpus.name,
        alignment_attribute_name: alignmentAttributeName
      };
      this.socket.emit('cqi.corpora.corpus.alignment_attributes.get', args, response => {
        if (response.code === 200) {
          resolve(new CQiAlignmentAttribute(this.socket, this.corpus, response.payload));
        } else {
          reject(response);
        }
      });
    });
  }

  /**
   * Requests all alignment attributes of the corpus.
   *
   * @returns Promise resolving with one CQiAlignmentAttribute per payload entry.
   */
  list() {
    return new Promise((resolve, reject) => {
      let args = {corpus_name: this.corpus.name};
      // Event name follows the `cqi.corpora.corpus.` prefix used by get() and
      // by every other corpus level event of this client.
      // NOTE(review): confirm the server registers the handler under this name.
      this.socket.emit('cqi.corpora.corpus.alignment_attributes.list', args, response => {
        if (response.code === 200) {
          resolve(response.payload.map(x => new CQiAlignmentAttribute(this.socket, this.corpus, x)));
        } else {
          reject(response);
        }
      });
    });
  }
}
/** Plain record describing one alignment attribute of a corpus. */
class CQiAlignmentAttribute {
  /**
   * @param socket Socket.IO socket, stored for later use.
   * @param corpus Corpus this attribute belongs to.
   * @param attrs Object providing `name` and `size`.
   */
  constructor(socket, corpus, attrs) {
    const {name, size} = attrs;
    Object.assign(this, {socket, corpus, name, size});
  }
}
/**
 * Access to the positional attributes of one corpus.
 *
 * Both methods return a Promise that rejects with the complete server
 * response when its `code` is not 200.
 */
class CQiPositionalAttributeCollection {
  /**
   * @param socket Socket.IO socket used to emit the CQi events.
   * @param corpus Owning corpus; its `name` is sent as `corpus_name`.
   */
  constructor(socket, corpus) {
    this.corpus = corpus;
    this.socket = socket;
  }

  /**
   * Requests a single positional attribute.
   *
   * @param positionalAttributeName Sent as `positional_attribute_name`.
   * @returns Promise resolving with a CQiPositionalAttribute.
   */
  get(positionalAttributeName) {
    return new Promise((resolve, reject) => {
      let args = {
        corpus_name: this.corpus.name,
        positional_attribute_name: positionalAttributeName
      };
      this.socket.emit('cqi.corpora.corpus.positional_attributes.get', args, response => {
        if (response.code === 200) {
          resolve(new CQiPositionalAttribute(this.socket, this.corpus, response.payload));
        } else {
          reject(response);
        }
      });
    });
  }

  /**
   * Requests all positional attributes of the corpus.
   *
   * @returns Promise resolving with one CQiPositionalAttribute per payload entry.
   */
  list() {
    return new Promise((resolve, reject) => {
      let args = {corpus_name: this.corpus.name};
      // Event name follows the `cqi.corpora.corpus.` prefix used by get() and
      // by every other corpus level event of this client.
      // NOTE(review): confirm the server registers the handler under this name.
      this.socket.emit('cqi.corpora.corpus.positional_attributes.list', args, response => {
        if (response.code === 200) {
          resolve(response.payload.map(x => new CQiPositionalAttribute(this.socket, this.corpus, x)));
        } else {
          reject(response);
        }
      });
    });
  }
}
/** Plain record describing one positional attribute of a corpus. */
class CQiPositionalAttribute {
  /**
   * @param socket Socket.IO socket, stored for later use.
   * @param corpus Corpus this attribute belongs to.
   * @param attrs Object providing `lexicon_size`, `name` and `size`.
   */
  constructor(socket, corpus, attrs) {
    const {lexicon_size: lexiconSize, name, size} = attrs;
    Object.assign(this, {socket, corpus, lexiconSize, name, size});
  }
}
/**
 * Access to the structural attributes of one corpus.
 *
 * Both methods return a Promise that rejects with the complete server
 * response when its `code` is not 200.
 */
class CQiStructuralAttributeCollection {
  /**
   * @param socket Socket.IO socket used to emit the CQi events.
   * @param corpus Owning corpus; its `name` is sent as `corpus_name`.
   */
  constructor(socket, corpus) {
    this.corpus = corpus;
    this.socket = socket;
  }

  /**
   * Requests a single structural attribute.
   *
   * @param structuralAttributeName Sent as `structural_attribute_name`.
   * @returns Promise resolving with a CQiStructuralAttribute.
   */
  get(structuralAttributeName) {
    return new Promise((resolve, reject) => {
      let args = {
        corpus_name: this.corpus.name,
        structural_attribute_name: structuralAttributeName
      };
      this.socket.emit('cqi.corpora.corpus.structural_attributes.get', args, response => {
        if (response.code === 200) {
          resolve(new CQiStructuralAttribute(this.socket, this.corpus, response.payload));
        } else {
          reject(response);
        }
      });
    });
  }

  /**
   * Requests all structural attributes of the corpus.
   *
   * @returns Promise resolving with one CQiStructuralAttribute per payload entry.
   */
  list() {
    return new Promise((resolve, reject) => {
      let args = {corpus_name: this.corpus.name};
      // Event name follows the `cqi.corpora.corpus.` prefix used by get() and
      // by every other corpus level event of this client.
      // NOTE(review): confirm the server registers the handler under this name.
      this.socket.emit('cqi.corpora.corpus.structural_attributes.list', args, response => {
        if (response.code === 200) {
          resolve(response.payload.map(x => new CQiStructuralAttribute(this.socket, this.corpus, x)));
        } else {
          reject(response);
        }
      });
    });
  }
}
/** Plain record describing one structural attribute of a corpus. */
class CQiStructuralAttribute {
  /**
   * @param socket Socket.IO socket, stored for later use.
   * @param corpus Corpus this attribute belongs to.
   * @param attrs Object providing `has_values`, `name` and `size`.
   */
  constructor(socket, corpus, attrs) {
    const {has_values: hasValues, name, size} = attrs;
    Object.assign(this, {socket, corpus, hasValues, name, size});
  }
}
/**
 * Access to the subcorpora of one corpus.
 *
 * Both methods return a Promise that rejects with the complete server
 * response when its `code` is not 200.
 */
class CQiSubcorpusCollection {
  /**
   * @param socket Socket.IO socket used to emit the CQi events.
   * @param corpus Owning corpus; its `name` is sent as `corpus_name`.
   */
  constructor(socket, corpus) {
    this.corpus = corpus;
    this.socket = socket;
  }

  /**
   * Requests a single subcorpus.
   *
   * @param subcorpusName Sent as `subcorpus_name`.
   * @returns Promise resolving with a CQiSubcorpus built from the payload.
   */
  get(subcorpusName) {
    return new Promise((resolve, reject) => {
      const request = {corpus_name: this.corpus.name, subcorpus_name: subcorpusName};
      const onReply = reply => {
        if (reply.code !== 200) {
          reject(reply);
          return;
        }
        resolve(new CQiSubcorpus(this.socket, this.corpus, reply.payload));
      };
      this.socket.emit('cqi.corpora.corpus.subcorpora.get', request, onReply);
    });
  }

  /**
   * Requests all subcorpora of the corpus.
   *
   * @returns Promise resolving with one CQiSubcorpus per payload entry.
   */
  list() {
    return new Promise((resolve, reject) => {
      const request = {corpus_name: this.corpus.name};
      const onReply = reply => {
        if (reply.code !== 200) {
          reject(reply);
          return;
        }
        const subcorpora = reply.payload.map(
          entry => new CQiSubcorpus(this.socket, this.corpus, entry)
        );
        resolve(subcorpora);
      };
      this.socket.emit('cqi.corpora.corpus.subcorpora.list', request, onReply);
    });
  }
}
/**
 * A subcorpus (named query result) of a corpus.
 *
 * Every method returns a Promise that resolves with the response payload
 * when the response `code` is 200 and rejects with the complete response
 * otherwise. All requests carry `corpus_name` and `subcorpus_name`.
 */
class CQiSubcorpus {
  /**
   * @param socket Socket.IO socket used to emit the CQi events.
   * @param corpus Corpus this subcorpus belongs to.
   * @param attrs Object providing `fields`, `name` and `size`.
   */
  constructor(socket, corpus, attrs) {
    this.socket = socket;
    this.corpus = corpus;
    this.fields = attrs.fields;
    this.name = attrs.name;
    this.size = attrs.size;
  }

  /** Emits the drop event for this subcorpus. */
  drop() {
    return new Promise((resolve, reject) => {
      let args = {corpus_name: this.corpus.name, subcorpus_name: this.name};
      this.socket.emit('cqi.corpora.corpus.subcorpora.subcorpus.drop', args, response => {
        if (response.code === 200) {
          resolve(response.payload);
        } else {
          reject(response);
        }
      });
    });
  }

  /**
   * Emits the dump event.
   *
   * @param field Sent as `field`.
   * @param first Sent as `first`.
   * @param last Sent as `last`.
   */
  dump(field, first, last) {
    return new Promise((resolve, reject) => {
      let args = {
        corpus_name: this.corpus.name,
        subcorpus_name: this.name,
        field: field,
        first: first,
        last: last
      };
      this.socket.emit('cqi.corpora.corpus.subcorpora.subcorpus.dump', args, response => {
        if (response.code === 200) {
          resolve(response.payload);
        } else {
          reject(response);
        }
      });
    });
  }

  /**
   * Emits the export event.
   *
   * @param context Sent as `context` (default 50).
   */
  export(context=50) {
    return new Promise((resolve, reject) => {
      let args = {
        corpus_name: this.corpus.name,
        subcorpus_name: this.name,
        context: context
      };
      this.socket.emit('cqi.corpora.corpus.subcorpora.subcorpus.export', args, response => {
        if (response.code === 200) {
          resolve(response.payload);
        } else {
          reject(response);
        }
      });
    });
  }

  /**
   * Emits the fdist_1 event.
   *
   * @param cutoff Sent as `cutoff`.
   * @param field Sent as `field`.
   * @param attribute Sent as `attribute`.
   */
  fdst_1(cutoff, field, attribute) {
    return new Promise((resolve, reject) => {
      let args = {
        corpus_name: this.corpus.name,
        subcorpus_name: this.name,
        cutoff: cutoff,
        field: field,
        attribute: attribute
      };
      this.socket.emit('cqi.corpora.corpus.subcorpora.subcorpus.fdist_1', args, response => {
        if (response.code === 200) {
          resolve(response.payload);
        } else {
          reject(response);
        }
      });
    });
  }

  /**
   * Emits the fdist_2 event.
   *
   * @param cutoff Sent as `cutoff`.
   * @param field1 Sent as `field1`.
   * @param attribute1 Sent as `attribute1`.
   * @param field2 Sent as `field2`.
   * @param attribute2 Sent as `attribute2`.
   */
  fdst_2(cutoff, field1, attribute1, field2, attribute2) {
    return new Promise((resolve, reject) => {
      let args = {
        corpus_name: this.corpus.name,
        subcorpus_name: this.name,
        cutoff: cutoff,
        field1: field1,
        attribute1: attribute1,
        field2: field2,
        attribute2: attribute2
      };
      // Must target fdist_2: the two-field argument set does not match what
      // fdst_1 sends to the fdist_1 event.
      this.socket.emit('cqi.corpora.corpus.subcorpora.subcorpus.fdist_2', args, response => {
        if (response.code === 200) {
          resolve(response.payload);
        } else {
          reject(response);
        }
      });
    });
  }

  // nopaque specific CQi extension
  /**
   * Requests one page of this subcorpus.
   *
   * @param page Sent as `page` (default 1).
   * @param perPage Sent as `per_page` (default 20).
   * @param context Sent as `context` (default 50).
   */
  paginate(page=1, perPage=20, context=50) {
    return new Promise((resolve, reject) => {
      let args = {
        corpus_name: this.corpus.name,
        subcorpus_name: this.name,
        page: page,
        per_page: perPage,
        context: context
      };
      this.socket.emit('cqi.corpora.corpus.subcorpora.subcorpus.paginate', args, response => {
        if (response.code === 200) {
          resolve(response.payload);
        } else {
          reject(response);
        }
      });
    });
  }
}

View File

@ -0,0 +1,118 @@
/**
 * Host application of the corpus analysis page.
 *
 * Owns the CQi client, the loaded corpus (`this.data.corpus`) and the
 * registered extensions, and toggles all `.corpus-analysis-action` elements
 * inside the app container.
 */
class CorpusAnalysisApp {
  // Background colors keyed by entity type. The misspelled property name is
  // kept because other code reads it under this name.
  static entitiyColors = {
    PERSON: '#a6e22d',
    PER: '#a6e22d',
    NORP: '#ef60b4',
    FACILITY: '#43c6fc',
    ORG: '#43c6fc',
    GPE: '#fd9720',
    LOC: '#fd9720',
    PRODUCT: '#a99dfb',
    MISC: '#a99dfb',
    EVENT: '#fc0',
    WORK_OF_ART: '#fc0',
    LANGUAGE: '#fc0',
    DATE: '#2fbbab',
    TIME: '#2fbbab',
    PERCENT: '#bbb',
    MONEY: '#bbb',
    QUANTITY: '#bbb',
    ORDINAL: '#bbb',
    CARDINAL: '#bbb'
  };

  /**
   * @param corpusId Stored in the settings and handed to CQiClient in init().
   */
  constructor(corpusId) {
    this.data = {};
    // HTML elements
    this.elements = {
      container: document.querySelector('#corpus-analysis-app-container'),
      extensionTabs: document.querySelector('#corpus-analysis-app-extension-tabs'),
      initModal: document.querySelector('#corpus-analysis-app-init-modal'),
      initError: document.querySelector('#corpus-analysis-app-init-error'),
      initProgress: document.querySelector('#corpus-analysis-app-init-progress'),
      overview: document.querySelector('#corpus-analysis-app-overview')
    };
    // Materialize elements
    this.elements.m = {
      extensionTabs: M.Tabs.init(this.elements.extensionTabs),
      initModal: M.Modal.init(this.elements.initModal, {dismissible: false})
    };
    this.extensions = {};
    this.settings = {
      corpusId: corpusId
    };
  }

  /**
   * Connects to the server, loads the corpus named 'CORPUS', initializes all
   * registered extensions and wires the extension selectors of the overview.
   * While loading, the action elements are disabled and the init modal is
   * open; on failure the error is shown inside the modal.
   */
  init() {
    this.disableActionElements();
    this.elements.m.initModal.open();
    // Init data
    this.data.cQiClient = new CQiClient(this.settings.corpusId);
    this.data.cQiClient.connect()
      .then(() => {
        return this.data.cQiClient.corpora.get('CORPUS');
      })
      .then(
        cQiCorpus => {
          this.data.corpus = {o: cQiCorpus};
          // TODO: Don't do this here
          cQiCorpus.updateDb();
          this.enableActionElements();
          for (let extension of Object.values(this.extensions)) {extension.init();}
          this.elements.m.initModal.close();
        },
        cQiError => {
          this.elements.initError.innerText = JSON.stringify(cQiError);
          this.elements.initError.classList.remove('hide');
          this.elements.initProgress.classList.add('hide');
          // The `in` operator throws on non-objects, so check the shape first
          let payload = cQiError?.payload;
          if (payload !== null && typeof payload === 'object' && 'code' in payload && 'msg' in payload) {
            nopaque.appClient.flash(`${payload.code}: ${payload.msg}`, 'error');
          }
        }
      );
    // Add event listeners
    for (let extensionSelectorElement of this.elements.overview.querySelectorAll('.extension-selector')) {
      extensionSelectorElement.addEventListener('click', () => {
        this.elements.m.extensionTabs.select(extensionSelectorElement.dataset.target);
      });
    }
  }

  /**
   * Registers an extension under its `name`. A second registration under the
   * same name is rejected with a console error.
   *
   * @param extension Object providing `name` and `init()`.
   */
  registerExtension(extension) {
    if (extension.name in this.extensions) {
      console.error(`Can't register extension ${extension.name}: Already registered`);
      return;
    }
    this.extensions[extension.name] = extension;
    // Late registration: initialize right away, but only once the corpus is
    // loaded. Between a successful connect and the arrival of the corpus,
    // init() still runs its own loop over all registered extensions, so
    // initializing here would hand out an undefined corpus and run twice.
    if ('cQiClient' in this.data && this.data.cQiClient.connected && this.data.corpus !== undefined) {
      extension.init();
    }
  }

  /** Disables every `.corpus-analysis-action` element inside the container. */
  disableActionElements() {
    this.setActionElementsDisabled(true);
  }

  /** Enables every `.corpus-analysis-action` element inside the container. */
  enableActionElements() {
    this.setActionElementsDisabled(false);
  }

  /**
   * Shared implementation of disableActionElements()/enableActionElements().
   *
   * @param disabled true to disable, false to enable.
   */
  setActionElementsDisabled(disabled) {
    let actionElements = this.elements.container.querySelectorAll('.corpus-analysis-action');
    for (let actionElement of actionElements) {
      if (actionElement.nodeName === 'INPUT') {
        actionElement.disabled = disabled;
      } else if (actionElement.nodeName === 'SELECT') {
        // The select is toggled through the `input.select-dropdown` found in
        // its parent (presumably generated by Materialize); skip it when that
        // input does not exist instead of throwing.
        let dropdownInput = actionElement.parentNode.querySelector('input.select-dropdown');
        if (dropdownInput !== null) {dropdownInput.disabled = disabled;}
      } else if (disabled) {
        actionElement.classList.add('disabled');
      } else {
        actionElement.classList.remove('disabled');
      }
    }
  }
}

View File

@ -0,0 +1,432 @@
/**
 * Concordance extension of the corpus analysis app.
 *
 * Sends queries to the server, keeps the resulting subcorpora in
 * `this.data.subcorpora` (keyed by name; each entry holds the query `q`, the
 * CQiSubcorpus `o` and the current page `p`) and renders the selected
 * subcorpus as a paginated keyword-in-context table.
 */
class CorpusAnalysisConcordance {
  name = 'Concordance';

  /**
   * Escapes the characters that are special in HTML text and attribute
   * values so that `value` can be placed into an innerHTML string.
   *
   * @param value Any value; it is converted with String().
   * @returns The escaped string.
   */
  static escapeHTML(value) {
    return String(value)
      .replace(/&/g, '&amp;')
      .replace(/</g, '&lt;')
      .replace(/>/g, '&gt;')
      .replace(/"/g, '&quot;')
      .replace(/'/g, '&#39;');
  }

  /**
   * Looks up the extension's DOM elements, reads the initial settings from
   * the form and registers the extension with the app.
   *
   * @param app The CorpusAnalysisApp hosting this extension.
   */
  constructor(app) {
    this.app = app;
    this.data = {};
    this.elements = {
      // TODO: Prefix elements with "corpus-analysis-app-"
      container: document.querySelector('#concordance-extension-container'),
      error: document.querySelector('#concordance-extension-error'),
      form: document.querySelector('#concordance-extension-form'),
      progress: document.querySelector('#concordance-extension-progress'),
      subcorpusInfo: document.querySelector('#concordance-extension-subcorpus-info'),
      subcorpusActions: document.querySelector('#concordance-extension-subcorpus-actions'),
      subcorpusItems: document.querySelector('#concordance-extension-subcorpus-items'),
      subcorpusList: document.querySelector('#concordance-extension-subcorpus-list'),
      subcorpusPagination: document.querySelector('#concordance-extension-subcorpus-pagination')
    };
    this.settings = {
      context: parseInt(this.elements.form['context'].value, 10),
      perPage: parseInt(this.elements.form['per-page'].value, 10),
      selectedSubcorpus: undefined,
      textStyle: parseInt(this.elements.form['text-style'].value, 10),
      tokenRepresentation: this.elements.form['token-representation'].value
    };
    this.app.registerExtension(this);
  }

  /**
   * Takes over the corpus from the app and installs the form listeners:
   * submit runs the query and renders its first page, change updates the
   * matching setting and either re-submits or re-styles the current items.
   */
  init() {
    // Init data
    this.data.corpus = this.app.data.corpus;
    this.data.subcorpora = {};
    // Add event listeners
    this.elements.form.addEventListener('submit', event => {
      event.preventDefault();
      this.app.disableActionElements();
      let query = this.elements.form.query.value.trim();
      let subcorpusName = this.elements.form['subcorpus-name'].value;
      this.elements.error.innerText = '';
      this.elements.error.classList.add('hide');
      this.elements.progress.classList.remove('hide');
      let subcorpus = {};
      this.data.corpus.o.query(subcorpusName, query)
        .then(() => {
          subcorpus.q = query;
          return this.data.corpus.o.subcorpora.get(subcorpusName);
        })
        .then(cQiSubcorpus => {
          subcorpus.o = cQiSubcorpus;
          return cQiSubcorpus.paginate(1, this.settings.perPage, this.settings.context);
        })
        .then(
          paginatedSubcorpus => {
            subcorpus.p = paginatedSubcorpus;
            // Every result is additionally reachable under the name "Last"
            if (subcorpusName !== 'Last') {this.data.subcorpora.Last = subcorpus;}
            this.data.subcorpora[subcorpusName] = subcorpus;
            this.settings.selectedSubcorpus = subcorpusName;
            this.renderSubcorpusList();
            this.renderSubcorpusInfo();
            this.renderSubcorpusActions();
            this.renderSubcorpusItems();
            this.renderSubcorpusPagination();
            this.elements.progress.classList.add('hide');
            this.app.enableActionElements();
          },
          cQiError => {this.handleCQiError(cQiError);}
        );
    });
    this.elements.form.addEventListener('change', event => {
      if (event.target === this.elements.form['context']) {
        this.settings.context = parseInt(this.elements.form['context'].value, 10);
        this.elements.form.submit.click();
      }
      if (event.target === this.elements.form['per-page']) {
        this.settings.perPage = parseInt(this.elements.form['per-page'].value, 10);
        this.elements.form.submit.click();
      }
      if (event.target === this.elements.form['text-style']) {
        this.settings.textStyle = parseInt(this.elements.form['text-style'].value, 10);
        this.setTextStyle();
      }
      if (event.target === this.elements.form['token-representation']) {
        this.settings.tokenRepresentation = this.elements.form['token-representation'].value;
        this.setTokenRepresentation();
      }
    });
  }

  /**
   * Flashes `<code>: <msg>` when the error carries a payload with both
   * fields, otherwise flashes `fallbackMessage` if one is given.
   *
   * @param cQiError Rejection value of a CQi request.
   * @param fallbackMessage Optional message for errors without such payload.
   */
  flashCQiError(cQiError, fallbackMessage) {
    let payload = cQiError?.payload;
    if (payload !== null && typeof payload === 'object' && 'code' in payload && 'msg' in payload) {
      nopaque.appClient.flash(`${payload.code}: ${payload.msg}`, 'error');
    } else if (fallbackMessage !== undefined) {
      nopaque.appClient.flash(fallbackMessage, 'error');
    }
  }

  /**
   * Common failure path of a request: shows the error in the error element,
   * flashes it, hides the progress bar and re-enables the action elements.
   *
   * @param cQiError Rejection value of a CQi request.
   */
  handleCQiError(cQiError) {
    this.elements.error.innerText = JSON.stringify(cQiError);
    this.elements.error.classList.remove('hide');
    this.flashCQiError(cQiError);
    this.elements.progress.classList.add('hide');
    this.app.enableActionElements();
  }

  /** Empties and hides the subcorpus selector list. */
  clearSubcorpusList() {
    this.elements.subcorpusList.innerHTML = '';
    this.elements.subcorpusList.classList.add('hide');
  }

  /**
   * Renders one selector button per stored subcorpus. The button of the
   * selected subcorpus is disabled, the others switch the selection.
   */
  renderSubcorpusList() {
    this.clearSubcorpusList();
    let html = '';
    for (let subcorpusName of Object.keys(this.data.subcorpora)) {
      // The name is user input: escape it for the attribute and the label
      let escapedName = CorpusAnalysisConcordance.escapeHTML(subcorpusName);
      html += `<a class="btn waves-effect waves-light subcorpus-selector" data-target="${escapedName}"><i class="material-icons left">bookmark</i>${escapedName}</a>`;
    }
    this.elements.subcorpusList.innerHTML = html;
    for (let subcorpusSelectorElement of this.elements.subcorpusList.querySelectorAll('.subcorpus-selector')) {
      let subcorpusName = subcorpusSelectorElement.dataset.target;
      if (subcorpusName === this.settings.selectedSubcorpus) {
        subcorpusSelectorElement.classList.add('disabled');
        continue;
      }
      subcorpusSelectorElement.addEventListener('click', () => {
        this.settings.selectedSubcorpus = subcorpusName;
        this.elements.progress.classList.remove('hide');
        this.renderSubcorpusList();
        this.renderSubcorpusInfo();
        this.renderSubcorpusActions();
        this.renderSubcorpusItems();
        this.renderSubcorpusPagination();
        this.elements.progress.classList.add('hide');
      });
    }
    this.elements.subcorpusList.classList.remove('hide');
  }

  /** Empties and hides the subcorpus info line. */
  clearSubcorpusInfo() {
    this.elements.subcorpusInfo.innerHTML = '';
    this.elements.subcorpusInfo.classList.add('hide');
  }

  /** Shows the number of matches and the query of the selected subcorpus. */
  renderSubcorpusInfo() {
    let subcorpus = this.data.subcorpora[this.settings.selectedSubcorpus];
    this.clearSubcorpusInfo();
    this.elements.subcorpusInfo.innerHTML = `${subcorpus.p.total} matches found for <code>${CorpusAnalysisConcordance.escapeHTML(subcorpus.q)}</code>`;
    this.elements.subcorpusInfo.classList.remove('hide');
  }

  /** Destroys the tooltips of the action buttons and removes the buttons. */
  clearSubcorpusActions() {
    for (let tooltippedElement of this.elements.subcorpusActions.querySelectorAll('.tooltipped')) {
      M.Tooltip.getInstance(tooltippedElement)?.destroy();
    }
    this.elements.subcorpusActions.innerHTML = '';
  }

  /**
   * Renders the download and delete buttons of the selected subcorpus and
   * wires the delete button. Only the delete button gets a handler here.
   */
  renderSubcorpusActions() {
    this.clearSubcorpusActions();
    this.elements.subcorpusActions.innerHTML += `
      <a class="btn-floating btn-small tooltipped waves-effect waves-light corpus-analysis-action download-subcorpus-trigger" data-tooltip="Download subcorpus">
        <i class="material-icons">file_download</i>
      </a>
      <a class="btn-floating btn-small red tooltipped waves-effect waves-light corpus-analysis-action delete-subcorpus-trigger" data-tooltip="Delete subcorpus">
        <i class="material-icons">delete</i>
      </a>
    `.trim();
    M.Tooltip.init(this.elements.subcorpusActions.querySelectorAll('.tooltipped'));
    this.elements.subcorpusActions.querySelector('.delete-subcorpus-trigger').addEventListener('click', event => {
      event.preventDefault();
      let subcorpus = this.data.subcorpora[this.settings.selectedSubcorpus];
      subcorpus.o.drop().then(
        () => {
          nopaque.appClient.flash(`${subcorpus.o.name} deleted`, 'corpus');
          // Remove every key referring to the dropped subcorpus; this also
          // covers the "Last" alias, which would otherwise keep pointing to
          // a subcorpus that no longer exists on the server.
          for (let [subcorpusName, candidate] of Object.entries(this.data.subcorpora)) {
            if (candidate === subcorpus) {delete this.data.subcorpora[subcorpusName];}
          }
          // Select the first remaining subcorpus, if any
          this.settings.selectedSubcorpus = Object.keys(this.data.subcorpora)[0];
          this.renderSubcorpusList();
          if (this.settings.selectedSubcorpus) {
            this.renderSubcorpusInfo();
            this.renderSubcorpusActions();
            this.renderSubcorpusItems();
            this.renderSubcorpusPagination();
          } else {
            this.clearSubcorpusInfo();
            this.clearSubcorpusActions();
            this.clearSubcorpusItems();
            this.clearSubcorpusPagination();
          }
        },
        cQiError => {
          this.flashCQiError(cQiError, JSON.stringify(cQiError));
        }
      );
    });
  }

  /** Destroys token tooltips and resets the table to its placeholder row. */
  clearSubcorpusItems() {
    // Destroy with .p-attr elements associated Materialize tooltips
    for (let pAttrElement of this.elements.subcorpusItems.querySelectorAll('.p-attr.tooltipped')) {
      M.Tooltip.getInstance(pAttrElement)?.destroy();
    }
    this.elements.subcorpusItems.innerHTML = `
      <tr class="show-if-only-child">
        <td colspan="100%">
          <p>
            <span class="card-title"><i class="left material-icons" style="font-size: inherit;">search</i>Nothing here...</span><br>
            No matches available.
          </p>
        </td>
      </tr>
    `.trim();
  }

  /**
   * Renders one table row per item of the current page and wires the
   * "go to reader" buttons, which open the reader on the page containing the
   * match and select the match there.
   */
  renderSubcorpusItems() {
    let subcorpus = this.data.subcorpora[this.settings.selectedSubcorpus];
    this.clearSubcorpusItems();
    // Collect all rows first so that the table markup is parsed only once
    let rowsHTML = '';
    for (let item of subcorpus.p.items) {
      rowsHTML += `
        <tr class="item" data-id="${item.num}">
          <td class="num">${item.num}</td>
          <td class="text-title">${CorpusAnalysisConcordance.escapeHTML(this.foo(...item.c))}</td>
          <td class="left-context">${item.lc ? this.cposRange2HTML(...item.lc) : ''}</td>
          <td class="kwic">${this.cposRange2HTML(...item.c)}</td>
          <td class="right-context">${item.rc ? this.cposRange2HTML(...item.rc) : ''}</td>
          <td class="actions right-align">
            <a class="btn-floating btn-small waves-effect waves-light corpus-analysis-action goto-reader-trigger"><i class="material-icons prefix">search</i></a>
            <a class="btn-floating btn-small waves-effect waves-light corpus-analysis-action export-trigger"><i class="material-icons prefix">add</i></a>
          </td>
        </tr>
      `.trim();
    }
    if (rowsHTML !== '') {this.elements.subcorpusItems.innerHTML += rowsHTML;}
    this.setTextStyle();
    this.setTokenRepresentation();
    for (let gotoReaderTriggerElement of this.elements.subcorpusItems.querySelectorAll('.goto-reader-trigger')) {
      gotoReaderTriggerElement.addEventListener('click', event => {
        event.preventDefault();
        let corpusAnalysisReader = this.app.extensions.Reader;
        let itemId = parseInt(gotoReaderTriggerElement.closest('.item').dataset.id, 10);
        let item = subcorpus.p.items.find(x => x.num === itemId);
        if (item === undefined) {return;}
        let page = Math.max(1, Math.ceil(item.c[0] / corpusAnalysisReader.settings.perPage));
        corpusAnalysisReader.page(page, () => {
          let range = new Range();
          // Clamp the selection to the tokens present on the reader page
          let leftCpos = corpusAnalysisReader.data.corpus.p.items[0].includes(item.c[0]) ? item.c[0] : corpusAnalysisReader.data.corpus.p.items[0][0];
          let rightCpos = corpusAnalysisReader.data.corpus.p.items[0].includes(item.c[1]) ? item.c[1] : corpusAnalysisReader.data.corpus.p.items[0].at(-1);
          let leftElement = corpusAnalysisReader.elements.corpus.querySelector(`.p-attr[data-cpos="${leftCpos}"]`);
          let rightElement = corpusAnalysisReader.elements.corpus.querySelector(`.p-attr[data-cpos="${rightCpos}"]`);
          range.setStartBefore(leftElement);
          range.setEndAfter(rightElement);
          document.getSelection().removeAllRanges();
          document.getSelection().addRange(range);
        });
        this.app.elements.m.extensionTabs.select('reader-extension-container');
      });
    }
  }

  /** Empties and hides the pagination. */
  clearSubcorpusPagination() {
    this.elements.subcorpusPagination.innerHTML = '';
    this.elements.subcorpusPagination.classList.add('hide');
  }

  /**
   * Renders first/previous/numbered/next/last page links for the selected
   * subcorpus. Links carrying a `data-target` request that page on click.
   */
  renderSubcorpusPagination() {
    let subcorpus = this.data.subcorpora[this.settings.selectedSubcorpus];
    this.clearSubcorpusPagination();
    if (subcorpus.p.pages === 0) {return;}
    // Collect all entries first so that the markup is parsed only once
    let html = '';
    html += `
      <li class="${subcorpus.p.page === 1 ? 'disabled' : 'waves-effect'}">
        <a class="corpus-analysis-action pagination-trigger" ${subcorpus.p.page === 1 ? '' : 'data-target="1"'}>
          <i class="material-icons">first_page</i>
        </a>
      </li>
    `.trim();
    html += `
      <li class="${subcorpus.p.has_prev ? 'waves-effect' : 'disabled'}">
        <a class="corpus-analysis-action pagination-trigger" ${subcorpus.p.has_prev ? 'data-target="' + subcorpus.p.prev_num + '"' : ''}>
          <i class="material-icons">chevron_left</i>
        </a>
      </li>
    `.trim();
    for (let i = 1; i <= subcorpus.p.pages; i++) {
      html += `
        <li class="${i === subcorpus.p.page ? 'active' : 'waves-effect'}">
          <a class="corpus-analysis-action pagination-trigger" ${i === subcorpus.p.page ? '' : 'data-target="' + i + '"'}>${i}</a>
        </li>
      `.trim();
    }
    html += `
      <li class="${subcorpus.p.has_next ? 'waves-effect' : 'disabled'}">
        <a class="corpus-analysis-action pagination-trigger" ${subcorpus.p.has_next ? 'data-target="' + subcorpus.p.next_num + '"' : ''}>
          <i class="material-icons">chevron_right</i>
        </a>
      </li>
    `.trim();
    html += `
      <li class="${subcorpus.p.page === subcorpus.p.pages ? 'disabled' : 'waves-effect'}">
        <a class="corpus-analysis-action pagination-trigger" ${subcorpus.p.page === subcorpus.p.pages ? '' : 'data-target="' + subcorpus.p.pages + '"'}>
          <i class="material-icons">last_page</i>
        </a>
      </li>
    `.trim();
    this.elements.subcorpusPagination.innerHTML = html;
    for (let paginationTriggerElement of this.elements.subcorpusPagination.querySelectorAll('.pagination-trigger[data-target]')) {
      paginationTriggerElement.addEventListener('click', event => {
        event.preventDefault();
        this.app.disableActionElements();
        this.elements.progress.classList.remove('hide');
        let page = parseInt(paginationTriggerElement.dataset.target, 10);
        subcorpus.o.paginate(page, this.settings.perPage, this.settings.context)
          .then(
            paginatedSubcorpus => {
              subcorpus.p = paginatedSubcorpus;
              this.renderSubcorpusItems();
              this.renderSubcorpusPagination();
              this.elements.progress.classList.add('hide');
              this.app.enableActionElements();
            },
            // Without this the UI would stay disabled after a failed request
            cQiError => {this.handleCQiError(cQiError);}
          );
      });
    }
    this.elements.subcorpusPagination.classList.remove('hide');
  }

  /**
   * Returns the titles of the texts occurring in a cpos range.
   *
   * @param firstCpos First corpus position (inclusive).
   * @param lastCpos Last corpus position (inclusive).
   * @returns Titles of the distinct texts, joined with ', '. The string is
   *   not HTML escaped.
   */
  foo(firstCpos, lastCpos) {
    let subcorpus = this.data.subcorpora[this.settings.selectedSubcorpus];
    let textIds = new Set();
    for (let cpos = firstCpos; cpos <= lastCpos; cpos++) {
      textIds.add(subcorpus.p.lookups.cpos_lookup[cpos].text);
    }
    return [...textIds].map(x => subcorpus.p.lookups.text_lookup[x].title).join(', ');
  }

  /**
   * Builds the markup for a cpos range: one empty `.p-attr` span per token
   * (filled later by setTokenRepresentation()), wrapped in an `.s-attr` span
   * with a trailing type badge for every run of tokens sharing an `ent`.
   *
   * @param firstCpos First corpus position (inclusive).
   * @param lastCpos Last corpus position (inclusive).
   * @returns HTML string.
   */
  cposRange2HTML(firstCpos, lastCpos) {
    let subcorpus = this.data.subcorpora[this.settings.selectedSubcorpus];
    let prevPAttr, pAttr, nextPAttr;
    let isEntityStart, isEntityEnd;
    let html = '';
    for (let cpos = firstCpos; cpos <= lastCpos; cpos++) {
      // Neighbours outside the range count as "no entity", so an entity
      // crossing a range border is opened/closed at the border.
      prevPAttr = cpos > firstCpos ? subcorpus.p.lookups.cpos_lookup[cpos - 1] : null;
      pAttr = subcorpus.p.lookups.cpos_lookup[cpos];
      nextPAttr = cpos < lastCpos ? subcorpus.p.lookups.cpos_lookup[cpos + 1] : null;
      isEntityStart = 'ent' in pAttr && pAttr.ent !== prevPAttr?.ent;
      isEntityEnd = 'ent' in pAttr && pAttr.ent !== nextPAttr?.ent;
      // Add a space before pAttr (skipped only for punctuation that is the
      // first token of the range)
      if (cpos !== firstCpos || pAttr.simple_pos !== 'PUNCT') {html += ' ';}
      // Add entity start
      if (isEntityStart) {
        html += `<span class="s-attr" data-cpos="${cpos}" data-id="${pAttr.ent}" data-type="ent">`;
      }
      // Add pAttr
      html += `<span class="p-attr" data-cpos="${cpos}"></span>`;
      // Add entity end
      if (isEntityEnd) {
        html += ` <span class="badge black-text hide new s-attr white" cpos="${cpos}" data-type="ent_type" data-badge-caption="">${subcorpus.p.lookups.ent_lookup[pAttr.ent].type}</span>`;
        html += '</span>';
      }
    }
    return html;
  }

  /**
   * Applies `settings.textStyle` to the rendered items: >= 0 resets tokens
   * and entities to plain text, >= 1 additionally highlights entities,
   * >= 2 additionally turns every token into a chip with a property tooltip.
   */
  setTextStyle() {
    let subcorpus = this.data.subcorpora[this.settings.selectedSubcorpus];
    let escapeHTML = CorpusAnalysisConcordance.escapeHTML;
    if (this.settings.textStyle >= 0) {
      // Destroy with .p-attr elements associated Materialize tooltips
      for (let pAttrElement of this.elements.subcorpusItems.querySelectorAll('.p-attr.tooltipped')) {
        M.Tooltip.getInstance(pAttrElement)?.destroy();
      }
      // Set basic styling on .p-attr elements
      for (let pAttrElement of this.elements.subcorpusItems.querySelectorAll('.p-attr')) {
        pAttrElement.setAttribute('class', 'p-attr');
      }
      // Set basic styling on .s-attr[data-type="ent"] elements
      for (let entElement of this.elements.subcorpusItems.querySelectorAll('.s-attr[data-type="ent"]')) {
        entElement.querySelector('.s-attr[data-type="ent_type"]').classList.add('hide');
        entElement.removeAttribute('style');
        entElement.setAttribute('class', 's-attr');
      }
    }
    if (this.settings.textStyle >= 1) {
      // Set advanced styling on .s-attr[data-type="ent"] elements
      for (let entElement of this.elements.subcorpusItems.querySelectorAll('.s-attr[data-type="ent"]')) {
        let ent = subcorpus.p.lookups.ent_lookup[entElement.dataset.id];
        entElement.classList.add('chip');
        entElement.style.backgroundColor = CorpusAnalysisApp.entitiyColors[ent.type];
        entElement.querySelector('.s-attr[data-type="ent_type"]').classList.remove('hide');
      }
    }
    if (this.settings.textStyle >= 2) {
      // Set advanced styling on .p-attr elements
      for (let pAttrElement of this.elements.subcorpusItems.querySelectorAll('.p-attr')) {
        pAttrElement.classList.add('chip', 'hoverable', 'tooltipped');
        let cpos = pAttrElement.dataset.cpos;
        let pAttr = subcorpus.p.lookups.cpos_lookup[cpos];
        let positionalPropertiesHTML = `
          <p class="left-align">
            <b>Positional properties</b><br>
            <span>Token: ${cpos}</span>
        `.trim();
        let structuralPropertiesHTML = `
          <p class="left-align">
            <b>Structural properties</b>
        `.trim();
        for (let [property, propertyValue] of Object.entries(pAttr)) {
          if (['lemma', 'ner', 'pos', 'simple_pos', 'word'].includes(property)) {
            if (propertyValue === 'None') {continue;}
            // Values are corpus text and may contain markup characters
            positionalPropertiesHTML += `<br><i class="material-icons" style="font-size: inherit;">subdirectory_arrow_right</i>${property}: ${escapeHTML(propertyValue)}`;
          } else {
            structuralPropertiesHTML += `<br><span>${property}: ${escapeHTML(propertyValue)}</span>`;
            if (!(`${property}_lookup` in subcorpus.p.lookups)) {continue;}
            for (let [subproperty, subpropertyValue] of Object.entries(subcorpus.p.lookups[`${property}_lookup`][propertyValue])) {
              if (subpropertyValue === 'NULL') {continue;}
              structuralPropertiesHTML += `<br><i class="material-icons" style="font-size: inherit;">subdirectory_arrow_right</i>${subproperty}: ${escapeHTML(subpropertyValue)}`;
            }
          }
        }
        positionalPropertiesHTML += '</p>';
        structuralPropertiesHTML += '</p>';
        M.Tooltip.init(
          pAttrElement,
          {html: positionalPropertiesHTML + structuralPropertiesHTML}
        );
      }
    }
  }

  /**
   * Fills every rendered token with the positional property selected in
   * `settings.tokenRepresentation`.
   */
  setTokenRepresentation() {
    let subcorpus = this.data.subcorpora[this.settings.selectedSubcorpus];
    for (let pAttrElement of this.elements.subcorpusItems.querySelectorAll('.p-attr')) {
      let pAttr = subcorpus.p.lookups.cpos_lookup[pAttrElement.dataset.cpos];
      pAttrElement.innerText = pAttr[this.settings.tokenRepresentation];
    }
  }
}

View File

@ -0,0 +1,270 @@
class CorpusAnalysisReader {
name = 'Reader';
constructor(app) {
this.app = app;
this.data = {};
this.elements = {
// TODO: Prefix elements with "corpus-analysis-app-"
container: document.querySelector('#reader-extension-container'),
error: document.querySelector('#reader-extension-error'),
form: document.querySelector('#reader-extension-form'),
progress: document.querySelector('#reader-extension-progress'),
corpus: document.querySelector('#reader-extension-corpus'),
corpusPagination: document.querySelector('#reader-extension-corpus-pagination')
};
this.settings = {
perPage: parseInt(this.elements.form['per-page'].value),
textStyle: parseInt(this.elements.form['text-style'].value),
tokenRepresentation: this.elements.form['token-representation'].value
}
this.app.registerExtension(this);
}
init() {
// Init data
this.data.corpus = this.app.data.corpus;
this.data.subcorpora = {};
// Add event listeners
this.elements.form.addEventListener('submit', (event) => {
event.preventDefault();
this.app.disableActionElements();
this.elements.error.innerText = '';
this.elements.error.classList.add('hide');
this.elements.progress.classList.remove('hide');
this.data.corpus.o.paginate(1, this.settings.perPage)
.then(
paginatedCorpus => {
this.data.corpus.p = paginatedCorpus;
this.renderCorpus();
this.renderCorpusPagination();
this.elements.progress.classList.add('hide');
this.app.enableActionElements();
},
error => {
this.elements.error.innerText = JSON.stringify(error);
this.elements.error.classList.remove('hide');
if ('payload' in error && 'code' in error.payload && 'msg' in error.payload) {
nopaque.appClient.flash(`${error.payload.code}: ${error.payload.msg}`, 'error');
}
this.elements.progress.classList.add('hide');
this.app.enableActionElements();
}
);
});
this.elements.form.addEventListener('change', event => {
if (event.target === this.elements.form['per-page']) {
this.settings.perPage = parseInt(this.elements.form['per-page'].value);
this.elements.form.submit.click();
}
if (event.target === this.elements.form['text-style']) {
this.settings.textStyle = parseInt(this.elements.form['text-style'].value);
this.setTextStyle();
}
if (event.target === this.elements.form['token-representation']) {
this.settings.tokenRepresentation = this.elements.form['token-representation'].value;
this.setTokenRepresentation();
}
});
// Load initial data
this.elements.form.submit.click();
}
clearCorpus() {
// Destroy with .p-attr elements associated Materialize tooltips
for (let pAttrElement of this.elements.corpus.querySelectorAll('.p-attr.tooltipped')) {
M.Tooltip.getInstance(pAttrElement)?.destroy();
}
this.elements.corpus.innerHTML = `
<p class="show-if-only-child">
<span class="card-title"><i class="left material-icons" style="font-size: inherit;">search</i>Nothing here...</span><br>
No text available.
</p>
`.trim();
}
renderCorpus() {
this.clearCorpus();
let item = this.data.corpus.p.items[0];
this.elements.corpus.innerHTML += `
<p>${this.cposRange2HTML(item[0], item[item.length - 1])}</p>
`.trim();
this.setTextStyle();
this.setTokenRepresentation();
}
clearCorpusPagination() {
this.elements.corpusPagination.innerHTML = '';
this.elements.corpusPagination.classList.add('hide');
}
renderCorpusPagination() {
this.clearCorpusPagination();
if (this.data.corpus.p.pages === 0) {return;}
this.elements.corpusPagination.innerHTML += `
<li class="${this.data.corpus.p.page === 1 ? 'disabled' : 'waves-effect'}">
<a class="corpus-analysis-action pagination-trigger" ${this.data.corpus.p.page === 1 ? '' : 'data-target="1"'}>
<i class="material-icons">first_page</i>
</a>
</li>
`.trim();
this.elements.corpusPagination.innerHTML += `
<li class="${this.data.corpus.p.has_prev ? 'waves-effect' : 'disabled'}">
<a class="corpus-analysis-action pagination-trigger" ${this.data.corpus.p.has_prev ? 'data-target="' + this.data.corpus.p.prev_num + '"' : ''}>
<i class="material-icons">chevron_left</i>
</a>
</li>
`.trim();
for (let i = 1; i <= this.data.corpus.p.pages; i++) {
this.elements.corpusPagination.innerHTML += `
<li class="${i === this.data.corpus.p.page ? 'active' : 'waves-effect'}">
<a class="corpus-analysis-action pagination-trigger" ${i === this.data.corpus.p.page ? '' : 'data-target="' + i + '"'}>${i}</a>
</li>
`.trim();
}
this.elements.corpusPagination.innerHTML += `
<li class="${this.data.corpus.p.has_next ? 'waves-effect' : 'disabled'}">
<a class="corpus-analysis-action pagination-trigger" ${this.data.corpus.p.has_next ? 'data-target="' + this.data.corpus.p.next_num + '"' : ''}>
<i class="material-icons">chevron_right</i>
</a>
</li>
`.trim();
this.elements.corpusPagination.innerHTML += `
<li class="${this.data.corpus.p.page === this.data.corpus.p.pages ? 'disabled' : 'waves-effect'}">
<a class="corpus-analysis-action pagination-trigger" ${this.data.corpus.p.page === this.data.corpus.p.pages ? '' : 'data-target="' + this.data.corpus.p.pages + '"'}>
<i class="material-icons">last_page</i>
</a>
</li>
`.trim();
for (let paginateTriggerElement of this.elements.corpusPagination.querySelectorAll('.pagination-trigger[data-target]')) {
paginateTriggerElement.addEventListener('click', event => {
event.preventDefault();
let page = parseInt(paginateTriggerElement.dataset.target);
this.page(page);
});
}
this.elements.corpusPagination.classList.remove('hide');
}
cposRange2HTML(firstCpos, lastCpos) {
let prevPAttr, pAttr, nextPAttr;
let isEntityStart, isEntityEnd;
let html = '';
for (let cpos = firstCpos; cpos <= lastCpos; cpos++) {
prevPAttr = cpos > firstCpos ? this.data.corpus.p.lookups.cpos_lookup[cpos - 1] : null;
pAttr = this.data.corpus.p.lookups.cpos_lookup[cpos];
nextPAttr = cpos < lastCpos ? this.data.corpus.p.lookups.cpos_lookup[cpos + 1] : null;
isEntityStart = 'ent' in pAttr && pAttr.ent !== prevPAttr?.ent;
isEntityEnd = 'ent' in pAttr && pAttr.ent !== nextPAttr?.ent;
// Add a space before pAttr
if (cpos !== firstCpos || pAttr.simple_pos !== 'PUNCT') {html += ' ';}
// Add entity start
if (isEntityStart) {
html += `<span class="s-attr" data-cpos="${cpos}" data-id="${pAttr.ent}" data-type="ent">`;
}
// Add pAttr
html += `<span class="p-attr" data-cpos="${cpos}"></span>`;
// Add entity end
if (isEntityEnd) {
html += ` <span class="badge black-text hide new s-attr white" cpos="${cpos}" data-type="ent_type" data-badge-caption="">${this.data.corpus.p.lookups.ent_lookup[pAttr.ent].type}</span>`;
html += '</span>';
}
}
return html;
}
page(pageNum, callback) {
if (this.data.corpus.p.page === pageNum && typeof callback === 'function') {
callback();
return;
}
this.app.disableActionElements();
this.elements.progress.classList.remove('hide');
this.data.corpus.o.paginate(pageNum, this.settings.perPage)
.then(
paginatedCorpus => {
this.data.corpus.p = paginatedCorpus;
this.renderCorpus();
this.renderCorpusPagination();
this.elements.progress.classList.add('hide');
this.app.enableActionElements();
if (typeof callback === 'function') {callback();}
}
)
}
setTextStyle() {
if (this.settings.textStyle >= 0) {
// Destroy with .p-attr elements associated Materialize tooltips
for (let pAttrElement of this.elements.corpus.querySelectorAll('.p-attr.tooltipped')) {
M.Tooltip.getInstance(pAttrElement)?.destroy();
}
// Set basic styling on .p-attr elements
for (let pAttrElement of this.elements.corpus.querySelectorAll('.p-attr')) {
pAttrElement.setAttribute('class', 'p-attr');
}
// Set basic styling on .s-attr[data-type="ent"] elements
for (let entElement of this.elements.corpus.querySelectorAll('.s-attr[data-type="ent"]')) {
entElement.querySelector('.s-attr[data-type="ent_type"]').classList.add('hide');
entElement.removeAttribute('style');
entElement.setAttribute('class', 's-attr');
}
}
if (this.settings.textStyle >= 1) {
// Set advanced styling on .s-attr[data-type="ent"] elements
for (let entElement of this.elements.corpus.querySelectorAll('.s-attr[data-type="ent"]')) {
let ent = this.data.corpus.p.lookups.ent_lookup[entElement.dataset.id];
entElement.classList.add('chip');
entElement.style.backgroundColor = CorpusAnalysisApp.entitiyColors[ent.type];
entElement.querySelector('.s-attr[data-type="ent_type"]').classList.remove('hide');
}
}
if (this.settings.textStyle >= 2) {
// Set advanced styling on .p-attr elements
for (let pAttrElement of this.elements.corpus.querySelectorAll('.p-attr')) {
pAttrElement.classList.add('chip', 'hoverable', 'tooltipped');
let cpos = pAttrElement.dataset.cpos;
let pAttr = this.data.corpus.p.lookups.cpos_lookup[cpos];
let positionalPropertiesHTML = `
<p class="left-align">
<b>Positional properties</b><br>
<span>Token: ${cpos}</span>
`.trim();
let structuralPropertiesHTML = `
<p class="left-align">
<b>Structural properties</b>
`.trim();
for (let [property, propertyValue] of Object.entries(pAttr)) {
if (['lemma', 'ner', 'pos', 'simple_pos', 'word'].includes(property)) {
if (propertyValue === 'None') {continue;}
positionalPropertiesHTML += `<br><i class="material-icons" style="font-size: inherit;">subdirectory_arrow_right</i>${property}: ${propertyValue}`;
} else {
structuralPropertiesHTML += `<br><span>${property}: ${propertyValue}</span>`;
if (!(`${property}_lookup` in this.data.corpus.p.lookups)) {continue;}
for (let [subproperty, subpropertyValue] of Object.entries(this.data.corpus.p.lookups[`${property}_lookup`][propertyValue])) {
if (subpropertyValue === 'NULL') {continue;}
structuralPropertiesHTML += `<br><i class="material-icons" style="font-size: inherit;">subdirectory_arrow_right</i>${subproperty}: ${subpropertyValue}`
}
}
}
positionalPropertiesHTML += '</p>';
structuralPropertiesHTML += '</p>';
M.Tooltip.init(
pAttrElement,
{html: positionalPropertiesHTML + structuralPropertiesHTML}
);
}
}
}
setTokenRepresentation() {
for (let pAttrElement of this.elements.corpus.querySelectorAll('.p-attr')) {
let pAttr = this.data.corpus.p.lookups.cpos_lookup[pAttrElement.dataset.cpos];
pAttrElement.innerText = pAttr[this.settings.tokenRepresentation];
}
}
}

View File

@ -13,7 +13,7 @@ class CorpusDisplay extends RessourceDisplay {
this.setLastEditedDate(this.user.data.corpora[this.corpusId].last_edited_date);
this.setStatus(this.user.data.corpora[this.corpusId].status);
this.setTitle(this.user.data.corpora[this.corpusId].title);
this.setTokenRatio(this.user.data.corpora[this.corpusId].current_nr_of_tokens, this.user.data.corpora[this.corpusId].max_nr_of_tokens);
this.setTokenRatio(this.user.data.corpora[this.corpusId].num_tokens, this.user.data.corpora[this.corpusId].max_num_tokens);
}
patch(patch) {
@ -53,9 +53,8 @@ class CorpusDisplay extends RessourceDisplay {
for (let element of this.displayElement.querySelectorAll('.corpus-title')) {this.setElement(element, title);}
}
setTokenRatio(currentNrOfTokens, maxNrOfTokens) {
let tokenRatio = `${currentNrOfTokens}/${maxNrOfTokens}`;
for (let element of this.displayElement.querySelectorAll('.corpus-token-ratio')) {this.setElement(element, tokenRatio);}
setTokenRatio(numTokens, maxNumTokens) {
for (let element of this.displayElement.querySelectorAll('.corpus-token-ratio')) {this.setElement(element, `${numTokens}/${maxNumTokens}`);}
}
setDescription(description) {