// nopaque/app/static/js/CorpusAnalysis/CQiClient.js
/**
 * Entry point for talking to the corpus analysis backend over Socket.IO.
 *
 * Every request method returns a Promise that is settled from the server's
 * acknowledgement object: it resolves with `response.payload` when
 * `response.code === 200` and rejects with the whole `response` otherwise.
 */
class CQiClient {
  /**
   * Opens the socket for the corpus analysis namespace.
   *
   * @param {*} corpusId - Passed to the server as `auth.corpus_id`.
   */
  constructor(corpusId) {
    this.socket = io(
      '/corpora/corpus/corpus_analysis',
      {
        auth: {corpus_id: corpusId},
        transports: ['websocket'],
        upgrade: false
      }
    );
    // Only reflects the outcome of connect()/disconnect() calls made through
    // this object; it is not updated from socket-level events.
    this.connected = false;
    this.corpora = new CQiCorpusCollection(this.socket);
  }

  /**
   * Emits 'cqi.connect' and marks the client as connected on success.
   *
   * @returns {Promise<*>} Resolves with the response payload.
   */
  connect() {
    return new Promise((resolve, reject) => {
      this.socket.emit('cqi.connect', response => {
        if (response.code === 200) {
          this.connected = true;
          resolve(response.payload);
        } else {
          reject(response);
        }
      });
    });
  }

  /**
   * Emits 'cqi.disconnect' and marks the client as disconnected on success.
   *
   * @returns {Promise<*>} Resolves with the response payload.
   */
  disconnect() {
    return new Promise((resolve, reject) => {
      this.socket.emit('cqi.disconnect', response => {
        if (response.code === 200) {
          this.connected = false;
          resolve(response.payload);
        } else {
          reject(response);
        }
      });
    });
  }

  /**
   * Emits 'cqi.ping'.
   *
   * @returns {Promise<*>} Resolves with the response payload.
   */
  ping() {
    return new Promise((resolve, reject) => {
      this.socket.emit('cqi.ping', response => {
        if (response.code === 200) {
          resolve(response.payload);
        } else {
          reject(response);
        }
      });
    });
  }
}
/**
 * Access to the corpora available through a socket.
 *
 * Request methods resolve when the server acknowledges with
 * `response.code === 200` and reject with the whole `response` otherwise.
 */
class CQiCorpusCollection {
  /**
   * @param {Object} socket - Object whose `emit` is used for all requests.
   */
  constructor(socket) {
    this.socket = socket;
  }

  /**
   * Emits 'cqi.corpora.get' for one corpus.
   *
   * @param {*} corpusName - Sent as `corpus_name`.
   * @returns {Promise<CQiCorpus>} Corpus built from the response payload.
   */
  get(corpusName) {
    return new Promise((resolve, reject) => {
      const args = {corpus_name: corpusName};
      this.socket.emit('cqi.corpora.get', args, response => {
        if (response.code === 200) {
          resolve(new CQiCorpus(this.socket, response.payload));
        } else {
          reject(response);
        }
      });
    });
  }

  /**
   * Emits 'cqi.corpora.list'.
   *
   * @returns {Promise<CQiCorpus[]>} One corpus per payload entry.
   */
  list() {
    return new Promise((resolve, reject) => {
      this.socket.emit('cqi.corpora.list', response => {
        if (response.code === 200) {
          // Entries are wrapped the same way get() wraps its payload.
          // (CQiSubcorpus would be wrong here: its constructor expects
          // (socket, corpus, attrs) and fails with only two arguments.)
          resolve(response.payload.map(x => new CQiCorpus(this.socket, x)));
        } else {
          reject(response);
        }
      });
    });
  }
}
/**
 * A single corpus plus the requests that can be made against it.
 *
 * Unless noted otherwise, request methods resolve with `response.payload`
 * when the server acknowledges with `response.code === 200` and reject with
 * the whole `response` otherwise.
 */
class CQiCorpus {
  /**
   * @param {Object} socket - Object whose `emit` is used for all requests.
   * @param {Object} attrs - Corpus description; `charset`, `name`,
   *   `properties` and `size` are copied onto the instance.
   */
  constructor(socket, attrs) {
    this.socket = socket;
    this.charset = attrs.charset;
    this.name = attrs.name;
    this.properties = attrs.properties;
    this.size = attrs.size;
    this.alignmentAttributes = new CQiAlignmentAttributeCollection(this.socket, this);
    this.positionalAttributes = new CQiPositionalAttributeCollection(this.socket, this);
    this.structuralAttributes = new CQiStructuralAttributeCollection(this.socket, this);
    this.subcorpora = new CQiSubcorpusCollection(this.socket, this);
  }

  /**
   * Emits 'cqi.corpora.corpus.get_visualization_data' for this corpus.
   *
   * @returns {Promise<*>} Resolves with the response payload.
   */
  getVisualizationData() {
    return new Promise((resolve, reject) => {
      const args = {corpus_name: this.name};
      this.socket.emit('cqi.corpora.corpus.get_visualization_data', args, response => {
        if (response.code === 200) {
          resolve(response.payload);
        } else {
          reject(response);
        }
      });
    });
  }

  /**
   * Returns hard-coded placeholder statistics; no request is sent.
   *
   * The string values "int" inside the frequency maps are placeholders for
   * numbers, kept exactly as authored.
   *
   * @returns {Promise<Object>} Always resolves with a fresh placeholder object.
   */
  getCorpusData() {
    // TODO: replace the placeholder with a real request. The request sketched
    // alongside this placeholder was an emit of
    // 'cqi.corpora.corpus.getCorpusData' with {corpus_name: this.name},
    // settled like the other methods of this class.
    const dummyData = {
      "num_tokens": 2000, // number of tokens in the corpus
      "num_unique_words": 500, // number of unique words in the corpus
      "num_unique_lemmas": 200, // number of unique lemmas in the corpus
      "num_sentences": 90, // number of sentences in the corpus
      "average_sentence_length": 11, // average number of tokens per sentence in the corpus
      "num_ent_types": 30, // number of entities in the corpus
      "num_unique_ent_types": 10,
      "ent_type_freqs": {
        "str": 10 // number of ent_types with ent_type "str"
      },
      "texts": [
        {
          "num_tokens": 11, // number of tokens in the text
          "num_unique_words": 12, // number of unique words in the text
          "word_freqs": { // frequency of unique words in the text (sorted by frequency)
            "str": "int" // number of tokens with word "str"
          },
          "num_unique_lemmas": 15, // number of unique lemmas in the text
          "lemma_freqs": { // frequency of unique lemmas in the text (sorted by frequency)
            "str": "int" // number of tokens with lemma "str"
          },
          "num_sentences": 4, // number of sentences in the text
          "average_sentence_length": 3, // average number of tokens per sentence in the text
          "num_ent_types": 12, // number of ent_types in the text
          "num_unique_ent_types": 28, // number of unique ent_types in the text
          "num_entities_by_id": {
            "1": "int" // number of entities with id 1
          },
          "author": "Author Name",
          "title": "Titel",
          "publishing_year": 1950
        },
        {
          "num_tokens": 15,
          "num_unique_words": 4,
          "word_freqs": {
            "str": "int"
          },
          "num_unique_lemmas": 90,
          "lemma_freqs": {
            "str": "int"
          },
          "num_sentences": 11,
          "average_sentence_length": 3,
          "num_ent_types": 4,
          "num_unique_ent_types": 300,
          "num_entities_by_id": {
            "1": "int"
          },
          "author": "Author Name",
          "title": "Titel 1",
          "publishing_year": 1962
        },
        {
          "num_tokens": 11,
          "num_unique_words": 12,
          "word_freqs": {
            "str": "int"
          },
          "num_unique_lemmas": 64,
          "lemma_freqs": {
            "str": "int"
          },
          "num_sentences": 52,
          "average_sentence_length": 3,
          "num_ent_types": 45,
          "num_unique_ent_types": 68,
          "num_entities_by_id": {
            "1": "int"
          },
          "author": "Author Name",
          "title": "Titel 2",
          "publishing_year": 1850
        },
        {
          "num_tokens": 56,
          "num_unique_words": 13,
          "word_freqs": {
            "str": "int"
          },
          "num_unique_lemmas": 43,
          "lemma_freqs": {
            "str": "int"
          },
          "num_sentences": 45,
          "average_sentence_length": 56,
          "num_ent_types": 8792,
          "num_unique_ent_types": 56758,
          "num_entities_by_id": {
            "1": "int"
          },
          "author": "Author Name",
          "title": "Titel 3",
          "publishing_year": 1504
        },
        {
          "num_tokens": 54345,
          "num_unique_words": 561,
          "word_freqs": {
            "str": "int"
          },
          "num_unique_lemmas": 546,
          "lemma_freqs": {
            "str": "int"
          },
          "num_sentences": 5427,
          "average_sentence_length": 657,
          "num_ent_types": 3465,
          "num_unique_ent_types": 45,
          "num_entities_by_id": {
            "1": "int"
          },
          "author": "Author Name",
          "title": "Titel 4",
          "publishing_year": 1712
        },
        {
          "num_tokens": 4354,
          "num_unique_words": 45234,
          "word_freqs": {
            "testwort": 50,
            "testwort2": 1
          },
          "num_unique_lemmas": 15,
          "lemma_freqs": {
            "testlemma": 11,
            "testlemma2": 1
          },
          "num_sentences": 90,
          "average_sentence_length": 7,
          "num_ent_types": 19,
          "num_unique_ent_types": 5,
          "num_entities_by_id": {
            "1": "int"
          },
          "author": "Author Name 2",
          "title": "Titel 5",
          "publishing_year": 1951
        }
      ]
    };
    return Promise.resolve(dummyData);
  }

  /**
   * Emits 'cqi.corpora.corpus.drop' for this corpus.
   *
   * @returns {Promise<*>} Resolves with the response payload.
   */
  drop() {
    return new Promise((resolve, reject) => {
      const args = {corpus_name: this.name};
      this.socket.emit('cqi.corpora.corpus.drop', args, response => {
        if (response.code === 200) {
          resolve(response.payload);
        } else {
          reject(response);
        }
      });
    });
  }

  /**
   * Emits 'cqi.corpora.corpus.query' for this corpus.
   *
   * @param {*} subcorpus_name - Sent as `subcorpus_name`.
   * @param {*} queryString - Sent as `query`.
   * @returns {Promise<*>} Resolves with the response payload.
   */
  query(subcorpus_name, queryString) {
    return new Promise((resolve, reject) => {
      const args = {
        corpus_name: this.name,
        subcorpus_name: subcorpus_name,
        query: queryString
      };
      this.socket.emit('cqi.corpora.corpus.query', args, response => {
        if (response.code === 200) {
          resolve(response.payload);
        } else {
          reject(response);
        }
      });
    });
  }

  /**
   * Emits 'cqi.corpora.corpus.paginate' (nopaque specific CQi extension).
   *
   * @param {*} [page=1] - Sent as `page`.
   * @param {*} [perPage=20] - Sent as `per_page`.
   * @returns {Promise<*>} Resolves with the response payload.
   */
  paginate(page=1, perPage=20) {
    return new Promise((resolve, reject) => {
      const args = {corpus_name: this.name, page: page, per_page: perPage};
      this.socket.emit('cqi.corpora.corpus.paginate', args, response => {
        if (response.code === 200) {
          resolve(response.payload);
        } else {
          reject(response);
        }
      });
    });
  }

  /**
   * Emits 'cqi.corpora.corpus.update_db' for this corpus.
   *
   * Fire-and-forget: no acknowledgement callback is passed and nothing is
   * returned, so callers cannot observe success or failure.
   */
  updateDb() {
    const args = {corpus_name: this.name};
    this.socket.emit('cqi.corpora.corpus.update_db', args);
  }
}
/**
 * Access to the alignment attributes of one corpus.
 *
 * Request methods resolve when the server acknowledges with
 * `response.code === 200` and reject with the whole `response` otherwise.
 */
class CQiAlignmentAttributeCollection {
  /**
   * @param {Object} socket - Object whose `emit` is used for all requests.
   * @param {Object} corpus - Owning corpus; its `name` is sent with requests.
   */
  constructor(socket, corpus) {
    this.corpus = corpus;
    this.socket = socket;
  }

  /**
   * Emits 'cqi.corpora.corpus.alignment_attributes.get'.
   *
   * @param {*} alignmentAttributeName - Sent as `alignment_attribute_name`.
   * @returns {Promise<CQiAlignmentAttribute>} Built from the response payload.
   */
  get(alignmentAttributeName) {
    return new Promise((resolve, reject) => {
      const args = {
        corpus_name: this.corpus.name,
        alignment_attribute_name: alignmentAttributeName
      };
      this.socket.emit('cqi.corpora.corpus.alignment_attributes.get', args, response => {
        if (response.code === 200) {
          resolve(new CQiAlignmentAttribute(this.socket, this.corpus, response.payload));
        } else {
          reject(response);
        }
      });
    });
  }

  /**
   * Emits 'cqi.corpus.alignment_attributes.list'.
   *
   * @returns {Promise<CQiAlignmentAttribute[]>} One attribute per payload entry.
   */
  list() {
    return new Promise((resolve, reject) => {
      const args = {corpus_name: this.corpus.name};
      // NOTE(review): this event starts with 'cqi.corpus.' while get() uses
      // 'cqi.corpora.corpus.' - possibly a typo; confirm against the
      // server-side handler name before changing it.
      this.socket.emit('cqi.corpus.alignment_attributes.list', args, response => {
        if (response.code === 200) {
          resolve(response.payload.map(x => new CQiAlignmentAttribute(this.socket, this.corpus, x)));
        } else {
          reject(response);
        }
      });
    });
  }
}
/**
 * Plain record describing one alignment attribute of a corpus.
 */
class CQiAlignmentAttribute {
  /**
   * @param {Object} socket - Stored as `socket`.
   * @param {Object} corpus - Stored as `corpus`.
   * @param {Object} attrs - Source of the `name` and `size` fields.
   */
  constructor(socket, corpus, attrs) {
    Object.assign(this, {socket, corpus});
    const {name, size} = attrs;
    this.name = name;
    this.size = size;
  }
}
/**
 * Access to the positional attributes of one corpus.
 *
 * Request methods resolve when the server acknowledges with
 * `response.code === 200` and reject with the whole `response` otherwise.
 */
class CQiPositionalAttributeCollection {
  /**
   * @param {Object} socket - Object whose `emit` is used for all requests.
   * @param {Object} corpus - Owning corpus; its `name` is sent with requests.
   */
  constructor(socket, corpus) {
    this.corpus = corpus;
    this.socket = socket;
  }

  /**
   * Emits 'cqi.corpora.corpus.positional_attributes.get'.
   *
   * @param {*} positionalAttributeName - Sent as `positional_attribute_name`.
   * @returns {Promise<CQiPositionalAttribute>} Built from the response payload.
   */
  get(positionalAttributeName) {
    return new Promise((resolve, reject) => {
      const args = {
        corpus_name: this.corpus.name,
        positional_attribute_name: positionalAttributeName
      };
      this.socket.emit('cqi.corpora.corpus.positional_attributes.get', args, response => {
        if (response.code === 200) {
          resolve(new CQiPositionalAttribute(this.socket, this.corpus, response.payload));
        } else {
          reject(response);
        }
      });
    });
  }

  /**
   * Emits 'cqi.corpus.positional_attributes.list'.
   *
   * @returns {Promise<CQiPositionalAttribute[]>} One attribute per payload entry.
   */
  list() {
    return new Promise((resolve, reject) => {
      const args = {corpus_name: this.corpus.name};
      // NOTE(review): this event starts with 'cqi.corpus.' while get() uses
      // 'cqi.corpora.corpus.' - possibly a typo; confirm against the
      // server-side handler name before changing it.
      this.socket.emit('cqi.corpus.positional_attributes.list', args, response => {
        if (response.code === 200) {
          resolve(response.payload.map(x => new CQiPositionalAttribute(this.socket, this.corpus, x)));
        } else {
          reject(response);
        }
      });
    });
  }
}
/**
 * Plain record describing one positional attribute of a corpus.
 */
class CQiPositionalAttribute {
  /**
   * @param {Object} socket - Stored as `socket`.
   * @param {Object} corpus - Stored as `corpus`.
   * @param {Object} attrs - Source of `lexicon_size` (exposed as
   *   `lexiconSize`), `name` and `size`.
   */
  constructor(socket, corpus, attrs) {
    Object.assign(this, {socket, corpus});
    const {lexicon_size: lexiconSize, name, size} = attrs;
    this.lexiconSize = lexiconSize;
    this.name = name;
    this.size = size;
  }
}
/**
 * Access to the structural attributes of one corpus.
 *
 * Request methods resolve when the server acknowledges with
 * `response.code === 200` and reject with the whole `response` otherwise.
 */
class CQiStructuralAttributeCollection {
  /**
   * @param {Object} socket - Object whose `emit` is used for all requests.
   * @param {Object} corpus - Owning corpus; its `name` is sent with requests.
   */
  constructor(socket, corpus) {
    this.corpus = corpus;
    this.socket = socket;
  }

  /**
   * Emits 'cqi.corpora.corpus.structural_attributes.get'.
   *
   * @param {*} structuralAttributeName - Sent as `structural_attribute_name`.
   * @returns {Promise<CQiStructuralAttribute>} Built from the response payload.
   */
  get(structuralAttributeName) {
    return new Promise((resolve, reject) => {
      const args = {
        corpus_name: this.corpus.name,
        structural_attribute_name: structuralAttributeName
      };
      this.socket.emit('cqi.corpora.corpus.structural_attributes.get', args, response => {
        if (response.code === 200) {
          resolve(new CQiStructuralAttribute(this.socket, this.corpus, response.payload));
        } else {
          reject(response);
        }
      });
    });
  }

  /**
   * Emits 'cqi.corpus.structural_attributes.list'.
   *
   * @returns {Promise<CQiStructuralAttribute[]>} One attribute per payload entry.
   */
  list() {
    return new Promise((resolve, reject) => {
      const args = {corpus_name: this.corpus.name};
      // NOTE(review): this event starts with 'cqi.corpus.' while get() uses
      // 'cqi.corpora.corpus.' - possibly a typo; confirm against the
      // server-side handler name before changing it.
      this.socket.emit('cqi.corpus.structural_attributes.list', args, response => {
        if (response.code === 200) {
          resolve(response.payload.map(x => new CQiStructuralAttribute(this.socket, this.corpus, x)));
        } else {
          reject(response);
        }
      });
    });
  }
}
/**
 * Plain record describing one structural attribute of a corpus.
 */
class CQiStructuralAttribute {
  /**
   * @param {Object} socket - Stored as `socket`.
   * @param {Object} corpus - Stored as `corpus`.
   * @param {Object} attrs - Source of `has_values` (exposed as `hasValues`),
   *   `name` and `size`.
   */
  constructor(socket, corpus, attrs) {
    Object.assign(this, {socket, corpus});
    const {has_values: hasValues, name, size} = attrs;
    this.hasValues = hasValues;
    this.name = name;
    this.size = size;
  }
}
/**
 * Access to the subcorpora of one corpus.
 *
 * Request methods resolve when the server acknowledges with
 * `response.code === 200` and reject with the whole `response` otherwise.
 */
class CQiSubcorpusCollection {
  /**
   * @param {Object} socket - Object whose `emit` is used for all requests.
   * @param {Object} corpus - Owning corpus; its `name` is sent with requests.
   */
  constructor(socket, corpus) {
    this.corpus = corpus;
    this.socket = socket;
  }

  /**
   * Emits 'cqi.corpora.corpus.subcorpora.get'.
   *
   * @param {*} subcorpusName - Sent as `subcorpus_name`.
   * @returns {Promise<CQiSubcorpus>} Built from the response payload.
   */
  get(subcorpusName) {
    return new Promise((resolve, reject) => {
      const args = {
        corpus_name: this.corpus.name,
        subcorpus_name: subcorpusName
      };
      this.socket.emit('cqi.corpora.corpus.subcorpora.get', args, response => {
        if (response.code === 200) {
          resolve(new CQiSubcorpus(this.socket, this.corpus, response.payload));
        } else {
          reject(response);
        }
      });
    });
  }

  /**
   * Emits 'cqi.corpora.corpus.subcorpora.list'.
   *
   * @returns {Promise<CQiSubcorpus[]>} One subcorpus per payload entry.
   */
  list() {
    return new Promise((resolve, reject) => {
      const args = {corpus_name: this.corpus.name};
      this.socket.emit('cqi.corpora.corpus.subcorpora.list', args, response => {
        if (response.code === 200) {
          resolve(response.payload.map(x => new CQiSubcorpus(this.socket, this.corpus, x)));
        } else {
          reject(response);
        }
      });
    });
  }
}
/**
 * A subcorpus of a corpus plus the requests that can be made against it.
 *
 * Every request sends `corpus_name` (from `this.corpus.name`) and
 * `subcorpus_name` (from `this.name`). Request methods resolve with
 * `response.payload` when the server acknowledges with
 * `response.code === 200` and reject with the whole `response` otherwise.
 */
class CQiSubcorpus {
  /**
   * @param {Object} socket - Object whose `emit` is used for all requests.
   * @param {Object} corpus - Owning corpus; its `name` is sent with requests.
   * @param {Object} attrs - Subcorpus description; `fields`, `name` and
   *   `size` are copied onto the instance.
   */
  constructor(socket, corpus, attrs) {
    this.socket = socket;
    this.corpus = corpus;
    this.fields = attrs.fields;
    this.name = attrs.name;
    this.size = attrs.size;
  }

  /**
   * Emits 'cqi.corpora.corpus.subcorpora.subcorpus.drop'.
   *
   * @returns {Promise<*>} Resolves with the response payload.
   */
  drop() {
    return new Promise((resolve, reject) => {
      const args = {corpus_name: this.corpus.name, subcorpus_name: this.name};
      this.socket.emit('cqi.corpora.corpus.subcorpora.subcorpus.drop', args, response => {
        if (response.code === 200) {
          resolve(response.payload);
        } else {
          reject(response);
        }
      });
    });
  }

  /**
   * Emits 'cqi.corpora.corpus.subcorpora.subcorpus.dump'.
   *
   * @param {*} field - Sent as `field`.
   * @param {*} first - Sent as `first`.
   * @param {*} last - Sent as `last`.
   * @returns {Promise<*>} Resolves with the response payload.
   */
  dump(field, first, last) {
    return new Promise((resolve, reject) => {
      const args = {
        corpus_name: this.corpus.name,
        subcorpus_name: this.name,
        field: field,
        first: first,
        last: last
      };
      this.socket.emit('cqi.corpora.corpus.subcorpora.subcorpus.dump', args, response => {
        if (response.code === 200) {
          resolve(response.payload);
        } else {
          reject(response);
        }
      });
    });
  }

  /**
   * Emits 'cqi.corpora.corpus.subcorpora.subcorpus.export'.
   *
   * @param {*} [context=50] - Sent as `context`.
   * @returns {Promise<*>} Resolves with the response payload.
   */
  export(context=50) {
    return new Promise((resolve, reject) => {
      const args = {
        corpus_name: this.corpus.name,
        subcorpus_name: this.name,
        context: context
      };
      this.socket.emit('cqi.corpora.corpus.subcorpora.subcorpus.export', args, response => {
        if (response.code === 200) {
          resolve(response.payload);
        } else {
          reject(response);
        }
      });
    });
  }

  /**
   * Emits 'cqi.corpora.corpus.subcorpora.subcorpus.partial_export'.
   *
   * @param {*} matchIdList - Sent as `match_id_list`.
   * @param {*} [context=50] - Sent as `context`.
   * @returns {Promise<*>} Resolves with the response payload.
   */
  partial_export(matchIdList, context=50) {
    return new Promise((resolve, reject) => {
      const args = {
        corpus_name: this.corpus.name,
        subcorpus_name: this.name,
        match_id_list: matchIdList,
        context: context
      };
      this.socket.emit('cqi.corpora.corpus.subcorpora.subcorpus.partial_export', args, response => {
        if (response.code === 200) {
          resolve(response.payload);
        } else {
          reject(response);
        }
      });
    });
  }

  /**
   * Emits 'cqi.corpora.corpus.subcorpora.subcorpus.fdist_1'.
   *
   * @param {*} cutoff - Sent as `cutoff`.
   * @param {*} field - Sent as `field`.
   * @param {*} attribute - Sent as `attribute`.
   * @returns {Promise<*>} Resolves with the response payload.
   */
  fdst_1(cutoff, field, attribute) {
    return new Promise((resolve, reject) => {
      const args = {
        corpus_name: this.corpus.name,
        subcorpus_name: this.name,
        cutoff: cutoff,
        field: field,
        attribute: attribute
      };
      this.socket.emit('cqi.corpora.corpus.subcorpora.subcorpus.fdist_1', args, response => {
        if (response.code === 200) {
          resolve(response.payload);
        } else {
          reject(response);
        }
      });
    });
  }

  /**
   * Emits 'cqi.corpora.corpus.subcorpora.subcorpus.fdist_2'.
   *
   * @param {*} cutoff - Sent as `cutoff`.
   * @param {*} field1 - Sent as `field1`.
   * @param {*} attribute1 - Sent as `attribute1`.
   * @param {*} field2 - Sent as `field2`.
   * @param {*} attribute2 - Sent as `attribute2`.
   * @returns {Promise<*>} Resolves with the response payload.
   */
  fdst_2(cutoff, field1, attribute1, field2, attribute2) {
    return new Promise((resolve, reject) => {
      const args = {
        corpus_name: this.corpus.name,
        subcorpus_name: this.name,
        cutoff: cutoff,
        field1: field1,
        attribute1: attribute1,
        field2: field2,
        attribute2: attribute2
      };
      // The two-field payload belongs to the fdist_2 request; sending it to
      // the fdist_1 event (which fdst_1 uses with a single field/attribute
      // pair) would not match that request's arguments.
      // NOTE(review): assumes the server registers a matching '...fdist_2'
      // handler - confirm.
      this.socket.emit('cqi.corpora.corpus.subcorpora.subcorpus.fdist_2', args, response => {
        if (response.code === 200) {
          resolve(response.payload);
        } else {
          reject(response);
        }
      });
    });
  }

  /**
   * Emits 'cqi.corpora.corpus.subcorpora.subcorpus.paginate'
   * (nopaque specific CQi extension).
   *
   * @param {*} [page=1] - Sent as `page`.
   * @param {*} [perPage=20] - Sent as `per_page`.
   * @param {*} [context=50] - Sent as `context`.
   * @returns {Promise<*>} Resolves with the response payload.
   */
  paginate(page=1, perPage=20, context=50) {
    return new Promise((resolve, reject) => {
      const args = {
        corpus_name: this.corpus.name,
        subcorpus_name: this.name,
        page: page,
        per_page: perPage,
        context: context
      };
      this.socket.emit('cqi.corpora.corpus.subcorpora.subcorpus.paginate', args, response => {
        if (response.code === 200) {
          resolve(response.payload);
        } else {
          reject(response);
        }
      });
    });
  }
}