mirror of
https://gitlab.ub.uni-bielefeld.de/sfb1288inf/nopaque.git
synced 2024-11-15 17:25:44 +00:00
663 lines
21 KiB
JavaScript
663 lines
21 KiB
JavaScript
/**
 * Client for the corpus analysis socket namespace.
 *
 * The socket is obtained from the global `io()` factory, which is not defined
 * in this file (presumably the Socket.IO client library - confirm). Every
 * request method returns a Promise that resolves with the `payload` of the
 * server acknowledgement when its `code` is 200 and rejects with the whole
 * acknowledgement object otherwise.
 */
class CQiClient {
  /**
   * Opens the socket and creates the corpus collection bound to it.
   *
   * @param {*} corpusId - Sent to the server as `auth.corpus_id`.
   */
  constructor(corpusId) {
    this.socket = io(
      '/corpora/corpus/corpus_analysis',
      {
        auth: {corpus_id: corpusId},
        transports: ['websocket'],
        upgrade: false
      }
    );
    // Only changed by connect() and disconnect() below.
    this.connected = false;
    this.corpora = new CQiCorpusCollection(this.socket);
  }

  /**
   * Emits 'cqi.connect' and sets `connected` to true on success.
   *
   * @returns {Promise<*>} Resolves with the acknowledgement payload.
   */
  connect() {
    return new Promise((resolve, reject) => {
      this.socket.emit('cqi.connect', (reply) => {
        if (reply.code !== 200) {
          reject(reply);
          return;
        }
        this.connected = true;
        resolve(reply.payload);
      });
    });
  }

  /**
   * Emits 'cqi.disconnect' and sets `connected` to false on success.
   *
   * @returns {Promise<*>} Resolves with the acknowledgement payload.
   */
  disconnect() {
    return new Promise((resolve, reject) => {
      this.socket.emit('cqi.disconnect', (reply) => {
        if (reply.code !== 200) {
          reject(reply);
          return;
        }
        this.connected = false;
        resolve(reply.payload);
      });
    });
  }

  /**
   * Emits 'cqi.ping'; does not touch the `connected` flag.
   *
   * @returns {Promise<*>} Resolves with the acknowledgement payload.
   */
  ping() {
    return new Promise((resolve, reject) => {
      this.socket.emit('cqi.ping', (reply) => {
        if (reply.code !== 200) {
          reject(reply);
          return;
        }
        resolve(reply.payload);
      });
    });
  }
}
|
|
|
|
|
|
/**
 * Access to the corpora that are reachable through one socket.
 *
 * Both methods return a Promise that resolves when the server acknowledgement
 * has `code` 200 and rejects with the whole acknowledgement otherwise.
 */
class CQiCorpusCollection {
  /**
   * @param {object} socket - Socket whose `emit()` is used for every request.
   */
  constructor(socket) {
    this.socket = socket;
  }

  /**
   * Emits 'cqi.corpora.get' for a single corpus.
   *
   * @param {string} corpusName - Sent to the server as `corpus_name`.
   * @returns {Promise<CQiCorpus>} A CQiCorpus built from the acknowledgement payload.
   */
  get(corpusName) {
    return new Promise((resolve, reject) => {
      const args = {corpus_name: corpusName};

      this.socket.emit('cqi.corpora.get', args, (reply) => {
        if (reply.code !== 200) {
          reject(reply);
          return;
        }
        resolve(new CQiCorpus(this.socket, reply.payload));
      });
    });
  }

  /**
   * Emits 'cqi.corpora.list'.
   *
   * @returns {Promise<CQiCorpus[]>} One CQiCorpus per entry of the acknowledgement payload.
   */
  list() {
    return new Promise((resolve, reject) => {
      this.socket.emit('cqi.corpora.list', (reply) => {
        if (reply.code !== 200) {
          reject(reply);
          return;
        }
        // Entries are corpora, so they are wrapped in CQiCorpus exactly like
        // get() does. CQiSubcorpus expects (socket, corpus, attrs) and cannot
        // be constructed from a payload entry alone.
        resolve(reply.payload.map((attrs) => new CQiCorpus(this.socket, attrs)));
      });
    });
  }
}
|
|
|
|
|
|
/**
 * A single corpus together with its attribute and subcorpus collections.
 *
 * Request methods that return a Promise resolve with the `payload` of the
 * server acknowledgement when its `code` is 200 and reject with the whole
 * acknowledgement object otherwise.
 */
class CQiCorpus {
  /**
   * @param {object} socket - Socket whose `emit()` is used for every request.
   * @param {object} attrs - Source of `charset`, `name`, `properties` and `size`.
   */
  constructor(socket, attrs) {
    this.socket = socket;
    this.charset = attrs.charset;
    this.name = attrs.name;
    this.properties = attrs.properties;
    this.size = attrs.size;
    this.alignmentAttributes = new CQiAlignmentAttributeCollection(this.socket, this);
    this.positionalAttributes = new CQiPositionalAttributeCollection(this.socket, this);
    this.structuralAttributes = new CQiStructuralAttributeCollection(this.socket, this);
    this.subcorpora = new CQiSubcorpusCollection(this.socket, this);
  }

  /**
   * Returns corpus statistics.
   *
   * Currently resolves with hard-coded placeholder data and never contacts
   * the server; see the TODO at the end of the method.
   *
   * @returns {Promise<object>} Resolves with the placeholder statistics object.
   */
  getCorpusData() {
    return new Promise((resolve, reject) => {
      const dummyData = {
        "num_tokens": 2000, // number of tokens in the corpus
        "num_unique_words": 500, // number of unique words in the corpus
        "num_unique_lemmas": 200, // number of unique lemmas in the corpus
        "num_sentences": 90, // number of sentences in the corpus
        "average_sentence_length": 11, // average number of tokens per sentence in the corpus
        "num_ent_types": 30, // number of entities in the corpus
        "num_unique_ent_types": 10,
        "ent_type_freqs": {
          "str": 10, // number of ent_types with ent_type "str"
          // ...
        },
        "texts": [
          {
            "num_tokens": 11, // number of tokens in the text
            "num_unique_words": 12, // number of unique words in the text
            "word_freqs": { // frequency of unique words in the text (sorted by frequency)
              "str": "int", // number of tokens with word "str"
              // ...
            },
            "num_unique_lemmas": 15, // number of unique lemmas in the text
            "lemma_freqs": { // frequency of unique lemmas in the text (sorted by frequency)
              "str": "int", // number of tokens with lemma "str"
              // ...
            },
            "num_sentences": 4, // number of sentences in the text
            "average_sentence_length": 3, // average number of tokens per sentence in the text
            "num_ent_types": 12, // number of ent_types in the text
            "num_unique_ent_types": 28, // number of unique ent_types in the text
            "num_entities_by_id": {
              "1": "int", // number of entities with id 1
              // ...
            },
            "author": "Author Name",
            "title": "Titel",
            "publishing_year": 1950
          },
          {
            "num_tokens": 15, // number of tokens in the text
            "num_unique_words": 4, // number of unique words in the text
            "word_freqs": { // frequency of unique words in the text (sorted by frequency)
              "str": "int", // number of tokens with word "str"
              // ...
            },
            "num_unique_lemmas": 90, // number of unique lemmas in the text
            "lemma_freqs": { // frequency of unique lemmas in the text (sorted by frequency)
              "str": "int", // number of tokens with lemma "str"
              // ...
            },
            "num_sentences": 11, // number of sentences in the text
            "average_sentence_length": 3, // average number of tokens per sentence in the text
            "num_ent_types": 4, // number of ent_types in the text
            "num_unique_ent_types": 300, // number of unique ent_types in the text
            "num_entities_by_id": {
              "1": "int", // number of entities with id 1
              // ...
            },
            "author": "Author Name",
            "title": "Titel 1",
            "publishing_year": 1962
          },
          {
            "num_tokens": 11, // number of tokens in the text
            "num_unique_words": 12, // number of unique words in the text
            "word_freqs": { // frequency of unique words in the text (sorted by frequency)
              "str": "int", // number of tokens with word "str"
              // ...
            },
            "num_unique_lemmas": 64, // number of unique lemmas in the text
            "lemma_freqs": { // frequency of unique lemmas in the text (sorted by frequency)
              "str": "int", // number of tokens with lemma "str"
              // ...
            },
            "num_sentences": 52, // number of sentences in the text
            "average_sentence_length": 3, // average number of tokens per sentence in the text
            "num_ent_types": 45, // number of ent_types in the text
            "num_unique_ent_types": 68, // number of unique ent_types in the text
            "num_entities_by_id": {
              "1": "int", // number of entities with id 1
              // ...
            },
            "author": "Author Name",
            "title": "Titel 2",
            "publishing_year": 1850
          },
          {
            "num_tokens": 56, // number of tokens in the text
            "num_unique_words": 13, // number of unique words in the text
            "word_freqs": { // frequency of unique words in the text (sorted by frequency)
              "str": "int", // number of tokens with word "str"
              // ...
            },
            "num_unique_lemmas": 43, // number of unique lemmas in the text
            "lemma_freqs": { // frequency of unique lemmas in the text (sorted by frequency)
              "str": "int", // number of tokens with lemma "str"
              // ...
            },
            "num_sentences": 45, // number of sentences in the text
            "average_sentence_length": 56, // average number of tokens per sentence in the text
            "num_ent_types": 8792, // number of ent_types in the text
            "num_unique_ent_types": 56758, // number of unique ent_types in the text
            "num_entities_by_id": {
              "1": "int", // number of entities with id 1
              // ...
            },
            "author": "Author Name",
            "title": "Titel 3",
            "publishing_year": 1504
          },
          {
            "num_tokens": 54345, // number of tokens in the text
            "num_unique_words": 561, // number of unique words in the text
            "word_freqs": { // frequency of unique words in the text (sorted by frequency)
              "str": "int", // number of tokens with word "str"
              // ...
            },
            "num_unique_lemmas": 546, // number of unique lemmas in the text
            "lemma_freqs": { // frequency of unique lemmas in the text (sorted by frequency)
              "str": "int", // number of tokens with lemma "str"
              // ...
            },
            "num_sentences": 5427, // number of sentences in the text
            "average_sentence_length": 657, // average number of tokens per sentence in the text
            "num_ent_types": 3465, // number of ent_types in the text
            "num_unique_ent_types": 45, // number of unique ent_types in the text
            "num_entities_by_id": {
              "1": "int", // number of entities with id 1
              // ...
            },
            "author": "Author Name",
            "title": "Titel 4",
            "publishing_year": 1712
          },
          {
            "num_tokens": 4354, // number of tokens in the text
            "num_unique_words": 45234, // number of unique words in the text
            "word_freqs": { // frequency of unique words in the text (sorted by frequency)
              "testwort": 50, // number of tokens with word "str"
              "testwort2": 1
            },
            "num_unique_lemmas": 15, // number of unique lemmas in the text
            "lemma_freqs": { // frequency of unique lemmas in the text (sorted by frequency)
              "testlemma": 11, // number of tokens with lemma "str"
              "testlemma2": 1
            },
            "num_sentences": 90, // number of sentences in the text
            "average_sentence_length": 7, // average number of tokens per sentence in the text
            "num_ent_types": 19,
            "num_unique_ent_types": 5, // number of unique ent_types in the text
            "num_entities_by_id": {
              "1": "int", // number of entities with id 1
              // ...
            },
            "author": "Author Name 2",
            "title": "Titel 5",
            "publishing_year": 1951
          }
        ]
      };

      resolve(dummyData);
      /*
      TODO: replace the placeholder above with the real request:

      const args = {corpus_name: this.name};

      this.socket.emit('cqi.corpora.corpus.getCorpusData', args, response => {
        if (response.code === 200) {
          resolve(response.payload);
        } else {
          reject(response);
        }
      });
      */
    });
  }

  /**
   * Emits 'cqi.corpora.corpus.drop' for this corpus.
   *
   * @returns {Promise<*>} Resolves with the acknowledgement payload.
   */
  drop() {
    return new Promise((resolve, reject) => {
      const args = {corpus_name: this.name};

      this.socket.emit('cqi.corpora.corpus.drop', args, (reply) => {
        if (reply.code !== 200) {
          reject(reply);
          return;
        }
        resolve(reply.payload);
      });
    });
  }

  /**
   * Emits 'cqi.corpora.corpus.query' for this corpus.
   *
   * @param {string} subcorpus_name - Sent to the server as `subcorpus_name`.
   * @param {string} queryString - Sent to the server as `query`.
   * @returns {Promise<*>} Resolves with the acknowledgement payload.
   */
  query(subcorpus_name, queryString) {
    return new Promise((resolve, reject) => {
      const args = {
        corpus_name: this.name,
        subcorpus_name: subcorpus_name,
        query: queryString
      };

      this.socket.emit('cqi.corpora.corpus.query', args, (reply) => {
        if (reply.code !== 200) {
          reject(reply);
          return;
        }
        resolve(reply.payload);
      });
    });
  }

  // nopaque specific CQi extension
  /**
   * Emits 'cqi.corpora.corpus.paginate' for this corpus.
   *
   * @param {number} [page=1] - Sent to the server as `page`.
   * @param {number} [perPage=20] - Sent to the server as `per_page`.
   * @returns {Promise<*>} Resolves with the acknowledgement payload.
   */
  paginate(page=1, perPage=20) {
    return new Promise((resolve, reject) => {
      const args = {corpus_name: this.name, page: page, per_page: perPage};

      this.socket.emit('cqi.corpora.corpus.paginate', args, (reply) => {
        if (reply.code !== 200) {
          reject(reply);
          return;
        }
        resolve(reply.payload);
      });
    });
  }

  /**
   * Emits 'cqi.corpora.corpus.update_db' for this corpus.
   *
   * No acknowledgement callback is passed, so nothing is returned and the
   * caller cannot observe the outcome.
   */
  updateDb() {
    const args = {corpus_name: this.name};

    this.socket.emit('cqi.corpora.corpus.update_db', args);
  }
}
|
|
|
|
|
|
/**
 * Access to the alignment attributes of one corpus.
 *
 * Both methods return a Promise that resolves when the server acknowledgement
 * has `code` 200 and rejects with the whole acknowledgement otherwise.
 */
class CQiAlignmentAttributeCollection {
  /**
   * @param {object} socket - Socket whose `emit()` is used for every request.
   * @param {object} corpus - Owning corpus; its `name` is sent as `corpus_name`.
   */
  constructor(socket, corpus) {
    this.corpus = corpus;
    this.socket = socket;
  }

  /**
   * Emits 'cqi.corpora.corpus.alignment_attributes.get'.
   *
   * @param {string} alignmentAttributeName - Sent as `alignment_attribute_name`.
   * @returns {Promise<CQiAlignmentAttribute>} Built from the acknowledgement payload.
   */
  get(alignmentAttributeName) {
    return new Promise((resolve, reject) => {
      const args = {
        corpus_name: this.corpus.name,
        alignment_attribute_name: alignmentAttributeName
      };

      this.socket.emit('cqi.corpora.corpus.alignment_attributes.get', args, (reply) => {
        if (reply.code !== 200) {
          reject(reply);
          return;
        }
        resolve(new CQiAlignmentAttribute(this.socket, this.corpus, reply.payload));
      });
    });
  }

  /**
   * Emits 'cqi.corpora.corpus.alignment_attributes.list'.
   *
   * @returns {Promise<CQiAlignmentAttribute[]>} One instance per payload entry.
   */
  list() {
    return new Promise((resolve, reject) => {
      const args = {corpus_name: this.corpus.name};

      // The event uses the same 'cqi.corpora.corpus.' prefix as get().
      // NOTE(review): confirm the server registers the handler under this name.
      this.socket.emit('cqi.corpora.corpus.alignment_attributes.list', args, (reply) => {
        if (reply.code !== 200) {
          reject(reply);
          return;
        }
        resolve(reply.payload.map((attrs) => new CQiAlignmentAttribute(this.socket, this.corpus, attrs)));
      });
    });
  }
}
|
|
|
|
|
|
/**
 * Plain record describing one alignment attribute of a corpus.
 */
class CQiAlignmentAttribute {
  /**
   * @param {object} socket - Stored as `socket`; not used by this class itself.
   * @param {object} corpus - Stored as `corpus`.
   * @param {object} attrs - Source of `name` and `size`.
   */
  constructor(socket, corpus, attrs) {
    this.socket = socket;
    this.corpus = corpus;
    this.name = attrs.name;
    this.size = attrs.size;
  }
}
|
|
|
|
|
|
/**
 * Access to the positional attributes of one corpus.
 *
 * Both methods return a Promise that resolves when the server acknowledgement
 * has `code` 200 and rejects with the whole acknowledgement otherwise.
 */
class CQiPositionalAttributeCollection {
  /**
   * @param {object} socket - Socket whose `emit()` is used for every request.
   * @param {object} corpus - Owning corpus; its `name` is sent as `corpus_name`.
   */
  constructor(socket, corpus) {
    this.corpus = corpus;
    this.socket = socket;
  }

  /**
   * Emits 'cqi.corpora.corpus.positional_attributes.get'.
   *
   * @param {string} positionalAttributeName - Sent as `positional_attribute_name`.
   * @returns {Promise<CQiPositionalAttribute>} Built from the acknowledgement payload.
   */
  get(positionalAttributeName) {
    return new Promise((resolve, reject) => {
      const args = {
        corpus_name: this.corpus.name,
        positional_attribute_name: positionalAttributeName
      };

      this.socket.emit('cqi.corpora.corpus.positional_attributes.get', args, (reply) => {
        if (reply.code !== 200) {
          reject(reply);
          return;
        }
        resolve(new CQiPositionalAttribute(this.socket, this.corpus, reply.payload));
      });
    });
  }

  /**
   * Emits 'cqi.corpora.corpus.positional_attributes.list'.
   *
   * @returns {Promise<CQiPositionalAttribute[]>} One instance per payload entry.
   */
  list() {
    return new Promise((resolve, reject) => {
      const args = {corpus_name: this.corpus.name};

      // The event uses the same 'cqi.corpora.corpus.' prefix as get().
      // NOTE(review): confirm the server registers the handler under this name.
      this.socket.emit('cqi.corpora.corpus.positional_attributes.list', args, (reply) => {
        if (reply.code !== 200) {
          reject(reply);
          return;
        }
        resolve(reply.payload.map((attrs) => new CQiPositionalAttribute(this.socket, this.corpus, attrs)));
      });
    });
  }
}
|
|
|
|
|
|
/**
 * Plain record describing one positional attribute of a corpus.
 */
class CQiPositionalAttribute {
  /**
   * @param {object} socket - Stored as `socket`; not used by this class itself.
   * @param {object} corpus - Stored as `corpus`.
   * @param {object} attrs - Source of `lexicon_size` (stored as `lexiconSize`),
   *   `name` and `size`.
   */
  constructor(socket, corpus, attrs) {
    this.socket = socket;
    this.corpus = corpus;
    this.lexiconSize = attrs.lexicon_size;
    this.name = attrs.name;
    this.size = attrs.size;
  }
}
|
|
|
|
|
|
/**
 * Access to the structural attributes of one corpus.
 *
 * Both methods return a Promise that resolves when the server acknowledgement
 * has `code` 200 and rejects with the whole acknowledgement otherwise.
 */
class CQiStructuralAttributeCollection {
  /**
   * @param {object} socket - Socket whose `emit()` is used for every request.
   * @param {object} corpus - Owning corpus; its `name` is sent as `corpus_name`.
   */
  constructor(socket, corpus) {
    this.corpus = corpus;
    this.socket = socket;
  }

  /**
   * Emits 'cqi.corpora.corpus.structural_attributes.get'.
   *
   * @param {string} structuralAttributeName - Sent as `structural_attribute_name`.
   * @returns {Promise<CQiStructuralAttribute>} Built from the acknowledgement payload.
   */
  get(structuralAttributeName) {
    return new Promise((resolve, reject) => {
      const args = {
        corpus_name: this.corpus.name,
        structural_attribute_name: structuralAttributeName
      };

      this.socket.emit('cqi.corpora.corpus.structural_attributes.get', args, (reply) => {
        if (reply.code !== 200) {
          reject(reply);
          return;
        }
        resolve(new CQiStructuralAttribute(this.socket, this.corpus, reply.payload));
      });
    });
  }

  /**
   * Emits 'cqi.corpora.corpus.structural_attributes.list'.
   *
   * @returns {Promise<CQiStructuralAttribute[]>} One instance per payload entry.
   */
  list() {
    return new Promise((resolve, reject) => {
      const args = {corpus_name: this.corpus.name};

      // The event uses the same 'cqi.corpora.corpus.' prefix as get().
      // NOTE(review): confirm the server registers the handler under this name.
      this.socket.emit('cqi.corpora.corpus.structural_attributes.list', args, (reply) => {
        if (reply.code !== 200) {
          reject(reply);
          return;
        }
        resolve(reply.payload.map((attrs) => new CQiStructuralAttribute(this.socket, this.corpus, attrs)));
      });
    });
  }
}
|
|
|
|
|
|
/**
 * Plain record describing one structural attribute of a corpus.
 */
class CQiStructuralAttribute {
  /**
   * @param {object} socket - Stored as `socket`; not used by this class itself.
   * @param {object} corpus - Stored as `corpus`.
   * @param {object} attrs - Source of `has_values` (stored as `hasValues`),
   *   `name` and `size`.
   */
  constructor(socket, corpus, attrs) {
    this.socket = socket;
    this.corpus = corpus;
    this.hasValues = attrs.has_values;
    this.name = attrs.name;
    this.size = attrs.size;
  }
}
|
|
|
|
|
|
/**
 * Access to the subcorpora of one corpus.
 *
 * Both methods return a Promise that resolves when the server acknowledgement
 * has `code` 200 and rejects with the whole acknowledgement otherwise.
 */
class CQiSubcorpusCollection {
  /**
   * @param {object} socket - Socket whose `emit()` is used for every request.
   * @param {object} corpus - Owning corpus; its `name` is sent as `corpus_name`.
   */
  constructor(socket, corpus) {
    this.corpus = corpus;
    this.socket = socket;
  }

  /**
   * Emits 'cqi.corpora.corpus.subcorpora.get'.
   *
   * @param {string} subcorpusName - Sent to the server as `subcorpus_name`.
   * @returns {Promise<CQiSubcorpus>} Built from the acknowledgement payload.
   */
  get(subcorpusName) {
    return new Promise((resolve, reject) => {
      const args = {
        corpus_name: this.corpus.name,
        subcorpus_name: subcorpusName
      };

      this.socket.emit('cqi.corpora.corpus.subcorpora.get', args, (reply) => {
        if (reply.code !== 200) {
          reject(reply);
          return;
        }
        resolve(new CQiSubcorpus(this.socket, this.corpus, reply.payload));
      });
    });
  }

  /**
   * Emits 'cqi.corpora.corpus.subcorpora.list'.
   *
   * @returns {Promise<CQiSubcorpus[]>} One instance per payload entry.
   */
  list() {
    return new Promise((resolve, reject) => {
      const args = {corpus_name: this.corpus.name};

      this.socket.emit('cqi.corpora.corpus.subcorpora.list', args, (reply) => {
        if (reply.code !== 200) {
          reject(reply);
          return;
        }
        resolve(reply.payload.map((attrs) => new CQiSubcorpus(this.socket, this.corpus, attrs)));
      });
    });
  }
}
|
|
|
|
|
|
/**
 * A subcorpus of a corpus, with the requests that operate on it.
 *
 * Every request method sends `corpus_name` and `subcorpus_name` along with its
 * own arguments and returns a Promise that resolves with the `payload` of the
 * server acknowledgement when its `code` is 200 and rejects with the whole
 * acknowledgement object otherwise.
 */
class CQiSubcorpus {
  /**
   * @param {object} socket - Socket whose `emit()` is used for every request.
   * @param {object} corpus - Owning corpus; its `name` is sent as `corpus_name`.
   * @param {object} attrs - Source of `fields`, `name` and `size`.
   */
  constructor(socket, corpus, attrs) {
    this.socket = socket;
    this.corpus = corpus;
    this.fields = attrs.fields;
    this.name = attrs.name;
    this.size = attrs.size;
  }

  /**
   * Emits 'cqi.corpora.corpus.subcorpora.subcorpus.drop'.
   *
   * @returns {Promise<*>} Resolves with the acknowledgement payload.
   */
  drop() {
    return new Promise((resolve, reject) => {
      const args = {corpus_name: this.corpus.name, subcorpus_name: this.name};

      this.socket.emit('cqi.corpora.corpus.subcorpora.subcorpus.drop', args, (reply) => {
        if (reply.code !== 200) {
          reject(reply);
          return;
        }
        resolve(reply.payload);
      });
    });
  }

  /**
   * Emits 'cqi.corpora.corpus.subcorpora.subcorpus.dump'.
   *
   * @param {*} field - Sent to the server as `field`.
   * @param {*} first - Sent to the server as `first`.
   * @param {*} last - Sent to the server as `last`.
   * @returns {Promise<*>} Resolves with the acknowledgement payload.
   */
  dump(field, first, last) {
    return new Promise((resolve, reject) => {
      const args = {
        corpus_name: this.corpus.name,
        subcorpus_name: this.name,
        field: field,
        first: first,
        last: last
      };

      this.socket.emit('cqi.corpora.corpus.subcorpora.subcorpus.dump', args, (reply) => {
        if (reply.code !== 200) {
          reject(reply);
          return;
        }
        resolve(reply.payload);
      });
    });
  }

  /**
   * Emits 'cqi.corpora.corpus.subcorpora.subcorpus.export'.
   *
   * @param {number} [context=50] - Sent to the server as `context`.
   * @returns {Promise<*>} Resolves with the acknowledgement payload.
   */
  export(context=50) {
    return new Promise((resolve, reject) => {
      const args = {
        corpus_name: this.corpus.name,
        subcorpus_name: this.name,
        context: context
      };

      this.socket.emit('cqi.corpora.corpus.subcorpora.subcorpus.export', args, (reply) => {
        if (reply.code !== 200) {
          reject(reply);
          return;
        }
        resolve(reply.payload);
      });
    });
  }

  /**
   * Emits 'cqi.corpora.corpus.subcorpora.subcorpus.partial_export'.
   *
   * @param {Array} matchIdList - Sent to the server as `match_id_list`.
   * @param {number} [context=50] - Sent to the server as `context`.
   * @returns {Promise<*>} Resolves with the acknowledgement payload.
   */
  partial_export(matchIdList, context=50) {
    return new Promise((resolve, reject) => {
      const args = {
        corpus_name: this.corpus.name,
        subcorpus_name: this.name,
        match_id_list: matchIdList,
        context: context
      };

      this.socket.emit('cqi.corpora.corpus.subcorpora.subcorpus.partial_export', args, (reply) => {
        if (reply.code !== 200) {
          reject(reply);
          return;
        }
        resolve(reply.payload);
      });
    });
  }

  /**
   * Emits 'cqi.corpora.corpus.subcorpora.subcorpus.fdist_1'.
   *
   * @param {*} cutoff - Sent to the server as `cutoff`.
   * @param {*} field - Sent to the server as `field`.
   * @param {*} attribute - Sent to the server as `attribute`.
   * @returns {Promise<*>} Resolves with the acknowledgement payload.
   */
  fdst_1(cutoff, field, attribute) {
    return new Promise((resolve, reject) => {
      const args = {
        corpus_name: this.corpus.name,
        subcorpus_name: this.name,
        cutoff: cutoff,
        field: field,
        attribute: attribute
      };

      this.socket.emit('cqi.corpora.corpus.subcorpora.subcorpus.fdist_1', args, (reply) => {
        if (reply.code !== 200) {
          reject(reply);
          return;
        }
        resolve(reply.payload);
      });
    });
  }

  /**
   * Emits 'cqi.corpora.corpus.subcorpora.subcorpus.fdist_2'.
   *
   * @param {*} cutoff - Sent to the server as `cutoff`.
   * @param {*} field1 - Sent to the server as `field1`.
   * @param {*} attribute1 - Sent to the server as `attribute1`.
   * @param {*} field2 - Sent to the server as `field2`.
   * @param {*} attribute2 - Sent to the server as `attribute2`.
   * @returns {Promise<*>} Resolves with the acknowledgement payload.
   */
  fdst_2(cutoff, field1, attribute1, field2, attribute2) {
    return new Promise((resolve, reject) => {
      const args = {
        corpus_name: this.corpus.name,
        subcorpus_name: this.name,
        cutoff: cutoff,
        field1: field1,
        attribute1: attribute1,
        field2: field2,
        attribute2: attribute2
      };

      // The two-field arguments go to the fdist_2 event; fdist_1 takes a
      // single field/attribute pair (see fdst_1).
      // NOTE(review): confirm the server registers a handler under this name.
      this.socket.emit('cqi.corpora.corpus.subcorpora.subcorpus.fdist_2', args, (reply) => {
        if (reply.code !== 200) {
          reject(reply);
          return;
        }
        resolve(reply.payload);
      });
    });
  }

  // nopaque specific CQi extension
  /**
   * Emits 'cqi.corpora.corpus.subcorpora.subcorpus.paginate'.
   *
   * @param {number} [page=1] - Sent to the server as `page`.
   * @param {number} [perPage=20] - Sent to the server as `per_page`.
   * @param {number} [context=50] - Sent to the server as `context`.
   * @returns {Promise<*>} Resolves with the acknowledgement payload.
   */
  paginate(page=1, perPage=20, context=50) {
    return new Promise((resolve, reject) => {
      const args = {
        corpus_name: this.corpus.name,
        subcorpus_name: this.name,
        page: page,
        per_page: perPage,
        context: context
      };

      this.socket.emit('cqi.corpora.corpus.subcorpora.subcorpus.paginate', args, (reply) => {
        if (reply.code !== 200) {
          reject(reply);
          return;
        }
        resolve(reply.payload);
      });
    });
  }
}
|