mirror of
https://gitlab.ub.uni-bielefeld.de/sfb1288inf/nopaque.git
synced 2025-01-11 18:50:34 +00:00
Add package implementation of cqi
This commit is contained in:
parent
7752e7fb57
commit
acfcc0321b
9
app/corpora/cqi/__init__.py
Normal file
9
app/corpora/cqi/__init__.py
Normal file
@ -0,0 +1,9 @@
|
|||||||
|
# flake8: noqa
|
||||||
|
from .api import APIClient
|
||||||
|
from .client import CQiClient
|
||||||
|
from .wrapper import CQiWrapper
|
||||||
|
from .version import version, version_info
|
||||||
|
|
||||||
|
|
||||||
|
__title__ = 'CQi'
|
||||||
|
__version__ = version
|
2
app/corpora/cqi/api/__init__.py
Normal file
2
app/corpora/cqi/api/__init__.py
Normal file
@ -0,0 +1,2 @@
|
|||||||
|
# flake8: noqa
|
||||||
|
from .client import APIClient
|
31
app/corpora/cqi/client.py
Normal file
31
app/corpora/cqi/client.py
Normal file
@ -0,0 +1,31 @@
|
|||||||
|
from .api import APIClient
|
||||||
|
from .constants import MAJOR_VERSION, MINOR_VERSION
|
||||||
|
from .models.corpora import CorpusCollection
|
||||||
|
|
||||||
|
|
||||||
|
class CQiClient(APIClient):
    """CQi client that adds a small object model on top of ``APIClient``.

    After ``connect()`` the instance exposes:

    * ``corpora`` -- a ``CorpusCollection`` bound to this client
    * ``info``    -- a dict holding the CQi protocol version string
    """

    def __init__(self, host, port=4877):
        """Create a client for the server at ``host``:``port``.

        No connection is made here; call ``connect()`` for that.
        """
        super().__init__(host, port=port)

    def connect(self, username='anonymous', password=''):
        """Run the base class setup, log in and load the object model."""
        super().setup()
        self.ctrl_connect(username, password)
        self.__load()

    def disconnect(self):
        """Say goodbye to the server and tear the connection down.

        ``teardown()`` runs even if ``ctrl_bye()`` raises, so a failing
        goodbye no longer skips the teardown step. The exception from
        ``ctrl_bye()`` still propagates to the caller.
        """
        try:
            self.ctrl_bye()
        finally:
            super().teardown()

    def __load(self):
        """Populate the attributes that are only valid once connected."""
        self.corpora = CorpusCollection(self)
        self.info = {'version': '{}.{}'.format(MAJOR_VERSION, MINOR_VERSION)}

    def features(self):
        """Return the names of the features the server confirms.

        The probes run in a fixed order (cqi_1_0, cl_2_3, cqp_2_3) and a
        name is included when the matching ``ask_feature_*`` call returns
        a truthy value.
        """
        probes = (
            ('cqi_1_0', self.ask_feature_cqi_1_0),
            ('cl_2_3', self.ask_feature_cl_2_3),
            ('cqp_2_3', self.ask_feature_cqp_2_3),
        )
        return [name for name, probe in probes if probe()]
|
36
app/corpora/cqi/constants.py
Normal file
36
app/corpora/cqi/constants.py
Normal file
@ -0,0 +1,36 @@
|
|||||||
|
""" 4. Constant Definitions """
|
||||||
|
CONST_FALSE = 0x00
|
||||||
|
CONST_NO = 0x00
|
||||||
|
CONST_TRUE = 0x01
|
||||||
|
CONST_YES = 0x01
|
||||||
|
"""
|
||||||
|
" NOTE: The following constants specify which field will be returned by
|
||||||
|
" CQI_CQP_DUMP_SUBCORPUS and some other subcorpus commands.
|
||||||
|
"""
|
||||||
|
CONST_FIELD_MATCH = 0x10
|
||||||
|
CONST_FIELD_MATCHEND = 0x11
|
||||||
|
"""
|
||||||
|
" NOTE: The constants specifiying target0 .. target9 are guaranteed to have the
|
||||||
|
" numerical values 0 .. 9, so clients do not need to look up the constant
|
||||||
|
" values if they're handling arbitrary targets.
|
||||||
|
"""
|
||||||
|
CONST_FIELD_TARGET_0 = 0x00
|
||||||
|
CONST_FIELD_TARGET_1 = 0x01
|
||||||
|
CONST_FIELD_TARGET_2 = 0x02
|
||||||
|
CONST_FIELD_TARGET_3 = 0x03
|
||||||
|
CONST_FIELD_TARGET_4 = 0x04
|
||||||
|
CONST_FIELD_TARGET_5 = 0x05
|
||||||
|
CONST_FIELD_TARGET_6 = 0x06
|
||||||
|
CONST_FIELD_TARGET_7 = 0x07
|
||||||
|
CONST_FIELD_TARGET_8 = 0x08
|
||||||
|
CONST_FIELD_TARGET_9 = 0x09
|
||||||
|
"""
|
||||||
|
" NOTE: The following constants are provided for backward compatibility with
|
||||||
|
" traditional CQP field names & while the generalised target concept
|
||||||
|
" isn't yet implemented in the CQPserver.
|
||||||
|
"""
|
||||||
|
CONST_FIELD_TARGET = 0x00
|
||||||
|
CONST_FIELD_KEYWORD = 0x09
|
||||||
|
""" NOTE: CQi version is CQI_MAJOR_VERSION.CQI_MINOR_VERSION """
|
||||||
|
MAJOR_VERSION = 0x00
|
||||||
|
MINOR_VERSION = 0x01
|
0
app/corpora/cqi/models/__init__.py
Normal file
0
app/corpora/cqi/models/__init__.py
Normal file
102
app/corpora/cqi/models/corpora.py
Normal file
102
app/corpora/cqi/models/corpora.py
Normal file
@ -0,0 +1,102 @@
|
|||||||
|
from .subcorpora import SubcorpusCollection
|
||||||
|
|
||||||
|
|
||||||
|
class CorpusCollection:
    """Access point for the corpora reachable through one client."""

    def __init__(self, client):
        """Remember the client that every created ``Corpus`` will use."""
        self.client = client

    def get(self, name):
        """Return a ``Corpus`` object for the corpus called ``name``."""
        return Corpus(self.client, name)

    def list(self):
        """Return a ``Corpus`` object for every corpus name of the server."""
        corpora = []
        # NOTE: 'coprora' (sic) is the spelling of the client method.
        for corpus_name in self.client.corpus_list_coprora():
            corpora.append(Corpus(self.client, corpus_name))
        return corpora
|
||||||
|
|
||||||
|
|
||||||
|
class Corpus:
    """One corpus on the server, bound to the client used to reach it.

    Constructing a ``Corpus`` immediately queries the server for the
    corpus metadata (see ``__load``). All other methods are thin wrappers
    around the client's ``cl_*`` / ``cqp_*`` / ``corpus_*`` calls that
    prefix the attribute with the corpus name (``'<corpus>.<attribute>'``).
    """

    def __init__(self, client, name):
        """Bind ``name`` to ``client`` and load the corpus metadata."""
        self.client = client
        self.name = name
        self.__load()

    def __load(self):
        """Fetch size, charset, properties and attribute lists."""
        # The size of the corpus is taken from its 'word' attribute.
        self.size = self.client.cl_attribute_size(self._qualify('word'))
        # NOTE: corpus_info and corpus_full_name are deliberately not
        # loaded here; they were disabled in the original implementation.
        self.charset = self.client.corpus_charset(self.name)
        self.properties = self.client.corpus_properties(self.name)
        self.alignment_attributes = \
            self.client.corpus_alignment_attributes(self.name)
        self.structural_attributes = \
            self.client.corpus_structural_attributes(self.name)
        self.positional_attributes = \
            self.client.corpus_positional_attributes(self.name)
        self.subcorpora = SubcorpusCollection(self.client, self)

    def _qualify(self, attribute):
        """Return ``attribute`` prefixed with this corpus' name."""
        return '{}.{}'.format(self.name, attribute)

    def alg2cpos(self, attribute, alg):
        """Forward to ``client.cl_alg2cpos`` for ``attribute``."""
        return self.client.cl_alg2cpos(self._qualify(attribute), alg)

    def cpos2alg(self, attribute, cpos_list):
        """Forward to ``client.cl_cpos2alg`` for ``attribute``."""
        return self.client.cl_cpos2alg(self._qualify(attribute), cpos_list)

    def cpos2id(self, attribute, cpos_list):
        """Forward to ``client.cl_cpos2id`` for ``attribute``."""
        return self.client.cl_cpos2id(self._qualify(attribute), cpos_list)

    def cpos2lbound(self, attribute, cpos_list):
        """Forward to ``client.cl_cpos2lbound`` for ``attribute``."""
        return self.client.cl_cpos2lbound(self._qualify(attribute),
                                          cpos_list)

    def cpos2rbound(self, attribute, cpos_list):
        """Forward to ``client.cl_cpos2rbound`` for ``attribute``."""
        return self.client.cl_cpos2rbound(self._qualify(attribute),
                                          cpos_list)

    def cpos2str(self, attribute, cpos_list):
        """Forward to ``client.cl_cpos2str`` for ``attribute``."""
        return self.client.cl_cpos2str(self._qualify(attribute), cpos_list)

    def cpos2struc(self, attribute, cpos_list):
        """Forward to ``client.cl_cpos2struc`` for ``attribute``."""
        return self.client.cl_cpos2struc(self._qualify(attribute), cpos_list)

    def id2cpos(self, attribute, id):
        """Forward to ``client.cl_id2cpos`` for ``attribute``."""
        # The parameter name 'id' is kept for keyword-argument callers.
        return self.client.cl_id2cpos(self._qualify(attribute), id)

    def idlist2cpos(self, attribute, ids):
        """Forward to ``client.cl_idlist2cpos`` for ``attribute``."""
        return self.client.cl_idlist2cpos(self._qualify(attribute), ids)

    def id2freq(self, attribute, ids):
        """Forward to ``client.cl_id2freq`` for ``attribute``."""
        return self.client.cl_id2freq(self._qualify(attribute), ids)

    def id2str(self, attribute, ids):
        """Forward to ``client.cl_id2str`` for ``attribute``."""
        return self.client.cl_id2str(self._qualify(attribute), ids)

    def query(self, query, subcorpus_name='Results'):
        """Run ``query`` on this corpus and return the result subcorpus.

        The result is stored on the server under ``subcorpus_name`` and
        that same subcorpus is returned. (Previously the method always
        returned the subcorpus named 'Results', whatever name was given.)
        """
        self.client.cqp_query(self.name, subcorpus_name, query)
        return self.subcorpora.get(subcorpus_name)

    def regex2id(self, attribute, regex):
        """Forward to ``client.cl_regex2id`` for ``attribute``."""
        return self.client.cl_regex2id(self._qualify(attribute), regex)

    def structural_attribute_has_values(self, attribute):
        """Forward to ``client.corpus_structural_attribute_has_values``."""
        return self.client.corpus_structural_attribute_has_values(
            self._qualify(attribute))

    def str2id(self, attribute, strings):
        """Forward to ``client.cl_str2id`` for ``attribute``."""
        return self.client.cl_str2id(self._qualify(attribute), strings)

    def struc2cpos(self, attribute, struc):
        """Forward to ``client.cl_struc2cpos`` for ``attribute``."""
        return self.client.cl_struc2cpos(self._qualify(attribute), struc)

    def struc2str(self, attribute, strucs):
        """Forward to ``client.cl_struc2str`` for ``attribute``."""
        return self.client.cl_struc2str(self._qualify(attribute), strucs)
|
112
app/corpora/cqi/models/subcorpora.py
Normal file
112
app/corpora/cqi/models/subcorpora.py
Normal file
@ -0,0 +1,112 @@
|
|||||||
|
from ..constants import (CONST_FIELD_KEYWORD, CONST_FIELD_MATCH,
|
||||||
|
CONST_FIELD_MATCHEND, CONST_FIELD_TARGET)
|
||||||
|
|
||||||
|
|
||||||
|
class SubcorpusCollection:
    """Access point for the subcorpora that belong to one parent corpus."""

    def __init__(self, client, parent_corpus):
        """Remember the client and the corpus the subcorpora belong to."""
        self.client = client
        self.parent_corpus = parent_corpus

    def get(self, name):
        """Return a ``Subcorpus`` object for the subcorpus ``name``."""
        return Subcorpus(self.client, self.parent_corpus, name)

    def list(self):
        """Return a ``Subcorpus`` for every name the server lists."""
        subcorpora = []
        names = self.client.cqp_list_subcorpora(self.parent_corpus.name)
        for subcorpus_name in names:
            subcorpora.append(
                Subcorpus(self.client, self.parent_corpus, subcorpus_name))
        return subcorpora
|
||||||
|
|
||||||
|
|
||||||
|
class Subcorpus:
    """A named subcorpus (e.g. a query result) of a parent corpus.

    Attributes set on construction:

    * ``name``   -- the bare subcorpus name
    * ``fields`` -- maps 'match' / 'matchend' / 'target' / 'keyword' to
      the field constant, for every field the server reports as present
    * ``size``   -- the subcorpus size reported by the server
    """

    def __init__(self, client, parent_corpus, name):
        """Bind the subcorpus to its client/corpus and load its metadata."""
        self.client = client
        self.parent_corpus = parent_corpus
        self.name = name
        # Name used in server requests: '<corpus>:<subcorpus>'.
        self.__name = '{}:{}'.format(parent_corpus.name, name)
        self.__load()

    def __load(self):
        """Ask the server which fields exist and how big the subcorpus is."""
        candidates = (
            ('match', CONST_FIELD_MATCH),
            ('matchend', CONST_FIELD_MATCHEND),
            ('target', CONST_FIELD_TARGET),
            ('keyword', CONST_FIELD_KEYWORD),
        )
        self.fields = {}
        for key, field in candidates:
            if self.client.cqp_subcorpus_has_field(self.__name, field):
                self.fields[key] = field
        self.size = self.client.cqp_subcorpus_size(self.__name)

    def drop(self):
        """Forward to ``client.cqp_drop_subcorpus`` for this subcorpus."""
        return self.client.cqp_drop_subcorpus(self.__name)

    def dump(self, field, first, last):
        """Forward to ``client.cqp_dump_subcorpus`` for this subcorpus.

        ``first`` and ``last`` are passed through unchanged;
        ``dump_values`` treats them as an inclusive range of result
        indices.
        """
        return self.client.cqp_dump_subcorpus(self.__name, field, first, last)

    def dump_values(self, context=25, first_result=0,
                    num_results=float('inf')):
        """Return matches plus lookup tables for a slice of the results.

        Args:
            context: number of corpus positions to include to the left
                and to the right of every match.
            first_result: index of the first result to return (values
                below 0 are treated as 0).
            num_results: maximum number of results to return; the
                default returns everything from ``first_result`` on.

        Returns:
            A dict with the key ``'matches'`` (a list of dicts holding
            the corpus positions of ``'left'``, ``'hit'`` and
            ``'right'``). If any corpus position is involved it also
            holds ``'cpos_lookup'`` (cpos -> attribute values / structure
            ids) and one ``'<attr>_lookup'`` per structural attribute
            that has sub-attributes (structure id -> sub-attribute
            values). With nothing to return the result is
            ``{'matches': []}`` and the server is not contacted.
        """
        first_result = max(0, first_result)
        if self.size <= 0 or first_result >= self.size or num_results <= 0:
            return {'matches': []}
        # dump() is given an inclusive range here, hence the "- 1": exactly
        # num_results results are requested and chunks do not overlap.
        last_result = int(min(first_result + num_results - 1, self.size - 1))
        match_starts = self.dump(self.fields['match'], first_result,
                                 last_result)
        match_ends = self.dump(self.fields['matchend'], first_result,
                               last_result)

        corpus = self.parent_corpus
        matches = []
        seen_cpos = set()
        for match_start, match_end in zip(match_starts, match_ends):
            left_start = max(0, match_start - context)
            right_end = min(corpus.size, match_end + 1 + context)
            match = {'left': list(range(left_start, match_start)),
                     'hit': list(range(match_start, match_end + 1)),
                     'right': list(range(match_end + 1, right_end))}
            matches.append(match)
            seen_cpos.update(match['left'], match['hit'], match['right'])
        if not seen_cpos:
            return {'matches': matches}

        # Every position is requested once, even if matches overlap.
        cpos_list = sorted(seen_cpos)
        cpos_lookup = {cpos: {} for cpos in cpos_list}
        lookups = {'cpos_lookup': cpos_lookup}
        for attr in corpus.positional_attributes:
            values = corpus.cpos2str(attr, cpos_list)
            for cpos, value in zip(cpos_list, values):
                cpos_lookup[cpos][attr] = value

        structural_attributes = corpus.structural_attributes
        for attr in structural_attributes:
            # Attributes that carry values are handled as sub-attributes
            # of their parent (e.g. 'text_title' below 'text').
            if corpus.structural_attribute_has_values(attr):
                continue
            struc_ids = corpus.cpos2struc(attr, cpos_list)
            for cpos, struc_id in zip(cpos_list, struc_ids):
                # -1 marks a position that lies outside this structure.
                if struc_id != -1:
                    cpos_lookup[cpos][attr] = struc_id
            occurring_ids = sorted(set(struc_ids) - {-1})
            if not occurring_ids:
                continue
            # Strip the full '<attr>_' prefix so that parent attributes
            # containing an underscore themselves are handled correctly.
            prefix = attr + '_'
            subattrs = [name[len(prefix):] for name in structural_attributes
                        if name.startswith(prefix)]
            if not subattrs:
                continue
            attr_lookup = {struc_id: {} for struc_id in occurring_ids}
            for subattr in subattrs:
                values = corpus.struc2str(prefix + subattr, occurring_ids)
                for struc_id, value in zip(occurring_ids, values):
                    attr_lookup[struc_id][subattr] = value
            lookups[attr + '_lookup'] = attr_lookup
        return {'matches': matches, **lookups}

    def fdist_1(self, cutoff, field, attribute):
        """Forward to ``client.cqp_fdist_1`` for this subcorpus."""
        return self.client.cqp_fdist_1(self.__name, cutoff, field, attribute)

    def fdist_2(self, cutoff, field_1, attribute_1, field_2, attribute_2):
        """Forward to ``client.cqp_fdist_2`` for this subcorpus."""
        return self.client.cqp_fdist_2(self.__name, cutoff, field_1,
                                       attribute_1, field_2, attribute_2)
|
5
app/corpora/cqi/version.py
Normal file
5
app/corpora/cqi/version.py
Normal file
@ -0,0 +1,5 @@
|
|||||||
|
from .constants import MAJOR_VERSION, MINOR_VERSION
|
||||||
|
|
||||||
|
|
||||||
|
version = '{}.{}'.format(MAJOR_VERSION, MINOR_VERSION)
|
||||||
|
version_info = (MAJOR_VERSION, MINOR_VERSION)
|
@ -1,9 +1,9 @@
|
|||||||
from app import logger # only works if imported into opaque web app
|
from .api import APIClient
|
||||||
from . import CQi
|
from .constants import CONST_FIELD_MATCH, CONST_FIELD_MATCHEND
|
||||||
import time
|
import time
|
||||||
|
|
||||||
|
|
||||||
class CQiWrapper(CQi.APIClient):
|
class CQiWrapper(APIClient):
|
||||||
'''
|
'''
|
||||||
CQIiWrapper object
|
CQIiWrapper object
|
||||||
|
|
||||||
@ -55,7 +55,7 @@ class CQiWrapper(CQi.APIClient):
|
|||||||
self.attr_strings['struct_attrs'][struct_attr] = (self.corpus_name
|
self.attr_strings['struct_attrs'][struct_attr] = (self.corpus_name
|
||||||
+ '.'
|
+ '.'
|
||||||
+ struct_attr)
|
+ struct_attr)
|
||||||
logger.warning(('All positional and '
|
print(('All positional and '
|
||||||
'structural attributes: {}').format(self.attr_strings))
|
'structural attributes: {}').format(self.attr_strings))
|
||||||
|
|
||||||
def select_corpus(self, corpus_name):
|
def select_corpus(self, corpus_name):
|
||||||
@ -67,9 +67,9 @@ class CQiWrapper(CQi.APIClient):
|
|||||||
if corpus_name in self.corpus_list_coprora():
|
if corpus_name in self.corpus_list_coprora():
|
||||||
self.corpus_name = corpus_name
|
self.corpus_name = corpus_name
|
||||||
self.__create_attribute_strings()
|
self.__create_attribute_strings()
|
||||||
logger.warning('{} does exist.'.format(corpus_name))
|
print('{} does exist.'.format(corpus_name))
|
||||||
else:
|
else:
|
||||||
logger.warning('{} does not exist.'.format(corpus_name))
|
print('{} does not exist.'.format(corpus_name))
|
||||||
raise Exception('Given Corpus Name is not in corpora list.')
|
raise Exception('Given Corpus Name is not in corpora list.')
|
||||||
|
|
||||||
def disconnect(self):
|
def disconnect(self):
|
||||||
@ -80,7 +80,7 @@ class CQiWrapper(CQi.APIClient):
|
|||||||
'''
|
'''
|
||||||
self.ctrl_bye()
|
self.ctrl_bye()
|
||||||
super(CQiWrapper, self).teardown()
|
super(CQiWrapper, self).teardown()
|
||||||
logger.warning('Disconnected from cqp server.')
|
print('Disconnected from cqp server.')
|
||||||
|
|
||||||
def query_subcorpus(self, query, result_subcorpus_name='Query-results'):
|
def query_subcorpus(self, query, result_subcorpus_name='Query-results'):
|
||||||
'''
|
'''
|
||||||
@ -101,7 +101,7 @@ class CQiWrapper(CQi.APIClient):
|
|||||||
+ result_subcorpus_name)
|
+ result_subcorpus_name)
|
||||||
self.SUBCORPUS_NAMES.append(self.result_subcorpus)
|
self.SUBCORPUS_NAMES.append(self.result_subcorpus)
|
||||||
self.match_count = self.cqp_subcorpus_size(self.result_subcorpus)
|
self.match_count = self.cqp_subcorpus_size(self.result_subcorpus)
|
||||||
logger.warning('Nr of all matches is: {}'.format(self.match_count))
|
print('Nr of all matches is: {}'.format(self.match_count))
|
||||||
|
|
||||||
def show_subcorpora(self):
|
def show_subcorpora(self):
|
||||||
'''
|
'''
|
||||||
@ -137,7 +137,7 @@ class CQiWrapper(CQi.APIClient):
|
|||||||
)
|
)
|
||||||
self.nr_matches = min(result_len, self.match_count)
|
self.nr_matches = min(result_len, self.match_count)
|
||||||
if self.match_count == 0:
|
if self.match_count == 0:
|
||||||
logger.warning('Query resulted in 0 matches.')
|
print('Query resulted in 0 matches.')
|
||||||
self.results = {'code': 0,
|
self.results = {'code': 0,
|
||||||
'result': {'matches': [],
|
'result': {'matches': [],
|
||||||
'match_count': self.match_count,
|
'match_count': self.match_count,
|
||||||
@ -151,15 +151,15 @@ class CQiWrapper(CQi.APIClient):
|
|||||||
# pair of cpositions
|
# pair of cpositions
|
||||||
# [(1355, 1357), (1477, 1479)] Example for two boundry pairs
|
# [(1355, 1357), (1477, 1479)] Example for two boundry pairs
|
||||||
offset_start = 0 if result_offset == 0 else result_offset
|
offset_start = 0 if result_offset == 0 else result_offset
|
||||||
logger.warning('Offset start is: {}'.format(offset_start))
|
print('Offset start is: {}'.format(offset_start))
|
||||||
offset_end = min((self.nr_matches + result_offset - 1), self.match_count - 1)
|
offset_end = min((self.nr_matches + result_offset - 1), self.match_count - 1)
|
||||||
logger.warning('Offset end is: {}'.format(offset_end))
|
print('Offset end is: {}'.format(offset_end))
|
||||||
match_boundaries = zip(self.cqp_dump_subcorpus(self.result_subcorpus,
|
match_boundaries = zip(self.cqp_dump_subcorpus(self.result_subcorpus,
|
||||||
CQi.CONST_FIELD_MATCH,
|
CONST_FIELD_MATCH,
|
||||||
offset_start,
|
offset_start,
|
||||||
offset_end),
|
offset_end),
|
||||||
self.cqp_dump_subcorpus(self.result_subcorpus,
|
self.cqp_dump_subcorpus(self.result_subcorpus,
|
||||||
CQi.CONST_FIELD_MATCHEND,
|
CONST_FIELD_MATCHEND,
|
||||||
offset_start,
|
offset_start,
|
||||||
offset_end))
|
offset_end))
|
||||||
|
|
||||||
@ -189,8 +189,8 @@ class CQiWrapper(CQi.APIClient):
|
|||||||
len_all_cpos = len(all_cpos)
|
len_all_cpos = len(all_cpos)
|
||||||
t1 = time.time()
|
t1 = time.time()
|
||||||
t_total = t1 - t0
|
t_total = t1 - t0
|
||||||
logger.warning('Time to create all CPOS for query: {}'.format(t_total))
|
print('Time to create all CPOS for query: {}'.format(t_total))
|
||||||
logger.warning('Requesting {} CPOS with one query.'.format(len_all_cpos))
|
print('Requesting {} CPOS with one query.'.format(len_all_cpos))
|
||||||
|
|
||||||
# Get cpos informations like CORPUS_NAME.word or CORPUS_NAME.lemma for
|
# Get cpos informations like CORPUS_NAME.word or CORPUS_NAME.lemma for
|
||||||
# all cpos entries in all_cpos_list
|
# all cpos entries in all_cpos_list
|
||||||
@ -199,7 +199,7 @@ class CQiWrapper(CQi.APIClient):
|
|||||||
all_cpos_infos, text_lookup = self.get_cpos_infos(all_cpos)
|
all_cpos_infos, text_lookup = self.get_cpos_infos(all_cpos)
|
||||||
t3 = time.time()
|
t3 = time.time()
|
||||||
t_final = t3 - t2
|
t_final = t3 - t2
|
||||||
logger.warning('Got infos for {} CPOS in {} seconds:'.format(len_all_cpos,
|
print('Got infos for {} CPOS in {} seconds:'.format(len_all_cpos,
|
||||||
t_final))
|
t_final))
|
||||||
self.results = {'code': 0,
|
self.results = {'code': 0,
|
||||||
'result': {'matches': all_matches,
|
'result': {'matches': all_matches,
|
||||||
@ -233,28 +233,28 @@ class CQiWrapper(CQi.APIClient):
|
|||||||
tmp_info[struct_attr_key].append(id)
|
tmp_info[struct_attr_key].append(id)
|
||||||
else:
|
else:
|
||||||
structs_to_check.append({key: struct_attr_key})
|
structs_to_check.append({key: struct_attr_key})
|
||||||
logger.warning('Structs to check: {}'.format(structs_to_check))
|
print('Structs to check: {}'.format(structs_to_check))
|
||||||
struct_attr_values = list(tmp_info.values())
|
struct_attr_values = list(tmp_info.values())
|
||||||
# logger.warning('Struct attr value list: {}'.format(struct_attr_values))
|
# print('Struct attr value list: {}'.format(struct_attr_values))
|
||||||
struct_attr_keys = list(tmp_info.keys())
|
struct_attr_keys = list(tmp_info.keys())
|
||||||
# logger.warning('Struct attr key list: {}'.format(struct_attr_keys))
|
# print('Struct attr key list: {}'.format(struct_attr_keys))
|
||||||
|
|
||||||
# Build textlookup dictionary
|
# Build textlookup dictionary
|
||||||
text_lookup_ids = list(set(struct_attr_values[0])) # every CPOS is associated with one text id. A set is build to only gather text_lookup informations for every unique text id
|
text_lookup_ids = list(set(struct_attr_values[0])) # every CPOS is associated with one text id. A set is build to only gather text_lookup informations for every unique text id
|
||||||
text_lookup = {} # final dict containing all info of one text identified by its id
|
text_lookup = {} # final dict containing all info of one text identified by its id
|
||||||
for d in structs_to_check:
|
for d in structs_to_check:
|
||||||
s_key, s_value = zip(*d.items())
|
s_key, s_value = zip(*d.items())
|
||||||
logger.warning('dict entries: {}: {}'.format(s_key, s_value))
|
print('dict entries: {}: {}'.format(s_key, s_value))
|
||||||
s_value = s_value[0].split('_', 1)[-1]
|
s_value = s_value[0].split('_', 1)[-1]
|
||||||
logger.warning('S_VALUE: {}'.format(s_value))
|
print('S_VALUE: {}'.format(s_value))
|
||||||
struct_values = self.cl_struc2str(s_key[0], text_lookup_ids)
|
struct_values = self.cl_struc2str(s_key[0], text_lookup_ids)
|
||||||
logger.warning('Extracted Value with key {}: {}'.format(s_key[0], struct_values))
|
print('Extracted Value with key {}: {}'.format(s_key[0], struct_values))
|
||||||
zipped = dict(zip(text_lookup_ids, struct_values))
|
zipped = dict(zip(text_lookup_ids, struct_values))
|
||||||
for zip_key, zip_value in zipped.items():
|
for zip_key, zip_value in zipped.items():
|
||||||
logger.warning('Text id as key is: {}'.format(zip_key))
|
print('Text id as key is: {}'.format(zip_key))
|
||||||
logger.warning('Value of this text is: {}'.format(zip_value))
|
print('Value of this text is: {}'.format(zip_value))
|
||||||
check = text_lookup.get(zip_key)
|
check = text_lookup.get(zip_key)
|
||||||
logger.warning('check: {}'.format(check))
|
print('check: {}'.format(check))
|
||||||
if check is None:
|
if check is None:
|
||||||
text_lookup[zip_key] = {s_value: zip_value}
|
text_lookup[zip_key] = {s_value: zip_value}
|
||||||
else:
|
else:
|
||||||
@ -287,14 +287,14 @@ class CQiWrapper(CQi.APIClient):
|
|||||||
first_cpos, last_cpos = match_cpos_list[0], match_cpos_list[-1]
|
first_cpos, last_cpos = match_cpos_list[0], match_cpos_list[-1]
|
||||||
context_sentences = {}
|
context_sentences = {}
|
||||||
s_ids = self.cl_cpos2struc(key, [first_cpos, last_cpos])
|
s_ids = self.cl_cpos2struc(key, [first_cpos, last_cpos])
|
||||||
logger.warning('s id match: {}'.format(s_ids))
|
print('s id match: {}'.format(s_ids))
|
||||||
for s_id in s_ids:
|
for s_id in s_ids:
|
||||||
s_start, s_end = self.cl_struc2cpos(key, s_id)
|
s_start, s_end = self.cl_struc2cpos(key, s_id)
|
||||||
s_cpos = list(range(s_start, s_end + 1))
|
s_cpos = list(range(s_start, s_end + 1))
|
||||||
context_sentences[s_id] = s_cpos
|
context_sentences[s_id] = s_cpos
|
||||||
if get_surrounding_s:
|
if get_surrounding_s:
|
||||||
max_s_id = self.cl_attribute_size(key) - 1
|
max_s_id = self.cl_attribute_size(key) - 1
|
||||||
logger.warning('max sid: {}'.format(max_s_id))
|
print('max sid: {}'.format(max_s_id))
|
||||||
additional_s_ids = []
|
additional_s_ids = []
|
||||||
additional_s = list(range(max(s_ids[0]
|
additional_s = list(range(max(s_ids[0]
|
||||||
- l_r_s_context_additional_len,
|
- l_r_s_context_additional_len,
|
||||||
@ -304,7 +304,7 @@ class CQiWrapper(CQi.APIClient):
|
|||||||
max_s_id) + 1))
|
max_s_id) + 1))
|
||||||
additional_s_ids.extend(additional_s)
|
additional_s_ids.extend(additional_s)
|
||||||
for s_id in additional_s_ids:
|
for s_id in additional_s_ids:
|
||||||
logger.warning('s id additional: {}'.format(s_id))
|
print('s id additional: {}'.format(s_id))
|
||||||
s_start, s_end = self.cl_struc2cpos(key, s_id)
|
s_start, s_end = self.cl_struc2cpos(key, s_id)
|
||||||
s_cpos = list(range(s_start, s_end + 1))
|
s_cpos = list(range(s_start, s_end + 1))
|
||||||
context_sentences[s_id] = s_cpos
|
context_sentences[s_id] = s_cpos
|
||||||
@ -315,7 +315,7 @@ class CQiWrapper(CQi.APIClient):
|
|||||||
all_cpos_infos, text_lookup = self.get_cpos_infos(all_cpos)
|
all_cpos_infos, text_lookup = self.get_cpos_infos(all_cpos)
|
||||||
t1 = time.time()
|
t1 = time.time()
|
||||||
t_total = t1 - t0
|
t_total = t1 - t0
|
||||||
logger.warning('Got all sentences informations in {} seconds'. format(t_total))
|
print('Got all sentences informations in {} seconds'. format(t_total))
|
||||||
match_context = {'context_s_cpos': context_sentences,
|
match_context = {'context_s_cpos': context_sentences,
|
||||||
'cpos_lookup': all_cpos_infos,
|
'cpos_lookup': all_cpos_infos,
|
||||||
'text_lookup': text_lookup,
|
'text_lookup': text_lookup,
|
@ -1,9 +1,11 @@
|
|||||||
from app import db, logger, socketio
|
from app import db, logger, socketio
|
||||||
from app.events import connected_sessions
|
from app.events import connected_sessions
|
||||||
from app.models import Corpus, User
|
from app.models import Corpus, User
|
||||||
|
# from .cqi import CQiClient
|
||||||
|
from .cqi import CQiWrapper
|
||||||
from flask import current_app, request
|
from flask import current_app, request
|
||||||
from flask_login import current_user, login_required
|
from flask_login import current_user, login_required
|
||||||
from .CQiWrapper.CQiWrapper import CQiWrapper
|
# import math
|
||||||
|
|
||||||
'''
|
'''
|
||||||
' A dictionary containing lists of, with corpus ids associated, Socket.IO
|
' A dictionary containing lists of, with corpus ids associated, Socket.IO
|
||||||
@ -34,8 +36,10 @@ def corpus_analysis_query(query):
|
|||||||
socketio.emit('query', '[424]: Failed Dependency', room=request.sid)
|
socketio.emit('query', '[424]: Failed Dependency', room=request.sid)
|
||||||
return
|
return
|
||||||
client.select_corpus('CORPUS')
|
client.select_corpus('CORPUS')
|
||||||
|
# corpus = client.corpora.get('CORPUS')
|
||||||
try:
|
try:
|
||||||
client.query_subcorpus(query)
|
client.query_subcorpus(query)
|
||||||
|
# results = corpus.query(query)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.warning(e)
|
logger.warning(e)
|
||||||
response = str(e)
|
response = str(e)
|
||||||
@ -52,6 +56,23 @@ def corpus_analysis_query(query):
|
|||||||
result_offset=chunk_start)
|
result_offset=chunk_start)
|
||||||
socketio.emit('corpus_analysis_query', chunk, room=request.sid)
|
socketio.emit('corpus_analysis_query', chunk, room=request.sid)
|
||||||
chunk_start += chunk_size
|
chunk_start += chunk_size
|
||||||
|
'''
|
||||||
|
chunk_size = 100
|
||||||
|
chunk_start = 0
|
||||||
|
context = 100
|
||||||
|
progress = 0
|
||||||
|
while chunk_start <= results.size:
|
||||||
|
chunk = results.dump_results(context=context,
|
||||||
|
first_result=chunk_start,
|
||||||
|
num_results=chunk_size)
|
||||||
|
progress = ((chunk_start + chunk_size) / results.size) * 100
|
||||||
|
progress = min(100, int(math.ceil(progress)))
|
||||||
|
socketio.emit('corpus_analysis_query',
|
||||||
|
{'chunk': chunk, 'progress': progress,
|
||||||
|
'match_count': results.size},
|
||||||
|
room=request.sid)
|
||||||
|
chunk_start += chunk_size
|
||||||
|
'''
|
||||||
|
|
||||||
|
|
||||||
@socketio.on('inspect_match')
|
@socketio.on('inspect_match')
|
||||||
@ -86,6 +107,7 @@ def corpus_analysis_session_handler(app, corpus_id, user_id, session_id):
|
|||||||
db.session.refresh(corpus)
|
db.session.refresh(corpus)
|
||||||
socketio.sleep(3)
|
socketio.sleep(3)
|
||||||
client = CQiWrapper(host='corpus_{}_analysis'.format(corpus_id))
|
client = CQiWrapper(host='corpus_{}_analysis'.format(corpus_id))
|
||||||
|
# client = CQiClient('corpus_{}_analysis'.format(corpus_id))
|
||||||
try:
|
try:
|
||||||
client.connect()
|
client.connect()
|
||||||
except Exception:
|
except Exception:
|
||||||
|
@ -75,7 +75,6 @@ function recieveResults(response) {
|
|||||||
let queryResultsInteractionElement;
|
let queryResultsInteractionElement;
|
||||||
let queryResultsHeadElement;
|
let queryResultsHeadElement;
|
||||||
let queryStatus;
|
let queryStatus;
|
||||||
|
|
||||||
queryFinished = false;
|
queryFinished = false;
|
||||||
|
|
||||||
// ERROR code checking
|
// ERROR code checking
|
||||||
@ -97,6 +96,7 @@ function recieveResults(response) {
|
|||||||
}
|
}
|
||||||
// logs the current recieved chunk
|
// logs the current recieved chunk
|
||||||
chunk = response["result"];
|
chunk = response["result"];
|
||||||
|
//chunk = response["chunk"];
|
||||||
console.log("### corpus_analysis chunk ###");
|
console.log("### corpus_analysis chunk ###");
|
||||||
console.log(chunk);
|
console.log(chunk);
|
||||||
// logs and extends/push/update the current recieved chunk to the
|
// logs and extends/push/update the current recieved chunk to the
|
||||||
@ -106,6 +106,7 @@ function recieveResults(response) {
|
|||||||
Object.assign(result["cpos_lookup"], chunk["cpos_lookup"]);
|
Object.assign(result["cpos_lookup"], chunk["cpos_lookup"]);
|
||||||
Object.assign(result["text_lookup"], chunk["text_lookup"]);
|
Object.assign(result["text_lookup"], chunk["text_lookup"]);
|
||||||
result["match_count"] = chunk["match_count"];
|
result["match_count"] = chunk["match_count"];
|
||||||
|
//result["match_count"] = response["match_count"];
|
||||||
console.log("Before Current match count", result["loaded_match_count"]);
|
console.log("Before Current match count", result["loaded_match_count"]);
|
||||||
queryData = getQueryData(queryFormElement);
|
queryData = getQueryData(queryFormElement);
|
||||||
result["query"] = queryData["query"];
|
result["query"] = queryData["query"];
|
||||||
@ -396,4 +397,4 @@ function download(downloadElem, dataStr, filename, type, filenameSlug) {
|
|||||||
downloadElem.href = url;
|
downloadElem.href = url;
|
||||||
downloadElem.download = filename;
|
downloadElem.download = filename;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -45,7 +45,7 @@
|
|||||||
<script src="{{ url_for('static', filename='js/JSONPatch.js/jsonpatch.min.js') }}"></script>
|
<script src="{{ url_for('static', filename='js/JSONPatch.js/jsonpatch.min.js') }}"></script>
|
||||||
<script src="{{ url_for('static', filename='js/Dark_Reader/darkreader.js') }}"></script>
|
<script src="{{ url_for('static', filename='js/Dark_Reader/darkreader.js') }}"></script>
|
||||||
<script src="{{ url_for('static', filename='js/List.js/list.min.js') }}"></script>
|
<script src="{{ url_for('static', filename='js/List.js/list.min.js') }}"></script>
|
||||||
<script src="{{ url_for('static', filename='js/Socket.IO/socket.io.slim.js') }}"></script>
|
<script src="{{ url_for('static', filename='js/Socket.IO/socket.io.js') }}"></script>
|
||||||
<script src="{{ url_for('static', filename='js/nopaque.js') }}"></script>
|
<script src="{{ url_for('static', filename='js/nopaque.js') }}"></script>
|
||||||
<script src="{{ url_for('static', filename='js/nopaque.lists.js') }}"></script>
|
<script src="{{ url_for('static', filename='js/nopaque.lists.js') }}"></script>
|
||||||
</head>
|
</head>
|
||||||
|
Loading…
x
Reference in New Issue
Block a user