Merge branch 'visualizations-update' into development

This commit is contained in:
Patrick Jentsch 2023-07-26 11:32:11 +02:00
commit 983400b925
60 changed files with 4296 additions and 1584 deletions

View File

@ -74,8 +74,10 @@ def create_app(config: Config = Config) -> Flask:
app.register_blueprint(contributions_blueprint, url_prefix='/contributions') app.register_blueprint(contributions_blueprint, url_prefix='/contributions')
from .corpora import bp as corpora_blueprint from .corpora import bp as corpora_blueprint
from .corpora.cqi_over_sio import CQiNamespace
default_breadcrumb_root(corpora_blueprint, '.corpora') default_breadcrumb_root(corpora_blueprint, '.corpora')
app.register_blueprint(corpora_blueprint, cli_group='corpus', url_prefix='/corpora') app.register_blueprint(corpora_blueprint, cli_group='corpus', url_prefix='/corpora')
socketio.on_namespace(CQiNamespace('/cqi_over_sio'))
from .errors import bp as errors_bp from .errors import bp as errors_bp
app.register_blueprint(errors_bp) app.register_blueprint(errors_bp)
@ -100,4 +102,7 @@ def create_app(config: Config = Config) -> Flask:
default_breadcrumb_root(users_blueprint, '.users') default_breadcrumb_root(users_blueprint, '.users')
app.register_blueprint(users_blueprint, url_prefix='/users') app.register_blueprint(users_blueprint, url_prefix='/users')
from .workshops import bp as workshops_blueprint
app.register_blueprint(workshops_blueprint, url_prefix='/workshops')
return app return app

View File

@ -16,4 +16,4 @@ def before_request():
pass pass
from . import cli, cqi_over_socketio, files, followers, routes, json_routes from . import cli, files, followers, routes, json_routes

View File

@ -19,6 +19,9 @@ def reset():
for corpus in [x for x in Corpus.query.all() if x.status in status]: for corpus in [x for x in Corpus.query.all() if x.status in status]:
print(f'Resetting corpus {corpus}') print(f'Resetting corpus {corpus}')
shutil.rmtree(os.path.join(corpus.path, 'cwb'), ignore_errors=True) shutil.rmtree(os.path.join(corpus.path, 'cwb'), ignore_errors=True)
os.mkdir(os.path.join(corpus.path, 'cwb'))
os.mkdir(os.path.join(corpus.path, 'cwb', 'data'))
os.mkdir(os.path.join(corpus.path, 'cwb', 'registry'))
corpus.status = CorpusStatus.UNPREPARED corpus.status = CorpusStatus.UNPREPARED
corpus.num_analysis_sessions = 0 corpus.num_analysis_sessions = 0
db.session.commit() db.session.commit()

View File

@ -0,0 +1,199 @@
from cqi import CQiClient
from cqi.errors import CQiException
from cqi.status import CQiStatus
from flask import session
from flask_login import current_user
from flask_socketio import Namespace
from inspect import signature
from threading import Lock
from typing import Callable, Dict, List
from app import db, hashids, socketio
from app.decorators import socketio_login_required
from app.models import Corpus, CorpusStatus
from . import extensions
'''
This package tunnels the Corpus Query interface (CQi) protocol through
Socket.IO (SIO) by tunneling CQi API calls through an event called "exec".

Basic concept:
1. A client connects to the "/cqi_over_sio" namespace.
2. The client emits the "init" event and provides a corpus id for the corpus
   that should be analysed in this session.
   2.1 The analysis session counter of the corpus is incremented.
   2.2 A CQiClient and a (Mutex) Lock belonging to it are created.
   2.3 Wait until the CQP server is running.
   2.4 Connect the CQiClient to the server.
   2.5 Save the CQiClient, the Lock and the corpus id in the session for
       subsequent use.
3. The client emits the "exec" event and provides the name of a CQi API
   function and its arguments (optional).
   - The "exec" event handler will execute the function, make sure that the
     result is serializable and return the result back to the client.
4. Wait for more events.
5. The client disconnects from the "/cqi_over_sio" namespace.
   5.1 The analysis session counter of the corpus is decremented.
   5.2 The CQiClient and (Mutex) Lock belonging to it are torn down.
'''
# Allow-list of CQi API function names that a client may invoke through the
# "exec" event. A requested name found in this list is resolved with getattr()
# on the session's CQiClient.api object; names that are neither in this list
# nor in the extension function list are answered with "400 Bad Request".
CQI_API_FUNCTION_NAMES: List[str] = [
'ask_feature_cl_2_3',
'ask_feature_cqi_1_0',
'ask_feature_cqp_2_3',
'cl_alg2cpos',
'cl_attribute_size',
'cl_cpos2alg',
'cl_cpos2id',
'cl_cpos2lbound',
'cl_cpos2rbound',
'cl_cpos2str',
'cl_cpos2struc',
'cl_drop_attribute',
'cl_id2cpos',
'cl_id2freq',
'cl_id2str',
'cl_idlist2cpos',
'cl_lexicon_size',
'cl_regex2id',
'cl_str2id',
'cl_struc2cpos',
'cl_struc2str',
'corpus_alignment_attributes',
'corpus_charset',
'corpus_drop_corpus',
'corpus_full_name',
'corpus_info',
'corpus_list_corpora',
'corpus_positional_attributes',
'corpus_properties',
'corpus_structural_attribute_has_values',
'corpus_structural_attributes',
'cqp_drop_subcorpus',
'cqp_dump_subcorpus',
'cqp_fdist_1',
'cqp_fdist_2',
'cqp_list_subcorpora',
'cqp_query',
'cqp_subcorpus_has_field',
'cqp_subcorpus_size',
'ctrl_bye',
'ctrl_connect',
'ctrl_last_general_error',
'ctrl_ping',
'ctrl_user_abort'
]
class CQiNamespace(Namespace):
    '''
    Socket.IO namespace that tunnels CQi API calls for one corpus per
    connection.

    The per-connection state (CQiClient, its Lock and the database id of the
    corpus) is kept in the Flask session under the key "cqi_over_sio". It is
    created by the "init" event, used by the "exec" event and removed again
    when the client disconnects.

    Every event handler answers with a dict of the form
    {'code': <int>, 'msg': <str>[, 'payload': ...]}.
    '''

    @socketio_login_required
    def on_connect(self):
        '''Handle a new connection; the session is set up by "init".'''
        pass

    @socketio_login_required
    def on_init(self, db_corpus_hashid: str):
        '''
        Start an analysis session for the corpus identified by
        db_corpus_hashid.

        Returns 404 if the corpus does not exist, 403 if the current user is
        neither owner, follower nor administrator, 424 if the corpus is not
        in a status that permits analysis, 408 if the corpus did not reach
        RUNNING_ANALYSIS_SESSION within the retry budget, and 200 once the
        session state has been stored.
        '''
        db_corpus_id = hashids.decode(db_corpus_hashid)
        db_corpus = Corpus.query.get(db_corpus_id)
        if db_corpus is None:
            return {'code': 404, 'msg': 'Not Found'}
        if not (db_corpus.user == current_user
                or current_user.is_following_corpus(db_corpus)
                or current_user.is_administrator()):
            return {'code': 403, 'msg': 'Forbidden'}
        if db_corpus.status not in [
            CorpusStatus.BUILT,
            CorpusStatus.STARTING_ANALYSIS_SESSION,
            CorpusStatus.RUNNING_ANALYSIS_SESSION,
            CorpusStatus.CANCELING_ANALYSIS_SESSION
        ]:
            return {'code': 424, 'msg': 'Failed Dependency'}
        if db_corpus.num_analysis_sessions is None:
            db_corpus.num_analysis_sessions = 0
            db.session.commit()
        # Column expression (not a Python int) so the increment is evaluated
        # by the database when the change is committed.
        db_corpus.num_analysis_sessions = Corpus.num_analysis_sessions + 1
        db.session.commit()
        # Poll the corpus status: 20 retries with 3 seconds of sleep each.
        retry_counter = 20
        while db_corpus.status != CorpusStatus.RUNNING_ANALYSIS_SESSION:
            if retry_counter == 0:
                # Give the session slot back before reporting the timeout.
                db_corpus.num_analysis_sessions = Corpus.num_analysis_sessions - 1
                db.session.commit()
                return {'code': 408, 'msg': 'Request Timeout'}
            socketio.sleep(3)
            retry_counter -= 1
            db.session.refresh(db_corpus)
        cqi_client = CQiClient(f'cqpserver_{db_corpus_id}', timeout=float('inf'))
        session['cqi_over_sio'] = {}
        session['cqi_over_sio']['cqi_client'] = cqi_client
        session['cqi_over_sio']['cqi_client_lock'] = Lock()
        session['cqi_over_sio']['db_corpus_id'] = db_corpus_id
        return {'code': 200, 'msg': 'OK'}

    @socketio_login_required
    def on_exec(self, fn_name: str, fn_args: Dict = None):
        '''
        Execute the CQi API or extension function called fn_name with the
        keyword arguments given in fn_args.

        Returns 424 if no session was initialised, 400 if the function name
        is unknown or the arguments do not fit the function's signature, 500
        on a broken pipe, 502 (with the CQi error as payload) if the function
        raised a CQiException, and 200 with the result as payload otherwise.
        '''
        try:
            cqi_client: CQiClient = session['cqi_over_sio']['cqi_client']
            cqi_client_lock: Lock = session['cqi_over_sio']['cqi_client_lock']
        except KeyError:
            return {'code': 424, 'msg': 'Failed Dependency'}
        # None instead of a shared mutable default; anything that is not a
        # dict cannot be used as keyword arguments.
        if fn_args is None:
            fn_args = {}
        elif not isinstance(fn_args, dict):
            return {'code': 400, 'msg': 'Bad Request'}
        if fn_name in CQI_API_FUNCTION_NAMES:
            fn: Callable = getattr(cqi_client.api, fn_name)
        elif fn_name in extensions.CQI_EXTENSION_FUNCTION_NAMES:
            fn: Callable = getattr(extensions, fn_name)
        else:
            return {'code': 400, 'msg': 'Bad Request'}
        fn_params = signature(fn).parameters
        # Reject unknown argument names up front; they would otherwise
        # surface as an unhandled TypeError from the call below.
        if any(arg_name not in fn_params for arg_name in fn_args):
            return {'code': 400, 'msg': 'Bad Request'}
        for param in fn_params.values():
            if param.name not in fn_args:
                if param.default is param.empty:
                    # A required argument is missing.
                    return {'code': 400, 'msg': 'Bad Request'}
                continue
            # NOTE(review): this is an exact type comparison against the raw
            # annotation, so a parameterized annotation such as List[int]
            # can never match - confirm against the cqi API signatures.
            if type(fn_args[param.name]) is not param.annotation:
                return {'code': 400, 'msg': 'Bad Request'}
        # The lock serialises all calls that use this session's CQiClient;
        # the with-statement releases it on every exit path.
        with cqi_client_lock:
            try:
                fn_return_value = fn(**fn_args)
            except BrokenPipeError:
                return {'code': 500, 'msg': 'Internal Server Error'}
            except CQiException as e:
                return {
                    'code': 502,
                    'msg': 'Bad Gateway',
                    'payload': {
                        'code': e.code,
                        'desc': e.description,
                        'msg': e.__class__.__name__
                    }
                }
        if isinstance(fn_return_value, CQiStatus):
            # Status objects are reduced to their code and class name.
            payload = {
                'code': fn_return_value.code,
                'msg': fn_return_value.__class__.__name__
            }
        else:
            payload = fn_return_value
        return {'code': 200, 'msg': 'OK', 'payload': payload}

    def on_disconnect(self):
        '''
        Tear down the analysis session: drop the session state, say goodbye
        to the CQP server and decrement the corpus' session counter.

        Does nothing if "init" never stored any session state.
        '''
        try:
            cqi_client: CQiClient = session['cqi_over_sio']['cqi_client']
            cqi_client_lock: Lock = session['cqi_over_sio']['cqi_client_lock']
            db_corpus_id: int = session['cqi_over_sio']['db_corpus_id']
        except KeyError:
            return
        try:
            # Holding the lock waits for a call that is still in flight; the
            # with-statement guarantees the release even if ctrl_bye fails
            # in an unexpected way.
            with cqi_client_lock:
                session.pop('cqi_over_sio', None)
                try:
                    cqi_client.api.ctrl_bye()
                except (BrokenPipeError, CQiException):
                    # Best effort: the server side may already be gone.
                    pass
        finally:
            # Always give the session slot back, otherwise the counter of
            # the corpus would never return to its previous value.
            db_corpus = Corpus.query.get(db_corpus_id)
            if db_corpus is not None:
                db_corpus.num_analysis_sessions = Corpus.num_analysis_sessions - 1
                db.session.commit()

View File

@ -0,0 +1,316 @@
from collections import Counter
from cqi import CQiClient
from cqi.models.corpora import Corpus as CQiCorpus
from cqi.models.attributes import (
PositionalAttribute as CQiPositionalAttribute,
StructuralAttribute as CQiStructuralAttribute
)
from cqi.status import StatusOk as CQiStatusOk
from flask import session
from typing import Dict, List
import gzip
import json
import math
import os
from app import db
from app.models import Corpus
from .utils import lookups_by_cpos, partial_export_subcorpus, export_subcorpus
# Names of the extension functions defined in this module that are exposed
# next to the plain CQi API functions. Each entry matches the name of a
# function defined below.
CQI_EXTENSION_FUNCTION_NAMES: List[str] = [
'ext_corpus_update_db',
'ext_corpus_static_data',
'ext_corpus_paginate_corpus',
'ext_cqp_paginate_subcorpus',
'ext_cqp_partial_export_subcorpus',
'ext_cqp_export_subcorpus',
]
def ext_corpus_update_db(corpus: str) -> CQiStatusOk:
    '''
    Copy the size of the CQi corpus named `corpus` into the `num_tokens`
    column of the session's database corpus and commit the change.
    '''
    sio_state = session['cqi_over_sio']
    client: CQiClient = sio_state['cqi_client']
    corpus_record: Corpus = Corpus.query.get(sio_state['db_corpus_id'])
    remote_corpus: CQiCorpus = client.corpora.get(corpus)
    corpus_record.num_tokens = remote_corpus.size
    db.session.commit()
    return CQiStatusOk()
def _id_chunks(start: int, stop: int, size: int = 10000):
    '''Yield consecutive lists that cover range(start, stop), `size` ids each.'''
    for chunk_start in range(start, stop, size):
        yield list(range(chunk_start, min(chunk_start + size, stop)))


def ext_corpus_static_data(corpus: str) -> bytes:
    '''
    Return the static data of the CQi corpus named `corpus` as the raw bytes
    of a gzip-compressed JSON document.

    The document is cached as "cwb/static.json.gz" below the path of the
    session's database corpus. If that file exists it is returned as is,
    otherwise it is computed, written and then returned.

    Layout of the JSON document:
      corpus:  bounds, counts (tokens and one entry per structural attribute)
               and freqs (per positional attribute: id -> frequency)
      p_attrs: per positional attribute: cpos -> id
      s_attrs: per structural attribute: lexicon (bounds/counts/freqs per
               id, only filled for "text") and the names of its values
      values:  p_attrs (id -> string) and s_attrs (id -> {name: value})
    '''
    # Local import: only needed for the unique temporary file name below.
    import uuid

    db_corpus_id: int = session['cqi_over_sio']['db_corpus_id']
    db_corpus: Corpus = Corpus.query.get(db_corpus_id)
    cache_file_path: str = os.path.join(db_corpus.path, 'cwb', 'static.json.gz')
    if os.path.exists(cache_file_path):
        with open(cache_file_path, 'rb') as f:
            return f.read()
    cqi_client: CQiClient = session['cqi_over_sio']['cqi_client']
    cqi_corpus: CQiCorpus = cqi_client.corpora.get(corpus)
    cqi_p_attrs: Dict[str, CQiPositionalAttribute] = {
        p_attr.name: p_attr
        for p_attr in cqi_corpus.positional_attributes.list()
    }
    cqi_s_attrs: Dict[str, CQiStructuralAttribute] = {
        s_attr.name: s_attr
        for s_attr in cqi_corpus.structural_attributes.list()
    }
    static_corpus_data = {
        'corpus': {
            'bounds': [0, cqi_corpus.size - 1],
            'counts': {
                'token': cqi_corpus.size
            },
            'freqs': {}
        },
        'p_attrs': {},
        's_attrs': {},
        'values': {'p_attrs': {}, 's_attrs': {}}
    }
    # All lookups are requested in chunks (see _id_chunks) instead of one
    # call for the whole id / cpos range.
    for p_attr in cqi_p_attrs.values():
        # corpus.freqs.<p_attr>: lexicon id -> frequency
        freqs = {}
        static_corpus_data['corpus']['freqs'][p_attr.name] = freqs
        for chunk in _id_chunks(0, p_attr.lexicon_size):
            freqs.update(zip(chunk, p_attr.freqs_by_ids(chunk)))
        # p_attrs.<p_attr>: corpus position -> lexicon id
        ids_by_cpos = {}
        static_corpus_data['p_attrs'][p_attr.name] = ids_by_cpos
        for chunk in _id_chunks(0, cqi_corpus.size):
            ids_by_cpos.update(zip(chunk, p_attr.ids_by_cpos(chunk)))
        # values.p_attrs.<p_attr>: lexicon id -> string value
        values_by_id = {}
        static_corpus_data['values']['p_attrs'][p_attr.name] = values_by_id
        for chunk in _id_chunks(0, p_attr.lexicon_size):
            values_by_id.update(zip(chunk, p_attr.values_by_ids(chunk)))
    for s_attr in cqi_s_attrs.values():
        # Attributes that carry values are handled further down as
        # sub-attributes of the attribute they are part of.
        if s_attr.has_values:
            continue
        static_corpus_data['corpus']['counts'][s_attr.name] = s_attr.size
        lexicon = {}
        static_corpus_data['s_attrs'][s_attr.name] = {'lexicon': lexicon, 'values': None}
        static_corpus_data['values']['s_attrs'][s_attr.name] = {}
        # NOTE: for small attributes such as "s" or "ent" the boundaries
        # could be fetched faster with a single CQP query
        # ('<name> []* </name>;') and a dump of its match/matchend fields.
        for s_attr_id in range(s_attr.size):
            entry = {'bounds': None, 'counts': None, 'freqs': None}
            lexicon[s_attr_id] = entry
            # Detailed statistics are only gathered for "text" regions.
            if s_attr.name != 'text':
                continue
            lbound, rbound = s_attr.cpos_by_id(s_attr_id)
            entry['bounds'] = [lbound, rbound]
            counts = {'token': rbound - lbound + 1}
            entry['counts'] = counts
            # Materialised once because the chunks are walked several times.
            cpos_chunks = list(_id_chunks(lbound, rbound + 1))
            # Count the distinct "ent" and "s" regions inside this region;
            # -1 marks positions outside of any such region.
            for nested_name in ('ent', 's'):
                nested_ids = set()
                for chunk in cpos_chunks:
                    nested_ids.update(
                        x for x in cqi_s_attrs[nested_name].ids_by_cpos(chunk)
                        if x != -1
                    )
                counts[nested_name] = len(nested_ids)
            # freqs.<p_attr>: lexicon id -> frequency inside this region
            entry['freqs'] = {}
            for region_p_attr in cqi_p_attrs.values():
                id_counter = Counter()
                for chunk in cpos_chunks:
                    id_counter.update(region_p_attr.ids_by_cpos(chunk))
                entry['freqs'][region_p_attr.name] = dict(id_counter)
        sub_s_attrs = cqi_corpus.structural_attributes.list(filters={'part_of': s_attr})
        # Sub-attribute names are "<s_attr>_<value name>"; strip the prefix.
        s_attr_value_names: List[str] = [
            sub_s_attr.name[(len(s_attr.name) + 1):]
            for sub_s_attr in sub_s_attrs
        ]
        sub_s_attr_values = []
        for sub_s_attr in sub_s_attrs:
            values = []
            for chunk in _id_chunks(0, s_attr.size):
                values.extend(sub_s_attr.values_by_ids(chunk))
            sub_s_attr_values.append(values)
        static_corpus_data['s_attrs'][s_attr.name]['values'] = s_attr_value_names
        # values.s_attrs.<s_attr>: id -> {value name: value}
        static_corpus_data['values']['s_attrs'][s_attr.name] = {
            s_attr_id: {
                value_name: values[s_attr_id]
                for value_name, values in zip(s_attr_value_names, sub_s_attr_values)
            }
            for s_attr_id in range(s_attr.size)
        }
        del sub_s_attr_values
    # Write to a uniquely named temporary file and move it into place, so an
    # interrupted or concurrent build never leaves a truncated cache file
    # that later calls would return as is.
    tmp_file_path = f'{cache_file_path}.{uuid.uuid4().hex}.tmp'
    try:
        with gzip.open(tmp_file_path, 'wt', encoding='utf-8') as f:
            json.dump(static_corpus_data, f)
        os.replace(tmp_file_path, cache_file_path)
    finally:
        # Only still present if writing or moving the file failed.
        if os.path.exists(tmp_file_path):
            os.remove(tmp_file_path)
    # Free the (potentially large) structure before reading the file back.
    del static_corpus_data
    with open(cache_file_path, 'rb') as f:
        return f.read()
def ext_corpus_paginate_corpus(
    corpus: str,
    page: int = 1,
    per_page: int = 20
) -> Dict:
    '''
    Return one page of corpus positions of the CQi corpus named `corpus`
    together with their lookups and the pagination metadata.

    A {'code': 416, 'msg': 'Range Not Satisfiable'} dict is returned when
    `page` or `per_page` is smaller than 1 or `page` lies behind the last
    page of a non-empty corpus.
    '''
    client: CQiClient = session['cqi_over_sio']['cqi_client']
    target_corpus = client.corpora.get(corpus)
    out_of_range = {'code': 416, 'msg': 'Range Not Satisfiable'}
    # Sanity checks
    if per_page < 1 or page < 1:
        return out_of_range
    if target_corpus.size > 0 and page > math.ceil(target_corpus.size / per_page):
        return out_of_range
    page_start = (page - 1) * per_page
    page_stop = min(target_corpus.size, page_start + per_page)
    positions = list(range(page_start, page_stop))
    position_lookups = lookups_by_cpos(target_corpus, positions)
    total = target_corpus.size
    page_count = math.ceil(total / per_page)
    # There is no current page when there are no pages at all.
    current_page = page if page_count > 0 else None
    if current_page:
        has_prev = current_page > 1
        has_next = current_page < page_count
    else:
        has_prev = False
        has_next = False
    return {
        # the items for the current page
        'items': [positions],
        # the lookups for the items
        'lookups': position_lookups,
        # the total number of items matching the query
        'total': total,
        # the number of items to be displayed on a page.
        'per_page': per_page,
        # The total number of pages
        'pages': page_count,
        # the current page number (1 indexed)
        'page': current_page,
        # True if a previous page exists
        'has_prev': has_prev,
        # True if a next page exists.
        'has_next': has_next,
        # Number of the previous page.
        'prev_num': current_page - 1 if has_prev else None,
        # Number of the next page
        'next_num': current_page + 1 if has_next else None
    }
def ext_cqp_paginate_subcorpus(
    subcorpus: str,
    context: int = 50,
    page: int = 1,
    per_page: int = 20
) -> Dict:
    '''
    Return one page of matches of a subcorpus together with their lookups
    and the pagination metadata.

    `subcorpus` has the form "<corpus name>:<subcorpus name>". A
    {'code': 416, 'msg': 'Range Not Satisfiable'} dict is returned when
    `page` or `per_page` is smaller than 1 or `page` lies behind the last
    page of a non-empty subcorpus.
    '''
    parent_name, child_name = subcorpus.split(':', 1)
    client: CQiClient = session['cqi_over_sio']['cqi_client']
    parent_corpus = client.corpora.get(parent_name)
    target_subcorpus = parent_corpus.subcorpora.get(child_name)
    out_of_range = {'code': 416, 'msg': 'Range Not Satisfiable'}
    # Sanity checks
    if per_page < 1 or page < 1:
        return out_of_range
    if target_subcorpus.size > 0 and page > math.ceil(target_subcorpus.size / per_page):
        return out_of_range
    # Export exactly the matches of the requested page.
    export = export_subcorpus(
        target_subcorpus,
        context=context,
        cutoff=per_page,
        offset=(page - 1) * per_page
    )
    page_items = export.pop('matches')
    total = target_subcorpus.size
    page_count = math.ceil(total / per_page)
    # There is no current page when there are no pages at all.
    current_page = page if page_count > 0 else None
    if current_page:
        has_prev = current_page > 1
        has_next = current_page < page_count
    else:
        has_prev = False
        has_next = False
    return {
        # the items for the current page
        'items': page_items,
        # the lookups for the items (what is left of the export)
        'lookups': export,
        # the total number of items matching the query
        'total': total,
        # the number of items to be displayed on a page.
        'per_page': per_page,
        # The total number of pages
        'pages': page_count,
        # the current page number (1 indexed)
        'page': current_page,
        # True if a previous page exists
        'has_prev': has_prev,
        # True if a next page exists.
        'has_next': has_next,
        # Number of the previous page.
        'prev_num': current_page - 1 if has_prev else None,
        # Number of the next page
        'next_num': current_page + 1 if has_next else None
    }
def ext_cqp_partial_export_subcorpus(
    subcorpus: str,
    match_id_list: list,
    context: int = 50
) -> Dict:
    '''
    Export the matches listed in `match_id_list` from a subcorpus.

    `subcorpus` has the form "<corpus name>:<subcorpus name>"; the work is
    delegated to partial_export_subcorpus().
    '''
    parent_name, child_name = subcorpus.split(':', 1)
    client: CQiClient = session['cqi_over_sio']['cqi_client']
    parent_corpus = client.corpora.get(parent_name)
    target_subcorpus = parent_corpus.subcorpora.get(child_name)
    return partial_export_subcorpus(target_subcorpus, match_id_list, context=context)
def ext_cqp_export_subcorpus(
    subcorpus: str,
    context: int = 50
) -> Dict:
    '''
    Export all matches of a subcorpus.

    `subcorpus` has the form "<corpus name>:<subcorpus name>"; the work is
    delegated to export_subcorpus().
    '''
    parent_name, child_name = subcorpus.split(':', 1)
    client: CQiClient = session['cqi_over_sio']['cqi_client']
    parent_corpus = client.corpora.get(parent_name)
    target_subcorpus = parent_corpus.subcorpora.get(child_name)
    return export_subcorpus(target_subcorpus, context=context)

View File

@ -1,64 +1,10 @@
from flask import session from cqi.models.corpora import Corpus
from functools import wraps from cqi.models.subcorpora import Subcorpus
from inspect import signature from typing import Dict, List
import cqi from app.models import Corpus
def cqi_over_socketio(f): def lookups_by_cpos(corpus: Corpus, cpos_list: List[int]) -> Dict:
@wraps(f)
def wrapped(*args):
if 'd' not in session:
return {'code': 424, 'msg': 'Failed Dependency'}
f_args = {}
# Check for missing args and if all provided args are of the right type
for param in signature(f).parameters.values():
if param.name == 'corpus_name':
f_args[param.name] = f'NOPAQUE_{session["d"]["corpus_id"]}'
continue
if param.name == 'cqi_client':
f_args[param.name] = session['d']['cqi_client']
continue
if param.default is param.empty:
# args
if param.name not in args[0]:
return {'code': 400, 'msg': 'Bad Request'}
arg = args[0][param.name]
if type(arg) is not param.annotation:
return {'code': 400, 'msg': 'Bad Request'}
f_args[param.name] = arg
else:
# kwargs
if param.name not in args[0]:
continue
arg = args[0][param.name]
if type(arg) is not param.annotation:
return {'code': 400, 'msg': 'Bad Request'}
f_args[param.name] = arg
session['d']['cqi_client_lock'].acquire()
try:
return_value = f(**f_args)
except BrokenPipeError:
return_value = {
'code': 500,
'msg': 'Internal Server Error'
}
except cqi.errors.CQiException as e:
return_value = {
'code': 500,
'msg': 'Internal Server Error',
'payload': {
'code': e.code,
'desc': e.description,
'msg': e.name
}
}
finally:
session['d']['cqi_client_lock'].release()
return return_value
return wrapped
def lookups_by_cpos(corpus, cpos_list):
lookups = {} lookups = {}
lookups['cpos_lookup'] = {cpos: {} for cpos in cpos_list} lookups['cpos_lookup'] = {cpos: {} for cpos in cpos_list}
for attr in corpus.positional_attributes.list(): for attr in corpus.positional_attributes.list():
@ -93,18 +39,22 @@ def lookups_by_cpos(corpus, cpos_list):
return lookups return lookups
def partial_export_subcorpus(subcorpus, match_id_list, context=25): def partial_export_subcorpus(
if subcorpus.attrs['size'] == 0: subcorpus: Subcorpus,
match_id_list: List[int],
context: int = 25
) -> Dict:
if subcorpus.size == 0:
return {"matches": []} return {"matches": []}
match_boundaries = [] match_boundaries = []
for match_id in match_id_list: for match_id in match_id_list:
if match_id < 0 or match_id >= subcorpus.attrs['size']: if match_id < 0 or match_id >= subcorpus.size:
continue continue
match_boundaries.append( match_boundaries.append(
( (
match_id, match_id,
subcorpus.dump(subcorpus.attrs['fields']['match'], match_id, match_id)[0], subcorpus.dump(subcorpus.fields['match'], match_id, match_id)[0],
subcorpus.dump(subcorpus.attrs['fields']['matchend'], match_id, match_id)[0] subcorpus.dump(subcorpus.fields['matchend'], match_id, match_id)[0]
) )
) )
cpos_set = set() cpos_set = set()
@ -120,14 +70,14 @@ def partial_export_subcorpus(subcorpus, match_id_list, context=25):
lc_rbound = match_start - 1 lc_rbound = match_start - 1
lc = (lc_lbound, lc_rbound) lc = (lc_lbound, lc_rbound)
cpos_list_lbound = lc_lbound cpos_list_lbound = lc_lbound
if match_end == (subcorpus.collection.corpus.attrs['size'] - 1) or context == 0: if match_end == (subcorpus.collection.corpus.size - 1) or context == 0:
rc = None rc = None
cpos_list_rbound = match_end cpos_list_rbound = match_end
else: else:
rc_lbound = match_end + 1 rc_lbound = match_end + 1
rc_rbound = min( rc_rbound = min(
(match_end + context), (match_end + context),
(subcorpus.collection.corpus.attrs['size'] - 1) (subcorpus.collection.corpus.size - 1)
) )
rc = (rc_lbound, rc_rbound) rc = (rc_lbound, rc_rbound)
cpos_list_rbound = rc_rbound cpos_list_rbound = rc_rbound
@ -138,15 +88,20 @@ def partial_export_subcorpus(subcorpus, match_id_list, context=25):
return {'matches': matches, **lookups} return {'matches': matches, **lookups}
def export_subcorpus(subcorpus, context=25, cutoff=float('inf'), offset=0): def export_subcorpus(
if subcorpus.attrs['size'] == 0: subcorpus: Subcorpus,
context: int = 25,
cutoff: float = float('inf'),
offset: int = 0
) -> Dict:
if subcorpus.size == 0:
return {"matches": []} return {"matches": []}
first_match = max(0, offset) first_match = max(0, offset)
last_match = min((offset + cutoff - 1), (subcorpus.attrs['size'] - 1)) last_match = min((offset + cutoff - 1), (subcorpus.size - 1))
match_boundaries = zip( match_boundaries = zip(
list(range(first_match, last_match + 1)), range(first_match, last_match + 1),
subcorpus.dump(subcorpus.attrs['fields']['match'], first_match, last_match), subcorpus.dump(subcorpus.fields['match'], first_match, last_match),
subcorpus.dump(subcorpus.attrs['fields']['matchend'], first_match, last_match) subcorpus.dump(subcorpus.fields['matchend'], first_match, last_match)
) )
cpos_set = set() cpos_set = set()
matches = [] matches = []
@ -160,14 +115,14 @@ def export_subcorpus(subcorpus, context=25, cutoff=float('inf'), offset=0):
lc_rbound = match_start - 1 lc_rbound = match_start - 1
lc = (lc_lbound, lc_rbound) lc = (lc_lbound, lc_rbound)
cpos_list_lbound = lc_lbound cpos_list_lbound = lc_lbound
if match_end == (subcorpus.collection.corpus.attrs['size'] - 1) or context == 0: if match_end == (subcorpus.collection.corpus.size - 1) or context == 0:
rc = None rc = None
cpos_list_rbound = match_end cpos_list_rbound = match_end
else: else:
rc_lbound = match_end + 1 rc_lbound = match_end + 1
rc_rbound = min( rc_rbound = min(
(match_end + context), (match_end + context),
(subcorpus.collection.corpus.attrs['size'] - 1) (subcorpus.collection.corpus.size - 1)
) )
rc = (rc_lbound, rc_rbound) rc = (rc_lbound, rc_rbound)
cpos_list_rbound = rc_rbound cpos_list_rbound = rc_rbound

View File

@ -1,115 +0,0 @@
from flask import session
from flask_login import current_user
from flask_socketio import ConnectionRefusedError
from threading import Lock
import cqi
from app import db, hashids, socketio
from app.decorators import socketio_login_required
from app.models import Corpus, CorpusStatus
'''
This package tunnels the Corpus Query interface (CQi) protocol through
Socket.IO (SIO) by wrapping each CQi function in a separate SIO event.

This module only handles the SIO connect/disconnect, which handles the setup
and teardown of necessary resources for later use. Each CQi function has a
corresponding SIO event. The event handlers are spread across the different
modules within this package.

Basic concept:
1. A client connects to the SIO namespace and provides the id of a corpus to be
   analysed.
   1.1 The analysis session counter of the corpus is incremented.
   1.2 A CQiClient and a (Mutex) Lock belonging to it are created.
   1.3 Wait until the CQP server is running.
   1.4 Connect the CQiClient to the server.
   1.5 Save the CQiClient and the Lock in the session for subsequent use.
2. A client emits an event and may provide a single json object with necessary
   arguments for the targeted CQi function.
3. A SIO event handler (decorated with cqi_over_socketio) gets executed.
   - The event handler function defines all arguments. Since the client
     sends them as a single json object, the decorator decomposes it to fit
     the function's signature. This also includes type checking and proper
     use of the lock (acquire/release) mechanism.
4. Wait for more events.
5. The client disconnects from the SIO namespace.
   5.1 The analysis session counter of the corpus is decremented.
   5.2 The CQiClient and (Mutex) Lock belonging to it are torn down.
'''
NAMESPACE = '/corpora/corpus/corpus_analysis'
# Import all CQi over Socket.IO event handlers
from .cqi_corpora_corpus_subcorpora import * # noqa
from .cqi_corpora_corpus_structural_attributes import * # noqa
from .cqi_corpora_corpus_positional_attributes import * # noqa
from .cqi_corpora_corpus_alignment_attributes import * # noqa
from .cqi_corpora_corpus import * # noqa
from .cqi_corpora import * # noqa
from .cqi import * # noqa
@socketio.on('connect', namespace=NAMESPACE)
@socketio_login_required
def connect(auth):
    '''
    Set up a corpus analysis session for the connecting client.

    auth is expected to carry the hashid of the corpus under the key
    "corpus_id". The connection is refused with ConnectionRefusedError if
    the corpus does not exist, the user may not access it, the corpus is not
    in a status that permits analysis, or it does not reach
    RUNNING_ANALYSIS_SESSION within the retry budget. On success the
    CQiClient, its Lock and the corpus id are stored in session['d'].
    '''
    # the auth variable is used in a hacky way. It contains the corpus id for
    # which a corpus analysis session should be started.
    corpus_id = hashids.decode(auth['corpus_id'])
    corpus = Corpus.query.get(corpus_id)
    if corpus is None:
        raise ConnectionRefusedError('Not Found')
    if not (corpus.user == current_user
            or current_user.is_following_corpus(corpus)
            or current_user.is_administrator()):
        raise ConnectionRefusedError('Forbidden')
    if corpus.status not in [
        CorpusStatus.BUILT,
        CorpusStatus.STARTING_ANALYSIS_SESSION,
        CorpusStatus.RUNNING_ANALYSIS_SESSION,
        CorpusStatus.CANCELING_ANALYSIS_SESSION
    ]:
        raise ConnectionRefusedError('Failed Dependency')
    if corpus.num_analysis_sessions is None:
        corpus.num_analysis_sessions = 0
        db.session.commit()
    # Column expression so the increment is evaluated by the database.
    corpus.num_analysis_sessions = Corpus.num_analysis_sessions + 1
    db.session.commit()
    # Poll the corpus status: 20 retries with 3 seconds of sleep each.
    retry_counter = 20
    while corpus.status != CorpusStatus.RUNNING_ANALYSIS_SESSION:
        if retry_counter == 0:
            # Give the session slot back before refusing the connection.
            corpus.num_analysis_sessions = Corpus.num_analysis_sessions - 1
            db.session.commit()
            # Returning a dict here would accept the connection without any
            # session state; refuse it like every other failure above.
            raise ConnectionRefusedError('Request Timeout')
        socketio.sleep(3)
        retry_counter -= 1
        db.session.refresh(corpus)
    cqi_client = cqi.CQiClient(f'cqpserver_{corpus_id}')
    session['d'] = {
        'corpus_id': corpus_id,
        'cqi_client': cqi_client,
        'cqi_client_lock': Lock(),
    }
@socketio.on('disconnect', namespace=NAMESPACE)
def disconnect():
    '''
    Tear down the corpus analysis session stored in session['d'].

    Disconnects the CQiClient, decrements the analysis session counter of
    the corpus and removes the session state. Does nothing if the connect
    handler never stored any state.
    '''
    if 'd' not in session:
        return
    state = session['d']
    try:
        # The with-statement releases the lock even if disconnect() fails
        # in an unexpected way.
        with state['cqi_client_lock']:
            try:
                state['cqi_client'].disconnect()
            except (BrokenPipeError, cqi.errors.CQiException):
                # Best effort: the server side may already be gone.
                pass
        corpus = Corpus.query.get(state['corpus_id'])
        # The corpus may have been deleted while the session was open.
        if corpus is not None:
            corpus.num_analysis_sessions = Corpus.num_analysis_sessions - 1
            db.session.commit()
    finally:
        # Never leave stale state behind, whatever happened above.
        session.pop('d', None)

View File

@ -1,43 +0,0 @@
from socket import gaierror
import cqi
from app import socketio
from app.decorators import socketio_login_required
from . import NAMESPACE as ns
from .utils import cqi_over_socketio
@socketio.on('cqi.connect', namespace=ns)
@socketio_login_required
@cqi_over_socketio
def cqi_connect(cqi_client: cqi.CQiClient):
    '''
    Connect the CQiClient and report the resulting CQi status.

    An address resolution error (gaierror) is answered with a 500 response
    that carries the first two error arguments as payload.
    '''
    try:
        status_code = cqi_client.connect()
    except gaierror as resolve_error:
        error_payload = {
            'code': resolve_error.args[0],
            'desc': resolve_error.args[1]
        }
        return {
            'code': 500,
            'msg': 'Internal Server Error',
            'payload': error_payload
        }
    status_name = cqi.api.specification.lookup[status_code]
    return {
        'code': 200,
        'msg': 'OK',
        'payload': {'code': status_code, 'msg': status_name}
    }
@socketio.on('cqi.disconnect', namespace=ns)
@socketio_login_required
@cqi_over_socketio
def cqi_disconnect(cqi_client: cqi.CQiClient):
    '''Disconnect the CQiClient and report the resulting CQi status.'''
    status_code = cqi_client.disconnect()
    status_name = cqi.api.specification.lookup[status_code]
    return {
        'code': 200,
        'msg': 'OK',
        'payload': {'code': status_code, 'msg': status_name}
    }
@socketio.on('cqi.ping', namespace=ns)
@socketio_login_required
@cqi_over_socketio
def cqi_ping(cqi_client: cqi.CQiClient):
    '''Ping through the CQiClient and report the resulting CQi status.'''
    status_code = cqi_client.ping()
    status_name = cqi.api.specification.lookup[status_code]
    return {
        'code': 200,
        'msg': 'OK',
        'payload': {'code': status_code, 'msg': status_name}
    }

View File

@ -1,22 +0,0 @@
import cqi
from app import socketio
from app.decorators import socketio_login_required
from . import NAMESPACE as ns
from .utils import cqi_over_socketio
@socketio.on('cqi.corpora.get', namespace=ns)
@socketio_login_required
@cqi_over_socketio
def cqi_corpora_get(cqi_client: cqi.CQiClient, corpus_name: str):
    '''Return a copy of the attrs of the corpus named corpus_name.'''
    corpus_attrs = cqi_client.corpora.get(corpus_name).attrs
    return {'code': 200, 'msg': 'OK', 'payload': {**corpus_attrs}}
@socketio.on('cqi.corpora.list', namespace=ns)
@socketio_login_required
@cqi_over_socketio
def cqi_corpora_list(cqi_client: cqi.CQiClient):
    '''Return a copy of the attrs of every corpus the client lists.'''
    attrs_per_corpus = []
    for listed_corpus in cqi_client.corpora.list():
        attrs_per_corpus.append({**listed_corpus.attrs})
    return {'code': 200, 'msg': 'OK', 'payload': attrs_per_corpus}

View File

@ -1,85 +0,0 @@
from flask import session
import cqi
import math
from app import db, socketio
from app.decorators import socketio_login_required
from app.models import Corpus
from . import NAMESPACE as ns
from .utils import cqi_over_socketio, lookups_by_cpos
@socketio.on('cqi.corpora.corpus.drop', namespace=ns)
@socketio_login_required
@cqi_over_socketio
def cqi_corpora_corpus_drop(cqi_client: cqi.CQiClient, corpus_name: str):
    """Handle ``cqi.corpora.corpus.drop``: drop the named corpus.

    Returns a 200 response whose payload is the CQi status code returned
    by ``drop()`` and its name from the specification lookup table.
    """
    status_code = cqi_client.corpora.get(corpus_name).drop()
    status = {
        'code': status_code,
        'msg': cqi.api.specification.lookup[status_code]
    }
    return {'code': 200, 'msg': 'OK', 'payload': status}
@socketio.on('cqi.corpora.corpus.query', namespace=ns)
@socketio_login_required
@cqi_over_socketio
def cqi_corpora_corpus_query(
    cqi_client: cqi.CQiClient,
    corpus_name: str,
    subcorpus_name: str,
    query: str
):
    """Handle ``cqi.corpora.corpus.query``: run ``query`` on a corpus.

    Calls ``query(subcorpus_name, query)`` on the corpus named
    ``corpus_name`` and returns a 200 response whose payload is the
    resulting CQi status code and its name from the lookup table.
    """
    corpus = cqi_client.corpora.get(corpus_name)
    status_code = corpus.query(subcorpus_name, query)
    status_name = cqi.api.specification.lookup[status_code]
    return {
        'code': 200,
        'msg': 'OK',
        'payload': {'code': status_code, 'msg': status_name}
    }
###############################################################################
# nopaque specific CQi extensions #
###############################################################################
@socketio.on('cqi.corpora.corpus.update_db', namespace=ns)
@socketio_login_required
@cqi_over_socketio
def cqi_corpora_corpus_update_db(cqi_client: cqi.CQiClient, corpus_name: str):
    """Handle ``cqi.corpora.corpus.update_db``: store the corpus size.

    Loads the ``Corpus`` row whose id is kept in the session under
    ``session['d']['corpus_id']``, sets its ``num_tokens`` to the ``size``
    attribute reported for the CQi corpus, and commits. Nothing is
    returned, so no acknowledgement payload is produced.
    """
    corpus_id = session['d']['corpus_id']
    db_corpus = Corpus.query.get(corpus_id)
    token_count = cqi_client.corpora.get(corpus_name).attrs['size']
    db_corpus.num_tokens = token_count
    db.session.commit()
@socketio.on('cqi.corpora.corpus.paginate', namespace=ns)
@socketio_login_required
@cqi_over_socketio
def cqi_corpora_corpus_paginate(
    cqi_client: cqi.CQiClient,
    corpus_name: str,
    page: int = 1,
    per_page: int = 20
):
    """Handle ``cqi.corpora.corpus.paginate``: return one page of a corpus.

    A page is a run of consecutive corpus positions. The response is 416
    when ``per_page`` or ``page`` is below 1, or when a non-empty corpus
    has fewer pages than ``page``. Otherwise the payload holds the
    positions, their lookups and the usual pagination metadata.
    """
    corpus = cqi_client.corpora.get(corpus_name)
    # Sanity checks
    if per_page < 1 or page < 1:
        return {'code': 416, 'msg': 'Range Not Satisfiable'}
    total = corpus.attrs['size']
    if total > 0 and page > math.ceil(total / per_page):
        return {'code': 416, 'msg': 'Range Not Satisfiable'}
    first_cpos = (page - 1) * per_page
    last_cpos = min(total, first_cpos + per_page)
    cpos_list = list(range(first_cpos, last_cpos))
    lookups = lookups_by_cpos(corpus, cpos_list)
    page_count = math.ceil(total / per_page)
    # An empty corpus has no current page at all.
    current_page = page if page_count > 0 else None
    if current_page:
        has_prev = current_page > 1
        has_next = current_page < page_count
    else:
        has_prev = False
        has_next = False
    payload = {
        # the items for the current page (one list of corpus positions)
        'items': [cpos_list],
        # the lookups for the items
        'lookups': lookups,
        # the total number of items
        'total': total,
        # the number of items to be displayed on a page
        'per_page': per_page,
        # the total number of pages
        'pages': page_count,
        # the current page number (1 indexed)
        'page': current_page,
        # True if a previous / next page exists
        'has_prev': has_prev,
        'has_next': has_next,
        # number of the previous / next page
        'prev_num': current_page - 1 if has_prev else None,
        'next_num': current_page + 1 if has_next else None
    }
    return {'code': 200, 'msg': 'OK', 'payload': payload}

View File

@ -1,24 +0,0 @@
import cqi
from app import socketio
from app.decorators import socketio_login_required
from . import NAMESPACE as ns
from .utils import cqi_over_socketio
@socketio.on('cqi.corpora.corpus.alignment_attributes.get', namespace=ns)
@socketio_login_required
@cqi_over_socketio
def cqi_corpora_corpus_alignment_attributes_get(
    cqi_client: cqi.CQiClient,
    corpus_name: str,
    alignment_attribute_name: str
):
    """Return the attributes of one alignment attribute of a corpus.

    The payload is a shallow copy of the ``attrs`` mapping of the
    alignment attribute named ``alignment_attribute_name``.
    """
    alignment_attributes = cqi_client.corpora.get(corpus_name).alignment_attributes
    attribute = alignment_attributes.get(alignment_attribute_name)
    return {'code': 200, 'msg': 'OK', 'payload': {**attribute.attrs}}
@socketio.on('cqi.corpora.corpus.alignment_attributes.list', namespace=ns)
@socketio_login_required
@cqi_over_socketio
def cqi_corpora_corpus_alignment_attributes_list(
    cqi_client: cqi.CQiClient,
    corpus_name: str
):
    """Return the attributes of every alignment attribute of a corpus.

    The payload is a list with one shallow copy of ``attrs`` per
    alignment attribute of the corpus named ``corpus_name``.
    """
    corpus = cqi_client.corpora.get(corpus_name)
    attrs_list = []
    for attribute in corpus.alignment_attributes.list():
        attrs_list.append({**attribute.attrs})
    return {'code': 200, 'msg': 'OK', 'payload': attrs_list}

View File

@ -1,24 +0,0 @@
import cqi
from app import socketio
from app.decorators import socketio_login_required
from . import NAMESPACE as ns
from .utils import cqi_over_socketio
@socketio.on('cqi.corpora.corpus.positional_attributes.get', namespace=ns)
@socketio_login_required
@cqi_over_socketio
def cqi_corpora_corpus_positional_attributes_get(
    cqi_client: cqi.CQiClient,
    corpus_name: str,
    positional_attribute_name: str
):
    """Return the attributes of one positional attribute of a corpus.

    The payload is a shallow copy of the ``attrs`` mapping of the
    positional attribute named ``positional_attribute_name``.
    """
    positional_attributes = cqi_client.corpora.get(corpus_name).positional_attributes
    attribute = positional_attributes.get(positional_attribute_name)
    return {'code': 200, 'msg': 'OK', 'payload': {**attribute.attrs}}
@socketio.on('cqi.corpora.corpus.positional_attributes.list', namespace=ns)
@socketio_login_required
@cqi_over_socketio
def cqi_corpora_corpus_positional_attributes_list(
    cqi_client: cqi.CQiClient,
    corpus_name: str
):
    """Return the attributes of every positional attribute of a corpus.

    The payload is a list with one shallow copy of ``attrs`` per
    positional attribute of the corpus named ``corpus_name``.
    """
    corpus = cqi_client.corpora.get(corpus_name)
    attrs_list = []
    for attribute in corpus.positional_attributes.list():
        attrs_list.append({**attribute.attrs})
    return {'code': 200, 'msg': 'OK', 'payload': attrs_list}

View File

@ -1,24 +0,0 @@
import cqi
from app import socketio
from app.decorators import socketio_login_required
from . import NAMESPACE as ns
from .utils import cqi_over_socketio
@socketio.on('cqi.corpora.corpus.structural_attributes.get', namespace=ns)
@socketio_login_required
@cqi_over_socketio
def cqi_corpora_corpus_structural_attributes_get(
    cqi_client: cqi.CQiClient,
    corpus_name: str,
    structural_attribute_name: str
):
    """Return the attributes of one structural attribute of a corpus.

    The payload is a shallow copy of the ``attrs`` mapping of the
    structural attribute named ``structural_attribute_name``.
    """
    structural_attributes = cqi_client.corpora.get(corpus_name).structural_attributes
    attribute = structural_attributes.get(structural_attribute_name)
    return {'code': 200, 'msg': 'OK', 'payload': {**attribute.attrs}}
@socketio.on('cqi.corpora.corpus.structural_attributes.list', namespace=ns)
@socketio_login_required
@cqi_over_socketio
def cqi_corpora_corpus_structural_attributes_list(
    cqi_client: cqi.CQiClient,
    corpus_name: str
):
    """Return the attributes of every structural attribute of a corpus.

    The payload is a list with one shallow copy of ``attrs`` per
    structural attribute of the corpus named ``corpus_name``.
    """
    corpus = cqi_client.corpora.get(corpus_name)
    attrs_list = []
    for attribute in corpus.structural_attributes.list():
        attrs_list.append({**attribute.attrs})
    return {'code': 200, 'msg': 'OK', 'payload': attrs_list}

View File

@ -1,125 +0,0 @@
import cqi
import math
from app import socketio
from app.decorators import socketio_login_required
from . import NAMESPACE as ns
from .utils import cqi_over_socketio, export_subcorpus, partial_export_subcorpus
@socketio.on('cqi.corpora.corpus.subcorpora.get', namespace=ns)
@socketio_login_required
@cqi_over_socketio
def cqi_corpora_corpus_subcorpora_get(
    cqi_client: cqi.CQiClient,
    corpus_name: str,
    subcorpus_name: str
):
    """Return the attributes of one subcorpus of a corpus.

    The payload is a shallow copy of the ``attrs`` mapping of the
    subcorpus named ``subcorpus_name``.
    """
    subcorpora = cqi_client.corpora.get(corpus_name).subcorpora
    subcorpus = subcorpora.get(subcorpus_name)
    return {'code': 200, 'msg': 'OK', 'payload': {**subcorpus.attrs}}
@socketio.on('cqi.corpora.corpus.subcorpora.list', namespace=ns)
@socketio_login_required
@cqi_over_socketio
def cqi_corpora_corpus_subcorpora_list(
    cqi_client: cqi.CQiClient,
    corpus_name: str
):
    """Return the attributes of every subcorpus of a corpus.

    The payload is a list with one shallow copy of ``attrs`` per
    subcorpus of the corpus named ``corpus_name``.
    """
    corpus = cqi_client.corpora.get(corpus_name)
    attrs_list = []
    for subcorpus in corpus.subcorpora.list():
        attrs_list.append({**subcorpus.attrs})
    return {'code': 200, 'msg': 'OK', 'payload': attrs_list}
@socketio.on('cqi.corpora.corpus.subcorpora.subcorpus.drop', namespace=ns)
@socketio_login_required
@cqi_over_socketio
def cqi_corpora_corpus_subcorpora_subcorpus_drop(
    cqi_client: cqi.CQiClient,
    corpus_name: str,
    subcorpus_name: str
):
    """Drop the named subcorpus of a corpus.

    Returns a 200 response whose payload is the CQi status code returned
    by ``drop()`` and its name from the specification lookup table.
    """
    subcorpora = cqi_client.corpora.get(corpus_name).subcorpora
    status_code = subcorpora.get(subcorpus_name).drop()
    status = {
        'code': status_code,
        'msg': cqi.api.specification.lookup[status_code]
    }
    return {'code': 200, 'msg': 'OK', 'payload': status}
@socketio.on('cqi.corpora.corpus.subcorpora.subcorpus.dump', namespace=ns)
@socketio_login_required
@cqi_over_socketio
def cqi_corpora_corpus_subcorpora_subcorpus_dump(cqi_client: cqi.CQiClient):
    """Placeholder for the subcorpus ``dump`` event; always answers 501."""
    not_implemented = {'code': 501, 'msg': 'Not Implemented'}
    return not_implemented
@socketio.on('cqi.corpora.corpus.subcorpora.subcorpus.fdist_1', namespace=ns)
@socketio_login_required
@cqi_over_socketio
def cqi_corpora_corpus_subcorpora_subcorpus_fdist_1(cqi_client: cqi.CQiClient):
    """Placeholder for the subcorpus ``fdist_1`` event; always answers 501."""
    not_implemented = {'code': 501, 'msg': 'Not Implemented'}
    return not_implemented
@socketio.on('cqi.corpora.corpus.subcorpora.subcorpus.fdist_2', namespace=ns)
@socketio_login_required
@cqi_over_socketio
def cqi_corpora_corpus_subcorpora_subcorpus_fdist_2(cqi_client: cqi.CQiClient):
    """Placeholder for the subcorpus ``fdist_2`` event; always answers 501."""
    not_implemented = {'code': 501, 'msg': 'Not Implemented'}
    return not_implemented
###############################################################################
# nopaque specific CQi extensions #
###############################################################################
@socketio.on('cqi.corpora.corpus.subcorpora.subcorpus.paginate', namespace=ns)
@socketio_login_required
@cqi_over_socketio
def cqi_corpora_corpus_subcorpora_subcorpus_paginate(
    cqi_client: cqi.CQiClient,
    corpus_name: str,
    subcorpus_name: str,
    context: int = 50,
    page: int = 1,
    per_page: int = 20
):
    """Return one page of matches of a subcorpus.

    The response is 416 when ``per_page`` or ``page`` is below 1, or when
    a non-empty subcorpus has fewer pages than ``page``. Otherwise the
    requested slice is exported with ``export_subcorpus``; its
    ``matches`` entry becomes the page items and the rest of the export
    becomes the lookups, next to the usual pagination metadata.
    """
    corpus = cqi_client.corpora.get(corpus_name)
    subcorpus = corpus.subcorpora.get(subcorpus_name)
    # Sanity checks
    if per_page < 1 or page < 1:
        return {'code': 416, 'msg': 'Range Not Satisfiable'}
    match_count = subcorpus.attrs['size']
    if match_count > 0 and page > math.ceil(match_count / per_page):
        return {'code': 416, 'msg': 'Range Not Satisfiable'}
    export = export_subcorpus(
        subcorpus,
        context=context,
        cutoff=per_page,
        offset=(page - 1) * per_page
    )
    # Taking the matches out leaves only the lookup tables in `export`.
    matches = export.pop('matches')
    page_count = math.ceil(match_count / per_page)
    # A subcorpus without matches has no current page at all.
    current_page = page if page_count > 0 else None
    if current_page:
        has_prev = current_page > 1
        has_next = current_page < page_count
    else:
        has_prev = False
        has_next = False
    payload = {
        # the items for the current page
        'items': matches,
        # the lookups for the items
        'lookups': export,
        # the total number of items matching the query
        'total': match_count,
        # the number of items to be displayed on a page
        'per_page': per_page,
        # the total number of pages
        'pages': page_count,
        # the current page number (1 indexed)
        'page': current_page,
        # True if a previous / next page exists
        'has_prev': has_prev,
        'has_next': has_next,
        # number of the previous / next page
        'prev_num': current_page - 1 if has_prev else None,
        'next_num': current_page + 1 if has_next else None
    }
    return {'code': 200, 'msg': 'OK', 'payload': payload}
@socketio.on('cqi.corpora.corpus.subcorpora.subcorpus.partial_export', namespace=ns)
@socketio_login_required
@cqi_over_socketio
def cqi_corpora_corpus_subcorpora_subcorpus_partial_export(
    cqi_client: cqi.CQiClient,
    corpus_name: str,
    subcorpus_name: str,
    match_id_list: list,
    context: int = 50
):
    """Export selected matches of a subcorpus.

    Passes the subcorpus, ``match_id_list`` and ``context`` to
    ``partial_export_subcorpus`` and returns its result as the payload.
    """
    subcorpora = cqi_client.corpora.get(corpus_name).subcorpora
    subcorpus = subcorpora.get(subcorpus_name)
    partial_export = partial_export_subcorpus(
        subcorpus,
        match_id_list,
        context=context
    )
    return {'code': 200, 'msg': 'OK', 'payload': partial_export}
@socketio.on('cqi.corpora.corpus.subcorpora.subcorpus.export', namespace=ns)
@socketio_login_required
@cqi_over_socketio
def cqi_corpora_corpus_subcorpora_subcorpus_export(
    cqi_client: cqi.CQiClient,
    corpus_name: str,
    subcorpus_name: str,
    context: int = 50
):
    """Export a whole subcorpus.

    Passes the subcorpus and ``context`` to ``export_subcorpus`` and
    returns its result as the payload.
    """
    subcorpora = cqi_client.corpora.get(corpus_name).subcorpora
    subcorpus = subcorpora.get(subcorpus_name)
    full_export = export_subcorpus(subcorpus, context=context)
    return {'code': 200, 'msg': 'OK', 'payload': full_export}

View File

@ -7,6 +7,8 @@ from app.decorators import content_negotiation
from app.models import Corpus, CorpusFollowerRole from app.models import Corpus, CorpusFollowerRole
from . import bp from . import bp
from .decorators import corpus_follower_permission_required, corpus_owner_or_admin_required from .decorators import corpus_follower_permission_required, corpus_owner_or_admin_required
import nltk
from string import punctuation
@bp.route('/<hashid:corpus_id>', methods=['DELETE']) @bp.route('/<hashid:corpus_id>', methods=['DELETE'])
@ -56,6 +58,18 @@ def build_corpus(corpus_id):
} }
return response_data, 202 return response_data, 202
@bp.route('/stopwords')
@content_negotiation(produces='application/json')
def get_stopwords():
    """Return NLTK stopword lists per language plus punctuation tokens.

    The response body maps each language name to its NLTK stopword list,
    ``'punctuation'`` to ``string.punctuation`` plus a few extra tokens,
    and ``'user_stopwords'`` to an empty list.
    """
    try:
        # `nltk.download` contacts the NLTK index on every call, even when
        # the corpus is already installed. Only download when the corpus
        # cannot be found locally.
        nltk.data.find('corpora/stopwords')
    except LookupError:
        nltk.download('stopwords')
    languages = ["german", "english", "catalan", "greek", "spanish", "french", "italian", "russian", "chinese"]
    stopwords = {
        language: nltk.corpus.stopwords.words(language)
        for language in languages
    }
    stopwords['punctuation'] = list(punctuation) + ['', '|', '', '', '', '--']
    stopwords['user_stopwords'] = []
    # NOTE(review): 202 Accepted is unusual for a synchronous GET that
    # returns its data directly; kept as-is because callers may rely on it.
    return stopwords, 202
# @bp.route('/<hashid:corpus_id>/generate-share-link', methods=['POST']) # @bp.route('/<hashid:corpus_id>/generate-share-link', methods=['POST'])
# @corpus_follower_permission_required('MANAGE_FOLLOWERS') # @corpus_follower_permission_required('MANAGE_FOLLOWERS')

View File

@ -28,19 +28,19 @@ def _create_build_corpus_service(corpus):
''' ## Command ## ''' ''' ## Command ## '''
command = ['bash', '-c'] command = ['bash', '-c']
command.append( command.append(
f'mkdir /corpora/data/nopaque_{corpus.id}' f'mkdir /corpora/data/nopaque-{corpus.hashid.lower()}'
' && ' ' && '
'cwb-encode' 'cwb-encode'
' -c utf8' ' -c utf8'
f' -d /corpora/data/nopaque_{corpus.id}' f' -d /corpora/data/nopaque-{corpus.hashid.lower()}'
' -f /root/files/corpus.vrt' ' -f /root/files/corpus.vrt'
f' -R /usr/local/share/cwb/registry/nopaque_{corpus.id}' f' -R /usr/local/share/cwb/registry/nopaque-{corpus.hashid.lower()}'
' -P pos -P lemma -P simple_pos' ' -P pos -P lemma -P simple_pos'
' -S ent:0+type -S s:0' ' -S ent:0+type -S s:0'
' -S text:0+address+author+booktitle+chapter+editor+institution+journal+pages+publisher+publishing_year+school+title' ' -S text:0+address+author+booktitle+chapter+editor+institution+journal+pages+publisher+publishing_year+school+title'
' -xsB -9' ' -xsB -9'
' && ' ' && '
f'cwb-make -V NOPAQUE_{corpus.id}' f'cwb-make -V NOPAQUE-{corpus.hashid.upper()}'
) )
''' ## Constraints ## ''' ''' ## Constraints ## '''
constraints = ['node.role==worker'] constraints = ['node.role==worker']
@ -149,11 +149,15 @@ def _create_cqpserver_container(corpus):
''' ### Corpus data volume ### ''' ''' ### Corpus data volume ### '''
data_volume_source = os.path.join(corpus.path, 'cwb', 'data') data_volume_source = os.path.join(corpus.path, 'cwb', 'data')
data_volume_target = '/corpora/data' data_volume_target = '/corpora/data'
# data_volume_source = os.path.join(corpus.path, 'cwb', 'data', f'nopaque_{corpus.id}')
# data_volume_target = f'/corpora/data/nopaque_{corpus.hashid.lower()}'
data_volume = f'{data_volume_source}:{data_volume_target}:rw' data_volume = f'{data_volume_source}:{data_volume_target}:rw'
volumes.append(data_volume) volumes.append(data_volume)
''' ### Corpus registry volume ### ''' ''' ### Corpus registry volume ### '''
registry_volume_source = os.path.join(corpus.path, 'cwb', 'registry') registry_volume_source = os.path.join(corpus.path, 'cwb', 'registry')
registry_volume_target = '/usr/local/share/cwb/registry' registry_volume_target = '/usr/local/share/cwb/registry'
# registry_volume_source = os.path.join(corpus.path, 'cwb', 'registry', f'nopaque_{corpus.id}')
# registry_volume_target = f'/usr/local/share/cwb/registry/nopaque_{corpus.hashid.lower()}'
registry_volume = f'{registry_volume_source}:{registry_volume_target}:rw' registry_volume = f'{registry_volume_source}:{registry_volume_target}:rw'
volumes.append(registry_volume) volumes.append(registry_volume)
# Check if a cqpserver container already exists. If this is the case, # Check if a cqpserver container already exists. If this is the case,

View File

@ -42,7 +42,6 @@ def job_log(job_id):
with open(os.path.join(job.path, 'pipeline_data', 'logs', 'pyflow_log.txt')) as log_file: with open(os.path.join(job.path, 'pipeline_data', 'logs', 'pyflow_log.txt')) as log_file:
log = log_file.read() log = log_file.read()
response_data = { response_data = {
'message': '',
'jobLog': log 'jobLog': log
} }
return response_data, 200 return response_data, 200

View File

@ -1607,9 +1607,14 @@ class Corpus(HashidMixin, db.Model):
return corpus return corpus
def build(self): def build(self):
build_dir = os.path.join(self.path, 'cwb')
shutil.rmtree(build_dir, ignore_errors=True)
os.mkdir(build_dir)
os.mkdir(os.path.join(build_dir, 'data'))
os.mkdir(os.path.join(build_dir, 'registry'))
corpus_element = ET.fromstring('<corpus>\n</corpus>') corpus_element = ET.fromstring('<corpus>\n</corpus>')
for corpus_file in self.files: for corpus_file in self.files:
normalized_vrt_path = os.path.join(self.path, 'cwb', f'{corpus_file.id}.norm.vrt') normalized_vrt_path = os.path.join(build_dir, f'{corpus_file.id}.norm.vrt')
try: try:
normalize_vrt_file(corpus_file.path, normalized_vrt_path) normalize_vrt_file(corpus_file.path, normalized_vrt_path)
except: except:
@ -1636,7 +1641,7 @@ class Corpus(HashidMixin, db.Model):
# corpus_element.insert(1, text_element) # corpus_element.insert(1, text_element)
corpus_element.append(text_element) corpus_element.append(text_element)
ET.ElementTree(corpus_element).write( ET.ElementTree(corpus_element).write(
os.path.join(self.path, 'cwb', 'corpus.vrt'), os.path.join(build_dir, 'corpus.vrt'),
encoding='utf-8' encoding='utf-8'
) )
self.status = CorpusStatus.SUBMITTED self.status = CorpusStatus.SUBMITTED

Binary file not shown.

After

Width:  |  Height:  |  Size: 160 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 182 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 155 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 104 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 115 KiB

View File

@ -8,19 +8,19 @@ class App {
this.socket.on('PATCH', (patch) => {this.onPatch(patch);}); this.socket.on('PATCH', (patch) => {this.onPatch(patch);});
} }
getUser(userId, backrefs=true, relationships=true) { getUser(userId) {
if (userId in this.data.promises.getUser) { if (userId in this.data.promises.getUser) {
return this.data.promises.getUser[userId]; return this.data.promises.getUser[userId];
} }
this.data.promises.getUser[userId] = new Promise((resolve, reject) => { this.data.promises.getUser[userId] = new Promise((resolve, reject) => {
this.socket.emit('GET /users/<user_id>', userId, backrefs, relationships, (response) => { this.socket.emit('GET /users/<user_id>', userId, (response) => {
if (response.status !== 200) { if (response.status === 200) {
reject(response);
return;
}
this.data.users[userId] = response.body; this.data.users[userId] = response.body;
resolve(this.data.users[userId]); resolve(this.data.users[userId]);
} else {
reject(`[${response.status}] ${response.statusText}`);
}
}); });
}); });

View File

@ -1,485 +0,0 @@
/**
 * CQi client that talks to the server over a Socket.IO connection.
 *
 * Every request resolves with `response.payload` when the server answers
 * with code 200 and rejects with the whole response otherwise.
 */
class CQiClient {
  /**
   * @param {*} corpusId - Sent to the server as `auth.corpus_id`.
   */
  constructor(corpusId) {
    const socketOptions = {
      auth: {corpus_id: corpusId},
      transports: ['websocket'],
      upgrade: false
    };
    this.socket = io('/corpora/corpus/corpus_analysis', socketOptions);
    this.connected = false;
    this.corpora = new CQiCorpusCollection(this.socket);
  }

  /** Emit `cqi.connect`; sets `connected` to true on success. */
  connect() {
    return new Promise((resolve, reject) => {
      this.socket.emit('cqi.connect', (response) => {
        if (response.code !== 200) {
          reject(response);
          return;
        }
        this.connected = true;
        resolve(response.payload);
      });
    });
  }

  /** Emit `cqi.disconnect`; sets `connected` to false on success. */
  disconnect() {
    return new Promise((resolve, reject) => {
      this.socket.emit('cqi.disconnect', (response) => {
        if (response.code !== 200) {
          reject(response);
          return;
        }
        this.connected = false;
        resolve(response.payload);
      });
    });
  }

  /** Emit `cqi.ping`. */
  ping() {
    return new Promise((resolve, reject) => {
      this.socket.emit('cqi.ping', (response) => {
        if (response.code !== 200) {
          reject(response);
          return;
        }
        resolve(response.payload);
      });
    });
  }
}
/**
 * Access to the corpora of the CQi server behind `socket`.
 */
class CQiCorpusCollection {
  /**
   * @param {object} socket - Socket.IO client socket used for all requests.
   */
  constructor(socket) {
    this.socket = socket;
  }

  /**
   * Fetch a single corpus by name.
   *
   * @param {string} corpusName
   * @returns {Promise<CQiCorpus>} Rejects with the response when its code
   *   is not 200.
   */
  get(corpusName) {
    return new Promise((resolve, reject) => {
      const args = {corpus_name: corpusName};
      this.socket.emit('cqi.corpora.get', args, response => {
        if (response.code === 200) {
          resolve(new CQiCorpus(this.socket, response.payload));
        } else {
          reject(response);
        }
      });
    });
  }

  /**
   * Fetch all corpora.
   *
   * @returns {Promise<CQiCorpus[]>} Rejects with the response when its code
   *   is not 200.
   */
  list() {
    return new Promise((resolve, reject) => {
      this.socket.emit('cqi.corpora.list', response => {
        if (response.code === 200) {
          // Each payload entry holds corpus attributes, so wrap it in a
          // CQiCorpus. The previous CQiSubcorpus(socket, x) call left its
          // `attrs` argument undefined and threw.
          resolve(response.payload.map(x => {return new CQiCorpus(this.socket, x);}));
        } else {
          reject(response);
        }
      });
    });
  }
}
/**
 * A corpus on the CQi server, with its attribute and subcorpus
 * collections.
 */
class CQiCorpus {
  /**
   * @param {object} socket - Socket.IO client socket used for all requests.
   * @param {object} attrs - Corpus attributes (`charset`, `name`,
   *   `properties`, `size`).
   */
  constructor(socket, attrs) {
    this.socket = socket;
    const {charset, name, properties, size} = attrs;
    this.charset = charset;
    this.name = name;
    this.properties = properties;
    this.size = size;
    this.alignmentAttributes = new CQiAlignmentAttributeCollection(this.socket, this);
    this.positionalAttributes = new CQiPositionalAttributeCollection(this.socket, this);
    this.structuralAttributes = new CQiStructuralAttributeCollection(this.socket, this);
    this.subcorpora = new CQiSubcorpusCollection(this.socket, this);
  }

  /** Drop this corpus; resolves with the response payload. */
  drop() {
    const args = {corpus_name: this.name};
    return new Promise((resolve, reject) => {
      this.socket.emit('cqi.corpora.corpus.drop', args, (response) => {
        if (response.code !== 200) {
          reject(response);
          return;
        }
        resolve(response.payload);
      });
    });
  }

  /**
   * Run `queryString` on this corpus and store the result under
   * `subcorpus_name`; resolves with the response payload.
   */
  query(subcorpus_name, queryString) {
    const args = {
      corpus_name: this.name,
      subcorpus_name: subcorpus_name,
      query: queryString
    };
    return new Promise((resolve, reject) => {
      this.socket.emit('cqi.corpora.corpus.query', args, (response) => {
        if (response.code !== 200) {
          reject(response);
          return;
        }
        resolve(response.payload);
      });
    });
  }

  // nopaque specific CQi extension
  /** Request one page of this corpus; resolves with the response payload. */
  paginate(page=1, perPage=20) {
    const args = {corpus_name: this.name, page: page, per_page: perPage};
    return new Promise((resolve, reject) => {
      this.socket.emit('cqi.corpora.corpus.paginate', args, (response) => {
        if (response.code !== 200) {
          reject(response);
          return;
        }
        resolve(response.payload);
      });
    });
  }

  /** Emit the update_db event for this corpus without waiting for a reply. */
  updateDb() {
    this.socket.emit('cqi.corpora.corpus.update_db', {corpus_name: this.name});
  }
}
/**
 * Access to the alignment attributes of one corpus.
 */
class CQiAlignmentAttributeCollection {
  /**
   * @param {object} socket - Socket.IO client socket used for all requests.
   * @param {object} corpus - Owning corpus; its `name` is sent with every
   *   request.
   */
  constructor(socket, corpus) {
    this.corpus = corpus;
    this.socket = socket;
  }

  /**
   * Fetch a single alignment attribute by name.
   *
   * @param {string} alignmentAttributeName
   * @returns {Promise<CQiAlignmentAttribute>} Rejects with the response when
   *   its code is not 200.
   */
  get(alignmentAttributeName) {
    return new Promise((resolve, reject) => {
      const args = {
        corpus_name: this.corpus.name,
        alignment_attribute_name: alignmentAttributeName
      };
      this.socket.emit('cqi.corpora.corpus.alignment_attributes.get', args, response => {
        if (response.code === 200) {
          resolve(new CQiAlignmentAttribute(this.socket, this.corpus, response.payload));
        } else {
          reject(response);
        }
      });
    });
  }

  /**
   * Fetch all alignment attributes of the corpus.
   *
   * @returns {Promise<CQiAlignmentAttribute[]>} Rejects with the response
   *   when its code is not 200.
   */
  list() {
    return new Promise((resolve, reject) => {
      const args = {corpus_name: this.corpus.name};
      // The event name must carry the `cqi.corpora.corpus.` prefix that
      // get() uses; the shorter `cqi.corpus.` name has no server handler,
      // so the promise would never settle.
      this.socket.emit('cqi.corpora.corpus.alignment_attributes.list', args, response => {
        if (response.code === 200) {
          resolve(response.payload.map(x => {return new CQiAlignmentAttribute(this.socket, this.corpus, x);}));
        } else {
          reject(response);
        }
      });
    });
  }
}
/**
 * An alignment attribute of a corpus (`name` and `size`).
 */
class CQiAlignmentAttribute {
  /**
   * @param {object} socket - Socket.IO client socket.
   * @param {object} corpus - Owning corpus.
   * @param {object} attrs - Attribute data with `name` and `size`.
   */
  constructor(socket, corpus, attrs) {
    const {name, size} = attrs;
    this.socket = socket;
    this.corpus = corpus;
    this.name = name;
    this.size = size;
  }
}
/**
 * Access to the positional attributes of one corpus.
 */
class CQiPositionalAttributeCollection {
  /**
   * @param {object} socket - Socket.IO client socket used for all requests.
   * @param {object} corpus - Owning corpus; its `name` is sent with every
   *   request.
   */
  constructor(socket, corpus) {
    this.corpus = corpus;
    this.socket = socket;
  }

  /**
   * Fetch a single positional attribute by name.
   *
   * @param {string} positionalAttributeName
   * @returns {Promise<CQiPositionalAttribute>} Rejects with the response
   *   when its code is not 200.
   */
  get(positionalAttributeName) {
    return new Promise((resolve, reject) => {
      const args = {
        corpus_name: this.corpus.name,
        positional_attribute_name: positionalAttributeName
      };
      this.socket.emit('cqi.corpora.corpus.positional_attributes.get', args, response => {
        if (response.code === 200) {
          resolve(new CQiPositionalAttribute(this.socket, this.corpus, response.payload));
        } else {
          reject(response);
        }
      });
    });
  }

  /**
   * Fetch all positional attributes of the corpus.
   *
   * @returns {Promise<CQiPositionalAttribute[]>} Rejects with the response
   *   when its code is not 200.
   */
  list() {
    return new Promise((resolve, reject) => {
      const args = {corpus_name: this.corpus.name};
      // The event name must carry the `cqi.corpora.corpus.` prefix that
      // get() uses; the shorter `cqi.corpus.` name has no server handler,
      // so the promise would never settle.
      this.socket.emit('cqi.corpora.corpus.positional_attributes.list', args, response => {
        if (response.code === 200) {
          resolve(response.payload.map(x => {return new CQiPositionalAttribute(this.socket, this.corpus, x);}));
        } else {
          reject(response);
        }
      });
    });
  }
}
/**
 * A positional attribute of a corpus (`lexiconSize`, `name`, `size`).
 */
class CQiPositionalAttribute {
  /**
   * @param {object} socket - Socket.IO client socket.
   * @param {object} corpus - Owning corpus.
   * @param {object} attrs - Attribute data with `lexicon_size`, `name` and
   *   `size`.
   */
  constructor(socket, corpus, attrs) {
    const {lexicon_size: lexiconSize, name, size} = attrs;
    this.socket = socket;
    this.corpus = corpus;
    this.lexiconSize = lexiconSize;
    this.name = name;
    this.size = size;
  }
}
/**
 * Access to the structural attributes of one corpus.
 */
class CQiStructuralAttributeCollection {
  /**
   * @param {object} socket - Socket.IO client socket used for all requests.
   * @param {object} corpus - Owning corpus; its `name` is sent with every
   *   request.
   */
  constructor(socket, corpus) {
    this.corpus = corpus;
    this.socket = socket;
  }

  /**
   * Fetch a single structural attribute by name.
   *
   * @param {string} structuralAttributeName
   * @returns {Promise<CQiStructuralAttribute>} Rejects with the response
   *   when its code is not 200.
   */
  get(structuralAttributeName) {
    return new Promise((resolve, reject) => {
      const args = {
        corpus_name: this.corpus.name,
        structural_attribute_name: structuralAttributeName
      };
      this.socket.emit('cqi.corpora.corpus.structural_attributes.get', args, response => {
        if (response.code === 200) {
          resolve(new CQiStructuralAttribute(this.socket, this.corpus, response.payload));
        } else {
          reject(response);
        }
      });
    });
  }

  /**
   * Fetch all structural attributes of the corpus.
   *
   * @returns {Promise<CQiStructuralAttribute[]>} Rejects with the response
   *   when its code is not 200.
   */
  list() {
    return new Promise((resolve, reject) => {
      const args = {corpus_name: this.corpus.name};
      // The event name must carry the `cqi.corpora.corpus.` prefix that
      // get() uses; the shorter `cqi.corpus.` name has no server handler,
      // so the promise would never settle.
      this.socket.emit('cqi.corpora.corpus.structural_attributes.list', args, response => {
        if (response.code === 200) {
          resolve(response.payload.map(x => {return new CQiStructuralAttribute(this.socket, this.corpus, x);}));
        } else {
          reject(response);
        }
      });
    });
  }
}
/**
 * A structural attribute of a corpus (`hasValues`, `name`, `size`).
 */
class CQiStructuralAttribute {
  /**
   * @param {object} socket - Socket.IO client socket.
   * @param {object} corpus - Owning corpus.
   * @param {object} attrs - Attribute data with `has_values`, `name` and
   *   `size`.
   */
  constructor(socket, corpus, attrs) {
    const {has_values: hasValues, name, size} = attrs;
    this.socket = socket;
    this.corpus = corpus;
    this.hasValues = hasValues;
    this.name = name;
    this.size = size;
  }
}
/**
 * Access to the subcorpora of one corpus.
 */
class CQiSubcorpusCollection {
  /**
   * @param {object} socket - Socket.IO client socket used for all requests.
   * @param {object} corpus - Owning corpus; its `name` is sent with every
   *   request.
   */
  constructor(socket, corpus) {
    this.corpus = corpus;
    this.socket = socket;
  }

  /**
   * Fetch a single subcorpus by name; resolves with a CQiSubcorpus and
   * rejects with the response when its code is not 200.
   */
  get(subcorpusName) {
    return new Promise((resolve, reject) => {
      const requestArgs = {
        corpus_name: this.corpus.name,
        subcorpus_name: subcorpusName
      };
      this.socket.emit('cqi.corpora.corpus.subcorpora.get', requestArgs, (response) => {
        if (response.code !== 200) {
          reject(response);
          return;
        }
        resolve(new CQiSubcorpus(this.socket, this.corpus, response.payload));
      });
    });
  }

  /**
   * Fetch all subcorpora of the corpus; resolves with an array of
   * CQiSubcorpus and rejects with the response when its code is not 200.
   */
  list() {
    return new Promise((resolve, reject) => {
      const requestArgs = {corpus_name: this.corpus.name};
      this.socket.emit('cqi.corpora.corpus.subcorpora.list', requestArgs, (response) => {
        if (response.code !== 200) {
          reject(response);
          return;
        }
        const subcorpora = response.payload.map((attrs) => new CQiSubcorpus(this.socket, this.corpus, attrs));
        resolve(subcorpora);
      });
    });
  }
}
/**
 * A subcorpus (query result) of a corpus.
 *
 * Every request resolves with `response.payload` when the server answers
 * with code 200 and rejects with the whole response otherwise.
 */
class CQiSubcorpus {
  /**
   * @param {object} socket - Socket.IO client socket used for all requests.
   * @param {object} corpus - Owning corpus; its `name` is sent with every
   *   request.
   * @param {object} attrs - Subcorpus data with `fields`, `name` and `size`.
   */
  constructor(socket, corpus, attrs) {
    this.socket = socket;
    this.corpus = corpus;
    this.fields = attrs.fields;
    this.name = attrs.name;
    this.size = attrs.size;
  }

  /** Drop this subcorpus. */
  drop() {
    return new Promise((resolve, reject) => {
      const args = {corpus_name: this.corpus.name, subcorpus_name: this.name};
      this.socket.emit('cqi.corpora.corpus.subcorpora.subcorpus.drop', args, response => {
        if (response.code === 200) {
          resolve(response.payload);
        } else {
          reject(response);
        }
      });
    });
  }

  /** Request a dump of `field` for the range `first`..`last`. */
  dump(field, first, last) {
    return new Promise((resolve, reject) => {
      const args = {
        corpus_name: this.corpus.name,
        subcorpus_name: this.name,
        field: field,
        first: first,
        last: last
      };
      this.socket.emit('cqi.corpora.corpus.subcorpora.subcorpus.dump', args, response => {
        if (response.code === 200) {
          resolve(response.payload);
        } else {
          reject(response);
        }
      });
    });
  }

  /** Export the whole subcorpus with the given `context`. */
  export(context=50) {
    return new Promise((resolve, reject) => {
      const args = {
        corpus_name: this.corpus.name,
        subcorpus_name: this.name,
        context: context
      };
      this.socket.emit('cqi.corpora.corpus.subcorpora.subcorpus.export', args, response => {
        if (response.code === 200) {
          resolve(response.payload);
        } else {
          reject(response);
        }
      });
    });
  }

  /** Export only the matches listed in `matchIdList`. */
  partial_export(matchIdList, context=50) {
    return new Promise((resolve, reject) => {
      const args = {
        corpus_name: this.corpus.name,
        subcorpus_name: this.name,
        match_id_list: matchIdList,
        context: context
      };
      this.socket.emit('cqi.corpora.corpus.subcorpora.subcorpus.partial_export', args, response => {
        if (response.code === 200) {
          resolve(response.payload);
        } else {
          reject(response);
        }
      });
    });
  }

  /** Request a frequency distribution over one field/attribute pair. */
  fdst_1(cutoff, field, attribute) {
    return new Promise((resolve, reject) => {
      const args = {
        corpus_name: this.corpus.name,
        subcorpus_name: this.name,
        cutoff: cutoff,
        field: field,
        attribute: attribute
      };
      this.socket.emit('cqi.corpora.corpus.subcorpora.subcorpus.fdist_1', args, response => {
        if (response.code === 200) {
          resolve(response.payload);
        } else {
          reject(response);
        }
      });
    });
  }

  /** Request a frequency distribution over two field/attribute pairs. */
  fdst_2(cutoff, field1, attribute1, field2, attribute2) {
    return new Promise((resolve, reject) => {
      const args = {
        corpus_name: this.corpus.name,
        subcorpus_name: this.name,
        cutoff: cutoff,
        field1: field1,
        attribute1: attribute1,
        field2: field2,
        attribute2: attribute2
      };
      // Must target the fdist_2 event; this previously emitted fdist_1 (a
      // copy/paste slip), so the two-attribute request hit the wrong handler.
      this.socket.emit('cqi.corpora.corpus.subcorpora.subcorpus.fdist_2', args, response => {
        if (response.code === 200) {
          resolve(response.payload);
        } else {
          reject(response);
        }
      });
    });
  }

  // nopaque specific CQi extension
  /** Request one page of matches of this subcorpus. */
  paginate(page=1, perPage=20, context=50) {
    return new Promise((resolve, reject) => {
      const args = {
        corpus_name: this.corpus.name,
        subcorpus_name: this.name,
        page: page,
        per_page: perPage,
        context: context
      };
      this.socket.emit('cqi.corpora.corpus.subcorpora.subcorpus.paginate', args, response => {
        if (response.code === 200) {
          resolve(response.payload);
        } else {
          reject(response);
        }
      });
    });
  }
}

View File

@ -1,13 +1,15 @@
class CorpusAnalysisApp { class CorpusAnalysisApp {
constructor(corpusId) { constructor(corpusId) {
this.corpusId = corpusId;
this.data = {}; this.data = {};
// HTML elements // HTML elements
this.elements = { this.elements = {
container: document.querySelector('#corpus-analysis-app-container'), container: document.querySelector('#corpus-analysis-app-container'),
extensionCards: document.querySelector('#corpus-analysis-app-extension-cards'),
extensionTabs: document.querySelector('#corpus-analysis-app-extension-tabs'), extensionTabs: document.querySelector('#corpus-analysis-app-extension-tabs'),
initModal: document.querySelector('#corpus-analysis-app-init-modal'), initModal: document.querySelector('#corpus-analysis-app-init-modal')
overview: document.querySelector('#corpus-analysis-app-overview')
}; };
// Materialize elements // Materialize elements
this.elements.m = { this.elements.m = {
@ -17,80 +19,99 @@ class CorpusAnalysisApp {
this.extensions = {}; this.extensions = {};
this.settings = { this.settings = {};
corpusId: corpusId
};
} }
init() { async init() {
this.disableActionElements(); this.disableActionElements();
this.elements.m.initModal.open(); this.elements.m.initModal.open();
// Init data try {
this.data.cQiClient = new CQiClient(this.settings.corpusId); // Setup CQi over SocketIO connection and gather data from the CQPServer
this.data.cQiClient.connect() const statusTextElement = this.elements.initModal.querySelector('.status-text');
.then(cQiStatus => { statusTextElement.innerText = 'Creating CQi over SocketIO client...';
return this.data.cQiClient.corpora.get(`NOPAQUE_${this.settings.corpusId}`); const cqiClient = new cqi.CQiClient('/cqi_over_sio');
}) statusTextElement.innerText += ' Done';
.then( statusTextElement.innerHTML = 'Waiting for the CQP server...';
cQiCorpus => { const response = await cqiClient.api.socket.emitWithAck('init', this.corpusId);
this.data.corpus = {o: cQiCorpus}; if (response.code !== 200) {throw new Error();}
// TODO: Don't do this here statusTextElement.innerText += ' Done';
cQiCorpus.updateDb(); statusTextElement.innerHTML = 'Connecting to the CQP server...';
this.enableActionElements(); await cqiClient.connect('anonymous', '');
for (let extension of Object.values(this.extensions)) {extension.init();} statusTextElement.innerText += ' Done';
this.elements.m.initModal.close(); statusTextElement.innerHTML = 'Building and receiving corpus data cache from the server (This may take a while)...';
}, const cqiCorpus = await cqiClient.corpora.get(`NOPAQUE-${this.corpusId.toUpperCase()}`);
cQiError => { statusTextElement.innerText += ' Done';
let errorsElement = this.elements.initModal.querySelector('.errors'); // TODO: Don't do this here
let progressElement = this.elements.initModal.querySelector('.progress'); await cqiCorpus.updateDb();
errorsElement.innerText = JSON.stringify(cQiError); this.data.cqiClient = cqiClient;
this.data.cqiCorpus = cqiCorpus;
this.data.corpus = {o: cqiCorpus}; // legacy
// Initialize extensions
for (const extension of Object.values(this.extensions)) {
statusTextElement.innerHTML = `Initializing ${extension.name} extension...`;
await extension.init();
statusTextElement.innerText += ' Done'
}
} catch (error) {
let errorString = '';
if ('code' in error && error.code !== undefined && error.code !== null) {
errorString += `[${error.code}] `;
}
errorString += `${error.constructor.name}`;
if ('description' in error && error.description !== undefined && error.description !== null) {
errorString += `: ${error.description}`;
}
const errorsElement = this.elements.initModal.querySelector('.errors');
const progressElement = this.elements.initModal.querySelector('.progress');
errorsElement.innerText = errorString;
errorsElement.classList.remove('hide'); errorsElement.classList.remove('hide');
progressElement.classList.add('hide'); progressElement.classList.add('hide');
if ('payload' in cQiError && 'code' in cQiError.payload && 'msg' in cQiError.payload) { return;
app.flash(`${cQiError.payload.code}: ${cQiError.payload.msg}`, 'error');
} }
}
);
// Add event listeners for (const extensionSelectorElement of this.elements.extensionCards.querySelectorAll('.extension-selector')) {
for (let extensionSelectorElement of this.elements.overview.querySelectorAll('.extension-selector')) {
extensionSelectorElement.addEventListener('click', () => { extensionSelectorElement.addEventListener('click', () => {
this.elements.m.extensionTabs.select(extensionSelectorElement.dataset.target); this.elements.m.extensionTabs.select(extensionSelectorElement.dataset.target);
}); });
} }
this.enableActionElements();
this.elements.m.initModal.close();
} }
registerExtension(extension) { registerExtension(extension) {
if (extension.name in this.extensions) { if (extension.name in this.extensions) {return;}
console.error(`Can't register extension ${extension.name}: Already registered`);
return;
}
this.extensions[extension.name] = extension; this.extensions[extension.name] = extension;
if ('cQiClient' in this.data && this.data.cQiClient.connected) {extension.init();}
} }
disableActionElements() { disableActionElements() {
let actionElements = this.elements.container.querySelectorAll('.corpus-analysis-action'); const actionElements = this.elements.container.querySelectorAll('.corpus-analysis-action');
for (let actionElement of actionElements) { for (const actionElement of actionElements) {
if (actionElement.nodeName === 'INPUT') { switch(actionElement.nodeName) {
case 'INPUT':
actionElement.disabled = true; actionElement.disabled = true;
} else if (actionElement.nodeName === 'SELECT') { break;
case 'SELECT':
actionElement.parentNode.querySelector('input.select-dropdown').disabled = true; actionElement.parentNode.querySelector('input.select-dropdown').disabled = true;
} else { break;
default:
actionElement.classList.add('disabled'); actionElement.classList.add('disabled');
} }
} }
} }
enableActionElements() { enableActionElements() {
let actionElements = this.elements.container.querySelectorAll('.corpus-analysis-action'); const actionElements = this.elements.container.querySelectorAll('.corpus-analysis-action');
for (let actionElement of actionElements) { for (const actionElement of actionElements) {
if (actionElement.nodeName === 'INPUT') { switch(actionElement.nodeName) {
case 'INPUT':
actionElement.disabled = false; actionElement.disabled = false;
} else if (actionElement.nodeName === 'SELECT') { break;
case 'SELECT':
actionElement.parentNode.querySelector('input.select-dropdown').disabled = false; actionElement.parentNode.querySelector('input.select-dropdown').disabled = false;
} else { break;
default:
actionElement.classList.remove('disabled'); actionElement.classList.remove('disabled');
} }
} }

View File

@ -30,33 +30,22 @@ class CorpusAnalysisConcordance {
this.app.registerExtension(this); this.app.registerExtension(this);
} }
init() { async submitForm() {
// Init data
this.data.corpus = this.app.data.corpus;
this.data.subcorpora = {};
// Add event listeners
this.elements.form.addEventListener('submit', event => {
event.preventDefault();
this.app.disableActionElements(); this.app.disableActionElements();
let query = this.elements.form.query.value.trim(); let query = this.elements.form.query.value.trim();
let subcorpusName = this.elements.form['subcorpus-name'].value; let subcorpusName = this.elements.form['subcorpus-name'].value;
this.elements.error.innerText = ''; this.elements.error.innerText = '';
this.elements.error.classList.add('hide'); this.elements.error.classList.add('hide');
this.elements.progress.classList.remove('hide'); this.elements.progress.classList.remove('hide');
let subcorpus = {}; try {
this.data.corpus.o.query(subcorpusName, query) const subcorpus = {};
.then(cQiStatus => {
subcorpus.q = query; subcorpus.q = query;
subcorpus.selectedItems = new Set(); subcorpus.selectedItems = new Set();
await this.data.corpus.o.query(subcorpusName, query);
if (subcorpusName !== 'Last') {this.data.subcorpora.Last = subcorpus;} if (subcorpusName !== 'Last') {this.data.subcorpora.Last = subcorpus;}
return this.data.corpus.o.subcorpora.get(subcorpusName); const cqiSubcorpus = await this.data.corpus.o.subcorpora.get(subcorpusName);
}) subcorpus.o = cqiSubcorpus;
.then(cQiSubcorpus => { const paginatedSubcorpus = await cqiSubcorpus.paginate(this.settings.context, 1, this.settings.perPage);
subcorpus.o = cQiSubcorpus;
return cQiSubcorpus.paginate(1, this.settings.perPage, this.settings.context);
})
.then(
paginatedSubcorpus => {
subcorpus.p = paginatedSubcorpus; subcorpus.p = paginatedSubcorpus;
this.data.subcorpora[subcorpusName] = subcorpus; this.data.subcorpora[subcorpusName] = subcorpus;
this.settings.selectedSubcorpus = subcorpusName; this.settings.selectedSubcorpus = subcorpusName;
@ -66,27 +55,35 @@ class CorpusAnalysisConcordance {
this.renderSubcorpusItems(); this.renderSubcorpusItems();
this.renderSubcorpusPagination(); this.renderSubcorpusPagination();
this.elements.progress.classList.add('hide'); this.elements.progress.classList.add('hide');
this.app.enableActionElements(); } catch (error) {
}, let errorString = '';
cQiError => { if ('code' in error) {errorString += `[${error.code}] `;}
this.elements.error.innerText = JSON.stringify(cQiError); errorString += `${error.constructor.name}`;
this.elements.error.innerText = errorString;
this.elements.error.classList.remove('hide'); this.elements.error.classList.remove('hide');
if ('payload' in cQiError && 'code' in cQiError.payload && 'msg' in cQiError.payload) { app.flash(errorString, 'error');
app.flash(`${cQiError.payload.code}: ${cQiError.payload.msg}`, 'error');
}
this.elements.progress.classList.add('hide'); this.elements.progress.classList.add('hide');
}
this.app.enableActionElements(); this.app.enableActionElements();
} }
);
async init() {
// Init data
this.data.corpus = this.app.data.corpus;
this.data.subcorpora = {};
// Add event listeners
this.elements.form.addEventListener('submit', (event) => {
event.preventDefault();
this.submitForm();
}); });
this.elements.form.addEventListener('change', event => { this.elements.form.addEventListener('change', (event) => {
if (event.target === this.elements.form['context']) { if (event.target === this.elements.form['context']) {
this.settings.context = parseInt(this.elements.form['context'].value); this.settings.context = parseInt(this.elements.form['context'].value);
this.elements.form.submit.click(); this.submitForm();
} }
if (event.target === this.elements.form['per-page']) { if (event.target === this.elements.form['per-page']) {
this.settings.perPage = parseInt(this.elements.form['per-page'].value); this.settings.perPage = parseInt(this.elements.form['per-page'].value);
this.elements.form.submit.click(); this.submitForm();
} }
if (event.target === this.elements.form['text-style']) { if (event.target === this.elements.form['text-style']) {
this.settings.textStyle = parseInt(this.elements.form['text-style'].value); this.settings.textStyle = parseInt(this.elements.form['text-style'].value);
@ -162,7 +159,7 @@ class CorpusAnalysisConcordance {
</a> </a>
`.trim(); `.trim();
M.Tooltip.init(this.elements.subcorpusActions.querySelectorAll('.tooltipped')); M.Tooltip.init(this.elements.subcorpusActions.querySelectorAll('.tooltipped'));
this.elements.subcorpusActions.querySelector('.subcorpus-export-trigger').addEventListener('click', event => { this.elements.subcorpusActions.querySelector('.subcorpus-export-trigger').addEventListener('click', (event) => {
event.preventDefault(); event.preventDefault();
let subcorpus = this.data.subcorpora[this.settings.selectedSubcorpus]; let subcorpus = this.data.subcorpora[this.settings.selectedSubcorpus];
let modalElementId = Utils.generateElementId('export-subcorpus-modal-'); let modalElementId = Utils.generateElementId('export-subcorpus-modal-');
@ -219,7 +216,7 @@ class CorpusAnalysisConcordance {
} }
} }
); );
exportButton.addEventListener('click', event => { exportButton.addEventListener('click', (event) => {
event.preventDefault(); event.preventDefault();
this.app.disableActionElements(); this.app.disableActionElements();
this.elements.progress.classList.remove('hide'); this.elements.progress.classList.remove('hide');
@ -236,12 +233,12 @@ class CorpusAnalysisConcordance {
app.flash('No matches selected', 'error'); app.flash('No matches selected', 'error');
return; return;
} }
promise = subcorpus.o.partial_export([...subcorpus.selectedItems], 50); promise = subcorpus.o.partialExport([...subcorpus.selectedItems], 50);
} else { } else {
promise = subcorpus.o.export(50); promise = subcorpus.o.export(50);
} }
promise.then( promise.then(
data => { (data) => {
let blob; let blob;
if (exportFormat === 'csv') { if (exportFormat === 'csv') {
let csvContent = 'sep=,\r\n'; let csvContent = 'sep=,\r\n';
@ -287,11 +284,11 @@ class CorpusAnalysisConcordance {
}); });
modal.open(); modal.open();
}); });
this.elements.subcorpusActions.querySelector('.subcorpus-delete-trigger').addEventListener('click', event => { this.elements.subcorpusActions.querySelector('.subcorpus-delete-trigger').addEventListener('click', (event) => {
event.preventDefault(); event.preventDefault();
let subcorpus = this.data.subcorpora[this.settings.selectedSubcorpus]; let subcorpus = this.data.subcorpora[this.settings.selectedSubcorpus];
subcorpus.o.drop().then( subcorpus.o.drop().then(
cQiStatus => { (cQiStatus) => {
app.flash(`${subcorpus.o.name} deleted`, 'corpus'); app.flash(`${subcorpus.o.name} deleted`, 'corpus');
delete this.data.subcorpora[subcorpus.o.name]; delete this.data.subcorpora[subcorpus.o.name];
this.settings.selectedSubcorpus = undefined; this.settings.selectedSubcorpus = undefined;
@ -312,8 +309,9 @@ class CorpusAnalysisConcordance {
this.clearSubcorpusPagination(); this.clearSubcorpusPagination();
} }
}, },
cQiError => { (cqiError) => {
app.flash(`${cQiError.payload.code}: ${cQiError.payload.msg}`, 'error'); let errorString = `${cqiError.code}: ${cqiError.constructor.name}`;
app.flash(errorString, 'error');
} }
); );
}); });
@ -362,7 +360,7 @@ class CorpusAnalysisConcordance {
this.setTextStyle(); this.setTextStyle();
this.setTokenRepresentation(); this.setTokenRepresentation();
for (let gotoReaderTriggerElement of this.elements.subcorpusItems.querySelectorAll('.goto-reader-trigger')) { for (let gotoReaderTriggerElement of this.elements.subcorpusItems.querySelectorAll('.goto-reader-trigger')) {
gotoReaderTriggerElement.addEventListener('click', event => { gotoReaderTriggerElement.addEventListener('click', (event) => {
event.preventDefault(); event.preventDefault();
let corpusAnalysisReader = this.app.extensions.Reader; let corpusAnalysisReader = this.app.extensions.Reader;
let itemId = parseInt(gotoReaderTriggerElement.closest('.item').dataset.id); let itemId = parseInt(gotoReaderTriggerElement.closest('.item').dataset.id);
@ -384,7 +382,7 @@ class CorpusAnalysisConcordance {
}); });
} }
for (let selectTriggerElement of this.elements.subcorpusItems.querySelectorAll('.select-trigger')) { for (let selectTriggerElement of this.elements.subcorpusItems.querySelectorAll('.select-trigger')) {
selectTriggerElement.addEventListener('click', event => { selectTriggerElement.addEventListener('click', (event) => {
event.preventDefault(); event.preventDefault();
let itemElement = selectTriggerElement.closest('.item'); let itemElement = selectTriggerElement.closest('.item');
let itemId = parseInt(itemElement.dataset.id); let itemId = parseInt(itemElement.dataset.id);
@ -446,14 +444,14 @@ class CorpusAnalysisConcordance {
</li> </li>
`.trim(); `.trim();
for (let paginationTriggerElement of this.elements.subcorpusPagination.querySelectorAll('.pagination-trigger[data-target]')) { for (let paginationTriggerElement of this.elements.subcorpusPagination.querySelectorAll('.pagination-trigger[data-target]')) {
paginationTriggerElement.addEventListener('click', event => { paginationTriggerElement.addEventListener('click', (event) => {
event.preventDefault(); event.preventDefault();
this.app.disableActionElements(); this.app.disableActionElements();
this.elements.progress.classList.remove('hide'); this.elements.progress.classList.remove('hide');
let page = parseInt(paginationTriggerElement.dataset.target); let page = parseInt(paginationTriggerElement.dataset.target);
subcorpus.o.paginate(page, this.settings.perPage, this.settings.context) subcorpus.o.paginate(this.settings.context, page, this.settings.perPage)
.then( .then(
paginatedSubcorpus => { (paginatedSubcorpus) => {
subcorpus.p = paginatedSubcorpus; subcorpus.p = paginatedSubcorpus;
this.renderSubcorpusItems(); this.renderSubcorpusItems();
this.renderSubcorpusPagination(); this.renderSubcorpusPagination();

View File

@ -19,47 +19,52 @@ class CorpusAnalysisReader {
this.settings = { this.settings = {
perPage: parseInt(this.elements.form['per-page'].value), perPage: parseInt(this.elements.form['per-page'].value),
textStyle: parseInt(this.elements.form['text-style'].value), textStyle: parseInt(this.elements.form['text-style'].value),
tokenRepresentation: this.elements.form['token-representation'].value tokenRepresentation: this.elements.form['token-representation'].value,
pagination: {
innerWindow: 5,
outerWindow: 1
}
} }
this.app.registerExtension(this); this.app.registerExtension(this);
} }
init() { async submitForm() {
// Init data
this.data.corpus = this.app.data.corpus;
this.data.subcorpora = {};
// Add event listeners
this.elements.form.addEventListener('submit', (event) => {
event.preventDefault();
this.app.disableActionElements(); this.app.disableActionElements();
this.elements.error.innerText = ''; this.elements.error.innerText = '';
this.elements.error.classList.add('hide'); this.elements.error.classList.add('hide');
this.elements.progress.classList.remove('hide'); this.elements.progress.classList.remove('hide');
this.data.corpus.o.paginate(1, this.settings.perPage) try {
.then( const paginatedCorpus = await this.data.corpus.o.paginate(1, this.settings.perPage);
paginatedCorpus => {
this.data.corpus.p = paginatedCorpus; this.data.corpus.p = paginatedCorpus;
this.renderCorpus(); this.renderCorpus();
this.renderCorpusPagination(); this.renderCorpusPagination();
this.elements.progress.classList.add('hide'); this.elements.progress.classList.add('hide');
this.app.enableActionElements(); } catch (error) {
}, let errorString = '';
error => { if ('code' in error) {errorString += `[${error.code}] `;}
this.elements.error.innerText = JSON.stringify(error); errorString += `${error.constructor.name}`;
if ('description' in error) {errorString += `: ${error.description}`;}
this.elements.error.innerText = errorString;
this.elements.error.classList.remove('hide'); this.elements.error.classList.remove('hide');
if ('payload' in error && 'code' in error.payload && 'msg' in error.payload) { app.flash(errorString, 'error');
app.flash(`${error.payload.code}: ${error.payload.msg}`, 'error');
}
this.elements.progress.classList.add('hide'); this.elements.progress.classList.add('hide');
}
this.app.enableActionElements(); this.app.enableActionElements();
} }
);
async init() {
// Init data
this.data.corpus = this.app.data.corpus;
// Add event listeners
this.elements.form.addEventListener('submit', (event) => {
event.preventDefault();
this.submitForm();
}); });
this.elements.form.addEventListener('change', event => { this.elements.form.addEventListener('change', (event) => {
if (event.target === this.elements.form['per-page']) { if (event.target === this.elements.form['per-page']) {
this.settings.perPage = parseInt(this.elements.form['per-page'].value); this.settings.perPage = parseInt(this.elements.form['per-page'].value);
this.elements.form.submit.click(); this.submitForm();
} }
if (event.target === this.elements.form['text-style']) { if (event.target === this.elements.form['text-style']) {
this.settings.textStyle = parseInt(this.elements.form['text-style'].value); this.settings.textStyle = parseInt(this.elements.form['text-style'].value);
@ -71,7 +76,7 @@ class CorpusAnalysisReader {
} }
}); });
// Load initial data // Load initial data
this.elements.form.submit.click(); await this.submitForm();
} }
clearCorpus() { clearCorpus() {
@ -144,7 +149,7 @@ class CorpusAnalysisReader {
} }
// render page buttons (5 before and 5 after current page) // render page buttons (5 before and 5 after current page)
for (let i = this.data.corpus.p.page -5; i <= this.data.corpus.p.page; i++) { for (let i = this.data.corpus.p.page - this.settings.pagination.innerWindow; i <= this.data.corpus.p.page; i++) {
if (i <= 0) {continue;} if (i <= 0) {continue;}
pageElement = Utils.HTMLToElement( pageElement = Utils.HTMLToElement(
` `
@ -155,7 +160,7 @@ class CorpusAnalysisReader {
); );
this.elements.corpusPagination.appendChild(pageElement); this.elements.corpusPagination.appendChild(pageElement);
}; };
for (let i = this.data.corpus.p.page +1; i <= this.data.corpus.p.page +5; i++) { for (let i = this.data.corpus.p.page +1; i <= this.data.corpus.p.page + this.settings.pagination.innerWindow; i++) {
if (i > this.data.corpus.p.pages) {break;} if (i > this.data.corpus.p.pages) {break;}
pageElement = Utils.HTMLToElement( pageElement = Utils.HTMLToElement(
` `
@ -203,7 +208,7 @@ class CorpusAnalysisReader {
this.elements.corpusPagination.appendChild(pageElement); this.elements.corpusPagination.appendChild(pageElement);
for (let paginateTriggerElement of this.elements.corpusPagination.querySelectorAll('.pagination-trigger[data-target]')) { for (let paginateTriggerElement of this.elements.corpusPagination.querySelectorAll('.pagination-trigger[data-target]')) {
paginateTriggerElement.addEventListener('click', event => { paginateTriggerElement.addEventListener('click', (event) => {
event.preventDefault(); event.preventDefault();
let page = parseInt(paginateTriggerElement.dataset.target); let page = parseInt(paginateTriggerElement.dataset.target);
this.page(page); this.page(page);
@ -247,7 +252,7 @@ class CorpusAnalysisReader {
this.elements.progress.classList.remove('hide'); this.elements.progress.classList.remove('hide');
this.data.corpus.o.paginate(pageNum, this.settings.perPage) this.data.corpus.o.paginate(pageNum, this.settings.perPage)
.then( .then(
paginatedCorpus => { (paginatedCorpus) => {
this.data.corpus.p = paginatedCorpus; this.data.corpus.p = paginatedCorpus;
this.renderCorpus(); this.renderCorpus();
this.renderCorpusPagination(); this.renderCorpusPagination();

View File

@ -0,0 +1,443 @@
class CorpusAnalysisStaticVisualization {
name = 'Static Visualization (beta)';
constructor(app) {
this.app = app;
this.data = {
stopwords: undefined,
originalStopwords: {},
stopwordCache: {},
promises: {getStopwords: undefined},
tokenSet: new Set()
};
this.app.registerExtension(this);
}
init() {
// Init data
this.data.corpus = this.app.data.corpus;
this.renderGeneralCorpusInfo();
this.renderTextInfoList();
this.renderTextProportionsGraphic();
this.renderTokenList();
// this.renderFrequenciesGraphic();
// Add event listeners
let frequenciesStopwordSettingModal = document.querySelector('#frequencies-stopwords-setting-modal');
let frequenciesStopwordSettingModalButton = document.querySelector('#frequencies-stopwords-setting-modal-button');
frequenciesStopwordSettingModalButton.addEventListener('click', () => {
this.data.stopwordCache = structuredClone(this.data.stopwords);
this.renderStopwordSettingsModal(this.data.stopwords);
M.Modal.init(frequenciesStopwordSettingModal, {dismissible: false});
});
let textProportionsGraphModeButtons = document.querySelectorAll('.text-proportions-graph-mode-button');
textProportionsGraphModeButtons.forEach(graphModeButton => {
graphModeButton.addEventListener('click', (event) => {
textProportionsGraphModeButtons.forEach(btn => {
btn.classList.remove('disabled');
});
event.target.closest('.text-proportions-graph-mode-button').classList.add('disabled');
this.renderTextProportionsGraphic();
});
});
let frequenciesTokenCategoryDropdownElement = document.querySelector('[data-target="frequencies-token-category-dropdown"]');
let frequenciesTokenCategoryDropdownListElement = document.querySelector("#frequencies-token-category-dropdown");
frequenciesTokenCategoryDropdownListElement.addEventListener('click', (event) => {
frequenciesTokenCategoryDropdownElement.firstChild.textContent = event.target.innerHTML;
this.renderTokenList();
});
let frequenciesGraphModeButtons = document.querySelectorAll('.frequencies-graph-mode-button');
frequenciesGraphModeButtons.forEach(graphModeButton => {
graphModeButton.addEventListener('click', (event) => {
frequenciesGraphModeButtons.forEach(btn => {
btn.classList.remove('disabled');
});
event.target.closest('.frequencies-graph-mode-button').classList.add('disabled');
this.renderFrequenciesGraphic(this.data.tokenSet);
});
});
for (let actionButton of document.querySelectorAll('.frequencies-stopword-setting-modal-action-buttons')) {
actionButton.addEventListener('click', (event) => {
let action = event.target.closest('.frequencies-stopword-setting-modal-action-buttons').dataset.action;
if (action === 'submit') {
this.renderTokenList();
} else if (action === 'cancel') {
this.data.stopwords = structuredClone(this.data.stopwordCache);
}
});
}
}
getStopwords() {
this.data.promises.getStopwords = new Promise((resolve, reject) => {
Requests.corpora.entity.getStopwords()
.then((response) => {
response.json()
.then((json) => {
this.data.originalStopwords = structuredClone(json);
this.data.stopwords = structuredClone(json);
resolve(this.data.stopwords);
})
.catch((error) => {
reject(error);
});
});
});
return this.data.promises.getStopwords;
}
renderGeneralCorpusInfo() {
let corpusData = this.data.corpus.o.staticData;
document.querySelector('.corpus-num-tokens').innerHTML = corpusData.corpus.counts.token;
document.querySelector('.corpus-num-s').innerHTML = corpusData.corpus.counts.s;
document.querySelector('.corpus-num-unique-words').innerHTML = Object.entries(corpusData.corpus.freqs.word).length;
document.querySelector('.corpus-num-unique-lemmas').innerHTML = Object.entries(corpusData.corpus.freqs.lemma).length;
document.querySelector('.corpus-num-unique-pos').innerHTML = Object.entries(corpusData.corpus.freqs.pos).length;
document.querySelector('.corpus-num-unique-simple-pos').innerHTML = Object.entries(corpusData.corpus.freqs.simple_pos).length;
}
renderTextInfoList() {
let corpusData = this.data.corpus.o.staticData;
let corpusTextInfoListElement = document.querySelector('.corpus-text-info-list');
let corpusTextInfoList = new CorpusTextInfoList(corpusTextInfoListElement);
let texts = corpusData.s_attrs.text.lexicon;
let textData = [];
for (let i = 0; i < Object.entries(texts).length; i++) {
let resource = {
title: corpusData.values.s_attrs.text[i].title,
publishing_year: corpusData.values.s_attrs.text[i].publishing_year,
num_tokens: corpusData.s_attrs.text.lexicon[i].counts.token,
num_sentences: corpusData.s_attrs.text.lexicon[i].counts.s,
num_unique_words: Object.entries(corpusData.s_attrs.text.lexicon[i].freqs.word).length,
num_unique_lemmas: Object.entries(corpusData.s_attrs.text.lexicon[i].freqs.lemma).length,
num_unique_pos: Object.entries(corpusData.s_attrs.text.lexicon[i].freqs.pos).length,
num_unique_simple_pos: Object.entries(corpusData.s_attrs.text.lexicon[i].freqs.simple_pos).length
};
textData.push(resource);
}
corpusTextInfoList.add(textData);
let textCountChipElement = document.querySelector('.text-count-chip');
textCountChipElement.innerHTML = `Text count: ${corpusData.corpus.counts.text}`;
}
renderTextProportionsGraphic() {
let corpusData = this.data.corpus.o.staticData;
let textProportionsGraphicElement = document.querySelector('#text-proportions-graphic');
let texts = Object.entries(corpusData.s_attrs.text.lexicon);
let graphtype = document.querySelector('.text-proportions-graph-mode-button.disabled').dataset.graphType;
let textProportionsTitleElement = document.querySelector('#text-proportions-title-element');
if (graphtype === 'bar') {
textProportionsTitleElement.innerHTML = 'Bounds';
} else if (graphtype === 'pie') {
textProportionsTitleElement.innerHTML = 'Proportions';
}
let graphData = this.createTextProportionsGraphData(texts, graphtype);
let graphLayout = {
barmode: graphtype === 'bar' ? 'relative' : '',
type: graphtype,
showgrid: false,
height: 447,
margin: {
l: 10,
r: 10,
b: graphtype === 'bar' ? 80 : 10,
t: graphtype === 'bar' ? 80 : 10,
},
legend: {
"orientation": "h",
font: {
size: 10
}
},
xaxis: {
rangemode: 'nonnegative',
autorange: true
},
yaxis: {
autorange: true,
showticklabels: false
}
};
let config = {
responsive: true,
modeBarButtonsToRemove: ['zoom2d', 'select2d', 'lasso2d', 'zoomIn2d', 'zoomOut2d', 'autoScale2d', 'resetScale2d'],
displaylogo: false
};
Plotly.newPlot(textProportionsGraphicElement, graphData, graphLayout, config);
}
createTextProportionsGraphData(texts, graphtype) {
let corpusData = this.data.corpus.o.staticData;
let graphData = [];
switch (graphtype) {
case 'bar':
for (let text of texts) {
let textData = {
type: 'bar',
orientation: 'h',
x: [text[1].bounds[1] - text[1].bounds[0]],
y: [0.5],
text: [`${text[1].bounds[0]} - ${text[1].bounds[1]}`],
name: `${corpusData.values.s_attrs.text[text[0]].title} (${corpusData.values.s_attrs.text[text[0]].publishing_year})`,
hovertemplate: `${text[1].bounds[0]} - ${text[1].bounds[1]}`,
};
graphData.push(textData);
}
break;
default:
graphData = [
{
values: texts.map(text => text[1].counts.token),
labels: texts.map(text => `${corpusData.values.s_attrs.text[text[0]].title} (${corpusData.values.s_attrs.text[text[0]].publishing_year})`),
type: graphtype
}
];
break;
}
return graphData;
}
async renderTokenList() {
let corpusTokenListElement = document.querySelector('.corpus-token-list');
let corpusTokenList = new CorpusTokenList(corpusTokenListElement);
let filteredData = this.filterData();
let stopwords = this.data.stopwords;
if (this.data.stopwords === undefined) {
stopwords = await this.getStopwords();
}
stopwords = Object.values(stopwords).flat();
let mostFrequent = Object.entries(filteredData)
.sort((a, b) => b[1].count - a[1].count)
.filter(item => !stopwords.includes(item[0].toLowerCase()))
.slice(0, 4)
.map(item => item[0])
let tokenData = [];
Object.entries(filteredData).forEach(item => {
let resource = {
term: item[0],
count: item[1].count,
mostFrequent: mostFrequent.includes(item[0])
};
if (!Object.values(stopwords).includes(resource.term)) {
tokenData.push(resource);
}
});
corpusTokenList.add(tokenData);
}
filterData() {
let frequenciesTokenCategoryDropdownElement = document.querySelector('[data-target="frequencies-token-category-dropdown"]');
let tokenCategory = frequenciesTokenCategoryDropdownElement.firstChild.textContent.toLowerCase();
let corpusData = this.data.corpus.o.staticData;
let filteredData = {};
for (let i = 0; i < Object.values(corpusData.corpus.freqs[tokenCategory]).length; i++) {
let term = corpusData.values.p_attrs[tokenCategory][i].toLowerCase();
let count = corpusData.corpus.freqs[tokenCategory][i];
if (filteredData[term]) {
filteredData[term].count += count;
filteredData[term].originalIds.push(i);
} else {
filteredData[term] = {
count: count,
originalIds: [i]
};
}
}
return filteredData;
}
renderFrequenciesGraphic(tokenSet) {
this.data.tokenSet = tokenSet;
let corpusData = this.data.corpus.o.staticData;
let frequenciesTokenCategoryDropdownElement = document.querySelector('[data-target="frequencies-token-category-dropdown"]');
let frequenciesGraphicElement = document.querySelector('#frequencies-graphic');
let texts = Object.entries(corpusData.s_attrs.text.lexicon);
let graphtype = document.querySelector('.frequencies-graph-mode-button.disabled').dataset.graphType;
let tokenCategory = frequenciesTokenCategoryDropdownElement.firstChild.textContent.toLowerCase();
let graphData = this.createFrequenciesGraphData(tokenCategory, texts, graphtype, tokenSet);
let graphLayout = {
barmode: graphtype === 'bar' ? 'stack' : '',
yaxis: {
showticklabels: graphtype === 'markers' ? false : true
},
height: 627,
margin: {
l: 33
}
};
let config = {
responsive: true,
modeBarButtonsToRemove: ['zoom2d', 'select2d', 'lasso2d', 'zoomIn2d', 'zoomOut2d', 'autoScale2d', 'resetScale2d'],
displaylogo: false
};
Plotly.newPlot(frequenciesGraphicElement, graphData, graphLayout, config);
}
createFrequenciesGraphData(tokenCategory, texts, graphtype, tokenSet) {
let corpusData = this.data.corpus.o.staticData;
let graphData = [];
let filteredData = this.filterData();
switch (graphtype) {
case 'markers':
for (let item of tokenSet) {
let textTitles = texts.map(text => `${corpusData.values.s_attrs.text[text[0]].title} (${corpusData.values.s_attrs.text[text[0]].publishing_year})`);
let tokenCountPerText = [];
for (let originalId of filteredData[item].originalIds) {
for (let i = 0; i < texts.length; i++) {
tokenCountPerText[i] = (tokenCountPerText[i] || 0) + (texts[i][1].freqs[tokenCategory][originalId] || 0);
}
}
let data = {
x: textTitles,
y: texts.map(text => item),
name: item,
text: texts.map(text => `${item}<br>${tokenCountPerText || 0}`),
mode: 'markers',
marker: {
size: tokenCountPerText,
sizeref: 0.4
}
};
graphData.push(data);
}
break;
default:
for (let item of tokenSet) {
let textTitles = texts.map(text => `${corpusData.values.s_attrs.text[text[0]].title} (${corpusData.values.s_attrs.text[text[0]].publishing_year})`);
let tokenCountPerText = [];
for (let originalId of filteredData[item].originalIds) {
for (let i = 0; i < texts.length; i++) {
tokenCountPerText[i] = (tokenCountPerText[i] || 0) + (texts[i][1].freqs[tokenCategory][originalId] || 0);
}
}
let data = {
x: textTitles,
y: tokenCountPerText,
name: item,
type: graphtype
};
graphData.push(data);
}
break;
}
return graphData;
}
renderStopwordSettingsModal(stopwords) {
let stopwordInputField = document.querySelector('#stopword-input-field');
let userStopwordListContainer = document.querySelector('#user-stopword-list-container');
let stopwordLanguageSelection = document.querySelector('#stopword-language-selection');
let stopwordLanguageChipList = document.querySelector('#stopword-language-chip-list');
let deleteLanguageStopwordListEntriesButton = document.querySelector('#delete-language-stopword-list-entries-button');
let resetLanguageStopwordListEntriesButton = document.querySelector('#reset-language-stopword-list-entries-button');
stopwordLanguageChipList.innerHTML = '';
userStopwordListContainer.innerHTML = '';
stopwordInputField.value = '';
// Render stopword language selection. Set english as default language. Filter out user_stopwords.
if (stopwordLanguageSelection.children.length === 0) {
Object.keys(stopwords).forEach(language => {
if (language !== 'user_stopwords') {
let optionElement = Utils.HTMLToElement(`<option value="${language}" ${language === 'english' ? 'selected' : ''}>${language}</option>`);
stopwordLanguageSelection.appendChild(optionElement);
}
});
}
// Render user stopwords over input field.
if (this.data.stopwords['user_stopwords'].length > 0) {
for (let word of this.data.stopwords['user_stopwords']) {
let chipElement = Utils.HTMLToElement(`<div class="chip">${word}<i class="close material-icons">close</i></div>`);
chipElement.addEventListener('click', (event) => {
let removedListItem = event.target.closest('.chip').firstChild.textContent;
this.data.stopwords['user_stopwords'] = structuredClone(this.data.stopwords['user_stopwords'].filter(item => item !== removedListItem));
});
userStopwordListContainer.appendChild(chipElement);
}
}
// Render english stopwords as default ...
let selectedLanguage = document.querySelector('#stopword-language-selection').value;
this.renderStopwordLanguageChipList(selectedLanguage, stopwords[selectedLanguage]);
// ... or render selected language stopwords.
stopwordLanguageSelection.addEventListener('change', (event) => {
this.renderStopwordLanguageChipList(event.target.value, stopwords[event.target.value]);
});
// Eventlistener for deleting all stopwords of a language.
deleteLanguageStopwordListEntriesButton.addEventListener('click', (event) => {
let selectedLanguage = stopwordLanguageSelection.value;
this.data.stopwords[selectedLanguage] = [];
stopwordLanguageChipList.innerHTML = '';
this.buttonRendering();
});
// Eventlistener for resetting all stopwords of a language to the original stopwords.
resetLanguageStopwordListEntriesButton.addEventListener('click', () => {
let selectedLanguage = stopwordLanguageSelection.value;
this.data.stopwords[selectedLanguage] = structuredClone(this.data.originalStopwords[selectedLanguage]);
this.renderStopwordLanguageChipList(selectedLanguage, this.data.stopwords[selectedLanguage]);
});
// Initialize Materialize components.
M.Chips.init(
stopwordInputField,
{
placeholder: 'Add stopwords',
onChipAdd: (event) => {
for (let word of event[0].M_Chips.chipsData) {
if (!this.data.stopwords['user_stopwords'].includes(word.tag.toLowerCase())) {
this.data.stopwords['user_stopwords'].push(word.tag.toLowerCase());
}
}
}
}
);
M.FormSelect.init(stopwordLanguageSelection);
}
buttonRendering() {
let deleteLanguageStopwordListEntriesButton = document.querySelector('#delete-language-stopword-list-entries-button');
let resetLanguageStopwordListEntriesButton = document.querySelector('#reset-language-stopword-list-entries-button');
let selectedLanguage = document.querySelector('#stopword-language-selection').value;
let stopwordLength = this.data.stopwords[selectedLanguage].length;
let originalStopwordListLength = this.data.originalStopwords[selectedLanguage].length;
deleteLanguageStopwordListEntriesButton.classList.toggle('disabled', stopwordLength === 0);
resetLanguageStopwordListEntriesButton.classList.toggle('disabled', stopwordLength === originalStopwordListLength);
}
renderStopwordLanguageChipList(language, stopwords) {
let stopwordLanguageChipList = document.querySelector('#stopword-language-chip-list');
stopwordLanguageChipList.innerHTML = '';
for (let word of stopwords) {
let chipElement = Utils.HTMLToElement(`<div class="chip">${word}<i class="close material-icons">close</i></div>`);
chipElement.addEventListener('click', (event) => {
let removedListItem = event.target.closest('.chip').firstChild.textContent;
this.data.stopwords[language] = structuredClone(this.data.stopwords[language].filter(item => item !== removedListItem));
this.buttonRendering();
});
stopwordLanguageChipList.appendChild(chipElement);
}
this.buttonRendering();
}
}

View File

@ -162,9 +162,21 @@ class ConcordanceQueryBuilder {
this.elements.or.addEventListener('click', () => {this.orHandler();}); this.elements.or.addEventListener('click', () => {this.orHandler();});
this.elements.and.addEventListener('click', () => {this.andHandler();}); this.elements.and.addEventListener('click', () => {this.andHandler();});
//#endregion Token Attribute Event Listeners //#endregion Token Attribute Event Listeners
let selectInstances = this.elements.concordanceQueryBuilder.querySelectorAll('select');
M.FormSelect.init(
selectInstances,
{
dropdownOptions: {
alignment: 'bottom',
coverTrigger: false
}
}
)
let dropdownContents = this.elements.concordanceQueryBuilder.querySelectorAll('.dropdown-content');
dropdownContents.forEach((dropdownContent) => {
dropdownContent.style.paddingBottom = '15px';
});
} }
@ -182,6 +194,7 @@ class ConcordanceQueryBuilder {
showPositionalAttrArea() { showPositionalAttrArea() {
this.elements.positionalAttrArea.classList.remove('hide'); this.elements.positionalAttrArea.classList.remove('hide');
this.elements.structuralAttrArea.classList.add('hide');
this.wordBuilder(); this.wordBuilder();
this.elements.tokenQueryFilled = false; this.elements.tokenQueryFilled = false;
@ -195,6 +208,7 @@ class ConcordanceQueryBuilder {
} }
queryChipFactory(dataType, prettyQueryText, queryText) { queryChipFactory(dataType, prettyQueryText, queryText) {
this.elements.counter++;
window.location.href = '#query-container'; window.location.href = '#query-container';
queryText = Utils.escape(queryText); queryText = Utils.escape(queryText);
prettyQueryText = Utils.escape(prettyQueryText); prettyQueryText = Utils.escape(prettyQueryText);
@ -274,9 +288,9 @@ class ConcordanceQueryBuilder {
queryPreviewBuilder() { queryPreviewBuilder() {
this.elements.yourQueryContent = []; this.elements.yourQueryContent = [];
for (let element of this.elements.yourQuery.childNodes) { for (let element of this.elements.yourQuery.childNodes) {
let queryElement = decodeURI(element.dataset.query); let queryElement = element.dataset.query;
if (queryElement !== undefined) {
queryElement = Utils.escape(queryElement); queryElement = Utils.escape(queryElement);
if (queryElement !== 'undefined') {
this.elements.yourQueryContent.push(queryElement); this.elements.yourQueryContent.push(queryElement);
} }
} }
@ -306,7 +320,7 @@ class ConcordanceQueryBuilder {
this.validateValue(); this.validateValue();
if (this.elements.valueValidator) { if (this.elements.valueValidator) {
for (let element of this.elements.yourQuery.childNodes) { for (let element of this.elements.yourQuery.childNodes) {
let queryElement = decodeURI(element.dataset.query); let queryElement = element.dataset.query;
if (queryElement !== 'undefined') { if (queryElement !== 'undefined') {
this.elements.yourQueryContent.push(queryElement); this.elements.yourQueryContent.push(queryElement);
} }
@ -632,8 +646,11 @@ class ConcordanceQueryBuilder {
englishPosHandler() { englishPosHandler() {
this.hideEverything(); this.hideEverything();
this.elements.englishPosBuilder.classList.remove('hide'); this.elements.englishPosBuilder.classList.remove('hide');
// this.elements.incidenceModifiersButton.classList.remove('hide'); this.elements.incidenceModifiersButton.classList.remove('hide');
this.elements.conditionContainer.classList.remove('hide'); this.elements.conditionContainer.classList.remove('hide');
this.elements.incidenceModifiersButton.firstElementChild.classList.remove('disabled');
this.elements.or.classList.remove('disabled');
this.elements.and.classList.remove('disabled');
// Resets materialize select dropdown // Resets materialize select dropdown
let selectInstance = M.FormSelect.getInstance(this.elements.englishPos); let selectInstance = M.FormSelect.getInstance(this.elements.englishPos);
@ -644,8 +661,11 @@ class ConcordanceQueryBuilder {
germanPosHandler() { germanPosHandler() {
this.hideEverything(); this.hideEverything();
this.elements.germanPosBuilder.classList.remove('hide'); this.elements.germanPosBuilder.classList.remove('hide');
// this.elements.incidenceModifiersButton.classList.remove('hide'); this.elements.incidenceModifiersButton.classList.remove('hide');
this.elements.conditionContainer.classList.remove('hide'); this.elements.conditionContainer.classList.remove('hide');
this.elements.incidenceModifiersButton.firstElementChild.classList.remove('disabled');
this.elements.or.classList.remove('disabled');
this.elements.and.classList.remove('disabled');
// Resets materialize select dropdown // Resets materialize select dropdown
let selectInstance = M.FormSelect.getInstance(this.elements.germanPos); let selectInstance = M.FormSelect.getInstance(this.elements.germanPos);
@ -656,14 +676,27 @@ class ConcordanceQueryBuilder {
simplePosBuilder() { simplePosBuilder() {
this.hideEverything(); this.hideEverything();
this.elements.simplePosBuilder.classList.remove('hide'); this.elements.simplePosBuilder.classList.remove('hide');
// this.elements.incidenceModifiersButton.classList.remove('hide'); this.elements.incidenceModifiersButton.classList.remove('hide');
this.elements.conditionContainer.classList.remove('hide'); this.elements.conditionContainer.classList.remove('hide');
this.elements.simplePos.selectedIndex = 0; this.elements.simplePos.selectedIndex = 0;
this.elements.incidenceModifiersButton.firstElementChild.classList.remove('disabled');
this.elements.or.classList.remove('disabled');
this.elements.and.classList.remove('disabled');
// Resets materialize select dropdown // Resets materialize select dropdown
let selectInstance = M.FormSelect.getInstance(this.elements.simplePos); let selectInstance = M.FormSelect.getInstance(this.elements.simplePos);
selectInstance.input.value = 'simple_pos tagset'; selectInstance.input.value = 'simple_pos tagset';
this.elements.simplePos.value = 'default'; this.elements.simplePos.value = 'default';
M.FormSelect.init(
selectInstance,
{
dropdownOptions: {
direction: 'bottom',
coverTrigger: false
}
}
)
} }
emptyTokenHandler() { emptyTokenHandler() {
@ -671,6 +704,8 @@ class ConcordanceQueryBuilder {
this.elements.tokenQueryFilled = true; this.elements.tokenQueryFilled = true;
this.hideEverything(); this.hideEverything();
this.elements.incidenceModifiersButton.classList.remove('hide'); this.elements.incidenceModifiersButton.classList.remove('hide');
this.elements.incidenceModifiersButton.firstElementChild.classList.remove('disabled');
} }
//#endregion Dropdown Select Handler //#endregion Dropdown Select Handler
@ -686,7 +721,7 @@ class ConcordanceQueryBuilder {
} }
if (elem === this.elements.optionGroup) { if (elem === this.elements.optionGroup) {
input.value += '( option1 | option2 )'; input.value += '(option1|option2)';
let firstIndex = input.value.indexOf('option1'); let firstIndex = input.value.indexOf('option1');
let lastIndex = firstIndex + 'option1'.length; let lastIndex = firstIndex + 'option1'.length;
input.focus(); input.focus();
@ -694,6 +729,7 @@ class ConcordanceQueryBuilder {
} else if (elem === this.elements.wildcardChar) { } else if (elem === this.elements.wildcardChar) {
input.value += '.'; input.value += '.';
} }
this.inputFieldHandler();
} }
nSubmitHandler() { nSubmitHandler() {
@ -807,8 +843,7 @@ class ConcordanceQueryBuilder {
} else { } else {
input = this.elements.lemmaInput; input = this.elements.lemmaInput;
} }
input.value += elem.dataset.token;
input.value += ' ' + elem.dataset.token;
} }
} }

View File

@ -22,9 +22,11 @@ Requests.JSONfetch = (input, init={}) => {
response.json() response.json()
.then( .then(
(json) => { (json) => {
let message = json.message || json; let message = json.message;
let category = json.category || 'message'; let category = json.category || 'message';
if (message) {
app.flash(message, category); app.flash(message, category);
}
}, },
(error) => { (error) => {
app.flash(`[${response.status}]: ${response.statusText}`, 'error'); app.flash(`[${response.status}]: ${response.statusText}`, 'error');

View File

@ -31,6 +31,14 @@ Requests.corpora.entity.generateShareLink = (corpusId, role, expiration) => {
return Requests.JSONfetch(input, init); return Requests.JSONfetch(input, init);
}; };
/**
 * Issue a GET request to /corpora/stopwords through Requests.JSONfetch.
 *
 * @returns whatever Requests.JSONfetch returns for that request
 */
Requests.corpora.entity.getStopwords = () => {
  const requestOptions = {method: 'GET'};
  return Requests.JSONfetch('/corpora/stopwords', requestOptions);
};
Requests.corpora.entity.isPublic = {}; Requests.corpora.entity.isPublic = {};
Requests.corpora.entity.isPublic.update = (corpusId, isPublic) => { Requests.corpora.entity.isPublic.update = (corpusId, isPublic) => {
@ -43,4 +51,3 @@ Requests.corpora.entity.isPublic.update = (corpusId, isPublic) => {
}; };

View File

@ -91,8 +91,7 @@ class AdminUserList extends ResourceList {
let listAction = listActionElement === null ? 'view' : listActionElement.dataset.listAction; let listAction = listActionElement === null ? 'view' : listActionElement.dataset.listAction;
switch (listAction) { switch (listAction) {
case 'delete': { case 'delete': {
console.log('delete', itemId); Requests.users.entity.delete(itemId);
Utils.deleteUserRequest(itemId);
if (itemId === currentUserId) {window.location.href = '/';} if (itemId === currentUserId) {window.location.href = '/';}
break; break;
} }

View File

@ -0,0 +1,112 @@
/**
 * Resource list rendering one table row per corpus text with its title,
 * publishing year and several count columns, with clickable sort icons
 * in the table header.
 */
class CorpusTextInfoList extends ResourceList {
  /**
   * Create a list for every `.corpus-text-info-list` element that is not
   * marked with `.no-autoinit`.
   */
  static autoInit() {
    for (let corpusTextInfoListElement of document.querySelectorAll('.corpus-text-info-list:not(.no-autoinit)')) {
      new CorpusTextInfoList(corpusTextInfoListElement);
    }
  }

  // Defaults merged with the caller supplied options in the constructor.
  static defaultOptions = {
    page: 5
  };

  /**
   * @param {Element} listContainerElement element the list is rendered into
   * @param {object} [options={}] overrides for `defaultOptions`
   */
  constructor(listContainerElement, options = {}) {
    let _options = Utils.mergeObjectsDeep(
      CorpusTextInfoList.defaultOptions,
      options
    );
    super(listContainerElement, _options);
    this.isInitialized = false;
    // Keep the header icons in sync whenever one of the sort triggers is clicked.
    for (let sortElement of this.listContainerElement.querySelectorAll('.sort')) {
      sortElement.addEventListener('click', () => {this.renderSortElement(sortElement);});
    }
  }

  /** Template function producing the HTML of a single table row. */
  get item() {
    return () => {
      return `
<tr class="list-item clickable hoverable">
<td><span class="title"></span> (<span class="publishing_year"></span>)</td>
<td><span class="num_tokens"></span></td>
<td><span class="num_sentences"></span></td>
<td><span class="num_unique_words"></span></td>
<td><span class="num_unique_lemmas"></span></td>
<td><span class="num_unique_pos"></span></td>
<td><span class="num_unique_simple_pos"></span></td>
</tr>
`.trim();
    };
  }

  /** Names of the values filled into the row template (one per span class). */
  get valueNames() {
    return [
      'title',
      'publishing_year',
      'num_tokens',
      'num_sentences',
      'num_unique_words',
      'num_unique_lemmas',
      'num_unique_pos',
      'num_unique_simple_pos'
    ];
  }

  /**
   * Give the container an id if it has none and render the search field,
   * the table skeleton and the pagination element into it.
   */
  initListContainerElement() {
    if (!this.listContainerElement.hasAttribute('id')) {
      // Prefix mirrors this list's own class name (was 'corpus-file-list-').
      this.listContainerElement.id = Utils.generateElementId('corpus-text-info-list-');
    }
    let listSearchElementId = Utils.generateElementId(`${this.listContainerElement.id}-search-`);
    this.listContainerElement.innerHTML = `
<div class="input-field">
<i class="material-icons prefix">search</i>
<input id="${listSearchElementId}" class="search" type="text"></input>
<label for="${listSearchElementId}">Search corpus file</label>
</div>
<table>
<thead>
<tr>
<th>Text<span class="sort right material-icons" data-sort="title" style="cursor:pointer; color:#aa9cc9">arrow_drop_down</span></th>
<th>Tokens<span class="sort right material-icons" data-sort="num_tokens" style="cursor:pointer">arrow_drop_down</span></th>
<th>Sentences<span class="sort right material-icons" data-sort="num_sentences" style="cursor:pointer">arrow_drop_down</span></th>
<th>Unique words<span class="sort right material-icons" data-sort="num_unique_words" style="cursor:pointer">arrow_drop_down</span></th>
<th>Unique lemmas<span class="sort right material-icons" data-sort="num_unique_lemmas" style="cursor:pointer">arrow_drop_down</span></th>
<th>Unique pos<span class="sort right material-icons" data-sort="num_unique_pos" style="cursor:pointer">arrow_drop_down</span></th>
<th>Unique simple pos<span class="sort right material-icons" data-sort="num_unique_simple_pos" style="cursor:pointer">arrow_drop_down</span></th>
</tr>
</thead>
<tbody class="list"></tbody>
</table>
<ul class="pagination"></ul>
`.trim();
  }

  /**
   * Pick the displayed fields out of a corpus text record.
   *
   * @param {object} corpusTextData record providing the fields listed in `valueNames`
   * @returns {object} object holding exactly those fields
   */
  mapResourceToValue(corpusTextData) {
    const {
      title,
      publishing_year,
      num_tokens,
      num_sentences,
      num_unique_words,
      num_unique_lemmas,
      num_unique_pos,
      num_unique_simple_pos
    } = corpusTextData;
    return {
      title,
      publishing_year,
      num_tokens,
      num_sentences,
      num_unique_words,
      num_unique_lemmas,
      num_unique_pos,
      num_unique_simple_pos
    };
  }

  /** Default ordering: by title. */
  sort() {
    this.listjs.sort('title');
  }

  /**
   * Highlight the clicked sort icon and reset all other sort icons.
   *
   * The clicked icon shows 'arrow_drop_down' if it carries the class 'asc'
   * at the time of the call, otherwise 'arrow_drop_up'.
   * NOTE(review): presumably the list library toggles 'asc'/'desc' on these
   * elements; the icon choice depends on handler order, so verify it.
   *
   * @param {Element} clickedSortElement the `.sort` element that was clicked
   */
  renderSortElement(clickedSortElement) {
    for (let sortElement of this.listContainerElement.querySelectorAll('.sort')) {
      if (sortElement === clickedSortElement) {continue;}
      sortElement.classList.remove('asc', 'desc');
      sortElement.style.color = 'black';
      sortElement.innerHTML = 'arrow_drop_down';
    }
    clickedSortElement.style.color = '#aa9cc9';
    clickedSortElement.innerHTML = clickedSortElement.classList.contains('asc') ? 'arrow_drop_down' : 'arrow_drop_up';
  }
}

View File

@ -0,0 +1,141 @@
/**
 * Resource list of tokens with a checkbox per row. The set of selected
 * terms is handed to the 'Static Visualization (beta)' extension of the
 * global `corpusAnalysisApp` whenever it changes.
 */
class CorpusTokenList extends ResourceList {
  /**
   * Create a list for every `.corpus-token-list` element that is not
   * marked with `.no-autoinit`.
   */
  static autoInit() {
    const selector = '.corpus-token-list:not(.no-autoinit)';
    for (let element of document.querySelectorAll(selector)) {
      new CorpusTokenList(element);
    }
  }

  // Defaults merged with the caller supplied options in the constructor.
  static defaultOptions = {
    page: 7
  };

  /**
   * @param {Element} listContainerElement element the list is rendered into
   * @param {object} [options={}] overrides for `defaultOptions`
   */
  constructor(listContainerElement, options = {}) {
    super(
      listContainerElement,
      Utils.mergeObjectsDeep(CorpusTokenList.defaultOptions, options)
    );
    this.listjs.list.addEventListener('click', (event) => {this.onClick(event)});
    this.selectedItemTerms = new Set();

    // After each sort, add the terms of all rendered rows flagged as most
    // frequent to the selection and redraw the graphic.
    this.listjs.on('sortComplete', () => {
      let renderedItems = Array.from(this.listjs.items).filter(item => item.elm);
      for (let {elm} of renderedItems) {
        let termElement = elm.querySelector('.term');
        if (elm.dataset.mostfrequent === 'true') {
          this.selectedItemTerms.add(termElement.textContent);
        }
      }
      corpusAnalysisApp.extensions['Static Visualization (beta)'].renderFrequenciesGraphic(this.selectedItemTerms);
    });

    // The reset button restores the selection to the most frequent terms.
    let resetButton = this.listContainerElement.querySelector('#token-list-reset-button');
    resetButton.addEventListener('click', () => {
      this.selectedItemTerms.clear();
      let renderedItems = Array.from(this.listjs.items).filter(item => item.elm);
      for (let {elm} of renderedItems) {
        let termElement = elm.querySelector('.term');
        let isMostFrequent = elm.dataset.mostfrequent === 'true';
        elm.querySelector('.select-checkbox').checked = isMostFrequent;
        if (isMostFrequent) {
          this.selectedItemTerms.add(termElement.textContent);
        }
      }
      corpusAnalysisApp.extensions['Static Visualization (beta)'].renderFrequenciesGraphic(this.selectedItemTerms);
    });
  }

  /** Template function producing the HTML of a single table row. */
  get item() {
    return (values) => {
      // Rows flagged as most frequent start with a checked checkbox.
      return `
<tr class="list-item clickable hoverable">
<td>
<label class="list-action-trigger" data-list-action="select">
<input class="select-checkbox" type="checkbox" ${values.mostFrequent ? 'checked="checked"' : ''}>
<span class="disable-on-click"></span>
</label>
</td>
<td><span class="term"></span></td>
<td><span class="count"></span></td>
<td><span class="frequency"></span></td>
</tr>
`.trim();
    };
  }

  /** Value names of a row; `mostFrequent` is declared as a data attribute. */
  get valueNames() {
    return [
      'term',
      'count',
      {data: ['mostFrequent']},
      'frequency'
    ];
  }

  /**
   * Give the container an id if it has none and render the search field,
   * the table skeleton (including the reset button) and the pagination.
   */
  initListContainerElement() {
    let container = this.listContainerElement;
    if (!container.hasAttribute('id')) {
      container.id = Utils.generateElementId('corpus-token-list-');
    }
    let listSearchElementId = Utils.generateElementId(`${container.id}-search-`);
    container.innerHTML = `
<div class="input-field">
<i class="material-icons prefix">search</i>
<input id="${listSearchElementId}" class="search" type="text"></input>
<label for="${listSearchElementId}">Search token</label>
</div>
<table>
<thead>
<tr>
<th style="width:15%;">
<span class="material-icons" style="cursor:pointer" id="token-list-reset-button">refresh</span>
</th>
<th>Term</th>
<th>Count</th>
<th>Frequency</th>
</tr>
</thead>
<tbody class="list"></tbody>
</table>
<ul class="pagination"></ul>
`.trim();
  }

  /**
   * Pick the displayed fields out of a token record.
   *
   * @param {object} corpusTokenData record with `term`, `count` and `mostFrequent`
   * @returns {object} row values; `frequency` is always the placeholder '-'
   */
  mapResourceToValue(corpusTokenData) {
    const {term, count, mostFrequent} = corpusTokenData;
    return {term, count, mostFrequent, frequency: '-'};
  }

  /** Default ordering: by count, descending. */
  sort() {
    this.listjs.sort('count', {order: 'desc'});
  }

  /**
   * Click handler of the list body: toggles a term in the selection when
   * its checkbox is clicked and redraws the graphic.
   *
   * @param {Event} event click event raised inside the list
   */
  onClick(event) {
    let target = event.target;
    if (target.closest('.disable-on-click') !== null) {return;}
    let listItemElement = target.closest('.list-item');
    if (listItemElement === null) {return;}
    let term = listItemElement.querySelector('.term').textContent;
    let listActionElement = target.closest('.list-action-trigger[data-list-action]');
    // 'select' is the only action this list handles; ignore everything else.
    if (listActionElement === null || listActionElement.dataset.listAction !== 'select') {return;}
    if (target.checked) {
      this.selectedItemTerms.add(term);
    } else {
      this.selectedItemTerms.delete(term);
    }
    corpusAnalysisApp.extensions['Static Visualization (beta)'].renderFrequenciesGraphic(this.selectedItemTerms);
  }
}

View File

@ -15,6 +15,8 @@ class ResourceList {
UserList.autoInit(); UserList.autoInit();
AdminUserList.autoInit(); AdminUserList.autoInit();
CorpusFollowerList.autoInit(); CorpusFollowerList.autoInit();
CorpusTextInfoList.autoInit();
CorpusTokenList.autoInit();
} }
static defaultOptions = { static defaultOptions = {

View File

@ -0,0 +1,688 @@
cqi.api.APIClient = class APIClient {
/**
* @param {string} host
* @param {number} [timeout=60] timeout
* @param {string} [version=0.1] version
*/
constructor(host, timeout = 60, version = '0.1') {
this.host = host;
this.timeout = timeout * 1000; // convert seconds to milliseconds
this.version = version;
this.socket = io(
this.host,
{
transports: ['websocket'],
upgrade: false
}
);
}
/**
* @param {string} fn_name
* @param {object} [fn_args={}]
* @returns {Promise}
*/
async #request(fn_name, fn_args = {}) {
// TODO: implement timeout
let response = await this.socket.emitWithAck('exec', fn_name, fn_args);
if (response.code === 200) {
return response.payload;
} else if (response.code === 500) {
throw new Error(`[${response.code}] ${response.msg}`);
} else if (response.code === 502) {
if (response.payload.code in cqi.errors.lookup) {
throw new cqi.errors.lookup[response.payload.code]();
} else {
throw new cqi.errors.CQiError();
}
}
}
/**
* @param {string} username
* @param {string} password
* @returns {Promise<cqi.status.StatusConnectOk>}
*/
async ctrl_connect(username, password) {
const fn_name = 'ctrl_connect';
const fn_args = {username: username, password: password};
let payload = await this.#request(fn_name, fn_args);
return new cqi.status.lookup[payload.code]();
}
/**
* @returns {Promise<cqi.status.StatusByeOk>}
*/
async ctrl_bye() {
const fn_name = 'ctrl_bye';
let payload = await this.#request(fn_name);
return new cqi.status.lookup[payload.code]();
}
/**
* @returns {Promise<null>}
*/
async ctrl_user_abort() {
const fn_name = 'ctrl_user_abort';
return await this.#request(fn_name);
}
/**
* @returns {Promise<cqi.status.StatusPingOk>}
*/
async ctrl_ping() {
const fn_name = 'ctrl_ping';
let payload = await this.#request(fn_name);
return new cqi.status.lookup[payload.code]();
}
/**
* Full-text error message for the last general error reported
* by the CQi server
*
* @returns {Promise<string>}
*/
async ctrl_last_general_error() {
const fn_name = 'ctrl_last_general_error';
return await this.#request(fn_name);
}
/**
* @returns {Promise<boolean>}
*/
async ask_feature_cqi_1_0() {
const fn_name = 'ask_feature_cqi_1_0';
return await this.#request(fn_name);
}
/**
* @returns {Promise<boolean>}
*/
async ask_feature_cl_2_3() {
const fn_name = 'ask_feature_cl_2_3';
return await this.#request(fn_name);
}
/**
* @returns {Promise<boolean>}
*/
async ask_feature_cqp_2_3() {
const fn_name = 'ask_feature_cqp_2_3';
return await this.#request(fn_name);
}
/**
* @returns {Promise<string[]>}
*/
async corpus_list_corpora() {
const fn_name = 'corpus_list_corpora';
return await this.#request(fn_name);
}
/**
* @param {string} corpus
* @returns {Promise<string>}
*/
async corpus_charset(corpus) {
const fn_name = 'corpus_charset';
const fn_args = {corpus: corpus};
return await this.#request(fn_name, fn_args);
}
/**
* @param {string} corpus
* @returns {Promise<string[]>}
*/
async corpus_properties(corpus) {
const fn_name = 'corpus_properties';
const fn_args = {corpus: corpus};
return await this.#request(fn_name, fn_args);
}
/**
* @param {string} corpus
* @returns {Promise<string[]>}
*/
async corpus_positional_attributes(corpus) {
const fn_name = 'corpus_positional_attributes';
const fn_args = {corpus: corpus};
return await this.#request(fn_name, fn_args);
}
/**
* @param {string} corpus
* @returns {Promise<string[]>}
*/
async corpus_structural_attributes(corpus) {
const fn_name = 'corpus_structural_attributes';
const fn_args = {corpus: corpus};
return await this.#request(fn_name, fn_args);
}
/**
* @param {string} corpus
* @param {string} attribute
* @returns {Promise<boolean>}
*/
async corpus_structural_attribute_has_values(corpus, attribute) {
const fn_name = 'corpus_structural_attribute_has_values';
const fn_args = {corpus: corpus, attribute: attribute};
return await this.#request(fn_name, fn_args);
}
/**
* @param {string} corpus
* @returns {Promise<string[]>}
*/
async corpus_alignment_attributes(corpus) {
const fn_name = 'corpus_alignment_attributes';
const fn_args = {corpus: corpus};
return await this.#request(fn_name, fn_args);
}
/**
* the full name of <corpus> as specified in its registry entry
*
* @param {string} corpus
* @returns {Promise<string>}
*/
async corpus_full_name(corpus) {
const fn_name = 'corpus_full_name';
const fn_args = {corpus: corpus};
return await this.#request(fn_name, fn_args);
}
/**
* returns the contents of the .info file of <corpus> as a list of lines
*
* @param {string} corpus
* @returns {Promise<string[]>}
*/
async corpus_info(corpus) {
const fn_name = 'corpus_info';
const fn_args = {corpus: corpus};
return await this.#request(fn_name, fn_args);
}
/**
* try to unload a corpus and all its attributes from memory
*
* @param {string} corpus
* @returns {Promise<cqi.status.StatusOk>}
*/
async corpus_drop_corpus(corpus) {
const fn_name = 'corpus_drop_corpus';
const fn_args = {corpus: corpus};
let payload = await this.#request(fn_name, fn_args);
return new cqi.status.lookup[payload.code]();
}
/**
* returns the size of <attribute>:
* - number of tokens (positional)
* - number of regions (structural)
* - number of alignments (alignment)
*
* @param {string} attribute
* @returns {Promise<number>}
*/
async cl_attribute_size(attribute) {
const fn_name = 'cl_attribute_size';
const fn_args = {attribute: attribute};
return await this.#request(fn_name, fn_args);
}
/**
* returns the number of entries in the lexicon of a positional attribute;
*
* valid lexicon IDs range from 0 .. (lexicon_size - 1)
*
* @param {string} attribute
* @returns {Promise<number>}
*/
async cl_lexicon_size(attribute) {
const fn_name = 'cl_lexicon_size';
const fn_args = {attribute: attribute};
return await this.#request(fn_name, fn_args);
}
/**
* unload attribute from memory
*
* @param {string} attribute
* @returns {Promise<cqi.status.StatusOk>}
*/
async cl_drop_attribute(attribute) {
const fn_name = 'cl_drop_attribute';
const fn_args = {attribute: attribute};
let payload = await this.#request(fn_name, fn_args);
return new cqi.status.lookup[payload.code]();
}
/**
* NOTE: simple (scalar) mappings are applied to lists (the returned list
* has exactly the same length as the list passed as an argument)
*/
/**
* returns -1 for every string in <strings> that is not found in the lexicon
*
* @param {string} attribute
* @param {string[]} strings
* @returns {Promise<number[]>}
*/
async cl_str2id(attribute, strings) {
const fn_name = 'cl_str2id';
const fn_args = {attribute: attribute, strings: strings};
return await this.#request(fn_name, fn_args);
}
/**
* returns "" for every ID in <id> that is out of range
*
* @param {string} attribute
* @param {number[]} id
* @returns {Promise<string[]>}
*/
async cl_id2str(attribute, id) {
const fn_name = 'cl_id2str';
const fn_args = {attribute: attribute, id: id};
return await this.#request(fn_name, fn_args);
}
/**
* returns 0 for every ID in <id> that is out of range
*
* @param {string} attribute
* @param {number[]} id
* @returns {Promise<number[]>}
*/
async cl_id2freq(attribute, id) {
const fn_name = 'cl_id2freq';
const fn_args = {attribute: attribute, id: id};
return await this.#request(fn_name, fn_args);
}
/**
* returns -1 for every corpus position in <cpos> that is out of range
*
* @param {string} attribute
* @param {number[]} cpos
* @returns {Promise<number[]>}
*/
async cl_cpos2id(attribute, cpos) {
const fn_name = 'cl_cpos2id';
const fn_args = {attribute: attribute, cpos: cpos};
return await this.#request(fn_name, fn_args);
}
/**
* returns "" for every corpus position in <cpos> that is out of range
*
* @param {string} attribute
* @param {number[]} cpos
* @returns {Promise<string[]>}
*/
async cl_cpos2str(attribute, cpos) {
const fn_name = 'cl_cpos2str';
const fn_args = {attribute: attribute, cpos: cpos};
return await this.#request(fn_name, fn_args);
}
/**
* returns -1 for every corpus position not inside a structure region
*
* @param {string} attribute
* @param {number[]} cpos
* @returns {Promise<number[]>}
*/
async cl_cpos2struc(attribute, cpos) {
const fn_name = 'cl_cpos2struc';
const fn_args = {attribute: attribute, cpos: cpos};
return await this.#request(fn_name, fn_args);
}
/**
* NOTE: temporary addition for the Euralex2000 tutorial, but should
* probably be included in CQi specs
*/
/**
* returns left boundary of s-attribute region enclosing cpos,
* -1 if not in region
*
* @param {string} attribute
* @param {number[]} cpos
* @returns {Promise<number[]>}
*/
async cl_cpos2lbound(attribute, cpos) {
const fn_name = 'cl_cpos2lbound';
const fn_args = {attribute: attribute, cpos: cpos};
return await this.#request(fn_name, fn_args);
}
/**
* returns right boundary of s-attribute region enclosing cpos,
* -1 if not in region
*
* @param {string} attribute
* @param {number[]} cpos
* @returns {Promise<number[]>}
*/
async cl_cpos2rbound(attribute, cpos) {
const fn_name = 'cl_cpos2rbound';
const fn_args = {attribute: attribute, cpos: cpos};
return await this.#request(fn_name, fn_args);
}
/**
* returns -1 for every corpus position not inside an alignment
*
* @param {string} attribute
* @param {number[]} cpos
* @returns {Promise<number[]>}
*/
async cl_cpos2alg(attribute, cpos) {
const fn_name = 'cl_cpos2alg';
const fn_args = {attribute: attribute, cpos: cpos};
return await this.#request(fn_name, fn_args);
}
/**
* returns annotated string values of structure regions in <strucs>;
* "" if out of range
*
* check corpus_structural_attribute_has_values(<attribute>) first
*
* @param {string} attribute
* @param {number[]} strucs
* @returns {Promise<string[]>}
*/
async cl_struc2str(attribute, strucs) {
const fn_name = 'cl_struc2str';
const fn_args = {attribute: attribute, strucs: strucs};
return await this.#request(fn_name, fn_args);
}
/**
* NOTE: the following mappings take a single argument and return multiple
* values, including lists of arbitrary size
*/
/**
* returns all corpus positions where the given token occurs
*
* @param {string} attribute
* @param {number} id
* @returns {Promise<number[]>}
*/
async cl_id2cpos(attribute, id) {
const fn_name = 'cl_id2cpos';
const fn_args = {attribute: attribute, id: id};
return await this.#request(fn_name, fn_args);
}
/**
* returns all corpus positions where one of the tokens in <id_list> occurs;
* the returned list is sorted as a whole, not per token id
*
* @param {string} attribute
* @param {number[]} id_list
* @returns {Promise<number[]>}
*/
async cl_idlist2cpos(attribute, id_list) {
const fn_name = 'cl_idlist2cpos';
const fn_args = {attribute: attribute, id_list: id_list};
return await this.#request(fn_name, fn_args);
}
/**
* returns lexicon IDs of all tokens that match <regex>;
* the returned list may be empty (size 0);
*
* @param {string} attribute
* @param {string} regex
* @returns {Promise<number[]>}
*/
async cl_regex2id(attribute, regex) {
const fn_name = 'cl_regex2id';
const fn_args = {attribute: attribute, regex: regex};
return await this.#request(fn_name, fn_args);
}
/**
* returns start and end corpus positions of structure region <struc>
*
* @param {string} attribute
* @param {number} struc
* @returns {Promise<[number, number]>}
*/
async cl_struc2cpos(attribute, struc) {
const fn_name = 'cl_struc2cpos';
const fn_args = {attribute: attribute, struc: struc};
return await this.#request(fn_name, fn_args);
}
/**
* returns (src_start, src_end, target_start, target_end)
*
* @param {string} attribute
* @param {number} alg
* @returns {Promise<[number, number, number, number]>}
*/
async alg2cpos(attribute, alg) {
const fn_name = 'alg2cpos';
const fn_args = {attribute: attribute, alg: alg};
return await this.#request(fn_name, fn_args);
}
/**
* <query> must include the ';' character terminating the query.
*
* @param {string} mother_corpus
* @param {string} subcorpus_name
* @param {string} query
* @returns {Promise<cqi.status.StatusOk>}
*/
async cqp_query(mother_corpus, subcorpus_name, query) {
const fn_name = 'cqp_query';
const fn_args = {mother_corpus: mother_corpus, subcorpus_name: subcorpus_name, query: query};
let payload = await this.#request(fn_name, fn_args);
return new cqi.status.lookup[payload.code]();
}
/**
* @param {string} corpus
* @returns {Promise<string[]>}
*/
async cqp_list_subcorpora(corpus) {
const fn_name = 'cqp_list_subcorpora';
const fn_args = {corpus: corpus};
return await this.#request(fn_name, fn_args);
}
/**
* @param {string} subcorpus
* @returns {Promise<number>}
*/
async cqp_subcorpus_size(subcorpus) {
const fn_name = 'cqp_subcorpus_size';
const fn_args = {subcorpus: subcorpus};
return await this.#request(fn_name, fn_args);
}
/**
* @param {string} subcorpus
* @param {number} field
* @returns {Promise<boolean>}
*/
async cqp_subcorpus_has_field(subcorpus, field) {
const fn_name = 'cqp_subcorpus_has_field';
const fn_args = {subcorpus: subcorpus, field: field};
return await this.#request(fn_name, fn_args);
}
/**
* Dump the values of <field> for match ranges <first> .. <last>
* in <subcorpus>. <field> is one of the CQI_CONST_FIELD_* constants.
*
* @param {string} subcorpus
* @param {number} field
* @param {number} first
* @param {number} last
* @returns {Promise<number[]>}
*/
async cqp_dump_subcorpus(subcorpus, field, first, last) {
const fn_name = 'cqp_dump_subcorpus';
const fn_args = {subcorpus: subcorpus, field: field, first: first, last: last};
return await this.#request(fn_name, fn_args);
}
/**
* delete a subcorpus from memory
*
* @param {string} subcorpus
* @returns {Promise<cqi.status.StatusOk>}
*/
async cqp_drop_subcorpus(subcorpus) {
const fn_name = 'cqp_drop_subcorpus';
const fn_args = {subcorpus: subcorpus};
let payload = await this.#request(fn_name, fn_args);
return new cqi.status.lookup[payload.code]();
}
/**
* NOTE: The following two functions are temporarily included for the
* Euralex 2000 tutorial demo
*/
/**
* frequency distribution of single tokens
*
* returns <n> (id, frequency) pairs flattened into a list of size 2*<n>
* field is one of
* - CQI_CONST_FIELD_MATCH
* - CQI_CONST_FIELD_TARGET
* - CQI_CONST_FIELD_KEYWORD
*
* NB: pairs are sorted by frequency desc.
*
* @param {string} subcorpus
* @param {number} cutoff
* @param {number} field
* @param {string} attribute
* @returns {Promise<number[]>}
*/
async cqp_fdist_1(subcorpus, cutoff, field, attribute) {
const fn_name = 'cqp_fdist_1';
const fn_args = {subcorpus: subcorpus, cutoff: cutoff, field: field, attribute: attribute};
return await this.#request(fn_name, fn_args);
}
/**
* frequency distribution of pairs of tokens
*
* returns <n> (id1, id2, frequency) pairs flattened into a list of
* size 3*<n>
*
* NB: triples are sorted by frequency desc.
*
* @param {string} subcorpus
* @param {number} cutoff
* @param {number} field1
* @param {string} attribute1
* @param {number} field2
* @param {string} attribute2
* @returns {Promise<number[]>}
*/
async cqp_fdist_2(subcorpus, cutoff, field1, attribute1, field2, attribute2) {
const fn_name = 'cqp_fdist_2';
const fn_args = {subcorpus: subcorpus, cutoff: cutoff, field1: field1, attribute1: attribute1, field2: field2, attribute2: attribute2};
return await this.#request(fn_name, fn_args);
}
/**************************************************************************
* NOTE: The following is not included in the CQi specification. *
**************************************************************************/
/**************************************************************************
* Custom additions for nopaque *
**************************************************************************/
/**
* @param {string} corpus
* @returns {Promise<cqi.status.StatusOk>}
*/
async ext_corpus_update_db(corpus) {
const fn_name = 'ext_corpus_update_db';
const fn_args = {corpus: corpus};
let payload = await this.#request(fn_name, fn_args);
return new cqi.status.lookup[payload.code]();
}
/**
* @param {string} corpus
* @returns {Promise<object>}
*/
async ext_corpus_static_data(corpus) {
const fn_name = 'ext_corpus_static_data';
const fn_args = {corpus: corpus};
let compressedEncodedData = await this.#request(fn_name, fn_args);
let data = pako.inflate(compressedEncodedData, {to: 'string'});
return JSON.parse(data);
}
/**
* @param {string} corpus
* @param {number=} page
* @param {number=} per_page
* @returns {Promise<object>}
*/
async ext_corpus_paginate_corpus(corpus, page, per_page) {
const fn_name = 'ext_corpus_paginate_corpus';
const fn_args = {corpus: corpus}
if (page !== undefined) {fn_args.page = page;}
if (per_page !== undefined) {fn_args.per_page = per_page;}
return await this.#request(fn_name, fn_args);
}
/**
* @param {string} subcorpus
* @param {number=} context
* @param {number=} page
* @param {number=} per_page
* @returns {Promise<object>}
*/
async ext_cqp_paginate_subcorpus(subcorpus, context, page, per_page) {
const fn_name = 'ext_cqp_paginate_subcorpus';
const fn_args = {subcorpus: subcorpus}
if (context !== undefined) {fn_args.context = context;}
if (page !== undefined) {fn_args.page = page;}
if (per_page !== undefined) {fn_args.per_page = per_page;}
return await this.#request(fn_name, fn_args);
}
/**
* @param {string} subcorpus
* @param {number[]} match_id_list
* @param {number=} context
* @returns {Promise<object>}
*/
async ext_cqp_partial_export_subcorpus(subcorpus, match_id_list, context) {
const fn_name = 'ext_cqp_partial_export_subcorpus';
const fn_args = {subcorpus: subcorpus, match_id_list: match_id_list};
if (context !== undefined) {fn_args.context = context;}
return await this.#request(fn_name, fn_args);
}
/**
* @param {string} subcorpus
* @param {number=} context
* @returns {Promise<object>}
*/
async ext_cqp_export_subcorpus(subcorpus, context) {
const fn_name = 'ext_cqp_export_subcorpus';
const fn_args = {subcorpus: subcorpus};
if (context !== undefined) {fn_args.context = context;}
return await this.#request(fn_name, fn_args);
}
};

View File

@ -0,0 +1 @@
// Namespace object for the low-level API layer; cqi.CQiClient expects to find
// the APIClient class here (cqi.api.APIClient).
cqi.api = {};

View File

@ -0,0 +1,57 @@
/**
 * High-level client. All methods delegate to the cqi.api.APIClient instance
 * stored in `this.api`.
 */
cqi.CQiClient = class CQiClient {
  /**
   * @param {string} host
   * @param {number} [timeout=60] timeout
   * @param {string} [version=0.1] version
   */
  constructor(host, timeout = 60, version = '0.1') {
    /** @type {cqi.api.APIClient} */
    this.api = new cqi.api.APIClient(host, timeout, version);
  }

  /**
   * A corpus collection bound to this client; a new collection object is
   * created on every access.
   *
   * @returns {cqi.models.corpora.CorpusCollection}
   */
  get corpora() {
    const collection = new cqi.models.corpora.CorpusCollection(this);
    return collection;
  }

  /**
   * Forwards to `api.ctrl_bye()`.
   *
   * @returns {Promise<cqi.status.StatusByeOk>}
   */
  async bye() {
    const status = await this.api.ctrl_bye();
    return status;
  }

  /**
   * Forwards to `api.ctrl_connect(username, password)`.
   *
   * @param {string} username
   * @param {string} password
   * @returns {Promise<cqi.status.StatusConnectOk>}
   */
  async connect(username, password) {
    const status = await this.api.ctrl_connect(username, password);
    return status;
  }

  /**
   * Forwards to `api.ctrl_ping()`.
   *
   * @returns {Promise<cqi.status.StatusPingOk>}
   */
  async ping() {
    const status = await this.api.ctrl_ping();
    return status;
  }

  /**
   * Forwards to `api.ctrl_user_abort()`.
   *
   * @returns {Promise<null>}
   */
  async userAbort() {
    const result = await this.api.ctrl_user_abort();
    return result;
  }

  /**
   * Alias for the "bye" method; it calls `api.ctrl_bye()` directly.
   *
   * @returns {Promise<cqi.status.StatusByeOk>}
   */
  async disconnect() {
    const status = await this.api.ctrl_bye();
    return status;
  }
};

185
app/static/js/cqi/errors.js Normal file
View File

@ -0,0 +1,185 @@
cqi.errors = {};

/**
 * Root of the error hierarchy defined in this file: every other class in
 * cqi.errors extends it, directly or indirectly, so catching CQiError
 * catches all of them.
 * `code` and `description` start out undefined; subclasses overwrite them.
 */
cqi.errors.CQiError = class CQiError extends Error {
  code = undefined;
  description = undefined;

  constructor(message) {
    super(message);
  }
};

/** Generic error, code 2; parent of the Error* classes below. */
cqi.errors.Error = class Error extends cqi.errors.CQiError {
  code = 2;
};

/** Code 513. */
cqi.errors.ErrorGeneralError = class ErrorGeneralError extends cqi.errors.Error {
  code = 513;
};

/** Code 514. */
cqi.errors.ErrorConnectRefused = class ErrorConnectRefused extends cqi.errors.Error {
  code = 514;
};

/** Code 515. */
cqi.errors.ErrorUserAbort = class ErrorUserAbort extends cqi.errors.Error {
  code = 515;
};

/** Code 516. */
cqi.errors.ErrorSyntaxError = class ErrorSyntaxError extends cqi.errors.Error {
  code = 516;
};
/**
 * Base class of the CL error family (code 4).
 *
 * The class expression is named CLError (it was `Error`), so that
 * `cqi.errors.CLError.name` and `err.constructor.name` identify this class
 * instead of reporting the misleading 'Error'.
 */
cqi.errors.CLError = class CLError extends cqi.errors.CQiError {
  constructor(message) {
    super(message);
    this.code = 4;
  }
};

/** Code 1025. */
cqi.errors.CLErrorNoSuchAttribute = class CLErrorNoSuchAttribute extends cqi.errors.CLError {
  constructor(message) {
    super(message);
    this.code = 1025;
    this.description = "CQi server couldn't open attribute";
  }
};

/** Code 1026. */
cqi.errors.CLErrorWrongAttributeType = class CLErrorWrongAttributeType extends cqi.errors.CLError {
  constructor(message) {
    super(message);
    this.code = 1026;
  }
};

/** Code 1027. */
cqi.errors.CLErrorOutOfRange = class CLErrorOutOfRange extends cqi.errors.CLError {
  constructor(message) {
    super(message);
    this.code = 1027;
  }
};

/** Code 1028. */
cqi.errors.CLErrorRegex = class CLErrorRegex extends cqi.errors.CLError {
  constructor(message) {
    super(message);
    this.code = 1028;
  }
};

/** Code 1029. */
cqi.errors.CLErrorCorpusAccess = class CLErrorCorpusAccess extends cqi.errors.CLError {
  constructor(message) {
    super(message);
    this.code = 1029;
  }
};

/** Code 1030. */
cqi.errors.CLErrorOutOfMemory = class CLErrorOutOfMemory extends cqi.errors.CLError {
  constructor(message) {
    super(message);
    this.code = 1030;
    this.description = 'CQi server has run out of memory; try discarding some other corpora and/or subcorpora';
  }
};

/** Code 1031. */
cqi.errors.CLErrorInternal = class CLErrorInternal extends cqi.errors.CLError {
  constructor(message) {
    super(message);
    this.code = 1031;
    this.description = "The classical 'please contact technical support' error";
  }
};
/**
 * Base class of the CQP error family (code 5).
 *
 * The class expression is named CQPError (it was `Error`), so that
 * `cqi.errors.CQPError.name` and `err.constructor.name` identify this class
 * instead of reporting the misleading 'Error'.
 */
cqi.errors.CQPError = class CQPError extends cqi.errors.CQiError {
  constructor(message) {
    super(message);
    this.code = 5;
  }
};

/** Code 1281. */
cqi.errors.CQPErrorGeneral = class CQPErrorGeneral extends cqi.errors.CQPError {
  constructor(message) {
    super(message);
    this.code = 1281;
  }
};

/** Code 1282. */
cqi.errors.CQPErrorNoSuchCorpus = class CQPErrorNoSuchCorpus extends cqi.errors.CQPError {
  constructor(message) {
    super(message);
    this.code = 1282;
  }
};

/** Code 1283. */
cqi.errors.CQPErrorInvalidField = class CQPErrorInvalidField extends cqi.errors.CQPError {
  constructor(message) {
    super(message);
    this.code = 1283;
  }
};

/** Code 1284. */
cqi.errors.CQPErrorOutOfRange = class CQPErrorOutOfRange extends cqi.errors.CQPError {
  constructor(message) {
    super(message);
    this.code = 1284;
    this.description = 'A number is out of range';
  }
};
/**
 * Maps each numeric error code to the error class that carries it.
 * Entries are listed in ascending code order, which is also the order in
 * which JavaScript enumerates integer-like keys.
 */
cqi.errors.lookup = {
  // family base classes
  2: cqi.errors.Error,
  4: cqi.errors.CLError,
  5: cqi.errors.CQPError,
  // Error* classes
  513: cqi.errors.ErrorGeneralError,
  514: cqi.errors.ErrorConnectRefused,
  515: cqi.errors.ErrorUserAbort,
  516: cqi.errors.ErrorSyntaxError,
  // CLError* classes
  1025: cqi.errors.CLErrorNoSuchAttribute,
  1026: cqi.errors.CLErrorWrongAttributeType,
  1027: cqi.errors.CLErrorOutOfRange,
  1028: cqi.errors.CLErrorRegex,
  1029: cqi.errors.CLErrorCorpusAccess,
  1030: cqi.errors.CLErrorOutOfMemory,
  1031: cqi.errors.CLErrorInternal,
  // CQPError* classes
  1281: cqi.errors.CQPErrorGeneral,
  1282: cqi.errors.CQPErrorNoSuchCorpus,
  1283: cqi.errors.CQPErrorInvalidField,
  1284: cqi.errors.CQPErrorOutOfRange
};

View File

@ -0,0 +1,289 @@
cqi.models.attributes = {};

/**
 * Common base of the attribute models. The getters read the raw `attrs`
 * object (keys `api_name`, `name`, `size`).
 */
cqi.models.attributes.Attribute = class Attribute extends cqi.models.resource.Model {
  /**
   * @returns {string}
   */
  get apiName() {
    const {api_name} = this.attrs;
    return api_name;
  }

  /**
   * @returns {string}
   */
  get name() {
    const {name} = this.attrs;
    return name;
  }

  /**
   * @returns {number}
   */
  get size() {
    const {size} = this.attrs;
    return size;
  }

  /**
   * Forwards to `api.cl_drop_attribute` with this attribute's api name.
   *
   * @returns {Promise<cqi.status.StatusOk>}
   */
  async drop() {
    const status = await this.client.api.cl_drop_attribute(this.apiName);
    return status;
  }
};
/**
 * Base collection for the attributes of one corpus.
 */
cqi.models.attributes.AttributeCollection = class AttributeCollection extends cqi.models.resource.Collection {
  /** @type{typeof cqi.models.attributes.Attribute} */
  static model = cqi.models.attributes.Attribute;

  /**
   * @param {cqi.CQiClient} client
   * @param {cqi.models.corpora.Corpus} corpus
   */
  constructor(client, corpus) {
    super(client);
    /** @type {cqi.models.corpora.Corpus} */
    this.corpus = corpus;
  }

  /**
   * Builds the raw attribute record. The api name is the corpus api name
   * and the attribute name joined by a dot; the size is fetched with
   * `api.cl_attribute_size`.
   *
   * @param {string} attributeName
   * @returns {Promise<object>}
   */
  async _get(attributeName) {
    /** @type{string} */
    const apiName = `${this.corpus.apiName}.${attributeName}`;
    const size = await this.client.api.cl_attribute_size(apiName);
    return {api_name: apiName, name: attributeName, size};
  }

  /**
   * @param {string} attributeName
   * @returns {Promise<cqi.models.attributes.Attribute>}
   */
  async get(attributeName) {
    const attrs = await this._get(attributeName);
    return this.prepareModel(attrs);
  }
};
/**
 * Alignment attribute model; both methods forward to the API client using
 * this attribute's api name.
 */
cqi.models.attributes.AlignmentAttribute = class AlignmentAttribute extends cqi.models.attributes.Attribute {
  /**
   * Forwards to `api.cl_alg2cpos`.
   *
   * @param {number} id
   * @returns {Promise<[number, number, number, number]>}
   */
  async cposById(id) {
    const positions = await this.client.api.cl_alg2cpos(this.apiName, id);
    return positions;
  }

  /**
   * Forwards to `api.cl_cpos2alg`.
   *
   * @param {number[]} cposList
   * @returns {Promise<number[]>}
   */
  async idsByCpos(cposList) {
    const ids = await this.client.api.cl_cpos2alg(this.apiName, cposList);
    return ids;
  }
};
/**
 * Collection of the alignment attributes of one corpus.
 */
cqi.models.attributes.AlignmentAttributeCollection = class AlignmentAttributeCollection extends cqi.models.attributes.AttributeCollection {
  /** @type{typeof cqi.models.attributes.AlignmentAttribute} */
  static model = cqi.models.attributes.AlignmentAttribute;

  /**
   * Asks `api.corpus_alignment_attributes` for the attribute names, then
   * loads the models one after another via `get`.
   *
   * @returns {Promise<cqi.models.attributes.AlignmentAttribute[]>}
   */
  async list() {
    /** @type {string[]} */
    const names = await this.client.api.corpus_alignment_attributes(this.corpus.apiName);
    /** @type {cqi.models.attributes.AlignmentAttribute[]} */
    const models = [];
    for (const name of names) {
      const model = await this.get(name);
      models.push(model);
    }
    return models;
  }
};
/**
 * Positional attribute model. Every async method forwards to one API client
 * call, passing this attribute's api name as the first argument.
 */
cqi.models.attributes.PositionalAttribute = class PositionalAttribute extends cqi.models.attributes.Attribute {
  /**
   * Reads `lexicon_size` from the raw attrs.
   *
   * @returns {number}
   */
  get lexiconSize() {
    const {lexicon_size} = this.attrs;
    return lexicon_size;
  }

  /**
   * Forwards to `api.cl_id2cpos`.
   *
   * @param {number} id
   * @returns {Promise<number[]>}
   */
  async cposById(id) {
    const cposList = await this.client.api.cl_id2cpos(this.apiName, id);
    return cposList;
  }

  /**
   * Forwards to `api.cl_idlist2cpos`.
   *
   * @param {number[]} idList
   * @returns {Promise<number[]>}
   */
  async cposByIds(idList) {
    const cposList = await this.client.api.cl_idlist2cpos(this.apiName, idList);
    return cposList;
  }

  /**
   * Forwards to `api.cl_id2freq`.
   *
   * @param {number[]} idList
   * @returns {Promise<number[]>}
   */
  async freqsByIds(idList) {
    const freqs = await this.client.api.cl_id2freq(this.apiName, idList);
    return freqs;
  }

  /**
   * Forwards to `api.cl_cpos2id`.
   *
   * @param {number[]} cposList
   * @returns {Promise<number[]>}
   */
  async idsByCpos(cposList) {
    const ids = await this.client.api.cl_cpos2id(this.apiName, cposList);
    return ids;
  }

  /**
   * Forwards to `api.cl_regex2id`.
   *
   * @param {string} regex
   * @returns {Promise<number[]>}
   */
  async idsByRegex(regex) {
    const ids = await this.client.api.cl_regex2id(this.apiName, regex);
    return ids;
  }

  /**
   * Forwards to `api.cl_str2id`.
   *
   * @param {string[]} valueList
   * @returns {Promise<number[]>}
   */
  async idsByValues(valueList) {
    const ids = await this.client.api.cl_str2id(this.apiName, valueList);
    return ids;
  }

  /**
   * Forwards to `api.cl_cpos2str`.
   *
   * @param {number[]} cposList
   * @returns {Promise<string[]>}
   */
  async valuesByCpos(cposList) {
    const values = await this.client.api.cl_cpos2str(this.apiName, cposList);
    return values;
  }

  /**
   * Forwards to `api.cl_id2str`.
   *
   * @param {number[]} idList
   * @returns {Promise<string[]>}
   */
  async valuesByIds(idList) {
    const values = await this.client.api.cl_id2str(this.apiName, idList);
    return values;
  }
};
/**
 * Collection of the positional attributes of one corpus.
 */
cqi.models.attributes.PositionalAttributeCollection = class PositionalAttributeCollection extends cqi.models.attributes.AttributeCollection {
  /** @type{typeof cqi.models.attributes.PositionalAttribute} */
  static model = cqi.models.attributes.PositionalAttribute;

  /**
   * Extends the base record with `lexicon_size`, fetched with
   * `api.cl_lexicon_size`.
   *
   * @param {string} positionalAttributeName
   * @returns {Promise<object>}
   */
  async _get(positionalAttributeName) {
    const attrs = await super._get(positionalAttributeName);
    const lexiconSize = await this.client.api.cl_lexicon_size(attrs.api_name);
    attrs.lexicon_size = lexiconSize;
    return attrs;
  }

  /**
   * Asks `api.corpus_positional_attributes` for the attribute names, then
   * loads the models one after another via `get`.
   *
   * @returns {Promise<cqi.models.attributes.PositionalAttribute[]>}
   */
  async list() {
    const names = await this.client.api.corpus_positional_attributes(this.corpus.apiName);
    const models = [];
    for (const name of names) {
      const model = await this.get(name);
      models.push(model);
    }
    return models;
  }
};
/**
 * Structural attribute model. Every async method forwards to one API client
 * call, passing this attribute's api name as the first argument.
 */
cqi.models.attributes.StructuralAttribute = class StructuralAttribute extends cqi.models.attributes.Attribute {
  /**
   * Reads `has_values` from the raw attrs.
   *
   * @returns {boolean}
   */
  get hasValues() {
    const {has_values} = this.attrs;
    return has_values;
  }

  /**
   * Forwards to `api.cl_struc2cpos`.
   *
   * @param {number} id
   * @returns {Promise<[number, number]>}
   */
  async cposById(id) {
    const bounds = await this.client.api.cl_struc2cpos(this.apiName, id);
    return bounds;
  }

  /**
   * Forwards to `api.cl_cpos2struc`.
   *
   * @param {number[]} cposList
   * @returns {Promise<number[]>}
   */
  async idsByCpos(cposList) {
    const ids = await this.client.api.cl_cpos2struc(this.apiName, cposList);
    return ids;
  }

  /**
   * Forwards to `api.cl_cpos2lbound`.
   *
   * @param {number[]} cposList
   * @returns {Promise<number[]>}
   */
  async lboundByCpos(cposList) {
    const lbounds = await this.client.api.cl_cpos2lbound(this.apiName, cposList);
    return lbounds;
  }

  /**
   * Forwards to `api.cl_cpos2rbound`.
   *
   * @param {number[]} cposList
   * @returns {Promise<number[]>}
   */
  async rboundByCpos(cposList) {
    const rbounds = await this.client.api.cl_cpos2rbound(this.apiName, cposList);
    return rbounds;
  }

  /**
   * Forwards to `api.cl_struc2str`.
   *
   * @param {number[]} idList
   * @returns {Promise<string[]>}
   */
  async valuesByIds(idList) {
    const values = await this.client.api.cl_struc2str(this.apiName, idList);
    return values;
  }
};
/**
 * Collection of the structural attributes of one corpus.
 */
cqi.models.attributes.StructuralAttributeCollection = class StructuralAttributeCollection extends cqi.models.attributes.AttributeCollection {
  /** @type{typeof cqi.models.attributes.StructuralAttribute} */
  static model = cqi.models.attributes.StructuralAttribute;

  /**
   * Extends the base record with `has_values`, fetched with
   * `api.cl_has_values`.
   *
   * @param {string} structuralAttributeName
   * @returns {Promise<object>}
   */
  async _get(structuralAttributeName) {
    const attrs = await super._get(structuralAttributeName);
    const hasValues = await this.client.api.cl_has_values(attrs.api_name);
    attrs.has_values = hasValues;
    return attrs;
  }

  /**
   * Asks `api.corpus_structural_attributes` for the attribute names, then
   * loads the models one after another via `get`.
   *
   * @returns {Promise<cqi.models.attributes.StructuralAttribute[]>}
   */
  async list() {
    const names = await this.client.api.corpus_structural_attributes(this.corpus.apiName);
    const models = [];
    for (const name of names) {
      const model = await this.get(name);
      models.push(model);
    }
    return models;
  }
};

View File

@ -0,0 +1,166 @@
cqi.models.corpora = {};

/**
 * Corpus model. The plain getters read the raw `attrs` record; the
 * collection getters create a new collection object on every access.
 */
cqi.models.corpora.Corpus = class Corpus extends cqi.models.resource.Model {
  /**
   * @returns {string}
   */
  get apiName() {
    const {api_name} = this.attrs;
    return api_name;
  }

  /**
   * @returns {string}
   */
  get name() {
    const {name} = this.attrs;
    return name;
  }

  /**
   * @returns {number}
   */
  get size() {
    const {size} = this.attrs;
    return size;
  }

  /**
   * @returns {string}
   */
  get charset() {
    const {charset} = this.attrs;
    return charset;
  }

  /**
   * Unlike the other getters, this one tolerates a missing `attrs` object
   * and then yields undefined.
   *
   * @returns {string[]}
   */
  get properties() {
    const attrs = this.attrs;
    return attrs?.properties;
  }

  /**
   * @returns {cqi.models.attributes.AlignmentAttributeCollection}
   */
  get alignmentAttributes() {
    const {AlignmentAttributeCollection} = cqi.models.attributes;
    return new AlignmentAttributeCollection(this.client, this);
  }

  /**
   * @returns {cqi.models.attributes.PositionalAttributeCollection}
   */
  get positionalAttributes() {
    const {PositionalAttributeCollection} = cqi.models.attributes;
    return new PositionalAttributeCollection(this.client, this);
  }

  /**
   * @returns {cqi.models.attributes.StructuralAttributeCollection}
   */
  get structuralAttributes() {
    const {StructuralAttributeCollection} = cqi.models.attributes;
    return new StructuralAttributeCollection(this.client, this);
  }

  /**
   * @returns {cqi.models.subcorpora.SubcorpusCollection}
   */
  get subcorpora() {
    const {SubcorpusCollection} = cqi.models.subcorpora;
    return new SubcorpusCollection(this.client, this);
  }

  /**
   * Forwards to `api.corpus_drop_corpus`.
   *
   * @returns {Promise<cqi.status.StatusOk>}
   */
  async drop() {
    const status = await this.client.api.corpus_drop_corpus(this.apiName);
    return status;
  }

  /**
   * Forwards to `api.cqp_query` with this corpus as the mother corpus.
   *
   * @param {string} subcorpusName
   * @param {string} query
   * @returns {Promise<cqi.status.StatusOk>}
   */
  async query(subcorpusName, query) {
    const status = await this.client.api.cqp_query(this.apiName, subcorpusName, query);
    return status;
  }

  /**************************************************************************
   * NOTE: The following is not included in the CQi specification.          *
   **************************************************************************/
  /**************************************************************************
   * Custom additions for nopaque                                           *
   **************************************************************************/

  /**
   * The `static_data` entry of the raw attrs. CorpusCollection#_get fills it
   * with the result of `api.ext_corpus_static_data`.
   *
   * @returns {object}
   */
  get staticData() {
    const {static_data} = this.attrs;
    return static_data;
  }

  /**
   * Forwards to `api.ext_corpus_update_db`.
   *
   * @returns {Promise<cqi.status.StatusOk>}
   */
  async updateDb() {
    const status = await this.client.api.ext_corpus_update_db(this.apiName);
    return status;
  }

  /**
   * Forwards to `api.ext_corpus_paginate_corpus`.
   *
   * @param {number=} page
   * @param {number=} per_page
   * @returns {Promise<object>}
   */
  async paginate(page, per_page) {
    const result = await this.client.api.ext_corpus_paginate_corpus(this.apiName, page, per_page);
    return result;
  }
};
/**
 * Collection of the corpora reachable through one client.
 */
cqi.models.corpora.CorpusCollection = class CorpusCollection extends cqi.models.resource.Collection {
  /** @type {typeof cqi.models.corpora.Corpus} */
  static model = cqi.models.corpora.Corpus;

  /**
   * Builds the raw corpus record with one API call after another, in the
   * order charset, properties, size, static data. The size is the size of
   * the corpus' `word` attribute.
   *
   * @param {string} corpusName
   * @returns {Promise<object>}
   */
  async _get(corpusName) {
    const {api} = this.client;
    const charset = await api.corpus_charset(corpusName);
    // full_name: await this.client.api.corpus_full_name(corpusName),
    // info: await this.client.api.corpus_info(corpusName),
    const properties = await api.corpus_properties(corpusName);
    const size = await api.cl_attribute_size(`${corpusName}.word`);
    /************************************************************************
     * NOTE: The following is not included in the CQi specification.        *
     ************************************************************************/
    /************************************************************************
     * Custom additions for nopaque                                         *
     ************************************************************************/
    const static_data = await api.ext_corpus_static_data(corpusName);
    return {
      api_name: corpusName,
      charset,
      name: corpusName,
      properties,
      size,
      static_data
    };
  }

  /**
   * @param {string} corpusName
   * @returns {Promise<cqi.models.corpora.Corpus>}
   */
  async get(corpusName) {
    const attrs = await this._get(corpusName);
    return this.prepareModel(attrs);
  }

  /**
   * Asks `api.corpus_list_corpora` for the corpus names, then loads the
   * models one after another via `get`.
   *
   * @returns {Promise<cqi.models.corpora.Corpus[]>}
   */
  async list() {
    /** @type {string[]} */
    const names = await this.client.api.corpus_list_corpora();
    /** @type {cqi.models.corpora.Corpus[]} */
    const models = [];
    for (const name of names) {
      const model = await this.get(name);
      models.push(model);
    }
    return models;
  }
};

View File

@ -0,0 +1 @@
// Namespace object for the model layer; the resource, attributes, corpora and
// subcorpora modules attach themselves to it (cqi.models.resource, ...).
cqi.models = {};

View File

@ -0,0 +1,90 @@
cqi.models.resource = {};

/**
 * A base class for representing a single object on the server.
 */
cqi.models.resource.Model = class Model {
  /**
   * @param {object} attrs
   * @param {cqi.CQiClient} client
   * @param {cqi.models.resource.Collection} collection
   */
  constructor(attrs, client, collection) {
    /**
     * A client pointing at the server that this object is on.
     *
     * @type {cqi.CQiClient}
     */
    this.client = client;
    /**
     * The collection that this model is part of.
     *
     * @type {cqi.models.resource.Collection}
     */
    this.collection = collection;
    /**
     * The raw representation of this object from the API
     *
     * @type {object}
     */
    this.attrs = attrs;
  }

  /**
   * Identifier of this object on the server; subclasses must override it.
   *
   * @returns {string}
   * @throws {Error} always, in this base class
   */
  get apiName() {
    throw new Error('Not implemented');
  }

  /**
   * Re-fetches this object through its collection and replaces `attrs`
   * with the attrs of the freshly loaded model.
   *
   * NOTE(review): AttributeCollection#_get and SubcorpusCollection#_get
   * prefix the corpus api name to their argument, so passing `apiName`
   * (already prefixed for those models) may double the prefix - confirm
   * whether `name` should be passed for them instead.
   *
   * @returns {Promise<void>}
   */
  async reload() {
    // The model must be awaited before `.attrs` is read: member access binds
    // tighter than `await`, so `await this.collection.get(...).attrs` read
    // `.attrs` off the pending Promise and always stored undefined.
    const freshModel = await this.collection.get(this.apiName);
    this.attrs = freshModel.attrs;
  }
};
/**
 * Base class for a set of same-typed server objects. Subclasses set the
 * static `model` class and implement `list` and `get`.
 */
cqi.models.resource.Collection = class Collection {
  /**
   * The type of object this collection represents, set by subclasses
   *
   * @type {typeof cqi.models.resource.Model}
   */
  static model;

  /**
   * @param {cqi.CQiClient} client
   */
  constructor(client) {
    /**
     * A client pointing at the server that this object is on.
     *
     * @type {cqi.CQiClient}
     */
    this.client = client;
  }

  /**
   * Not implemented in the base class; the returned promise rejects.
   */
  async list() {
    throw new Error('Not implemented');
  }

  /**
   * Not implemented in the base class; the returned promise rejects.
   */
  async get() {
    throw new Error('Not implemented');
  }

  /**
   * Create a model from a set of attributes, using the static `model`
   * class of the concrete collection class.
   *
   * @param {object} attrs
   * @returns {cqi.models.resource.Model}
   */
  prepareModel(attrs) {
    const ModelClass = this.constructor.model;
    return new ModelClass(attrs, this.client, this);
  }
};

View File

@ -0,0 +1,189 @@
cqi.models.subcorpora = {};

/**
 * Subcorpus model. The getters read the raw `attrs` record; the async
 * methods forward to the API client using this subcorpus' api name.
 */
cqi.models.subcorpora.Subcorpus = class Subcorpus extends cqi.models.resource.Model {
  /**
   * @returns {string}
   */
  get apiName() {
    const {api_name} = this.attrs;
    return api_name;
  }

  /**
   * @returns {object}
   */
  get fields() {
    const {fields} = this.attrs;
    return fields;
  }

  /**
   * @returns {string}
   */
  get name() {
    const {name} = this.attrs;
    return name;
  }

  /**
   * @returns {number}
   */
  get size() {
    const {size} = this.attrs;
    return size;
  }

  /**
   * Forwards to `api.cqp_drop_subcorpus`.
   *
   * @returns {Promise<cqi.status.StatusOk>}
   */
  async drop() {
    const status = await this.client.api.cqp_drop_subcorpus(this.apiName);
    return status;
  }

  /**
   * Forwards to `api.cqp_dump_subcorpus`.
   *
   * @param {number} field
   * @param {number} first
   * @param {number} last
   * @returns {Promise<number[]>}
   */
  async dump(field, first, last) {
    const {api} = this.client;
    const values = await api.cqp_dump_subcorpus(this.apiName, field, first, last);
    return values;
  }

  /**
   * Forwards to `api.cqp_fdist_1`, passing the api name of `attribute`.
   *
   * @param {number} cutoff
   * @param {number} field
   * @param {cqi.models.attributes.PositionalAttribute} attribute
   * @returns {Promise<number[]>}
   */
  async fdist1(cutoff, field, attribute) {
    const {api} = this.client;
    const flatPairs = await api.cqp_fdist_1(this.apiName, cutoff, field, attribute.apiName);
    return flatPairs;
  }

  /**
   * Forwards to `api.cqp_fdist_2`, passing the api names of both
   * attributes.
   *
   * @param {number} cutoff
   * @param {number} field1
   * @param {cqi.models.attributes.PositionalAttribute} attribute1
   * @param {number} field2
   * @param {cqi.models.attributes.PositionalAttribute} attribute2
   * @returns {Promise<number[]>}
   */
  async fdist2(cutoff, field1, attribute1, field2, attribute2) {
    const {api} = this.client;
    const flatTriples = await api.cqp_fdist_2(
      this.apiName, cutoff, field1, attribute1.apiName, field2, attribute2.apiName
    );
    return flatTriples;
  }

  /**************************************************************************
   * NOTE: The following is not included in the CQi specification.          *
   **************************************************************************/
  /**************************************************************************
   * Custom additions for nopaque                                           *
   **************************************************************************/

  /**
   * Forwards to `api.ext_cqp_paginate_subcorpus`.
   *
   * @param {number=} context
   * @param {number=} page
   * @param {number=} perPage
   * @returns {Promise<object>}
   */
  async paginate(context, page, perPage) {
    const {api} = this.client;
    const result = await api.ext_cqp_paginate_subcorpus(this.apiName, context, page, perPage);
    return result;
  }

  /**
   * Forwards to `api.ext_cqp_partial_export_subcorpus`.
   *
   * @param {number[]} matchIdList
   * @param {number=} context
   * @returns {Promise<object>}
   */
  async partialExport(matchIdList, context) {
    const {api} = this.client;
    const result = await api.ext_cqp_partial_export_subcorpus(this.apiName, matchIdList, context);
    return result;
  }

  /**
   * Forwards to `api.ext_cqp_export_subcorpus`.
   *
   * @param {number=} context
   * @returns {Promise<object>}
   */
  async export(context) {
    const {api} = this.client;
    const result = await api.ext_cqp_export_subcorpus(this.apiName, context);
    return result;
  }
};
/**
 * Collection of the subcorpora of one corpus.
 */
cqi.models.subcorpora.SubcorpusCollection = class SubcorpusCollection extends cqi.models.resource.Collection {
  /** @type {typeof cqi.models.subcorpora.Subcorpus} */
  static model = cqi.models.subcorpora.Subcorpus;

  /**
   * @param {cqi.CQiClient} client
   * @param {cqi.models.corpora.Corpus} corpus
   */
  constructor(client, corpus) {
    super(client);
    /** @type {cqi.models.corpora.Corpus} */
    this.corpus = corpus;
  }

  /**
   * Builds the raw subcorpus record. The api name is the corpus api name
   * and the subcorpus name joined by a colon. `fields` only contains the
   * fields for which `api.cqp_subcorpus_has_field` answered truthy; they
   * are probed one after another in the order match, matchend, target,
   * keyword. The size is fetched last.
   *
   * @param {string} subcorpusName
   * @returns {Promise<object>}
   */
  async _get(subcorpusName) {
    /** @type {string} */
    const apiName = `${this.corpus.apiName}:${subcorpusName}`;
    const candidates = [
      ['match', cqi.CONST_FIELD_MATCH],
      ['matchend', cqi.CONST_FIELD_MATCHEND],
      ['target', cqi.CONST_FIELD_TARGET],
      ['keyword', cqi.CONST_FIELD_KEYWORD]
    ];
    /** @type {object} */
    const fields = {};
    for (const [fieldName, fieldId] of candidates) {
      const present = await this.client.api.cqp_subcorpus_has_field(apiName, fieldId);
      if (present) {
        fields[fieldName] = fieldId;
      }
    }
    const size = await this.client.api.cqp_subcorpus_size(apiName);
    return {api_name: apiName, fields, name: subcorpusName, size};
  }

  /**
   * @param {string} subcorpusName
   * @returns {Promise<cqi.models.subcorpora.Subcorpus>}
   */
  async get(subcorpusName) {
    const attrs = await this._get(subcorpusName);
    return this.prepareModel(attrs);
  }

  /**
   * Asks `api.cqp_list_subcorpora` for the subcorpus names, then loads the
   * models one after another via `get`.
   *
   * @returns {Promise<cqi.models.subcorpora.Subcorpus[]>}
   */
  async list() {
    /** @type {string[]} */
    const names = await this.client.api.cqp_list_subcorpora(this.corpus.apiName);
    /** @type {cqi.models.subcorpora.Subcorpus[]} */
    const models = [];
    for (const name of names) {
      const model = await this.get(name);
      models.push(model);
    }
    return models;
  }
};

View File

@ -0,0 +1,6 @@
// Root namespace of the cqi package, created together with the numeric field
// identifiers that SubcorpusCollection#_get probes via cqp_subcorpus_has_field.
var cqi = {
  CONST_FIELD_KEYWORD: 9,
  CONST_FIELD_MATCH: 16,
  CONST_FIELD_MATCHEND: 17,
  CONST_FIELD_TARGET: 0
};

View File

@ -0,0 +1,51 @@
cqi.status = {};

/**
 * Base class of all status classes in this file; `code` is undefined here
 * and overwritten by each subclass.
 */
cqi.status.CQiStatus = class CQiStatus {
  code = undefined;
};

/** Code 257. */
cqi.status.StatusOk = class StatusOk extends cqi.status.CQiStatus {
  code = 257;
};

/** Code 258. */
cqi.status.StatusConnectOk = class StatusConnectOk extends cqi.status.CQiStatus {
  code = 258;
};

/** Code 259. */
cqi.status.StatusByeOk = class StatusByeOk extends cqi.status.CQiStatus {
  code = 259;
};

/** Code 260. */
cqi.status.StatusPingOk = class StatusPingOk extends cqi.status.CQiStatus {
  code = 260;
};

/**
 * Maps each numeric status code to the status class that carries it.
 */
cqi.status.lookup = {
  257: cqi.status.StatusOk,
  258: cqi.status.StatusConnectOk,
  259: cqi.status.StatusByeOk,
  260: cqi.status.StatusPingOk
};

View File

@ -1,15 +1,35 @@
<script src="https://cdnjs.cloudflare.com/ajax/libs/fast-json-patch/3.1.1/fast-json-patch.min.js" integrity="sha512-5uDdefwnzyq4N+SkmMBmekZLZNmc6dLixvVxCdlHBfqpyz0N3bzLdrJ55OLm7QrZmgZuhLGgHLDtJwU6RZoFCA==" crossorigin="anonymous" referrerpolicy="no-referrer"></script> <script src="https://cdnjs.cloudflare.com/ajax/libs/fast-json-patch/3.1.1/fast-json-patch.min.js" integrity="sha512-5uDdefwnzyq4N+SkmMBmekZLZNmc6dLixvVxCdlHBfqpyz0N3bzLdrJ55OLm7QrZmgZuhLGgHLDtJwU6RZoFCA==" crossorigin="anonymous" referrerpolicy="no-referrer"></script>
<script src="https://cdnjs.cloudflare.com/ajax/libs/list.js/2.3.1/list.min.js" integrity="sha512-93wYgwrIFL+b+P3RvYxi/WUFRXXUDSLCT2JQk9zhVGXuS2mHl2axj6d+R6pP+gcU5isMHRj1u0oYE/mWyt/RjA==" crossorigin="anonymous" referrerpolicy="no-referrer"></script> <script src="https://cdnjs.cloudflare.com/ajax/libs/list.js/2.3.1/list.min.js" integrity="sha512-93wYgwrIFL+b+P3RvYxi/WUFRXXUDSLCT2JQk9zhVGXuS2mHl2axj6d+R6pP+gcU5isMHRj1u0oYE/mWyt/RjA==" crossorigin="anonymous" referrerpolicy="no-referrer"></script>
<script src="https://cdnjs.cloudflare.com/ajax/libs/socket.io/4.5.4/socket.io.min.js" integrity="sha512-HTENHrkQ/P0NGDFd5nk6ibVtCkcM7jhr2c7GyvXp5O+4X6O5cQO9AhqFzM+MdeBivsX7Hoys2J7pp2wdgMpCvw==" crossorigin="anonymous" referrerpolicy="no-referrer"></script> <script src="https://cdnjs.cloudflare.com/ajax/libs/socket.io/4.7.1/socket.io.min.js" integrity="sha512-+NaO7d6gQ1YPxvc/qHIqZEchjGm207SszoNeMgppoqD/67fEqmc1edS8zrbxPD+4RQI3gDgT/83ihpFW61TG/Q==" crossorigin="anonymous" referrerpolicy="no-referrer"></script>
<script src="https://cdnjs.cloudflare.com/ajax/libs/plotly.js/2.24.2/plotly.min.js" integrity="sha512-dAXqGCq94D0kgLSPnfvd/pZpCMoJQpGj2S2XQmFQ9Ay1+96kbjss02ISEh+TBNXMggGg/1qoMcOHcxg+Op/Jmw==" crossorigin="anonymous" referrerpolicy="no-referrer"></script>
<script src="https://cdnjs.cloudflare.com/ajax/libs/pako/2.1.0/pako_inflate.min.js" integrity="sha512-mlnC6JeOvg9V4vBpWMxGKscsCdScB6yvGVCeFF2plnQMRmwH69s9F8SHPbC0oirqfePmRBhqx2s3Bx7WIvHfWg==" crossorigin="anonymous" referrerpolicy="no-referrer"></script>
{%- assets
filters='rjsmin',
output='gen/cqi.%(version)s.js',
'js/cqi/package.js',
'js/cqi/errors.js',
'js/cqi/status.js',
'js/cqi/api/package.js',
'js/cqi/api/client.js',
'js/cqi/models/package.js',
'js/cqi/models/resource.js',
'js/cqi/models/attributes.js',
'js/cqi/models/subcorpora.js',
'js/cqi/models/corpora.js',
'js/cqi/client.js'
%}
<script src="{{ ASSET_URL }}"></script>
{%- endassets %}
{%- assets {%- assets
filters='rjsmin', filters='rjsmin',
output='gen/app.%(version)s.js', output='gen/app.%(version)s.js',
'js/App.js', 'js/App.js',
'js/Utils.js', 'js/Utils.js',
'js/CorpusAnalysis/CQiClient.js',
'js/CorpusAnalysis/CorpusAnalysisApp.js', 'js/CorpusAnalysis/CorpusAnalysisApp.js',
'js/CorpusAnalysis/CorpusAnalysisConcordance.js', 'js/CorpusAnalysis/CorpusAnalysisConcordance.js',
'js/CorpusAnalysis/CorpusAnalysisReader.js', 'js/CorpusAnalysis/CorpusAnalysisReader.js',
'js/CorpusAnalysis/CorpusAnalysisStaticVisualization.js',
'js/CorpusAnalysis/QueryBuilder.js', 'js/CorpusAnalysis/QueryBuilder.js',
'js/XMLtoObject.js' 'js/XMLtoObject.js'
%} %}
@ -49,7 +69,9 @@
'js/ResourceLists/UserList.js', 'js/ResourceLists/UserList.js',
'js/ResourceLists/AdminUserList.js', 'js/ResourceLists/AdminUserList.js',
'js/ResourceLists/CorpusFollowerList.js', 'js/ResourceLists/CorpusFollowerList.js',
'js/ResourceLists/DetailledPublicCorpusList.js' 'js/ResourceLists/CorpusTextInfoList.js',
'js/ResourceLists/DetailledPublicCorpusList.js',
'js/ResourceLists/CorpusTokenList.js'
%} %}
<script src="{{ ASSET_URL }}"></script> <script src="{{ ASSET_URL }}"></script>
{%- endassets %} {%- endassets %}

View File

@ -154,6 +154,410 @@ Query your corpus with the CQP query language utilizing a KWIC view.
</div> </div>
</div> </div>
</div> </div>
<div class="modal" id="concordance-query-builder">
<div class="modal-content">
<div>
<nav>
<div class="nav-wrapper" id="query-builder-nav">
<a href="#!" class="brand-logo"><i class="material-icons">build</i>Query Builder (beta)</a>
<i class="material-icons close right" id="close-query-builder">close</i>
<a class="modal-trigger" data-manual-modal-chapter="manual-modal-query-builder" href="#manual-modal">
<i class="material-icons right tooltipped" id="query-builder-tutorial-info-icon" data-position="bottom" data-tooltip="Click here if you are unsure how to use the Query Builder <br>and want to find out what other options it offers.">help</i>
</a>
</div>
</nav>
</div>
<p></p>
<div id="query-container" class="hide">
<div class="row">
<h6 class="col s2">Your Query:
<a class="modal-trigger" data-manual-modal-chapter="manual-modal-query-builder" href="#manual-modal">
<i class="material-icons left" id="general-options-query-builder-tutorial-info-icon">help_outline</i></a>
</h6>
</div>
<div class="row">
<div class="col s10" id="your-query"></div>
<a class="btn-small waves-effect waves-teal col s1" id="insert-query-button">
<i class="material-icons">send</i>
</a>
</div>
<p><i> Preview:</i></p>
<p id="query-preview"></p>
<br>
</div>
<h6>Use the following options to build your query. If you need help, click on the question mark in the upper right corner!</h6>
<p></p>
<a class="btn-large waves-effect waves-light tooltipped" id="positional-attr-button" data-position="bottom" data-tooltip="Search for any token, for example a word, a lemma or a part-of-speech tag">Add new token to your query</a>
<a class="btn-large waves-effect waves-light tooltipped" id="structural-attr-button" data-position="bottom" data-tooltip="Structure your query with structural attributes, for example sentences, entities or annotate the text">Add structural attributes to your query</a>
<div id="structural-attr" class="hide">
<p></p>
<h6>Which structural attribute do you want to add to your query?<a class="modal-trigger" data-manual-modal-chapter="manual-modal-query-builder" href="#manual-modal"><i class="material-icons left" id="add-structural-attribute-tutorial-info-icon">help_outline</i></a></h6>
<p></p>
<div class="row">
<div class="col s12">
<a class="btn-small waves-effect waves-light" id="sentence">sentence</a>
<a class="btn-small waves-effect waves-light" id="entity">entity</a>
<a class="btn-small waves-effect waves-light" id="text-annotation">Meta Data</a>
</div>
</div>
<div id="entity-builder" class="hide">
<p></p>
<br>
<div class="row">
<a class="btn waves-effect waves-light col s4" id="empty-entity">Add Entity of any type</a>
<p class="col s1 l1"></p>
<div class= "input-field col s3">
<select name="englishenttype" id="english-ent-type">
<option value="" disabled selected>English ent_type</option>
<option value="CARDINAL">CARDINAL</option>
<option value="DATE">DATE</option>
<option value="EVENT">EVENT</option>
<option value="FAC">FAC</option>
<option value="GPE">GPE</option>
<option value="LANGUAGE">LANGUAGE</option>
<option value="LAW">LAW</option>
<option value="LOC">LOC</option>
<option value="MONEY">MONEY</option>
<option value="NORP">NORP</option>
<option value="ORDINAL">ORDINAL</option>
<option value="ORG">ORG</option>
<option value="PERCENT">PERCENT</option>
<option value="PERSON">PERSON</option>
<option value="PRODUCT">PRODUCT</option>
<option value="QUANTITY">QUANTITY</option>
<option value="TIME">TIME</option>
<option value="WORK_OF_ART">WORK_OF_ART</option>
</select>
<label>Entity Type</label>
</div>
<div class= "input-field col s3">
<select name="germanenttype" id="german-ent-type">
<option value="" disabled selected>German ent_type</option>
<option value="LOC">LOC</option>
<option value="MISC">MISC</option>
<option value="ORG">ORG</option>
<option value="PER">PER</option>
</select>
</div>
</div>
</div>
<div id="text-annotation-builder" class="hide">
<p></p>
<br>
<div class="row">
<div class= "input-field col s4 l3">
<select name="text-annotation-options" id="text-annotation-options">
<option class="btn-small waves-effect waves-light" value="address">address</option>
<option class="btn-small waves-effect waves-light" value="author">author</option>
<option class="btn-small waves-effect waves-light" value="booktitle">booktitle</option>
<option class="btn-small waves-effect waves-light" value="chapter">chapter</option>
<option class="btn-small waves-effect waves-light" value="editor">editor</option>
<option class="btn-small waves-effect waves-light" value="institution">institution</option>
<option class="btn-small waves-effect waves-light" value="journal">journal</option>
<option class="btn-small waves-effect waves-light" value="pages">pages</option>
<option class="btn-small waves-effect waves-light" value="publisher">publisher</option>
<option class="btn-small waves-effect waves-light" value="publishing_year">publishing year</option>
<option class="btn-small waves-effect waves-light" value="school">school</option>
<option class="btn-small waves-effect waves-light" value="title">title</option>
</select>
<label>Meta data</label>
</div>
<div class= "input-field col s7 l5">
<i class="material-icons prefix">mode_edit</i>
<input placeholder="Type in your text annotation" type="text" id="text-annotation-input">
</div>
<div class="col s1 l1 center-align">
<p class="btn-floating waves-effect waves-light" id="text-annotation-submit">
<i class="material-icons right">send</i>
</p>
</div>
<div class="hide" id="no-value-metadata-message"><i>No value entered!</i></div>
</div>
</div>
</div>
<div id="positional-attr" class="hide">
<p></p>
<div class="row" id="token-kind-selector">
<div class="col s5">
<h6>Which kind of token are you looking for? <a class="modal-trigger" data-manual-modal-chapter="manual-modal-query-builder" href="#manual-modal"><i class="material-icons left" id="token-tutorial-info-icon">help_outline</i></a></h6>
</div>
<div class="input-field col s3">
<select id="token-attr">
<option value="word" selected>word</option>
<option value="lemma">lemma</option>
<option value="english-pos">english pos</option>
<option value="german-pos">german pos</option>
<option value="simple-pos-button">simple_pos</option>
<option value="empty-token">empty token</option>
</select>
</div>
</div>
<p></p>
<div id="token-builder-content">
<div class="row" >
<div id="token-query"></div>
<div id="word-builder">
<div class= "input-field col s3 l4">
<i class="material-icons prefix">mode_edit</i>
<input placeholder="Type in your word" type="text" id="word-input">
</div>
</div>
<div id="lemma-builder" class="hide" >
<div class= "input-field col s3 l4">
<i class="material-icons prefix">mode_edit</i>
<input placeholder="Type in your lemma" type="text" id="lemma-input">
</div>
</div>
<div id="english-pos-builder" class="hide">
<div class="col s6 m4 l4">
<div class="row">
<div class= "input-field col s12">
<select name="englishpos" id="english-pos">
<option value="default" disabled selected>English pos tagset</option>
<option value="ADD">email</option>
<option value="AFX">affix</option>
<option value="CC">conjunction, coordinating</option>
<option value="CD">cardinal number</option>
<option value="DT">determiner</option>
<option value="EX">existential there</option>
<option value="FW">foreign word</option>
<option value="HYPH">punctuation mark, hyphen</option>
<option value="IN">conjunction, subordinating or preposition</option>
<option value="JJ">adjective</option>
<option value="JJR">adjective, comparative</option>
<option value="JJS">adjective, superlative</option>
<option value="LS">list item marker</option>
<option value="MD">verb, modal auxiliary</option>
<option value="NFP">superfluous punctuation</option>
<option value="NN">noun, singular or mass</option>
<option value="NNP">noun, proper singular</option>
<option value="NNPS">noun, proper plural</option>
<option value="NNS">noun, plural</option>
<option value="PDT">predeterminer</option>
<option value="POS">possessive ending</option>
<option value="PRP">pronoun, personal</option>
<option value="PRP$">pronoun, possessive</option>
<option value="RBR">adverb, comparative</option>
<option value="RBS">adverb, superlative</option>
<option value="RP">adverb, particle</option>
<option value="SYM">symbol</option>
<option value="TO">infinitival to</option>
<option value="UH">interjection</option>
<option value="VB">verb, base form</option>
<option value="VBD">verb, past tense</option>
<option value="VBG">verb, gerund or present participle</option>
<option value="VBN">verb, past participle</option>
<option value="VBP">verb, non-3rd person singular present</option>
<option value="VBZ">verb, 3rd person singular present</option>
<option value="WDT">wh-determiner</option>
<option value="WP">wh-pronoun, personal</option>
<option value="WP$">wh-pronoun, possessive</option>
<option value="WRB">wh-adverb</option>
<option value="XX">unknown</option>
<option value="``">opening quotation mark</option>
<option value="$">symbol, currency</option>
<option value='""'>closing quotation mark</option>
<option value="-LRB-">left round bracket</option>
<option value="-RRB-">right round bracket</option>
<option value=".">punctuation mark, sentence closer</option>
<option value=":">punctuation mark, colon or ellipsis</option>
</select>
<label>Part-of-speech tags</label>
</div>
</div>
</div>
</div>
<div id="german-pos-builder" class="hide">
<div class="col s6 m4 l4">
<div class="row">
<div class= "input-field col s12">
<select name="germanpos" id="german-pos">
<option value="default" disabled selected>German pos tagset</option>
<option value="ADJA">adjective, attributive</option>
<option value="ADJD">adjective, adverbial or predicative</option>
<option value="ADV">adverb</option>
<option value="APPO">postposition</option>
<option value="APPR">preposition; circumposition left</option>
<option value="APPRART">preposition with article</option>
<option value="APZR">circumposition right</option>
<option value="ART">definite or indefinite article</option>
<option value="CARD">cardinal number</option>
<option value="FM">foreign word</option>
<option value="ITJ">interjection</option>
<option value="KOKOM">comparative conjunction</option>
<option value="KON">coordinating conjunction</option>
<option value="KOUI">subordinating conjunction with "zu" and infinitive</option>
<option value="KOUS">subordinating conjunction with sentence</option>
<option value="NE">proper noun</option>
<option value="NN">noun, singular or mass</option>
<option value="NNE">proper noun</option>
<option value="PDAT">attributive demonstrative pronoun</option>
<option value="PDS">substituting demonstrative pronoun</option>
<option value="PIAT">attributive indefinite pronoun without determiner</option>
<option value="PIS">substituting indefinite pronoun</option>
<option value="PPER">non-reflexive personal pronoun</option>
<option value="PPOSAT">attributive possessive pronoun</option>
<option value="PPOSS">substituting possessive pronoun</option>
<option value="PRELAT">attributive relative pronoun</option>
<option value="PRELS">substituting relative pronoun</option>
<option value="PRF">reflexive personal pronoun</option>
<option value="PROAV">pronominal adverb</option>
<option value="PTKA">particle with adjective or adverb</option>
<option value="PTKANT">answer particle</option>
<option value="PTKNEG">negative particle</option>
<option value="PTKVZ">separable verbal particle</option>
<option value="PTKZU">"zu" before infinitive</option>
<option value="PWAT">attributive interrogative pronoun</option>
<option value="PWAV">adverbial interrogative or relative pronoun</option>
<option value="PWS">substituting interrogative pronoun</option>
<option value="TRUNC">word remnant</option>
<option value="VAFIN">finite verb, auxiliary</option>
<option value="VAIMP">imperative, auxiliary</option>
<option value="VAINF">infinitive, auxiliary</option>
<option value="VAPP">perfect participle, auxiliary</option>
<option value="VMFIN">finite verb, modal</option>
<option value="VMINF">infinitive, modal</option>
<option value="VMPP">perfect participle, modal</option>
<option value="VVFIN">finite verb, full</option>
<option value="VVIMP">imperative, full</option>
<option value="VVINF">infinitive, full</option>
<option value="VVIZU">infinitive with "zu", full</option>
<option value="VVPP">perfect participle, full</option>
<option value="XY">non-word containing non-letter</option>
<option value="$(">other sentence-internal punctuation mark</option>
<option value="$,">comma</option>
<option value="$.">sentence-final punctuation mark</option>
</select>
<label>Part-of-speech tags</label>
</div>
</div>
</div>
</div>
<div id="simplepos-builder" class="hide">
<div class="col s6 m4 l4">
<div class="row">
<div class= "input-field col s12">
<select name="simplepos" id="simple-pos">
<option value="default" disabled selected>simple_pos tagset</option>
<option value="ADJ">adjective</option>
<option value="ADP">adposition</option>
<option value="ADV">adverb</option>
<option value="AUX">auxiliary verb</option>
<option value="CONJ">coordinating conjunction</option>
<option value="DET">determiner</option>
<option value="INTJ">interjection</option>
<option value="NOUN">noun</option>
<option value="NUM">numeral</option>
<option value="PART">particle</option>
<option value="PRON">pronoun</option>
<option value="PROPN">proper noun</option>
<option value="PUNCT">punctuation</option>
<option value="SCONJ">subordinating conjunction</option>
<option value="SYM">symbol</option>
<option value="VERB">verb</option>
<option value="X">other</option>
</select>
<label>Simple part-of-speech tags</label>
</div>
</div>
</div>
</div>
<div class="col s1 l1 center-align">
<p class="btn-floating waves-effect waves-light" id="token-submit">
<i class="material-icons right">send</i>
</p>
</div>
<div class="hide" id="no-value-message"><i>No value entered!</i></div>
</div>
<div id="token-edit-options">
<div class="row">
<h6>Options to edit your token: <a class="modal-trigger" data-manual-modal-chapter="manual-modal-query-builder" href="#manual-modal"><i class="material-icons left" id="edit-options-tutorial-info-icon">help_outline</i></a></h6>
</div>
<p></p>
<div class="row">
<div id="input-options" class="col s5 m5 l5 xl4">
<a id="wildcard-char" class="btn-small waves-effect waves-light tooltipped" data-position="top" data-tooltip="Look for a variable character (also called wildcard character)">Wildcard character</a>
<a id="option-group" class="btn-small waves-effect waves-light tooltipped" data-position="top" data-tooltip="Find character sequences from a list of options">Option Group</a>
</div>
<div class="col s3 m3 l3 xl3" id="incidence-modifiers-button">
<a class="dropdown-trigger btn-small waves-effect waves-light" href="#" data-target="incidence-modifiers" data-position="top" data-tooltip="Incidence Modifiers are special characters or patterns, <br>which determine how often a character represented previously should occur.">incidence modifiers</a>
</div>
<ul id="incidence-modifiers" class="dropdown-content">
<li><a id="one-or-more" data-token="+" class="tooltipped" data-position ="top" data-tooltip="...occurrences of the character/token before">one or more (+)</a></li>
<li><a id="zero-or-more" data-token="*" class="tooltipped" data-position ="top" data-tooltip="...occurrences of the character/token before">zero or more (*)</a></li>
<li><a id="zero-or-one" data-token="?" class="tooltipped" data-position ="top" data-tooltip="...occurrences of the character/token before">zero or one (?)</a></li>
<li><a id="exactly-n" class="modal-trigger tooltipped" href="#exactlyN" data-token="{n}" data-position ="top" data-tooltip="...occurrences of the character/token before">exactly n ({n})</a></li>
<li><a id="between-n-m" class="modal-trigger tooltipped" href="#betweenNM" data-token="{n,m}" data-position ="top" data-tooltip="...occurrences of the character/token before">between n and m ({n,m})</a></li>
</ul>
<div id="ignore-case-checkbox" class="col s2 m2 l2 xl2">
<p id="ignore-case">
<label>
<input type="checkbox" class="filled-in" />
<span>Ignore Case</span>
</label>
</p>
</div>
<div class="col s2 m2 l2 xl2" id="condition-container">
<a class="btn-small tooltipped waves-effect waves-light" id="or" data-position="bottom" data-tooltip="You can add another condition to your token. <br>At least one must be fulfilled">or</a>
<a class="btn-small tooltipped waves-effect waves-light" id="and" data-position="bottom" data-tooltip="You can add another condition to your token. <br>Both must be fulfilled">and</a>
</div>
</div>
</div>
</div>
<div id="exactlyN" class="modal">
<div class="row modal-content">
<div class="input-field col s10">
<i class="material-icons prefix">mode_edit</i>
<input placeholder="type in a number for 'n'" type="text" id="n-input">
</div>
<div class="col s2">
<p class="btn-floating waves-effect waves-light" id="n-submit">
<i class="material-icons right">send</i>
</p>
</div>
</div>
</div>
<div id="betweenNM" class="modal">
<div class="row modal-content">
<div class= "input-field col s5">
<i class="material-icons prefix">mode_edit</i>
<input placeholder="number for 'n'" type="text" id="n-m-input">
</div>
<div class= "input-field col s5">
<i class="material-icons prefix">mode_edit</i>
<input placeholder="number for 'm'" type="text" id="m-input">
</div>
<div class="col s2">
<p class="btn-floating waves-effect waves-light" id="n-m-submit">
<i class="material-icons right">send</i>
</p>
</div>
</div>
</div>
</div>
</div>
</div>
{% endset %} {% endset %}
{% set scripts %} {% set scripts %}

View File

@ -0,0 +1,161 @@
{% set name = 'Static Visualization (beta)' %}
{% set description = '' %}
{% set id_prefix = name.lower().replace(' ', '-') + '-extension' %}
{% set tab_content = '' %}
{% set container_content %}
<div class="row">
<div class="col s12">
<h4><i class="material-icons left">query_stats</i>{{ name }}</h4>
</div>
</div>
<div class="row">
<div class="col s2">
<div class="card hoverable" style="border-radius: 10px !important; background-color:#6b3f89; color:white">
<div class="card-content" style="padding:10px !important; text-align:center;">
<p><b>Tokens</b></p>
<span class="card-title corpus-num-tokens"></span>
</div>
</div>
</div>
<div class="col s2">
<div class="card hoverable" style="border-radius: 10px !important; background-color:#6b3f89; color:white">
<div class="card-content" style="padding:10px !important; text-align:center">
<p><b>Sentences</b></p>
<span class="card-title corpus-num-s"></span>
</div>
</div>
</div>
<div class="col s2">
<div class="card hoverable" style="border-radius: 10px !important; background-color:#6b3f89; color:white">
<div class="card-content" style="padding:10px !important; text-align:center">
<p><b>Unique words</b></p>
<span class="card-title corpus-num-unique-words"></span>
</div>
</div>
</div>
<div class="col s2">
<div class="card hoverable" style="border-radius: 10px !important; background-color:#6b3f89; color:white">
<div class="card-content" style="padding:10px !important; text-align:center">
<p><b>Unique lemmas</b></p>
<span class="card-title corpus-num-unique-lemmas"></span>
</div>
</div>
</div>
<div class="col s2">
<div class="card hoverable" style="border-radius: 10px !important; background-color:#6b3f89; color:white">
<div class="card-content" style="padding:10px !important; text-align:center">
<p><b>Unique pos</b></p>
<span class="card-title corpus-num-unique-pos"></span>
</div>
</div>
</div>
<div class="col s2">
<div class="card hoverable" style="border-radius: 10px !important; background-color:#6b3f89; color:white">
<div class="card-content" style="padding:10px !important; text-align:center">
<p><b>Unique simple_pos</b></p>
<span class="card-title corpus-num-unique-simple-pos"></span>
</div>
</div>
</div>
</div>
<div class="row">
<div class="col s4">
<div class="card hoverable">
<div class="card-content">
<span class="card-title" id="text-proportions-title-element">Proportions</span>
<p>of texts within the corpus</p>
<div id="text-proportions-graphic"></div>
<a class="btn disabled text-proportions-graph-mode-button" data-graph-type="pie"><i class="material-icons">incomplete_circle</i></a>
<a class="btn text-proportions-graph-mode-button" data-graph-type="bar"><i class="material-icons">sort</i></a>
</div>
</div>
</div>
<div class="col s8">
<div class="card hoverable">
<div class="card-content">
<span class="card-title">Text Information Overview</span>
<div class="chip text-count-chip" style="background-color:#6b3f89; color:white"></div>
<div class="corpus-text-info-list no-autoinit"></div>
</div>
</div>
</div>
</div>
<div class="row">
<div class="col s12">
<div class="card hoverable">
<div class="card-content">
<span class="card-title">Frequencies</span>
<div class="row">
<div class="col s4">
<div class="corpus-token-list no-autoinit" style="transform: scale(0.91);"></div>
<a class="dropdown-trigger btn" data-target="frequencies-token-category-dropdown">Word<i class="material-icons right">arrow_drop_down</i></a>
<a class="btn-flat modal-trigger no-autoinit" id="frequencies-stopwords-setting-modal-button" href="#frequencies-stopwords-setting-modal">
<i class="material-icons grey-text text-darken-2">settings</i>
</a>
<ul id="frequencies-token-category-dropdown" class="dropdown-content">
<li><a data-token-category="word">Word</a></li>
<li><a data-token-category="lemma">Lemma</a></li>
<li><a data-token-category="pos">Pos</a></li>
<li><a data-token-category="simple_pos">Simple_pos</a></li>
</ul>
</div>
<div class="col s8">
<div id="frequencies-graphic"></div>
<div>
<a class="btn disabled frequencies-graph-mode-button" data-graph-type="bar"><i class="material-icons">stacked_bar_chart</i></a>
<a class="btn frequencies-graph-mode-button" data-graph-type="scatter"><i class="material-icons">show_chart</i></a>
{# <a class="btn frequencies-graph-mode-button" data-graph-type="markers"><i class="material-icons">bubble_chart</i></a> #}
</div>
</div>
</div>
</div>
</div>
</div>
</div>
<div class="row">
</div>
{% endset %}
{% set modals %}
{# Stopword settings modal (#frequencies-stopwords-setting-modal): a user stopword input area, a per-language stopword list with delete/reset buttons, and a Cancel/Submit footer. #}
<div class="modal modal-fixed-footer" id="frequencies-stopwords-setting-modal">
<div class="modal-content">
<h4>Settings</h4>
<div class="row">
<p>Here you can change the stopword-lists. Stopwords are common words in a language,
like "the" or "and," that carry little meaning and are often removed in text analysis
to improve efficiency and accuracy.</p>
<div id="user-stopword-list-container"></div>
<div class="chips col s8 no-autoinit input-field" id="stopword-input-field"></div>
</div>
<div class="row">
<p>Below you can find a list of all stopwords that are always filtered out.
The lists are sorted by language, you can remove single words or remove
whole languages via the settings on the right.</p>
<div class="input-field col s3">
<select id="stopword-language-selection"></select>
<label>Stopword language select</label>
</div>
</div>
<div class="row">
<div class="chip btn white-text red" id="delete-language-stopword-list-entries-button">Delete all below<i class="material-icons right">delete</i></div>
<div class="chip btn white-text blue" id="reset-language-stopword-list-entries-button">Reset stopword list<i class="material-icons right">refresh</i></div>
</div>
<div id="stopword-language-chip-list"></div>
</div>
<div class="modal-footer">
<a class="modal-close waves-effect waves-green btn frequencies-stopword-setting-modal-action-buttons" data-action="cancel">Cancel</a>
<a class="modal-close waves-effect waves-green btn frequencies-stopword-setting-modal-action-buttons" data-action="submit">Submit</a>
</div>
{# Closes #frequencies-stopwords-setting-modal so markup rendered after this block is not nested inside the modal. #}
</div>
{% endset %}
{% set scripts %}
{# Creates the static visualization extension instance, passing it `corpusAnalysisApp`. NOTE(review): `corpusAnalysisApp` is not defined in this template; presumably the importing page creates it before this script runs — confirm. #}
<script>
const corpusAnalysisStaticVisualization = new CorpusAnalysisStaticVisualization(corpusAnalysisApp);
</script>
{% endset %}

View File

@ -2,26 +2,26 @@
{% import "materialize/wtf.html.j2" as wtf %} {% import "materialize/wtf.html.j2" as wtf %}
{% import 'corpora/_analysis/concordance.html.j2' as concordance_extension %} {% import 'corpora/_analysis/concordance.html.j2' as concordance_extension %}
{% import 'corpora/_analysis/reader.html.j2' as reader_extension %} {% import 'corpora/_analysis/reader.html.j2' as reader_extension %}
{% import 'corpora/_analysis/static_visualization.html.j2' as static_visualization_extension %}
{% set extensions = [concordance_extension, reader_extension] %} {% set extensions = [concordance_extension, reader_extension, static_visualization_extension] %}
{% block main_attribs %} class="service-scheme" data-service="corpus-analysis" id="corpus-analysis-app-container"{% endblock main_attribs %} {% block main_attribs %} class="service-scheme" data-service="corpus-analysis" id="corpus-analysis-app-container"{% endblock main_attribs %}
{% block page_content %} {% block page_content %}
<ul class="row tabs no-autoinit" id="corpus-analysis-app-extension-tabs"> <ul class="row tabs no-autoinit" id="corpus-analysis-app-extension-tabs">
<li class="tab col s3"><a class="active" href="#corpus-analysis-app-overview"><i class="nopaque-icons service-icons left" data-service="corpus-analysis"></i>Corpus analysis</a></li> <li class="tab col s3"><a class="active" href="#corpus-analysis-app-home-container"><i class="nopaque-icons service-icons left" data-service="corpus-analysis"></i>Corpus analysis</a></li>
{% for extension in extensions %} {% for extension in extensions if extension.name != 'Static Visualization (beta)' %}
<li class="tab col s3"><a href="#{{ extension.id_prefix }}-container">{{ extension.tab_content }}</a></li> <li class="tab col s3"><a href="#{{ extension.id_prefix }}-container">{{ extension.tab_content }}</a></li>
{% endfor %} {% endfor %}
</ul> </ul>
<div class="row" id="corpus-analysis-app-overview"> <div id="corpus-analysis-app-home-container">
<div class="col s12">
<h1>{{ title }}</h1> <h1>{{ title }}</h1>
</div>
{% for extension in extensions %} <div class="row" id="corpus-analysis-app-extension-cards">
{% for extension in extensions if extension.name != 'Static Visualization (beta)' %}
<div class="col s3"> <div class="col s3">
<div class="card extension-selector hoverable" data-target="{{ extension.id_prefix }}-container"> <div class="card extension-selector hoverable" data-target="{{ extension.id_prefix }}-container">
<div class="card-content"> <div class="card-content">
@ -31,9 +31,13 @@
</div> </div>
</div> </div>
{% endfor %} {% endfor %}
</div>
{{ static_visualization_extension.container_content }}
</div> </div>
{% for extension in extensions %}
{% for extension in extensions if extension.name != 'Static Visualization (beta)' %}
<div id="{{ extension.id_prefix }}-container"> <div id="{{ extension.id_prefix }}-container">
{{ extension.container_content }} {{ extension.container_content }}
</div> </div>
@ -44,13 +48,15 @@
{{ super() }} {{ super() }}
<div class="modal no-autoinit" id="corpus-analysis-app-init-modal"> <div class="modal no-autoinit" id="corpus-analysis-app-init-modal">
<div class="modal-content"> <div class="modal-content">
<h4>Initializing session</h4> <h4>We are preparing your analysis session</h4>
<p> <p>
Our server works as hard as it can to prepare your analysis session. Please be patient and give it some time.<br>
If initialization takes longer than usual or an error occurs, <a onclick="window.location.reload()" href="#">reload the page</a>. If initialization takes longer than usual or an error occurs, <a onclick="window.location.reload()" href="#">reload the page</a>.
</p> </p>
<div class="progress"> <div class="progress">
<div class="indeterminate"></div> <div class="indeterminate"></div>
</div> </div>
<p class="status-text"></p>
<p class="errors error-color-text hide"></p> <p class="errors error-color-text hide"></p>
</div> </div>
</div> </div>
@ -59,328 +65,6 @@
{{ extension.modals }} {{ extension.modals }}
{% endfor %} {% endfor %}
<div class="modal" id="concordance-query-builder">
<div class="modal-content">
<div>
<nav>
<div class="nav-wrapper" id="query-builder-nav">
<a href="#!" class="brand-logo"><i class="material-icons">build</i>Query Builder (beta)</a>
<i class="material-icons close right" id="close-query-builder">close</i>
<a class="modal-trigger" data-manual-modal-chapter="manual-modal-query-builder" href="#manual-modal">
<i class="material-icons right tooltipped" id="query-builder-tutorial-info-icon" data-position="bottom" data-tooltip="Click here if you are unsure how to use the Query Builder <br>and want to find out what other options it offers.">help</i>
</a>
</div>
</nav>
</div>
<p></p>
<div id="query-container" class="hide">
<div class="row">
<h6 class="col s2">Your Query:
<a class="modal-trigger" data-manual-modal-chapter="manual-modal-query-builder" href="#manual-modal">
<i class="material-icons left" id="general-options-query-builder-tutorial-info-icon">help_outline</i></a>
</h6>
</div>
<div class="row">
<div class="col s10" id="your-query"></div>
<a class="btn-small waves-effect waves-teal col s1" id="insert-query-button">
<i class="material-icons">send</i>
</a>
</div>
<p><i> Preview:</i></p>
<p id="query-preview"></p>
<br>
</div>
<h6>Use the following options to build your query. If you need help, click on the question mark in the upper right corner!</h6>
<p></p>
<a class="btn-large waves-effect waves-light tooltipped" id="positional-attr-button" data-position="bottom" data-tooltip="Search for any token, for example a word, a lemma or a part-of-speech tag">Add new token to your query</a>
<a class="btn-large waves-effect waves-light tooltipped" id="structural-attr-button" data-position="bottom" data-tooltip="Structure your query with structural attributes, for example sentences, entities or annotate the text">Add structural attributes to your query</a>
<div id="structural-attr" class="hide">
<p></p>
<h6>Which structural attribute do you want to add to your query?<a class="modal-trigger" data-manual-modal-chapter="manual-modal-query-builder" href="#manual-modal"><i class="material-icons left" id="add-structural-attribute-tutorial-info-icon">help_outline</i></a></h6>
<p></p>
<div class="row">
<div class="col s12">
<a class="btn-small waves-effect waves-light" id="sentence">sentence</a>
<a class="btn-small waves-effect waves-light" id="entity">entity</a>
<a class="btn-small waves-effect waves-light" id="text-annotation">Meta Data</a>
</div>
</div>
<div id="entity-builder" class="hide">
<p></p>
<br>
<div class="row">
<a class="btn waves-effect waves-light col s4" id="empty-entity">Add Entity of any type</a>
<p class="col s1 l1"></p>
<div class= "input-field col s3">
<select name="englishenttype" id="english-ent-type">
<option value="" disabled selected>English ent_type</option>
<option value="CARDINAL">CARDINAL</option>
<option value="DATE">DATE</option>
<option value="EVENT">EVENT</option>
<option value="FAC">FAC</option>
<option value="GPE">GPE</option>
<option value="LANGUAGE">LANGUAGE</option>
<option value="LAW">LAW</option>
<option value="LOC">LOC</option>
<option value="MONEY">MONEY</option>
<option value="NORP">NORP</option>
<option value="ORDINAL">ORDINAL</option>
<option value="ORG">ORG</option>
<option value="PERCENT">PERCENT</option>
<option value="PERSON">PERSON</option>
<option value="PRODUCT">PRODUCT</option>
<option value="QUANTITY">QUANTITY</option>
<option value="TIME">TIME</option>
<option value="WORK_OF_ART">WORK_OF_ART</option>
</select>
<label>Entity Type</label>
</div>
<div class= "input-field col s3">
<select name="germanenttype" id="german-ent-type">
<option value="" disabled selected>German ent_type</option>
<option value="LOC">LOC</option>
<option value="MISC">MISC</option>
<option value="ORG">ORG</option>
<option value="PER">PER</option>
</select>
</div>
</div>
</div>
<div id="text-annotation-builder" class="hide">
<p></p>
<br>
<div class="row">
<div class= "input-field col s4 l3">
<select name="text-annotation-options" id="text-annotation-options">
<option class="btn-small waves-effect waves-light" value="address">address</option>
<option class="btn-small waves-effect waves-light" value="author">author</option>
<option class="btn-small waves-effect waves-light" value="booktitle">booktitle</option>
<option class="btn-small waves-effect waves-light" value="chapter">chapter</option>
<option class="btn-small waves-effect waves-light" value="editor">editor</option>
<option class="btn-small waves-effect waves-light" value="institution">institution</option>
<option class="btn-small waves-effect waves-light" value="journal">journal</option>
<option class="btn-small waves-effect waves-light" value="pages">pages</option>
<option class="btn-small waves-effect waves-light" value="publisher">publisher</option>
<option class="btn-small waves-effect waves-light" value="publishing_year">publishing year</option>
<option class="btn-small waves-effect waves-light" value="school">school</option>
<option class="btn-small waves-effect waves-light" value="title">title</option>
</select>
<label>Meta data</label>
</div>
<div class= "input-field col s7 l5">
<i class="material-icons prefix">mode_edit</i>
<input placeholder="Type in your text annotation" type="text" id="text-annotation-input">
</div>
<div class="col s1 l1 center-align">
<p class="btn-floating waves-effect waves-light" id="text-annotation-submit">
<i class="material-icons right">send</i>
</p>
</div>
<div class="hide" id="no-value-metadata-message"><i>No value entered!</i></div>
</div>
</div>
</div>
<div id="positional-attr" class="hide">
<p></p>
<div class="row" id="token-kind-selector">
<div class="col s5">
<h6>Which kind of token are you looking for? <a class="modal-trigger" data-manual-modal-chapter="manual-modal-query-builder" href="#manual-modal"><i class="material-icons left" id="token-tutorial-info-icon">help_outline</i></a></h6>
</div>
<div class="input-field col s3">
<select id="token-attr">
<option value="word" selected>word</option>
<option value="lemma">lemma</option>
<option value="english-pos">english pos</option>
<option value="german-pos">german pos</option>
<option value="simple-pos-button">simple_pos</option>
<option value="empty-token">empty token</option>
</select>
</div>
</div>
<p></p>
<div id="token-builder-content">
<div class="row" >
<div id="token-query"></div>
<div id="word-builder">
<div class= "input-field col s3 l4">
<i class="material-icons prefix">mode_edit</i>
<input placeholder="Type in your word" type="text" id="word-input">
</div>
</div>
<div id="lemma-builder" class="hide" >
<div class= "input-field col s3 l4">
<i class="material-icons prefix">mode_edit</i>
<input placeholder="Type in your lemma" type="text" id="lemma-input">
</div>
</div>
<div id="english-pos-builder" class="hide">
<div class="col s6 m4 l4">
<div class="row">
<div class= "input-field col s12">
<select name="englishpos" id="english-pos">
<option value="default" disabled selected>English pos tagset</option>
<option value="ADD">email</option>
<option value="AFX">affix</option>
<option value="CC">conjunction, coordinating</option>
<option value="CD">cardinal number</option>
<option value="DT">determiner</option>
<option value="EX">existential there</option>
<option value="FW">foreign word</option>
<option value="HYPH">punctuation mark, hyphen</option>
<option value="IN">conjunction, subordinating or preposition</option>
<option value="JJ">adjective</option>
<option value="JJR">adjective, comparative</option>
<option value="JJS">adjective, superlative</option>
</select>
<label>Part-of-speech tags</label>
</div>
</div>
</div>
</div>
<div id="german-pos-builder" class="hide">
<div class="col s6 m4 l4">
<div class="row">
<div class= "input-field col s12">
<select name="germanpos" id="german-pos">
<option value="default" disabled selected>German pos tagset</option>
<option value="ADJA">adjective, attributive</option>
<option value="ADJD">adjective, adverbial or predicative</option>
<option value="ADV">adverb</option>
<option value="APPO">postposition</option>
<option value="APPR">preposition; circumposition left</option>
<option value="APPRART">preposition with article</option>
<option value="APZR">circumposition right</option>
<option value="ART">definite or indefinite article</option>
</select>
<label>Part-of-speech tags</label>
</div>
</div>
</div>
</div>
<div id="simplepos-builder" class="hide">
<div class="col s6 m4 l4">
<div class="row">
<div class= "input-field col s12">
<select name="simplepos" id="simple-pos">
<option value="default" disabled selected>simple_pos tagset</option>
<option value="ADJ">adjective</option>
<option value="ADP">adposition</option>
<option value="ADV">adverb</option>
<option value="AUX">auxiliary verb</option>
<option value="CONJ">coordinating conjunction</option>
<option value="DET">determiner</option>
<option value="INTJ">interjection</option>
<option value="NOUN">noun</option>
<option value="NUM">numeral</option>
<option value="PART">particle</option>
<option value="PRON">pronoun</option>
<option value="PROPN">proper noun</option>
<option value="PUNCT">punctuation</option>
<option value="SCONJ">subordinating conjunction</option>
<option value="SYM">symbol</option>
<option value="VERB">verb</option>
<option value="X">other</option>
</select>
<label>Simple part-of-speech tags</label>
</div>
</div>
</div>
</div>
<div class="col s1 l1 center-align">
<p class="btn-floating waves-effect waves-light" id="token-submit">
<i class="material-icons right">send</i>
</p>
</div>
<div class="hide" id="no-value-message"><i>No value entered!</i></div>
</div>
<div id="token-edit-options">
<div class="row">
<h6>Options to edit your token: <a class="modal-trigger" data-manual-modal-chapter="manual-modal-query-builder" href="#manual-modal"><i class="material-icons left" id="edit-options-tutorial-info-icon">help_outline</i></a></h6>
</div>
<p></p>
<div class="row">
<div id="input-options" class="col s5 m5 l5 xl4">
<a id="wildcard-char" class="btn-small waves-effect waves-light tooltipped" data-position="top" data-tooltip="Look for a variable character (also called wildcard character)">Wildcard character</a>
<a id="option-group" class="btn-small waves-effect waves-light tooltipped" data-position="top" data-tooltip="Find character sequences from a list of options">Option Group</a>
</div>
<div class="col s3 m3 l3 xl3" id="incidence-modifiers-button">
<a class="dropdown-trigger btn-small waves-effect waves-light" href="#" data-target="incidence-modifiers" data-position="top" data-tooltip="Incidence Modifiers are special characters or patterns, <br>which determine how often a character represented previously should occur.">incidence modifiers</a>
</div>
<ul id="incidence-modifiers" class="dropdown-content">
<li><a id="one-or-more" data-token="+" class="tooltipped" data-position ="top" data-tooltip="...occurrences of the character/token before">one or more (+)</a></li>
<li><a id="zero-or-more" data-token="*" class="tooltipped" data-position ="top" data-tooltip="...occurrences of the character/token before">zero or more (*)</a></li>
<li><a id="zero-or-one" data-token="?" class="tooltipped" data-position ="top" data-tooltip="...occurrences of the character/token before">zero or one (?)</a></li>
<li><a id="exactly-n" class="modal-trigger tooltipped" href="#exactlyN" data-token="{n}" class="" data-position ="top" data-tooltip="...occurrences of the character/token before">exactly n ({n})</a></li>
<li><a id="between-n-m" class="modal-trigger tooltipped" href="#betweenNM" data-token="{n,m}" class="" data-position ="top" data-tooltip="...occurrences of the character/token before">between n and m ({n,m})</a></li>
</ul>
<div id="ignore-case-checkbox" class="col s2 m2 l2 xl2">
<p id="ignore-case">
<label>
<input type="checkbox" class="filled-in" />
<span>Ignore Case</span>
</label>
</p>
</div>
<div class="col s2 m2 l2 xl2" id="condition-container">
<a class="btn-small tooltipped waves-effect waves-light" id="or" data-position="bottom" data-tooltip="You can add another condition to your token. <br>At least one must be fulfilled">or</a>
<a class="btn-small tooltipped waves-effect waves-light" id="and" data-position="bottom" data-tooltip="You can add another condition to your token. <br>Both must be fulfilled">and</a>
</div>
</div>
</div>
</div>
<div id="exactlyN" class="modal">
<div class="row modal-content">
<div class="input-field col s10">
<i class="material-icons prefix">mode_edit</i>
<input placeholder="type in a number for 'n'" type="text" id="n-input">
</div>
<div class="col s2">
<p class="btn-floating waves-effect waves-light" id="n-submit">
<i class="material-icons right">send</i>
</p>
</div>
</div>
</div>
<div id="betweenNM" class="modal">
<div class="row modal-content">
<div class= "input-field col s5">
<i class="material-icons prefix">mode_edit</i>
<input placeholder="number for 'n'" type="text" id="n-m-input">
</div>
<div class= "input-field col s5">
<i class="material-icons prefix">mode_edit</i>
<input placeholder="number for 'm'" type="text" id="m-input">
</div>
<div class="col s2">
<p class="btn-floating waves-effect waves-light" id="n-m-submit">
<i class="material-icons right">send</i>
</p>
</div>
</div>
</div>
</div>
</div>
</div>
{% endblock modals %} {% endblock modals %}
{% block scripts %} {% block scripts %}

View File

@ -152,11 +152,16 @@
<script> <script>
let jobDisplay = new JobDisplay(document.querySelector('#job-display')); let jobDisplay = new JobDisplay(document.querySelector('#job-display'));
let deleteJobRequestElement = document.querySelector('#delete-job-request'); let deleteJobRequestElement = document.querySelector('#delete-job-request');
let jobLogButtonElement = document.querySelector('#job-log-button');
let restartJobRequestElement = document.querySelector('#restart-job-request'); let restartJobRequestElement = document.querySelector('#restart-job-request');
deleteJobRequestElement.addEventListener('click', (event) => { deleteJobRequestElement.addEventListener('click', (event) => {
Requests.jobs.entity.delete({{ job.hashid|tojson }}); Requests.jobs.entity.delete({{ job.hashid|tojson }});
}); });
restartJobRequestElement.addEventListener('click', (event) => {
Requests.jobs.entity.restart({{ job.hashid|tojson }});
});
if ({{ current_user.is_administrator()|tojson }}) {
let jobLogButtonElement = document.querySelector('#job-log-button');
jobLogButtonElement.addEventListener('click', (event) => { jobLogButtonElement.addEventListener('click', (event) => {
Requests.jobs.entity.log({{ job.hashid|tojson }}) Requests.jobs.entity.log({{ job.hashid|tojson }})
.then( .then(
@ -168,8 +173,6 @@
}); });
}); });
}); });
restartJobRequestElement.addEventListener('click', (event) => { }
Requests.jobs.entity.restart({{ job.hashid|tojson }});
});
</script> </script>
{% endblock scripts %} {% endblock scripts %}

View File

@ -0,0 +1,271 @@
<h2>Workshop Aufgaben</h2>
<h3>Aufgabenblock 1</h3>
<p>
1. Ich möchte alle Ergebnisse für den Begriff "jüdisch" finden. Groß- und
Kleinschreibung soll dabei nicht berücksichtigt werden. Bei der richtigen
Abfrage gibt es 5 Ergebnisse.
</p>
<div class="row">
<div class="col s2">
<span class="btn waves-effect waves-light solution-button">Lösung</span>
</div>
<div class="col s10 solution-field hide">
<pre style="margin-top: 7px;"><code>[word="jüdisch" %c];</code></pre>
</div>
</div>
<p>
2. Ich möchte jetzt in einer einzigen Suchabfrage alle Ergebnisse für die
Begriffe "jüdisch", aber auch "Juden" und "jüdischer" usw. finden. Bei der
richtigen Abfrage müsste es dafür 118 Ergebnisse geben.
</p>
<div class="row">
<div class="col s2">
<span class="btn waves-effect waves-light solution-button">Lösung</span>
</div>
<div class="col s10 solution-field hide">
<pre style="margin-top: 7px;"><code>[word="j(u|ü)d.*" %c];</code></pre>
</div>
</div>
<p>
3. Ich möchte in einer einzigen Suchabfrage alle Ergebnisse für den Begriff
"jüdisch" (in sämtlichen Ableitungen s.o.) im Zusammenhang mit dem Begriff
(ebenfalls sämtliche Ableitungen) "Freund" herausfiltern. Dazwischen sollen
0 bis 10 Wörter auftauchen. Es gibt 1 Ergebnis bei der richtigen Abfrage.
</p>
<div class="row">
<div class="col s2">
<span class="btn waves-effect waves-light solution-button">Lösung</span>
</div>
<div class="col s10 solution-field hide">
<pre style="margin-top: 7px;"><code>[word="j(u|ü)d.*" %c] []{0,10} [word="freund.*" %c];</code></pre>
</div>
</div>
<p>
4. Ich möchte zuletzt in einer einzigen Suchanfrage alle Ergebnisse für
entweder "jüdisch" oder "deutsch" (in sämtlichen Ableitungen) und "Leben"
herausfiltern. Dazwischen sollen wieder 0 bis 10 Wörter auftauchen.
Es gibt wieder ein Ergebnis.
</p>
<div class="row">
<div class="col s2">
<span class="btn waves-effect waves-light solution-button">Lösung</span>
</div>
<div class="col s10 solution-field hide">
<pre style="margin-top: 7px;"><code>[word="j(u|ü)d.*" %c | word="deutsch.*" %c] []{0,10} [word=".*freund.*" %c];</code></pre>
</div>
</div>
<ul class="collapsible">
<li>
<div class="collapsible-header">Tipp 1</div>
<div class="collapsible-body">
<p>
Wörter können über den Query Builder > "Add new Token to your Query" hinzugefügt werden. Unten kann der Haken bei "Ignore Case" gesetzt werden, um Groß- und Kleinschreibung zu ignorieren.
</p>
</div>
</li>
<li>
<div class="collapsible-header">Tipp 2</div>
<div class="collapsible-body">
<p>
Über die Option Group lassen sich auch Buchstaben in der Suche definieren. So könnte als erste Option "u" und als zweite Option "ü" definiert werden. Die Suche würde dann nach beiden Varianten suchen.
Um beliebig viele Buchstaben zu finden, kann ein Wildcard-Charakter (".") verwendet werden, gefolgt von dem Incidence Modifier "zero or more" ("*"). Damit sind beliebig viele Buchstaben jeglicher Art möglich.
<pre><code>[word="l(o|u)r.*"];</code></pre> würde z.B. nach "lora", "lura", "lurum" usw. suchen.
</p>
</div>
</li>
<li>
<div class="collapsible-header">Tipp 3</div>
<div class="collapsible-body">
<p>
Um eine bestimmte Anzahl Wörter anzeigen zu lassen, die einen beliebigen
Inhalt haben dürfen, kann mit einem Empty Token (also einem nicht definierten Token)
gearbeitet werden. Dieser kann über das Dropdown "Which kind of token are you looking for?"
hinzugefügt werden. Dem leeren Token kann dann ein Incidence Modifier zugewiesen werden, der
die Anzahl der Wörter auf 0 bis 10 begrenzt (between n and m).
<pre><code>[]{0,10};</code></pre>
</p>
</div>
</li>
<li>
<div class="collapsible-header">Tipp 4</div>
<div class="collapsible-body">
<p>
Wenn entweder das eine oder das andere Wort auftauchen soll, kann der "OR"-Operator in der unteren Options-Leiste verwendet werden.
Dann kann ein zweiter Wert eingetragen werden. Einer von beiden muss dann auftauchen.
<pre><code>[word="lorem" %c | word="ipsum" %c];</code></pre>
</p>
</div>
</li>
</ul>
<h3>Aufgabenblock 2</h3>
<p>
1. Ich möchte in einer Suchanfrage alle Ergebnisse für Wortfolgen, in denen
das Wort "jüdisch" (mit sämtlichen Ableitungen) vorkommt. Vor dem Wort
soll ein Adjektiv auftauchen, auf das 0-5 Wörter folgen sollen. Die
richtige Abfrage ergibt 36 Ergebnisse.
</p>
<div class="row">
<div class="col s2">
<span class="btn waves-effect waves-light solution-button">Lösung</span>
</div>
<div class="col s10 solution-field hide">
<pre style="margin-top: 7px;"><code>[simple_pos="ADJ"] []{0,5} [word="j(u|ü)d.*" %c];</code></pre>
</div>
</div>
<p>
2. Ich möchte in einer Suchanfrage alle Ergebnisse für Wortfolgen, in denen
das Wort "jüdisch" (mit sämtlichen Ableitungen) ein Adjektiv ist und direkt
von einem Nomen gefolgt wird. Die richtige Abfrage ergibt 27 Ergebnisse.
</p>
<div class="row">
<div class="col s2">
<span class="btn waves-effect waves-light solution-button">Lösung</span>
</div>
<div class="col s10 solution-field hide">
<pre style="margin-top: 7px;"><code>[word="j(u|ü)d.*" %c & simple_pos="ADJ"] [simple_pos="NOUN"];</code></pre>
</div>
</div>
<p>
3. Ich möchte eine Suchanfrage aller Ergebnisse für Wortfolgen, in denen das
Wort "jüdisch" (mit sämtlichen Ableitungen) von dem Lemma "sein" gefolgt
wird. Dazwischen dürfen 0 bis 5 beliebige Wörter vorkommen. Die richtige Abfrage
ergibt 16 Ergebnisse.
</p>
<div class="row">
<div class="col s2">
<span class="btn waves-effect waves-light solution-button">Lösung</span>
</div>
<div class="col s10 solution-field hide">
<pre style="margin-top: 7px;"><code>[word="J(u|ü)d.*" %c] []{0,5} [lemma="sein" %c];</code></pre>
</div>
</div>
<ul class="collapsible">
<li>
<div class="collapsible-header">Tipp 1</div>
<div class="collapsible-body">
<p>
Über die Token-Suchauswahl kann "simple_pos" ausgewählt werden. Darüber
findet man sämtliche Werte, zum Beispiel "ADJ" für Adjektive oder "NOUN" für Nomen.
</p>
</div>
</li>
<li>
<div class="collapsible-header">Tipp 2</div>
<div class="collapsible-body">
<p>
Dem Wort kann eine zweite Token-Eingrenzung hinzugefügt werden. Dafür nutzen wir den
"AND"-Operator. Dort können wir über die Token-Suchauswahl "simple_pos" auswählen und
dann einen Wert hinzufügen. Somit muss das gesuchte Wort
ebenfalls den simple_pos-Wert haben, also zum Beispiel ein Adjektiv sein.
<pre><code>[word="lorem" & simple_pos="NOUN"];</code></pre>
</p>
</div>
</li>
<li>
<div class="collapsible-header">Tipp 3</div>
<div class="collapsible-body">
<p>
Ich kann ebenfalls nach der Grundform (Lemma) eines Wortes suchen. Dafür
muss ich in der Token-Suchauswahl "lemma" auswählen und dann den Wert - genau wie beim "word" -
eintragen.
<pre><code>[lemma="lorem" %c];</code></pre>
</p>
</div>
</li>
</ul>
<h3>Aufgabenblock 3</h3>
<p>
1. Ich möchte in einer Suchanfrage alle Ergebnisse für "jüdisch"
(in sämtlichen Ableitungen) im Zusammenhang mit dem Lemma "sprechen"
(in sämtlichen Ableitungen, also auch "besprechen", "versprechen" usw.)
erhalten. Dabei möchte ich nur Ergebnisse innerhalb eines Satzes berücksichtigen.
Vor, zwischen und nach den Begriffen dürfen beliebig viele Wörter auftauchen.
Die richtige Abfrage ergibt 2 Treffer.
</p>
<div class="row">
<div class="col s2">
<span class="btn waves-effect waves-light solution-button">Lösung</span>
</div>
<div class="col s10 solution-field hide">
<pre style="margin-top: 7px;"><code>&lt;s&gt; []* [word="j(u|ü)d.*" %c] []* [lemma=".*sprechen.*" %c] []* &lt;/s&gt;</code></pre>
</div>
</div>
<p>
2. Ich möchte in einer Suchanfrage alle Ergebnisse für Sätze, in denen eine
Person im Zusammenhang mit dem Lemma "helfen" (in sämtlichen Schreibweisen,
also auch "verhelfen" etc.) vorkommt. Vor, zwischen und nach den gesuchten
Werten dürfen beliebig viele Wörter vorkommen. Der als Person deklarierte
Wert darf ebenfalls beliebig lang sein. Die richtige Abfrage ergibt 6 Treffer.
</p>
<div class="row">
<div class="col s2">
<span class="btn waves-effect waves-light solution-button">Lösung</span>
</div>
<div class="col s10 solution-field hide">
<pre style="margin-top: 7px;"><code>&lt;s&gt;[]* &lt;ent_type="PER"&gt; []* &lt;/ent_type&gt; []* [lemma=".*helfen.*" %c] []* &lt;/s&gt;;</code></pre>
</div>
</div>
<p>
3. Ich möchte in einer Suchabfrage alle Ergebnisse für Sätze, in denen der
Begriff "jüdisch" (in sämtlichen Ableitungen) im Zusammenhang mit einer
Organisation auftaucht. Vor, zwischen und nach den gesuchten Werten dürfen
beliebig viele Wörter vorkommen. Der als Organisation deklarierte Wert darf
ebenfalls beliebig lang sein. Die richtige Abfrage ergibt 4 Treffer.
</p>
<div class="row">
<div class="col s2">
<span class="btn waves-effect waves-light solution-button">Lösung</span>
</div>
<div class="col s10 solution-field hide">
<pre style="margin-top: 7px;"><code>&lt;s&gt;[]* [word="j(u|ü)d.*" %c][]* &lt;ent_type="ORG"&gt; []* &lt;/ent_type&gt; []*&lt;/s&gt;;</code></pre>
</div>
</div>
<ul class="collapsible">
<li>
<div class="collapsible-header">Tipp 1</div>
<div class="collapsible-body">
<p>
Ich kann über den Button "Add structural attributes to your query" umschließende
Satz-Tags setzen. Dafür muss ich auf den Button "Sentence" klicken und nachdem ich meine Anfrage
erstellt habe wieder auf "End Sentence" an der gleichen Stelle.
</p>
<p>
Wenn ich mit Sätzen arbeite, darf ich nicht die Platzhalter-Token ("<code>[]*</code>") vergessen, da sonst
ausschließlich Sätze mit genau den gesuchten Wörtern gefunden werden.
</p>
<p>
Wir haben bisher nur mit beliebigen Endungen eines Wortes in Form von
<code>[word="lore.*"]</code> gearbeitet. Das gleiche funktioniert auch am
Anfang eines Wortes, indem ich beliebig viele Wildcard-Character an den Anfang
des Wortes setze: <code>[word=".*rem"]</code>.
</p>
</div>
</li>
<li>
<div class="collapsible-header">Tipp 2</div>
<div class="collapsible-body">
<p>
Über den Button "Add structural attributes to your query" kann ich auch Entitäten bestimmen.
Für unser Beispiel arbeiten wir ausschließlich mit den german ent_types. Deren Definitionen kann ich
mir in den Tagset-Listen anschauen. Hier kann ich dann die gewünschte Entität auswählen. Wenn
der gesuchte Wert beliebig sein darf, muss ich zwischen den öffnenden und schließenden ent-tag ein
Platzhalter-Token setzen, das beliebig lang sein darf ("<code>[]*</code>").
<pre><code>&lt;ent_type="LOC"&gt; []* &lt;/ent_type&gt;</code></pre>
</p>
</div>
</li>
</ul>
<script>
// Wire up every "Lösung" button so that a click shows/hides the solution
// that belongs to the same task.
let solutionButtons = document.querySelectorAll('.solution-button');
solutionButtons.forEach((button) => {
  button.addEventListener('click', () => {
    // Anchor the lookup on the button itself instead of event.target:
    // event.target is the innermost element that was clicked, which is not
    // guaranteed to be the button (e.g. if the button ever contains child
    // elements), so walking two parents up from it could miss the row.
    let taskRow = button.closest('.row');
    if (taskRow === null) {return;}
    let solutionField = taskRow.querySelector('.solution-field');
    // Ignore buttons whose row has no solution field instead of throwing.
    if (solutionField === null) {return;}
    solutionField.classList.toggle('hide');
  });
});
</script>

View File

@ -0,0 +1,82 @@
<h2>Vorbereitungen</h2>
<div class="row">
<div class="col s12 m5">
<img class="materialboxed responsive-img" alt="Dashboard" src="{{ url_for('static', filename='images/workshops/fgho_sommerschule_2023/dashboard.png') }}">
</div>
<div class="col s12 m7">
<p>
Navigiere zum Abschnitt "<a href="{{ url_for('main.dashboard', _anchor='corpora') }}">My Corpora</a>"
auf der <a href="{{ url_for('main.dashboard') }}">Dashboard</a> Seite.
</p>
<p>
Nutze dort den "<a href="{{ url_for('corpora.create_corpus') }}">Create corpus +</a>"
Button, um einen neuen Korpus zu erstellen.
</p>
</div>
</div>
<hr>
<div class="row">
<div class="col s12 m5">
<img class="materialboxed responsive-img" alt="Dashboard" src="{{ url_for('static', filename='images/workshops/fgho_sommerschule_2023/create_corpus.png') }}">
</div>
<div class="col s12 m7">
<p>
Trage in dem Formular einen <b>Titel</b> und eine <b>Beschreibung</b> ein, die Felder
können frei befüllt werden. Die Angaben sollen dir in Zukunft helfen den
Korpus wiederzufinden, wenn deine Korpusliste sich füllt.
</p>
</div>
</div>
<hr>
<div class="row">
<div class="col s12 m5">
<img class="materialboxed responsive-img" alt="Dashboard" src="{{ url_for('static', filename='images/workshops/fgho_sommerschule_2023/empty_corpus.png') }}">
</div>
<div class="col s12 m7">
<p>
Nachdem du den Korpus erstellt hast, wirst du zur Korpusübersicht
weitergeleitet. Diese zeigt einen leeren Korpus, in dem noch keine
Korpusdateien hinterlegt sind. Um den Korpus mit Texten zu füllen,
wird der „<b>+ Add corpus file</b>“ Button benutzt.
</p>
</div>
</div>
<hr>
<div class="row">
<div class="col s12 m5">
<img class="materialboxed responsive-img" alt="Dashboard" src="{{ url_for('static', filename='images/workshops/fgho_sommerschule_2023/add_corpus_file.png') }}">
</div>
<div class="col s12 m7">
<p>
In dem folgenden Formular werden nun Metadaten zu dem Text, den wir
hinzufügen wollen, hinterlegt. Diese Daten sollten sorgfältig eingetragen
werden, da sie in der Analyse mit einbezogen werden. Mit dem „File“-Feld
muss eine Textdatei im „.vrt“ Format ausgewählt werden.
</p>
<p>
Füge deinem neu erstellten Korpus so die zwei, <b>im Workshop zur Verfügung
gestellten</b>, Texte hinzu.
</p>
</div>
</div>
<hr>
<div class="row">
<div class="col s12 m5">
<img class="materialboxed responsive-img" alt="Dashboard" src="{{ url_for('static', filename='images/workshops/fgho_sommerschule_2023/corpus.png') }}">
</div>
<div class="col s12 m7">
<p>
Nachdem die Texte dem Korpus hinzugefügt wurden, sollte deine
Korpusübersicht wie in dem Bild aussehen. Nutze nun die „<b>Build</b>“-Aktion,
um den Korpus für eine Analyse vorzubereiten.
</p>
</div>
</div>

View File

@ -0,0 +1,23 @@
{% extends "base.html.j2" %}
{% block page_content %}
<div class="container">
<div class="row">
<div class="col s12">
<h1 id="title">{{ title }}</h1>
</div>
<div class="col s12">
<a class="btn waves-effect waves-light" href="#fgho-sommerschule-2023-workshop-aufgaben">Hier geht es zu den Aufgaben<i class="material-icons right">send</i></a>
</div>
<div class="col s12" id="fgho-sommerschule-2023-vorbereitungen">
{% include "workshops/_fgho_sommerschule_2023/_vorbereitungen.html.j2" %}
</div>
<div class="col s12" id="fgho-sommerschule-2023-workshop-aufgaben">
{% include "workshops/_fgho_sommerschule_2023/_aufgaben.html.j2" %}
</div>
</div>
</div>
{% endblock page_content %}

View File

@ -7,7 +7,7 @@ from app.models import User
@socketio.on('GET /users/<user_id>') @socketio.on('GET /users/<user_id>')
@socketio_login_required @socketio_login_required
def get_user(user_hashid, backrefs=False, relationships=False): def get_user(user_hashid):
user_id = hashids.decode(user_hashid) user_id = hashids.decode(user_hashid)
user = User.query.get(user_id) user = User.query.get(user_id)
if user is None: if user is None:
@ -15,12 +15,9 @@ def get_user(user_hashid, backrefs=False, relationships=False):
if not (user == current_user or current_user.is_administrator()): if not (user == current_user or current_user.is_administrator()):
return {'status': 403, 'statusText': 'Forbidden'} return {'status': 403, 'statusText': 'Forbidden'}
return { return {
'body': user.to_json_serializeable( 'body': user.to_json_serializeable(backrefs=True, relationships=True),
backrefs=backrefs,
relationships=relationships
),
'status': 200, 'status': 200,
'statusText': 'OK', 'statusText': 'OK'
} }

View File

@ -7,29 +7,29 @@ from app.models import Avatar, User
from . import bp from . import bp
# @bp.route('/<hashid:user_id>', methods=['DELETE']) @bp.route('/<hashid:user_id>', methods=['DELETE'])
# @content_negotiation(produces='application/json') @content_negotiation(produces='application/json')
# def delete_user(user_id): def delete_user(user_id):
# def _delete_user(app, user_id): def _delete_user(app, user_id):
# with app.app_context(): with app.app_context():
# user = User.query.get(user_id) user = User.query.get(user_id)
# user.delete() user.delete()
# db.session.commit() db.session.commit()
# user = User.query.get_or_404(user_id) user = User.query.get_or_404(user_id)
# if not (user == current_user or current_user.is_administrator()): if not (user == current_user or current_user.is_administrator()):
# abort(403) abort(403)
# thread = Thread( thread = Thread(
# target=_delete_user, target=_delete_user,
# args=(current_app._get_current_object(), user.id) args=(current_app._get_current_object(), user.id)
# ) )
# if user == current_user: if user == current_user:
# logout_user() logout_user()
# thread.start() thread.start()
# response_data = { response_data = {
# 'message': f'User "{user.username}" marked for deletion' 'message': f'User "{user.username}" marked for deletion'
# } }
# return response_data, 202 return response_data, 202
# @bp.route('/<hashid:user_id>/avatar', methods=['DELETE']) # @bp.route('/<hashid:user_id>/avatar', methods=['DELETE'])

View File

@ -0,0 +1,5 @@
from flask import Blueprint
bp = Blueprint('workshops', __name__)
from . import routes

18
app/workshops/routes.py Normal file
View File

@ -0,0 +1,18 @@
from flask import redirect, render_template, url_for
from flask_breadcrumbs import register_breadcrumb
from . import bp
@bp.route('')
@register_breadcrumb(bp, '.', '<i class="material-icons left">business_center</i>Workshops')
def workshops():
    """Handle the blueprint root by redirecting to the 'main.dashboard' endpoint.

    The view renders nothing itself; it is also registered as the root
    breadcrumb ('.') of this blueprint.
    """
    dashboard_url = url_for('main.dashboard')
    return redirect(dashboard_url)
@bp.route('/fgho_sommerschule_2023')
@register_breadcrumb(bp, '.fgho_sommerschule_2023', 'FGHO Sommerschule 2023')
def fgho_sommerschule_2023():
    """Render the page for the "FGHO Sommerschule 2023" workshop.

    Returns the rendered 'workshops/fgho_sommerschule_2023.html.j2' template
    with the workshop name passed as ``title``.
    """
    template_name = 'workshops/fgho_sommerschule_2023.html.j2'
    page_title = 'FGHO Sommerschule 2023'
    return render_template(template_name, title=page_title)

View File

@ -1,5 +1,5 @@
apifairy apifairy
cqi cqi>=0.1.5
dnspython==2.2.1 dnspython==2.2.1
docker docker
eventlet eventlet
@ -7,7 +7,7 @@ Flask==2.1.3
Flask-APScheduler Flask-APScheduler
Flask-Assets Flask-Assets
Flask-Breadcrumbs Flask-Breadcrumbs
Flask-Hashids==1.0.1 Flask-Hashids>=1.0.1
Flask-HTTPAuth Flask-HTTPAuth
Flask-Login Flask-Login
Flask-Mail Flask-Mail
@ -20,6 +20,7 @@ Flask-WTF
hiredis hiredis
MarkupSafe==2.0.1 MarkupSafe==2.0.1
marshmallow-sqlalchemy==0.29.0 marshmallow-sqlalchemy==0.29.0
nltk
psycopg2 psycopg2
PyJWT PyJWT
pyScss pyScss