212 Commits

Author SHA1 Message Date
cf8c164d60 allow .whl files for spacy nlp pipeline contributions 2024-02-14 14:24:21 +01:00
05ab204e5a Merge branch 'master' into development 2024-02-14 13:43:45 +01:00
9f188afd16 Bump nopaque version 2024-02-14 13:42:20 +01:00
dc77ac7b76 Add new spaCy NLP Pipeline version 2024-02-14 13:40:49 +01:00
84276af322 Merge branch 'development' 2024-01-23 14:38:24 +01:00
d9d4067536 Set new version in config 2024-01-23 14:38:10 +01:00
ba65cf5911 Merge branch 'development' 2024-01-23 14:19:06 +01:00
69a1edc51e fix eventlet version 2024-01-23 13:31:15 +01:00
32ad8c7359 News + user avatar fix 2024-01-22 10:58:52 +01:00
8c0843d2d0 Merge branch 'development' 2023-12-21 14:28:10 +01:00
d4c9ab5821 Add user reste cli command 2023-12-21 14:27:50 +01:00
518a245133 Merge branch 'development' 2023-12-21 13:54:25 +01:00
b6864b355a Bug fixes 2023-12-21 13:03:58 +01:00
0a45e1bb65 Bug fixes 2023-12-21 12:48:50 +01:00
08ca938333 Corrections in Terms of Use 2023-12-21 09:31:42 +01:00
cfdef8d1fa New terms of use + privacy statement 2023-12-20 15:37:59 +01:00
5dce269736 Version number + original slogan font 2023-12-18 12:49:30 +01:00
13369296d3 rename docker-entrypoint.sh to docker-nopaque-entrypoint.sh 2023-12-15 13:56:03 +01:00
4f6e1c121f Add nopaque version config variable 2023-12-15 08:47:59 +01:00
438a257fe3 Update CI script 2023-12-15 08:47:46 +01:00
2e88d7d035 Merge branch 'query-builder' of gitlab.ub.uni-bielefeld.de:sfb1288inf/nopaque into query-builder 2023-12-15 08:37:02 +01:00
b338c33d42 Bump cwb version 2023-12-15 08:36:50 +01:00
d6cebddd92 Updated query builder gifs and instructions 2023-12-12 14:56:08 +01:00
07fda0e95a Merge branch 'query-builder' of gitlab.ub.uni-bielefeld.de:sfb1288inf/nopaque into query-builder 2023-12-07 22:35:41 +01:00
3927d9e4cd Edits in structural attributes section and others 2023-12-07 22:34:00 +01:00
8f5d5ffdec Merge branch 'query-builder' of gitlab.ub.uni-bielefeld.de:sfb1288inf/nopaque into query-builder 2023-12-07 12:46:48 +01:00
f02d1619e2 Try to implement anchor tags 2023-12-07 12:46:37 +01:00
892f1f799e Merge branch 'query-builder' of gitlab.ub.uni-bielefeld.de:sfb1288inf/nopaque into query-builder 2023-12-05 15:00:49 +01:00
f5e98ae655 Add badges to README 2023-12-05 15:00:21 +01:00
f790106e0e Merge branch 'query-builder' of gitlab.ub.uni-bielefeld.de:sfb1288inf/nopaque into query-builder 2023-12-05 14:54:05 +01:00
c57acc73d2 Manual changes 2023-12-05 14:42:38 +01:00
678a0767b7 Change Manual icon 2023-11-30 11:21:39 +01:00
17a9338d9f Fix job deletion from job page 2023-11-29 16:11:14 +01:00
a7cbce1eda Fix wrong spacy-nlp-pipeline version number 2023-11-29 10:45:35 +01:00
fa28c875e1 Merge branch 'query-builder' of gitlab.ub.uni-bielefeld.de:sfb1288inf/nopaque into query-builder 2023-11-28 12:40:05 +01:00
0927edcceb Bug Fixes 2023-11-28 12:39:54 +01:00
9c22370eea Implement force download parameter in model insert_defaults methods 2023-11-28 12:10:55 +01:00
bdcc80a66f Add new tesseract-ocr-pipeline version. Remove redundant spacy-nlp-pipeline version. 2023-11-28 10:34:30 +01:00
9be5ce6014 link logo to homepage 2023-11-23 13:32:54 +01:00
00e4c3ade3 Add logo to sidenav 2023-11-23 13:26:19 +01:00
79a16cae83 Add links to my profile page 2023-11-23 13:16:21 +01:00
c5aea0be94 Merge branch 'query-builder' of gitlab.ub.uni-bielefeld.de:sfb1288inf/nopaque into query-builder 2023-11-22 12:50:18 +01:00
afcb890ccf Element Target drag&drop + small improvements 2023-11-22 12:50:08 +01:00
9627708950 rename manual files to fit new naming convention 2023-11-21 12:31:10 +01:00
1bb1408988 make the workshops package fit the new file scheme 2023-11-21 10:11:49 +01:00
79bafdea89 Switch back to older settings and extension .vscode setup 2023-11-20 15:26:22 +01:00
a2d617718b Update .vscode directory contents 2023-11-20 11:05:56 +01:00
691b2de5b2 Bug Fix: lock chips after switch to QB 2023-11-20 09:48:06 +01:00
eb0e7c9ba1 Fix error on not authenticated users 2023-11-20 09:35:53 +01:00
ab132746e7 Add TODO in migration scripts 2023-11-17 10:42:55 +01:00
ae5646512d Merge branch 'query-builder' of gitlab.ub.uni-bielefeld.de:sfb1288inf/nopaque into query-builder 2023-11-17 10:15:50 +01:00
fc66327920 Make double quotation marks escapable again 2023-11-17 10:15:39 +01:00
9bfc96ad41 minor codestyle fix 2023-11-16 17:22:07 +01:00
008938b46b Avatar in top right corner 2023-11-16 15:57:27 +01:00
4f24e9f9da Erase meta data logic from struc attribute builder 2023-11-14 09:48:38 +01:00
d0fe4360bb Merge branch 'query-builder' of gitlab.ub.uni-bielefeld.de:sfb1288inf/nopaque into query-builder 2023-11-13 15:37:26 +01:00
1c18806c9c Merge branch 'query-builder' of gitlab.ub.uni-bielefeld.de:sfb1288inf/nopaque into query-builder 2023-11-13 15:53:17 +01:00
9487aa7a60 Restructure modals and base template 2023-11-13 15:53:14 +01:00
6559051fd5 Delete condition logic in token builder 2023-11-13 15:37:19 +01:00
0882e085a3 Function renaming 2023-11-13 14:46:19 +01:00
ff1bcb40f3 update query builder code to fit the new style 2023-11-13 14:20:19 +01:00
d298b200dc Move javascript files to fit new style 2023-11-13 12:59:36 +01:00
660d7ebc99 Fix sidenav profile entries 2023-11-13 12:46:48 +01:00
df33c7b36d Fix old Utils references in js 2023-11-13 10:30:24 +01:00
bf8b22fb58 Merge branch 'query-builder' of gitlab.ub.uni-bielefeld.de:sfb1288inf/nopaque into query-builder 2023-11-13 09:43:03 +01:00
b216ad8a40 QB parts as extensions 2023-11-13 09:42:56 +01:00
4822f6ec02 integrate js cqi into corpus_analysis package 2023-11-10 10:27:39 +01:00
61be3345be some javascript fixes after namespace implementation 2023-11-09 15:51:00 +01:00
e9ddb85f03 Merge branch 'query-builder' of gitlab.ub.uni-bielefeld.de:sfb1288inf/nopaque into query-builder 2023-11-09 14:30:10 +01:00
e3166ca54c Use a single js namespace as parent for all other nopaque namespaces. 2023-11-09 14:29:01 +01:00
0565f309f8 Split QB back to mult. classes, as far as possible 2023-11-08 15:46:53 +01:00
1f40002249 Merge branch 'query-builder' of gitlab.ub.uni-bielefeld.de:sfb1288inf/nopaque into query-builder 2023-11-07 13:24:11 +01:00
1ff9c8bfe3 Query Builder in one class 2023-11-07 13:24:01 +01:00
e8fe67d290 Some code cleanup 2023-10-30 11:36:28 +01:00
fbb32ef580 Merge branch 'query-builder' of gitlab.ub.uni-bielefeld.de:sfb1288inf/nopaque into query-builder 2023-10-26 15:18:32 +02:00
985e9b406f Editing nested token queries and bug fixes 2023-10-26 15:18:03 +02:00
0abfe65afa Bring back community update 2/x 2023-10-25 16:21:30 +02:00
f4d3415c11 First work to bring back Community Update functionality 2023-10-24 16:11:08 +02:00
965f2854b2 Add comments to JavaScript and some restructuring 2023-10-24 15:09:20 +02:00
f101a742a9 Fix broken dependency with Flask-Assets >2.0 2023-10-24 13:18:46 +02:00
c046fbfb1e Merge branch 'query-builder' of gitlab.ub.uni-bielefeld.de:sfb1288inf/nopaque into query-builder 2023-10-19 10:21:23 +02:00
8997d3ad67 New condition section token builder 2023-10-19 10:21:12 +02:00
bf249193af integrate all js files into assets 2023-10-12 14:13:47 +02:00
c40e428eb2 add more constants and type hints to cqi package 2023-10-12 10:27:28 +02:00
4daf3359b9 move constants in cqi package into seperate file 2023-10-12 10:03:12 +02:00
d875623a8c Remove clickable class from not clickable elements 2023-10-11 16:23:10 +02:00
067318bb89 Huge List class update 2023-10-11 16:20:17 +02:00
a9203cc409 Fix forms and displays 2023-10-11 14:26:07 +02:00
78dd375ef8 Performance update for the docker entrypoint script 2023-10-10 15:28:10 +02:00
82cd384e5f Merge branch 'query-builder' of gitlab.ub.uni-bielefeld.de:sfb1288inf/nopaque into query-builder 2023-10-10 11:06:50 +02:00
c7dab5e502 intermediate update on displays and forms 1/2 2023-10-10 11:06:44 +02:00
d3cfd2cfaf Editing Meta Data and Tokens 2023-10-09 16:30:46 +02:00
14c10aeab1 Merge branch 'query-builder' of gitlab.ub.uni-bielefeld.de:sfb1288inf/nopaque into query-builder 2023-10-09 15:42:10 +02:00
2dec17b1b9 Editing Entity Type 2023-10-09 15:42:01 +02:00
9fe38fab52 Remove debug messages 2023-10-09 14:34:06 +02:00
e20dd01710 Better auto initialization method for forms and resource displays 2023-10-09 14:21:31 +02:00
1b974f0bbc Fix Requests usage again 2023-10-06 15:04:36 +02:00
c6be72d0a7 Update broken requests 2023-10-06 11:55:31 +02:00
d3f2d5648e Further javascript improvements 2023-10-05 16:08:04 +02:00
7cae84ffdc Make the joblist clickable again 2023-10-05 14:19:46 +02:00
1d6834302d Change js structure for displays 2023-10-05 14:11:17 +02:00
53f4400731 rename Requests namespace to requests 2023-10-04 14:07:39 +02:00
f36600f06c downgrade python to v3.10.13 2023-10-04 13:48:32 +02:00
068211a72b add missing semicolons 2023-10-04 13:48:10 +02:00
f566e276a1 use better js naming conventions 2023-10-04 12:32:27 +02:00
c605613d86 Fix path 2023-09-26 15:08:22 +02:00
d1fc425f48 Update docker compose file examples 2023-09-26 15:02:02 +02:00
b8ae221987 Expert Mode - Query Builder Switch Parser v1 2023-09-25 14:40:39 +02:00
b50147a66a Use IP instead of Hostname again... 2023-09-25 13:43:17 +02:00
18311c8c9c Use Hostname for cqpserver again... 2023-09-25 13:39:39 +02:00
2dc54f4258 Merge branch 'query-builder' of gitlab.ub.uni-bielefeld.de:sfb1288inf/nopaque into query-builder 2023-09-25 13:28:51 +02:00
bcdc3721ef Don't use container name as host for cqiclient 2023-09-25 13:28:48 +02:00
60bcaa9e01 Corpus Analysis Asset update 2023-09-25 12:42:10 +02:00
af89a5776f add missing dot 2023-09-25 10:18:19 +02:00
fcbf9c8cb6 Set default values in docker compose 2023-09-25 10:17:06 +02:00
cc6ce6e1f3 Merge branch 'query-builder' of gitlab.ub.uni-bielefeld.de:sfb1288inf/nopaque into query-builder 2023-09-25 10:11:51 +02:00
4581367d04 Restructure startup procedure 2023-09-25 10:11:11 +02:00
d7f00f6337 Merge branch 'query-builder' of gitlab.ub.uni-bielefeld.de:sfb1288inf/nopaque into query-builder 2023-09-18 17:05:10 +02:00
86947e2cf8 First parser text to query Chip 2023-09-18 17:05:01 +02:00
4a9a03e648 Update cwb image 2023-09-15 11:42:37 +02:00
45369d4c84 Merge branch 'query-builder' of gitlab.ub.uni-bielefeld.de:sfb1288inf/nopaque into query-builder 2023-09-12 16:42:38 +02:00
f56e951b71 Locking end-tag of structural attributes, ... 2023-09-12 16:42:28 +02:00
d776e11fe5 Bump python version 2023-09-11 14:39:05 +02:00
9200837e63 Use new cqi version. No chunking needed anymore 2023-09-08 11:12:43 +02:00
aad347caa0 Fix problems with Flask-Breadcrumbs (use fixed Flask-Menu versio) 2023-09-06 13:59:39 +02:00
9ccab8657a Query Builder: Incidence Modifier for tokens 2023-08-29 17:06:10 +02:00
fe7f69d596 QB form update + incidence modifier 2023-08-21 07:26:54 +02:00
8a5c94f448 Bug fix 2023-08-11 14:38:18 +02:00
3d38e550a0 Update Positional Attribute Modal Query Builder 2023-08-11 13:55:41 +02:00
1387d80a26 Update cqi utils 2023-08-11 13:50:56 +02:00
5c00c5740e upgrade cqi to 0.1.6 2023-08-11 10:49:40 +02:00
04575b78cf Codestyle enhancements 2023-08-10 15:48:49 +02:00
2951fc6966 fix id issues 2023-08-08 16:08:58 +02:00
bf0213edbc Merge branch 'query-builder' of gitlab.ub.uni-bielefeld.de:sfb1288inf/nopaque into query-builder 2023-08-08 16:00:34 +02:00
c843fbb437 changing id_prefix 2023-08-08 16:00:30 +02:00
1dc7d2a1c6 remove prefix stuff 2023-08-08 16:00:05 +02:00
173aea7df4 Fix id reference errors 2023-08-08 14:33:07 +02:00
f1962b3b47 add id_prefix to query builder macro 2023-08-08 14:19:50 +02:00
dd04623278 Merge branch 'query-builder' of gitlab.ub.uni-bielefeld.de:sfb1288inf/nopaque into query-builder 2023-08-08 14:15:44 +02:00
5e8008399d First Query Builder/Expert Mode implementation 2023-08-08 14:12:07 +02:00
0d92f221cb rename ui-form 2023-08-08 14:07:07 +02:00
766c5ba27d Update corpus analysis extensions to use dynamic id prefixes for elements 2023-08-08 12:21:47 +02:00
661ac7c509 Fix macro problems in corpus analysis 2023-08-08 11:28:10 +02:00
3b390858ff Use macros for html generation instead of variables 2023-08-08 10:48:36 +02:00
ae8e383085 First rearrangement Query Builder 2023-08-02 14:14:46 +02:00
9ac626c64d Merge branch 'development' 2023-07-26 20:44:54 +02:00
d0c6b2b9e5 Merge branch 'development' of gitlab.ub.uni-bielefeld.de:sfb1288inf/nopaque into development 2023-07-26 20:37:25 +02:00
8277e60689 Merge branch 'visualizations-update' into development 2023-07-26 20:36:57 +02:00
8b887d79ef Workshop Aufgaben update 2023-07-26 20:36:24 +02:00
c9ad538bee Merge branch 'development' 2023-07-26 11:35:39 +02:00
983400b925 Merge branch 'visualizations-update' into development 2023-07-26 11:32:11 +02:00
37f9e1281d Merge branch 'visualizations-update' of gitlab.ub.uni-bielefeld.de:sfb1288inf/nopaque into visualizations-update 2023-07-26 11:25:37 +02:00
5eef2292e7 bug fixes 2023-07-26 11:25:32 +02:00
351da5d4e9 Fix admin delete user in AdminUserList.js 2023-07-26 10:53:34 +02:00
27fe4a95e4 Add "(beta)" to Static Visualization + small fixes 2023-07-26 09:03:36 +02:00
0627b27ec7 Merge branch 'visualizations-update' of gitlab.ub.uni-bielefeld.de:sfb1288inf/nopaque into visualizations-update 2023-07-25 17:16:25 +02:00
adfd229e66 FGHO Sommerschule 2023 Aufgaben 2023-07-25 17:16:20 +02:00
ae6a7cb86d Add vorbereitungen section for workshop fgho 2023 2023-07-25 16:04:45 +02:00
2dd6015ba6 Merge branch 'visualizations-update' of gitlab.ub.uni-bielefeld.de:sfb1288inf/nopaque into visualizations-update 2023-07-25 15:20:23 +02:00
f80b635ca3 Add workshops package 2023-07-25 15:18:57 +02:00
0e8a87d34e Query Builder fixes 2023-07-25 14:56:07 +02:00
ccf7f449dd Bump cqi version 2023-07-24 15:12:05 +02:00
dd05657362 Fix wrong pagination handling in concordance 2023-07-24 13:48:01 +02:00
cef82d9001 Merge branch 'visualizations-update' of gitlab.ub.uni-bielefeld.de:sfb1288inf/nopaque into visualizations-update 2023-07-24 10:02:44 +02:00
656eef17db unify get_user event via socketio 2023-07-24 10:02:35 +02:00
104c2fe468 Merge branch 'visualizations-update' of gitlab.ub.uni-bielefeld.de:sfb1288inf/nopaque into visualizations-update 2023-07-21 13:14:39 +02:00
d08f95e944 dynamic token visualization 2023-07-21 13:14:29 +02:00
87e2c2b484 Bump Flask-Hashids version 2023-07-19 11:10:50 +02:00
7a925b6a19 Better error handling in CorpusAnalysisApp 2023-07-18 17:18:04 +02:00
e4f435c5ee small fix 2023-07-18 16:07:06 +02:00
7721926d6c Merge branch 'visualizations-update' of gitlab.ub.uni-bielefeld.de:sfb1288inf/nopaque into visualizations-update 2023-07-18 16:05:09 +02:00
691d4757ff Token list first implementation+ query builder fix 2023-07-18 16:01:31 +02:00
6c744fc3ba Query Builder fix 2023-07-18 14:05:11 +02:00
e46f0032bd Show static visualizations only on overview page 2023-07-17 12:35:53 +02:00
9da1a6e987 Add status text to corpus analysis app startup modal 2023-07-17 10:40:34 +02:00
8182cccecd Update cqi to v0.1.4 2023-07-14 12:59:27 +02:00
d898cd8516 Some Codestyle enhancements 2023-07-13 15:27:49 +02:00
4ae4b88a44 Cleanup in cqi over socketio 2023-07-13 12:42:47 +02:00
b7483af8e9 Bump Socket.IO client version 2023-07-13 12:40:37 +02:00
41d8dbad5d simplify decompression and decoding 2023-07-12 11:25:58 +02:00
203faa4257 Remove debug messages and move data inflation into api fn 2023-07-12 11:11:15 +02:00
960f36c740 Fix BrokenPipeError handling in cqi_over_socketio 2023-07-12 10:54:52 +02:00
c3834ca400 Outsource Static Viz to extensions logic 2023-07-11 15:52:44 +02:00
572fdf3a00 Small updates custom stopword list 2023-07-11 13:40:20 +02:00
22b43a689f Merge branch 'visualizations-update' of gitlab.ub.uni-bielefeld.de:sfb1288inf/nopaque into visualizations-update 2023-07-11 09:33:22 +02:00
deec9e8a76 Custom Stopword List Settings 2023-07-11 09:33:11 +02:00
688b96ffee remove debug messages and increase chunk size in cqi 2023-07-07 11:47:34 +02:00
a9973e9c8e Add compression to static corpus data, use chunked computation, hide read corpus ids in corpus analysis 2023-07-06 13:02:22 +02:00
413b6111df Implement fast boundary computation for ent and s s_attrs 2023-07-03 15:31:28 +02:00
a9f05fffdf Fix Error handling in corpus analysis app 2023-07-03 13:28:52 +02:00
7936ac270b Merge branch 'visualizations-update' of gitlab.ub.uni-bielefeld.de:sfb1288inf/nopaque into visualizations-update 2023-07-03 11:17:13 +02:00
1eabf18b13 Remove timeout in cqi js 2023-07-03 11:17:07 +02:00
94dc25750c Merge branch 'visualizations-update' of gitlab.ub.uni-bielefeld.de:sfb1288inf/nopaque into visualizations-update 2023-07-03 11:06:59 +02:00
beb157092e New visualizations for frequencies 2023-07-03 11:06:43 +02:00
1cd9540e5b Improve cqi extension structure 2023-06-30 15:36:45 +02:00
912bd7da07 Remove TODOs that are done 2023-06-30 14:18:07 +02:00
e21ef2422d cqi-js: implement timeout 2023-06-30 14:13:34 +02:00
c52c966863 Better marking for non standard cqi additions ins cqi js 2023-06-30 12:55:32 +02:00
a7a948908f Small fixes and remove old cqi_over_socketio interface 2023-06-30 12:19:18 +02:00
3a97b1a07a Remove mention of old cqi client 2023-06-30 12:11:17 +02:00
315b538c30 Replace the old js CQiClient with fully featured new one 2023-06-30 12:10:17 +02:00
c35b2f8674 Sidenav User Field height fix 2023-06-14 10:28:05 +02:00
baf70750e8 Merge branch 'development' 2023-06-07 15:14:57 +02:00
525723818e Merge branch 'development' 2023-02-15 11:37:09 +01:00
20c0678d3e Merge branch 'master' of gitlab.ub.uni-bielefeld.de:sfb1288inf/nopaque 2023-02-06 15:51:41 +01:00
c323c53f37 Merge branch 'development' 2023-02-06 15:51:13 +01:00
2d8cef64e8 Merge branch 'development' 2023-02-06 12:40:07 +01:00
9b9edf501d Merge branch 'development' 2022-11-25 10:47:15 +01:00
903310c17f Merge branch 'development' 2022-11-24 12:29:28 +01:00
bc92fd249f Merge branch 'development' 2022-11-18 16:02:52 +01:00
422415065d Merge branch 'development' 2022-10-28 13:21:29 +02:00
07ec01ae2e Merge branch 'development' 2022-10-11 11:34:19 +02:00
201 changed files with 7197 additions and 127984 deletions

View File

@ -8,5 +8,6 @@
!.flaskenv !.flaskenv
!boot.sh !boot.sh
!config.py !config.py
!docker-nopaque-entrypoint.sh
!nopaque.py !nopaque.py
!requirements.txt !requirements.txt

210
.env.tpl
View File

@ -1,204 +1,32 @@
################################################################################ ##############################################################################
# Docker # # Variables for use in Docker Compose YAML files #
################################################################################ ##############################################################################
# DEFAULT: ./data
# NOTE: Use `.` as <project-basedir>
# HOST_DATA_DIR=
# Example: 1000
# HINT: Use this bash command `id -u` # HINT: Use this bash command `id -u`
# NOTE: 0 (= root user) is not allowed
HOST_UID= HOST_UID=
# Example: 1000
# HINT: Use this bash command `id -g` # HINT: Use this bash command `id -g`
HOST_GID= HOST_GID=
# Example: 999
# HINT: Use this bash command `getent group docker | cut -d: -f3` # HINT: Use this bash command `getent group docker | cut -d: -f3`
HOST_DOCKER_GID= HOST_DOCKER_GID=
# DEFAULT: ./logs # DEFAULT: nopaque
# NOTES: Use `.` as <project-basedir> # DOCKER_DEFAULT_NETWORK_NAME=
# HOST_LOG_DIR=
# DEFAULT: nopaque_default # DEFAULT: ./volumes/db/data
# DOCKER_NETWORK_NAME= # NOTE: Use `.` as <project-basedir>
# DOCKER_DB_SERVICE_DATA_VOLUME_SOURCE_PATH=
################################################################################ # DEFAULT: ./volumes/mq/data
# Flask # # NOTE: Use `.` as <project-basedir>
# https://flask.palletsprojects.com/en/1.1.x/config/ # # DOCKER_MQ_SERVICE_DATA_VOLUME_SOURCE_PATH=
################################################################################
# CHOOSE ONE: http, https
# DEFAULT: http
# PREFERRED_URL_SCHEME=
# DEFAULT: hard to guess string # NOTE: This must be a network share and it must be available on all
# HINT: Use this bash command `python -c "import uuid; print(uuid.uuid4().hex)"` # Docker Swarm nodes, mounted to the same path with the same
# SECRET_KEY= # user and group ownership.
DOCKER_NOPAQUE_SERVICE_DATA_VOLUME_SOURCE_PATH=
# DEFAULT: localhost:5000 # DEFAULT: ./volumes/nopaque/logs
# Example: nopaque.example.com/nopaque.example.com:5000 # NOTE: Use `.` as <project-basedir>
# HINT: If your instance is publicly available on a different Port then 80/443, # DOCKER_NOPAQUE_SERVICE_LOGS_VOLUME_SOURCE_PATH=.
# you will have to add this to the server name
# SERVER_NAME=
# CHOOSE ONE: False, True
# DEFAULT: False
# HINT: Set to true if you redirect http to https
# SESSION_COOKIE_SECURE=
################################################################################
# Flask-Assets #
# https://webassets.readthedocs.io/en/latest/ #
################################################################################
# CHOOSE ONE: False, True
# DEFAULT: False
# ASSETS_DEBUG=
################################################################################
# Flask-Hashids #
# https://github.com/Pevtrick/Flask-Hashids #
################################################################################
# DEFAULT: 16
# HASHIDS_MIN_LENGTH=
# NOTE: Use this bash command `python -c "import uuid; print(uuid.uuid4().hex)"`
# It is strongly recommended that this is NEVER the same as the SECRET_KEY
HASHIDS_SALT=
################################################################################
# Flask-Login #
# https://flask-login.readthedocs.io/en/latest/ #
################################################################################
# CHOOSE ONE: False, True
# DEFAULT: False
# HINT: Set to true if you redirect http to https
# REMEMBER_COOKIE_SECURE=
################################################################################
# Flask-Mail #
# https://pythonhosted.org/Flask-Mail/ #
################################################################################
# EXAMPLE: nopaque Admin <nopaque@example.com>
MAIL_DEFAULT_SENDER=
MAIL_PASSWORD=
# EXAMPLE: smtp.example.com
MAIL_SERVER=
# EXAMPLE: 587
MAIL_PORT=
# CHOOSE ONE: False, True
# DEFAULT: False
# MAIL_USE_SSL=
# CHOOSE ONE: False, True
# DEFAULT: False
# MAIL_USE_TLS=
# EXAMPLE: nopaque@example.com
MAIL_USERNAME=
################################################################################
# Flask-SQLAlchemy #
# https://flask-sqlalchemy.palletsprojects.com/en/2.x/config/ #
################################################################################
# DEFAULT: 'sqlite:///<nopaque-basedir>/data.sqlite'
# NOTE: Use `.` as <nopaque-basedir>,
# Don't use a SQLite database when using Docker
# SQLALCHEMY_DATABASE_URI=
################################################################################
# nopaque #
################################################################################
# An account is registered with this email adress gets automatically assigned
# the administrator role.
# EXAMPLE: admin.nopaque@example.com
NOPAQUE_ADMIN=
# DEFAULT: /mnt/nopaque
# NOTE: This must be a network share and it must be available on all Docker
# Swarm nodes
# NOPAQUE_DATA_DIR=
# CHOOSE ONE: False, True
# DEFAULT: True
# NOPAQUE_IS_PRIMARY_INSTANCE=
# transport://[userid:password]@hostname[:port]/[virtual_host]
NOPAQUE_SOCKETIO_MESSAGE_QUEUE_URI=
# NOTE: Get these from the nopaque development team
NOPAQUE_DOCKER_REGISTRY_USERNAME=
NOPAQUE_DOCKER_REGISTRY_PASSWORD=
# DEFAULT: %Y-%m-%d %H:%M:%S
# NOPAQUE_LOG_DATE_FORMAT=
# DEFAULT: [%(asctime)s] %(levelname)s in %(pathname)s (function: %(funcName)s, line: %(lineno)d): %(message)s
# NOPAQUE_LOG_FORMAT=
# DEFAULT: INFO
# CHOOSE ONE: CRITICAL, ERROR, WARNING, INFO, DEBUG
# NOPAQUE_LOG_LEVEL=
# CHOOSE ONE: False, True
# DEFAULT: True
# NOPAQUE_LOG_FILE_ENABLED=
# DEFAULT: <nopaque-basedir>/logs
# NOTE: Use `.` as <nopaque-basedir>
# NOPAQUE_LOG_FILE_DIR=
# DEFAULT: NOPAQUE_LOG_LEVEL
# CHOOSE ONE: CRITICAL, ERROR, WARNING, INFO, DEBUG
# NOPAQUE_LOG_FILE_LEVEL=
# CHOOSE ONE: False, True
# DEFAULT: False
# NOPAQUE_LOG_STDERR_ENABLED=
# CHOOSE ONE: CRITICAL, ERROR, WARNING, INFO, DEBUG
# DEFAULT: NOPAQUE_LOG_LEVEL
# NOPAQUE_LOG_STDERR_LEVEL=
# CHOOSE ONE: False, True
# DEFAULT: False
# HINT: Set this to True only if you are using a proxy in front of nopaque
# NOPAQUE_PROXY_FIX_ENABLED=
# DEFAULT: 0
# Number of values to trust for X-Forwarded-For
# NOPAQUE_PROXY_FIX_X_FOR=
# DEFAULT: 0
# Number of values to trust for X-Forwarded-Host
# NOPAQUE_PROXY_FIX_X_HOST=
# DEFAULT: 0
# Number of values to trust for X-Forwarded-Port
# NOPAQUE_PROXY_FIX_X_PORT=
# DEFAULT: 0
# Number of values to trust for X-Forwarded-Prefix
# NOPAQUE_PROXY_FIX_X_PREFIX=
# DEFAULT: 0
# Number of values to trust for X-Forwarded-Proto
# NOPAQUE_PROXY_FIX_X_PROTO=
# CHOOSE ONE: False, True
# DEFAULT: False
# NOPAQUE_TRANSKRIBUS_ENABLED=
# READ-COOP account data: https://readcoop.eu/
# NOPAQUE_READCOOP_USERNAME=
# NOPAQUE_READCOOP_PASSWORD=

2
.gitignore vendored
View File

@ -1,6 +1,6 @@
# nopaque specifics # nopaque specifics
app/static/gen/ app/static/gen/
data/ volumes/
docker-compose.override.yml docker-compose.override.yml
logs/ logs/
!logs/dummy !logs/dummy

84
.gitlab-ci.yml Normal file
View File

@ -0,0 +1,84 @@
include:
- template: Security/Container-Scanning.gitlab-ci.yml
##############################################################################
# Pipeline stages in order of execution #
##############################################################################
stages:
- build
- publish
- sca
##############################################################################
# Pipeline behavior #
##############################################################################
workflow:
rules:
# Run the pipeline on commits to the default branch
- if: $CI_COMMIT_BRANCH == $CI_DEFAULT_BRANCH
variables:
# Set the Docker image tag to `latest`
DOCKER_IMAGE: $CI_REGISTRY_IMAGE:latest
when: always
# Run the pipeline on tag creation
- if: $CI_COMMIT_TAG
variables:
# Set the Docker image tag to the Git tag name
DOCKER_IMAGE: $CI_REGISTRY_IMAGE:$CI_COMMIT_REF_NAME
when: always
# Don't run the pipeline on all other occasions
- when: never
##############################################################################
# Default values for pipeline jobs #
##############################################################################
default:
image: docker:24.0.6
services:
- docker:24.0.6-dind
tags:
- docker
##############################################################################
# CI/CD variables for all jobs in the pipeline #
##############################################################################
variables:
DOCKER_TLS_CERTDIR: /certs
DOCKER_BUILD_PATH: .
DOCKERFILE: Dockerfile
##############################################################################
# Pipeline jobs #
##############################################################################
build:
stage: build
script:
- docker build --tag $DOCKER_IMAGE --file $DOCKERFILE $DOCKER_BUILD_PATH
- docker save $DOCKER_IMAGE > docker_image.tar
artifacts:
paths:
- docker_image.tar
publish:
stage: publish
before_script:
- docker login --username gitlab-ci-token --password $CI_JOB_TOKEN $CI_REGISTRY
script:
- docker load --input docker_image.tar
- docker push $DOCKER_IMAGE
after_script:
- docker logout $CI_REGISTRY
container_scanning:
stage: sca
rules:
# Run the job on commits to the default branch
- if: $CI_COMMIT_BRANCH == $CI_DEFAULT_BRANCH
when: always
# Run the job on tag creation
- if: $CI_COMMIT_TAG
when: always
# Don't run the job on all other occasions
- when: never
variables:
CS_IMAGE: $DOCKER_IMAGE

View File

@ -1,7 +1,8 @@
{ {
"recommendations": [ "recommendations": [
"samuelcolvin.jinjahtml", "irongeek.vscode-env",
"ms-azuretools.vscode-docker", "ms-azuretools.vscode-docker",
"ms-python.python" "ms-python.python",
"samuelcolvin.jinjahtml"
] ]
} }

View File

@ -1,13 +1,9 @@
{ {
"editor.rulers": [79], "editor.rulers": [79],
"files.insertFinalNewline": true, "files.insertFinalNewline": true,
"python.terminal.activateEnvironment": false,
"[css]": { "[css]": {
"editor.tabSize": 2 "editor.tabSize": 2
}, },
"[scss]": {
"editor.tabSize": 2
},
"[html]": { "[html]": {
"editor.tabSize": 2 "editor.tabSize": 2
}, },
@ -17,7 +13,7 @@
"[jinja-html]": { "[jinja-html]": {
"editor.tabSize": 2 "editor.tabSize": 2
}, },
"[jinja-js]": { "[scss]": {
"editor.tabSize": 2 "editor.tabSize": 2
} }
} }

View File

@ -1,14 +1,9 @@
FROM python:3.8.10-slim-buster FROM python:3.10.13-slim-bookworm
LABEL authors="Patrick Jentsch <p.jentsch@uni-bielefeld.de>" LABEL authors="Patrick Jentsch <p.jentsch@uni-bielefeld.de>"
ARG DOCKER_GID
ARG UID
ARG GID
ENV LANG="C.UTF-8" ENV LANG="C.UTF-8"
ENV PYTHONDONTWRITEBYTECODE="1" ENV PYTHONDONTWRITEBYTECODE="1"
ENV PYTHONUNBUFFERED="1" ENV PYTHONUNBUFFERED="1"
@ -17,34 +12,42 @@ ENV PYTHONUNBUFFERED="1"
RUN apt-get update \ RUN apt-get update \
&& apt-get install --no-install-recommends --yes \ && apt-get install --no-install-recommends --yes \
build-essential \ build-essential \
gosu \
libpq-dev \ libpq-dev \
&& rm --recursive /var/lib/apt/lists/* && rm --recursive /var/lib/apt/lists/*
RUN groupadd --gid "${DOCKER_GID}" docker \ RUN useradd --create-home --no-log-init nopaque \
&& groupadd --gid "${GID}" nopaque \ && groupadd docker \
&& useradd --create-home --gid nopaque --groups "${DOCKER_GID}" --no-log-init --uid "${UID}" nopaque && usermod --append --groups docker nopaque
USER nopaque USER nopaque
WORKDIR /home/nopaque WORKDIR /home/nopaque
ENV PYTHON3_VENV_PATH="/home/nopaque/venv" ENV NOPAQUE_PYTHON3_VENV_PATH="/home/nopaque/.venv"
RUN python3 -m venv "${PYTHON3_VENV_PATH}" RUN python3 -m venv "${NOPAQUE_PYTHON3_VENV_PATH}"
ENV PATH="${PYTHON3_VENV_PATH}/bin:${PATH}" ENV PATH="${NOPAQUE_PYTHON3_VENV_PATH}/bin:${PATH}"
COPY --chown=nopaque:nopaque requirements.txt .
RUN python3 -m pip install --requirement requirements.txt \
&& rm requirements.txt
COPY --chown=nopaque:nopaque app app COPY --chown=nopaque:nopaque app app
COPY --chown=nopaque:nopaque migrations migrations COPY --chown=nopaque:nopaque migrations migrations
COPY --chown=nopaque:nopaque tests tests COPY --chown=nopaque:nopaque tests tests
COPY --chown=nopaque:nopaque .flaskenv boot.sh config.py nopaque.py ./ COPY --chown=nopaque:nopaque .flaskenv boot.sh config.py nopaque.py requirements.txt ./
RUN python3 -m pip install --requirement requirements.txt \
&& mkdir logs
USER root
COPY docker-nopaque-entrypoint.sh /usr/local/bin/
EXPOSE 5000 EXPOSE 5000
ENTRYPOINT ["./boot.sh"] ENTRYPOINT ["docker-nopaque-entrypoint.sh"]

View File

@ -1,5 +1,8 @@
# nopaque # nopaque
![release badge](https://gitlab.ub.uni-bielefeld.de/sfb1288inf/nopaque/-/badges/release.svg)
![pipeline badge](https://gitlab.ub.uni-bielefeld.de/sfb1288inf/nopaque/badges/master/pipeline.svg?ignore_skipped=true)
nopaque bundles various tools and services that provide humanities scholars with DH methods and thus can support their various individual research processes. Using nopaque, researchers can subject digitized sources to Optical Character Recognition (OCR). The resulting text files can then be used as a data basis for Natural Language Processing (NLP). The texts are automatically subjected to various linguistic annotations. The data processed via NLP can then be summarized in the web application as corpora and analyzed by means of an information retrieval system through complex search queries. The range of functions of the web application will be successively extended according to the needs of the researchers. nopaque bundles various tools and services that provide humanities scholars with DH methods and thus can support their various individual research processes. Using nopaque, researchers can subject digitized sources to Optical Character Recognition (OCR). The resulting text files can then be used as a data basis for Natural Language Processing (NLP). The texts are automatically subjected to various linguistic annotations. The data processed via NLP can then be summarized in the web application as corpora and analyzed by means of an information retrieval system through complex search queries. The range of functions of the web application will be successively extended according to the needs of the researchers.
## Prerequisites and requirements ## Prerequisites and requirements

View File

@ -9,6 +9,7 @@
# compatible_service_versions: # compatible_service_versions:
# - '0.1.0' # - '0.1.0'
# - '0.1.1' # - '0.1.1'
# - '0.1.2'
# - title: 'Amharic' # - title: 'Amharic'
# description: '' # description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/amh.traineddata' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/amh.traineddata'
@ -20,6 +21,7 @@
# compatible_service_versions: # compatible_service_versions:
# - '0.1.0' # - '0.1.0'
# - '0.1.1' # - '0.1.1'
# - '0.1.2'
- title: 'Arabic' - title: 'Arabic'
description: '' description: ''
url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/ara.traineddata' url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/ara.traineddata'
@ -31,6 +33,7 @@
compatible_service_versions: compatible_service_versions:
- '0.1.0' - '0.1.0'
- '0.1.1' - '0.1.1'
- '0.1.2'
# - title: 'Assamese' # - title: 'Assamese'
# description: '' # description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/asm.traineddata' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/asm.traineddata'
@ -42,6 +45,7 @@
# compatible_service_versions: # compatible_service_versions:
# - '0.1.0' # - '0.1.0'
# - '0.1.1' # - '0.1.1'
# - '0.1.2'
# - title: 'Azerbaijani' # - title: 'Azerbaijani'
# description: '' # description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/aze.traineddata' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/aze.traineddata'
@ -53,6 +57,7 @@
# compatible_service_versions: # compatible_service_versions:
# - '0.1.0' # - '0.1.0'
# - '0.1.1' # - '0.1.1'
# - '0.1.2'
# - title: 'Azerbaijani - Cyrillic' # - title: 'Azerbaijani - Cyrillic'
# description: '' # description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/aze_cyrl.traineddata' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/aze_cyrl.traineddata'
@ -64,6 +69,7 @@
# compatible_service_versions: # compatible_service_versions:
# - '0.1.0' # - '0.1.0'
# - '0.1.1' # - '0.1.1'
# - '0.1.2'
# - title: 'Belarusian' # - title: 'Belarusian'
# description: '' # description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/bel.traineddata' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/bel.traineddata'
@ -75,6 +81,7 @@
# compatible_service_versions: # compatible_service_versions:
# - '0.1.0' # - '0.1.0'
# - '0.1.1' # - '0.1.1'
# - '0.1.2'
# - title: 'Bengali' # - title: 'Bengali'
# description: '' # description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/ben.traineddata' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/ben.traineddata'
@ -86,6 +93,7 @@
# compatible_service_versions: # compatible_service_versions:
# - '0.1.0' # - '0.1.0'
# - '0.1.1' # - '0.1.1'
# - '0.1.2'
# - title: 'Tibetan' # - title: 'Tibetan'
# description: '' # description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/bod.traineddata' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/bod.traineddata'
@ -97,6 +105,7 @@
# compatible_service_versions: # compatible_service_versions:
# - '0.1.0' # - '0.1.0'
# - '0.1.1' # - '0.1.1'
# - '0.1.2'
# - title: 'Bosnian' # - title: 'Bosnian'
# description: '' # description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/bos.traineddata' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/bos.traineddata'
@ -108,6 +117,7 @@
# compatible_service_versions: # compatible_service_versions:
# - '0.1.0' # - '0.1.0'
# - '0.1.1' # - '0.1.1'
# - '0.1.2'
# - title: 'Bulgarian' # - title: 'Bulgarian'
# description: '' # description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/bul.traineddata' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/bul.traineddata'
@ -119,6 +129,7 @@
# compatible_service_versions: # compatible_service_versions:
# - '0.1.0' # - '0.1.0'
# - '0.1.1' # - '0.1.1'
# - '0.1.2'
# - title: 'Catalan; Valencian' # - title: 'Catalan; Valencian'
# description: '' # description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/cat.traineddata' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/cat.traineddata'
@ -130,6 +141,7 @@
# compatible_service_versions: # compatible_service_versions:
# - '0.1.0' # - '0.1.0'
# - '0.1.1' # - '0.1.1'
# - '0.1.2'
# - title: 'Cebuano' # - title: 'Cebuano'
# description: '' # description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/ceb.traineddata' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/ceb.traineddata'
@ -141,6 +153,7 @@
# compatible_service_versions: # compatible_service_versions:
# - '0.1.0' # - '0.1.0'
# - '0.1.1' # - '0.1.1'
# - '0.1.2'
# - title: 'Czech' # - title: 'Czech'
# description: '' # description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/ces.traineddata' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/ces.traineddata'
@ -152,6 +165,7 @@
# compatible_service_versions: # compatible_service_versions:
# - '0.1.0' # - '0.1.0'
# - '0.1.1' # - '0.1.1'
# - '0.1.2'
# - title: 'Chinese - Simplified' # - title: 'Chinese - Simplified'
# description: '' # description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/chi_sim.traineddata' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/chi_sim.traineddata'
@ -163,6 +177,7 @@
# compatible_service_versions: # compatible_service_versions:
# - '0.1.0' # - '0.1.0'
# - '0.1.1' # - '0.1.1'
# - '0.1.2'
- title: 'Chinese - Traditional' - title: 'Chinese - Traditional'
description: '' description: ''
url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/chi_tra.traineddata' url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/chi_tra.traineddata'
@ -174,6 +189,7 @@
compatible_service_versions: compatible_service_versions:
- '0.1.0' - '0.1.0'
- '0.1.1' - '0.1.1'
- '0.1.2'
# - title: 'Cherokee' # - title: 'Cherokee'
# description: '' # description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/chr.traineddata' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/chr.traineddata'
@ -185,6 +201,7 @@
# compatible_service_versions: # compatible_service_versions:
# - '0.1.0' # - '0.1.0'
# - '0.1.1' # - '0.1.1'
# - '0.1.2'
# - title: 'Welsh' # - title: 'Welsh'
# description: '' # description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/cym.traineddata' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/cym.traineddata'
@ -196,6 +213,7 @@
# compatible_service_versions: # compatible_service_versions:
# - '0.1.0' # - '0.1.0'
# - '0.1.1' # - '0.1.1'
# - '0.1.2'
- title: 'Danish' - title: 'Danish'
description: '' description: ''
url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/dan.traineddata' url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/dan.traineddata'
@ -207,6 +225,7 @@
compatible_service_versions: compatible_service_versions:
- '0.1.0' - '0.1.0'
- '0.1.1' - '0.1.1'
- '0.1.2'
- title: 'German' - title: 'German'
description: '' description: ''
url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/deu.traineddata' url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/deu.traineddata'
@ -218,6 +237,7 @@
compatible_service_versions: compatible_service_versions:
- '0.1.0' - '0.1.0'
- '0.1.1' - '0.1.1'
- '0.1.2'
# - title: 'Dzongkha' # - title: 'Dzongkha'
# description: '' # description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/dzo.traineddata' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/dzo.traineddata'
@ -229,6 +249,7 @@
# compatible_service_versions: # compatible_service_versions:
# - '0.1.0' # - '0.1.0'
# - '0.1.1' # - '0.1.1'
# - '0.1.2'
- title: 'Greek, Modern (1453-)' - title: 'Greek, Modern (1453-)'
description: '' description: ''
url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/ell.traineddata' url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/ell.traineddata'
@ -240,6 +261,7 @@
compatible_service_versions: compatible_service_versions:
- '0.1.0' - '0.1.0'
- '0.1.1' - '0.1.1'
- '0.1.2'
- title: 'English' - title: 'English'
description: '' description: ''
url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/eng.traineddata' url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/eng.traineddata'
@ -251,6 +273,7 @@
compatible_service_versions: compatible_service_versions:
- '0.1.0' - '0.1.0'
- '0.1.1' - '0.1.1'
- '0.1.2'
- title: 'English, Middle (1100-1500)' - title: 'English, Middle (1100-1500)'
description: '' description: ''
url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/enm.traineddata' url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/enm.traineddata'
@ -262,6 +285,7 @@
compatible_service_versions: compatible_service_versions:
- '0.1.0' - '0.1.0'
- '0.1.1' - '0.1.1'
- '0.1.2'
# - title: 'Esperanto' # - title: 'Esperanto'
# description: '' # description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/epo.traineddata' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/epo.traineddata'
@ -273,6 +297,7 @@
# compatible_service_versions: # compatible_service_versions:
# - '0.1.0' # - '0.1.0'
# - '0.1.1' # - '0.1.1'
# - '0.1.2'
# - title: 'Estonian' # - title: 'Estonian'
# description: '' # description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/est.traineddata' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/est.traineddata'
@ -284,6 +309,7 @@
# compatible_service_versions: # compatible_service_versions:
# - '0.1.0' # - '0.1.0'
# - '0.1.1' # - '0.1.1'
# - '0.1.2'
# - title: 'Basque' # - title: 'Basque'
# description: '' # description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/eus.traineddata' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/eus.traineddata'
@ -295,6 +321,7 @@
# compatible_service_versions: # compatible_service_versions:
# - '0.1.0' # - '0.1.0'
# - '0.1.1' # - '0.1.1'
# - '0.1.2'
# - title: 'Persian' # - title: 'Persian'
# description: '' # description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/fas.traineddata' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/fas.traineddata'
@ -306,6 +333,7 @@
# compatible_service_versions: # compatible_service_versions:
# - '0.1.0' # - '0.1.0'
# - '0.1.1' # - '0.1.1'
# - '0.1.2'
# - title: 'Finnish' # - title: 'Finnish'
# description: '' # description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/fin.traineddata' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/fin.traineddata'
@ -317,6 +345,7 @@
# compatible_service_versions: # compatible_service_versions:
# - '0.1.0' # - '0.1.0'
# - '0.1.1' # - '0.1.1'
# - '0.1.2'
- title: 'French' - title: 'French'
description: '' description: ''
url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/fra.traineddata' url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/fra.traineddata'
@ -328,6 +357,7 @@
compatible_service_versions: compatible_service_versions:
- '0.1.0' - '0.1.0'
- '0.1.1' - '0.1.1'
- '0.1.2'
- title: 'German Fraktur' - title: 'German Fraktur'
description: '' description: ''
url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/frk.traineddata' url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/frk.traineddata'
@ -339,6 +369,7 @@
compatible_service_versions: compatible_service_versions:
- '0.1.0' - '0.1.0'
- '0.1.1' - '0.1.1'
- '0.1.2'
- title: 'French, Middle (ca. 1400-1600)' - title: 'French, Middle (ca. 1400-1600)'
description: '' description: ''
url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/frm.traineddata' url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/frm.traineddata'
@ -350,6 +381,7 @@
compatible_service_versions: compatible_service_versions:
- '0.1.0' - '0.1.0'
- '0.1.1' - '0.1.1'
- '0.1.2'
# - title: 'Irish' # - title: 'Irish'
# description: '' # description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/gle.traineddata' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/gle.traineddata'
@ -361,6 +393,7 @@
# compatible_service_versions: # compatible_service_versions:
# - '0.1.0' # - '0.1.0'
# - '0.1.1' # - '0.1.1'
# - '0.1.2'
# - title: 'Galician' # - title: 'Galician'
# description: '' # description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/glg.traineddata' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/glg.traineddata'
@ -372,6 +405,7 @@
# compatible_service_versions: # compatible_service_versions:
# - '0.1.0' # - '0.1.0'
# - '0.1.1' # - '0.1.1'
# - '0.1.2'
- title: 'Greek, Ancient (-1453)' - title: 'Greek, Ancient (-1453)'
description: '' description: ''
url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/grc.traineddata' url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/grc.traineddata'
@ -383,6 +417,7 @@
compatible_service_versions: compatible_service_versions:
- '0.1.0' - '0.1.0'
- '0.1.1' - '0.1.1'
- '0.1.2'
# - title: 'Gujarati' # - title: 'Gujarati'
# description: '' # description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/guj.traineddata' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/guj.traineddata'
@ -394,6 +429,7 @@
# compatible_service_versions: # compatible_service_versions:
# - '0.1.0' # - '0.1.0'
# - '0.1.1' # - '0.1.1'
# - '0.1.2'
# - title: 'Haitian; Haitian Creole' # - title: 'Haitian; Haitian Creole'
# description: '' # description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/hat.traineddata' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/hat.traineddata'
@ -405,6 +441,7 @@
# compatible_service_versions: # compatible_service_versions:
# - '0.1.0' # - '0.1.0'
# - '0.1.1' # - '0.1.1'
# - '0.1.2'
# - title: 'Hebrew' # - title: 'Hebrew'
# description: '' # description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/heb.traineddata' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/heb.traineddata'
@ -416,6 +453,7 @@
# compatible_service_versions: # compatible_service_versions:
# - '0.1.0' # - '0.1.0'
# - '0.1.1' # - '0.1.1'
# - '0.1.2'
# - title: 'Hindi' # - title: 'Hindi'
# description: '' # description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/hin.traineddata' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/hin.traineddata'
@ -427,6 +465,7 @@
# compatible_service_versions: # compatible_service_versions:
# - '0.1.0' # - '0.1.0'
# - '0.1.1' # - '0.1.1'
# - '0.1.2'
# - title: 'Croatian' # - title: 'Croatian'
# description: '' # description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/hrv.traineddata' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/hrv.traineddata'
@ -438,6 +477,7 @@
# compatible_service_versions: # compatible_service_versions:
# - '0.1.0' # - '0.1.0'
# - '0.1.1' # - '0.1.1'
# - '0.1.2'
# - title: 'Hungarian' # - title: 'Hungarian'
# description: '' # description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/hun.traineddata' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/hun.traineddata'
@ -449,6 +489,7 @@
# compatible_service_versions: # compatible_service_versions:
# - '0.1.0' # - '0.1.0'
# - '0.1.1' # - '0.1.1'
# - '0.1.2'
# - title: 'Inuktitut' # - title: 'Inuktitut'
# description: '' # description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/iku.traineddata' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/iku.traineddata'
@ -460,6 +501,7 @@
# compatible_service_versions: # compatible_service_versions:
# - '0.1.0' # - '0.1.0'
# - '0.1.1' # - '0.1.1'
# - '0.1.2'
# - title: 'Indonesian' # - title: 'Indonesian'
# description: '' # description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/ind.traineddata' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/ind.traineddata'
@ -471,6 +513,7 @@
# compatible_service_versions: # compatible_service_versions:
# - '0.1.0' # - '0.1.0'
# - '0.1.1' # - '0.1.1'
# - '0.1.2'
# - title: 'Icelandic' # - title: 'Icelandic'
# description: '' # description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/isl.traineddata' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/isl.traineddata'
@ -482,6 +525,7 @@
# compatible_service_versions: # compatible_service_versions:
# - '0.1.0' # - '0.1.0'
# - '0.1.1' # - '0.1.1'
# - '0.1.2'
- title: 'Italian' - title: 'Italian'
description: '' description: ''
url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/ita.traineddata' url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/ita.traineddata'
@ -493,6 +537,7 @@
compatible_service_versions: compatible_service_versions:
- '0.1.0' - '0.1.0'
- '0.1.1' - '0.1.1'
- '0.1.2'
- title: 'Italian - Old' - title: 'Italian - Old'
description: '' description: ''
url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/ita_old.traineddata' url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/ita_old.traineddata'
@ -504,6 +549,7 @@
compatible_service_versions: compatible_service_versions:
- '0.1.0' - '0.1.0'
- '0.1.1' - '0.1.1'
- '0.1.2'
# - title: 'Javanese' # - title: 'Javanese'
# description: '' # description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/jav.traineddata' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/jav.traineddata'
@ -515,6 +561,7 @@
# compatible_service_versions: # compatible_service_versions:
# - '0.1.0' # - '0.1.0'
# - '0.1.1' # - '0.1.1'
# - '0.1.2'
# - title: 'Japanese' # - title: 'Japanese'
# description: '' # description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/jpn.traineddata' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/jpn.traineddata'
@ -526,6 +573,7 @@
# compatible_service_versions: # compatible_service_versions:
# - '0.1.0' # - '0.1.0'
# - '0.1.1' # - '0.1.1'
# - '0.1.2'
# - title: 'Kannada' # - title: 'Kannada'
# description: '' # description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/kan.traineddata' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/kan.traineddata'
@ -537,6 +585,7 @@
# compatible_service_versions: # compatible_service_versions:
# - '0.1.0' # - '0.1.0'
# - '0.1.1' # - '0.1.1'
# - '0.1.2'
# - title: 'Georgian' # - title: 'Georgian'
# description: '' # description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/kat.traineddata' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/kat.traineddata'
@ -548,6 +597,7 @@
# compatible_service_versions: # compatible_service_versions:
# - '0.1.0' # - '0.1.0'
# - '0.1.1' # - '0.1.1'
# - '0.1.2'
# - title: 'Georgian - Old' # - title: 'Georgian - Old'
# description: '' # description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/kat_old.traineddata' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/kat_old.traineddata'
@ -559,6 +609,7 @@
# compatible_service_versions: # compatible_service_versions:
# - '0.1.0' # - '0.1.0'
# - '0.1.1' # - '0.1.1'
# - '0.1.2'
# - title: 'Kazakh' # - title: 'Kazakh'
# description: '' # description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/kaz.traineddata' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/kaz.traineddata'
@ -570,6 +621,7 @@
# compatible_service_versions: # compatible_service_versions:
# - '0.1.0' # - '0.1.0'
# - '0.1.1' # - '0.1.1'
# - '0.1.2'
# - title: 'Central Khmer' # - title: 'Central Khmer'
# description: '' # description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/khm.traineddata' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/khm.traineddata'
@ -581,6 +633,7 @@
# compatible_service_versions: # compatible_service_versions:
# - '0.1.0' # - '0.1.0'
# - '0.1.1' # - '0.1.1'
# - '0.1.2'
# - title: 'Kirghiz; Kyrgyz' # - title: 'Kirghiz; Kyrgyz'
# description: '' # description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/kir.traineddata' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/kir.traineddata'
@ -592,6 +645,7 @@
# compatible_service_versions: # compatible_service_versions:
# - '0.1.0' # - '0.1.0'
# - '0.1.1' # - '0.1.1'
# - '0.1.2'
# - title: 'Korean' # - title: 'Korean'
# description: '' # description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/kor.traineddata' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/kor.traineddata'
@ -603,6 +657,7 @@
# compatible_service_versions: # compatible_service_versions:
# - '0.1.0' # - '0.1.0'
# - '0.1.1' # - '0.1.1'
# - '0.1.2'
# - title: 'Kurdish' # - title: 'Kurdish'
# description: '' # description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/kur.traineddata' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/kur.traineddata'
@ -614,6 +669,7 @@
# compatible_service_versions: # compatible_service_versions:
# - '0.1.0' # - '0.1.0'
# - '0.1.1' # - '0.1.1'
# - '0.1.2'
# - title: 'Lao' # - title: 'Lao'
# description: '' # description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/lao.traineddata' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/lao.traineddata'
@ -625,6 +681,7 @@
# compatible_service_versions: # compatible_service_versions:
# - '0.1.0' # - '0.1.0'
# - '0.1.1' # - '0.1.1'
# - '0.1.2'
# - title: 'Latin' # - title: 'Latin'
# description: '' # description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/lat.traineddata' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/lat.traineddata'
@ -636,6 +693,7 @@
# compatible_service_versions: # compatible_service_versions:
# - '0.1.0' # - '0.1.0'
# - '0.1.1' # - '0.1.1'
# - '0.1.2'
# - title: 'Latvian' # - title: 'Latvian'
# description: '' # description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/lav.traineddata' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/lav.traineddata'
@ -647,6 +705,7 @@
# compatible_service_versions: # compatible_service_versions:
# - '0.1.0' # - '0.1.0'
# - '0.1.1' # - '0.1.1'
# - '0.1.2'
# - title: 'Lithuanian' # - title: 'Lithuanian'
# description: '' # description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/lit.traineddata' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/lit.traineddata'
@ -658,6 +717,7 @@
# compatible_service_versions: # compatible_service_versions:
# - '0.1.0' # - '0.1.0'
# - '0.1.1' # - '0.1.1'
# - '0.1.2'
# - title: 'Malayalam' # - title: 'Malayalam'
# description: '' # description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/mal.traineddata' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/mal.traineddata'
@ -669,6 +729,7 @@
# compatible_service_versions: # compatible_service_versions:
# - '0.1.0' # - '0.1.0'
# - '0.1.1' # - '0.1.1'
# - '0.1.2'
# - title: 'Marathi' # - title: 'Marathi'
# description: '' # description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/mar.traineddata' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/mar.traineddata'
@ -680,6 +741,7 @@
# compatible_service_versions: # compatible_service_versions:
# - '0.1.0' # - '0.1.0'
# - '0.1.1' # - '0.1.1'
# - '0.1.2'
# - title: 'Macedonian' # - title: 'Macedonian'
# description: '' # description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/mkd.traineddata' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/mkd.traineddata'
@ -691,6 +753,7 @@
# compatible_service_versions: # compatible_service_versions:
# - '0.1.0' # - '0.1.0'
# - '0.1.1' # - '0.1.1'
# - '0.1.2'
# - title: 'Maltese' # - title: 'Maltese'
# description: '' # description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/mlt.traineddata' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/mlt.traineddata'
@ -702,6 +765,7 @@
# compatible_service_versions: # compatible_service_versions:
# - '0.1.0' # - '0.1.0'
# - '0.1.1' # - '0.1.1'
# - '0.1.2'
# - title: 'Malay' # - title: 'Malay'
# description: '' # description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/msa.traineddata' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/msa.traineddata'
@ -713,6 +777,7 @@
# compatible_service_versions: # compatible_service_versions:
# - '0.1.0' # - '0.1.0'
# - '0.1.1' # - '0.1.1'
# - '0.1.2'
# - title: 'Burmese' # - title: 'Burmese'
# description: '' # description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/mya.traineddata' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/mya.traineddata'
@ -724,6 +789,7 @@
# compatible_service_versions: # compatible_service_versions:
# - '0.1.0' # - '0.1.0'
# - '0.1.1' # - '0.1.1'
# - '0.1.2'
# - title: 'Nepali' # - title: 'Nepali'
# description: '' # description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/nep.traineddata' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/nep.traineddata'
@ -735,6 +801,7 @@
# compatible_service_versions: # compatible_service_versions:
# - '0.1.0' # - '0.1.0'
# - '0.1.1' # - '0.1.1'
# - '0.1.2'
# - title: 'Dutch; Flemish' # - title: 'Dutch; Flemish'
# description: '' # description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/nld.traineddata' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/nld.traineddata'
@ -746,6 +813,7 @@
# compatible_service_versions: # compatible_service_versions:
# - '0.1.0' # - '0.1.0'
# - '0.1.1' # - '0.1.1'
# - '0.1.2'
# - title: 'Norwegian' # - title: 'Norwegian'
# description: '' # description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/nor.traineddata' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/nor.traineddata'
@ -757,6 +825,7 @@
# compatible_service_versions: # compatible_service_versions:
# - '0.1.0' # - '0.1.0'
# - '0.1.1' # - '0.1.1'
# - '0.1.2'
# - title: 'Oriya' # - title: 'Oriya'
# description: '' # description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/ori.traineddata' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/ori.traineddata'
@ -768,6 +837,7 @@
# compatible_service_versions: # compatible_service_versions:
# - '0.1.0' # - '0.1.0'
# - '0.1.1' # - '0.1.1'
# - '0.1.2'
# - title: 'Panjabi; Punjabi' # - title: 'Panjabi; Punjabi'
# description: '' # description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/pan.traineddata' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/pan.traineddata'
@ -779,6 +849,7 @@
# compatible_service_versions: # compatible_service_versions:
# - '0.1.0' # - '0.1.0'
# - '0.1.1' # - '0.1.1'
# - '0.1.2'
# - title: 'Polish' # - title: 'Polish'
# description: '' # description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/pol.traineddata' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/pol.traineddata'
@ -790,6 +861,7 @@
# compatible_service_versions: # compatible_service_versions:
# - '0.1.0' # - '0.1.0'
# - '0.1.1' # - '0.1.1'
# - '0.1.2'
- title: 'Portuguese' - title: 'Portuguese'
description: '' description: ''
url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/por.traineddata' url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/por.traineddata'
@ -801,6 +873,7 @@
compatible_service_versions: compatible_service_versions:
- '0.1.0' - '0.1.0'
- '0.1.1' - '0.1.1'
- '0.1.2'
# - title: 'Pushto; Pashto' # - title: 'Pushto; Pashto'
# description: '' # description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/pus.traineddata' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/pus.traineddata'
@ -812,6 +885,7 @@
# compatible_service_versions: # compatible_service_versions:
# - '0.1.0' # - '0.1.0'
# - '0.1.1' # - '0.1.1'
# - '0.1.2'
# - title: 'Romanian; Moldavian; Moldovan' # - title: 'Romanian; Moldavian; Moldovan'
# description: '' # description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/ron.traineddata' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/ron.traineddata'
@ -823,6 +897,7 @@
# compatible_service_versions: # compatible_service_versions:
# - '0.1.0' # - '0.1.0'
# - '0.1.1' # - '0.1.1'
# - '0.1.2'
- title: 'Russian' - title: 'Russian'
description: '' description: ''
url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/rus.traineddata' url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/rus.traineddata'
@ -834,6 +909,7 @@
compatible_service_versions: compatible_service_versions:
- '0.1.0' - '0.1.0'
- '0.1.1' - '0.1.1'
- '0.1.2'
# - title: 'Sanskrit' # - title: 'Sanskrit'
# description: '' # description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/san.traineddata' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/san.traineddata'
@ -845,6 +921,7 @@
# compatible_service_versions: # compatible_service_versions:
# - '0.1.0' # - '0.1.0'
# - '0.1.1' # - '0.1.1'
# - '0.1.2'
# - title: 'Sinhala; Sinhalese' # - title: 'Sinhala; Sinhalese'
# description: '' # description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/sin.traineddata' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/sin.traineddata'
@ -856,6 +933,7 @@
# compatible_service_versions: # compatible_service_versions:
# - '0.1.0' # - '0.1.0'
# - '0.1.1' # - '0.1.1'
# - '0.1.2'
# - title: 'Slovak' # - title: 'Slovak'
# description: '' # description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/slk.traineddata' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/slk.traineddata'
@ -867,6 +945,7 @@
# compatible_service_versions: # compatible_service_versions:
# - '0.1.0' # - '0.1.0'
# - '0.1.1' # - '0.1.1'
# - '0.1.2'
# - title: 'Slovenian' # - title: 'Slovenian'
# description: '' # description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/slv.traineddata' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/slv.traineddata'
@ -878,6 +957,7 @@
# compatible_service_versions: # compatible_service_versions:
# - '0.1.0' # - '0.1.0'
# - '0.1.1' # - '0.1.1'
# - '0.1.2'
- title: 'Spanish; Castilian' - title: 'Spanish; Castilian'
description: '' description: ''
url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/spa.traineddata' url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/spa.traineddata'
@ -889,6 +969,7 @@
compatible_service_versions: compatible_service_versions:
- '0.1.0' - '0.1.0'
- '0.1.1' - '0.1.1'
- '0.1.2'
- title: 'Spanish; Castilian - Old' - title: 'Spanish; Castilian - Old'
description: '' description: ''
url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/spa_old.traineddata' url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/spa_old.traineddata'
@ -900,6 +981,7 @@
compatible_service_versions: compatible_service_versions:
- '0.1.0' - '0.1.0'
- '0.1.1' - '0.1.1'
- '0.1.2'
# - title: 'Albanian' # - title: 'Albanian'
# description: '' # description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/sqi.traineddata' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/sqi.traineddata'
@ -911,6 +993,7 @@
# compatible_service_versions: # compatible_service_versions:
# - '0.1.0' # - '0.1.0'
# - '0.1.1' # - '0.1.1'
# - '0.1.2'
# - title: 'Serbian' # - title: 'Serbian'
# description: '' # description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/srp.traineddata' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/srp.traineddata'
@ -922,6 +1005,7 @@
# compatible_service_versions: # compatible_service_versions:
# - '0.1.0' # - '0.1.0'
# - '0.1.1' # - '0.1.1'
# - '0.1.2'
# - title: 'Serbian - Latin' # - title: 'Serbian - Latin'
# description: '' # description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/srp_latn.traineddata' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/srp_latn.traineddata'
@ -933,6 +1017,7 @@
# compatible_service_versions: # compatible_service_versions:
# - '0.1.0' # - '0.1.0'
# - '0.1.1' # - '0.1.1'
# - '0.1.2'
# - title: 'Swahili' # - title: 'Swahili'
# description: '' # description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/swa.traineddata' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/swa.traineddata'
@ -944,6 +1029,7 @@
# compatible_service_versions: # compatible_service_versions:
# - '0.1.0' # - '0.1.0'
# - '0.1.1' # - '0.1.1'
# - '0.1.2'
# - title: 'Swedish' # - title: 'Swedish'
# description: '' # description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/swe.traineddata' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/swe.traineddata'
@ -955,6 +1041,7 @@
# compatible_service_versions: # compatible_service_versions:
# - '0.1.0' # - '0.1.0'
# - '0.1.1' # - '0.1.1'
# - '0.1.2'
# - title: 'Syriac' # - title: 'Syriac'
# description: '' # description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/syr.traineddata' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/syr.traineddata'
@ -966,6 +1053,7 @@
# compatible_service_versions: # compatible_service_versions:
# - '0.1.0' # - '0.1.0'
# - '0.1.1' # - '0.1.1'
# - '0.1.2'
# - title: 'Tamil' # - title: 'Tamil'
# description: '' # description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/tam.traineddata' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/tam.traineddata'
@ -977,6 +1065,7 @@
# compatible_service_versions: # compatible_service_versions:
# - '0.1.0' # - '0.1.0'
# - '0.1.1' # - '0.1.1'
# - '0.1.2'
# - title: 'Telugu' # - title: 'Telugu'
# description: '' # description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/tel.traineddata' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/tel.traineddata'
@ -988,6 +1077,7 @@
# compatible_service_versions: # compatible_service_versions:
# - '0.1.0' # - '0.1.0'
# - '0.1.1' # - '0.1.1'
# - '0.1.2'
# - title: 'Tajik' # - title: 'Tajik'
# description: '' # description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/tgk.traineddata' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/tgk.traineddata'
@ -999,6 +1089,7 @@
# compatible_service_versions: # compatible_service_versions:
# - '0.1.0' # - '0.1.0'
# - '0.1.1' # - '0.1.1'
# - '0.1.2'
# - title: 'Tagalog' # - title: 'Tagalog'
# description: '' # description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/tgl.traineddata' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/tgl.traineddata'
@ -1010,6 +1101,7 @@
# compatible_service_versions: # compatible_service_versions:
# - '0.1.0' # - '0.1.0'
# - '0.1.1' # - '0.1.1'
# - '0.1.2'
# - title: 'Thai' # - title: 'Thai'
# description: '' # description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/tha.traineddata' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/tha.traineddata'
@ -1021,6 +1113,7 @@
# compatible_service_versions: # compatible_service_versions:
# - '0.1.0' # - '0.1.0'
# - '0.1.1' # - '0.1.1'
# - '0.1.2'
# - title: 'Tigrinya' # - title: 'Tigrinya'
# description: '' # description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/tir.traineddata' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/tir.traineddata'
@ -1032,6 +1125,7 @@
# compatible_service_versions: # compatible_service_versions:
# - '0.1.0' # - '0.1.0'
# - '0.1.1' # - '0.1.1'
# - '0.1.2'
# - title: 'Turkish' # - title: 'Turkish'
# description: '' # description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/tur.traineddata' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/tur.traineddata'
@ -1043,6 +1137,7 @@
# compatible_service_versions: # compatible_service_versions:
# - '0.1.0' # - '0.1.0'
# - '0.1.1' # - '0.1.1'
# - '0.1.2'
# - title: 'Uighur; Uyghur' # - title: 'Uighur; Uyghur'
# description: '' # description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/uig.traineddata' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/uig.traineddata'
@ -1054,6 +1149,7 @@
# compatible_service_versions: # compatible_service_versions:
# - '0.1.0' # - '0.1.0'
# - '0.1.1' # - '0.1.1'
# - '0.1.2'
# - title: 'Ukrainian' # - title: 'Ukrainian'
# description: '' # description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/ukr.traineddata' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/ukr.traineddata'
@ -1065,6 +1161,7 @@
# compatible_service_versions: # compatible_service_versions:
# - '0.1.0' # - '0.1.0'
# - '0.1.1' # - '0.1.1'
# - '0.1.2'
# - title: 'Urdu' # - title: 'Urdu'
# description: '' # description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/urd.traineddata' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/urd.traineddata'
@ -1076,6 +1173,7 @@
# compatible_service_versions: # compatible_service_versions:
# - '0.1.0' # - '0.1.0'
# - '0.1.1' # - '0.1.1'
# - '0.1.2'
# - title: 'Uzbek' # - title: 'Uzbek'
# description: '' # description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/uzb.traineddata' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/uzb.traineddata'
@ -1087,6 +1185,7 @@
# compatible_service_versions: # compatible_service_versions:
# - '0.1.0' # - '0.1.0'
# - '0.1.1' # - '0.1.1'
# - '0.1.2'
# - title: 'Uzbek - Cyrillic' # - title: 'Uzbek - Cyrillic'
# description: '' # description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/uzb_cyrl.traineddata' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/uzb_cyrl.traineddata'
@ -1098,6 +1197,7 @@
# compatible_service_versions: # compatible_service_versions:
# - '0.1.0' # - '0.1.0'
# - '0.1.1' # - '0.1.1'
# - '0.1.2'
# - title: 'Vietnamese' # - title: 'Vietnamese'
# description: '' # description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/vie.traineddata' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/vie.traineddata'
@ -1109,6 +1209,7 @@
# compatible_service_versions: # compatible_service_versions:
# - '0.1.0' # - '0.1.0'
# - '0.1.1' # - '0.1.1'
# - '0.1.2'
# - title: 'Yiddish' # - title: 'Yiddish'
# description: '' # description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/yid.traineddata' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/yid.traineddata'
@ -1120,3 +1221,4 @@
# compatible_service_versions: # compatible_service_versions:
# - '0.1.0' # - '0.1.0'
# - '0.1.1' # - '0.1.1'
# - '0.1.2'

View File

@ -13,7 +13,6 @@ from flask_paranoid import Paranoid
from flask_socketio import SocketIO from flask_socketio import SocketIO
from flask_sqlalchemy import SQLAlchemy from flask_sqlalchemy import SQLAlchemy
from flask_hashids import Hashids from flask_hashids import Hashids
from werkzeug.exceptions import HTTPException
apifairy = APIFairy() apifairy = APIFairy()
@ -74,8 +73,10 @@ def create_app(config: Config = Config) -> Flask:
app.register_blueprint(contributions_blueprint, url_prefix='/contributions') app.register_blueprint(contributions_blueprint, url_prefix='/contributions')
from .corpora import bp as corpora_blueprint from .corpora import bp as corpora_blueprint
from .corpora.cqi_over_sio import CQiNamespace
default_breadcrumb_root(corpora_blueprint, '.corpora') default_breadcrumb_root(corpora_blueprint, '.corpora')
app.register_blueprint(corpora_blueprint, cli_group='corpus', url_prefix='/corpora') app.register_blueprint(corpora_blueprint, cli_group='corpus', url_prefix='/corpora')
socketio.on_namespace(CQiNamespace('/cqi_over_sio'))
from .errors import bp as errors_bp from .errors import bp as errors_bp
app.register_blueprint(errors_bp) app.register_blueprint(errors_bp)
@ -98,6 +99,9 @@ def create_app(config: Config = Config) -> Flask:
from .users import bp as users_blueprint from .users import bp as users_blueprint
default_breadcrumb_root(users_blueprint, '.users') default_breadcrumb_root(users_blueprint, '.users')
app.register_blueprint(users_blueprint, url_prefix='/users') app.register_blueprint(users_blueprint, cli_group='user', url_prefix='/users')
from .workshops import bp as workshops_blueprint
app.register_blueprint(workshops_blueprint, url_prefix='/workshops')
return app return app

View File

@ -16,8 +16,8 @@ class CreateSpaCyNLPPipelineModelForm(ContributionBaseForm):
) )
def validate_spacy_model_file(self, field): def validate_spacy_model_file(self, field):
if not field.data.filename.lower().endswith('.tar.gz'): if not field.data.filename.lower().endswith(('.tar.gz', ('.whl'))):
raise ValidationError('.tar.gz files only!') raise ValidationError('.tar.gz or .whl files only!')
def __init__(self, *args, **kwargs): def __init__(self, *args, **kwargs):
if 'prefix' not in kwargs: if 'prefix' not in kwargs:

View File

@ -16,5 +16,4 @@ def before_request():
pass pass
from . import cli, cqi_over_socketio, files, followers, routes, json_routes from . import cli, files, followers, routes, json_routes
from . import cqi_over_sio

View File

@ -19,6 +19,9 @@ def reset():
for corpus in [x for x in Corpus.query.all() if x.status in status]: for corpus in [x for x in Corpus.query.all() if x.status in status]:
print(f'Resetting corpus {corpus}') print(f'Resetting corpus {corpus}')
shutil.rmtree(os.path.join(corpus.path, 'cwb'), ignore_errors=True) shutil.rmtree(os.path.join(corpus.path, 'cwb'), ignore_errors=True)
os.mkdir(os.path.join(corpus.path, 'cwb'))
os.mkdir(os.path.join(corpus.path, 'cwb', 'data'))
os.mkdir(os.path.join(corpus.path, 'cwb', 'registry'))
corpus.status = CorpusStatus.UNPREPARED corpus.status = CorpusStatus.UNPREPARED
corpus.num_analysis_sessions = 0 corpus.num_analysis_sessions = 0
db.session.commit() db.session.commit()

View File

@ -1,112 +1,206 @@
from cqi import CQiClient from cqi import CQiClient
from cqi.errors import CQiException from cqi.errors import CQiException
from flask import session from cqi.status import CQiStatus
from docker.models.containers import Container
from flask import current_app, session
from flask_login import current_user from flask_login import current_user
from flask_socketio import ConnectionRefusedError from flask_socketio import Namespace
from inspect import signature
from threading import Lock from threading import Lock
from app import db, hashids, socketio from typing import Callable, Dict, List, Optional
from app import db, docker_client, hashids, socketio
from app.decorators import socketio_login_required from app.decorators import socketio_login_required
from app.models import Corpus, CorpusStatus from app.models import Corpus, CorpusStatus
from . import extensions
''' '''
This package tunnels the Corpus Query interface (CQi) protocol through This package tunnels the Corpus Query interface (CQi) protocol through
Socket.IO (SIO) by wrapping each CQi function in a seperate SIO event. Socket.IO (SIO) by tunneling CQi API calls through an event called "exec".
This module only handles the SIO connect/disconnect, which handles the setup
and teardown of necessary ressources for later use. Each CQi function has a
corresponding SIO event. The event handlers are spread across the different
modules within this package.
Basic concept: Basic concept:
1. A client connects to the SIO namespace and provides the id of a corpus to be 1. A client connects to the "/cqi_over_sio" namespace.
analysed. 2. The client emits the "init" event and provides a corpus id for the corpus
that should be analysed in this session.
1.1 The analysis session counter of the corpus is incremented. 1.1 The analysis session counter of the corpus is incremented.
1.2 A CQiClient and a (Mutex) Lock belonging to it is created. 1.2 A CQiClient and a (Mutex) Lock belonging to it is created.
1.3 Wait until the CQP server is running. 1.3 Wait until the CQP server is running.
1.4 Connect the CQiClient to the server. 1.4 Connect the CQiClient to the server.
1.5 Save the CQiClient and the Lock in the session for subsequential use. 1.5 Save the CQiClient, the Lock and the corpus id in the session for
2. A client emits an event and may provide a single json object with necessary subsequential use.
arguments for the targeted CQi function. 2. The client emits the "exec" event provides the name of a CQi API function
3. A SIO event handler (decorated with cqi_over_socketio) gets executed. arguments (optional).
- The event handler function defines all arguments. Hence the client - The event "exec" handler will execute the function, make sure that the
is sent as a single json object, the decorator decomposes it to fit result is serializable and returns the result back to the client.
the functions signature. This also includes type checking and proper
use of the lock (acquire/release) mechanism.
4. Wait for more events 4. Wait for more events
5. The client disconnects from the SIO namespace 5. The client disconnects from the "/cqi_over_sio" namespace
1.1 The analysis session counter of the corpus is decremented. 1.1 The analysis session counter of the corpus is decremented.
1.2 The CQiClient and (Mutex) Lock belonging to it are teared down. 1.2 The CQiClient and (Mutex) Lock belonging to it are teared down.
''' '''
CQI_API_FUNCTION_NAMES: List[str] = [
NAMESPACE = '/cqi_over_sio' 'ask_feature_cl_2_3',
'ask_feature_cqi_1_0',
'ask_feature_cqp_2_3',
'cl_alg2cpos',
'cl_attribute_size',
'cl_cpos2alg',
'cl_cpos2id',
'cl_cpos2lbound',
'cl_cpos2rbound',
'cl_cpos2str',
'cl_cpos2struc',
'cl_drop_attribute',
'cl_id2cpos',
'cl_id2freq',
'cl_id2str',
'cl_idlist2cpos',
'cl_lexicon_size',
'cl_regex2id',
'cl_str2id',
'cl_struc2cpos',
'cl_struc2str',
'corpus_alignment_attributes',
'corpus_charset',
'corpus_drop_corpus',
'corpus_full_name',
'corpus_info',
'corpus_list_corpora',
'corpus_positional_attributes',
'corpus_properties',
'corpus_structural_attribute_has_values',
'corpus_structural_attributes',
'cqp_drop_subcorpus',
'cqp_dump_subcorpus',
'cqp_fdist_1',
'cqp_fdist_2',
'cqp_list_subcorpora',
'cqp_query',
'cqp_subcorpus_has_field',
'cqp_subcorpus_size',
'ctrl_bye',
'ctrl_connect',
'ctrl_last_general_error',
'ctrl_ping',
'ctrl_user_abort'
]
from .cqi import * # noqa class CQiNamespace(Namespace):
@socketio.on('connect', namespace=NAMESPACE)
@socketio_login_required @socketio_login_required
def connect(auth): def on_connect(self):
# the auth variable is used in a hacky way. It contains the corpus id for pass
# which a corpus analysis session should be started.
corpus_id = hashids.decode(auth['corpus_id']) @socketio_login_required
corpus = Corpus.query.get(corpus_id) def on_init(self, db_corpus_hashid: str):
if corpus is None: db_corpus_id: int = hashids.decode(db_corpus_hashid)
# return {'code': 404, 'msg': 'Not Found'} db_corpus: Optional[Corpus] = Corpus.query.get(db_corpus_id)
raise ConnectionRefusedError('Not Found') if db_corpus is None:
if not (corpus.user == current_user return {'code': 404, 'msg': 'Not Found'}
or current_user.is_following_corpus(corpus) if not (db_corpus.user == current_user
or current_user.is_following_corpus(db_corpus)
or current_user.is_administrator()): or current_user.is_administrator()):
# return {'code': 403, 'msg': 'Forbidden'} return {'code': 403, 'msg': 'Forbidden'}
raise ConnectionRefusedError('Forbidden') if db_corpus.status not in [
if corpus.status not in [
CorpusStatus.BUILT, CorpusStatus.BUILT,
CorpusStatus.STARTING_ANALYSIS_SESSION, CorpusStatus.STARTING_ANALYSIS_SESSION,
CorpusStatus.RUNNING_ANALYSIS_SESSION, CorpusStatus.RUNNING_ANALYSIS_SESSION,
CorpusStatus.CANCELING_ANALYSIS_SESSION CorpusStatus.CANCELING_ANALYSIS_SESSION
]: ]:
# return {'code': 424, 'msg': 'Failed Dependency'} return {'code': 424, 'msg': 'Failed Dependency'}
raise ConnectionRefusedError('Failed Dependency') if db_corpus.num_analysis_sessions is None:
if corpus.num_analysis_sessions is None: db_corpus.num_analysis_sessions = 0
corpus.num_analysis_sessions = 0
db.session.commit() db.session.commit()
corpus.num_analysis_sessions = Corpus.num_analysis_sessions + 1 db_corpus.num_analysis_sessions = Corpus.num_analysis_sessions + 1
db.session.commit() db.session.commit()
retry_counter = 20 retry_counter: int = 20
while corpus.status != CorpusStatus.RUNNING_ANALYSIS_SESSION: while db_corpus.status != CorpusStatus.RUNNING_ANALYSIS_SESSION:
if retry_counter == 0: if retry_counter == 0:
corpus.num_analysis_sessions = Corpus.num_analysis_sessions - 1 db_corpus.num_analysis_sessions = Corpus.num_analysis_sessions - 1
db.session.commit() db.session.commit()
return {'code': 408, 'msg': 'Request Timeout'} return {'code': 408, 'msg': 'Request Timeout'}
socketio.sleep(3) socketio.sleep(3)
retry_counter -= 1 retry_counter -= 1
db.session.refresh(corpus) db.session.refresh(db_corpus)
cqi_client = CQiClient(f'cqpserver_{corpus_id}') # cqi_client: CQiClient = CQiClient(f'cqpserver_{db_corpus_id}')
cqpserver_container_name: str = f'cqpserver_{db_corpus_id}'
cqpserver_container: Container = docker_client.containers.get(cqpserver_container_name)
cqpserver_host: str = cqpserver_container.attrs['NetworkSettings']['Networks'][current_app.config['NOPAQUE_DOCKER_NETWORK_NAME']]['IPAddress']
cqi_client: CQiClient = CQiClient(cqpserver_host)
session['cqi_over_sio'] = { session['cqi_over_sio'] = {
'corpus_id': corpus_id,
'cqi_client': cqi_client, 'cqi_client': cqi_client,
'cqi_client_lock': Lock(), 'cqi_client_lock': Lock(),
'db_corpus_id': db_corpus_id
} }
# return {'code': 200, 'msg': 'OK'} return {'code': 200, 'msg': 'OK'}
@socketio_login_required
@socketio.on('disconnect', namespace=NAMESPACE) def on_exec(self, fn_name: str, fn_args: Dict = {}):
def disconnect():
try: try:
cqi_client: CQiClient = session['cqi_over_sio']['cqi_client'] cqi_client: CQiClient = session['cqi_over_sio']['cqi_client']
cqi_client_lock: Lock = session['cqi_over_sio']['cqi_client_lock'] cqi_client_lock: Lock = session['cqi_over_sio']['cqi_client_lock']
except KeyError:
return {'code': 424, 'msg': 'Failed Dependency'}
if fn_name in CQI_API_FUNCTION_NAMES:
fn: Callable = getattr(cqi_client.api, fn_name)
elif fn_name in extensions.CQI_EXTENSION_FUNCTION_NAMES:
fn: Callable = getattr(extensions, fn_name)
else:
return {'code': 400, 'msg': 'Bad Request'}
for param in signature(fn).parameters.values():
if param.default is param.empty:
if param.name not in fn_args:
return {'code': 400, 'msg': 'Bad Request'}
else:
if param.name not in fn_args:
continue
if type(fn_args[param.name]) is not param.annotation:
return {'code': 400, 'msg': 'Bad Request'}
cqi_client_lock.acquire()
try:
fn_return_value = fn(**fn_args)
except BrokenPipeError as e:
return {'code': 500, 'msg': 'Internal Server Error'}
except CQiException as e:
return {
'code': 502,
'msg': 'Bad Gateway',
'payload': {
'code': e.code,
'desc': e.description,
'msg': e.__class__.__name__
}
}
finally:
cqi_client_lock.release()
if isinstance(fn_return_value, CQiStatus):
payload = {
'code': fn_return_value.code,
'msg': fn_return_value.__class__.__name__
}
else:
payload = fn_return_value
return {'code': 200, 'msg': 'OK', 'payload': payload}
def on_disconnect(self):
try:
cqi_client: CQiClient = session['cqi_over_sio']['cqi_client']
cqi_client_lock: Lock = session['cqi_over_sio']['cqi_client_lock']
db_corpus_id: int = session['cqi_over_sio']['db_corpus_id']
except KeyError: except KeyError:
return return
cqi_client_lock.acquire() cqi_client_lock.acquire()
try:
session.pop('cqi_over_sio')
except KeyError:
pass
try: try:
cqi_client.api.ctrl_bye() cqi_client.api.ctrl_bye()
except (BrokenPipeError, CQiException): except (BrokenPipeError, CQiException):
pass pass
cqi_client_lock.release() cqi_client_lock.release()
corpus = Corpus.query.get(session['cqi_over_sio']['corpus_id']) db_corpus: Optional[Corpus] = Corpus.query.get(db_corpus_id)
corpus.num_analysis_sessions = Corpus.num_analysis_sessions - 1 if db_corpus is None:
return
db_corpus.num_analysis_sessions = Corpus.num_analysis_sessions - 1
db.session.commit() db.session.commit()
session.pop('cqi_over_sio')
# return {'code': 200, 'msg': 'OK'}

View File

@ -1,114 +0,0 @@
from cqi import CQiClient
from cqi.errors import CQiException
from cqi.status import CQiStatus
from flask import session
from inspect import signature
from threading import Lock
from typing import Callable, Dict, List
from app import socketio
from app.decorators import socketio_login_required
from . import NAMESPACE as ns
# Allow-list of function names a client may request through the
# "cqi_client.api" event. The event handler in this module rejects any
# "fn_name" that is not listed here and resolves accepted names with
# getattr() on cqi_client.api.
CQI_API_FUNCTIONS: List[str] = [
'ask_feature_cl_2_3',
'ask_feature_cqi_1_0',
'ask_feature_cqp_2_3',
'cl_alg2cpos',
'cl_attribute_size',
'cl_cpos2alg',
'cl_cpos2id',
'cl_cpos2lbound',
'cl_cpos2rbound',
'cl_cpos2str',
'cl_cpos2struc',
'cl_drop_attribute',
'cl_id2cpos',
'cl_id2freq',
'cl_id2str',
'cl_idlist2cpos',
'cl_lexicon_size',
'cl_regex2id',
'cl_str2id',
'cl_struc2cpos',
'cl_struc2str',
'corpus_alignment_attributes',
'corpus_charset',
'corpus_drop_corpus',
'corpus_full_name',
'corpus_info',
'corpus_list_corpora',
'corpus_positional_attributes',
'corpus_properties',
'corpus_structural_attribute_has_values',
'corpus_structural_attributes',
'cqp_drop_subcorpus',
'cqp_dump_subcorpus',
'cqp_fdist_1',
'cqp_fdist_2',
'cqp_list_subcorpora',
'cqp_query',
'cqp_subcorpus_has_field',
'cqp_subcorpus_size',
'ctrl_bye',
'ctrl_connect',
'ctrl_last_general_error',
'ctrl_ping',
'ctrl_user_abort'
]
@socketio.on('cqi_client.api', namespace=ns)
@socketio_login_required
def cqi_over_sio(fn_data):
    '''
    Run one CQi API function for the calling Socket.IO client.

    Args:
        fn_data: Event payload. Expected to be a dict with the key
            "fn_name" (a name listed in CQI_API_FUNCTIONS) and the optional
            key "fn_args" (a dict of keyword arguments for that function).

    Returns:
        A dict with the keys "code" and "msg" and, where applicable,
        "payload":
        - 200 OK: "payload" is the return value of the function; a
          CQiStatus is converted to a dict with its code and class name.
        - 400 Bad Request: malformed payload, unknown function name,
          missing required argument or argument of the wrong type.
        - 424 Failed Dependency: the session holds no CQi client.
        - 500 Internal Server Error: the call raised BrokenPipeError.
        - 502 Bad Gateway: the call raised a CQiException; "payload"
          describes the exception.
    '''
    bad_request = {'code': 400, 'msg': 'Bad Request'}
    # The payload comes straight from the client, so its shape must be
    # checked before it is indexed.
    if not isinstance(fn_data, dict):
        return bad_request
    try:
        fn_name: str = fn_data['fn_name']
    except KeyError:
        return bad_request
    if fn_name not in CQI_API_FUNCTIONS:
        return bad_request
    fn_args: Dict = fn_data.get('fn_args', {})
    if not isinstance(fn_args, dict):
        return bad_request
    try:
        cqi_client: CQiClient = session['cqi_over_sio']['cqi_client']
        cqi_client_lock: Lock = session['cqi_over_sio']['cqi_client_lock']
    except KeyError:
        return {'code': 424, 'msg': 'Failed Dependency'}
    fn: Callable = getattr(cqi_client.api, fn_name)
    for param in signature(fn).parameters.values():
        if param.name not in fn_args:
            if param.default is param.empty:
                # A required argument is missing
                return bad_request
            # An optional argument was left out, the default applies
            continue
        if type(fn_args[param.name]) is not param.annotation:
            return bad_request
    try:
        # The lock serializes access to the CQi client of this session; the
        # with statement releases it on every exit path.
        with cqi_client_lock:
            return_value = fn(**fn_args)
    except BrokenPipeError:
        # Report the failure itself instead of wrapping it in a 200 reply
        return {'code': 500, 'msg': 'Internal Server Error'}
    except CQiException as e:
        return {
            'code': 502,
            'msg': 'Bad Gateway',
            'payload': {
                'code': e.code,
                'desc': e.description,
                'msg': e.__class__.__name__
            }
        }
    if isinstance(return_value, CQiStatus):
        payload = {
            'code': return_value.code,
            'msg': return_value.__class__.__name__
        }
    else:
        payload = return_value
    return {'code': 200, 'msg': 'OK', 'payload': payload}

View File

@ -0,0 +1,288 @@
from collections import Counter
from cqi import CQiClient
from cqi.models.corpora import Corpus as CQiCorpus
from cqi.models.subcorpora import Subcorpus as CQiSubcorpus
from cqi.models.attributes import (
PositionalAttribute as CQiPositionalAttribute,
StructuralAttribute as CQiStructuralAttribute
)
from cqi.status import StatusOk as CQiStatusOk
from flask import session
from typing import Dict, List
import gzip
import json
import math
import os
from app import db
from app.models import Corpus
from .utils import lookups_by_cpos, partial_export_subcorpus, export_subcorpus
# Names of the extension functions defined in this module. Every entry matches
# the name of one of the ext_* functions below.
CQI_EXTENSION_FUNCTION_NAMES: List[str] = [
'ext_corpus_update_db',
'ext_corpus_static_data',
'ext_corpus_paginate_corpus',
'ext_cqp_paginate_subcorpus',
'ext_cqp_partial_export_subcorpus',
'ext_cqp_export_subcorpus',
]
def ext_corpus_update_db(corpus: str) -> CQiStatusOk:
    '''
    Store the size of a CQi corpus as num_tokens of the database corpus that
    belongs to the current session.

    Args:
        corpus: Name passed to cqi_client.corpora.get().

    Returns:
        A CQiStatusOk instance after the database session was committed.
    '''
    sio_state = session['cqi_over_sio']
    client: CQiClient = sio_state['cqi_client']
    record: Corpus = Corpus.query.get(sio_state['db_corpus_id'])
    record.num_tokens = client.corpora.get(corpus).size
    db.session.commit()
    return CQiStatusOk()
def ext_corpus_static_data(corpus: str) -> Dict:
# Purpose: return the content of <db_corpus.path>/cwb/static.json.gz as raw
# bytes. The file acts as a cache: if it exists it is returned as it is,
# otherwise the data is collected through the CQi client of the session,
# written to that file as gzip-compressed JSON and then read back.
# Parameter corpus: name passed to cqi_client.corpora.get().
# NOTE(review): the return annotation is Dict, but both return statements
# return f.read() of a file opened in 'rb' mode, i.e. bytes.
# NOTE(review): indentation is lost in this view; the comments below describe
# the nesting the statements suggest - confirm against the real file.
db_corpus_id: int = session['cqi_over_sio']['db_corpus_id']
db_corpus: Corpus = Corpus.query.get(db_corpus_id)
static_data_file_path: str = os.path.join(db_corpus.path, 'cwb', 'static.json.gz')
# Cache hit: hand out the stored file without contacting the CQi client
if os.path.exists(static_data_file_path):
with open(static_data_file_path, 'rb') as f:
return f.read()
cqi_client: CQiClient = session['cqi_over_sio']['cqi_client']
cqi_corpus: CQiCorpus = cqi_client.corpora.get(corpus)
cqi_p_attrs: List[CQiPositionalAttribute] = cqi_corpus.positional_attributes.list()
cqi_s_attrs: List[CQiStructuralAttribute] = cqi_corpus.structural_attributes.list()
# Skeleton of the result; the loops below fill it in
static_data = {
'corpus': {
'bounds': [0, cqi_corpus.size - 1],
'freqs': {}
},
'p_attrs': {},
's_attrs': {},
'values': {'p_attrs': {}, 's_attrs': {}}
}
# Per positional attribute: frequencies by lexicon id, ids by corpus position
# and values by lexicon id. The print calls only report progress.
for p_attr in cqi_p_attrs:
print(f'corpus.freqs.{p_attr.name}')
static_data['corpus']['freqs'][p_attr.name] = []
p_attr_id_list: List[int] = list(range(p_attr.lexicon_size))
static_data['corpus']['freqs'][p_attr.name].extend(p_attr.freqs_by_ids(p_attr_id_list))
del p_attr_id_list
print(f'p_attrs.{p_attr.name}')
static_data['p_attrs'][p_attr.name] = []
cpos_list: List[int] = list(range(cqi_corpus.size))
static_data['p_attrs'][p_attr.name].extend(p_attr.ids_by_cpos(cpos_list))
del cpos_list
print(f'values.p_attrs.{p_attr.name}')
static_data['values']['p_attrs'][p_attr.name] = []
p_attr_id_list: List[int] = list(range(p_attr.lexicon_size))
static_data['values']['p_attrs'][p_attr.name].extend(p_attr.values_by_ids(p_attr_id_list))
del p_attr_id_list
for s_attr in cqi_s_attrs:
# Structural attributes that carry values are skipped here; they are read
# further down through the 'part_of' filter
if s_attr.has_values:
continue
static_data['s_attrs'][s_attr.name] = {'lexicon': [], 'values': None}
if s_attr.name in ['s', 'ent']:
##############################################################
# A faster way to get cpos boundaries for smaller s_attrs #
# Note: Needs more testing, don't use it in production #
##############################################################
# Query every region of this attribute into the subcorpus 'Last' and dump
# its 'match' and 'matchend' fields. Both dumps are evaluated as arguments of
# zip(), i.e. before the subcorpus is dropped.
cqi_corpus.query('Last', f'<{s_attr.name}> []* </{s_attr.name}>;')
cqi_subcorpus: CQiSubcorpus = cqi_corpus.subcorpora.get('Last')
first_match: int = 0
last_match: int = cqi_subcorpus.size - 1
match_boundaries = zip(
range(first_match, last_match + 1),
cqi_subcorpus.dump(
cqi_subcorpus.fields['match'],
first_match,
last_match
),
cqi_subcorpus.dump(
cqi_subcorpus.fields['matchend'],
first_match,
last_match
)
)
cqi_subcorpus.drop()
del cqi_subcorpus, first_match, last_match
for id, lbound, rbound in match_boundaries:
static_data['s_attrs'][s_attr.name]['lexicon'].append({})
print(f's_attrs.{s_attr.name}.lexicon.{id}.bounds')
static_data['s_attrs'][s_attr.name]['lexicon'][id]['bounds'] = [lbound, rbound]
del match_boundaries
# NOTE(review): presumably at the level of the s_attr loop, so that the rest
# of the loop body only runs for the 'text' attribute - confirm.
if s_attr.name != 'text':
continue
for id in range(0, s_attr.size):
static_data['s_attrs'][s_attr.name]['lexicon'].append({})
# This is a very slow operation, that's why we only use it for
# the text attribute
lbound, rbound = s_attr.cpos_by_id(id)
print(f's_attrs.{s_attr.name}.lexicon.{id}.bounds')
static_data['s_attrs'][s_attr.name]['lexicon'][id]['bounds'] = [lbound, rbound]
static_data['s_attrs'][s_attr.name]['lexicon'][id]['freqs'] = {}
cpos_list: List[int] = list(range(lbound, rbound + 1))
# Count how often each positional attribute id occurs inside this region
for p_attr in cqi_p_attrs:
p_attr_ids: List[int] = []
p_attr_ids.extend(p_attr.ids_by_cpos(cpos_list))
print(f's_attrs.{s_attr.name}.lexicon.{id}.freqs.{p_attr.name}')
static_data['s_attrs'][s_attr.name]['lexicon'][id]['freqs'][p_attr.name] = dict(Counter(p_attr_ids))
del p_attr_ids
del cpos_list
sub_s_attrs: List[CQiStructuralAttribute] = cqi_corpus.structural_attributes.list(filters={'part_of': s_attr})
print(f's_attrs.{s_attr.name}.values')
# The slice drops the first len(s_attr.name) + 1 characters of each sub
# attribute name
static_data['s_attrs'][s_attr.name]['values'] = [
sub_s_attr.name[(len(s_attr.name) + 1):]
for sub_s_attr in sub_s_attrs
]
s_attr_id_list: List[int] = list(range(s_attr.size))
# NOTE(review): annotated as List[str], but a list is appended per sub
# attribute, so this holds one list of values per sub attribute.
sub_s_attr_values: List[str] = []
for sub_s_attr in sub_s_attrs:
tmp = []
tmp.extend(sub_s_attr.values_by_ids(s_attr_id_list))
sub_s_attr_values.append(tmp)
del tmp
del s_attr_id_list
print(f'values.s_attrs.{s_attr.name}')
# One dict per attribute id that maps each value name to its value
static_data['values']['s_attrs'][s_attr.name] = [
{
s_attr_value_name: sub_s_attr_values[s_attr_value_name_idx][s_attr_id]
for s_attr_value_name_idx, s_attr_value_name in enumerate(
static_data['s_attrs'][s_attr.name]['values']
)
} for s_attr_id in range(0, s_attr.size)
]
del sub_s_attr_values
print('Saving static data to file')
# Written in text mode through gzip, read back below in binary mode so that
# the compressed bytes are returned
with gzip.open(static_data_file_path, 'wt') as f:
json.dump(static_data, f)
del static_data
print('Sending static data to client')
with open(static_data_file_path, 'rb') as f:
return f.read()
def ext_corpus_paginate_corpus(
    corpus: str,
    page: int = 1,
    per_page: int = 20
) -> Dict:
    '''
    Return one page of corpus positions together with their lookups.

    Args:
        corpus: Name passed to cqi_client.corpora.get().
        page: Number of the requested page (1 indexed).
        per_page: Number of corpus positions per page.

    Returns:
        A dict with the keys items, lookups, total, per_page, pages, page,
        has_prev, has_next, prev_num and next_num, or
        {'code': 416, 'msg': 'Range Not Satisfiable'} if page or per_page is
        smaller than 1 or page lies behind the last page.
    '''
    client: CQiClient = session['cqi_over_sio']['cqi_client']
    target = client.corpora.get(corpus)
    # Sanity checks
    if per_page < 1 or page < 1:
        return {'code': 416, 'msg': 'Range Not Satisfiable'}
    total = target.size
    if total > 0 and page > math.ceil(total / per_page):
        return {'code': 416, 'msg': 'Range Not Satisfiable'}
    start = (page - 1) * per_page
    stop = min(total, start + per_page)
    positions = list(range(start, stop))
    position_lookups = lookups_by_cpos(target, positions)
    page_count = math.ceil(total / per_page)
    # There is no current page if the corpus is empty
    current = page if page_count > 0 else None
    has_prev = current > 1 if current else False
    has_next = current < page_count if current else False
    return {
        # the items for the current page
        'items': [positions],
        # the lookups for the items
        'lookups': position_lookups,
        # the total number of items
        'total': total,
        # the number of items to be displayed on a page
        'per_page': per_page,
        # the total number of pages
        'pages': page_count,
        # the current page number (1 indexed)
        'page': current,
        # True if a previous page exists
        'has_prev': has_prev,
        # True if a next page exists
        'has_next': has_next,
        # number of the previous page
        'prev_num': current - 1 if has_prev else None,
        # number of the next page
        'next_num': current + 1 if has_next else None
    }
def ext_cqp_paginate_subcorpus(
    subcorpus: str,
    context: int = 50,
    page: int = 1,
    per_page: int = 20
) -> Dict:
    '''
    Return one page of matches of a subcorpus together with their lookups.

    Args:
        subcorpus: Identifier of the form "<corpus name>:<subcorpus name>";
            it is split at the first colon.
        context: Passed on to export_subcorpus() as context.
        page: Number of the requested page (1 indexed).
        per_page: Number of matches per page.

    Returns:
        A dict with the keys items, lookups, total, per_page, pages, page,
        has_prev, has_next, prev_num and next_num, or
        {'code': 416, 'msg': 'Range Not Satisfiable'} if page or per_page is
        smaller than 1 or page lies behind the last page.
    '''
    parent_name, child_name = subcorpus.split(':', 1)
    client: CQiClient = session['cqi_over_sio']['cqi_client']
    target = client.corpora.get(parent_name).subcorpora.get(child_name)
    # Sanity checks
    if per_page < 1 or page < 1:
        return {'code': 416, 'msg': 'Range Not Satisfiable'}
    total = target.size
    if total > 0 and page > math.ceil(total / per_page):
        return {'code': 416, 'msg': 'Range Not Satisfiable'}
    export = export_subcorpus(
        target,
        context=context,
        cutoff=per_page,
        offset=(page - 1) * per_page
    )
    # Whatever remains in the export after the matches are taken out serves
    # as the lookups
    matches = export.pop('matches')
    page_count = math.ceil(total / per_page)
    # There is no current page if the subcorpus is empty
    current = page if page_count > 0 else None
    has_prev = current > 1 if current else False
    has_next = current < page_count if current else False
    return {
        # the items for the current page
        'items': matches,
        # the lookups for the items
        'lookups': export,
        # the total number of items matching the query
        'total': total,
        # the number of items to be displayed on a page
        'per_page': per_page,
        # the total number of pages
        'pages': page_count,
        # the current page number (1 indexed)
        'page': current,
        # True if a previous page exists
        'has_prev': has_prev,
        # True if a next page exists
        'has_next': has_next,
        # number of the previous page
        'prev_num': current - 1 if has_prev else None,
        # number of the next page
        'next_num': current + 1 if has_next else None
    }
def ext_cqp_partial_export_subcorpus(
    subcorpus: str,
    match_id_list: list,
    context: int = 50
) -> Dict:
    '''
    Export selected matches of a subcorpus.

    Args:
        subcorpus: Identifier of the form "<corpus name>:<subcorpus name>";
            it is split at the first colon.
        match_id_list: Passed on to partial_export_subcorpus().
        context: Passed on to partial_export_subcorpus() as context.

    Returns:
        The return value of partial_export_subcorpus().
    '''
    parent_name, child_name = subcorpus.split(':', 1)
    client: CQiClient = session['cqi_over_sio']['cqi_client']
    target = client.corpora.get(parent_name).subcorpora.get(child_name)
    return partial_export_subcorpus(target, match_id_list, context=context)
def ext_cqp_export_subcorpus(
    subcorpus: str,
    context: int = 50
) -> Dict:
    '''
    Export a subcorpus.

    Args:
        subcorpus: Identifier of the form "<corpus name>:<subcorpus name>";
            it is split at the first colon.
        context: Passed on to export_subcorpus() as context.

    Returns:
        The return value of export_subcorpus().
    '''
    parent_name, child_name = subcorpus.split(':', 1)
    client: CQiClient = session['cqi_over_sio']['cqi_client']
    target = client.corpora.get(parent_name).subcorpora.get(child_name)
    return export_subcorpus(target, context=context)

View File

@ -0,0 +1,131 @@
from cqi.models.corpora import Corpus as CQiCorpus
from cqi.models.subcorpora import Subcorpus as CQiSubcorpus
from typing import Dict, List
def lookups_by_cpos(corpus: CQiCorpus, cpos_list: List[int]) -> Dict:
    """Build lookup tables for the given corpus positions.

    The result always contains ``'cpos_lookup'``, mapping every cpos to a
    dict of its positional attribute values and of the ids of the
    structural attributes covering it (an id of -1 is left out). For every
    structural attribute without values that occurs at least once and has
    sub attributes, an additional ``'<name>_lookup'`` table maps each
    occurring id to its sub attribute values. Sub attribute keys are the
    sub attribute name without the ``'<name>_'`` prefix.
    """
    cpos_lookup = {cpos: {} for cpos in cpos_list}
    result = {'cpos_lookup': cpos_lookup}
    for p_attr in corpus.positional_attributes.list():
        p_attr_values: List[str] = p_attr.values_by_cpos(cpos_list)
        for cpos, value in zip(cpos_list, p_attr_values):
            cpos_lookup[cpos][p_attr.name] = value
    for s_attr in corpus.structural_attributes.list():
        # Sub attributes (has_values == True) are handled via their parent
        if s_attr.has_values:
            continue
        s_attr_ids: List[int] = s_attr.ids_by_cpos(cpos_list)
        for cpos, s_attr_id in zip(cpos_list, s_attr_ids):
            if s_attr_id != -1:
                cpos_lookup[cpos][s_attr.name] = s_attr_id
        seen_ids = [s_attr_id for s_attr_id in set(s_attr_ids) if s_attr_id != -1]
        if len(seen_ids) == 0:
            continue
        children = corpus.structural_attributes.list(filters={'part_of': s_attr})
        if len(children) == 0:
            continue
        id_lookup = {s_attr_id: {} for s_attr_id in seen_ids}
        result[f'{s_attr.name}_lookup'] = id_lookup
        prefix_length = len(s_attr.name) + 1
        for child in children:
            key = child.name[prefix_length:]
            for s_attr_id, value in zip(seen_ids, child.values_by_ids(seen_ids)):
                id_lookup[s_attr_id][key] = value
    return result
def partial_export_subcorpus(
    subcorpus: CQiSubcorpus,
    match_id_list: List[int],
    context: int = 25
) -> Dict:
    """Export the matches with the given ids together with their context.

    Ids outside ``0 <= id < subcorpus.size`` are skipped. Each exported
    match is a dict with the keys ``num`` (match id), ``lc`` (left context
    bounds or None), ``c`` (match bounds) and ``rc`` (right context bounds
    or None). The lookups returned by ``lookups_by_cpos`` for all covered
    corpus positions are merged into the result.
    """
    if subcorpus.size == 0:
        return {"matches": []}
    boundaries = [
        (
            match_id,
            subcorpus.dump(subcorpus.fields['match'], match_id, match_id)[0],
            subcorpus.dump(subcorpus.fields['matchend'], match_id, match_id)[0]
        )
        for match_id in match_id_list
        if not (match_id < 0 or match_id >= subcorpus.size)
    ]
    matches = []
    covered_cpos = set()
    for match_num, start, end in boundaries:
        final_cpos = subcorpus.collection.corpus.size - 1
        left = None
        right = None
        # No left context at the very start of the corpus or if disabled
        if context != 0 and start != 0:
            left = (max(0, (start - context)), start - 1)
        # No right context at the very end of the corpus or if disabled
        if context != 0 and end != final_cpos:
            right = (end + 1, min((end + context), final_cpos))
        lower = start if left is None else left[0]
        upper = end if right is None else right[1]
        matches.append({'num': match_num, 'lc': left, 'c': (start, end), 'rc': right})
        covered_cpos.update(range(lower, upper + 1))
    lookups = lookups_by_cpos(subcorpus.collection.corpus, list(covered_cpos))
    return {'matches': matches, **lookups}
def export_subcorpus(
    subcorpus: CQiSubcorpus,
    context: int = 25,
    cutoff: float = float('inf'),
    offset: int = 0
) -> Dict:
    """Export a window of matches of a subcorpus together with their context.

    :param subcorpus: the subcorpus (query result) to export from
    :param context: number of tokens of left/right context per match;
        0 disables context
    :param cutoff: maximum number of matches to export (may be a float,
        the default ``inf`` means "all")
    :param offset: index of the first match to export
    :returns: a dict with the key ``matches`` (list of dicts with the keys
        ``num``, ``lc``, ``c`` and ``rc``) merged with the lookups returned
        by ``lookups_by_cpos`` for all covered corpus positions. If the
        subcorpus or the requested window is empty only
        ``{"matches": []}`` is returned.
    """
    if subcorpus.size == 0:
        return {"matches": []}
    first_match = max(0, offset)
    upper_bound = min((offset + cutoff - 1), (subcorpus.size - 1))
    # An empty window (offset behind the last match or cutoff <= 0) must not
    # be forwarded to dump(), which expects first <= last.
    if upper_bound < first_match:
        return {"matches": []}
    # cutoff is annotated as float; range() below needs an int.
    last_match = int(upper_bound)
    match_boundaries = zip(
        range(first_match, last_match + 1),
        subcorpus.dump(subcorpus.fields['match'], first_match, last_match),
        subcorpus.dump(subcorpus.fields['matchend'], first_match, last_match)
    )
    cpos_set = set()
    matches = []
    for match_num, match_start, match_end in match_boundaries:
        c = (match_start, match_end)
        # No left context at the very start of the corpus or if disabled
        if match_start == 0 or context == 0:
            lc = None
            cpos_list_lbound = match_start
        else:
            lc_lbound = max(0, (match_start - context))
            lc_rbound = match_start - 1
            lc = (lc_lbound, lc_rbound)
            cpos_list_lbound = lc_lbound
        # No right context at the very end of the corpus or if disabled
        if match_end == (subcorpus.collection.corpus.size - 1) or context == 0:
            rc = None
            cpos_list_rbound = match_end
        else:
            rc_lbound = match_end + 1
            rc_rbound = min(
                (match_end + context),
                (subcorpus.collection.corpus.size - 1)
            )
            rc = (rc_lbound, rc_rbound)
            cpos_list_rbound = rc_rbound
        match = {'num': match_num, 'lc': lc, 'c': c, 'rc': rc}
        matches.append(match)
        cpos_set.update(range(cpos_list_lbound, cpos_list_rbound + 1))
    lookups = lookups_by_cpos(subcorpus.collection.corpus, list(cpos_set))
    return {'matches': matches, **lookups}

View File

@ -1,115 +0,0 @@
from flask import session
from flask_login import current_user
from flask_socketio import ConnectionRefusedError
from threading import Lock
import cqi
from app import db, hashids, socketio
from app.decorators import socketio_login_required
from app.models import Corpus, CorpusStatus
'''
This package tunnels the Corpus Query interface (CQi) protocol through
Socket.IO (SIO) by wrapping each CQi function in a separate SIO event.
This module only handles the SIO connect/disconnect, which handles the setup
and teardown of necessary resources for later use. Each CQi function has a
corresponding SIO event. The event handlers are spread across the different
modules within this package.
Basic concept:
1. A client connects to the SIO namespace and provides the id of a corpus to be
analysed.
1.1 The analysis session counter of the corpus is incremented.
1.2 A CQiClient and a (Mutex) Lock belonging to it is created.
1.3 Wait until the CQP server is running.
1.4 Connect the CQiClient to the server.
1.5 Save the CQiClient and the Lock in the session for subsequent use.
2. A client emits an event and may provide a single json object with necessary
arguments for the targeted CQi function.
3. A SIO event handler (decorated with cqi_over_socketio) gets executed.
- The event handler function defines all arguments. Since the client sends
its arguments as a single json object, the decorator decomposes it to fit
the function's signature. This also includes type checking and proper
use of the lock (acquire/release) mechanism.
4. Wait for more events
5. The client disconnects from the SIO namespace
5.1 The analysis session counter of the corpus is decremented.
5.2 The CQiClient and (Mutex) Lock belonging to it are torn down.
'''
NAMESPACE = '/corpora/corpus/corpus_analysis'
# Import all CQi over Socket.IO event handlers
from .cqi_corpora_corpus_subcorpora import * # noqa
from .cqi_corpora_corpus_structural_attributes import * # noqa
from .cqi_corpora_corpus_positional_attributes import * # noqa
from .cqi_corpora_corpus_alignment_attributes import * # noqa
from .cqi_corpora_corpus import * # noqa
from .cqi_corpora import * # noqa
from .cqi import * # noqa
@socketio.on('connect', namespace=NAMESPACE)
@socketio_login_required
def connect(auth):
    """Set up a corpus analysis session for the connecting client.

    ``auth['corpus_id']`` carries the hashid of the corpus to analyse. The
    connection is refused (``ConnectionRefusedError``) if the corpus does not
    exist, the user may not access it, the corpus is not in a status that
    allows analysis, or the corpus does not reach the status
    ``RUNNING_ANALYSIS_SESSION`` within 20 retries (3 seconds sleep each).
    On success the corpus' analysis session counter is incremented and the
    CQi client plus its lock are stored in ``session['d']``.
    """
    # the auth variable is used in a hacky way. It contains the corpus id for
    # which a corpus analysis session should be started.
    corpus_id = hashids.decode(auth['corpus_id'])
    corpus = Corpus.query.get(corpus_id)
    if corpus is None:
        raise ConnectionRefusedError('Not Found')
    if not (corpus.user == current_user
            or current_user.is_following_corpus(corpus)
            or current_user.is_administrator()):
        raise ConnectionRefusedError('Forbidden')
    if corpus.status not in [
        CorpusStatus.BUILT,
        CorpusStatus.STARTING_ANALYSIS_SESSION,
        CorpusStatus.RUNNING_ANALYSIS_SESSION,
        CorpusStatus.CANCELING_ANALYSIS_SESSION
    ]:
        raise ConnectionRefusedError('Failed Dependency')
    if corpus.num_analysis_sessions is None:
        corpus.num_analysis_sessions = 0
        db.session.commit()
    corpus.num_analysis_sessions = Corpus.num_analysis_sessions + 1
    db.session.commit()
    retry_counter = 20
    while corpus.status != CorpusStatus.RUNNING_ANALYSIS_SESSION:
        if retry_counter == 0:
            # Undo the increment from above before giving up
            corpus.num_analysis_sessions = Corpus.num_analysis_sessions - 1
            db.session.commit()
            # Refuse the connection like every other failure branch does.
            # NOTE(review): the previous `return {...}` here did not raise,
            # so the connection was presumably accepted without
            # session['d'] being set - confirm against Flask-SocketIO docs.
            raise ConnectionRefusedError('Request Timeout')
        socketio.sleep(3)
        retry_counter -= 1
        db.session.refresh(corpus)
    cqi_client = cqi.CQiClient(f'cqpserver_{corpus_id}')
    session['d'] = {
        'corpus_id': corpus_id,
        'cqi_client': cqi_client,
        'cqi_client_lock': Lock(),
    }
@socketio.on('disconnect', namespace=NAMESPACE)
def disconnect():
    """Tear down the corpus analysis session of the disconnecting client.

    Does nothing if no session data (``session['d']``) exists. Otherwise the
    CQi client is disconnected (``BrokenPipeError`` and CQi exceptions are
    ignored on purpose, the connection is going away anyway), the analysis
    session counter of the corpus is decremented and the session data is
    removed.
    """
    if 'd' not in session:
        return
    session_data = session['d']
    # `with` guarantees the lock is released even on unexpected exceptions
    with session_data['cqi_client_lock']:
        try:
            session_data['cqi_client'].disconnect()
        except (BrokenPipeError, cqi.errors.CQiException):
            pass
    corpus = Corpus.query.get(session_data['corpus_id'])
    # The corpus may no longer exist; still clean up the session below
    if corpus is not None:
        corpus.num_analysis_sessions = Corpus.num_analysis_sessions - 1
        db.session.commit()
    session.pop('d')

View File

@ -1,43 +0,0 @@
from socket import gaierror
import cqi
from app import socketio
from app.decorators import socketio_login_required
from . import NAMESPACE as ns
from .utils import cqi_over_socketio
@socketio.on('cqi.connect', namespace=ns)
@socketio_login_required
@cqi_over_socketio
def cqi_connect(cqi_client: cqi.CQiClient):
    """Connect the CQi client and report the resulting CQi status.

    A ``gaierror`` raised while connecting is answered with a 500 response
    whose payload carries the first two args of the error.
    """
    try:
        status = cqi_client.connect()
    except gaierror as error:
        error_payload = {'code': error.args[0], 'desc': error.args[1]}
        return {
            'code': 500,
            'msg': 'Internal Server Error',
            'payload': error_payload
        }
    return {
        'code': 200,
        'msg': 'OK',
        'payload': {'code': status.code, 'msg': status.__class__.__name__}
    }
@socketio.on('cqi.disconnect', namespace=ns)
@socketio_login_required
@cqi_over_socketio
def cqi_disconnect(cqi_client: cqi.CQiClient):
    """Disconnect the CQi client and report the resulting CQi status."""
    status = cqi_client.disconnect()
    return {
        'code': 200,
        'msg': 'OK',
        'payload': {'code': status.code, 'msg': status.__class__.__name__}
    }
@socketio.on('cqi.ping', namespace=ns)
@socketio_login_required
@cqi_over_socketio
def cqi_ping(cqi_client: cqi.CQiClient):
    """Ping via the CQi client and report the resulting CQi status."""
    status = cqi_client.ping()
    return {
        'code': 200,
        'msg': 'OK',
        'payload': {'code': status.code, 'msg': status.__class__.__name__}
    }

View File

@ -1,22 +0,0 @@
import cqi
from app import socketio
from app.decorators import socketio_login_required
from . import NAMESPACE as ns
from .utils import cqi_over_socketio
@socketio.on('cqi.corpora.get', namespace=ns)
@socketio_login_required
@cqi_over_socketio
def cqi_corpora_get(cqi_client: cqi.CQiClient, corpus_name: str):
    """Return a copy of the attrs of the corpus named ``corpus_name``."""
    corpus_attrs = dict(cqi_client.corpora.get(corpus_name).attrs)
    return {'code': 200, 'msg': 'OK', 'payload': corpus_attrs}
@socketio.on('cqi.corpora.list', namespace=ns)
@socketio_login_required
@cqi_over_socketio
def cqi_corpora_list(cqi_client: cqi.CQiClient):
    """Return copies of the attrs of all corpora listed by the CQi client."""
    all_attrs = []
    for corpus in cqi_client.corpora.list():
        all_attrs.append(dict(corpus.attrs))
    return {'code': 200, 'msg': 'OK', 'payload': all_attrs}

View File

@ -1,199 +0,0 @@
from collections import Counter
from flask import session
import cqi
import json
import math
import os
from app import db, socketio
from app.decorators import socketio_login_required
from app.models import Corpus
from . import NAMESPACE as ns
from .utils import cqi_over_socketio, lookups_by_cpos
@socketio.on('cqi.corpora.corpus.drop', namespace=ns)
@socketio_login_required
@cqi_over_socketio
def cqi_corpora_corpus_drop(cqi_client: cqi.CQiClient, corpus_name: str):
    """Drop the corpus and report the resulting CQi status."""
    status = cqi_client.corpora.get(corpus_name).drop()
    return {
        'code': 200,
        'msg': 'OK',
        'payload': {'code': status.code, 'msg': status.__class__.__name__}
    }
@socketio.on('cqi.corpora.corpus.query', namespace=ns)
@socketio_login_required
@cqi_over_socketio
def cqi_corpora_corpus_query(cqi_client: cqi.CQiClient, corpus_name: str, subcorpus_name: str, query: str):  # noqa
    """Run ``query`` on the corpus, storing the result as ``subcorpus_name``.

    The payload reports the CQi status returned by the query call.
    """
    status = cqi_client.corpora.get(corpus_name).query(subcorpus_name, query)
    return {
        'code': 200,
        'msg': 'OK',
        'payload': {'code': status.code, 'msg': status.__class__.__name__}
    }
###############################################################################
# nopaque specific CQi extensions #
###############################################################################
@socketio.on('cqi.corpora.corpus.update_db', namespace=ns)
@socketio_login_required
@cqi_over_socketio
def cqi_corpora_corpus_update_db(cqi_client: cqi.CQiClient, corpus_name: str):
    """Store the token count reported by the CQP server in the database.

    The corpus row is looked up via ``session['d']['corpus_id']`` and its
    ``num_tokens`` is set to the size of the CQi corpus.
    """
    corpus = Corpus.query.get(session['d']['corpus_id'])
    cqi_corpus = cqi_client.corpora.get(corpus_name)
    corpus.num_tokens = cqi_corpus.size
    db.session.commit()
    # Answer like every other handler in this package instead of returning
    # None, so the client receives a response it can check.
    return {'code': 200, 'msg': 'OK'}
# Build (or load from cache) the data used for corpus visualizations.
# The result is cached as JSON in <corpus.path>/cwb/visualization_data.json;
# if that file exists its content is returned without querying the CQP server.
# NOTE(review): block nesting of the loops below cannot be confirmed from this
# view (indentation is not visible) - verify before restructuring.
@socketio.on('cqi.corpora.corpus.get_visualization_data', namespace=ns)
@socketio_login_required
@cqi_over_socketio
def cqi_corpora_corpus_get_visualization_data(cqi_client: cqi.CQiClient, corpus_name: str):
corpus = Corpus.query.get(session['d']['corpus_id'])
visualization_data_file_path = os.path.join(corpus.path, 'cwb', 'visualization_data.json')
# Fast path: serve the cached payload
if os.path.exists(visualization_data_file_path):
with open(visualization_data_file_path, 'r') as f:
payload = json.load(f)
return {'code': 200, 'msg': 'OK', 'payload': payload}
cqi_corpus = cqi_client.corpora.get(corpus_name)
##########################################################################
# A faster way to get cpos boundaries for smaller s_attrs #
##########################################################################
# cqi_corpus.query('Last', '<s> []* </s>;')
# cqi_subcorpus = cqi_corpus.subcorpora.get('Last')
# print(cqi_subcorpus.size)
# first_match = 0
# last_match = cqi_subcorpus.attrs['size'] - 1
# match_boundaries = zip(
# list(range(first_match, last_match + 1)),
# cqi_subcorpus.dump(cqi_subcorpus.attrs['fields']['match'], first_match, last_match),
# cqi_subcorpus.dump(cqi_subcorpus.attrs['fields']['matchend'], first_match, last_match)
# )
# for x in match_boundaries:
# print(x)
# Index all positional and structural attributes by name
cqi_p_attrs = {
p_attr.name: p_attr
for p_attr in cqi_corpus.positional_attributes.list()
}
cqi_s_attrs = {
s_attr.name: s_attr
for s_attr in cqi_corpus.structural_attributes.list()
}
# Skeleton of the payload; the sections are filled in below
payload = {
'corpus': {
'bounds': [0, cqi_corpus.size - 1],
'counts': {
'token': cqi_corpus.size
},
'freqs': {}
},
'p_attrs': {},
's_attrs': {},
'values': {'p_attrs': {}, 's_attrs': {}}
}
for p_attr in cqi_p_attrs.values():
# lexicon id -> frequency of that lexicon entry
payload['corpus']['freqs'][p_attr.name] = dict(
zip(
range(0, p_attr.lexicon_size),
p_attr.freqs_by_ids(list(range(0, p_attr.lexicon_size)))
)
)
# corpus position -> lexicon id
payload['p_attrs'][p_attr.name] = dict(
zip(
range(0, cqi_corpus.size),
p_attr.ids_by_cpos(list(range(0, cqi_corpus.size)))
)
)
# lexicon id -> value
payload['values']['p_attrs'][p_attr.name] = dict(
zip(
range(0, p_attr.lexicon_size),
p_attr.values_by_ids(list(range(0, p_attr.lexicon_size)))
)
)
for s_attr in cqi_s_attrs.values():
# Attributes with values are sub attributes; they are collected further
# below via the 'part_of' filter of their parent attribute
if s_attr.has_values:
continue
payload['corpus']['counts'][s_attr.name] = s_attr.size
payload['s_attrs'][s_attr.name] = {'lexicon': {}, 'values': None}
payload['values']['s_attrs'][s_attr.name] = {}
for id in range(0, s_attr.size):
payload['s_attrs'][s_attr.name]['lexicon'][id] = {}
lbound, rbound = s_attr.cpos_by_id(id)
payload['s_attrs'][s_attr.name]['lexicon'][id]['bounds'] = [lbound, rbound]
payload['s_attrs'][s_attr.name]['lexicon'][id]['counts'] = {}
payload['s_attrs'][s_attr.name]['lexicon'][id]['counts']['token'] = rbound - lbound + 1
# Entity counts are only collected for 'text' and 's' regions
if s_attr.name not in ['text', 's']:
continue
cpos_range = range(lbound, rbound + 1)
# Number of distinct 'ent' ids inside the region (-1 is filtered out)
payload['s_attrs'][s_attr.name]['lexicon'][id]['counts']['ent'] = len({x for x in cqi_s_attrs['ent'].ids_by_cpos(list(cpos_range)) if x != -1})
# Sentence counts and per region frequencies are only collected for 'text'
if s_attr.name != 'text':
continue
payload['s_attrs'][s_attr.name]['lexicon'][id]['counts']['s'] = len({x for x in cqi_s_attrs['s'].ids_by_cpos(list(cpos_range)) if x != -1})
payload['s_attrs'][s_attr.name]['lexicon'][id]['freqs'] = {}
for p_attr in cqi_p_attrs.values():
payload['s_attrs'][s_attr.name]['lexicon'][id]['freqs'][p_attr.name] = dict(Counter(p_attr.ids_by_cpos(list(cpos_range))))
# Collect the values of all sub attributes of s_attr
sub_s_attrs = cqi_corpus.structural_attributes.list(filters={'part_of': s_attr})
# Sub attribute names without the '<s_attr.name>_' prefix
s_attr_value_names = [
sub_s_attr.name[(len(s_attr.name) + 1):]
for sub_s_attr in sub_s_attrs
]
sub_s_attr_values = [
sub_s_attr.values_by_ids(list(range(0, s_attr.size)))
for sub_s_attr in sub_s_attrs
]
payload['s_attrs'][s_attr.name]['values'] = s_attr_value_names
# s_attr id -> {value name -> value}
payload['values']['s_attrs'][s_attr.name] = {
s_attr_id: {
s_attr_value_name: sub_s_attr_values[s_attr_value_name_idx][s_attr_id_idx]
for s_attr_value_name_idx, s_attr_value_name in enumerate(
payload['s_attrs'][s_attr.name]['values']
)
} for s_attr_id_idx, s_attr_id in enumerate(range(0, s_attr.size))
}
# Cache the payload for subsequent requests
with open(visualization_data_file_path, 'w') as f:
json.dump(payload, f)
return {'code': 200, 'msg': 'OK', 'payload': payload}
@socketio.on('cqi.corpora.corpus.paginate', namespace=ns)
@socketio_login_required
@cqi_over_socketio
def cqi_corpora_corpus_paginate(cqi_client: cqi.CQiClient, corpus_name: str, page: int = 1, per_page: int = 20):  # noqa
    """Return one page of corpus positions together with their lookups.

    Answers with code 416 if ``per_page`` or ``page`` is smaller than 1 or
    if ``page`` lies behind the last page of a non empty corpus. The payload
    holds the pagination keys ``items``, ``lookups``, ``total``,
    ``per_page``, ``pages``, ``page``, ``has_prev``, ``has_next``,
    ``prev_num`` and ``next_num``.
    """
    cqi_corpus = cqi_client.corpora.get(corpus_name)
    # Sanity checks
    if per_page < 1 or page < 1:
        return {'code': 416, 'msg': 'Range Not Satisfiable'}
    if cqi_corpus.size > 0 and page > math.ceil(cqi_corpus.size / per_page):
        return {'code': 416, 'msg': 'Range Not Satisfiable'}
    page_start = (page - 1) * per_page
    page_stop = min(cqi_corpus.size, page_start + per_page)
    cpos_list = list(range(page_start, page_stop))
    lookups = lookups_by_cpos(cqi_corpus, cpos_list)
    total = cqi_corpus.size
    pages = math.ceil(total / per_page)
    # the current page number (1 indexed), None if there are no pages
    current_page = page if pages > 0 else None
    has_prev = current_page > 1 if current_page else False
    has_next = current_page < pages if current_page else False
    payload = {
        # the items for the current page
        'items': [cpos_list],
        # the lookups for the items
        'lookups': lookups,
        # the total number of items
        'total': total,
        # the number of items to be displayed on a page
        'per_page': per_page,
        # the total number of pages
        'pages': pages,
        'page': current_page,
        'has_prev': has_prev,
        'has_next': has_next,
        'prev_num': current_page - 1 if has_prev else None,
        'next_num': current_page + 1 if has_next else None,
    }
    return {'code': 200, 'msg': 'OK', 'payload': payload}

View File

@ -1,24 +0,0 @@
import cqi
from app import socketio
from app.decorators import socketio_login_required
from . import NAMESPACE as ns
from .utils import cqi_over_socketio
@socketio.on('cqi.corpora.corpus.alignment_attributes.get', namespace=ns)
@socketio_login_required
@cqi_over_socketio
def cqi_corpora_corpus_alignment_attributes_get(cqi_client: cqi.CQiClient, corpus_name: str, alignment_attribute_name: str):  # noqa
    """Return a copy of the attrs of one alignment attribute of the corpus."""
    corpus = cqi_client.corpora.get(corpus_name)
    attribute = corpus.alignment_attributes.get(alignment_attribute_name)
    return {'code': 200, 'msg': 'OK', 'payload': dict(attribute.attrs)}
@socketio.on('cqi.corpora.corpus.alignment_attributes.list', namespace=ns)
@socketio_login_required
@cqi_over_socketio
def cqi_corpora_corpus_alignment_attributes_list(cqi_client: cqi.CQiClient, corpus_name: str):  # noqa
    """Return copies of the attrs of all alignment attributes of the corpus."""
    corpus = cqi_client.corpora.get(corpus_name)
    all_attrs = []
    for attribute in corpus.alignment_attributes.list():
        all_attrs.append(dict(attribute.attrs))
    return {'code': 200, 'msg': 'OK', 'payload': all_attrs}

View File

@ -1,24 +0,0 @@
import cqi
from app import socketio
from app.decorators import socketio_login_required
from . import NAMESPACE as ns
from .utils import cqi_over_socketio
@socketio.on('cqi.corpora.corpus.positional_attributes.get', namespace=ns)
@socketio_login_required
@cqi_over_socketio
def cqi_corpora_corpus_positional_attributes_get(cqi_client: cqi.CQiClient, corpus_name: str, positional_attribute_name: str):  # noqa
    """Return a copy of the attrs of one positional attribute of the corpus."""
    corpus = cqi_client.corpora.get(corpus_name)
    attribute = corpus.positional_attributes.get(positional_attribute_name)
    return {'code': 200, 'msg': 'OK', 'payload': dict(attribute.attrs)}
@socketio.on('cqi.corpora.corpus.positional_attributes.list', namespace=ns)
@socketio_login_required
@cqi_over_socketio
def cqi_corpora_corpus_positional_attributes_list(cqi_client: cqi.CQiClient, corpus_name: str):  # noqa
    """Return copies of the attrs of all positional attributes of the corpus."""
    corpus = cqi_client.corpora.get(corpus_name)
    all_attrs = []
    for attribute in corpus.positional_attributes.list():
        all_attrs.append(dict(attribute.attrs))
    return {'code': 200, 'msg': 'OK', 'payload': all_attrs}

View File

@ -1,24 +0,0 @@
import cqi
from app import socketio
from app.decorators import socketio_login_required
from . import NAMESPACE as ns
from .utils import cqi_over_socketio
@socketio.on('cqi.corpora.corpus.structural_attributes.get', namespace=ns)
@socketio_login_required
@cqi_over_socketio
def cqi_corpora_corpus_structural_attributes_get(cqi_client: cqi.CQiClient, corpus_name: str, structural_attribute_name: str):  # noqa
    """Return a copy of the attrs of one structural attribute of the corpus."""
    corpus = cqi_client.corpora.get(corpus_name)
    attribute = corpus.structural_attributes.get(structural_attribute_name)
    return {'code': 200, 'msg': 'OK', 'payload': dict(attribute.attrs)}
@socketio.on('cqi.corpora.corpus.structural_attributes.list', namespace=ns)
@socketio_login_required
@cqi_over_socketio
def cqi_corpora_corpus_structural_attributes_list(cqi_client: cqi.CQiClient, corpus_name: str):  # noqa
    """Return copies of the attrs of all structural attributes of the corpus."""
    corpus = cqi_client.corpora.get(corpus_name)
    all_attrs = []
    for attribute in corpus.structural_attributes.list():
        all_attrs.append(dict(attribute.attrs))
    return {'code': 200, 'msg': 'OK', 'payload': all_attrs}

View File

@ -1,140 +0,0 @@
import cqi
import math
from app import socketio
from app.decorators import socketio_login_required
from . import NAMESPACE as ns
from .utils import cqi_over_socketio, export_subcorpus, partial_export_subcorpus
@socketio.on('cqi.corpora.corpus.subcorpora.get', namespace=ns)
@socketio_login_required
@cqi_over_socketio
def cqi_corpora_corpus_subcorpora_get(cqi_client: cqi.CQiClient, corpus_name: str, subcorpus_name: str):  # noqa
    """Return a copy of the attrs of one subcorpus of the corpus."""
    corpus = cqi_client.corpora.get(corpus_name)
    subcorpus = corpus.subcorpora.get(subcorpus_name)
    return {'code': 200, 'msg': 'OK', 'payload': dict(subcorpus.attrs)}
@socketio.on('cqi.corpora.corpus.subcorpora.list', namespace=ns)
@socketio_login_required
@cqi_over_socketio
def cqi_corpora_corpus_subcorpora_list(cqi_client: cqi.CQiClient, corpus_name: str):  # noqa
    """Return copies of the attrs of all subcorpora of the corpus."""
    corpus = cqi_client.corpora.get(corpus_name)
    all_attrs = []
    for subcorpus in corpus.subcorpora.list():
        all_attrs.append(dict(subcorpus.attrs))
    return {'code': 200, 'msg': 'OK', 'payload': all_attrs}
@socketio.on('cqi.corpora.corpus.subcorpora.subcorpus.drop', namespace=ns)
@socketio_login_required
@cqi_over_socketio
def cqi_corpora_corpus_subcorpora_subcorpus_drop(cqi_client: cqi.CQiClient, corpus_name: str, subcorpus_name: str):  # noqa
    """Drop the subcorpus and report the resulting CQi status."""
    corpus = cqi_client.corpora.get(corpus_name)
    status = corpus.subcorpora.get(subcorpus_name).drop()
    return {
        'code': 200,
        'msg': 'OK',
        'payload': {'code': status.code, 'msg': status.__class__.__name__}
    }
@socketio.on('cqi.corpora.corpus.subcorpora.subcorpus.dump', namespace=ns)
@socketio_login_required
@cqi_over_socketio
def cqi_corpora_corpus_subcorpora_subcorpus_dump(cqi_client: cqi.CQiClient, corpus_name: str, subcorpus_name: str, field: int, first: int, last: int):  # noqa
    """Return the result of ``dump(field, first, last)`` on the subcorpus."""
    corpus = cqi_client.corpora.get(corpus_name)
    subcorpus = corpus.subcorpora.get(subcorpus_name)
    return {
        'code': 200,
        'msg': 'OK',
        'payload': subcorpus.dump(field, first, last)
    }
@socketio.on('cqi.corpora.corpus.subcorpora.subcorpus.fdist_1', namespace=ns)
@socketio_login_required
@cqi_over_socketio
def cqi_corpora_corpus_subcorpora_subcorpus_fdist_1(cqi_client: cqi.CQiClient, corpus_name: str, subcorpus_name: str, cutoff: int, field_name: str, positional_attribute_name: str):  # noqa
    """Return the result of ``fdist_1`` on the subcorpus.

    ``field_name`` is resolved via the subcorpus' ``fields`` mapping and
    ``positional_attribute_name`` via the corpus' positional attributes.
    """
    corpus = cqi_client.corpora.get(corpus_name)
    subcorpus = corpus.subcorpora.get(subcorpus_name)
    anchor = subcorpus.fields[field_name]
    attribute = corpus.positional_attributes.get(positional_attribute_name)
    return {
        'code': 200,
        'msg': 'OK',
        'payload': subcorpus.fdist_1(cutoff, anchor, attribute)
    }
@socketio.on('cqi.corpora.corpus.subcorpora.subcorpus.fdist_2', namespace=ns)
@socketio_login_required
@cqi_over_socketio
def cqi_corpora_corpus_subcorpora_subcorpus_fdist_2(cqi_client: cqi.CQiClient, corpus_name: str, subcorpus_name: str, cutoff: int, field_1_name: str, positional_attribute_1_name: str, field_2_name: str, positional_attribute_2_name: str):  # noqa
    """Return the result of ``fdist_2`` on the subcorpus.

    Field names are resolved via the subcorpus' ``fields`` mapping and the
    attribute names via the corpus' positional attributes.
    """
    corpus = cqi_client.corpora.get(corpus_name)
    subcorpus = corpus.subcorpora.get(subcorpus_name)
    first_anchor = subcorpus.fields[field_1_name]
    first_attribute = corpus.positional_attributes.get(positional_attribute_1_name)
    second_anchor = subcorpus.fields[field_2_name]
    second_attribute = corpus.positional_attributes.get(positional_attribute_2_name)
    distribution = subcorpus.fdist_2(
        cutoff, first_anchor, first_attribute, second_anchor, second_attribute
    )
    return {'code': 200, 'msg': 'OK', 'payload': distribution}
###############################################################################
# nopaque specific CQi extensions #
###############################################################################
@socketio.on('cqi.corpora.corpus.subcorpora.subcorpus.paginate', namespace=ns)
@socketio_login_required
@cqi_over_socketio
def cqi_corpora_corpus_subcorpora_subcorpus_paginate(cqi_client: cqi.CQiClient, corpus_name: str, subcorpus_name: str, context: int = 50, page: int = 1, per_page: int = 20):  # noqa
    """Return one page of matches of the subcorpus with their lookups.

    Answers with code 416 if ``per_page`` or ``page`` is smaller than 1 or
    if ``page`` lies behind the last page of a non empty subcorpus. The
    payload holds the pagination keys ``items``, ``lookups``, ``total``,
    ``per_page``, ``pages``, ``page``, ``has_prev``, ``has_next``,
    ``prev_num`` and ``next_num``.
    """
    cqi_corpus = cqi_client.corpora.get(corpus_name)
    cqi_subcorpus = cqi_corpus.subcorpora.get(subcorpus_name)
    # Sanity checks
    if per_page < 1 or page < 1:
        return {'code': 416, 'msg': 'Range Not Satisfiable'}
    if (
        cqi_subcorpus.attrs['size'] > 0
        and page > math.ceil(cqi_subcorpus.attrs['size'] / per_page)
    ):
        return {'code': 416, 'msg': 'Range Not Satisfiable'}
    export = export_subcorpus(
        cqi_subcorpus,
        context=context,
        cutoff=per_page,
        offset=(page - 1) * per_page
    )
    # What remains in `export` after popping the matches are the lookups
    items = export.pop('matches')
    total = cqi_subcorpus.attrs['size']
    pages = math.ceil(total / per_page)
    # the current page number (1 indexed), None if there are no pages
    current_page = page if pages > 0 else None
    has_prev = current_page > 1 if current_page else False
    has_next = current_page < pages if current_page else False
    payload = {
        # the items for the current page
        'items': items,
        # the lookups for the items
        'lookups': export,
        # the total number of items matching the query
        'total': total,
        # the number of items to be displayed on a page
        'per_page': per_page,
        # the total number of pages
        'pages': pages,
        'page': current_page,
        'has_prev': has_prev,
        'has_next': has_next,
        'prev_num': current_page - 1 if has_prev else None,
        'next_num': current_page + 1 if has_next else None,
    }
    return {'code': 200, 'msg': 'OK', 'payload': payload}
@socketio.on('cqi.corpora.corpus.subcorpora.subcorpus.partial_export', namespace=ns)
@socketio_login_required
@cqi_over_socketio
def cqi_corpora_corpus_subcorpora_subcorpus_partial_export(cqi_client: cqi.CQiClient, corpus_name: str, subcorpus_name: str, match_id_list: list, context: int = 50):  # noqa
    """Return the partial export of the subcorpus for ``match_id_list``."""
    corpus = cqi_client.corpora.get(corpus_name)
    subcorpus = corpus.subcorpora.get(subcorpus_name)
    return {
        'code': 200,
        'msg': 'OK',
        'payload': partial_export_subcorpus(subcorpus, match_id_list, context=context)
    }
@socketio.on('cqi.corpora.corpus.subcorpora.subcorpus.export', namespace=ns)
@socketio_login_required
@cqi_over_socketio
def cqi_corpora_corpus_subcorpora_subcorpus_export(cqi_client: cqi.CQiClient, corpus_name: str, subcorpus_name: str, context: int = 50):  # noqa
    """Return the full export of the subcorpus."""
    corpus = cqi_client.corpora.get(corpus_name)
    subcorpus = corpus.subcorpora.get(subcorpus_name)
    return {
        'code': 200,
        'msg': 'OK',
        'payload': export_subcorpus(subcorpus, context=context)
    }

View File

@ -1,178 +0,0 @@
from flask import session
from functools import wraps
from inspect import signature
import cqi
def cqi_over_socketio(f):
    """Decorator that adapts a CQi function to a Socket.IO event handler.

    The wrapped handler receives at most one positional argument: a dict
    with the arguments sent by the client. The dict is decomposed to fit the
    signature of ``f``:

    - ``corpus_name`` and ``cqi_client`` are never taken from the client but
      from ``session['d']``.
    - parameters without a default are required, parameters with a default
      are optional.
    - the type of every provided argument must be exactly the parameter's
      annotation.

    ``f`` is executed while holding the session's CQi client lock.

    Returned error responses:
    - 424 if no analysis session data exists in the session
    - 400 if the payload is not a dict, a required argument is missing or
      an argument has the wrong type
    - 500 if ``f`` raises ``BrokenPipeError`` or a ``CQiException``
    """
    # The signature of f never changes, inspect it only once
    params = tuple(signature(f).parameters.values())
    bad_request = {'code': 400, 'msg': 'Bad Request'}

    @wraps(f)
    def wrapped(*args):
        if 'd' not in session:
            return {'code': 424, 'msg': 'Failed Dependency'}
        # Events may be emitted without any payload; treat that as "no
        # arguments" instead of failing with an IndexError on args[0].
        client_args = args[0] if args else None
        if client_args is None:
            client_args = {}
        elif not isinstance(client_args, dict):
            return dict(bad_request)
        f_args = {}
        # Check for missing args and if all provided args are of the right type
        for param in params:
            if param.name == 'corpus_name':
                f_args[param.name] = f'NOPAQUE_{session["d"]["corpus_id"]}'
                continue
            if param.name == 'cqi_client':
                f_args[param.name] = session['d']['cqi_client']
                continue
            if param.name not in client_args:
                if param.default is param.empty:
                    # required argument is missing
                    return dict(bad_request)
                # optional argument, f falls back to its default
                continue
            arg = client_args[param.name]
            if type(arg) is not param.annotation:
                return dict(bad_request)
            f_args[param.name] = arg
        # The lock is released on every exit path, including exceptions
        with session['d']['cqi_client_lock']:
            try:
                return f(**f_args)
            except BrokenPipeError:
                return {
                    'code': 500,
                    'msg': 'Internal Server Error'
                }
            except cqi.errors.CQiException as e:
                return {
                    'code': 500,
                    'msg': 'Internal Server Error',
                    'payload': {
                        'code': e.code,
                        'desc': e.description,
                        'msg': e.__class__.__name__
                    }
                }
    return wrapped
def lookups_by_cpos(corpus, cpos_list):
    """Build lookup tables for the given corpus positions.

    The result always contains ``'cpos_lookup'``, mapping every cpos to a
    dict of its positional attribute values and of the ids of the
    structural attributes covering it (an id of -1 is left out). For every
    structural attribute without values that occurs at least once and has
    sub attributes, an additional ``'<name>_lookup'`` table maps each
    occurring id to its sub attribute values. Sub attribute keys are the
    sub attribute name without the ``'<name>_'`` prefix.
    """
    cpos_lookup = {cpos: {} for cpos in cpos_list}
    result = {'cpos_lookup': cpos_lookup}
    for p_attr in corpus.positional_attributes.list():
        p_attr_values = p_attr.values_by_cpos(cpos_list)
        for cpos, value in zip(cpos_list, p_attr_values):
            cpos_lookup[cpos][p_attr.attrs['name']] = value
    for s_attr in corpus.structural_attributes.list():
        # Sub attributes (has_values == True) are handled via their parent
        if s_attr.attrs['has_values']:
            continue
        s_attr_ids = s_attr.ids_by_cpos(cpos_list)
        for cpos, s_attr_id in zip(cpos_list, s_attr_ids):
            if s_attr_id != -1:
                cpos_lookup[cpos][s_attr.attrs['name']] = s_attr_id
        seen_ids = [s_attr_id for s_attr_id in set(s_attr_ids) if s_attr_id != -1]
        if not seen_ids:
            continue
        children = corpus.structural_attributes.list(filters={'part_of': s_attr})
        if not children:
            continue
        id_lookup = {s_attr_id: {} for s_attr_id in seen_ids}
        result[f'{s_attr.attrs["name"]}_lookup'] = id_lookup
        for child in children:
            key = child.attrs['name'][(len(s_attr.attrs['name']) + 1):]
            for s_attr_id, value in zip(seen_ids, child.values_by_ids(seen_ids)):
                id_lookup[s_attr_id][key] = value
    return result
def partial_export_subcorpus(subcorpus, match_id_list, context=25):
    """Export the matches with the given ids together with their context.

    Ids outside ``0 <= id < size`` are skipped. Each exported match is a
    dict with the keys ``num`` (match id), ``lc`` (left context bounds or
    None), ``c`` (match bounds) and ``rc`` (right context bounds or None).
    The lookups returned by ``lookups_by_cpos`` for all covered corpus
    positions are merged into the result.
    """
    if subcorpus.attrs['size'] == 0:
        return {"matches": []}
    boundaries = [
        (
            match_id,
            subcorpus.dump(subcorpus.attrs['fields']['match'], match_id, match_id)[0],
            subcorpus.dump(subcorpus.attrs['fields']['matchend'], match_id, match_id)[0]
        )
        for match_id in match_id_list
        if not (match_id < 0 or match_id >= subcorpus.attrs['size'])
    ]
    matches = []
    covered_cpos = set()
    for match_num, start, end in boundaries:
        final_cpos = subcorpus.collection.corpus.attrs['size'] - 1
        left = None
        right = None
        # No left context at the very start of the corpus or if disabled
        if context != 0 and start != 0:
            left = (max(0, (start - context)), start - 1)
        # No right context at the very end of the corpus or if disabled
        if context != 0 and end != final_cpos:
            right = (end + 1, min((end + context), final_cpos))
        lower = start if left is None else left[0]
        upper = end if right is None else right[1]
        matches.append({'num': match_num, 'lc': left, 'c': (start, end), 'rc': right})
        covered_cpos.update(range(lower, upper + 1))
    lookups = lookups_by_cpos(subcorpus.collection.corpus, list(covered_cpos))
    return {'matches': matches, **lookups}
def export_subcorpus(subcorpus, context=25, cutoff=float('inf'), offset=0):
    """Export a contiguous window of matches together with their context.

    Args:
        subcorpus: Object exposing ``attrs['size']``, ``attrs['fields']``,
            ``dump(field, first, last)`` and ``collection.corpus``.
        context: Number of corpus positions to include on each side of a
            match. ``0`` disables the context windows.
        cutoff: Maximum number of matches to export, counted from
            ``offset``. Defaults to no limit.
        offset: Index of the first match to export. Negative values are
            clamped to ``0`` for the start of the window.

    Returns:
        ``{"matches": []}`` when the subcorpus is empty or the requested
        window contains no match. Otherwise a dict whose ``'matches'`` entry
        lists one ``{'num', 'lc', 'c', 'rc'}`` dict per exported match
        (``lc``/``rc`` are ``(first, last)`` corpus position tuples or
        ``None``), merged with everything ``lookups_by_cpos`` returns for the
        covered corpus positions.
    """
    subcorpus_size = subcorpus.attrs['size']
    if subcorpus_size == 0:
        return {"matches": []}
    first_match = max(0, offset)
    last_match = min((offset + cutoff - 1), (subcorpus_size - 1))
    if first_match > last_match:
        # The window is empty (offset past the last match or cutoff <= 0).
        # Return early instead of requesting an inverted range from dump().
        return {"matches": []}
    match_boundaries = zip(
        range(first_match, last_match + 1),
        subcorpus.dump(subcorpus.attrs['fields']['match'], first_match, last_match),
        subcorpus.dump(subcorpus.attrs['fields']['matchend'], first_match, last_match)
    )
    corpus = subcorpus.collection.corpus
    last_cpos = corpus.attrs['size'] - 1
    cpos_set = set()
    matches = []
    for match_num, match_start, match_end in match_boundaries:
        c = (match_start, match_end)
        # Left context: absent at the very beginning of the corpus or when
        # no context was requested.
        if match_start == 0 or context == 0:
            lc = None
            cpos_list_lbound = match_start
        else:
            lc_lbound = max(0, (match_start - context))
            lc = (lc_lbound, match_start - 1)
            cpos_list_lbound = lc_lbound
        # Right context: absent at the very end of the corpus or when no
        # context was requested.
        if match_end == last_cpos or context == 0:
            rc = None
            cpos_list_rbound = match_end
        else:
            rc_rbound = min((match_end + context), last_cpos)
            rc = (match_end + 1, rc_rbound)
            cpos_list_rbound = rc_rbound
        matches.append({'num': match_num, 'lc': lc, 'c': c, 'rc': rc})
        cpos_set.update(range(cpos_list_lbound, cpos_list_rbound + 1))
    lookups = lookups_by_cpos(corpus, list(cpos_set))
    return {'matches': matches, **lookups}

View File

@ -12,65 +12,65 @@ from ..decorators import corpus_follower_permission_required
from . import bp from . import bp
# @bp.route('/<hashid:corpus_id>/followers', methods=['POST']) @bp.route('/<hashid:corpus_id>/followers', methods=['POST'])
# @corpus_follower_permission_required('MANAGE_FOLLOWERS') @corpus_follower_permission_required('MANAGE_FOLLOWERS')
# @content_negotiation(consumes='application/json', produces='application/json') @content_negotiation(consumes='application/json', produces='application/json')
# def create_corpus_followers(corpus_id): def create_corpus_followers(corpus_id):
# usernames = request.json usernames = request.json
# if not (isinstance(usernames, list) or all(isinstance(u, str) for u in usernames)): if not (isinstance(usernames, list) or all(isinstance(u, str) for u in usernames)):
# abort(400) abort(400)
# corpus = Corpus.query.get_or_404(corpus_id) corpus = Corpus.query.get_or_404(corpus_id)
# for username in usernames: for username in usernames:
# user = User.query.filter_by(username=username, is_public=True).first_or_404() user = User.query.filter_by(username=username, is_public=True).first_or_404()
# user.follow_corpus(corpus) user.follow_corpus(corpus)
# db.session.commit() db.session.commit()
# response_data = { response_data = {
# 'message': f'Users are now following "{corpus.title}"', 'message': f'Users are now following "{corpus.title}"',
# 'category': 'corpus' 'category': 'corpus'
# } }
# return response_data, 200 return response_data, 200
# @bp.route('/<hashid:corpus_id>/followers/<hashid:follower_id>/role', methods=['PUT']) @bp.route('/<hashid:corpus_id>/followers/<hashid:follower_id>/role', methods=['PUT'])
# @corpus_follower_permission_required('MANAGE_FOLLOWERS') @corpus_follower_permission_required('MANAGE_FOLLOWERS')
# @content_negotiation(consumes='application/json', produces='application/json') @content_negotiation(consumes='application/json', produces='application/json')
# def update_corpus_follower_role(corpus_id, follower_id): def update_corpus_follower_role(corpus_id, follower_id):
# role_name = request.json role_name = request.json
# if not isinstance(role_name, str): if not isinstance(role_name, str):
# abort(400) abort(400)
# cfr = CorpusFollowerRole.query.filter_by(name=role_name).first() cfr = CorpusFollowerRole.query.filter_by(name=role_name).first()
# if cfr is None: if cfr is None:
# abort(400) abort(400)
# cfa = CorpusFollowerAssociation.query.filter_by(corpus_id=corpus_id, follower_id=follower_id).first_or_404() cfa = CorpusFollowerAssociation.query.filter_by(corpus_id=corpus_id, follower_id=follower_id).first_or_404()
# cfa.role = cfr cfa.role = cfr
# db.session.commit() db.session.commit()
# response_data = { response_data = {
# 'message': f'User "{cfa.follower.username}" is now {cfa.role.name}', 'message': f'User "{cfa.follower.username}" is now {cfa.role.name}',
# 'category': 'corpus' 'category': 'corpus'
# } }
# return response_data, 200 return response_data, 200
# @bp.route('/<hashid:corpus_id>/followers/<hashid:follower_id>', methods=['DELETE']) @bp.route('/<hashid:corpus_id>/followers/<hashid:follower_id>', methods=['DELETE'])
# def delete_corpus_follower(corpus_id, follower_id): def delete_corpus_follower(corpus_id, follower_id):
# cfa = CorpusFollowerAssociation.query.filter_by(corpus_id=corpus_id, follower_id=follower_id).first_or_404() cfa = CorpusFollowerAssociation.query.filter_by(corpus_id=corpus_id, follower_id=follower_id).first_or_404()
# if not ( if not (
# current_user.id == follower_id current_user.id == follower_id
# or current_user == cfa.corpus.user or current_user == cfa.corpus.user
# or CorpusFollowerAssociation.query.filter_by(corpus_id=corpus_id, follower_id=current_user.id).first().role.has_permission('MANAGE_FOLLOWERS') or CorpusFollowerAssociation.query.filter_by(corpus_id=corpus_id, follower_id=current_user.id).first().role.has_permission('MANAGE_FOLLOWERS')
# or current_user.is_administrator()): or current_user.is_administrator()):
# abort(403) abort(403)
# if current_user.id == follower_id: if current_user.id == follower_id:
# flash(f'You are no longer following "{cfa.corpus.title}"', 'corpus') flash(f'You are no longer following "{cfa.corpus.title}"', 'corpus')
# response = make_response() response = make_response()
# response.status_code = 204 response.status_code = 204
# else: else:
# response_data = { response_data = {
# 'message': f'"{cfa.follower.username}" is not following "{cfa.corpus.title}" anymore', 'message': f'"{cfa.follower.username}" is not following "{cfa.corpus.title}" anymore',
# 'category': 'corpus' 'category': 'corpus'
# } }
# response = jsonify(response_data) response = jsonify(response_data)
# response.status_code = 200 response.status_code = 200
# cfa.follower.unfollow_corpus(cfa.corpus) cfa.follower.unfollow_corpus(cfa.corpus)
# db.session.commit() db.session.commit()
# return response return response

View File

@ -7,6 +7,8 @@ from app.decorators import content_negotiation
from app.models import Corpus, CorpusFollowerRole from app.models import Corpus, CorpusFollowerRole
from . import bp from . import bp
from .decorators import corpus_follower_permission_required, corpus_owner_or_admin_required from .decorators import corpus_follower_permission_required, corpus_owner_or_admin_required
import nltk
from string import punctuation
@bp.route('/<hashid:corpus_id>', methods=['DELETE']) @bp.route('/<hashid:corpus_id>', methods=['DELETE'])
@ -56,56 +58,68 @@ def build_corpus(corpus_id):
} }
return response_data, 202 return response_data, 202
@bp.route('/stopwords')
@content_negotiation(produces='application/json')
def get_stopwords():
    """Return the NLTK stopword lists plus punctuation as a JSON mapping.

    The response maps each supported language name to its NLTK stopword
    list and additionally carries a ``'punctuation'`` list and an empty
    ``'user_stopwords'`` list.
    """
    # Fetches the NLTK stopwords resource on every request.
    nltk.download('stopwords', quiet=True)
    supported_languages = (
        "german", "english", "catalan", "greek", "spanish",
        "french", "italian", "russian", "chinese"
    )
    response_data = {
        language: nltk.corpus.stopwords.words(language)
        for language in supported_languages
    }
    # NOTE(review): the empty-string entries look like characters that were
    # lost in an encoding step - confirm against the intended symbols.
    response_data['punctuation'] = [*punctuation, '', '|', '', '', '', '--']
    response_data['user_stopwords'] = []
    # NOTE(review): 202 is returned although the data is produced
    # synchronously; 200 may have been intended - confirm with the client.
    return response_data, 202
# @bp.route('/<hashid:corpus_id>/generate-share-link', methods=['POST']) @bp.route('/<hashid:corpus_id>/generate-share-link', methods=['POST'])
# @corpus_follower_permission_required('MANAGE_FOLLOWERS') @corpus_follower_permission_required('MANAGE_FOLLOWERS')
# @content_negotiation(consumes='application/json', produces='application/json') @content_negotiation(consumes='application/json', produces='application/json')
# def generate_corpus_share_link(corpus_id): def generate_corpus_share_link(corpus_id):
# data = request.json data = request.json
# if not isinstance(data, dict): if not isinstance(data, dict):
# abort(400) abort(400)
# expiration = data.get('expiration') expiration = data.get('expiration')
# if not isinstance(expiration, str): if not isinstance(expiration, str):
# abort(400) abort(400)
# role_name = data.get('role') role_name = data.get('role')
# if not isinstance(role_name, str): if not isinstance(role_name, str):
# abort(400) abort(400)
# expiration_date = datetime.strptime(expiration, '%b %d, %Y') expiration_date = datetime.strptime(expiration, '%b %d, %Y')
# cfr = CorpusFollowerRole.query.filter_by(name=role_name).first() cfr = CorpusFollowerRole.query.filter_by(name=role_name).first()
# if cfr is None: if cfr is None:
# abort(400) abort(400)
# corpus = Corpus.query.get_or_404(corpus_id) corpus = Corpus.query.get_or_404(corpus_id)
# token = current_user.generate_follow_corpus_token(corpus.hashid, role_name, expiration_date) token = current_user.generate_follow_corpus_token(corpus.hashid, role_name, expiration_date)
# corpus_share_link = url_for( corpus_share_link = url_for(
# 'corpora.follow_corpus', 'corpora.follow_corpus',
# corpus_id=corpus_id, corpus_id=corpus_id,
# token=token, token=token,
# _external=True _external=True
# ) )
# response_data = { response_data = {
# 'message': 'Corpus share link generated', 'message': 'Corpus share link generated',
# 'category': 'corpus', 'category': 'corpus',
# 'corpusShareLink': corpus_share_link 'corpusShareLink': corpus_share_link
# } }
# return response_data, 200 return response_data, 200
# @bp.route('/<hashid:corpus_id>/is_public', methods=['PUT']) @bp.route('/<hashid:corpus_id>/is_public', methods=['PUT'])
# @corpus_owner_or_admin_required @corpus_owner_or_admin_required
# @content_negotiation(consumes='application/json', produces='application/json') @content_negotiation(consumes='application/json', produces='application/json')
# def update_corpus_is_public(corpus_id): def update_corpus_is_public(corpus_id):
# is_public = request.json is_public = request.json
# if not isinstance(is_public, bool): if not isinstance(is_public, bool):
# abort(400) abort(400)
# corpus = Corpus.query.get_or_404(corpus_id) corpus = Corpus.query.get_or_404(corpus_id)
# corpus.is_public = is_public corpus.is_public = is_public
# db.session.commit() db.session.commit()
# response_data = { response_data = {
# 'message': ( 'message': (
# f'Corpus "{corpus.title}" is now' f'Corpus "{corpus.title}" is now'
# f' {"public" if is_public else "private"}' f' {"public" if is_public else "private"}'
# ), ),
# 'category': 'corpus' 'category': 'corpus'
# } }
# return response_data, 200 return response_data, 200

View File

@ -71,17 +71,16 @@ def corpus(corpus_id):
users=users users=users
) )
if (current_user.is_following_corpus(corpus) or corpus.is_public): if (current_user.is_following_corpus(corpus) or corpus.is_public):
abort(404) cfas = CorpusFollowerAssociation.query.filter(Corpus.id == corpus_id, CorpusFollowerAssociation.follower_id != corpus.user.id).all()
# cfas = CorpusFollowerAssociation.query.filter(Corpus.id == corpus_id, CorpusFollowerAssociation.follower_id != corpus.user.id).all() return render_template(
# return render_template( 'corpora/public_corpus.html.j2',
# 'corpora/public_corpus.html.j2', title=corpus.title,
# title=corpus.title, corpus=corpus,
# corpus=corpus, cfrs=cfrs,
# cfrs=cfrs, cfr=cfr,
# cfr=cfr, cfas=cfas,
# cfas=cfas, users=users
# users = users )
# )
abort(403) abort(403)
@ -98,14 +97,14 @@ def analysis(corpus_id):
) )
# @bp.route('/<hashid:corpus_id>/follow/<token>') @bp.route('/<hashid:corpus_id>/follow/<token>')
# def follow_corpus(corpus_id, token): def follow_corpus(corpus_id, token):
# corpus = Corpus.query.get_or_404(corpus_id) corpus = Corpus.query.get_or_404(corpus_id)
# if current_user.follow_corpus_by_token(token): if current_user.follow_corpus_by_token(token):
# db.session.commit() db.session.commit()
# flash(f'You are following "{corpus.title}" now', category='corpus') flash(f'You are following "{corpus.title}" now', category='corpus')
# return redirect(url_for('corpora.corpus', corpus_id=corpus_id)) return redirect(url_for('corpora.corpus', corpus_id=corpus_id))
# abort(403) abort(403)
@bp.route('/import', methods=['GET', 'POST']) @bp.route('/import', methods=['GET', 'POST'])

View File

@ -28,24 +28,24 @@ def _create_build_corpus_service(corpus):
''' ## Command ## ''' ''' ## Command ## '''
command = ['bash', '-c'] command = ['bash', '-c']
command.append( command.append(
f'mkdir /corpora/data/nopaque_{corpus.id}' f'mkdir /corpora/data/nopaque-{corpus.hashid.lower()}'
' && ' ' && '
'cwb-encode' 'cwb-encode'
' -c utf8' ' -c utf8'
f' -d /corpora/data/nopaque_{corpus.id}' f' -d /corpora/data/nopaque-{corpus.hashid.lower()}'
' -f /root/files/corpus.vrt' ' -f /root/files/corpus.vrt'
f' -R /usr/local/share/cwb/registry/nopaque_{corpus.id}' f' -R /usr/local/share/cwb/registry/nopaque-{corpus.hashid.lower()}'
' -P pos -P lemma -P simple_pos' ' -P pos -P lemma -P simple_pos'
' -S ent:0+type -S s:0' ' -S ent:0+type -S s:0'
' -S text:0+address+author+booktitle+chapter+editor+institution+journal+pages+publisher+publishing_year+school+title' ' -S text:0+address+author+booktitle+chapter+editor+institution+journal+pages+publisher+publishing_year+school+title'
' -xsB -9' ' -xsB -9'
' && ' ' && '
f'cwb-make -V NOPAQUE_{corpus.id}' f'cwb-make -V NOPAQUE-{corpus.hashid.upper()}'
) )
''' ## Constraints ## ''' ''' ## Constraints ## '''
constraints = ['node.role==worker'] constraints = ['node.role==worker']
''' ## Image ## ''' ''' ## Image ## '''
image = f'{current_app.config["NOPAQUE_DOCKER_IMAGE_PREFIX"]}cwb:r1702' image = f'{current_app.config["NOPAQUE_DOCKER_IMAGE_PREFIX"]}cwb:r1879'
''' ## Labels ## ''' ''' ## Labels ## '''
labels = { labels = {
'origin': current_app.config['SERVER_NAME'], 'origin': current_app.config['SERVER_NAME'],
@ -139,21 +139,25 @@ def _create_cqpserver_container(corpus):
''' ## Entrypoint ## ''' ''' ## Entrypoint ## '''
entrypoint = ['bash', '-c'] entrypoint = ['bash', '-c']
''' ## Image ## ''' ''' ## Image ## '''
image = f'{current_app.config["NOPAQUE_DOCKER_IMAGE_PREFIX"]}cwb:r1702' image = f'{current_app.config["NOPAQUE_DOCKER_IMAGE_PREFIX"]}cwb:r1879'
''' ## Name ## ''' ''' ## Name ## '''
name = f'cqpserver_{corpus.id}' name = f'cqpserver_{corpus.id}'
''' ## Network ## ''' ''' ## Network ## '''
network = f'{current_app.config["DOCKER_NETWORK_NAME"]}' network = f'{current_app.config["NOPAQUE_DOCKER_NETWORK_NAME"]}'
''' ## Volumes ## ''' ''' ## Volumes ## '''
volumes = [] volumes = []
''' ### Corpus data volume ### ''' ''' ### Corpus data volume ### '''
data_volume_source = os.path.join(corpus.path, 'cwb', 'data') data_volume_source = os.path.join(corpus.path, 'cwb', 'data')
data_volume_target = '/corpora/data' data_volume_target = '/corpora/data'
# data_volume_source = os.path.join(corpus.path, 'cwb', 'data', f'nopaque_{corpus.id}')
# data_volume_target = f'/corpora/data/nopaque_{corpus.hashid.lower()}'
data_volume = f'{data_volume_source}:{data_volume_target}:rw' data_volume = f'{data_volume_source}:{data_volume_target}:rw'
volumes.append(data_volume) volumes.append(data_volume)
''' ### Corpus registry volume ### ''' ''' ### Corpus registry volume ### '''
registry_volume_source = os.path.join(corpus.path, 'cwb', 'registry') registry_volume_source = os.path.join(corpus.path, 'cwb', 'registry')
registry_volume_target = '/usr/local/share/cwb/registry' registry_volume_target = '/usr/local/share/cwb/registry'
# registry_volume_source = os.path.join(corpus.path, 'cwb', 'registry', f'nopaque_{corpus.id}')
# registry_volume_target = f'/usr/local/share/cwb/registry/nopaque_{corpus.hashid.lower()}'
registry_volume = f'{registry_volume_source}:{registry_volume_target}:rw' registry_volume = f'{registry_volume_source}:{registry_volume_target}:rw'
volumes.append(registry_volume) volumes.append(registry_volume)
# Check if a cqpserver container already exists. If this is the case, # Check if a cqpserver container already exists. If this is the case,

View File

@ -42,7 +42,6 @@ def job_log(job_id):
with open(os.path.join(job.path, 'pipeline_data', 'logs', 'pyflow_log.txt')) as log_file: with open(os.path.join(job.path, 'pipeline_data', 'logs', 'pyflow_log.txt')) as log_file:
log = log_file.read() log = log_file.read()
response_data = { response_data = {
'message': '',
'jobLog': log 'jobLog': log
} }
return response_data, 200 return response_data, 200

View File

@ -43,3 +43,5 @@ def deploy():
SpaCyNLPPipelineModel.insert_defaults() SpaCyNLPPipelineModel.insert_defaults()
print('Insert/Update default TesseractOCRPipelineModels') print('Insert/Update default TesseractOCRPipelineModels')
TesseractOCRPipelineModel.insert_defaults() TesseractOCRPipelineModel.insert_defaults()
# TODO: Implement checks for if the nopaque network exists

View File

@ -45,12 +45,6 @@ def dashboard():
) )
# @bp.route('/user_manual')
# @register_breadcrumb(bp, '.user_manual', '<i class="material-icons left">help</i>User manual')
# def user_manual():
# return render_template('main/user_manual.html.j2', title='User manual')
@bp.route('/news') @bp.route('/news')
@register_breadcrumb(bp, '.news', '<i class="material-icons left">email</i>News') @register_breadcrumb(bp, '.news', '<i class="material-icons left">email</i>News')
def news(): def news():
@ -78,15 +72,17 @@ def terms_of_use():
) )
# @bp.route('/social-area') @bp.route('/social-area')
# @register_breadcrumb(bp, '.social_area', '<i class="material-icons left">group</i>Social Area') @register_breadcrumb(bp, '.social_area', '<i class="material-icons left">group</i>Social Area')
# @login_required @login_required
# def social_area(): def social_area():
# corpora = Corpus.query.filter(Corpus.is_public == True, Corpus.user != current_user).all() print('test')
# users = User.query.filter(User.is_public == True, User.id != current_user.id).all() corpora = Corpus.query.filter(Corpus.is_public == True, Corpus.user != current_user).all()
# return render_template( print(corpora)
# 'main/social_area.html.j2', users = User.query.filter(User.is_public == True, User.id != current_user.id).all()
# title='Social Area', return render_template(
# corpora=corpora, 'main/social_area.html.j2',
# users=users title='Social Area',
# ) corpora=corpora,
users=users
)

View File

@ -853,7 +853,7 @@ class User(HashidMixin, UserMixin, db.Model):
json_serializeable = { json_serializeable = {
'id': self.hashid, 'id': self.hashid,
'confirmed': self.confirmed, 'confirmed': self.confirmed,
# 'avatar': url_for('users.user_avatar', user_id=self.id), 'avatar': url_for('users.user_avatar', user_id=self.id),
'email': self.email, 'email': self.email,
'last_seen': ( 'last_seen': (
None if self.last_seen is None None if self.last_seen is None
@ -953,7 +953,7 @@ class TesseractOCRPipelineModel(FileMixin, HashidMixin, db.Model):
return self.user.hashid return self.user.hashid
@staticmethod @staticmethod
def insert_defaults(): def insert_defaults(force_download=False):
nopaque_user = User.query.filter_by(username='nopaque').first() nopaque_user = User.query.filter_by(username='nopaque').first()
defaults_file = os.path.join( defaults_file = os.path.join(
os.path.dirname(os.path.abspath(__file__)), os.path.dirname(os.path.abspath(__file__)),
@ -966,6 +966,7 @@ class TesseractOCRPipelineModel(FileMixin, HashidMixin, db.Model):
if model is not None: if model is not None:
model.compatible_service_versions = m['compatible_service_versions'] model.compatible_service_versions = m['compatible_service_versions']
model.description = m['description'] model.description = m['description']
model.filename = f'{model.id}.traineddata'
model.publisher = m['publisher'] model.publisher = m['publisher']
model.publisher_url = m['publisher_url'] model.publisher_url = m['publisher_url']
model.publishing_url = m['publishing_url'] model.publishing_url = m['publishing_url']
@ -973,7 +974,7 @@ class TesseractOCRPipelineModel(FileMixin, HashidMixin, db.Model):
model.is_public = True model.is_public = True
model.title = m['title'] model.title = m['title']
model.version = m['version'] model.version = m['version']
continue else:
model = TesseractOCRPipelineModel( model = TesseractOCRPipelineModel(
compatible_service_versions=m['compatible_service_versions'], compatible_service_versions=m['compatible_service_versions'],
description=m['description'], description=m['description'],
@ -990,6 +991,7 @@ class TesseractOCRPipelineModel(FileMixin, HashidMixin, db.Model):
db.session.flush(objects=[model]) db.session.flush(objects=[model])
db.session.refresh(model) db.session.refresh(model)
model.filename = f'{model.id}.traineddata' model.filename = f'{model.id}.traineddata'
if not os.path.exists(model.path) or force_download:
r = requests.get(m['url'], stream=True) r = requests.get(m['url'], stream=True)
pbar = tqdm( pbar = tqdm(
desc=f'{model.title} ({model.filename})', desc=f'{model.title} ({model.filename})',
@ -1080,7 +1082,7 @@ class SpaCyNLPPipelineModel(FileMixin, HashidMixin, db.Model):
return self.user.hashid return self.user.hashid
@staticmethod @staticmethod
def insert_defaults(): def insert_defaults(force_download=False):
nopaque_user = User.query.filter_by(username='nopaque').first() nopaque_user = User.query.filter_by(username='nopaque').first()
defaults_file = os.path.join( defaults_file = os.path.join(
os.path.dirname(os.path.abspath(__file__)), os.path.dirname(os.path.abspath(__file__)),
@ -1093,6 +1095,7 @@ class SpaCyNLPPipelineModel(FileMixin, HashidMixin, db.Model):
if model is not None: if model is not None:
model.compatible_service_versions = m['compatible_service_versions'] model.compatible_service_versions = m['compatible_service_versions']
model.description = m['description'] model.description = m['description']
model.filename = m['url'].split('/')[-1]
model.publisher = m['publisher'] model.publisher = m['publisher']
model.publisher_url = m['publisher_url'] model.publisher_url = m['publisher_url']
model.publishing_url = m['publishing_url'] model.publishing_url = m['publishing_url']
@ -1101,10 +1104,11 @@ class SpaCyNLPPipelineModel(FileMixin, HashidMixin, db.Model):
model.title = m['title'] model.title = m['title']
model.version = m['version'] model.version = m['version']
model.pipeline_name = m['pipeline_name'] model.pipeline_name = m['pipeline_name']
continue else:
model = SpaCyNLPPipelineModel( model = SpaCyNLPPipelineModel(
compatible_service_versions=m['compatible_service_versions'], compatible_service_versions=m['compatible_service_versions'],
description=m['description'], description=m['description'],
filename=m['url'].split('/')[-1],
publisher=m['publisher'], publisher=m['publisher'],
publisher_url=m['publisher_url'], publisher_url=m['publisher_url'],
publishing_url=m['publishing_url'], publishing_url=m['publishing_url'],
@ -1118,7 +1122,7 @@ class SpaCyNLPPipelineModel(FileMixin, HashidMixin, db.Model):
db.session.add(model) db.session.add(model)
db.session.flush(objects=[model]) db.session.flush(objects=[model])
db.session.refresh(model) db.session.refresh(model)
model.filename = m['url'].split('/')[-1] if not os.path.exists(model.path) or force_download:
r = requests.get(m['url'], stream=True) r = requests.get(m['url'], stream=True)
pbar = tqdm( pbar = tqdm(
desc=f'{model.title} ({model.filename})', desc=f'{model.title} ({model.filename})',
@ -1607,9 +1611,14 @@ class Corpus(HashidMixin, db.Model):
return corpus return corpus
def build(self): def build(self):
build_dir = os.path.join(self.path, 'cwb')
shutil.rmtree(build_dir, ignore_errors=True)
os.mkdir(build_dir)
os.mkdir(os.path.join(build_dir, 'data'))
os.mkdir(os.path.join(build_dir, 'registry'))
corpus_element = ET.fromstring('<corpus>\n</corpus>') corpus_element = ET.fromstring('<corpus>\n</corpus>')
for corpus_file in self.files: for corpus_file in self.files:
normalized_vrt_path = os.path.join(self.path, 'cwb', f'{corpus_file.id}.norm.vrt') normalized_vrt_path = os.path.join(build_dir, f'{corpus_file.id}.norm.vrt')
try: try:
normalize_vrt_file(corpus_file.path, normalized_vrt_path) normalize_vrt_file(corpus_file.path, normalized_vrt_path)
except: except:
@ -1636,7 +1645,7 @@ class Corpus(HashidMixin, db.Model):
# corpus_element.insert(1, text_element) # corpus_element.insert(1, text_element)
corpus_element.append(text_element) corpus_element.append(text_element)
ET.ElementTree(corpus_element).write( ET.ElementTree(corpus_element).write(
os.path.join(self.path, 'cwb', 'corpus.vrt'), os.path.join(build_dir, 'corpus.vrt'),
encoding='utf-8' encoding='utf-8'
) )
self.status = CorpusStatus.SUBMITTED self.status = CorpusStatus.SUBMITTED

View File

@ -10,7 +10,7 @@ file-setup-pipeline:
tesseract-ocr-pipeline: tesseract-ocr-pipeline:
name: 'Tesseract OCR Pipeline' name: 'Tesseract OCR Pipeline'
publisher: 'Bielefeld University - CRC 1288 - INF' publisher: 'Bielefeld University - CRC 1288 - INF'
latest_version: '0.1.1' latest_version: '0.1.2'
versions: versions:
0.1.0: 0.1.0:
methods: methods:
@ -23,6 +23,12 @@ tesseract-ocr-pipeline:
- 'ocropus_nlbin_threshold' - 'ocropus_nlbin_threshold'
publishing_year: 2022 publishing_year: 2022
url: 'https://gitlab.ub.uni-bielefeld.de/sfb1288inf/tesseract-ocr-pipeline/-/releases/v0.1.1' url: 'https://gitlab.ub.uni-bielefeld.de/sfb1288inf/tesseract-ocr-pipeline/-/releases/v0.1.1'
0.1.2:
methods:
- 'binarization'
- 'ocropus_nlbin_threshold'
publishing_year: 2023
url: 'https://gitlab.ub.uni-bielefeld.de/sfb1288inf/tesseract-ocr-pipeline/-/releases/v0.1.2'
transkribus-htr-pipeline: transkribus-htr-pipeline:
name: 'Transkribus HTR Pipeline' name: 'Transkribus HTR Pipeline'
publisher: 'Bielefeld University - CRC 1288 - INF' publisher: 'Bielefeld University - CRC 1288 - INF'
@ -41,7 +47,7 @@ transkribus-htr-pipeline:
spacy-nlp-pipeline: spacy-nlp-pipeline:
name: 'SpaCy NLP Pipeline' name: 'SpaCy NLP Pipeline'
publisher: 'Bielefeld University - CRC 1288 - INF' publisher: 'Bielefeld University - CRC 1288 - INF'
latest_version: '0.1.2' latest_version: '0.1.1'
versions: versions:
0.1.0: 0.1.0:
methods: methods:
@ -56,5 +62,5 @@ spacy-nlp-pipeline:
0.1.2: 0.1.2:
methods: methods:
- 'encoding_detection' - 'encoding_detection'
publishing_year: 2022 publishing_year: 2024
url: 'https://gitlab.ub.uni-bielefeld.de/sfb1288inf/spacy-nlp-pipeline/-/releases/v0.1.2' url: 'https://gitlab.ub.uni-bielefeld.de/sfb1288inf/spacy-nlp-pipeline/-/releases/v0.1.2'

View File

@ -1,132 +1,108 @@
.modal-conent { #corpus-analysis-concordance-query-builder-input-field {
border-bottom: #9E9E9E 1px solid;
min-height: 38px;
margin-top: 23px;
}
#corpus-analysis-concordance-query-builder-input-field-placeholder {
color: #9E9E9E;
}
.modal-content {
overflow-x: hidden; overflow-x: hidden;
} }
#concordance-query-builder { #corpus-analysis-concordance-positional-attr-modal, #corpus-analysis-concordance-corpus-analysis-concordance-structural-attr-modal {
width: 70%; width: 70%;
} }
#concordance-query-builder nav { #corpus-analysis-concordance-general-options-query-builder-tutorial-info-icon {
background-color: #6B3F89;
margin-top: -25px;
margin-left: -25px;
width: 105%;
}
#query-builder-nav{
padding-left: 15px;
}
#close-query-builder {
margin-right: 50px;
cursor: pointer;
}
#general-options-query-builder-tutorial-info-icon {
color: black; color: black;
} }
#your-query { #corpus-analysis-concordance-insert-query-button {
border-bottom-style: solid;
border-bottom-width: 1px;
}
#insert-query-button {
background-color: #00426f; background-color: #00426f;
text-align: center; text-align: center;
} }
#structural-attr h6 { .attr-modal-header {
margin-left: 15px;
}
#add-structural-attribute-tutorial-info-icon {
color: black;
}
#sentence {
background-color:#FD9720;
}
#entity {
background-color: #A6E22D;
}
#text-annotation {
background-color: #2FBBAB;
}
#no-value-metadata-message {
padding-top: 25px;
margin-left: -20px;
}
#token-kind-selector {
background-color: #f2eff7; background-color: #f2eff7;
padding: 15px; padding: 15px;
border-top-style: solid; padding-left: 25px;
border-color: #6B3F89; border-top: 10px solid #6B3F89;
margin-left: -24px;
margin-top: -24px;
margin-right: -24px;
} }
#token-kind-selector.s5 { .attr-modal-header h6 {
margin-top: 15px;
}
#token-kind-selector h6 {
margin-left: 15px; margin-left: 15px;
} }
#token-tutorial-info-icon { #corpus-analysis-concordance-add-structural-attribute-tutorial-info-icon {
color: black; color: black;
} }
#no-value-message { [data-structural-attr-modal-action-button="sentence"]{
background-color:#FD9720 !important;
}
[data-structural-attr-modal-action-button="entity"]{
background-color: #A6E22D !important;
}
[data-structural-attr-modal-action-button="meta-data"]{
background-color: #2FBBAB !important;
}
#corpus-analysis-concordance-no-value-metadata-message {
padding-top: 25px; padding-top: 25px;
margin-left: -20px; margin-left: -20px;
} }
#token-edit-options h6 { .attr-modal-header.input-field {
margin-left: 15px; margin-left: 41px;
} }
#edit-options-tutorial-info-icon { #corpus-analysis-concordance-token-attr {
margin-left: 41px;
}
#corpus-analysis-concordance-token-tutorial-info-icon {
color: black; color: black;
} }
#incidence-modifiers-button a{ #corpus-analysis-concordance-no-value-message {
background-color: #2FBBAB; padding-top: 25px;
margin-left: -20px;
} }
#incidence-modifiers a{ #corpus-analysis-concordance-token-edit-options h6 {
background-color: white; margin-left: 15px;
} }
#ignore-case { #corpus-analysis-concordance-edit-options-tutorial-info-icon {
margin-left: 5px; color: black;
} }
#or, #and { [data-toggle-area="input-field-options"] a {
background-color: #fc0; margin-right: 10px;
} }
#betweenNM { [data-target="corpus-analysis-concordance-character-incidence-modifiers-dropdown"], [data-target="corpus-analysis-concordance-token-incidence-modifiers-dropdown"] {
width: 60%; background-color: #2FBBAB !important;
} }
#query-builder-tutorial-modal { #corpus-analysis-concordance-exactly-n-token-modal, #corpus-analysis-concordance-between-nm-token-modal {
width: 60%; width: 30%;
} }
#query-builder-tutorial-modal ul { [data-modal-id="corpus-analysis-concordance-exactly-n-token-modal"], [data-modal-id="corpus-analysis-concordance-between-nm-token-modal"] {
margin-top: 10px; margin-top: 15px !important;
} }
#query-builder-tutorial { [data-options-action="and"], [data-options-action="or"] {
padding:15px; background-color: #fc0 !important;
}
#scroll-up-button-query-builder-tutorial {
background-color: #28B3D1;
} }
[data-type="start-sentence"], [data-type="end-sentence"] { [data-type="start-sentence"], [data-type="end-sentence"] {
@ -134,13 +110,18 @@
} }
[data-type="start-empty-entity"], [data-type="start-entity"], [data-type="end-entity"] { [data-type="start-empty-entity"], [data-type="start-entity"], [data-type="end-entity"] {
background-color: #A6E22D; background-color: #a6e22d;
} }
[data-type="start-text-annotation"]{ [data-type="text-annotation"]{
background-color: #2FBBAB; background-color: #2FBBAB;
} }
[data-type="token"] { [data-type="token"] {
background-color: #28B3D1; background-color: #28B3D1;
} }
[data-type="token-incidence-modifier"] {
background-color: #4db6ac;
color: white;
}

Binary file not shown.

Before

Width:  |  Height:  |  Size: 222 KiB

After

Width:  |  Height:  |  Size: 123 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 378 KiB

After

Width:  |  Height:  |  Size: 402 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 720 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 854 KiB

After

Width:  |  Height:  |  Size: 589 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 436 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 189 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 511 KiB

After

Width:  |  Height:  |  Size: 381 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 1009 KiB

After

Width:  |  Height:  |  Size: 759 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 903 KiB

After

Width:  |  Height:  |  Size: 750 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 413 KiB

After

Width:  |  Height:  |  Size: 524 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 23 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 34 KiB

After

Width:  |  Height:  |  Size: 30 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 160 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 182 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 155 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 104 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 115 KiB

View File

@ -1,104 +0,0 @@
/**
 * Client-side application state.
 *
 * Keeps a local copy of user objects in `this.data.users`, requests them over
 * the socket created by `io(...)`, applies incoming 'PATCH' events to the
 * local copy and shows toast notifications via `M.toast`.
 */
class App {
  constructor() {
    this.data = {
      // In-flight/settled request promises, keyed by user id
      promises: {getUser: {}, subscribeUser: {}},
      // Loaded user objects, keyed by user id
      users: {},
    };
    this.socket = io({transports: ['websocket'], upgrade: false});
    this.socket.on('PATCH', (patch) => {this.onPatch(patch);});
  }

  /**
   * Requests a user object and stores it in `this.data.users`.
   *
   * The promise is cached per user id, so repeated calls share one request.
   * Note that `backrefs` and `relationships` are not part of the cache key.
   * A failed request is removed from the cache so that it can be retried.
   *
   * @param {string} userId - Id of the user to load.
   * @param {boolean} [backrefs=true] - Forwarded to the server unchanged.
   * @param {boolean} [relationships=true] - Forwarded to the server unchanged.
   * @returns {Promise<object>} Resolves with the response body, rejects with
   *   the whole response when its status is not 200.
   */
  getUser(userId, backrefs=true, relationships=true) {
    if (userId in this.data.promises.getUser) {
      return this.data.promises.getUser[userId];
    }
    this.data.promises.getUser[userId] = new Promise((resolve, reject) => {
      this.socket.emit('GET /users/<user_id>', userId, backrefs, relationships, (response) => {
        if (response.status !== 200) {
          // Do not keep a rejected promise around, otherwise no retry is possible
          delete this.data.promises.getUser[userId];
          reject(response);
          return;
        }
        this.data.users[userId] = response.body;
        resolve(this.data.users[userId]);
      });
    });
    return this.data.promises.getUser[userId];
  }

  /**
   * Subscribes to updates for a user.
   *
   * The promise is cached per user id; a failed subscription is removed from
   * the cache so that it can be retried.
   *
   * @param {string} userId - Id of the user to subscribe to.
   * @returns {Promise<object>} Resolves with the response, rejects with the
   *   response when its status is not 200.
   */
  subscribeUser(userId) {
    if (userId in this.data.promises.subscribeUser) {
      return this.data.promises.subscribeUser[userId];
    }
    this.data.promises.subscribeUser[userId] = new Promise((resolve, reject) => {
      this.socket.emit('SUBSCRIBE /users/<user_id>', userId, (response) => {
        if (response.status !== 200) {
          delete this.data.promises.subscribeUser[userId];
          reject(response);
          return;
        }
        resolve(response);
      });
    });
    return this.data.promises.subscribeUser[userId];
  }

  /**
   * Shows a toast with a category icon and a close button.
   *
   * NOTE(review): `message` is inserted as HTML without escaping. Callers
   * must make sure that it does not contain untrusted markup.
   *
   * @param {string} message - HTML content of the toast.
   * @param {string} [category] - 'corpus', 'error', 'job' or 'settings';
   *   anything else gets the generic notification icon.
   */
  flash(message, category) {
    let iconPrefix = '';
    switch (category) {
      case 'corpus': {
        iconPrefix = '<i class="left material-icons">book</i>';
        break;
      }
      case 'error': {
        iconPrefix = '<i class="error-color-text left material-icons">error</i>';
        break;
      }
      case 'job': {
        iconPrefix = '<i class="left nopaque-icons">J</i>';
        break;
      }
      case 'settings': {
        iconPrefix = '<i class="left material-icons">settings</i>';
        break;
      }
      default: {
        iconPrefix = '<i class="left material-icons">notifications</i>';
        break;
      }
    }
    const toast = M.toast(
      {
        html: `
          <span>${iconPrefix}${message}</span>
          <button class="action-button btn-flat toast-action white-text" data-action="close">
            <i class="material-icons">close</i>
          </button>
        `.trim()
      }
    );
    const toastCloseActionElement = toast.el.querySelector('.action-button[data-action="close"]');
    toastCloseActionElement.addEventListener('click', () => {toast.dismiss();});
  }

  /**
   * Handles a 'PATCH' event: announces job status changes and applies the
   * operations that belong to loaded users to `this.data`.
   *
   * @param {Array<{op: string, path: string, value: *}>} patch - List of
   *   patch operations (passed on to `jsonpatch.applyPatch`).
   */
  onPatch(patch) {
    // Keep only operations on users that are initialized. The user id is
    // compared exactly: building a RegExp from the ids would match every user
    // when no user is loaded yet and would also match ids sharing a prefix.
    const userPathRegExp = /^\/users\/([^/]+)/;
    const filteredPatch = patch.filter((operation) => {
      const userMatch = userPathRegExp.exec(operation.path);
      return userMatch !== null
        && Object.prototype.hasOwnProperty.call(this.data.users, userMatch[1]);
    });
    // Handle job status updates
    const jobStatusRegExp = /^\/users\/([A-Za-z0-9]*)\/jobs\/([A-Za-z0-9]*)\/status$/;
    for (const operation of filteredPatch) {
      if (operation.op !== 'replace') {continue;}
      const statusMatch = operation.path.match(jobStatusRegExp);
      if (statusMatch === null) {continue;}
      const [, userId, jobId] = statusMatch;
      const jobs = this.data.users[userId].jobs;
      const job = jobs === undefined || jobs === null ? undefined : jobs[jobId];
      // Nothing to announce for a job that is not part of the local data
      if (job === undefined || job === null) {continue;}
      // NOTE(review): the job title is inserted as HTML - confirm it is escaped upstream
      this.flash(`[<a href="/jobs/${jobId}">${job.title}</a>] New status: <span class="job-status-text" data-status="${operation.value}"></span>`, 'job');
    }
    // Apply Patch
    jsonpatch.applyPatch(this.data, filteredPatch);
  }
}

File diff suppressed because it is too large Load Diff

View File

@ -1,245 +0,0 @@
/**
 * Controller of the corpus analysis page.
 *
 * Connects to the corpus through a `CQiClient`, renders the overview
 * (general info, text list and Plotly graphics) and initializes registered
 * extensions once the connection is established.
 */
class CorpusAnalysisApp {
  /**
   * @param {string} corpusId - Id of the corpus; stored in `this.settings`.
   */
  constructor(corpusId) {
    this.data = {};
    // HTML elements
    this.elements = {
      container: document.querySelector('#corpus-analysis-app-container'),
      extensionTabs: document.querySelector('#corpus-analysis-app-extension-tabs'),
      initModal: document.querySelector('#corpus-analysis-app-init-modal'),
      overview: document.querySelector('#corpus-analysis-app-overview')
    };
    // Materialize elements
    this.elements.m = {
      extensionTabs: M.Tabs.init(this.elements.extensionTabs),
      initModal: M.Modal.init(this.elements.initModal, {dismissible: false})
    };
    this.extensions = {};
    this.settings = {
      corpusId: corpusId
    };
  }

  /**
   * Connects to the corpus, renders the overview and initializes all
   * registered extensions. While connecting, the action elements are disabled
   * and the init modal is shown; connection errors are displayed inside the
   * modal, which then stays open.
   */
  init() {
    this.disableActionElements();
    this.elements.m.initModal.open();
    // Init data
    this.data.cQiClient = new CQiClient(this.settings.corpusId);
    this.data.cQiClient.connect()
      .then(() => {
        return this.data.cQiClient.corpora.get(`NOPAQUE_${this.settings.corpusId}`);
      })
      .then(
        (cQiCorpus) => {
          this.data.corpus = {o: cQiCorpus};
          // The overview is rendered in the background; it does not block
          // the initialization of the extensions below.
          this.data.corpus.o.getVisualizationData()
            .then((data) => {
              this.renderGeneralCorpusInfo(data);
              this.renderTextInfoList(data);
              this.renderTextProportionsGraphic(data);
              this.renderFrequenciesGraphic(data);
              this.renderBoundsGraphic(data);
            })
            .catch((error) => {
              // Without a handler a failure would only surface as an unhandled rejection
              console.error('Rendering the corpus overview failed', error);
            });
          // TODO: Don't do this here
          cQiCorpus.updateDb();
          this.enableActionElements();
          for (const extension of Object.values(this.extensions)) {extension.init();}
          this.elements.m.initModal.close();
        },
        (cQiError) => {
          const errorsElement = this.elements.initModal.querySelector('.errors');
          const progressElement = this.elements.initModal.querySelector('.progress');
          errorsElement.innerText = JSON.stringify(cQiError);
          errorsElement.classList.remove('hide');
          progressElement.classList.add('hide');
          if ('payload' in cQiError && 'code' in cQiError.payload && 'msg' in cQiError.payload) {
            app.flash(`${cQiError.payload.code}: ${cQiError.payload.msg}`, 'error');
          }
        }
      );
    // Add event listeners
    for (const extensionSelectorElement of this.elements.overview.querySelectorAll('.extension-selector')) {
      extensionSelectorElement.addEventListener('click', () => {
        this.elements.m.extensionTabs.select(extensionSelectorElement.dataset.target);
      });
    }
  }

  /**
   * Registers an extension under its `name`. If the client is already
   * connected, the extension is initialized immediately.
   *
   * @param {{name: string, init: function}} extension
   */
  registerExtension(extension) {
    if (extension.name in this.extensions) {
      console.error(`Can't register extension ${extension.name}: Already registered`);
      return;
    }
    this.extensions[extension.name] = extension;
    if ('cQiClient' in this.data && this.data.cQiClient.connected) {extension.init();}
  }

  /** Disables every `.corpus-analysis-action` element inside the container. */
  disableActionElements() {
    const actionElements = this.elements.container.querySelectorAll('.corpus-analysis-action');
    for (const actionElement of actionElements) {
      if (actionElement.nodeName === 'INPUT') {
        actionElement.disabled = true;
      } else if (actionElement.nodeName === 'SELECT') {
        // For selects the sibling `input.select-dropdown` is the element that gets disabled
        actionElement.parentNode.querySelector('input.select-dropdown').disabled = true;
      } else {
        actionElement.classList.add('disabled');
      }
    }
  }

  /** Re-enables every `.corpus-analysis-action` element inside the container. */
  enableActionElements() {
    const actionElements = this.elements.container.querySelectorAll('.corpus-analysis-action');
    for (const actionElement of actionElements) {
      if (actionElement.nodeName === 'INPUT') {
        actionElement.disabled = false;
      } else if (actionElement.nodeName === 'SELECT') {
        actionElement.parentNode.querySelector('input.select-dropdown').disabled = false;
      } else {
        actionElement.classList.remove('disabled');
      }
    }
  }

  /**
   * Writes the corpus wide counts into the overview.
   *
   * @param {object} corpusData - Reads `corpus.counts` and `corpus.freqs`.
   */
  renderGeneralCorpusInfo(corpusData) {
    const {counts, freqs} = corpusData.corpus;
    document.querySelector('.corpus-num-tokens').innerHTML = counts.token;
    document.querySelector('.corpus-num-s').innerHTML = counts.s;
    document.querySelector('.corpus-num-unique-words').innerHTML = Object.keys(freqs.word).length;
    document.querySelector('.corpus-num-unique-lemmas').innerHTML = Object.keys(freqs.lemma).length;
    document.querySelector('.corpus-num-unique-pos').innerHTML = Object.keys(freqs.pos).length;
    document.querySelector('.corpus-num-unique-simple-pos').innerHTML = Object.keys(freqs.simple_pos).length;
  }

  /**
   * Fills the text info list with one entry per text and updates the text
   * count chip.
   *
   * @param {object} corpusData - Reads `s_attrs.text.lexicon`,
   *   `values.s_attrs.text` and `corpus.counts.text`.
   */
  renderTextInfoList(corpusData) {
    const corpusTextInfoListElement = document.querySelector('.corpus-text-info-list');
    const corpusTextInfoList = new CorpusTextInfoList(corpusTextInfoListElement);
    const textValues = corpusData.values.s_attrs.text;
    // Iterate over the lexicon keys (like the graphics below do) instead of
    // assuming that the keys are the contiguous numbers 0..n-1
    const textData = Object.entries(corpusData.s_attrs.text.lexicon).map(([textId, textStats]) => {
      return {
        title: textValues[textId].title,
        publishing_year: textValues[textId].publishing_year,
        num_tokens: textStats.counts.token,
        num_sentences: textStats.counts.s,
        num_unique_words: Object.keys(textStats.freqs.word).length,
        num_unique_lemmas: Object.keys(textStats.freqs.lemma).length,
        num_unique_pos: Object.keys(textStats.freqs.pos).length,
        num_unique_simple_pos: Object.keys(textStats.freqs.simple_pos).length
      };
    });
    corpusTextInfoList.add(textData);
    const textCountChipElement = document.querySelector('.text-count-chip');
    textCountChipElement.innerHTML = `Text count: ${corpusData.corpus.counts.text}`;
  }

  /**
   * Draws a pie chart with the token count of every text.
   *
   * @param {object} corpusData - Reads `s_attrs.text.lexicon` and
   *   `values.s_attrs.text`.
   */
  renderTextProportionsGraphic(corpusData) {
    const textProportionsGraphicElement = document.querySelector('#text-proportions-graphic');
    const texts = Object.entries(corpusData.s_attrs.text.lexicon);
    const graphData = [
      {
        values: texts.map(text => text[1].counts.token),
        labels: texts.map(text => `${corpusData.values.s_attrs.text[text[0]].title} (${corpusData.values.s_attrs.text[text[0]].publishing_year})`),
        type: 'pie'
      }
    ];
    const graphLayout = {};
    const config = {responsive: true};
    // The third argument of Plotly.newPlot is the layout, the config is the fourth
    Plotly.newPlot(textProportionsGraphicElement, graphData, graphLayout, config);
  }

  /**
   * Draws a stacked bar chart of the most frequent values of the token
   * category that is currently shown in the category dropdown and re-renders
   * when another category is clicked.
   *
   * @param {object} corpusData - Passed on to `createFrequenciesGraphData`.
   */
  renderFrequenciesGraphic(corpusData) {
    const frequenciesTokenCategoryDropdownElement = document.querySelector('[data-target="frequencies-token-category-dropdown"]');
    const frequenciesTokenCategoryDropdownListElement = document.querySelector("#frequencies-token-category-dropdown");
    const frequenciesGraphicElement = document.querySelector('#frequencies-graphic');
    const texts = Object.entries(corpusData.s_attrs.text.lexicon);
    // The listener re-renders, and every render registers a listener again.
    // `once` removes the listener when it fires, so exactly one listener is
    // active at a time instead of doubling their number with every click.
    frequenciesTokenCategoryDropdownListElement.addEventListener('click', (event) => {
      frequenciesTokenCategoryDropdownElement.firstChild.textContent = event.target.innerHTML;
      this.renderFrequenciesGraphic(corpusData);
    }, {once: true});
    const tokenCategory = frequenciesTokenCategoryDropdownElement.firstChild.textContent.toLowerCase();
    const graphData = this.createFrequenciesGraphData(tokenCategory, texts, corpusData);
    const graphLayout = {
      barmode: 'stack',
      type: 'bar'
    };
    const config = {responsive: true};
    Plotly.newPlot(frequenciesGraphicElement, graphData, graphLayout, config);
  }

  /**
   * Builds one bar trace for each of the five most frequent values of
   * `category` in the whole corpus.
   *
   * @param {string} category - Key of `corpusData.corpus.freqs`.
   * @param {Array} texts - `Object.entries` of the text lexicon.
   * @param {object} corpusData - Reads `corpus.freqs`, `values.s_attrs.text`
   *   and `values.p_attrs`.
   * @returns {Array<object>} Plotly traces.
   */
  createFrequenciesGraphData(category, texts, corpusData) {
    const graphData = [];
    // Sort descending by frequency and keep the top five
    const sortedData = Object.entries(corpusData.corpus.freqs[category]).sort((a, b) => b[1] - a[1]).slice(0, 5);
    for (const item of sortedData) {
      const data = {
        x: texts.map(text => `${corpusData.values.s_attrs.text[text[0]].title} (${corpusData.values.s_attrs.text[text[0]].publishing_year})`),
        y: texts.map(text => text[1].freqs[category][item[0]]),
        name: corpusData.values.p_attrs[category][item[0]],
        type: 'bar'
      };
      graphData.push(data);
    }
    return graphData;
  }

  /**
   * Draws a horizontal bar per text that spans from `bounds[0]` to
   * `bounds[1]` of that text.
   *
   * @param {object} corpusData - Reads `s_attrs.text.lexicon` and
   *   `values.s_attrs.text`.
   */
  renderBoundsGraphic(corpusData) {
    const boundsGraphicElement = document.querySelector('#bounds-graphic');
    const texts = Object.entries(corpusData.s_attrs.text.lexicon);
    const graphData = [{
      type: 'bar',
      // Bar length is the extent of the text, `base` moves the bar to its start
      x: texts.map(text => text[1].bounds[1] - text[1].bounds[0]),
      y: texts.map(text => corpusData.values.s_attrs.text[text[0]].title),
      base: texts.map(text => text[1].bounds[0]),
      text: texts.map(text => `${corpusData.values.s_attrs.text[text[0]].title} (${corpusData.values.s_attrs.text[text[0]].publishing_year})`),
      orientation: 'h',
      hovertemplate: '%{base} - %{x} <br>%{y}',
      showlegend: false
    }];
    const graphLayout = {
      barmode: 'stack',
      type: 'bar',
      showgrid: false,
      xaxis: {
        rangemode: 'nonnegative',
        autorange: true
      },
      yaxis: {
        autorange: true,
        showticklabels: false
      }
    };
    const config = {responsive: true};
    Plotly.newPlot(boundsGraphicElement, graphData, graphLayout, config);
  }
}

View File

@ -1,972 +0,0 @@
class ConcordanceQueryBuilder {
constructor() {
this.elements = {
counter: 0,
yourQueryContent: [],
queryContent:[],
concordanceQueryBuilder: document.querySelector('#concordance-query-builder'),
concordanceQueryBuilderButton: document.querySelector('#concordance-query-builder-button'),
closeQueryBuilder: document.querySelector('#close-query-builder'),
queryBuilderTutorialModal: document.querySelector('#query-builder-tutorial-modal'),
valueValidator: true,
//#region QueryBuilder Elements
positionalAttrButton: document.querySelector('#positional-attr-button'),
positionalAttrArea: document.querySelector('#positional-attr'),
positionalAttr: document.querySelector('#token-attr'),
structuralAttrButton: document.querySelector('#structural-attr-button'),
structuralAttrArea: document.querySelector('#structural-attr'),
queryContainer: document.querySelector('#query-container'),
buttonPreparer: document.querySelector('#button-preparer'),
yourQuery: document.querySelector('#your-query'),
insertQueryButton: document.querySelector('#insert-query-button'),
queryPreview: document.querySelector('#query-preview'),
tokenQuery: document.querySelector('#token-query'),
tokenBuilderContent: document.querySelector('#token-builder-content'),
tokenSubmitButton: document.querySelector('#token-submit'),
extFormQuery: document.querySelector('#concordance-extension-form-query'),
dropButton: '',
queryBuilderTutorialInfoIcon: document.querySelector('#query-builder-tutorial-info-icon'),
tokenTutorialInfoIcon: document.querySelector('#token-tutorial-info-icon'),
editTokenTutorialInfoIcon: document.querySelector('#edit-options-tutorial-info-icon'),
structuralAttributeTutorialInfoIcon: document.querySelector('#add-structural-attribute-tutorial-info-icon'),
generalOptionsQueryBuilderTutorialInfoIcon: document.querySelector('#general-options-query-builder-tutorial-info-icon'),
//#endregion QueryBuilder Elements
//#region Strucutral Attributes
sentence:document.querySelector('#sentence'),
entity: document.querySelector('#entity'),
textAnnotation: document.querySelector('#text-annotation'),
entityBuilder: document.querySelector('#entity-builder'),
englishEntType: document.querySelector('#english-ent-type'),
germanEntType: document.querySelector('#german-ent-type'),
emptyEntity: document.querySelector('#empty-entity'),
entityAnyType: false,
textAnnotationBuilder: document.querySelector('#text-annotation-builder'),
textAnnotationOptions: document.querySelector('#text-annotation-options'),
textAnnotationInput: document.querySelector('#text-annotation-input'),
textAnnotationSubmit: document.querySelector('#text-annotation-submit'),
noValueMetadataMessage: document.querySelector('#no-value-metadata-message'),
//#endregion Structural Attributes
//#region Token Attributes
tokenQueryFilled: false,
lemma: document.querySelector('#lemma'),
emptyToken: document.querySelector('#empty-token'),
word: document.querySelector('#word'),
lemma: document.querySelector('#lemma'),
pos: document.querySelector('#pos'),
simplePosButton: document.querySelector('#simple-pos-button'),
incidenceModifiers: document.querySelector('[data-target="incidence-modifiers"]'),
or: document.querySelector('#or'),
and: document.querySelector('#and'),
//#region Word and Lemma Elements
wordBuilder: document.querySelector('#word-builder'),
lemmaBuilder: document.querySelector('#lemma-builder'),
inputOptions: document.querySelector('#input-options'),
incidenceModifiersButton: document.querySelector('#incidence-modifiers-button'),
conditionContainer: document.querySelector('#condition-container'),
wordInput: document.querySelector('#word-input'),
lemmaInput: document.querySelector('#lemma-input'),
ignoreCaseCheckbox : document.querySelector('#ignore-case-checkbox'),
ignoreCase: document.querySelector('input[type="checkbox"]'),
wildcardChar: document.querySelector('#wildcard-char'),
optionGroup: document.querySelector('#option-group'),
//#endregion Word and Lemma Elements
//#region posBuilder Elements
englishPosBuilder: document.querySelector('#english-pos-builder'),
englishPos: document.querySelector('#english-pos'),
germanPosBuilder: document.querySelector('#german-pos-builder'),
germanPos: document.querySelector('#german-pos'),
//#endregion posBuilder Elements
//#region simple_posBuilder Elements
simplePosBuilder: document.querySelector('#simplepos-builder'),
simplePos: document.querySelector('#simple-pos'),
//#endregion simple_posBuilder Elements
//#region incidence modifiers
oneOrMore: document.querySelector('#one-or-more'),
zeroOrMore: document.querySelector('#zero-or-more'),
zeroOrOne: document.querySelector('#zero-or-one'),
exactlyN: document.querySelector('#exactlyN'),
betweenNM: document.querySelector('#betweenNM'),
nInput: document.querySelector('#n-input'),
nSubmit: document.querySelector('#n-submit'),
nmInput: document.querySelector('#n-m-input'),
mInput: document.querySelector('#m-input'),
nmSubmit: document.querySelector('#n-m-submit'),
//#endregion incidence modifiers
cancelBool: false,
noValueMessage: document.querySelector('#no-value-message'),
//#endregion Token Attributes
}
this.elements.closeQueryBuilder.addEventListener('click', () => {this.closeQueryBuilderModal(this.elements.concordanceQueryBuilder);});
this.elements.concordanceQueryBuilderButton.addEventListener('click', () => {this.clearAll();});
this.elements.insertQueryButton.addEventListener('click', () => {this.insertQuery();});
this.elements.positionalAttrButton.addEventListener('click', () => {this.showPositionalAttrArea();});
this.elements.structuralAttrButton.addEventListener('click', () => {this.showStructuralAttrArea();});
//#region Structural Attribute Event Listeners
this.elements.sentence.addEventListener('click', () => {this.addSentence();});
this.elements.entity.addEventListener('click', () => {this.addEntity();});
this.elements.textAnnotation.addEventListener('click', () => {this.addTextAnnotation();});
this.elements.englishEntType.addEventListener('change', () => {this.englishEntTypeHandler();});
this.elements.germanEntType.addEventListener('change', () => {this.germanEntTypeHandler();});
this.elements.emptyEntity.addEventListener('click', () => {this.emptyEntityButton();});
this.elements.textAnnotationSubmit.addEventListener('click', () => {this.textAnnotationSubmitHandler();});
//#endregion
//#region Token Attribute Event Listeners
this.elements.queryBuilderTutorialInfoIcon.addEventListener('click', () => {this.tutorialIconHandler('#query-builder-tutorial-start');});
this.elements.tokenTutorialInfoIcon.addEventListener('click', () => {this.tutorialIconHandler('#add-new-token-tutorial');});
this.elements.editTokenTutorialInfoIcon.addEventListener('click', () => {this.tutorialIconHandler('#edit-options-tutorial');});
this.elements.structuralAttributeTutorialInfoIcon.addEventListener('click', () => {this.tutorialIconHandler('#add-structural-attribute-tutorial');});
this.elements.generalOptionsQueryBuilderTutorialInfoIcon.addEventListener('click', () => {this.tutorialIconHandler('#general-options-query-builder');});
this.elements.positionalAttr.addEventListener('change', () => {this.tokenTypeSelector();});
this.elements.tokenSubmitButton.addEventListener('click', () => {this.addTokenToQuery();});
this.elements.wordInput.addEventListener('input', () => {this.inputFieldHandler();});
this.elements.lemmaInput.addEventListener('input', () => {this.inputFieldHandler();});
this.elements.ignoreCase.addEventListener('change', () => {this.inputOptionHandler(this.elements.ignoreCase);});
this.elements.wildcardChar.addEventListener('click', () => {this.inputOptionHandler(this.elements.wildcardChar);});
this.elements.optionGroup.addEventListener('click', () => {this.inputOptionHandler(this.elements.optionGroup);});
this.elements.oneOrMore.addEventListener('click', () => {this.incidenceModifiersHandler(this.elements.oneOrMore);});
this.elements.zeroOrMore.addEventListener('click', () => {this.incidenceModifiersHandler(this.elements.zeroOrMore);});
this.elements.zeroOrOne.addEventListener('click', () => {this.incidenceModifiersHandler(this.elements.zeroOrOne);});
this.elements.nSubmit.addEventListener('click', () => {this.nSubmitHandler();});
this.elements.nmSubmit.addEventListener('click', () => {this.nmSubmitHandler();});
this.elements.or.addEventListener('click', () => {this.orHandler();});
this.elements.and.addEventListener('click', () => {this.andHandler();});
//#endregion Token Attribute Event Listeners
}
// ##########################################################################
// #################### General Functions ###################################
// ##########################################################################
//#region General Functions
closeQueryBuilderModal(closeInstance) {
let instance = M.Modal.getInstance(closeInstance);
instance.close();
}
showPositionalAttrArea() {
this.elements.positionalAttrArea.classList.remove('hide');
this.wordBuilder();
this.elements.tokenQueryFilled = false;
window.location.href = '#token-builder-content';
}
showStructuralAttrArea() {
this.elements.positionalAttrArea.classList.add('hide');
this.elements.structuralAttrArea.classList.remove('hide');
}
/**
 * Creates a chip for one query component, inserts it into the query
 * (`this.elements.yourQuery`) and refreshes the query preview.
 *
 * The chip is deleted when it is clicked and can be moved via drag and drop.
 *
 * @param {string} dataType - Written to the `data-type` attribute of the chip.
 * @param {string} prettyQueryText - Visible text of the chip (escaped with `Utils.escape`).
 * @param {string} queryText - Query fragment stored in `data-query` (escaped with `Utils.escape`).
 */
queryChipFactory(dataType, prettyQueryText, queryText) {
// Jump to the query container
window.location.href = '#query-container';
queryText = Utils.escape(queryText);
prettyQueryText = Utils.escape(prettyQueryText);
let queryChipElement = Utils.HTMLToElement(
`
<span class="chip query-component" data-type="${dataType}" data-query="${queryText}" draggable="true">
${prettyQueryText}
<i class="material-icons close">close</i>
</span>
`
);
// A click anywhere on the chip removes it from the query
queryChipElement.addEventListener('click', () => {this.deleteAttr(queryChipElement);});
queryChipElement.addEventListener('dragstart', (event) => {
// Collect the query chips; drop targets do not have the query-component class
let queryChips = this.elements.yourQuery.querySelectorAll('.query-component');
// Adds a target chip in front of all draggable childnodes
// (deferred with a zero timeout, i.e. it runs after the dragstart handler has returned)
setTimeout(() => {
let targetChipElement = Utils.HTMLToElement('<span class="chip drop-target">Drop here</span>');
for (let element of queryChips) {
// No target in front of the chip that directly follows the dragged chip
// (presumably because dropping there would not move anything)
if (element === queryChipElement.nextSibling) {continue;}
let targetChipClone = targetChipElement.cloneNode(true);
if (element === queryChipElement) {
// If the dragged element is not at the very end, a target chip is also inserted at the end
if (queryChips[queryChips.length - 1] !== element) {
queryChips[queryChips.length - 1].insertAdjacentElement('afterend', targetChipClone);
}
} else {
element.insertAdjacentElement('beforebegin', targetChipClone);
}
// preventDefault in dragover/dragenter marks the clone as a valid drop target
targetChipClone.addEventListener('dragover', (event) => {
event.preventDefault();
});
targetChipClone.addEventListener('dragenter', (event) => {
event.preventDefault();
event.target.style.borderStyle = 'solid dotted';
});
targetChipClone.addEventListener('dragleave', (event) => {
event.preventDefault();
event.target.style.borderStyle = 'hidden';
});
targetChipClone.addEventListener('drop', (event) => {
// The dragged chip takes the place of the drop target it was dropped on
let dropzone = event.target;
dropzone.parentElement.replaceChild(queryChipElement, dropzone);
this.queryPreviewBuilder();
});
}
}, 0);
});
queryChipElement.addEventListener('dragend', (event) => {
// Remove every drop target once dragging has ended
let targets = document.querySelectorAll('.drop-target');
for (let target of targets) {
target.remove();
}
});
// Ensures that metadata is always at the end of the query:
// NOTE(review): assumes the last child is an element; a text node has no dataset - confirm
const lastChild = this.elements.yourQuery.lastChild;
const isLastChildTextAnnotation = lastChild && lastChild.dataset.type === 'text-annotation';
if (!isLastChildTextAnnotation) {
this.elements.yourQuery.appendChild(queryChipElement);
} else {
this.elements.yourQuery.insertBefore(queryChipElement, lastChild);
}
this.elements.queryContainer.classList.remove('hide');
this.queryPreviewBuilder();
// Shows a hint about possible functions for editing the query at the first added element in the query
if (this.elements.yourQuery.childNodes.length === 1) {
app.flash('You can edit your query by deleting individual elements or moving them via drag and drop.');
}
}
queryPreviewBuilder() {
this.elements.yourQueryContent = [];
for (let element of this.elements.yourQuery.childNodes) {
let queryElement = decodeURI(element.dataset.query);
queryElement = Utils.escape(queryElement);
if (queryElement !== 'undefined') {
this.elements.yourQueryContent.push(queryElement);
}
}
let queryString = this.elements.yourQueryContent.join(' ');
queryString += ';';
this.elements.queryPreview.innerHTML = queryString;
}
deleteAttr(attr) {
this.elements.yourQuery.removeChild(attr);
if (attr.dataset.type === "start-sentence") {
this.elements.sentence.innerHTML = 'Sentence';
} else if (attr.dataset.type === "start-entity" || attr.dataset.type === "start-empty-entity") {
this.elements.entity.innerHTML = 'Entity';
}
this.elements.counter -= 1;
if (this.elements.counter === 0) {
this.elements.queryContainer.classList.add('hide');
}
this.queryPreviewBuilder();
}
insertQuery() {
this.elements.yourQueryContent = [];
this.validateValue();
if (this.elements.valueValidator) {
for (let element of this.elements.yourQuery.childNodes) {
let queryElement = decodeURI(element.dataset.query);
if (queryElement !== 'undefined') {
this.elements.yourQueryContent.push(queryElement);
}
}
let queryString = this.elements.yourQueryContent.join(' ');
queryString += ';';
this.elements.concordanceQueryBuilder.classList.add('modal-close');
this.elements.extFormQuery.value = queryString;
}
}
validateValue() {
this.elements.valueValidator = true;
let sentenceCounter = 0;
let sentenceEndCounter = 0;
let entityCounter = 0;
let entityEndCounter = 0;
for (let element of this.elements.yourQuery.childNodes) {
if (element.dataset.type === 'start-sentence') {
sentenceCounter += 1;
}else if (element.dataset.type === 'end-sentence') {
sentenceEndCounter += 1;
}else if (element.dataset.type === 'start-entity' || element.dataset.type === 'start-empty-entity') {
entityCounter += 1;
}else if (element.dataset.type === 'end-entity') {
entityEndCounter += 1;
}
}
// Checks if the same number of opening and closing tags (entity and sentence) are present. Depending on what is missing, the corresponding error message is ejected
if (sentenceCounter > sentenceEndCounter) {
app.flash('Please add the closing sentence tag', 'error');
this.elements.valueValidator = false;
} else if (sentenceCounter < sentenceEndCounter) {
app.flash('Please remove the closing sentence tag', 'error');
this.elements.valueValidator = false;
}
if (entityCounter > entityEndCounter) {
app.flash('Please add the closing entity tag', 'error');
this.elements.valueValidator = false;
} else if (entityCounter < entityEndCounter) {
app.flash('Please remove the closing entity tag', 'error');
this.elements.valueValidator = false;
}
}
clearAll() {
// Everything is reset.
let instance = M.Tooltip.getInstance(this.elements.queryBuilderTutorialInfoIcon);
this.hideEverything();
this.elements.counter = 0;
this.elements.concordanceQueryBuilder.classList.remove('modal-close');
this.elements.positionalAttrArea.classList.add('hide');
this.elements.structuralAttrArea.classList.add('hide');
this.elements.yourQuery.innerHTML = '';
this.elements.queryContainer.classList.add('hide');
this.elements.entity.innerHTML = 'Entity';
this.elements.sentence.innerHTML = 'Sentence';
// If the Modal is open after 5 seconds for 5 seconds (with 'instance'), a message is displayed indicating that further information can be obtained via the question mark icon
instance.tooltipEl.style.background = '#98ACD2';
instance.tooltipEl.style.borderTop = 'solid 4px #0064A3';
instance.tooltipEl.style.padding = '10px';
instance.tooltipEl.style.color = 'black';
setTimeout(() => {
let modalInstance = M.Modal.getInstance(this.elements.concordanceQueryBuilder);
if (modalInstance.isOpen) {
instance.open();
setTimeout(() => {
instance.close();
}, 5000);
}
}, 5000);
}
tutorialIconHandler(id) {
setTimeout(() => {
window.location.href= id;
}, 0);
}
//#endregion General Functions
// ##########################################################################
// ############## Token Attribute Builder Functions #########################
// ##########################################################################
//#region Token Attribute Builder Functions
//#region General functions of the Token Builder
tokenTypeSelector() {
this.hideEverything();
switch (this.elements.positionalAttr.value) {
case 'word':
this.wordBuilder();
break;
case 'lemma':
this.lemmaBuilder();
break;
case 'english-pos':
this.englishPosHandler();
break;
case 'german-pos':
this.germanPosHandler();
break;
case 'simple-pos-button':
this.simplePosBuilder();
break;
case 'empty-token':
this.emptyTokenHandler();
break;
default:
this.wordBuilder();
break;
}
}
hideEverything() {
this.elements.wordBuilder.classList.add('hide');
this.elements.lemmaBuilder.classList.add('hide');
this.elements.ignoreCaseCheckbox.classList.add('hide');
this.elements.inputOptions.classList.add('hide');
this.elements.incidenceModifiersButton.classList.add('hide');
this.elements.conditionContainer.classList.add('hide');
this.elements.englishPosBuilder.classList.add('hide');
this.elements.germanPosBuilder.classList.add('hide');
this.elements.simplePosBuilder.classList.add('hide');
this.elements.entityBuilder.classList.add('hide');
this.elements.textAnnotationBuilder.classList.add('hide');
}
tokenChipFactory(prettyQueryText, tokenText) {
tokenText = encodeURI(tokenText);
let builderElement;
let queryChipElement;
builderElement = document.createElement('div');
builderElement.innerHTML = `
<div class='chip col s2 l2' style='margin-top:20px;' data-tokentext='${tokenText}'>
${prettyQueryText}
<i class='material-icons close'>close</i>
</div>`;
queryChipElement = builderElement.firstElementChild;
queryChipElement.addEventListener('click', () => {this.deleteTokenAttr(queryChipElement);});
this.elements.tokenQuery.appendChild(queryChipElement);
}
deleteTokenAttr(attr) {
if (this.elements.tokenQuery.childNodes.length < 2) {
this.elements.tokenQuery.removeChild(attr);
this.wordBuilder();
} else {
this.elements.tokenQuery.removeChild(attr);
}
}
/**
 * Builds one token from the chips in `this.elements.tokenQuery` plus the
 * value of the currently selected input and adds it to the query via
 * `queryChipFactory`.
 *
 * If the selected input is empty, `disableTokenSubmit()` is called instead
 * and nothing is added.
 */
addTokenToQuery() {
// Ignore-case flag (' %c') appended to word and lemma conditions
let c;
let tokenQueryContent = ''; // visible text, passed to queryChipFactory as prettyQueryText
let tokenQueryText = ''; // query fragment, passed to queryChipFactory as queryText
this.elements.cancelBool = false;
let tokenIsEmpty = false;
if (this.elements.ignoreCase.checked) {
c = ' %c';
} else {
c = '';
}
// Collect the parts that were already added to the token as chips
for (let element of this.elements.tokenQuery.childNodes) {
tokenQueryContent += ' ' + element.firstChild.data + ' ';
tokenQueryText += decodeURI(element.dataset.tokentext);
if (element.innerText.indexOf('empty token') !== -1) {
tokenIsEmpty = true;
}
}
// Append the value of the active input, unless tokenQueryFilled is set
// NOTE(review): the values are put between double quotes without escaping;
// presumably a value containing '"' produces an invalid query - confirm
if (this.elements.tokenQueryFilled === false) {
switch (this.elements.positionalAttr.value) {
case 'word':
if (this.elements.wordInput.value === '') {
this.disableTokenSubmit();
} else {
tokenQueryContent += `word=${this.elements.wordInput.value}${c}`;
tokenQueryText += `word="${this.elements.wordInput.value}"${c}`;
this.elements.wordInput.value = '';
}
break;
case 'lemma':
if (this.elements.lemmaInput.value === '') {
this.disableTokenSubmit();
} else {
tokenQueryContent += `lemma=${this.elements.lemmaInput.value}${c}`;
tokenQueryText += `lemma="${this.elements.lemmaInput.value}"${c}`;
this.elements.lemmaInput.value = '';
}
break;
case 'english-pos':
if (this.elements.englishPos.value === 'default') {
this.disableTokenSubmit();
} else {
tokenQueryContent += `pos=${this.elements.englishPos.value}`;
tokenQueryText += `pos="${this.elements.englishPos.value}"`;
this.elements.englishPos.value = '';
}
break;
case 'german-pos':
if (this.elements.germanPos.value === 'default') {
this.disableTokenSubmit();
} else {
tokenQueryContent += `pos=${this.elements.germanPos.value}`;
tokenQueryText += `pos="${this.elements.germanPos.value}"`;
this.elements.germanPos.value = '';
}
break;
case 'simple-pos-button':
if (this.elements.simplePos.value === 'default') {
this.disableTokenSubmit();
} else {
tokenQueryContent += `simple_pos=${this.elements.simplePos.value}`;
tokenQueryText += `simple_pos="${this.elements.simplePos.value}"`;
this.elements.simplePos.value = '';
}
break;
default:
// Any other value: reset to the word builder. cancelBool stays false, so
// the parts collected above are still added below.
this.wordBuilder();
break;
}
}
// cancelBool is set by disableTokenSubmit() when the active input field/dropdown is empty (cancelBool === true); in that case no token is added.
if (this.elements.cancelBool === false) {
// Square brackets are added only if it is not an empty token (where they are already present).
if (tokenIsEmpty === false) {
tokenQueryText = '[' + tokenQueryText + ']';
}
this.queryChipFactory('token', tokenQueryContent, tokenQueryText);
// Reset the token builder for the next token
this.hideEverything();
this.elements.positionalAttrArea.classList.add('hide');
this.elements.tokenQuery.innerHTML = '';
}
}
disableTokenSubmit() {
this.elements.cancelBool = true;
this.elements.tokenSubmitButton.classList.add('red');
this.elements.noValueMessage.classList.remove('hide');
setTimeout(() => {
this.elements.tokenSubmitButton.classList.remove('red');
}, 500);
setTimeout(() => {
this.elements.noValueMessage.classList.add('hide');
}, 3000);
}
inputFieldHandler() {
let input;
if (this.elements.wordBuilder.classList.contains('hide') === false) {
input = this.elements.wordInput;
} else {
input = this.elements.lemmaInput;
}
if (input.value === '') {
this.elements.incidenceModifiersButton.firstElementChild.classList.add('disabled');
this.elements.or.classList.add('disabled');
this.elements.and.classList.add('disabled');
} else {
this.elements.incidenceModifiersButton.firstElementChild.classList.remove('disabled');
this.elements.or.classList.remove('disabled');
this.elements.and.classList.remove('disabled');
}
}
//#endregion General functions of the Token Builder
//#region Dropdown Select Handler
wordBuilder() {
this.hideEverything();
this.elements.wordInput.value = '';
this.elements.wordBuilder.classList.remove('hide');
this.elements.inputOptions.classList.remove('hide');
this.elements.incidenceModifiersButton.classList.remove('hide');
this.elements.conditionContainer.classList.remove('hide');
this.elements.ignoreCaseCheckbox.classList.remove('hide');
this.elements.incidenceModifiersButton.firstElementChild.classList.add('disabled');
this.elements.or.classList.add('disabled');
this.elements.and.classList.add('disabled');
// Resets materialize select field to default value
let SelectInstance = M.FormSelect.getInstance(this.elements.positionalAttr);
SelectInstance.input.value = 'word';
this.elements.positionalAttr.value = 'word';
}
lemmaBuilder() {
this.hideEverything();
this.elements.lemmaInput.value = '';
this.elements.lemmaBuilder.classList.remove('hide');
this.elements.inputOptions.classList.remove('hide');
this.elements.incidenceModifiersButton.classList.remove('hide');
this.elements.incidenceModifiersButton.firstElementChild.classList.add('disabled');
this.elements.conditionContainer.classList.remove('hide');
this.elements.ignoreCaseCheckbox.classList.remove('hide');
this.elements.incidenceModifiersButton.firstElementChild.classList.add('disabled');
this.elements.or.classList.add('disabled');
this.elements.and.classList.add('disabled');
}
englishPosHandler() {
this.hideEverything();
this.elements.englishPosBuilder.classList.remove('hide');
// this.elements.incidenceModifiersButton.classList.remove('hide');
this.elements.conditionContainer.classList.remove('hide');
// Resets materialize select dropdown
let selectInstance = M.FormSelect.getInstance(this.elements.englishPos);
selectInstance.input.value = 'English pos tagset';
this.elements.englishPos.value = 'default';
}
germanPosHandler() {
this.hideEverything();
this.elements.germanPosBuilder.classList.remove('hide');
// this.elements.incidenceModifiersButton.classList.remove('hide');
this.elements.conditionContainer.classList.remove('hide');
// Resets materialize select dropdown
let selectInstance = M.FormSelect.getInstance(this.elements.germanPos);
selectInstance.input.value = 'German pos tagset';
this.elements.germanPos.value = 'default';
}
simplePosBuilder() {
this.hideEverything();
this.elements.simplePosBuilder.classList.remove('hide');
// this.elements.incidenceModifiersButton.classList.remove('hide');
this.elements.conditionContainer.classList.remove('hide');
this.elements.simplePos.selectedIndex = 0;
// Resets materialize select dropdown
let selectInstance = M.FormSelect.getInstance(this.elements.simplePos);
selectInstance.input.value = 'simple_pos tagset';
this.elements.simplePos.value = 'default';
}
emptyTokenHandler() {
this.tokenChipFactory('empty token', '[]');
this.elements.tokenQueryFilled = true;
this.hideEverything();
this.elements.incidenceModifiersButton.classList.remove('hide');
}
//#endregion Dropdown Select Handler
//#region Options to edit your token - Wildcard Charakter, Option Group, Incidence Modifiers, Ignore Case, 'and', 'or'
inputOptionHandler(elem) {
let input;
if (this.elements.wordBuilder.classList.contains('hide') === false) {
input = this.elements.wordInput;
} else {
input = this.elements.lemmaInput;
}
if (elem === this.elements.optionGroup) {
input.value += '( option1 | option2 )';
let firstIndex = input.value.indexOf('option1');
let lastIndex = firstIndex + 'option1'.length;
input.focus();
input.setSelectionRange(firstIndex, lastIndex);
} else if (elem === this.elements.wildcardChar) {
input.value += '.';
}
}
nSubmitHandler() {
let instance = M.Modal.getInstance(this.elements.exactlyN);
instance.close();
switch (this.elements.positionalAttr.value) {
case 'word':
this.elements.wordInput.value += ' {' + this.elements.nInput.value + '}';
break;
case 'lemma':
this.elements.lemmaInput.value += ' {' + this.elements.nInput.value + '}';
break;
case 'english-pos':
this.elements.tokenQueryFilled = true;
this.tokenChipFactory(`pos=${this.elements.englishPos.value}`, `pos="${this.elements.englishPos.value}"`);
this.tokenChipFactory('{' + this.elements.nInput.value + '}', '{' + this.elements.nInput.value + '}');
this.elements.englishPosBuilder.classList.add('hide');
this.elements.incidenceModifiersButton.classList.add('hide');
break;
case 'german-pos':
this.elements.tokenQueryFilled = true;
this.tokenChipFactory(`pos=${this.elements.germanPos.value}`, `pos="${this.elements.germanPos.value}"`);
this.tokenChipFactory('{' + this.elements.nInput.value + '}', '{' + this.elements.nInput.value + '}');
this.elements.germanPosBuilder.classList.add('hide');
this.elements.incidenceModifiersButton.classList.add('hide');
break;
case 'simple-pos-button':
this.elements.tokenQueryFilled = true;
this.tokenChipFactory(`simple_pos=${this.elements.simplePos.value}`, `simple_pos="${this.elements.simplePos.value}"`);
this.tokenChipFactory('{' + this.elements.nInput.value + '}', '{' + this.elements.nInput.value + '}');
this.elements.simplePosBuilder.classList.add('hide');
this.elements.incidenceModifiersButton.classList.add('hide');
break;
case 'empty-token':
this.tokenChipFactory('{' + this.elements.nInput.value + '}', '{' + this.elements.nInput.value + '}');
break;
default:
break;
}
}
nmSubmitHandler() {
let instance = M.Modal.getInstance(this.elements.betweenNM);
instance.close();
switch (this.elements.positionalAttr.value) {
case 'word':
this.elements.wordInput.value += `{${this.elements.nmInput.value}, ${this.elements.mInput.value}}`;
break;
case 'lemma':
this.elements.lemmaInput.value += `{${this.elements.nmInput.value}, ${this.elements.mInput.value}}`;
break;
case 'english-pos':
this.elements.tokenQueryFilled = true;
this.tokenChipFactory(`pos=${this.elements.englishPos.value}`, `pos="${this.elements.englishPos.value}"`);
this.tokenChipFactory(`{${this.elements.nmInput.value}, ${this.elements.mInput.value}}`, `{${this.elements.nmInput.value}, ${this.elements.mInput.value}}`);
this.elements.englishPosBuilder.classList.add('hide');
this.elements.incidenceModifiersButton.classList.add('hide');
break;
case 'german-pos':
this.elements.tokenQueryFilled = true;
this.tokenChipFactory(`pos=${this.elements.germanPos.value}`, `pos="${this.elements.germanPos.value}"`);
this.tokenChipFactory(`{${this.elements.nmInput.value}, ${this.elements.mInput.value}}`, `{${this.elements.nmInput.value}, ${this.elements.mInput.value}}`);
this.elements.germanPosBuilder.classList.add('hide');
this.elements.incidenceModifiersButton.classList.add('hide');
break;
case 'simple-pos-button':
this.elements.tokenQueryFilled = true;
this.tokenChipFactory(`simple_pos=${this.elements.simplePos.value}`, `simple_pos="${this.elements.simplePos.value}"`);
this.tokenChipFactory(`{${this.elements.nmInput.value}, ${this.elements.mInput.value}}`, `{${this.elements.nmInput.value}, ${this.elements.mInput.value}}`);
this.elements.simplePosBuilder.classList.add('hide');
this.elements.incidenceModifiersButton.classList.add('hide');
break;
case 'empty-token':
this.tokenChipFactory(`{${this.elements.nmInput.value}, ${this.elements.mInput.value}}`, `{${this.elements.nmInput.value}, ${this.elements.mInput.value}}`);
break;
default:
break;
}
}
incidenceModifiersHandler(elem) {
// For word and lemma, the incidence modifiers are inserted in the input field. For the others, one or two chips are created which contain the respective value of the token and the incidence modifier.
if (this.elements.positionalAttr.value === 'empty-token') {
this.tokenChipFactory(elem.innerText, elem.dataset.token);
} else if (this.elements.positionalAttr.value === 'english-pos') {
this.tokenChipFactory(`pos=${this.elements.englishPos.value}`, `pos="${this.elements.englishPos.value}"`);
this.tokenChipFactory(elem.innerText, elem.dataset.token);
this.elements.englishPosBuilder.classList.add('hide');
this.elements.incidenceModifiersButton.classList.add('hide');
this.elements.tokenQueryFilled = true;
} else if (this.elements.positionalAttr.value === 'german-pos') {
this.tokenChipFactory(`pos=${this.elements.germanPos.value}`, `pos="${this.elements.germanPos.value}"`);
this.tokenChipFactory(elem.innerText, elem.dataset.token);
this.elements.germanPosBuilder.classList.add('hide');
this.elements.incidenceModifiersButton.classList.add('hide');
this.elements.tokenQueryFilled = true;
} else if (this.elements.positionalAttr.value === 'simple-pos-button') {
this.tokenChipFactory(`simple_pos=${this.elements.simplePos.value}`, `simple_pos="${this.elements.simplePos.value}"`);
this.tokenChipFactory(elem.innerText, elem.dataset.token);
this.elements.simplePosBuilder.classList.add('hide');
this.elements.incidenceModifiersButton.classList.add('hide');
this.elements.tokenQueryFilled = true;
} else {
let input;
if (this.elements.wordBuilder.classList.contains('hide') === false) {
input = this.elements.wordInput;
} else {
input = this.elements.lemmaInput;
}
input.value += ' ' + elem.dataset.token;
}
}
orHandler() {
this.conditionHandler('or', ' | ');
}
andHandler() {
this.conditionHandler('and', ' & ');
}
conditionHandler(conditionText, conditionQueryContent) {
this.hideEverything();
let tokenQueryContent;
let tokenQueryText;
let c;
if (this.elements.ignoreCase.checked) {
c = ' %c';
} else {
c = '';
}
switch (this.elements.positionalAttr.value) {
case 'word':
tokenQueryContent = `word=${this.elements.wordInput.value}${c}`;
tokenQueryText = `word="${this.elements.wordInput.value}"${c}`;
this.elements.wordInput.value = '';
break;
case 'lemma':
tokenQueryContent = `lemma=${this.elements.lemmaInput.value}${c}`;
tokenQueryText = `lemma="${this.elements.lemmaInput.value}"${c}`;
this.elements.lemmaInput.value = '';
break;
case 'english-pos':
tokenQueryContent = `pos=${this.elements.englishPos.value}`;
tokenQueryText = `pos="${this.elements.englishPos.value}"`;
this.elements.englishPos.value = '';
break;
case 'german-pos':
tokenQueryContent = `pos=${this.elements.germanPos.value}`;
tokenQueryText = `pos="${this.elements.germanPos.value}"`;
this.elements.germanPos.value = '';
break;
case 'simple-pos-button':
tokenQueryContent = `simple_pos=${this.elements.simplePos.value}`;
tokenQueryText = `simple_pos="${this.elements.simplePos.value}"`;
this.elements.simplePos.value = '';
break;
default:
this.wordBuilder();
break;
}
this.tokenChipFactory(tokenQueryContent, tokenQueryText);
this.tokenChipFactory(conditionText, conditionQueryContent);
this.wordBuilder();
}
//#endregion Options to edit your token - Wildcard Charakter, Option Group, Incidence Modifiers, Ignore Case, 'and', 'or'
//#endregion Token Attribute Builder Functions
// ##########################################################################
// ############ Structural Attribute Builder Functions ######################
// ##########################################################################
//#region Structural Attribute Builder Functions
addSentence() {
this.hideEverything();
if (this.elements.sentence.text === 'End Sentence') {
this.queryChipFactory('end-sentence', 'Sentence End', '</s>');
this.elements.sentence.innerHTML = 'Sentence';
} else {
this.queryChipFactory('start-sentence', 'Sentence Start', '<s>');
this.elements.queryContent.push('sentence');
this.elements.sentence.innerHTML = 'End Sentence';
}
}
addEntity() {
if (this.elements.entity.text === 'End Entity') {
let queryText;
if (this.elements.entityAnyType === false) {
queryText = '</ent_type>';
} else {
queryText = '</ent>';
}
this.queryChipFactory('end-entity', 'Entity End', queryText);
this.elements.entity.innerHTML = 'Entity';
} else {
this.hideEverything();
this.elements.entityBuilder.classList.remove('hide');
window.location.href = '#entity-builder';
}
}
englishEntTypeHandler() {
this.queryChipFactory('start-entity', 'Entity Type=' + this.elements.englishEntType.value, '<ent_type="' + this.elements.englishEntType.value + '">');
this.elements.entity.innerHTML = 'End Entity';
this.hideEverything();
this.elements.entityAnyType = false;
// Resets materialize select dropdown
let SelectInstance = M.FormSelect.getInstance(this.elements.englishEntType);
SelectInstance.input.value = 'English ent_type';
this.elements.englishEntType.value = 'default';
}
germanEntTypeHandler() {
this.queryChipFactory('start-entity', 'Entity Type=' + this.elements.germanEntType.value, '<ent_type="' + this.elements.germanEntType.value + '">');
this.elements.entity.innerHTML = 'End Entity';
this.hideEverything();
this.elements.entityAnyType = false;
// Resets materialize select dropdown
let SelectInstance = M.FormSelect.getInstance(this.elements.germanEntType);
SelectInstance.input.value = 'German ent_type';
this.elements.germanEntType.value = 'default';
}
emptyEntityButton() {
this.queryChipFactory('start-empty-entity', 'Entity Start', '<ent>');
this.elements.entity.innerHTML = 'End Entity';
this.hideEverything();
this.elements.entityAnyType = true;
}
addTextAnnotation() {
this.hideEverything();
this.elements.textAnnotationBuilder.classList.remove('hide');
window.location.href = '#text-annotation-builder';
// Resets materialize select dropdown
let SelectInstance = M.FormSelect.getInstance(this.elements.textAnnotationOptions);
SelectInstance.input.value = 'address';
this.elements.textAnnotationOptions.value = 'address';
this.elements.textAnnotationInput.value= '';
}
textAnnotationSubmitHandler() {
if (this.elements.textAnnotationInput.value === '') {
this.elements.textAnnotationSubmit.classList.add('red');
this.elements.noValueMetadataMessage.classList.remove('hide');
setTimeout(() => {
this.elements.textAnnotationSubmit.classList.remove('red');
}, 500);
setTimeout(() => {
this.elements.noValueMetadataMessage.classList.add('hide');
}, 3000);
} else {
let queryText = `:: match.text_${this.elements.textAnnotationOptions.value}="${this.elements.textAnnotationInput.value}"`;
this.queryChipFactory('text-annotation', `${this.elements.textAnnotationOptions.value}=${this.elements.textAnnotationInput.value}`, queryText);
this.hideEverything();
}
}
//#endregion Structural Attribute Builder Functions
}

View File

@ -1,18 +0,0 @@
/**
 * Form that redirects to the URL given in the 'Location' response header
 * once its request finished with status 201.
 */
class CreateContributionForm extends Form {
  /** Wraps every '.create-contribution-form' element of the document. */
  static autoInit() {
    document.querySelectorAll('.create-contribution-form').forEach((formElement) => {
      new CreateContributionForm(formElement);
    });
  }

  /** @param {Element} formElement - The form element handed to Form. */
  constructor(formElement) {
    super(formElement);
    this.addEventListener('requestLoad', ({target: request}) => {
      if (request.status !== 201) {return;}
      window.location.href = request.getResponseHeader('Location');
    });
  }
}

View File

@ -1,18 +0,0 @@
/**
 * Form that redirects to the URL given in the 'Location' response header
 * once its request finished with status 201.
 */
class CreateCorpusFileForm extends Form {
  /** Wraps every '.create-corpus-file-form' element of the document. */
  static autoInit() {
    document.querySelectorAll('.create-corpus-file-form').forEach((formElement) => {
      new CreateCorpusFileForm(formElement);
    });
  }

  /** @param {Element} formElement - The form element handed to Form. */
  constructor(formElement) {
    super(formElement);
    this.addEventListener('requestLoad', ({target: request}) => {
      if (request.status !== 201) {return;}
      window.location.href = request.getResponseHeader('Location');
    });
  }
}

View File

@ -1,20 +0,0 @@
/*****************************************************************************
* Admin *
* Fetch requests for /admin routes *
*****************************************************************************/
Requests.admin = {
  users: {
    entity: {
      confirmed: {
        // PUT /admin/users/<userId>/confirmed with the JSON encoded value.
        // Returns whatever Requests.JSONfetch returns.
        update: (userId, value) => Requests.JSONfetch(
          `/admin/users/${userId}/confirmed`,
          {method: 'PUT', body: JSON.stringify(value)}
        )
      }
    }
  }
};

View File

@ -1,5 +0,0 @@
/*****************************************************************************
* Contributions *
* Fetch requests for /contributions routes *
*****************************************************************************/
// Namespace object only: no request functions are defined on it here.
Requests.contributions = {};

View File

@ -1,26 +0,0 @@
/*****************************************************************************
* SpaCy NLP Pipeline Models *
* Fetch requests for /contributions/spacy-nlp-pipeline-models routes *
*****************************************************************************/
Requests.contributions.spacy_nlp_pipeline_models = {
  entity: {
    // DELETE /contributions/spacy-nlp-pipeline-models/<id>
    delete: (spacyNlpPipelineModelId) => Requests.JSONfetch(
      `/contributions/spacy-nlp-pipeline-models/${spacyNlpPipelineModelId}`,
      {method: 'DELETE'}
    ),
    isPublic: {
      // PUT /contributions/spacy-nlp-pipeline-models/<id>/is_public with the
      // JSON encoded value.
      update: (spacyNlpPipelineModelId, value) => Requests.JSONfetch(
        `/contributions/spacy-nlp-pipeline-models/${spacyNlpPipelineModelId}/is_public`,
        {method: 'PUT', body: JSON.stringify(value)}
      )
    }
  }
};

View File

@ -1,26 +0,0 @@
/*****************************************************************************
* Tesseract OCR Pipeline Models *
* Fetch requests for /contributions/tesseract-ocr-pipeline-models routes *
*****************************************************************************/
Requests.contributions.tesseract_ocr_pipeline_models = {
  entity: {
    // DELETE /contributions/tesseract-ocr-pipeline-models/<id>
    delete: (tesseractOcrPipelineModelId) => Requests.JSONfetch(
      `/contributions/tesseract-ocr-pipeline-models/${tesseractOcrPipelineModelId}`,
      {method: 'DELETE'}
    ),
    isPublic: {
      // PUT /contributions/tesseract-ocr-pipeline-models/<id>/is_public with
      // the JSON encoded value.
      update: (tesseractOcrPipelineModelId, value) => Requests.JSONfetch(
        `/contributions/tesseract-ocr-pipeline-models/${tesseractOcrPipelineModelId}/is_public`,
        {method: 'PUT', body: JSON.stringify(value)}
      )
    }
  }
};

View File

@ -1,46 +0,0 @@
/*****************************************************************************
* Corpora *
* Fetch requests for /corpora routes *
*****************************************************************************/
Requests.corpora = {
  entity: {
    // DELETE /corpora/<corpusId>
    delete: (corpusId) => Requests.JSONfetch(
      `/corpora/${corpusId}`,
      {method: 'DELETE'}
    ),
    // POST /corpora/<corpusId>/build
    build: (corpusId) => Requests.JSONfetch(
      `/corpora/${corpusId}/build`,
      {method: 'POST'}
    ),
    // POST /corpora/<corpusId>/generate-share-link with a JSON body holding
    // role and expiration.
    generateShareLink: (corpusId, role, expiration) => Requests.JSONfetch(
      `/corpora/${corpusId}/generate-share-link`,
      {method: 'POST', body: JSON.stringify({role, expiration})}
    ),
    isPublic: {
      // PUT /corpora/<corpusId>/is_public with the JSON encoded flag.
      update: (corpusId, isPublic) => Requests.JSONfetch(
        `/corpora/${corpusId}/is_public`,
        {method: 'PUT', body: JSON.stringify(isPublic)}
      )
    }
  }
};

View File

@ -1,15 +0,0 @@
/*****************************************************************************
* Corpora *
* Fetch requests for /corpora/<entity>/files routes *
*****************************************************************************/
Requests.corpora.entity.files = {
  ent: {
    // DELETE /corpora/<corpusId>/files/<corpusFileId>
    delete: (corpusId, corpusFileId) => Requests.JSONfetch(
      `/corpora/${corpusId}/files/${corpusFileId}`,
      {method: 'DELETE'}
    )
  }
};

View File

@ -1,35 +0,0 @@
/*****************************************************************************
* Corpora *
* Fetch requests for /corpora/<entity>/followers routes *
*****************************************************************************/
Requests.corpora.entity.followers = {
  // POST /corpora/<corpusId>/followers with the JSON encoded usernames.
  add: (corpusId, usernames) => Requests.JSONfetch(
    `/corpora/${corpusId}/followers`,
    {method: 'POST', body: JSON.stringify(usernames)}
  ),
  entity: {
    // DELETE /corpora/<corpusId>/followers/<followerId>
    delete: (corpusId, followerId) => Requests.JSONfetch(
      `/corpora/${corpusId}/followers/${followerId}`,
      {method: 'DELETE'}
    ),
    role: {
      // PUT /corpora/<corpusId>/followers/<followerId>/role with the JSON
      // encoded value.
      update: (corpusId, followerId, value) => Requests.JSONfetch(
        `/corpora/${corpusId}/followers/${followerId}/role`,
        {method: 'PUT', body: JSON.stringify(value)}
      )
    }
  }
};

View File

@ -1,31 +0,0 @@
/*****************************************************************************
* Jobs *
* Fetch requests for /jobs routes *
*****************************************************************************/
Requests.jobs = {
  entity: {
    // DELETE /jobs/<jobId>
    delete: (jobId) => Requests.JSONfetch(
      `/jobs/${jobId}`,
      {method: 'DELETE'}
    ),
    // GET /jobs/<jobId>/log
    log: (jobId) => Requests.JSONfetch(
      `/jobs/${jobId}/log`,
      {method: 'GET'}
    ),
    // POST /jobs/<jobId>/restart
    restart: (jobId) => Requests.JSONfetch(
      `/jobs/${jobId}/restart`,
      {method: 'POST'}
    )
  }
};

View File

@ -1,17 +0,0 @@
/*****************************************************************************
* Settings *
* Fetch requests for /users/<entity>/settings routes *
*****************************************************************************/
Requests.users.entity.settings = {
  profilePrivacy: {
    // PUT /users/<userId>/settings/profile-privacy/<profilePrivacySetting>
    // with the JSON encoded enabled flag.
    update: (userId, profilePrivacySetting, enabled) => Requests.JSONfetch(
      `/users/${userId}/settings/profile-privacy/${profilePrivacySetting}`,
      {method: 'PUT', body: JSON.stringify(enabled)}
    )
  }
};

View File

@ -1,35 +0,0 @@
/*****************************************************************************
* Users *
* Fetch requests for /users routes *
*****************************************************************************/
Requests.users = {
  entity: {
    // DELETE /users/<userId>
    delete: (userId) => Requests.JSONfetch(
      `/users/${userId}`,
      {method: 'DELETE'}
    ),
    // POST /users/accept-terms-of-use (takes no user id)
    acceptTermsOfUse: () => Requests.JSONfetch(
      '/users/accept-terms-of-use',
      {method: 'POST'}
    ),
    avatar: {
      // DELETE /users/<userId>/avatar
      delete: (userId) => Requests.JSONfetch(
        `/users/${userId}/avatar`,
        {method: 'DELETE'}
      )
    }
  }
};

204
app/static/js/app.js Normal file
View File

@ -0,0 +1,204 @@
nopaque.App = class App {
  /**
   * Creates the client-side data cache, opens the Socket.IO connection and
   * registers the handler for incoming 'PATCH' events.
   */
  constructor() {
    this.data = {
      promises: {getUser: {}, subscribeUser: {}},
      users: {},
    };
    this.socket = io({transports: ['websocket'], upgrade: false});
    this.socket.on('PATCH', (patch) => {this.onPatch(patch);});
  }

  /**
   * Requests a user via the socket and stores the response body in
   * this.data.users. The promise is cached per user id, so repeated calls
   * return the same promise (this includes a rejected one).
   *
   * @param {string} userId
   * @returns {Promise} Resolves with the stored user data; rejects with the
   *   string "[<status>] <statusText>" if the response status is not 200.
   */
  getUser(userId) {
    if (userId in this.data.promises.getUser) {
      return this.data.promises.getUser[userId];
    }

    this.data.promises.getUser[userId] = new Promise((resolve, reject) => {
      this.socket.emit('GET /users/<user_id>', userId, (response) => {
        if (response.status === 200) {
          this.data.users[userId] = response.body;
          resolve(this.data.users[userId]);
        } else {
          reject(`[${response.status}] ${response.statusText}`);
        }
      });
    });

    return this.data.promises.getUser[userId];
  }

  /**
   * Sends a subscribe request for a user via the socket. The promise is
   * cached per user id, so repeated calls return the same promise.
   *
   * @param {string} userId
   * @returns {Promise} Resolves with the response if its status is 200,
   *   rejects with the response otherwise.
   */
  subscribeUser(userId) {
    if (userId in this.data.promises.subscribeUser) {
      return this.data.promises.subscribeUser[userId];
    }

    this.data.promises.subscribeUser[userId] = new Promise((resolve, reject) => {
      this.socket.emit('SUBSCRIBE /users/<user_id>', userId, (response) => {
        if (response.status !== 200) {
          reject(response);
          return;
        }
        resolve(response);
      });
    });

    return this.data.promises.subscribeUser[userId];
  }

  /**
   * Shows a Materialize toast with a category icon and a close button.
   *
   * @param {string} message - Inserted into the toast as HTML; callers must
   *   escape any untrusted text themselves.
   * @param {string} [category] - 'corpus', 'error', 'job' or 'settings';
   *   anything else gets the generic notification icon.
   */
  flash(message, category) {
    let iconPrefix = '';
    switch (category) {
      case 'corpus': {
        iconPrefix = '<i class="left material-icons">book</i>';
        break;
      }
      case 'error': {
        iconPrefix = '<i class="error-color-text left material-icons">error</i>';
        break;
      }
      case 'job': {
        iconPrefix = '<i class="left nopaque-icons">J</i>';
        break;
      }
      case 'settings': {
        iconPrefix = '<i class="left material-icons">settings</i>';
        break;
      }
      default: {
        iconPrefix = '<i class="left material-icons">notifications</i>';
        break;
      }
    }
    let toast = M.toast(
      {
        html: `
          <span>${iconPrefix}${message}</span>
          <button class="action-button btn-flat toast-action white-text" data-action="close">
            <i class="material-icons">close</i>
          </button>
        `.trim()
      }
    );
    let toastCloseActionElement = toast.el.querySelector('.action-button[data-action="close"]');
    toastCloseActionElement.addEventListener('click', () => {toast.dismiss();});
  }

  /**
   * Handles a patch received via the socket: keeps only the operations that
   * target users present in this.data.users, flashes a toast for every job
   * status replacement and applies the remaining operations to this.data.
   *
   * @param {Array<Object>} patch - Operations with at least `op` and `path`.
   */
  onPatch(patch) {
    const users = this.data.users;
    // Filter Patch to only include operations on users that are initialized.
    // The user id segment is compared exactly. A regular expression built
    // from the cached ids would match every user path while no user is
    // cached yet and would also accept ids that merely share a prefix.
    const userPathRegExp = /^\/users\/([^/]+)(?:\/|$)/;
    const filteredPatch = patch.filter((operation) => {
      const match = userPathRegExp.exec(operation.path);
      return match !== null && Object.prototype.hasOwnProperty.call(users, match[1]);
    });

    // The job title ends up in the toast's HTML, so it has to be escaped.
    const htmlEntities = {'&': '&amp;', '<': '&lt;', '>': '&gt;', '"': '&quot;', "'": '&#39;'};
    const escapeHtml = (text) => {
      return String(text).replace(/[&<>"']/g, (character) => htmlEntities[character]);
    };

    // Handle job status updates. This has to happen before the patch is
    // applied because the title is read from the current data.
    const jobStatusRegExp = /^\/users\/([A-Za-z0-9]*)\/jobs\/([A-Za-z0-9]*)\/status$/;
    for (const operation of filteredPatch) {
      if (operation.op !== 'replace') {continue;}
      const match = jobStatusRegExp.exec(operation.path);
      if (match === null) {continue;}
      const [, userId, jobId] = match;
      const jobTitle = escapeHtml(users[userId].jobs[jobId].title);
      this.flash(`[<a href="/jobs/${jobId}">${jobTitle}</a>] New status: <span class="job-status-text" data-status="${operation.value}"></span>`, 'job');
    }

    // Apply Patch
    jsonpatch.applyPatch(this.data, filteredPatch);
  }

  /** Initializes the application; currently this only sets up the UI. */
  init() {
    this.initUi();
  }

  /**
   * Applies markup fixes and initializes the Materialize and nopaque
   * components of the current page.
   */
  initUi() {
    /* Pre-Initialization fixes */
    // #region

    // Flask-WTF sets the standard HTML maxlength Attribute on input/textarea
    // elements to specify their maximum length (in characters). Unfortunately
    // Materialize won't recognize the maxlength Attribute, instead it uses
    // the data-length Attribute. It's conversion time :)
    for (let elem of document.querySelectorAll('input[maxlength], textarea[maxlength]')) {
      elem.dataset.length = elem.getAttribute('maxlength');
      elem.removeAttribute('maxlength');
    }

    // To work around some limitations with the Form setup of Flask-WTF.
    // HTML option elements with an empty value are considered as placeholder
    // elements. The user should not be able to actively select these options.
    // So they get the disabled attribute.
    for (let optionElement of document.querySelectorAll('option[value=""]')) {
      optionElement.disabled = true;
    }

    // TODO: Check why we are doing this.
    // Moves the children of optgroups with an empty label to the beginning
    // of the parent element and removes the then empty optgroup. The live
    // children collection is copied first: moving nodes while iterating it
    // skips every second child, which would then be removed together with
    // the optgroup.
    for (let optgroupElement of document.querySelectorAll('optgroup[label=""]')) {
      const children = Array.from(optgroupElement.children);
      optgroupElement.parentElement.prepend(...children);
      optgroupElement.remove();
    }
    // #endregion

    /* Initialize Materialize Components */
    // #region

    // Automatically initialize Materialize Components that do not require
    // additional configuration.
    M.AutoInit();

    // CharacterCounters
    // Materialize didn't include the CharacterCounter plugin within the
    // AutoInit method (maybe they forgot it?). Anyway... We do it here. :)
    M.CharacterCounter.init(document.querySelectorAll('input[data-length]:not(.no-autoinit), textarea[data-length]:not(.no-autoinit)'));

    // Header navigation "more" Dropdown.
    M.Dropdown.init(
      document.querySelector('#nav-more-dropdown-trigger'),
      {
        alignment: 'right',
        constrainWidth: false,
        coverTrigger: false
      }
    );

    // Manual modal
    M.Modal.init(
      document.querySelector('#manual-modal'),
      {
        onOpenStart: (modalElement, modalTriggerElement) => {
          // Guard against a missing trigger element (for example when the
          // modal is not opened through a trigger).
          if (modalTriggerElement && 'manualModalChapter' in modalTriggerElement.dataset) {
            let manualModalTocElement = document.querySelector('#manual-modal-toc');
            let manualModalToc = M.Tabs.getInstance(manualModalTocElement);
            manualModalToc.select(modalTriggerElement.dataset.manualModalChapter);
            // TODO: Make this work.
            // if ('manualModalChapterAnchor' in modalTriggerElement.dataset) {
            //   let manualModalChapterAnchor = document.querySelector(`#${modalTriggerElement.dataset.manualModalChapterAnchor}`);
            //   let xCoord = manualModalChapterAnchor.getBoundingClientRect().left;
            //   let yCoord = manualModalChapterAnchor.getBoundingClientRect().top;
            //   let modalContentElement = modalElement.querySelector('.modal-content');
            //   modalContentElement.scroll(xCoord, yCoord);
            // }
          }
        }
      }
    );

    // Terms of use modal
    M.Modal.init(
      document.querySelector('#terms-of-use-modal'),
      {
        dismissible: false,
        // Closing the modal sends the acceptance request.
        onCloseEnd: (modalElement) => {
          nopaque.requests.users.entity.acceptTermsOfUse();
        }
      }
    );
    // #endregion

    /* Initialize nopaque Components */
    // #region
    nopaque.resource_displays.AutoInit();
    nopaque.resource_lists.AutoInit();
    nopaque.forms.AutoInit();
    // #endregion
  }
};

View File

@ -0,0 +1,119 @@
/**
 * Controller of the corpus analysis page.
 *
 * Holds the page-level DOM and Materialize handles, sets up the CQi over
 * SocketIO session for one corpus and initializes every registered extension.
 */
nopaque.corpus_analysis.App = class App {
  /**
   * @param {string} corpusId id of the corpus to analyse
   */
  constructor(corpusId) {
    this.corpusId = corpusId;
    this.data = {};
    // HTML elements
    this.elements = {
      container: document.querySelector('#corpus-analysis-container'),
      extensionCards: document.querySelector('#corpus-analysis-extension-cards'),
      extensionTabs: document.querySelector('#corpus-analysis-extension-tabs'),
      initModal: document.querySelector('#corpus-analysis-init-modal')
    };
    // Materialize elements
    this.elements.m = {
      extensionTabs: M.Tabs.init(this.elements.extensionTabs),
      initModal: M.Modal.init(this.elements.initModal, {dismissible: false})
    };
    this.extensions = {};
    this.settings = {};
  }

  /**
   * Connect to the CQP server, load the corpus and initialize all registered
   * extensions while reporting progress in the (non-dismissible) init modal.
   *
   * On failure the error is rendered inside the modal, which then stays open
   * and the action elements stay disabled.
   *
   * @returns {Promise<void>}
   */
  async init() {
    this.disableActionElements();
    this.elements.m.initModal.open();
    try {
      // Setup CQi over SocketIO connection and gather data from the CQPServer
      const statusTextElement = this.elements.initModal.querySelector('.status-text');
      // Status messages are plain text, so they are always written through
      // innerText; this also keeps extension names from being parsed as HTML.
      statusTextElement.innerText = 'Creating CQi over SocketIO client...';
      const cqiClient = new nopaque.corpus_analysis.cqi.Client('/cqi_over_sio');
      statusTextElement.innerText += ' Done';
      statusTextElement.innerText = 'Waiting for the CQP server...';
      const response = await cqiClient.api.socket.emitWithAck('init', this.corpusId);
      if (response.code !== 200) {
        // Keep the server's status so the modal shows more than a bare "Error".
        // NOTE(review): assumes the init response carries `msg` like the
        // responses handled by the API client - confirm against the server.
        const initError = new Error(response.msg);
        initError.code = response.code;
        throw initError;
      }
      statusTextElement.innerText += ' Done';
      statusTextElement.innerText = 'Connecting to the CQP server...';
      await cqiClient.connect('anonymous', '');
      statusTextElement.innerText += ' Done';
      statusTextElement.innerText = 'Building and receiving corpus data cache from the server (This may take a while)...';
      const cqiCorpus = await cqiClient.corpora.get(`NOPAQUE-${this.corpusId.toUpperCase()}`);
      statusTextElement.innerText += ' Done';
      // TODO: Don't do this here
      await cqiCorpus.updateDb();
      this.data.cqiClient = cqiClient;
      this.data.cqiCorpus = cqiCorpus;
      this.data.corpus = {o: cqiCorpus}; // legacy
      // Initialize extensions
      for (const extension of Object.values(this.extensions)) {
        statusTextElement.innerText = `Initializing ${extension.name} extension...`;
        await extension.init();
        statusTextElement.innerText += ' Done';
      }
    } catch (error) {
      const errorsElement = this.elements.initModal.querySelector('.errors');
      const progressElement = this.elements.initModal.querySelector('.progress');
      errorsElement.innerText = this.#describeError(error);
      errorsElement.classList.remove('hide');
      progressElement.classList.add('hide');
      return;
    }
    // Clicking an extension card switches to the tab named by its data-target.
    for (const extensionSelectorElement of this.elements.extensionCards.querySelectorAll('.extension-selector')) {
      extensionSelectorElement.addEventListener('click', () => {
        this.elements.m.extensionTabs.select(extensionSelectorElement.dataset.target);
      });
    }
    this.enableActionElements();
    this.elements.m.initModal.close();
  }

  /**
   * Register an extension under its `name`. The first registration for a name
   * wins; later registrations with the same name are ignored.
   *
   * @param {object} extension object exposing at least `name` and `init()`
   */
  registerExtension(extension) {
    if (extension.name in this.extensions) {return;}
    this.extensions[extension.name] = extension;
  }

  /**
   * Disable every `.corpus-analysis-action` element inside the container.
   */
  disableActionElements() {
    this.#setActionElementsDisabled(true);
  }

  /**
   * Enable every `.corpus-analysis-action` element inside the container.
   */
  enableActionElements() {
    this.#setActionElementsDisabled(false);
  }

  /**
   * Build the text shown for a failed initialization:
   * "[code] ConstructorName: details".
   *
   * @param {*} error the caught value
   * @returns {string}
   */
  #describeError(error) {
    // Anything can be thrown; property lookups below need an object.
    if (error === null || (typeof error !== 'object' && typeof error !== 'function')) {
      return String(error);
    }
    let errorString = '';
    if (error.code !== undefined && error.code !== null) {
      errorString += `[${error.code}] `;
    }
    errorString += error.constructor?.name ?? 'Error';
    // CQi errors carry `description`; plain errors only have `message`.
    const details = error.description ?? error.message;
    if (details !== undefined && details !== null && details !== '') {
      errorString += `: ${details}`;
    }
    return errorString;
  }

  /**
   * Shared implementation of enable-/disableActionElements.
   *
   * @param {boolean} disabled target state of the action elements
   */
  #setActionElementsDisabled(disabled) {
    const actionElements = this.elements.container.querySelectorAll('.corpus-analysis-action');
    for (const actionElement of actionElements) {
      switch (actionElement.nodeName) {
        case 'INPUT':
          actionElement.disabled = disabled;
          break;
        case 'SELECT':
          // The control that gets toggled is the `input.select-dropdown` next
          // to the select (presumably generated by Materialize's FormSelect).
          actionElement.parentNode.querySelector('input.select-dropdown').disabled = disabled;
          break;
        default:
          actionElement.classList.toggle('disabled', disabled);
      }
    }
  }
};

View File

@ -1,4 +1,4 @@
class CorpusAnalysisConcordance { nopaque.corpus_analysis.ConcordanceExtension = class ConcordanceExtension {
name = 'Concordance'; name = 'Concordance';
constructor(app) { constructor(app) {
@ -7,56 +7,50 @@ class CorpusAnalysisConcordance {
this.data = {}; this.data = {};
this.elements = { this.elements = {
// TODO: Prefix elements with "corpus-analysis-app-" container: document.querySelector(`#corpus-analysis-concordance-container`),
container: document.querySelector('#concordance-extension-container'), error: document.querySelector(`#corpus-analysis-concordance-error`),
error: document.querySelector('#concordance-extension-error'), userInterfaceForm: document.querySelector(`#corpus-analysis-concordance-user-interface-form`),
form: document.querySelector('#concordance-extension-form'), expertModeForm: document.querySelector(`#corpus-analysis-concordance-expert-mode-form`),
progress: document.querySelector('#concordance-extension-progress'), queryBuilderForm: document.querySelector(`#corpus-analysis-concordance-query-builder-form`),
subcorpusInfo: document.querySelector('#concordance-extension-subcorpus-info'), progress: document.querySelector(`#corpus-analysis-concordance-progress`),
subcorpusActions: document.querySelector('#concordance-extension-subcorpus-actions'), subcorpusInfo: document.querySelector(`#corpus-analysis-concordance-subcorpus-info`),
subcorpusItems: document.querySelector('#concordance-extension-subcorpus-items'), subcorpusActions: document.querySelector(`#corpus-analysis-concordance-subcorpus-actions`),
subcorpusList: document.querySelector('#concordance-extension-subcorpus-list'), subcorpusItems: document.querySelector(`#corpus-analysis-concordance-subcorpus-items`),
subcorpusPagination: document.querySelector('#concordance-extension-subcorpus-pagination') subcorpusList: document.querySelector(`#corpus-analysis-concordance-subcorpus-list`),
subcorpusPagination: document.querySelector(`#corpus-analysis-concordance-subcorpus-pagination`)
}; };
this.settings = { this.settings = {
context: parseInt(this.elements.form['context'].value), context: parseInt(this.elements.userInterfaceForm['context'].value),
perPage: parseInt(this.elements.form['per-page'].value), perPage: parseInt(this.elements.userInterfaceForm['per-page'].value),
selectedSubcorpus: undefined, selectedSubcorpus: undefined,
textStyle: parseInt(this.elements.form['text-style'].value), textStyle: parseInt(this.elements.userInterfaceForm['text-style'].value),
tokenRepresentation: this.elements.form['token-representation'].value tokenRepresentation: this.elements.userInterfaceForm['token-representation'].value
}; };
this.app.registerExtension(this); this.app.registerExtension(this);
} }
init() { async submitForm(queryModeId) {
// Init data
this.data.corpus = this.app.data.corpus;
this.data.subcorpora = {};
// Add event listeners
this.elements.form.addEventListener('submit', event => {
event.preventDefault();
this.app.disableActionElements(); this.app.disableActionElements();
let query = this.elements.form.query.value.trim(); let queryBuilderQuery = nopaque.Utils.unescape(document.querySelector('#corpus-analysis-concordance-query-preview').innerHTML.trim());
let subcorpusName = this.elements.form['subcorpus-name'].value; let expertModeQuery = this.elements.expertModeForm.query.value.trim();
let query = queryModeId === 'corpus-analysis-concordance-expert-mode-form' ? expertModeQuery : queryBuilderQuery;
let form = queryModeId === 'corpus-analysis-concordance-expert-mode-form' ? this.elements.expertModeForm : this.elements.queryBuilderForm;
let subcorpusName = form['subcorpus-name'].value;
this.elements.error.innerText = ''; this.elements.error.innerText = '';
this.elements.error.classList.add('hide'); this.elements.error.classList.add('hide');
this.elements.progress.classList.remove('hide'); this.elements.progress.classList.remove('hide');
let subcorpus = {}; try {
this.data.corpus.o.query(subcorpusName, query) const subcorpus = {};
.then(cQiStatus => {
subcorpus.q = query; subcorpus.q = query;
subcorpus.selectedItems = new Set(); subcorpus.selectedItems = new Set();
await this.data.corpus.o.query(subcorpusName, query);
if (subcorpusName !== 'Last') {this.data.subcorpora.Last = subcorpus;} if (subcorpusName !== 'Last') {this.data.subcorpora.Last = subcorpus;}
return this.data.corpus.o.subcorpora.get(subcorpusName); const cqiSubcorpus = await this.data.corpus.o.subcorpora.get(subcorpusName);
}) subcorpus.o = cqiSubcorpus;
.then(cQiSubcorpus => { const paginatedSubcorpus = await cqiSubcorpus.paginate(this.settings.context, 1, this.settings.perPage);
subcorpus.o = cQiSubcorpus;
return cQiSubcorpus.paginate(1, this.settings.perPage, this.settings.context);
})
.then(
paginatedSubcorpus => {
subcorpus.p = paginatedSubcorpus; subcorpus.p = paginatedSubcorpus;
this.data.subcorpora[subcorpusName] = subcorpus; this.data.subcorpora[subcorpusName] = subcorpus;
this.settings.selectedSubcorpus = subcorpusName; this.settings.selectedSubcorpus = subcorpusName;
@ -66,34 +60,46 @@ class CorpusAnalysisConcordance {
this.renderSubcorpusItems(); this.renderSubcorpusItems();
this.renderSubcorpusPagination(); this.renderSubcorpusPagination();
this.elements.progress.classList.add('hide'); this.elements.progress.classList.add('hide');
this.app.enableActionElements(); } catch (error) {
}, let errorString = '';
cQiError => { if ('code' in error) {errorString += `[${error.code}] `;}
this.elements.error.innerText = JSON.stringify(cQiError); errorString += `${error.constructor.name}`;
this.elements.error.innerText = errorString;
this.elements.error.classList.remove('hide'); this.elements.error.classList.remove('hide');
if ('payload' in cQiError && 'code' in cQiError.payload && 'msg' in cQiError.payload) { app.flash(errorString, 'error');
app.flash(`${cQiError.payload.code}: ${cQiError.payload.msg}`, 'error');
}
this.elements.progress.classList.add('hide'); this.elements.progress.classList.add('hide');
}
this.app.enableActionElements(); this.app.enableActionElements();
} }
);
async init() {
// Init data
this.data.corpus = this.app.data.corpus;
this.data.subcorpora = {};
// Add event listeners
this.elements.expertModeForm.addEventListener('submit', (event) => {
event.preventDefault();
this.submitForm(this.elements.expertModeForm.id);
}); });
this.elements.form.addEventListener('change', event => { this.elements.queryBuilderForm.addEventListener('submit', (event) => {
if (event.target === this.elements.form['context']) { event.preventDefault();
this.settings.context = parseInt(this.elements.form['context'].value); this.submitForm(this.elements.queryBuilderForm.id);
this.elements.form.submit.click(); });
this.elements.userInterfaceForm.addEventListener('change', (event) => {
if (event.target === this.elements.userInterfaceForm['context']) {
this.settings.context = parseInt(this.elements.userInterfaceForm['context'].value);
this.submitForm();
} }
if (event.target === this.elements.form['per-page']) { if (event.target === this.elements.userInterfaceForm['per-page']) {
this.settings.perPage = parseInt(this.elements.form['per-page'].value); this.settings.perPage = parseInt(this.elements.userInterfaceForm['per-page'].value);
this.elements.form.submit.click(); this.submitForm();
} }
if (event.target === this.elements.form['text-style']) { if (event.target === this.elements.userInterfaceForm['text-style']) {
this.settings.textStyle = parseInt(this.elements.form['text-style'].value); this.settings.textStyle = parseInt(this.elements.userInterfaceForm['text-style'].value);
this.setTextStyle(); this.setTextStyle();
} }
if (event.target === this.elements.form['token-representation']) { if (event.target === this.elements.userInterfaceForm['token-representation']) {
this.settings.tokenRepresentation = this.elements.form['token-representation'].value; this.settings.tokenRepresentation = this.elements.userInterfaceForm['token-representation'].value;
this.setTokenRepresentation(); this.setTokenRepresentation();
} }
}); });
@ -162,14 +168,14 @@ class CorpusAnalysisConcordance {
</a> </a>
`.trim(); `.trim();
M.Tooltip.init(this.elements.subcorpusActions.querySelectorAll('.tooltipped')); M.Tooltip.init(this.elements.subcorpusActions.querySelectorAll('.tooltipped'));
this.elements.subcorpusActions.querySelector('.subcorpus-export-trigger').addEventListener('click', event => { this.elements.subcorpusActions.querySelector('.subcorpus-export-trigger').addEventListener('click', (event) => {
event.preventDefault(); event.preventDefault();
let subcorpus = this.data.subcorpora[this.settings.selectedSubcorpus]; let subcorpus = this.data.subcorpora[this.settings.selectedSubcorpus];
let modalElementId = Utils.generateElementId('export-subcorpus-modal-'); let modalElementId = nopaque.Utils.generateElementId('export-subcorpus-modal-');
let exportFormatSelectElementId = Utils.generateElementId('export-format-select-'); let exportFormatSelectElementId = nopaque.Utils.generateElementId('export-format-select-');
let exportSelectedMatchesOnlyCheckboxElementId = Utils.generateElementId('export-selected-matches-only-checkbox-'); let exportSelectedMatchesOnlyCheckboxElementId = nopaque.Utils.generateElementId('export-selected-matches-only-checkbox-');
let exportFileNameInputElementId = Utils.generateElementId('export-file-name-input-'); let exportFileNameInputElementId = nopaque.Utils.generateElementId('export-file-name-input-');
let modalElement = Utils.HTMLToElement( let modalElement = nopaque.Utils.HTMLToElement(
` `
<div class="modal" id="${modalElementId}"> <div class="modal" id="${modalElementId}">
<div class="modal-content"> <div class="modal-content">
@ -219,7 +225,7 @@ class CorpusAnalysisConcordance {
} }
} }
); );
exportButton.addEventListener('click', event => { exportButton.addEventListener('click', (event) => {
event.preventDefault(); event.preventDefault();
this.app.disableActionElements(); this.app.disableActionElements();
this.elements.progress.classList.remove('hide'); this.elements.progress.classList.remove('hide');
@ -236,12 +242,12 @@ class CorpusAnalysisConcordance {
app.flash('No matches selected', 'error'); app.flash('No matches selected', 'error');
return; return;
} }
promise = subcorpus.o.partial_export([...subcorpus.selectedItems], 50); promise = subcorpus.o.partialExport([...subcorpus.selectedItems], 50);
} else { } else {
promise = subcorpus.o.export(50); promise = subcorpus.o.export(50);
} }
promise.then( promise.then(
data => { (data) => {
let blob; let blob;
if (exportFormat === 'csv') { if (exportFormat === 'csv') {
let csvContent = 'sep=,\r\n'; let csvContent = 'sep=,\r\n';
@ -287,11 +293,11 @@ class CorpusAnalysisConcordance {
}); });
modal.open(); modal.open();
}); });
this.elements.subcorpusActions.querySelector('.subcorpus-delete-trigger').addEventListener('click', event => { this.elements.subcorpusActions.querySelector('.subcorpus-delete-trigger').addEventListener('click', (event) => {
event.preventDefault(); event.preventDefault();
let subcorpus = this.data.subcorpora[this.settings.selectedSubcorpus]; let subcorpus = this.data.subcorpora[this.settings.selectedSubcorpus];
subcorpus.o.drop().then( subcorpus.o.drop().then(
cQiStatus => { (cQiStatus) => {
app.flash(`${subcorpus.o.name} deleted`, 'corpus'); app.flash(`${subcorpus.o.name} deleted`, 'corpus');
delete this.data.subcorpora[subcorpus.o.name]; delete this.data.subcorpora[subcorpus.o.name];
this.settings.selectedSubcorpus = undefined; this.settings.selectedSubcorpus = undefined;
@ -312,8 +318,9 @@ class CorpusAnalysisConcordance {
this.clearSubcorpusPagination(); this.clearSubcorpusPagination();
} }
}, },
cQiError => { (cqiError) => {
app.flash(`${cQiError.payload.code}: ${cQiError.payload.msg}`, 'error'); let errorString = `${cqiError.code}: ${cqiError.constructor.name}`;
app.flash(errorString, 'error');
} }
); );
}); });
@ -362,7 +369,7 @@ class CorpusAnalysisConcordance {
this.setTextStyle(); this.setTextStyle();
this.setTokenRepresentation(); this.setTokenRepresentation();
for (let gotoReaderTriggerElement of this.elements.subcorpusItems.querySelectorAll('.goto-reader-trigger')) { for (let gotoReaderTriggerElement of this.elements.subcorpusItems.querySelectorAll('.goto-reader-trigger')) {
gotoReaderTriggerElement.addEventListener('click', event => { gotoReaderTriggerElement.addEventListener('click', (event) => {
event.preventDefault(); event.preventDefault();
let corpusAnalysisReader = this.app.extensions.Reader; let corpusAnalysisReader = this.app.extensions.Reader;
let itemId = parseInt(gotoReaderTriggerElement.closest('.item').dataset.id); let itemId = parseInt(gotoReaderTriggerElement.closest('.item').dataset.id);
@ -380,11 +387,13 @@ class CorpusAnalysisConcordance {
document.getSelection().removeAllRanges(); document.getSelection().removeAllRanges();
document.getSelection().addRange(range); document.getSelection().addRange(range);
}); });
this.app.elements.m.extensionTabs.select('reader-extension-container'); this.app.elements.m.extensionTabs.select(
this.app.extensions.Reader.elements.container.id
);
}); });
} }
for (let selectTriggerElement of this.elements.subcorpusItems.querySelectorAll('.select-trigger')) { for (let selectTriggerElement of this.elements.subcorpusItems.querySelectorAll('.select-trigger')) {
selectTriggerElement.addEventListener('click', event => { selectTriggerElement.addEventListener('click', (event) => {
event.preventDefault(); event.preventDefault();
let itemElement = selectTriggerElement.closest('.item'); let itemElement = selectTriggerElement.closest('.item');
let itemId = parseInt(itemElement.dataset.id); let itemId = parseInt(itemElement.dataset.id);
@ -446,14 +455,14 @@ class CorpusAnalysisConcordance {
</li> </li>
`.trim(); `.trim();
for (let paginationTriggerElement of this.elements.subcorpusPagination.querySelectorAll('.pagination-trigger[data-target]')) { for (let paginationTriggerElement of this.elements.subcorpusPagination.querySelectorAll('.pagination-trigger[data-target]')) {
paginationTriggerElement.addEventListener('click', event => { paginationTriggerElement.addEventListener('click', (event) => {
event.preventDefault(); event.preventDefault();
this.app.disableActionElements(); this.app.disableActionElements();
this.elements.progress.classList.remove('hide'); this.elements.progress.classList.remove('hide');
let page = parseInt(paginationTriggerElement.dataset.target); let page = parseInt(paginationTriggerElement.dataset.target);
subcorpus.o.paginate(page, this.settings.perPage, this.settings.context) subcorpus.o.paginate(this.settings.context, page, this.settings.perPage)
.then( .then(
paginatedSubcorpus => { (paginatedSubcorpus) => {
subcorpus.p = paginatedSubcorpus; subcorpus.p = paginatedSubcorpus;
this.renderSubcorpusItems(); this.renderSubcorpusItems();
this.renderSubcorpusPagination(); this.renderSubcorpusPagination();

View File

@ -1,11 +1,16 @@
cqi.api.APIClient = class APIClient { nopaque.corpus_analysis.cqi.api.Client = class Client {
constructor(host, corpus_id, version = '0.1') { /**
* @param {string} host
* @param {number} [timeout=60] timeout
* @param {string} [version=0.1] version
*/
constructor(host, timeout = 60, version = '0.1') {
this.host = host; this.host = host;
this.timeout = timeout * 1000; // convert seconds to milliseconds
this.version = version; this.version = version;
this.socket = io( this.socket = io(
this.host, this.host,
{ {
auth: {corpus_id: corpus_id},
transports: ['websocket'], transports: ['websocket'],
upgrade: false upgrade: false
} }
@ -15,43 +20,43 @@ cqi.api.APIClient = class APIClient {
/** /**
* @param {string} fn_name * @param {string} fn_name
* @param {object} [fn_args={}] * @param {object} [fn_args={}]
* @returns {Promise<cqi.status.StatusConnectOk>} * @returns {Promise}
*/ */
#request(fn_name, fn_args = {}) { async #request(fn_name, fn_args = {}) {
return new Promise((resolve, reject) => { // TODO: implement timeout
this.socket.emit('cqi_client.api', {fn_name: fn_name, fn_args: fn_args}, (response) => { let response = await this.socket.emitWithAck('exec', fn_name, fn_args);
if (response.code === 200) { if (response.code === 200) {
resolve(response.payload); return response.payload;
} else if (response.code === 500) {
throw new Error(`[${response.code}] ${response.msg}`);
} else if (response.code === 502) {
if (response.payload.code in nopaque.corpus_analysis.cqi.errors.lookup) {
throw new nopaque.corpus_analysis.cqi.errors.lookup[response.payload.code]();
} else {
throw new nopaque.corpus_analysis.cqi.errors.CQiError();
} }
if (response.code === 500) {
reject(new Error(`[${response.code}] ${response.msg}`));
} }
if (response.code === 502) {
reject(new cqi.errors.lookup[response.payload.code]());
}
});
});
} }
/** /**
* @param {string} username * @param {string} username
* @param {string} password * @param {string} password
* @returns {Promise<cqi.status.StatusConnectOk>} * @returns {Promise<nopaque.corpus_analysis.cqi.status.StatusConnectOk>}
*/ */
async ctrl_connect(username, password) { async ctrl_connect(username, password) {
const fn_name = 'ctrl_connect'; const fn_name = 'ctrl_connect';
const fn_args = {username: username, password: password}; const fn_args = {username: username, password: password};
let payload = await this.#request(fn_name, fn_args); let payload = await this.#request(fn_name, fn_args);
return new cqi.status.lookup[payload.code](); return new nopaque.corpus_analysis.cqi.status.lookup[payload.code]();
} }
/** /**
* @returns {Promise<cqi.status.StatusByeOk>} * @returns {Promise<nopaque.corpus_analysis.cqi.status.StatusByeOk>}
*/ */
async ctrl_bye() { async ctrl_bye() {
const fn_name = 'ctrl_bye'; const fn_name = 'ctrl_bye';
let payload = await this.#request(fn_name); let payload = await this.#request(fn_name);
return new cqi.status.lookup[payload.code](); return new nopaque.corpus_analysis.cqi.status.lookup[payload.code]();
} }
/** /**
@ -63,12 +68,12 @@ cqi.api.APIClient = class APIClient {
} }
/** /**
* @returns {Promise<cqi.status.StatusPingOk>} * @returns {Promise<nopaque.corpus_analysis.cqi.status.StatusPingOk>}
*/ */
async ctrl_ping() { async ctrl_ping() {
const fn_name = 'ctrl_ping'; const fn_name = 'ctrl_ping';
let payload = await this.#request(fn_name); let payload = await this.#request(fn_name);
return new cqi.status.lookup[payload.code](); return new nopaque.corpus_analysis.cqi.status.lookup[payload.code]();
} }
/** /**
@ -203,13 +208,13 @@ cqi.api.APIClient = class APIClient {
* try to unload a corpus and all its attributes from memory * try to unload a corpus and all its attributes from memory
* *
* @param {string} corpus * @param {string} corpus
* @returns {Promise<cqi.status.StatusOk>} * @returns {Promise<nopaque.corpus_analysis.cqi.status.StatusOk>}
*/ */
async corpus_drop_corpus(corpus) { async corpus_drop_corpus(corpus) {
const fn_name = 'corpus_drop_corpus'; const fn_name = 'corpus_drop_corpus';
const fn_args = {corpus: corpus}; const fn_args = {corpus: corpus};
let payload = await this.#request(fn_name, fn_args); let payload = await this.#request(fn_name, fn_args);
return new cqi.status.lookup[payload.code](); return new nopaque.corpus_analysis.cqi.status.lookup[payload.code]();
} }
/** /**
@ -245,13 +250,13 @@ cqi.api.APIClient = class APIClient {
* unload attribute from memory * unload attribute from memory
* *
* @param {string} attribute * @param {string} attribute
* @returns {Promise<cqi.status.StatusOk>} * @returns {Promise<nopaque.corpus_analysis.cqi.status.StatusOk>}
*/ */
async cl_drop_attribute(attribute) { async cl_drop_attribute(attribute) {
const fn_name = 'cl_drop_attribute'; const fn_name = 'cl_drop_attribute';
const fn_args = {attribute: attribute}; const fn_args = {attribute: attribute};
let payload = await this.#request(fn_name, fn_args); let payload = await this.#request(fn_name, fn_args);
return new cqi.status.lookup[payload.code](); return new nopaque.corpus_analysis.cqi.status.lookup[payload.code]();
} }
/** /**
@ -477,13 +482,13 @@ cqi.api.APIClient = class APIClient {
* @param {string} mother_corpus * @param {string} mother_corpus
* @param {string} subcorpus_name * @param {string} subcorpus_name
* @param {string} query * @param {string} query
* @returns {Promise<cqi.status.StatusOk>} * @returns {Promise<nopaque.corpus_analysis.cqi.status.StatusOk>}
*/ */
async cqp_query(mother_corpus, subcorpus_name, query) { async cqp_query(mother_corpus, subcorpus_name, query) {
const fn_name = 'cqp_query'; const fn_name = 'cqp_query';
const fn_args = {mother_corpus: mother_corpus, subcorpus_name: subcorpus_name, query: query}; const fn_args = {mother_corpus: mother_corpus, subcorpus_name: subcorpus_name, query: query};
let payload = await this.#request(fn_name, fn_args); let payload = await this.#request(fn_name, fn_args);
return new cqi.status.lookup[payload.code](); return new nopaque.corpus_analysis.cqi.status.lookup[payload.code]();
} }
/** /**
@ -519,7 +524,7 @@ cqi.api.APIClient = class APIClient {
/** /**
* Dump the values of <field> for match ranges <first> .. <last> * Dump the values of <field> for match ranges <first> .. <last>
* in <subcorpus>. <field> is one of the CQI_CONST_FIELD_* constants. * in <subcorpus>. <field> is one of the nopaque.corpus_analysis.cqi.constants.FIELD_* constants.
* *
* @param {string} subcorpus * @param {string} subcorpus
* @param {number} field * @param {number} field
@ -537,13 +542,13 @@ cqi.api.APIClient = class APIClient {
* delete a subcorpus from memory * delete a subcorpus from memory
* *
* @param {string} subcorpus * @param {string} subcorpus
* @returns {Promise<cqi.status.StatusOk>} * @returns {Promise<nopaque.corpus_analysis.cqi.status.StatusOk>}
*/ */
async cqp_drop_subcorpus(subcorpus) { async cqp_drop_subcorpus(subcorpus) {
const fn_name = 'cqp_drop_subcorpus'; const fn_name = 'cqp_drop_subcorpus';
const fn_args = {subcorpus: subcorpus}; const fn_args = {subcorpus: subcorpus};
let payload = await this.#request(fn_name, fn_args); let payload = await this.#request(fn_name, fn_args);
return new cqi.status.lookup[payload.code](); return new nopaque.corpus_analysis.cqi.status.lookup[payload.code]();
} }
/** /**
@ -556,9 +561,9 @@ cqi.api.APIClient = class APIClient {
* *
* returns <n> (id, frequency) pairs flattened into a list of size 2*<n> * returns <n> (id, frequency) pairs flattened into a list of size 2*<n>
* field is one of * field is one of
* - CQI_CONST_FIELD_MATCH * - nopaque.corpus_analysis.cqi.constants.FIELD_MATCH
* - CQI_CONST_FIELD_TARGET * - nopaque.corpus_analysis.cqi.constants.FIELD_TARGET
* - CQI_CONST_FIELD_KEYWORD * - nopaque.corpus_analysis.cqi.constants.FIELD_KEYWORD
* *
* NB: pairs are sorted by frequency desc. * NB: pairs are sorted by frequency desc.
* *
@ -595,4 +600,89 @@ cqi.api.APIClient = class APIClient {
const fn_args = {subcorpus: subcorpus, cutoff: cutoff, field1: field1, attribute1: attribute1, field2: field2, attribute2: attribute2}; const fn_args = {subcorpus: subcorpus, cutoff: cutoff, field1: field1, attribute1: attribute1, field2: field2, attribute2: attribute2};
return await this.#request(fn_name, fn_args); return await this.#request(fn_name, fn_args);
} }
/**************************************************************************
* NOTE: The following is not included in the CQi specification. *
**************************************************************************/
/**************************************************************************
* Custom additions for nopaque *
**************************************************************************/
/**
* @param {string} corpus
* @returns {Promise<nopaque.corpus_analysis.cqi.status.StatusOk>}
*/
async ext_corpus_update_db(corpus) {
const fn_name = 'ext_corpus_update_db';
const fn_args = {corpus: corpus};
let payload = await this.#request(fn_name, fn_args);
return new nopaque.corpus_analysis.cqi.status.lookup[payload.code]();
}
/**
* @param {string} corpus
* @returns {Promise<object>}
*/
async ext_corpus_static_data(corpus) {
const fn_name = 'ext_corpus_static_data';
const fn_args = {corpus: corpus};
let compressedEncodedData = await this.#request(fn_name, fn_args);
let data = pako.inflate(compressedEncodedData, {to: 'string'});
return JSON.parse(data);
}
/**
* @param {string} corpus
* @param {number=} page
* @param {number=} per_page
* @returns {Promise<object>}
*/
async ext_corpus_paginate_corpus(corpus, page, per_page) {
const fn_name = 'ext_corpus_paginate_corpus';
const fn_args = {corpus: corpus}
if (page !== undefined) {fn_args.page = page;}
if (per_page !== undefined) {fn_args.per_page = per_page;}
return await this.#request(fn_name, fn_args);
}
/**
* @param {string} subcorpus
* @param {number=} context
* @param {number=} page
* @param {number=} per_page
* @returns {Promise<object>}
*/
async ext_cqp_paginate_subcorpus(subcorpus, context, page, per_page) {
const fn_name = 'ext_cqp_paginate_subcorpus';
const fn_args = {subcorpus: subcorpus}
if (context !== undefined) {fn_args.context = context;}
if (page !== undefined) {fn_args.page = page;}
if (per_page !== undefined) {fn_args.per_page = per_page;}
return await this.#request(fn_name, fn_args);
}
/**
* @param {string} subcorpus
* @param {number[]} match_id_list
* @param {number=} context
* @returns {Promise<object>}
*/
async ext_cqp_partial_export_subcorpus(subcorpus, match_id_list, context) {
const fn_name = 'ext_cqp_partial_export_subcorpus';
const fn_args = {subcorpus: subcorpus, match_id_list: match_id_list};
if (context !== undefined) {fn_args.context = context;}
return await this.#request(fn_name, fn_args);
}
/**
* @param {string} subcorpus
* @param {number=} context
* @returns {Promise<object>}
*/
async ext_cqp_export_subcorpus(subcorpus, context) {
const fn_name = 'ext_cqp_export_subcorpus';
const fn_args = {subcorpus: subcorpus};
if (context !== undefined) {fn_args.context = context;}
return await this.#request(fn_name, fn_args);
}
}; };

View File

@ -0,0 +1 @@
// Namespace object that the low-level CQi API classes (e.g. `api.Client`) are attached to.
nopaque.corpus_analysis.cqi.api = {};

View File

@ -0,0 +1,57 @@
/**
 * High-level CQi client: a thin facade over the low-level API client that is
 * exposed as `this.api`.
 */
nopaque.corpus_analysis.cqi.Client = class Client {
  /**
   * @param {string} host
   * @param {number} [timeout=60] timeout, handed to the API client unchanged
   * @param {string} [version=0.1] version
   */
  constructor(host, timeout = 60, version = '0.1') {
    const {Client: APIClient} = nopaque.corpus_analysis.cqi.api;
    /** @type {nopaque.corpus_analysis.cqi.api.Client} */
    this.api = new APIClient(host, timeout, version);
  }

  /**
   * A new collection object is created on every access.
   *
   * @returns {nopaque.corpus_analysis.cqi.models.corpora.CorpusCollection}
   */
  get corpora() {
    const {CorpusCollection} = nopaque.corpus_analysis.cqi.models.corpora;
    return new CorpusCollection(this);
  }

  /**
   * @returns {Promise<nopaque.corpus_analysis.cqi.status.StatusByeOk>}
   */
  async bye() {
    const status = await this.api.ctrl_bye();
    return status;
  }

  /**
   * @param {string} username
   * @param {string} password
   * @returns {Promise<nopaque.corpus_analysis.cqi.status.StatusConnectOk>}
   */
  async connect(username, password) {
    const status = await this.api.ctrl_connect(username, password);
    return status;
  }

  /**
   * @returns {Promise<nopaque.corpus_analysis.cqi.status.StatusPingOk>}
   */
  async ping() {
    const status = await this.api.ctrl_ping();
    return status;
  }

  /**
   * @returns {Promise<null>}
   */
  async userAbort() {
    const result = await this.api.ctrl_user_abort();
    return result;
  }

  /**
   * Alias for "bye" method; issues the same `ctrl_bye` API call.
   *
   * @returns {Promise<nopaque.corpus_analysis.cqi.status.StatusByeOk>}
   */
  async disconnect() {
    const status = await this.api.ctrl_bye();
    return status;
  }
};

View File

@ -0,0 +1,43 @@
/**
 * Numeric FIELD_* constants of the CQi client.
 *
 * Note that FIELD_TARGET has the same value as FIELD_TARGET_0 and
 * FIELD_KEYWORD has the same value as FIELD_TARGET_9.
 */
nopaque.corpus_analysis.cqi.constants = {
  /** @type {number} */
  FIELD_KEYWORD: 9,
  /** @type {number} */
  FIELD_MATCH: 16,
  /** @type {number} */
  FIELD_MATCHEND: 17,
  /** @type {number} */
  FIELD_TARGET: 0,
  /** @type {number} */
  FIELD_TARGET_0: 0,
  /** @type {number} */
  FIELD_TARGET_1: 1,
  /** @type {number} */
  FIELD_TARGET_2: 2,
  /** @type {number} */
  FIELD_TARGET_3: 3,
  /** @type {number} */
  FIELD_TARGET_4: 4,
  /** @type {number} */
  FIELD_TARGET_5: 5,
  /** @type {number} */
  FIELD_TARGET_6: 6,
  /** @type {number} */
  FIELD_TARGET_7: 7,
  /** @type {number} */
  FIELD_TARGET_8: 8,
  /** @type {number} */
  FIELD_TARGET_9: 9
};

View File

@ -0,0 +1,185 @@
// Namespace object for the CQi error classes and the code-to-class `lookup`
// table; the statements that follow attach their members to it.
nopaque.corpus_analysis.cqi.errors = {};
/**
 * A base class from which all other errors inherit.
 * If you want to catch all errors that the CQi package might throw,
 * catch this base error.
 */
nopaque.corpus_analysis.cqi.errors.CQiError = class CQiError extends Error {
  /**
   * @param {string=} message Message handed to the built-in Error constructor.
   */
  constructor(message) {
    super(message);
    // Without this every CQi error reports the inherited name "Error";
    // use the concrete class name so `name` and `toString()` identify it.
    this.name = this.constructor.name;
    /**
     * Numeric code; left undefined here and assigned by the subclasses.
     *
     * @type {number|undefined}
     */
    this.code = undefined;
    /**
     * Optional explanation; only some subclasses assign one.
     *
     * @type {string|undefined}
     */
    this.description = undefined;
  }
};
/**
 * Error carrying code 2; parent class of the Error* classes (codes 513-516).
 */
nopaque.corpus_analysis.cqi.errors.Error = class Error extends nopaque.corpus_analysis.cqi.errors.CQiError {
  /** @type {number} */
  code = 2;

  /**
   * @param {string=} message Passed on to the parent constructor.
   */
  constructor(message) {
    super(message);
  }
};
/**
 * Error carrying code 513.
 */
nopaque.corpus_analysis.cqi.errors.ErrorGeneralError = class ErrorGeneralError extends nopaque.corpus_analysis.cqi.errors.Error {
  /** @type {number} */
  code = 513;

  /**
   * @param {string=} message Passed on to the parent constructor.
   */
  constructor(message) {
    super(message);
  }
};
/**
 * Error carrying code 514.
 */
nopaque.corpus_analysis.cqi.errors.ErrorConnectRefused = class ErrorConnectRefused extends nopaque.corpus_analysis.cqi.errors.Error {
  /** @type {number} */
  code = 514;

  /**
   * @param {string=} message Passed on to the parent constructor.
   */
  constructor(message) {
    super(message);
  }
};
/**
 * Error carrying code 515.
 */
nopaque.corpus_analysis.cqi.errors.ErrorUserAbort = class ErrorUserAbort extends nopaque.corpus_analysis.cqi.errors.Error {
  /** @type {number} */
  code = 515;

  /**
   * @param {string=} message Passed on to the parent constructor.
   */
  constructor(message) {
    super(message);
  }
};
/**
 * Error carrying code 516.
 */
nopaque.corpus_analysis.cqi.errors.ErrorSyntaxError = class ErrorSyntaxError extends nopaque.corpus_analysis.cqi.errors.Error {
  /** @type {number} */
  code = 516;

  /**
   * @param {string=} message Passed on to the parent constructor.
   */
  constructor(message) {
    super(message);
  }
};
/**
 * Error carrying code 4; parent class of the CLError* classes
 * (codes 1025-1031).
 *
 * The class expression is named `CLError` (it used to be `Error`) so that
 * `CLError.name` and `instance.constructor.name` match the property it is
 * stored under, like every other class in this namespace.
 */
nopaque.corpus_analysis.cqi.errors.CLError = class CLError extends nopaque.corpus_analysis.cqi.errors.CQiError {
  /**
   * @param {string=} message Passed on to the parent constructor.
   */
  constructor(message) {
    super(message);
    /** @type {number} */
    this.code = 4;
  }
};
/**
 * Error carrying code 1025.
 */
nopaque.corpus_analysis.cqi.errors.CLErrorNoSuchAttribute = class CLErrorNoSuchAttribute extends nopaque.corpus_analysis.cqi.errors.CLError {
  /** @type {number} */
  code = 1025;
  /** @type {string} */
  description = "CQi server couldn't open attribute";

  /**
   * @param {string=} message Passed on to the parent constructor.
   */
  constructor(message) {
    super(message);
  }
};
/**
 * Error carrying code 1026.
 */
nopaque.corpus_analysis.cqi.errors.CLErrorWrongAttributeType = class CLErrorWrongAttributeType extends nopaque.corpus_analysis.cqi.errors.CLError {
  /** @type {number} */
  code = 1026;

  /**
   * @param {string=} message Passed on to the parent constructor.
   */
  constructor(message) {
    super(message);
  }
};
/**
 * Error carrying code 1027.
 */
nopaque.corpus_analysis.cqi.errors.CLErrorOutOfRange = class CLErrorOutOfRange extends nopaque.corpus_analysis.cqi.errors.CLError {
  /** @type {number} */
  code = 1027;

  /**
   * @param {string=} message Passed on to the parent constructor.
   */
  constructor(message) {
    super(message);
  }
};
/**
 * Error carrying code 1028.
 */
nopaque.corpus_analysis.cqi.errors.CLErrorRegex = class CLErrorRegex extends nopaque.corpus_analysis.cqi.errors.CLError {
  /** @type {number} */
  code = 1028;

  /**
   * @param {string=} message Passed on to the parent constructor.
   */
  constructor(message) {
    super(message);
  }
};
/**
 * Error carrying code 1029.
 */
nopaque.corpus_analysis.cqi.errors.CLErrorCorpusAccess = class CLErrorCorpusAccess extends nopaque.corpus_analysis.cqi.errors.CLError {
  /** @type {number} */
  code = 1029;

  /**
   * @param {string=} message Passed on to the parent constructor.
   */
  constructor(message) {
    super(message);
  }
};
/**
 * Error carrying code 1030.
 */
nopaque.corpus_analysis.cqi.errors.CLErrorOutOfMemory = class CLErrorOutOfMemory extends nopaque.corpus_analysis.cqi.errors.CLError {
  /** @type {number} */
  code = 1030;
  /** @type {string} */
  description = 'CQi server has run out of memory; try discarding some other corpora and/or subcorpora';

  /**
   * @param {string=} message Passed on to the parent constructor.
   */
  constructor(message) {
    super(message);
  }
};
/**
 * Error carrying code 1031.
 */
nopaque.corpus_analysis.cqi.errors.CLErrorInternal = class CLErrorInternal extends nopaque.corpus_analysis.cqi.errors.CLError {
  /** @type {number} */
  code = 1031;
  /** @type {string} */
  description = "The classical 'please contact technical support' error";

  /**
   * @param {string=} message Passed on to the parent constructor.
   */
  constructor(message) {
    super(message);
  }
};
/**
 * Error carrying code 5; parent class of the CQPError* classes
 * (codes 1281-1284).
 *
 * The class expression is named `CQPError` (it used to be `Error`) so that
 * `CQPError.name` and `instance.constructor.name` match the property it is
 * stored under, like every other class in this namespace.
 */
nopaque.corpus_analysis.cqi.errors.CQPError = class CQPError extends nopaque.corpus_analysis.cqi.errors.CQiError {
  /**
   * @param {string=} message Passed on to the parent constructor.
   */
  constructor(message) {
    super(message);
    /** @type {number} */
    this.code = 5;
  }
};
/**
 * Error carrying code 1281.
 */
nopaque.corpus_analysis.cqi.errors.CQPErrorGeneral = class CQPErrorGeneral extends nopaque.corpus_analysis.cqi.errors.CQPError {
  /** @type {number} */
  code = 1281;

  /**
   * @param {string=} message Passed on to the parent constructor.
   */
  constructor(message) {
    super(message);
  }
};
/**
 * Error carrying code 1282.
 */
nopaque.corpus_analysis.cqi.errors.CQPErrorNoSuchCorpus = class CQPErrorNoSuchCorpus extends nopaque.corpus_analysis.cqi.errors.CQPError {
  /** @type {number} */
  code = 1282;

  /**
   * @param {string=} message Passed on to the parent constructor.
   */
  constructor(message) {
    super(message);
  }
};
/**
 * Error carrying code 1283.
 */
nopaque.corpus_analysis.cqi.errors.CQPErrorInvalidField = class CQPErrorInvalidField extends nopaque.corpus_analysis.cqi.errors.CQPError {
  /** @type {number} */
  code = 1283;

  /**
   * @param {string=} message Passed on to the parent constructor.
   */
  constructor(message) {
    super(message);
  }
};
/**
 * Error carrying code 1284.
 */
nopaque.corpus_analysis.cqi.errors.CQPErrorOutOfRange = class CQPErrorOutOfRange extends nopaque.corpus_analysis.cqi.errors.CQPError {
  /** @type {number} */
  code = 1284;
  /** @type {string} */
  description = 'A number is out of range';

  /**
   * @param {string=} message Passed on to the parent constructor.
   */
  constructor(message) {
    super(message);
  }
};
{
  // Block-scoped alias so the shorthand does not leak into the global scope.
  const errors = nopaque.corpus_analysis.cqi.errors;

  /**
   * Maps each numeric error code to the error class whose instances carry
   * that code.
   */
  errors.lookup = {
    // Error group
    2: errors.Error,
    513: errors.ErrorGeneralError,
    514: errors.ErrorConnectRefused,
    515: errors.ErrorUserAbort,
    516: errors.ErrorSyntaxError,
    // CLError group
    4: errors.CLError,
    1025: errors.CLErrorNoSuchAttribute,
    1026: errors.CLErrorWrongAttributeType,
    1027: errors.CLErrorOutOfRange,
    1028: errors.CLErrorRegex,
    1029: errors.CLErrorCorpusAccess,
    1030: errors.CLErrorOutOfMemory,
    1031: errors.CLErrorInternal,
    // CQPError group
    5: errors.CQPError,
    1281: errors.CQPErrorGeneral,
    1282: errors.CQPErrorNoSuchCorpus,
    1283: errors.CQPErrorInvalidField,
    1284: errors.CQPErrorOutOfRange
  };
}

View File

@ -0,0 +1 @@
// Root namespace object of the CQi client code. Other files attach
// sub-namespaces such as `constants`, `errors`, `models` and `status` to it,
// so this assignment has to run before they do.
nopaque.corpus_analysis.cqi = {};

View File

@ -1,7 +1,7 @@
cqi.models.attributes = {}; nopaque.corpus_analysis.cqi.models.attributes = {};
cqi.models.attributes.Attribute = class Attribute extends cqi.models.resource.Model { nopaque.corpus_analysis.cqi.models.attributes.Attribute = class Attribute extends nopaque.corpus_analysis.cqi.models.resource.Model {
/** /**
* @returns {string} * @returns {string}
*/ */
@ -24,7 +24,7 @@ cqi.models.attributes.Attribute = class Attribute extends cqi.models.resource.Mo
} }
/** /**
* @returns {Promise<cqi.status.StatusOk>} * @returns {Promise<nopaque.corpus_analysis.cqi.status.StatusOk>}
*/ */
async drop() { async drop() {
return await this.client.api.cl_drop_attribute(this.apiName); return await this.client.api.cl_drop_attribute(this.apiName);
@ -32,17 +32,17 @@ cqi.models.attributes.Attribute = class Attribute extends cqi.models.resource.Mo
}; };
cqi.models.attributes.AttributeCollection = class AttributeCollection extends cqi.models.resource.Collection { nopaque.corpus_analysis.cqi.models.attributes.AttributeCollection = class AttributeCollection extends nopaque.corpus_analysis.cqi.models.resource.Collection {
/** @type{typeof cqi.models.attributes.Attribute} */ /** @type{typeof nopaque.corpus_analysis.cqi.models.attributes.Attribute} */
static model = cqi.models.attributes.Attribute; static model = nopaque.corpus_analysis.cqi.models.attributes.Attribute;
/** /**
* @param {cqi.CQiClient} client * @param {nopaque.corpus_analysis.cqi.Client} client
* @param {cqi.models.corpora.Corpus} corpus * @param {nopaque.corpus_analysis.cqi.models.corpora.Corpus} corpus
*/ */
constructor(client, corpus) { constructor(client, corpus) {
super(client); super(client);
/** @type {cqi.models.corpora.Corpus} */ /** @type {nopaque.corpus_analysis.cqi.models.corpora.Corpus} */
this.corpus = corpus; this.corpus = corpus;
} }
@ -62,7 +62,7 @@ cqi.models.attributes.AttributeCollection = class AttributeCollection extends cq
/** /**
* @param {string} attributeName * @param {string} attributeName
* @returns {Promise<cqi.models.attributes.Attribute>} * @returns {Promise<nopaque.corpus_analysis.cqi.models.attributes.Attribute>}
*/ */
async get(attributeName) { async get(attributeName) {
return this.prepareModel(await this._get(attributeName)); return this.prepareModel(await this._get(attributeName));
@ -70,7 +70,7 @@ cqi.models.attributes.AttributeCollection = class AttributeCollection extends cq
}; };
cqi.models.attributes.AlignmentAttribute = class AlignmentAttribute extends cqi.models.attributes.Attribute { nopaque.corpus_analysis.cqi.models.attributes.AlignmentAttribute = class AlignmentAttribute extends nopaque.corpus_analysis.cqi.models.attributes.Attribute {
/** /**
* @param {number} id * @param {number} id
* @returns {Promise<[number, number, number, number]>} * @returns {Promise<[number, number, number, number]>}
@ -89,17 +89,17 @@ cqi.models.attributes.AlignmentAttribute = class AlignmentAttribute extends cqi.
}; };
cqi.models.attributes.AlignmentAttributeCollection = class AlignmentAttributeCollection extends cqi.models.attributes.AttributeCollection { nopaque.corpus_analysis.cqi.models.attributes.AlignmentAttributeCollection = class AlignmentAttributeCollection extends nopaque.corpus_analysis.cqi.models.attributes.AttributeCollection {
/** @type{typeof cqi.models.attributes.AlignmentAttribute} */ /** @type{typeof nopaque.corpus_analysis.cqi.models.attributes.AlignmentAttribute} */
static model = cqi.models.attributes.AlignmentAttribute; static model = nopaque.corpus_analysis.cqi.models.attributes.AlignmentAttribute;
/** /**
* @returns {Promise<cqi.models.attributes.AlignmentAttribute[]>} * @returns {Promise<nopaque.corpus_analysis.cqi.models.attributes.AlignmentAttribute[]>}
*/ */
async list() { async list() {
/** @type {string[]} */ /** @type {string[]} */
let alignmentAttributeNames = await this.client.api.corpus_alignment_attributes(this.corpus.apiName); let alignmentAttributeNames = await this.client.api.corpus_alignment_attributes(this.corpus.apiName);
/** @type {cqi.models.attributes.AlignmentAttribute[]} */ /** @type {nopaque.corpus_analysis.cqi.models.attributes.AlignmentAttribute[]} */
let alignmentAttributes = []; let alignmentAttributes = [];
for (let alignmentAttributeName of alignmentAttributeNames) { for (let alignmentAttributeName of alignmentAttributeNames) {
alignmentAttributes.push(await this.get(alignmentAttributeName)); alignmentAttributes.push(await this.get(alignmentAttributeName));
@ -109,7 +109,7 @@ cqi.models.attributes.AlignmentAttributeCollection = class AlignmentAttributeCol
}; };
cqi.models.attributes.PositionalAttribute = class PositionalAttribute extends cqi.models.attributes.Attribute { nopaque.corpus_analysis.cqi.models.attributes.PositionalAttribute = class PositionalAttribute extends nopaque.corpus_analysis.cqi.models.attributes.Attribute {
/** /**
* @returns {number} * @returns {number}
*/ */
@ -183,9 +183,9 @@ cqi.models.attributes.PositionalAttribute = class PositionalAttribute extends cq
}; };
cqi.models.attributes.PositionalAttributeCollection = class PositionalAttributeCollection extends cqi.models.attributes.AttributeCollection { nopaque.corpus_analysis.cqi.models.attributes.PositionalAttributeCollection = class PositionalAttributeCollection extends nopaque.corpus_analysis.cqi.models.attributes.AttributeCollection {
/** @type{typeof cqi.models.attributes.PositionalAttribute} */ /** @type{typeof nopaque.corpus_analysis.cqi.models.attributes.PositionalAttribute} */
static model = cqi.models.attributes.PositionalAttribute; static model = nopaque.corpus_analysis.cqi.models.attributes.PositionalAttribute;
/** /**
* @param {string} positionalAttributeName * @param {string} positionalAttributeName
@ -198,7 +198,7 @@ cqi.models.attributes.PositionalAttributeCollection = class PositionalAttributeC
} }
/** /**
* @returns {Promise<cqi.models.attributes.PositionalAttribute[]>} * @returns {Promise<nopaque.corpus_analysis.cqi.models.attributes.PositionalAttribute[]>}
*/ */
async list() { async list() {
let positionalAttributeNames = await this.client.api.corpus_positional_attributes(this.corpus.apiName); let positionalAttributeNames = await this.client.api.corpus_positional_attributes(this.corpus.apiName);
@ -211,7 +211,7 @@ cqi.models.attributes.PositionalAttributeCollection = class PositionalAttributeC
}; };
cqi.models.attributes.StructuralAttribute = class StructuralAttribute extends cqi.models.attributes.Attribute { nopaque.corpus_analysis.cqi.models.attributes.StructuralAttribute = class StructuralAttribute extends nopaque.corpus_analysis.cqi.models.attributes.Attribute {
/** /**
* @returns {boolean} * @returns {boolean}
*/ */
@ -261,9 +261,9 @@ cqi.models.attributes.StructuralAttribute = class StructuralAttribute extends cq
}; };
cqi.models.attributes.StructuralAttributeCollection = class StructuralAttributeCollection extends cqi.models.attributes.AttributeCollection { nopaque.corpus_analysis.cqi.models.attributes.StructuralAttributeCollection = class StructuralAttributeCollection extends nopaque.corpus_analysis.cqi.models.attributes.AttributeCollection {
/** @type{typeof cqi.models.attributes.StructuralAttribute} */ /** @type{typeof nopaque.corpus_analysis.cqi.models.attributes.StructuralAttribute} */
static model = cqi.models.attributes.StructuralAttribute; static model = nopaque.corpus_analysis.cqi.models.attributes.StructuralAttribute;
/** /**
* @param {string} structuralAttributeName * @param {string} structuralAttributeName
@ -276,7 +276,7 @@ cqi.models.attributes.StructuralAttributeCollection = class StructuralAttributeC
} }
/** /**
* @returns {Promise<cqi.models.attributes.StructuralAttribute[]>} * @returns {Promise<nopaque.corpus_analysis.cqi.models.attributes.StructuralAttribute[]>}
*/ */
async list() { async list() {
let structuralAttributeNames = await this.client.api.corpus_structural_attributes(this.corpus.apiName); let structuralAttributeNames = await this.client.api.corpus_structural_attributes(this.corpus.apiName);

View File

@ -0,0 +1,166 @@
// Namespace object for the corpus model classes (Corpus, CorpusCollection).
nopaque.corpus_analysis.cqi.models.corpora = {};
/**
 * Model of a single corpus. The plain getters read from `this.attrs`, the
 * collection getters build a new collection object on every access, and the
 * async methods delegate to `this.client.api` using this corpus' `apiName`.
 */
nopaque.corpus_analysis.cqi.models.corpora.Corpus = class Corpus extends nopaque.corpus_analysis.cqi.models.resource.Model {
  /**
   * @returns {string}
   */
  get apiName() {
    const { api_name: apiName } = this.attrs;
    return apiName;
  }

  /**
   * @returns {string}
   */
  get name() {
    const { name } = this.attrs;
    return name;
  }

  /**
   * @returns {number}
   */
  get size() {
    const { size } = this.attrs;
    return size;
  }

  /**
   * @returns {string}
   */
  get charset() {
    const { charset } = this.attrs;
    return charset;
  }

  /**
   * Unlike the other getters this one tolerates a missing `attrs` object.
   *
   * @returns {string[]}
   */
  get properties() {
    const attrs = this.attrs;
    return attrs?.properties;
  }

  /**
   * @returns {nopaque.corpus_analysis.cqi.models.attributes.AlignmentAttributeCollection}
   */
  get alignmentAttributes() {
    const attributeModels = nopaque.corpus_analysis.cqi.models.attributes;
    return new attributeModels.AlignmentAttributeCollection(this.client, this);
  }

  /**
   * @returns {nopaque.corpus_analysis.cqi.models.attributes.PositionalAttributeCollection}
   */
  get positionalAttributes() {
    const attributeModels = nopaque.corpus_analysis.cqi.models.attributes;
    return new attributeModels.PositionalAttributeCollection(this.client, this);
  }

  /**
   * @returns {nopaque.corpus_analysis.cqi.models.attributes.StructuralAttributeCollection}
   */
  get structuralAttributes() {
    const attributeModels = nopaque.corpus_analysis.cqi.models.attributes;
    return new attributeModels.StructuralAttributeCollection(this.client, this);
  }

  /**
   * @returns {nopaque.corpus_analysis.cqi.models.subcorpora.SubcorpusCollection}
   */
  get subcorpora() {
    const subcorpusModels = nopaque.corpus_analysis.cqi.models.subcorpora;
    return new subcorpusModels.SubcorpusCollection(this.client, this);
  }

  /**
   * @returns {Promise<nopaque.corpus_analysis.cqi.status.StatusOk>}
   */
  async drop() {
    const status = await this.client.api.corpus_drop_corpus(this.apiName);
    return status;
  }

  /**
   * @param {string} subcorpusName
   * @param {string} query
   * @returns {Promise<nopaque.corpus_analysis.cqi.status.StatusOk>}
   */
  async query(subcorpusName, query) {
    const status = await this.client.api.cqp_query(this.apiName, subcorpusName, query);
    return status;
  }

  /**************************************************************************
   * Custom additions for nopaque.                                          *
   * NOTE: Everything below is not included in the CQi specification.       *
   **************************************************************************/

  /**
   * @returns {string}
   */
  get staticData() {
    const { static_data: staticData } = this.attrs;
    return staticData;
  }

  /**
   * @returns {Promise<nopaque.corpus_analysis.cqi.status.StatusOk>}
   */
  async updateDb() {
    const status = await this.client.api.ext_corpus_update_db(this.apiName);
    return status;
  }

  /**
   * @param {number=} page
   * @param {number=} per_page
   * @returns {Promise<object>}
   */
  async paginate(page, per_page) {
    const paginatedCorpus = await this.client.api.ext_corpus_paginate_corpus(this.apiName, page, per_page);
    return paginatedCorpus;
  }
};
/**
 * Collection that builds Corpus models from the values returned by the API.
 */
nopaque.corpus_analysis.cqi.models.corpora.CorpusCollection = class CorpusCollection extends nopaque.corpus_analysis.cqi.models.resource.Collection {
  /** @type {typeof nopaque.corpus_analysis.cqi.models.corpora.Corpus} */
  static model = nopaque.corpus_analysis.cqi.models.corpora.Corpus;

  /**
   * Gathers the attribute object for one corpus. The API calls are awaited
   * one after another, in the order charset, properties, size, static data.
   *
   * @param {string} corpusName
   * @returns {Promise<object>}
   */
  async _get(corpusName) {
    // Not fetched at the moment:
    //   full_name (corpus_full_name) and info (corpus_info)
    const charset = await this.client.api.corpus_charset(corpusName);
    const properties = await this.client.api.corpus_properties(corpusName);
    // The size is taken from the corpus' "word" attribute.
    const size = await this.client.api.cl_attribute_size(`${corpusName}.word`);
    /************************************************************************
     * Custom addition for nopaque.                                         *
     * NOTE: static_data is not included in the CQi specification.          *
     ************************************************************************/
    const staticData = await this.client.api.ext_corpus_static_data(corpusName);
    return {
      api_name: corpusName,
      charset: charset,
      name: corpusName,
      properties: properties,
      size: size,
      static_data: staticData
    };
  }

  /**
   * @param {string} corpusName
   * @returns {Promise<nopaque.corpus_analysis.cqi.models.corpora.Corpus>}
   */
  async get(corpusName) {
    const attrs = await this._get(corpusName);
    return this.prepareModel(attrs);
  }

  /**
   * Fetches the corpus names and then loads the corpora one at a time.
   *
   * @returns {Promise<nopaque.corpus_analysis.cqi.models.corpora.Corpus[]>}
   */
  async list() {
    /** @type {nopaque.corpus_analysis.cqi.models.corpora.Corpus[]} */
    const corpora = [];
    /** @type {string[]} */
    const corpusNames = await this.client.api.corpus_list_corpora();
    for (const corpusName of corpusNames) {
      const corpus = await this.get(corpusName);
      corpora.push(corpus);
    }
    return corpora;
  }
};

View File

@ -0,0 +1 @@
// Namespace object for the CQi model classes; the model files attach their
// own sub-namespaces (for example `corpora` and `subcorpora`) to it.
nopaque.corpus_analysis.cqi.models = {};

View File

@ -1,26 +1,26 @@
cqi.models.resource = {}; nopaque.corpus_analysis.cqi.models.resource = {};
/** /**
* A base class for representing a single object on the server. * A base class for representing a single object on the server.
*/ */
cqi.models.resource.Model = class Model { nopaque.corpus_analysis.cqi.models.resource.Model = class Model {
/** /**
* @param {object} attrs * @param {object} attrs
* @param {cqi.CQiClient} client * @param {nopaque.corpus_analysis.cqi.CQiClient} client
* @param {cqi.models.resource.Collection} collection * @param {nopaque.corpus_analysis.cqi.models.resource.Collection} collection
*/ */
constructor(attrs, client, collection) { constructor(attrs, client, collection) {
/** /**
* A client pointing at the server that this object is on. * A client pointing at the server that this object is on.
* *
* @type {cqi.CQiClient} * @type {nopaque.corpus_analysis.cqi.CQiClient}
*/ */
this.client = client; this.client = client;
/** /**
* The collection that this model is part of. * The collection that this model is part of.
* *
* @type {cqi.models.resource.Collection} * @type {nopaque.corpus_analysis.cqi.models.resource.Collection}
*/ */
this.collection = collection; this.collection = collection;
/** /**
@ -50,22 +50,22 @@ cqi.models.resource.Model = class Model {
/** /**
* A base class for representing all objects of a particular type on the server. * A base class for representing all objects of a particular type on the server.
*/ */
cqi.models.resource.Collection = class Collection { nopaque.corpus_analysis.cqi.models.resource.Collection = class Collection {
/** /**
* The type of object this collection represents, set by subclasses * The type of object this collection represents, set by subclasses
* *
* @type {typeof cqi.models.resource.Model} * @type {typeof nopaque.corpus_analysis.cqi.models.resource.Model}
*/ */
static model; static model;
/** /**
* @param {cqi.CQiClient} client * @param {nopaque.corpus_analysis.cqi.CQiClient} client
*/ */
constructor(client) { constructor(client) {
/** /**
* A client pointing at the server that this object is on. * A client pointing at the server that this object is on.
* *
* @type {cqi.CQiClient} * @type {nopaque.corpus_analysis.cqi.CQiClient}
*/ */
this.client = client; this.client = client;
} }
@ -82,7 +82,7 @@ cqi.models.resource.Collection = class Collection {
* Create a model from a set of attributes. * Create a model from a set of attributes.
* *
* @param {object} attrs * @param {object} attrs
* @returns {cqi.models.resource.Model} * @returns {nopaque.corpus_analysis.cqi.models.resource.Model}
*/ */
prepareModel(attrs) { prepareModel(attrs) {
return new this.constructor.model(attrs, this.client, this); return new this.constructor.model(attrs, this.client, this);

View File

@ -0,0 +1,189 @@
// Namespace object for the subcorpus model classes (Subcorpus,
// SubcorpusCollection).
nopaque.corpus_analysis.cqi.models.subcorpora = {};
/**
 * Model of a single subcorpus. The getters read from `this.attrs`; the async
 * methods delegate to `this.client.api` using this subcorpus' `apiName`.
 */
nopaque.corpus_analysis.cqi.models.subcorpora.Subcorpus = class Subcorpus extends nopaque.corpus_analysis.cqi.models.resource.Model {
  /**
   * @returns {string}
   */
  get apiName() {
    const { api_name: apiName } = this.attrs;
    return apiName;
  }

  /**
   * @returns {object}
   */
  get fields() {
    const { fields } = this.attrs;
    return fields;
  }

  /**
   * @returns {string}
   */
  get name() {
    const { name } = this.attrs;
    return name;
  }

  /**
   * @returns {number}
   */
  get size() {
    const { size } = this.attrs;
    return size;
  }

  /**
   * @returns {Promise<nopaque.corpus_analysis.cqi.status.StatusOk>}
   */
  async drop() {
    const status = await this.client.api.cqp_drop_subcorpus(this.apiName);
    return status;
  }

  /**
   * @param {number} field
   * @param {number} first
   * @param {number} last
   * @returns {Promise<number[]>}
   */
  async dump(field, first, last) {
    const dumped = await this.client.api.cqp_dump_subcorpus(this.apiName, field, first, last);
    return dumped;
  }

  /**
   * @param {number} cutoff
   * @param {number} field
   * @param {nopaque.corpus_analysis.cqi.models.attributes.PositionalAttribute} attribute
   * @returns {Promise<number[]>}
   */
  async fdist1(cutoff, field, attribute) {
    // Only the attribute's API name is sent, not the model object.
    const distribution = await this.client.api.cqp_fdist_1(this.apiName, cutoff, field, attribute.apiName);
    return distribution;
  }

  /**
   * @param {number} cutoff
   * @param {number} field1
   * @param {nopaque.corpus_analysis.cqi.models.attributes.PositionalAttribute} attribute1
   * @param {number} field2
   * @param {nopaque.corpus_analysis.cqi.models.attributes.PositionalAttribute} attribute2
   * @returns {Promise<number[]>}
   */
  async fdist2(cutoff, field1, attribute1, field2, attribute2) {
    // Only the attributes' API names are sent, not the model objects.
    const distribution = await this.client.api.cqp_fdist_2(this.apiName, cutoff, field1, attribute1.apiName, field2, attribute2.apiName);
    return distribution;
  }

  /**************************************************************************
   * Custom additions for nopaque.                                          *
   * NOTE: Everything below is not included in the CQi specification.       *
   **************************************************************************/

  /**
   * @param {number=} context
   * @param {number=} page
   * @param {number=} perPage
   * @returns {Promise<object>}
   */
  async paginate(context, page, perPage) {
    const paginatedSubcorpus = await this.client.api.ext_cqp_paginate_subcorpus(this.apiName, context, page, perPage);
    return paginatedSubcorpus;
  }

  /**
   * @param {number[]} matchIdList
   * @param {number=} context
   * @returns {Promise<object>}
   */
  async partialExport(matchIdList, context) {
    const exported = await this.client.api.ext_cqp_partial_export_subcorpus(this.apiName, matchIdList, context);
    return exported;
  }

  /**
   * @param {number=} context
   * @returns {Promise<object>}
   */
  async export(context) {
    const exported = await this.client.api.ext_cqp_export_subcorpus(this.apiName, context);
    return exported;
  }
};
/**
 * Collection of the subcorpora that belong to one corpus.
 */
nopaque.corpus_analysis.cqi.models.subcorpora.SubcorpusCollection = class SubcorpusCollection extends nopaque.corpus_analysis.cqi.models.resource.Collection {
  /** @type {typeof nopaque.corpus_analysis.cqi.models.subcorpora.Subcorpus} */
  static model = nopaque.corpus_analysis.cqi.models.subcorpora.Subcorpus;

  /**
   * @param {nopaque.corpus_analysis.cqi.CQiClient} client
   * @param {nopaque.corpus_analysis.cqi.models.corpora.Corpus} corpus
   */
  constructor(client, corpus) {
    super(client);
    /** @type {nopaque.corpus_analysis.cqi.models.corpora.Corpus} */
    this.corpus = corpus;
  }

  /**
   * Gathers the attribute object for one subcorpus. The API name has the
   * form "<corpus apiName>:<subcorpusName>".
   *
   * @param {string} subcorpusName
   * @returns {Promise<object>}
   */
  async _get(subcorpusName) {
    const constants = nopaque.corpus_analysis.cqi.constants;
    /** @type {string} */
    const apiName = `${this.corpus.apiName}:${subcorpusName}`;
    // Probed one after another in this order; a key is only added when
    // cqp_subcorpus_has_field resolves to a truthy value for it.
    const probedFields = [
      ['match', constants.FIELD_MATCH],
      ['matchend', constants.FIELD_MATCHEND],
      ['target', constants.FIELD_TARGET],
      ['keyword', constants.FIELD_KEYWORD]
    ];
    /** @type {object} */
    const fields = {};
    for (const [fieldName, fieldId] of probedFields) {
      const hasField = await this.client.api.cqp_subcorpus_has_field(apiName, fieldId);
      if (hasField) {
        fields[fieldName] = fieldId;
      }
    }
    const size = await this.client.api.cqp_subcorpus_size(apiName);
    return {
      api_name: apiName,
      fields: fields,
      name: subcorpusName,
      size: size
    };
  }

  /**
   * @param {string} subcorpusName
   * @returns {Promise<nopaque.corpus_analysis.cqi.models.subcorpora.Subcorpus>}
   */
  async get(subcorpusName) {
    const attrs = await this._get(subcorpusName);
    return this.prepareModel(attrs);
  }

  /**
   * Fetches the subcorpus names of the corpus and then loads the subcorpora
   * one at a time.
   *
   * @returns {Promise<nopaque.corpus_analysis.cqi.models.subcorpora.Subcorpus[]>}
   */
  async list() {
    /** @type {nopaque.corpus_analysis.cqi.models.subcorpora.Subcorpus[]} */
    const subcorpora = [];
    /** @type {string[]} */
    const subcorpusNames = await this.client.api.cqp_list_subcorpora(this.corpus.apiName);
    for (const subcorpusName of subcorpusNames) {
      const subcorpus = await this.get(subcorpusName);
      subcorpora.push(subcorpus);
    }
    return subcorpora;
  }
};

View File

@ -0,0 +1,51 @@
// Namespace object for the CQi status classes and the code-to-class `lookup`
// table; the statements that follow attach their members to it.
nopaque.corpus_analysis.cqi.status = {};
/**
 * Base class of all status classes. It only declares `code`, which the
 * subclasses overwrite with their own number.
 */
nopaque.corpus_analysis.cqi.status.CQiStatus = class CQiStatus {
  /** @type {number|undefined} */
  code = undefined;
};
/**
 * Status carrying code 257.
 */
nopaque.corpus_analysis.cqi.status.StatusOk = class StatusOk extends nopaque.corpus_analysis.cqi.status.CQiStatus {
  /** @type {number} */
  code = 257;
};
/**
 * Status carrying code 258.
 */
nopaque.corpus_analysis.cqi.status.StatusConnectOk = class StatusConnectOk extends nopaque.corpus_analysis.cqi.status.CQiStatus {
  /** @type {number} */
  code = 258;
};
/**
 * Status carrying code 259.
 */
nopaque.corpus_analysis.cqi.status.StatusByeOk = class StatusByeOk extends nopaque.corpus_analysis.cqi.status.CQiStatus {
  /** @type {number} */
  code = 259;
};
/**
 * Status carrying code 260.
 */
nopaque.corpus_analysis.cqi.status.StatusPingOk = class StatusPingOk extends nopaque.corpus_analysis.cqi.status.CQiStatus {
  /** @type {number} */
  code = 260;
};
{
  // Block-scoped alias so the shorthand does not leak into the global scope.
  const status = nopaque.corpus_analysis.cqi.status;

  /**
   * Maps each numeric status code to the status class whose instances carry
   * that code.
   */
  status.lookup = {
    257: status.StatusOk,
    258: status.StatusConnectOk,
    259: status.StatusByeOk,
    260: status.StatusPingOk
  };
}

View File

@ -0,0 +1 @@
// Root namespace object for the corpus analysis code. The `cqi` and
// `query_builder` namespaces are attached to it by other files, so this
// assignment has to run before they do.
nopaque.corpus_analysis = {};

View File

@ -0,0 +1,28 @@
/**
 * Bundles the DOM elements, Materialize modal instances and small pieces of
 * editing state that the query builder works with. Everything is looked up
 * once, when the object is constructed.
 */
nopaque.corpus_analysis.query_builder.ElementReferences = class ElementReferences {
  constructor() {
    // First element matching the selector, or null when nothing matches.
    const find = (selector) => document.querySelector(selector);
    // NOTE(review): getInstance presumably needs the modal to be initialised
    // already - confirm against the page setup.
    const modalOf = (selector) => M.Modal.getInstance(find(selector));

    // General Elements
    this.queryInputField = find('#corpus-analysis-concordance-query-builder-input-field');
    this.queryChipElements = [];
    this.queryElementTarget = find('.query-element-target');
    this.editingModusOn = false;
    this.editedQueryChipElementIndex = undefined;
    this.deleteQueryButton = find('#corpus-analysis-concordance-delete-query-button');

    // Structural Attribute Builder Elements
    this.structuralAttrModal = modalOf('#corpus-analysis-concordance-structural-attr-modal');
    this.englishEntTypeSelection = find('#corpus-analysis-concordance-english-ent-type-selection');
    this.germanEntTypeSelection = find('#corpus-analysis-concordance-german-ent-type-selection');

    // Token Attribute Builder Elements
    this.positionalAttrModal = modalOf('#corpus-analysis-concordance-positional-attr-modal');
    this.positionalAttrSelection = find('#corpus-analysis-concordance-positional-attr-selection');
    this.tokenBuilderContent = find('#corpus-analysis-concordance-token-builder-content');
    this.tokenQuery = find('#corpus-analysis-concordance-token-query');
    this.tokenQueryTemplate = find('#corpus-analysis-concordance-token-query-template');
    this.tokenSubmitButton = find('#corpus-analysis-concordance-token-submit');
    this.noValueMessage = find('#corpus-analysis-concordance-no-value-message');
    this.isTokenQueryInvalid = false;
    this.ignoreCaseCheckbox = find('#corpus-analysis-concordance-ignore-case-checkbox');
  }
};

View File

@ -0,0 +1 @@
// Namespace object for the query builder classes (for example
// ElementReferences and QueryBuilder); it has to exist before those classes
// are attached to it.
nopaque.corpus_analysis.query_builder = {};

View File

@ -0,0 +1,500 @@
nopaque.corpus_analysis.query_builder.QueryBuilder = class QueryBuilder {
constructor() {
this.elements = new nopaque.corpus_analysis.query_builder.ElementReferences();
this.addEventListenersToQueryElementTarget();
this.addEventListenersToIncidenceModifier();
this.addEventListenersToNAndMInputSubmit();
this.elements.deleteQueryButton.addEventListener('click', () => {this.resetQueryInputField()});
this.expertModeQueryBuilderSwitchHandler();
this.extensions = {
structuralAttributeBuilderFunctions: new nopaque.corpus_analysis.query_builder.StructuralAttributeBuilderFunctions(this),
tokenAttributeBuilderFunctions: new nopaque.corpus_analysis.query_builder.TokenAttributeBuilderFunctions(this),
};
this.dropdown = M.Dropdown.init(
document.querySelector('.dropdown-trigger[data-toggle-area="token-incidence-modifiers"]'),
{
onCloseStart: () => {
this.unselectChipElement(this.elements.queryInputField.querySelector('.chip.teal'));
}
}
)
}
addEventListenersToQueryElementTarget() {
this.elements.queryElementTarget.addEventListener('click', () => {
this.elements.positionalAttrModal.open();
});
this.elements.queryElementTarget.addEventListener('dragstart', this.handleDragStart.bind(this, this.elements.queryElementTarget));
this.elements.queryElementTarget.addEventListener('dragend', this.handleDragEnd);
}
addEventListenersToIncidenceModifier() {
// Eventlisteners for the incidence modifiers. There are two different types of incidence modifiers: token and character incidence modifiers.
document.querySelectorAll('.incidence-modifier-selection').forEach(button => {
let dropdownId = button.parentNode.parentNode.id;
if (dropdownId === 'corpus-analysis-concordance-token-incidence-modifiers-dropdown') {
button.addEventListener('click', () => this.tokenIncidenceModifierHandler(button.dataset.token, button.innerHTML));
} else if (dropdownId === 'corpus-analysis-concordance-character-incidence-modifiers-dropdown') {
button.addEventListener('click', () => this.extensions.tokenAttributeBuilderFunctions.characterIncidenceModifierHandler(button));
}
});
}
addEventListenersToNAndMInputSubmit() {
// Eventlisteners for the submit of n- and m-values of the incidence modifier modal for "exactly n" or "between n and m".
document.querySelectorAll('.n-m-submit-button').forEach(button => {
let modalId = button.dataset.modalId;
if (modalId === 'corpus-analysis-concordance-exactly-n-token-modal' || modalId === 'corpus-analysis-concordance-between-nm-token-modal') {
button.addEventListener('click', () => this.tokenNMSubmitHandler(modalId));
} else if (modalId === 'corpus-analysis-concordance-exactly-n-character-modal' || modalId === 'corpus-analysis-concordance-between-nm-character-modal') {
button.addEventListener('click', () => this.extensions.tokenAttributeBuilderFunctions.characterNMSubmitHandler(modalId));
}
});
}
toggleClass(elements, className, action) {
elements.forEach(element => {
document.querySelector(`[data-toggle-area="${element}"]`).classList[action](className);
});
}
resetQueryInputField() {
this.elements.queryInputField.innerHTML = '';
this.addQueryElementTarget();
this.updateChipList();
this.queryPreviewBuilder();
}
addQueryElementTarget() {
let queryElementTarget = nopaque.Utils.HTMLToElement(
`
<a class="query-element-target btn-floating btn-small blue-grey lighten-4 waves-effect waves-light tooltipped" style="margin-bottom:10px; margin-right:5px;" draggable="true" data-position="bottom" data-tooltip="Add an Element to your query">
<i class="material-icons">add</i>
</a>
`
);
this.elements.queryInputField.appendChild(queryElementTarget);
this.elements.queryElementTarget = queryElementTarget;
this.addEventListenersToQueryElementTarget();
}
updateChipList() {
this.elements.queryChipElements = this.elements.queryInputField.querySelectorAll('.query-component');
}
resetMaterializeSelection(selectionElements, value = "default") {
selectionElements.forEach(selectionElement => {
if (selectionElement.querySelector(`option[value=${value}]`) !== null) {
selectionElement.querySelector(`option[value=${value}]`).selected = true;
}
let instance = M.FormSelect.getInstance(selectionElement);
instance.destroy();
M.FormSelect.init(selectionElement);
})
}
/**
 * Either creates a new query chip or, while the editing mode is active,
 * overwrites the chip that is currently being edited.
 *
 * @param {string} dataType - value for the chip's `data-type` attribute.
 * @param {string} prettyQueryText - human readable chip label.
 * @param {string} queryText - value for the chip's `data-query` attribute.
 * @param {?number} index - forwarded to queryChipFactory (new chips only).
 * @param {boolean} isClosingTag - forwarded to queryChipFactory (new chips only).
 * @param {boolean} isEditable - forwarded to queryChipFactory (new chips only).
 */
submitQueryChipElement(dataType=undefined, prettyQueryText=undefined, queryText=undefined, index=null, isClosingTag=false, isEditable=false) {
  if (!this.elements.editingModusOn) {
    this.queryChipFactory(dataType, prettyQueryText, queryText, index, isClosingTag, isEditable);
    return;
  }
  const chipUnderEdit = this.elements.queryChipElements[this.elements.editedQueryChipElementIndex];
  chipUnderEdit.dataset.type = dataType;
  chipUnderEdit.dataset.query = queryText;
  // Only the first child node is rewritten so that the action icons of the chip survive.
  chipUnderEdit.firstChild.textContent = prettyQueryText;
  this.updateChipList();
  this.queryPreviewBuilder();
}
queryChipFactory(dataType, prettyQueryText, queryText, index=null, isClosingTag=false, isEditable=false) {
// Creates a new query chip element, adds Eventlisteners for selection, deletion and drag and drop and appends it to the query input field.
queryText = nopaque.Utils.escape(queryText);
prettyQueryText = nopaque.Utils.escape(prettyQueryText);
let queryChipElement = nopaque.Utils.HTMLToElement(
`
<span class="chip query-component" data-type="${dataType}" data-query="${queryText}" draggable="true"">
${prettyQueryText}${isEditable ? '<i class="material-icons chip-action-button" data-chip-action="edit" style="padding-left:5px; font-size:18px; cursor:pointer;">edit</i>': ''}
${isClosingTag ? '' : '<i class="material-icons close chip-action-button" data-chip-action="delete">close</i>'}
</span>
`
);
this.addActionListeners(queryChipElement);
queryChipElement.addEventListener('dragstart', this.handleDragStart.bind(this, queryChipElement));
queryChipElement.addEventListener('dragend', this.handleDragEnd);
// If an index is given, inserts the query chip after the given index (only relevant for Incidence Modifier) and if there is a closing tag, inserts the query chip before the closing tag.
if (index !== null) {
this.updateChipList();
this.elements.queryChipElements[index].after(queryChipElement);
} else {
this.elements.queryInputField.insertBefore(queryChipElement, this.elements.queryElementTarget);
}
if (isClosingTag) {
this.moveQueryElementTarget(queryChipElement);
}
this.updateChipList();
this.queryPreviewBuilder();
}
moveQueryElementTarget(element) {
this.elements.queryInputField.insertBefore(this.elements.queryElementTarget, element);
}
addActionListeners(queryChipElement) {
let notQuantifiableDataTypes = ['start-sentence', 'end-sentence', 'start-entity', 'start-empty-entity', 'end-entity', 'token-incidence-modifier'];
queryChipElement.addEventListener('click', (event) => {
if (event.target.classList.contains('chip')) {
if (!notQuantifiableDataTypes.includes(queryChipElement.dataset.type)) {
this.selectChipElement(queryChipElement);
}
}
});
let chipActionButtons = queryChipElement.querySelectorAll('.chip-action-button');
chipActionButtons.forEach(button => {
button.addEventListener('click', (event) => {
if (event.target.dataset.chipAction === 'delete') {
this.deleteChipElement(queryChipElement);
} else if (event.target.dataset.chipAction === 'edit') {
this.editChipElement(queryChipElement);
}
});
});
}
editChipElement(queryChipElement) {
this.elements.editingModusOn = true;
this.elements.editedQueryChipElementIndex = Array.from(this.elements.queryInputField.children).indexOf(queryChipElement);
switch (queryChipElement.dataset.type) {
case 'start-entity':
this.extensions.structuralAttributeBuilderFunctions.editStartEntityChipElement(queryChipElement);
break;
case 'token':
let queryElementsContent = this.extensions.tokenAttributeBuilderFunctions.prepareTokenQueryElementsContent(queryChipElement);
this.extensions.tokenAttributeBuilderFunctions.editTokenChipElement(queryElementsContent);
break;
default:
break;
}
}
deleteChipElement(attr) {
let elementIndex = Array.from(this.elements.queryInputField.children).indexOf(attr);
switch (attr.dataset.type) {
case 'start-sentence':
this.deleteClosingTagHandler(elementIndex, 'end-sentence');
break;
case 'start-empty-entity':
case 'start-entity':
this.deleteClosingTagHandler(elementIndex, 'end-entity');
break;
case 'token':
let nextElement = Array.from(this.elements.queryInputField.children)[elementIndex+1];
if (nextElement !== undefined && nextElement.dataset.type === 'token-incidence-modifier') {
this.deleteChipElement(nextElement);
}
default:
break;
}
this.elements.queryInputField.removeChild(attr);
this.updateChipList();
this.queryPreviewBuilder();
}
deleteClosingTagHandler(elementIndex, closingTagType) {
let closingTags = this.elements.queryInputField.querySelectorAll(`[data-type="${closingTagType}"]`);
for (let i = 0; i < closingTags.length; i++) {
let closingTag = closingTags[i];
if (Array.from(this.elements.queryInputField.children).indexOf(closingTag) > elementIndex) {
this.deleteChipElement(closingTag);
break;
}
}
}
handleDragStart(queryChipElement) {
// is called when a query chip is dragged. It creates a dropzone (in form of a chip) for the dragged chip and adds it to the query input field.
let queryChips = this.elements.queryInputField.querySelectorAll('.query-component');
if (queryChipElement.dataset.type === 'token-incidence-modifier') {
queryChips = this.elements.queryInputField.querySelectorAll('.query-component[data-type="token"]');
}
setTimeout(() => {
let targetChipElement = nopaque.Utils.HTMLToElement('<span class="chip drop-target">Drop here</span>');
for (let element of queryChips) {
if (element === this.elements.queryInputField.querySelectorAll('.query-component')[0]) {
let secondTargetChipClone = targetChipElement.cloneNode(true);
element.insertAdjacentElement('beforebegin', secondTargetChipClone);
this.addDragDropListeners(secondTargetChipClone, queryChipElement);
}
if (element === queryChipElement || element.nextSibling === queryChipElement) {continue;}
let targetChipClone = targetChipElement.cloneNode(true);
element.insertAdjacentElement('afterend', targetChipClone);
//TODO: Change to two different functions for drag and drop
this.addDragDropListeners(targetChipClone, queryChipElement);
}
}, 0);
}
handleDragEnd(event) {
// is called when a query chip is dropped. It removes the dropzones and initializes the tooltips if the dragged element is the query element target.
if (event.target.classList.contains('query-element-target')) {
M.Tooltip.init(event.target);
}
document.querySelectorAll('.drop-target').forEach(target => target.remove());
}
addDragDropListeners(targetChipClone, queryChipElement) {
targetChipClone.addEventListener('dragover', (event) => {
event.preventDefault();
});
targetChipClone.addEventListener('dragenter', (event) => {
event.preventDefault();
event.target.style.borderStyle = 'solid dotted';
});
targetChipClone.addEventListener('dragleave', (event) => {
event.preventDefault();
event.target.style.borderStyle = 'hidden';
});
targetChipClone.addEventListener('drop', (event) => {
let dropzone = event.target;
dropzone.parentElement.replaceChild(queryChipElement, dropzone);
this.updateChipList();
this.queryPreviewBuilder();
});
}
queryPreviewBuilder() {
// Builds the query preview in the form of pure CQL and displays it in the query preview field.
let queryPreview = document.querySelector('#corpus-analysis-concordance-query-preview');
let queryInputFieldContent = [];
this.elements.queryChipElements.forEach(element => {
let queryElement = element.dataset.query;
if (queryElement !== undefined) {
queryElement = nopaque.Utils.escape(queryElement);
}
queryInputFieldContent.push(queryElement);
});
let queryString = queryInputFieldContent.join(' ');
let replacements = {
' +': '+',
' *': '*',
' ?': '?',
' {': '{'
};
for (let key in replacements) {
queryString = queryString.replace(key, replacements[key]);
}
queryString += ';';
queryPreview.innerHTML = queryString;
queryPreview.parentNode.classList.toggle('hide', queryString === ';');
}
selectChipElement(attr) {
if (attr.classList.contains('teal')) {
return;
}
this.toggleClass(['token-incidence-modifiers'], 'disabled', 'toggle');
attr.classList.toggle('teal');
attr.classList.toggle('lighten-5');
M.Dropdown.getInstance(document.querySelector('.dropdown-trigger[data-toggle-area="token-incidence-modifiers"]')).open();
}
unselectChipElement(attr) {
let nModalInstance = M.Modal.getInstance(document.querySelector('#corpus-analysis-concordance-exactly-n-token-modal'));
let nmModalInstance = M.Modal.getInstance(document.querySelector('#corpus-analysis-concordance-between-nm-token-modal'));
if (nModalInstance.isOpen || nmModalInstance.isOpen) {
return;
}
attr.classList.remove('teal', 'lighten-5');
this.toggleClass(['token-incidence-modifiers'], 'disabled', 'add');
}
tokenIncidenceModifierHandler(incidenceModifier, incidenceModifierPretty, nOrNM = false) {
// Adds a token incidence modifier to the query input field.
let selectedChip = this.elements.queryInputField.querySelector('.chip.teal');
let selectedChipIndex = Array.from(this.elements.queryChipElements).indexOf(selectedChip);
if (nOrNM) {
this.unselectChipElement(selectedChip);
}
this.submitQueryChipElement('token-incidence-modifier', incidenceModifierPretty, incidenceModifier, selectedChipIndex);
}
tokenNMSubmitHandler(modalId) {
// Adds a token incidence modifier (exactly n or between n and m) to the query input field.
let modal = document.querySelector(`#${modalId}`);
let input_n = modal.querySelector('.n-m-input[data-value-type="n"]').value;
let input_m = modal.querySelector('.n-m-input[data-value-type="m"]') || undefined;
input_m = input_m !== undefined ? input_m.value : '';
let input = `{${input_n}${input_m !== '' ? ',' : ''}${input_m}}`;
let pretty_input = `between ${input_n} and ${input_m} (${input})`;
if (input_m === '') {
pretty_input = `exactly ${input_n} (${input})`;
}
let instance = M.Modal.getInstance(modal);
instance.close();
this.tokenIncidenceModifierHandler(input, pretty_input, true);
}
expertModeQueryBuilderSwitchHandler() {
let queryBuilderDisplay = document.querySelector("#corpus-analysis-concordance-query-builder-display");
let expertModeDisplay = document.querySelector("#corpus-analysis-concordance-expert-mode-display");
let expertModeSwitch = document.querySelector("#corpus-analysis-concordance-expert-mode-switch");
let submitModal = M.Modal.getInstance(document.querySelector('#corpus-analysis-concordance-switch-to-query-builder-submit-modal'));
let confirmSwitchToQueryBuilderButton = document.querySelector('.switch-action[data-switch-action="confirm"]');
confirmSwitchToQueryBuilderButton.addEventListener("click", () => {
queryBuilderDisplay.classList.remove("hide");
expertModeDisplay.classList.add("hide");
this.switchToQueryBuilderParser();
});
expertModeSwitch.addEventListener("change", () => {
const isChecked = expertModeSwitch.checked;
if (isChecked) {
queryBuilderDisplay.classList.add("hide");
expertModeDisplay.classList.remove("hide");
this.switchToExpertModeParser();
} else {
submitModal.open();
}
});
}
switchToExpertModeParser() {
let expertModeInputField = document.querySelector('#corpus-analysis-concordance-form-query');
expertModeInputField.value = '';
let queryBuilderInputFieldValue = nopaque.Utils.unescape(document.querySelector('#corpus-analysis-concordance-query-preview').innerHTML.trim());
if (queryBuilderInputFieldValue !== "" && queryBuilderInputFieldValue !== ";") {
expertModeInputField.value = queryBuilderInputFieldValue;
}
}
switchToQueryBuilderParser() {
this.resetQueryInputField();
let expertModeInputFieldValue = document.querySelector('#corpus-analysis-concordance-form-query').value;
let chipElements = this.parseTextToChip(expertModeInputFieldValue);
let editableElements = ['start-entity', 'token'];
for (let chipElement of chipElements) {
let isEditable = editableElements.includes(chipElement['type']);
if (chipElement['query'] === '[]'){
isEditable = false;
}
this.submitQueryChipElement(chipElement['type'], chipElement['pretty'], chipElement['query'], null, false, isEditable);
}
}
parseTextToChip(query) {
const parsingElementDict = {
'<s>': {
pretty: 'Sentence Start',
type: 'start-sentence'
},
'<\/s>': {
pretty: 'Sentence End',
type: 'end-sentence'
},
'<ent>': {
pretty: 'Entity Start',
type: 'start-empty-entity'
},
'<ent_type="([A-Z]+)">': {
pretty: '',
type: 'start-entity'
},
'<\\\/ent(_type)?>': {
pretty: 'Entity End',
type: 'end-entity'
},
'\\[(word|lemma|pos|simple_pos)=("(?:[^"\\\\]|\\\\")*") ?(%c)? ?((\\&|\\|) ?(word|lemma|pos|simple_pos)=("(?:[^"\\\\]|\\\\")*") ?(%c)? ?)*\\]': {
pretty: '',
type: 'token'
},
'\\[\\]': {
pretty: 'Empty Token',
type: 'token'
},
'(?<!\\[) ?\\+ ?(?![^\\]]\\])': {
pretty: ' one or more (+)',
type: 'token-incidence-modifier'
},
'(?<!\\[) ?\\* ?(?![^\\]]\\])': {
pretty: 'zero or more (*)',
type: 'token-incidence-modifier'
},
'(?<!\\[) ?\\? ?(?![^\\]]\\])': {
pretty: 'zero or one (?)',
type: 'token-incidence-modifier'
},
'(?<!\\[) ?\\{[0-9]+} ?(?![^\\]]\\])': {
pretty: '',
type: 'token-incidence-modifier'
},
'(?<!\\[) ?\\{[0-9]+(,[0-9]+)?} ?(?![^\\]]\\])': {
pretty: '',
type: 'token-incidence-modifier'
}
}
let chipElements = [];
let regexPattern = Object.keys(parsingElementDict).map(pattern => `(${pattern})`).join('|');
const regex = new RegExp(regexPattern, 'gi');
let match;
while ((match = regex.exec(query)) !== null) {
// this is necessary to avoid infinite loops with zero-width matches
if (match.index === regex.lastIndex) {
regex.lastIndex++;
}
let stringElement = match[0];
for (let [pattern, chipElement] of Object.entries(parsingElementDict)) {
const parsingRegex = new RegExp(pattern, 'gi');
if (parsingRegex.exec(stringElement)) {
// Creating the pretty text for the chip element
let prettyText;
switch (pattern) {
case '<ent_type="([A-Z]+)">':
prettyText = `Entity Type=${stringElement.replace(/<ent_type="|">/g, '')}`;
break;
case ':: ?match\\.text_[A-Za-z]+="[^"]+"':
prettyText = stringElement.replace(/:: ?match\.text_|"|"/g, '');
break;
case '\\[(word|lemma|pos|simple_pos)=("(?:[^"\\\\]|\\\\")*") ?(%c)? ?((\\&|\\|) ?(word|lemma|pos|simple_pos)=("(?:[^"\\\\]|\\\\")*") ?(%c)? ?)*\\]':
prettyText = stringElement.replace(/^\[|\]$|(?<!\\)"/g, '');
prettyText = prettyText.replace(/\&/g, ' and ').replace(/\|/g, ' or ');
break;
case '(?<!\\[) ?\\{[0-9]+} ?(?![^\\]]\\])':
prettyText = `exactly ${stringElement.replace(/{|}/g, '')} (${stringElement})`;
break;
case '(?<!\\[) ?\\{[0-9]+(,[0-9]+)?} ?(?![^\\]]\\])':
prettyText = `between${stringElement.replace(/{|}/g, ' ').replace(',', ' and ')}(${stringElement})`;
break;
default:
prettyText = chipElement.pretty;
break;
}
chipElements.push({
type: chipElement.type,
pretty: prettyText,
query: stringElement
});
break;
}
}
}
return chipElements;
}
};

View File

@ -0,0 +1,82 @@
/**
 * Query builder extension for structural attributes (sentences and entities).
 * It drives the structural attribute modal and turns the user's choices into
 * query chips of the owning query builder.
 */
nopaque.corpus_analysis.query_builder.StructuralAttributeBuilderFunctions = class StructuralAttributeBuilderFunctions {
  /**
   * @param {object} app - the owning query builder; its `elements` object is
   *   shared with this extension.
   */
  constructor(app) {
    this.app = app;
    this.elements = app.elements;
    this.structuralAttrModalEventlisteners();
    const modalElement = document.querySelector('#corpus-analysis-concordance-structural-attr-modal');
    this.elements.structuralAttrModal = M.Modal.init(modalElement, {
      onCloseStart: () => {
        this.resetStructuralAttrModal();
      }
    });
  }

  /**
   * Attaches the listeners of the action buttons and of the entity type
   * selections inside the structural attribute modal.
   */
  structuralAttrModalEventlisteners() {
    for (const actionButton of document.querySelectorAll('[data-structural-attr-modal-action-button]')) {
      actionButton.addEventListener('click', () => {
        this.actionButtonInStrucAttrModalHandler(actionButton.dataset.structuralAttrModalActionButton);
      });
    }

    // Entity of any type: an opening and a closing chip without a type.
    const anyTypeButton = document.querySelector('.ent-type-selection-action[data-ent-type="any"]');
    anyTypeButton.addEventListener('click', () => {
      this.app.submitQueryChipElement('start-empty-entity', 'Entity Start', '<ent>');
      this.app.submitQueryChipElement('end-entity', 'Entity End', '</ent>', null, true);
      this.elements.structuralAttrModal.close();
    });

    // Entity of a specific type. The english and the german selection behave identically.
    const submitTypedEntity = (event) => {
      const entityType = event.target.value;
      this.app.submitQueryChipElement('start-entity', `Entity Type=${entityType}`, `<ent_type="${entityType}">`, null, false, true);
      // While editing, the closing tag of the edited entity already exists.
      if (!this.elements.editingModusOn) {
        this.app.submitQueryChipElement('end-entity', 'Entity End', '</ent_type>', null, true);
      }
      this.elements.structuralAttrModal.close();
    };
    for (const language of ['english', 'german']) {
      const typeSelection = document.querySelector(`.ent-type-selection-action[data-ent-type="${language}"]`);
      typeSelection.addEventListener('change', submitTypedEntity);
    }
  }

  /**
   * Brings the modal back into its initial state and leaves the editing mode.
   */
  resetStructuralAttrModal() {
    const { englishEntTypeSelection, germanEntTypeSelection } = this.elements;
    this.app.resetMaterializeSelection([englishEntTypeSelection, germanEntTypeSelection]);
    this.app.toggleClass(['entity-builder'], 'hide', 'add');
    this.toggleEditingAreaStructuralAttrModal('remove');
    this.elements.editingModusOn = false;
    this.elements.editedQueryChipElementIndex = undefined;
  }

  /**
   * Handles a click on one of the modal's action buttons.
   *
   * @param {string} action - 'sentence' adds a sentence tag pair and closes
   *   the modal, 'entity' toggles the entity builder; other values are ignored.
   */
  actionButtonInStrucAttrModalHandler(action) {
    if (action === 'sentence') {
      this.app.submitQueryChipElement('start-sentence', 'Sentence Start', '<s>');
      this.app.submitQueryChipElement('end-sentence', 'Sentence End', '</s>', null, true);
      this.elements.structuralAttrModal.close();
    } else if (action === 'entity') {
      this.app.toggleClass(['entity-builder'], 'hide', 'toggle');
    }
  }

  /**
   * Adds, removes or toggles the 'disabled' class on the buttons that are not
   * available while a query chip element is being edited.
   *
   * @param {string} action - classList method name, see `app.toggleClass`.
   */
  toggleEditingAreaStructuralAttrModal(action) {
    const buttonAreas = ['sentence-button', 'entity-button', 'any-type-entity-button'];
    this.app.toggleClass(buttonAreas, 'disabled', action);
  }

  /**
   * Opens the modal for editing an entity start chip and preselects the
   * chip's entity type.
   *
   * @param {Element} queryChipElement - chip whose `data-query` has the form
   *   `<ent_type="...">`.
   */
  editStartEntityChipElement(queryChipElement) {
    this.elements.structuralAttrModal.open();
    this.app.toggleClass(['entity-builder'], 'hide', 'remove');
    this.toggleEditingAreaStructuralAttrModal('add');
    const entityType = queryChipElement.dataset.query.replace(/<ent_type="|">/g, '');
    // Types that are not listed in the english selection are looked up in the german one.
    const englishOption = this.elements.englishEntTypeSelection.querySelector(`option[value=${entityType}]`);
    const targetSelection = englishOption !== null
      ? this.elements.englishEntTypeSelection
      : this.elements.germanEntTypeSelection;
    this.app.resetMaterializeSelection([targetSelection], entityType);
  }
}

View File

@ -0,0 +1,329 @@
/**
 * Query builder extension for positional (token) attributes. It drives the
 * positional attribute modal, builds the CQL text of a token from the user's
 * input and fills the modal again when a token chip is edited.
 */
nopaque.corpus_analysis.query_builder.TokenAttributeBuilderFunctions = class TokenAttributeBuilderFunctions {
  /**
   * @param {object} app - the owning query builder; its `elements` object is
   *   shared with this extension.
   */
  constructor(app) {
    this.app = app;
    this.elements = app.elements;

    this.elements.positionalAttrSelection.addEventListener('change', () => {
      this.preparePositionalAttrModal();
    });

    // Options for positional attribute selection
    document.querySelectorAll('.positional-attr-options-action-button[data-options-action]').forEach(button => {
      button.addEventListener('click', () => {this.actionButtonInOptionSectionHandler(button.dataset.optionsAction);});
    });

    this.elements.tokenSubmitButton.addEventListener('click', () => {this.addTokenToQuery();});

    this.elements.positionalAttrModal = M.Modal.init(
      document.querySelector('#corpus-analysis-concordance-positional-attr-modal'),
      {
        onOpenStart: () => {
          this.preparePositionalAttrModal();
        },
        onCloseStart: () => {
          this.resetPositionalAttrModal();
        }
      }
    );
  }

  /**
   * Brings the modal back into its initial state (all attribute options
   * available, "word" selected, no query rows) and leaves the editing mode.
   */
  resetPositionalAttrModal() {
    let originalSelectionList =
`
<option value="word" selected>word</option>
<option value="lemma" >lemma</option>
<option value="english-pos">english pos</option>
<option value="german-pos">german pos</option>
<option value="simple_pos">simple_pos</option>
<option value="empty-token">empty token</option>
`;
    this.elements.positionalAttrSelection.innerHTML = originalSelectionList;
    this.elements.tokenQuery.innerHTML = '';
    this.elements.tokenBuilderContent.innerHTML = '';
    this.app.toggleClass(['input-field-options'], 'hide', 'remove');
    this.app.toggleClass(['incidence-modifiers', 'or', 'and'], 'disabled', 'add');
    this.app.resetMaterializeSelection([this.elements.positionalAttrSelection], "word");
    this.elements.ignoreCaseCheckbox.checked = false;
    this.elements.editingModusOn = false;
    this.elements.editedQueryChipElementIndex = undefined;
  }

  /**
   * Handles a click on one of the option buttons of the modal.
   *
   * @param {string} elem - the button's `data-options-action` value:
   *   'option-group', 'wildcard-char', 'and' or 'or'.
   */
  actionButtonInOptionSectionHandler(elem) {
    let input = this.tokenInputCheck(this.elements.tokenBuilderContent);
    switch (elem) {
      case 'option-group': {
        this.cursorPositionInputfieldHandler(input, '(option1|option2)');
        // Preselect the first placeholder so that it can be overwritten right away.
        let firstIndex = input.value.indexOf('option1');
        let lastIndex = firstIndex + 'option1'.length;
        input.setSelectionRange(firstIndex, lastIndex);
        break;
      }
      case 'wildcard-char':
        this.cursorPositionInputfieldHandler(input, '.');
        input.focus();
        break;
      case 'and':
        this.conditionHandler('and');
        break;
      case 'or':
        this.conditionHandler('or');
        break;
      default:
        break;
    }
    this.optionToggleHandler();
  }

  /**
   * Inserts text at the cursor position of an input field and places the
   * cursor behind the inserted text.
   *
   * @param {HTMLInputElement} input - field to insert into.
   * @param {string} addedInput - text to insert.
   */
  cursorPositionInputfieldHandler(input, addedInput) {
    let cursorPosition = input.selectionStart;
    let textBeforeCursor = input.value.substring(0, cursorPosition);
    let textAfterCursor = input.value.substring(cursorPosition);
    input.value = textBeforeCursor + addedInput + textAfterCursor;
    let newCursorPosition = cursorPosition + addedInput.length;
    input.setSelectionRange(newCursorPosition, newCursorPosition);
  }

  /**
   * Inserts the character incidence modifier of the clicked button into the
   * current token input.
   *
   * @param {Element} elem - clicked button; the modifier is read from `data-token`.
   */
  characterIncidenceModifierHandler(elem) {
    let input = this.tokenInputCheck(this.elements.tokenBuilderContent);
    this.cursorPositionInputfieldHandler(input, elem.dataset.token);
  }

  /**
   * Reads the n- (and, if the modal has one, m-) value from the given modal,
   * closes it and inserts `{n}` or `{n,m}` into the current token input.
   *
   * @param {string} modalId - id of the modal that was submitted.
   */
  characterNMSubmitHandler(modalId) {
    let modal = document.querySelector(`#${modalId}`);
    let input_n = modal.querySelector('.n-m-input[data-value-type="n"]').value;
    let input_m = modal.querySelector('.n-m-input[data-value-type="m"]') || undefined;
    input_m = input_m !== undefined ? ',' + input_m.value : '';
    let addedInput = `${input_n}${input_m}`;
    let instance = M.Modal.getInstance(modal);
    instance.close();
    let input = this.tokenInputCheck(this.elements.tokenBuilderContent);
    this.cursorPositionInputfieldHandler(input, `{${addedInput}}`);
  }

  /**
   * Moves the current token input into a new row of the token query, marks
   * the row with the chosen condition and prepares a fresh input.
   *
   * @param {string} conditionText - 'and' or 'or'.
   */
  conditionHandler(conditionText) {
    let tokenQueryTemplateClone = this.elements.tokenQueryTemplate.content.cloneNode(true);
    tokenQueryTemplateClone.querySelector('.token-query-template-content').appendChild(this.elements.tokenBuilderContent.firstElementChild);
    // The template holds a marker per condition, only the chosen one is kept.
    let notSelectedButton = tokenQueryTemplateClone.querySelector(`[data-condition-pretty-text]:not([data-condition-pretty-text="${conditionText}"])`);
    let deleteButton = tokenQueryTemplateClone.querySelector(`[data-token-query-content-action="delete"]`);
    deleteButton.addEventListener('click', (event) => {
      this.deleteTokenQueryRow(event.target);
    });
    notSelectedButton.parentNode.removeChild(notSelectedButton);
    this.elements.tokenQuery.appendChild(tokenQueryTemplateClone);
    let lastTokenQueryRow = this.elements.tokenQuery.lastElementChild;
    if (lastTokenQueryRow.querySelector('[data-kind-of-token="word"]') || lastTokenQueryRow.querySelector('[data-kind-of-token="lemma"]')) {
      this.appendIgnoreCaseCheckbox(lastTokenQueryRow.querySelector('.token-query-template-content'), this.elements.ignoreCaseCheckbox.checked);
    }
    this.elements.ignoreCaseCheckbox.checked = false;
    this.setTokenSelection();
  }

  /**
   * Deletes a row of the token query. If the row was connected with "and",
   * its kind of token is offered in the attribute selection again.
   *
   * @param {Element} deleteButton - the delete button inside of the row.
   */
  deleteTokenQueryRow(deleteButton) {
    let deletedRow = deleteButton.closest('.row');
    let condition = deletedRow.querySelector('[data-condition-pretty-text]').dataset.conditionPrettyText;
    if (condition === 'and') {
      let kindOfToken = deletedRow.querySelector('[data-kind-of-token]').dataset.kindOfToken;
      switch (kindOfToken) {
        // Both pos variants are restored together. This used to be written as
        // `case 'english-pos' || 'german-pos':`, which evaluates to
        // `case 'english-pos':` and never matched 'german-pos'.
        case 'english-pos':
        case 'german-pos':
          this.createOptionElementForPosAttrSelection('english-pos');
          this.createOptionElementForPosAttrSelection('german-pos');
          break;
        default:
          this.createOptionElementForPosAttrSelection(kindOfToken);
          break;
      }
      M.FormSelect.init(this.elements.positionalAttrSelection);
    }
    deletedRow.remove();
  }

  /**
   * Appends an option to the attribute selection unless the selection already
   * offers an option with that value.
   *
   * @param {string} kindOfToken - value and label of the option.
   */
  createOptionElementForPosAttrSelection(kindOfToken) {
    // Duplicates would also distort the option count optionToggleHandler() relies on.
    let existingOptions = Array.from(this.elements.positionalAttrSelection.querySelectorAll('option'));
    if (existingOptions.some(existingOption => existingOption.value === kindOfToken)) {
      return;
    }
    let option = document.createElement('option');
    option.value = kindOfToken;
    option.text = kindOfToken;
    this.elements.positionalAttrSelection.appendChild(option);
  }

  /**
   * Appends a copy of the ignore case checkbox template to an element.
   *
   * @param {Element} parentElement - element that receives the checkbox.
   * @param {boolean} [checked=false] - whether the checkbox starts checked.
   */
  appendIgnoreCaseCheckbox(parentElement, checked=false) {
    let ignoreCaseCheckboxClone = document.querySelector('#ignore-case-checkbox-template').content.cloneNode(true);
    parentElement.appendChild(ignoreCaseCheckboxClone);
    M.Tooltip.init(parentElement.querySelectorAll('.tooltipped'));
    if (checked) {
      parentElement.querySelector('input[type="checkbox"]').checked = true;
    }
  }

  /**
   * Removes options from the attribute selection, selects the given attribute
   * and prepares the matching input.
   *
   * @param {string} [selection="word"] - attribute to select.
   * @param {string[]} [optionDeleteList=['empty-token']] - option values to remove.
   */
  setTokenSelection(selection="word", optionDeleteList=['empty-token']) {
    optionDeleteList.forEach(option => {
      let optionElement = this.elements.positionalAttrSelection.querySelector(`option[value=${option}]`);
      if (optionElement !== null) {
        optionElement.remove();
      }
    });
    this.app.resetMaterializeSelection([this.elements.positionalAttrSelection], selection);
    this.preparePositionalAttrModal();
  }

  /**
   * Shows the input that belongs to the currently selected attribute. The
   * empty token needs no input and is added to the query right away.
   */
  preparePositionalAttrModal() {
    let selection = this.elements.positionalAttrSelection.value;
    if (selection !== 'empty-token') {
      let selectionTemplate = document.querySelector(`.token-builder-section[data-token-builder-section="${selection}"]`);
      let selectionTemplateClone = selectionTemplate.content.cloneNode(true);
      this.elements.tokenBuilderContent.innerHTML = '';
      this.elements.tokenBuilderContent.appendChild(selectionTemplateClone);
      // A section holds either a select element or a text input.
      let selectElement = this.elements.tokenBuilderContent.querySelector('select');
      if (selectElement !== null) {
        M.FormSelect.init(selectElement);
        selectElement.addEventListener('change', () => {this.optionToggleHandler();});
      } else {
        this.elements.tokenBuilderContent.querySelector('input').addEventListener('input', () => {this.optionToggleHandler();});
      }
    }
    this.optionToggleHandler();
    if (selection === 'word' || selection === 'lemma') {
      this.app.toggleClass(['input-field-options'], 'hide', 'remove');
    } else if (selection === 'empty-token') {
      this.addTokenToQuery();
    } else {
      this.app.toggleClass(['input-field-options'], 'hide', 'add');
    }
  }

  /**
   * Returns the select element of the given element or, if it has none, its
   * input element.
   *
   * @param {Element} elem - container to search in.
   * @returns {?Element} the select or input element, null if neither exists.
   */
  tokenInputCheck(elem) {
    let selectElement = elem.querySelector('select');
    return selectElement !== null ? selectElement : elem.querySelector('input');
  }

  /**
   * Enables or disables the incidence modifier and condition buttons
   * depending on the current input and on the attributes that are left.
   */
  optionToggleHandler() {
    let input = this.tokenInputCheck(this.elements.tokenBuilderContent);
    if (input.value === '' && this.elements.editingModusOn === false) {
      this.app.toggleClass(['incidence-modifiers', 'or', 'and'], 'disabled', 'add');
    } else if (this.elements.positionalAttrSelection.querySelectorAll('option').length === 1) {
      // With a single attribute left only "or" stays available.
      this.app.toggleClass(['and'], 'disabled', 'add');
      this.app.toggleClass(['or'], 'disabled', 'remove');
    } else {
      this.app.toggleClass(['incidence-modifiers', 'or', 'and'], 'disabled', 'remove');
    }
  }

  /**
   * Builds the label and the CQL text of the token from the query rows and
   * the current input and submits it as a token chip. Without a value no
   * token is added and the user gets a short visual hint instead.
   */
  addTokenToQuery() {
    let tokenQueryPrettyText = '';
    let tokenQueryCQLText = '';
    let input;
    let kindOfToken = this.kindOfTokenCheck(this.elements.positionalAttrSelection.value);

    // Takes all rows of the token query (if there is a query concatenation).
    // Adds their contents to tokenQueryPrettyText and tokenQueryCQLText, which will later be expanded with the current input field.
    let tokenQueryRows = this.elements.tokenQuery.querySelectorAll('.row');
    tokenQueryRows.forEach(row => {
      let ignoreCaseCheckbox = row.querySelector('input[type="checkbox"]');
      let c = ignoreCaseCheckbox !== null && ignoreCaseCheckbox.checked ? ' %c' : '';
      let tokenQueryRowInput = this.tokenInputCheck(row.querySelector('.token-query-template-content'));
      let tokenQueryKindOfToken = this.kindOfTokenCheck(tokenQueryRowInput.closest('.input-field').dataset.kindOfToken);
      let tokenConditionPrettyText = row.querySelector('[data-condition-pretty-text]').dataset.conditionPrettyText;
      let tokenConditionCQLText = row.querySelector('[data-condition-cql-text]').dataset.conditionCqlText;
      tokenQueryPrettyText += `${tokenQueryKindOfToken}=${tokenQueryRowInput.value}${c} ${tokenConditionPrettyText} `;
      tokenQueryCQLText += `${tokenQueryKindOfToken}="${tokenQueryRowInput.value}"${c} ${tokenConditionCQLText}`;
    });

    if (kindOfToken === 'empty-token') {
      tokenQueryPrettyText += 'empty token';
    } else {
      let c = this.elements.ignoreCaseCheckbox.checked ? ' %c' : '';
      input = this.tokenInputCheck(this.elements.tokenBuilderContent);
      tokenQueryPrettyText += `${kindOfToken}=${input.value}${c}`;
      tokenQueryCQLText += `${kindOfToken}="${input.value}"${c}`;
    }

    // Every kind of token except the empty token needs a value.
    if (this.elements.positionalAttrSelection.value !== 'empty-token' && input.value === '') {
      this.disableTokenSubmit();
    } else {
      tokenQueryCQLText = `[${tokenQueryCQLText}]`;
      this.app.submitQueryChipElement('token', tokenQueryPrettyText, tokenQueryCQLText, null, false, kindOfToken !== 'empty-token');
      this.elements.positionalAttrModal.close();
    }
  }

  /**
   * Maps the two pos selections to the attribute name used in the query.
   *
   * @param {string} kindOfToken - value of the attribute selection.
   * @returns {string} 'pos' for 'english-pos' and 'german-pos', otherwise the value itself.
   */
  kindOfTokenCheck(kindOfToken) {
    return kindOfToken === 'english-pos' || kindOfToken === 'german-pos' ? 'pos' : kindOfToken;
  }

  /**
   * Signals a missing value: the submit button turns red for 500 ms and the
   * "no value" message is shown for 3 seconds.
   */
  disableTokenSubmit() {
    this.elements.tokenSubmitButton.classList.add('red');
    this.elements.noValueMessage.classList.remove('hide');
    setTimeout(() => {
      this.elements.tokenSubmitButton.classList.remove('red');
    }, 500);
    setTimeout(() => {
      this.elements.noValueMessage.classList.add('hide');
    }, 3000);
  }

  /**
   * Opens the modal and fills it with the parts of an existing token chip.
   *
   * @param {{tokenAttr: string, tokenValue: string, ignoreCase: boolean, condition: (string|undefined)}[]} queryElementsContent
   *   the parts of the token as returned by prepareTokenQueryElementsContent().
   */
  editTokenChipElement(queryElementsContent) {
    this.elements.positionalAttrModal.open();
    queryElementsContent.forEach((queryElement) => {
      this.app.resetMaterializeSelection([this.elements.positionalAttrSelection], queryElement.tokenAttr);
      this.preparePositionalAttrModal();
      switch (queryElement.tokenAttr) {
        case 'word':
        case 'lemma':
          this.elements.tokenBuilderContent.querySelector('input').value = queryElement.tokenValue;
          break;
        case 'english-pos': {
          // English-pos is selected by default. Then it is checked whether the passed token value occurs in the english-pos selection. If not, the selection is reset and changed to german-pos.
          // The option values are compared directly: the token value stems
          // from the query text and, used in an unquoted attribute selector,
          // would make querySelector throw for anything that is not a CSS identifier.
          let selection = this.elements.tokenBuilderContent.querySelector('select');
          let isEnglishPos = Array.from(selection.querySelectorAll('option')).some(option => option.value === queryElement.tokenValue);
          queryElement.tokenAttr = isEnglishPos ? 'english-pos' : 'german-pos';
          this.app.resetMaterializeSelection([this.elements.positionalAttrSelection], queryElement.tokenAttr);
          this.preparePositionalAttrModal();
          this.app.resetMaterializeSelection([this.elements.tokenBuilderContent.querySelector('select')], queryElement.tokenValue);
          break;
        }
        case 'simple_pos':
          this.app.resetMaterializeSelection([this.elements.tokenBuilderContent.querySelector('select')], queryElement.tokenValue);
          break;
        default:
          break;
      }
      if (queryElement.ignoreCase) {
        this.elements.ignoreCaseCheckbox.checked = true;
      }
      if (queryElement.condition !== undefined) {
        this.conditionHandler(queryElement.condition);
      }
    });
  }

  /**
   * Splits the query of a token chip into its attribute/value parts.
   *
   * @param {Element} queryChipElement - token chip; its `data-query` is parsed.
   * @returns {{tokenAttr: string, tokenValue: string, ignoreCase: boolean, condition: (string|undefined)}[]}
   *   one entry per attribute/value pair. `condition` is 'and' or 'or' if the
   *   pair is followed by `&` or `|`, otherwise undefined.
   */
  prepareTokenQueryElementsContent(queryChipElement) {
    //this regex searches for word or lemma or pos or simple_pos="any string (also quotation marks escaped by backslash) within double quotes" followed by one or no ignore case markers, followed by one or no condition characters.
    let regex = new RegExp('(word|lemma|pos|simple_pos)=("(?:[^"\\\\]|\\\\")*") ?(%c)? ?(\\&|\\|)?', 'gm');
    let m;
    let queryElementsContent = [];
    while ((m = regex.exec(queryChipElement.dataset.query)) !== null) {
      // this is necessary to avoid infinite loops with zero-width matches
      if (m.index === regex.lastIndex) {
        regex.lastIndex++;
      }
      let tokenAttr = m[1];
      // Passes english-pos by default so that the template is added. In editTokenChipElement it is then checked whether it is english-pos or german-pos.
      if (tokenAttr === 'pos') {
        tokenAttr = 'english-pos';
      }
      // Removes the surrounding quotes, escaped quotes inside of the value stay.
      let tokenValue = m[2].replace(/(?<!\\)"/g, '');
      // Group 3 is the optional ignore case marker, group 4 the optional condition character.
      let ignoreCase = m[3] === '%c';
      let condition = undefined;
      if (m[4] === '&') {
        condition = 'and';
      } else if (m[4] === '|') {
        condition = 'or';
      }
      queryElementsContent.push({tokenAttr: tokenAttr, tokenValue: tokenValue, ignoreCase: ignoreCase, condition: condition});
    }
    return queryElementsContent;
  }
}

View File

@ -1,4 +1,4 @@
class CorpusAnalysisReader { nopaque.corpus_analysis.ReaderExtension = class ReaderExtension {
name = 'Reader'; name = 'Reader';
constructor(app) { constructor(app) {
@ -7,71 +7,75 @@ class CorpusAnalysisReader {
this.data = {}; this.data = {};
this.elements = { this.elements = {
// TODO: Prefix elements with "corpus-analysis-app-" container: document.querySelector(`#corpus-analysis-reader-container`),
container: document.querySelector('#reader-extension-container'), corpus: document.querySelector(`#corpus-analysis-reader-corpus`),
error: document.querySelector('#reader-extension-error'), corpusPagination: document.querySelector(`#corpus-analysis-reader-corpus-pagination`),
form: document.querySelector('#reader-extension-form'), error: document.querySelector(`#corpus-analysis-reader-error`),
progress: document.querySelector('#reader-extension-progress'), progress: document.querySelector(`#corpus-analysis-reader-progress`),
corpus: document.querySelector('#reader-extension-corpus'), userInterfaceForm: document.querySelector(`#corpus-analysis-reader-user-interface-form`)
corpusPagination: document.querySelector('#reader-extension-corpus-pagination')
}; };
this.settings = { this.settings = {
perPage: parseInt(this.elements.form['per-page'].value), perPage: parseInt(this.elements.userInterfaceForm['per-page'].value),
textStyle: parseInt(this.elements.form['text-style'].value), textStyle: parseInt(this.elements.userInterfaceForm['text-style'].value),
tokenRepresentation: this.elements.form['token-representation'].value tokenRepresentation: this.elements.userInterfaceForm['token-representation'].value,
pagination: {
innerWindow: 5,
outerWindow: 1
}
} }
this.app.registerExtension(this); this.app.registerExtension(this);
} }
init() { async submitForm() {
// Init data
this.data.corpus = this.app.data.corpus;
this.data.subcorpora = {};
// Add event listeners
this.elements.form.addEventListener('submit', (event) => {
event.preventDefault();
this.app.disableActionElements(); this.app.disableActionElements();
this.elements.error.innerText = ''; this.elements.error.innerText = '';
this.elements.error.classList.add('hide'); this.elements.error.classList.add('hide');
this.elements.progress.classList.remove('hide'); this.elements.progress.classList.remove('hide');
this.data.corpus.o.paginate(1, this.settings.perPage) try {
.then( const paginatedCorpus = await this.data.corpus.o.paginate(1, this.settings.perPage);
paginatedCorpus => {
this.data.corpus.p = paginatedCorpus; this.data.corpus.p = paginatedCorpus;
this.renderCorpus(); this.renderCorpus();
this.renderCorpusPagination(); this.renderCorpusPagination();
this.elements.progress.classList.add('hide'); this.elements.progress.classList.add('hide');
this.app.enableActionElements(); } catch (error) {
}, let errorString = '';
error => { if ('code' in error) {errorString += `[${error.code}] `;}
this.elements.error.innerText = JSON.stringify(error); errorString += `${error.constructor.name}`;
if ('description' in error) {errorString += `: ${error.description}`;}
this.elements.error.innerText = errorString;
this.elements.error.classList.remove('hide'); this.elements.error.classList.remove('hide');
if ('payload' in error && 'code' in error.payload && 'msg' in error.payload) { app.flash(errorString, 'error');
app.flash(`${error.payload.code}: ${error.payload.msg}`, 'error');
}
this.elements.progress.classList.add('hide'); this.elements.progress.classList.add('hide');
}
this.app.enableActionElements(); this.app.enableActionElements();
} }
);
async init() {
// Init data
this.data.corpus = this.app.data.corpus;
// Add event listeners
this.elements.userInterfaceForm.addEventListener('submit', (event) => {
event.preventDefault();
this.submitForm();
}); });
this.elements.form.addEventListener('change', event => { this.elements.userInterfaceForm.addEventListener('change', (event) => {
if (event.target === this.elements.form['per-page']) { if (event.target === this.elements.userInterfaceForm['per-page']) {
this.settings.perPage = parseInt(this.elements.form['per-page'].value); this.settings.perPage = parseInt(this.elements.userInterfaceForm['per-page'].value);
this.elements.form.submit.click(); this.submitForm();
} }
if (event.target === this.elements.form['text-style']) { if (event.target === this.elements.userInterfaceForm['text-style']) {
this.settings.textStyle = parseInt(this.elements.form['text-style'].value); this.settings.textStyle = parseInt(this.elements.userInterfaceForm['text-style'].value);
this.setTextStyle(); this.setTextStyle();
} }
if (event.target === this.elements.form['token-representation']) { if (event.target === this.elements.userInterfaceForm['token-representation']) {
this.settings.tokenRepresentation = this.elements.form['token-representation'].value; this.settings.tokenRepresentation = this.elements.userInterfaceForm['token-representation'].value;
this.setTokenRepresentation(); this.setTokenRepresentation();
} }
}); });
// Load initial data // Load initial data
this.elements.form.submit.click(); await this.submitForm();
} }
clearCorpus() { clearCorpus() {
@ -108,7 +112,7 @@ class CorpusAnalysisReader {
if (this.data.corpus.p.pages === 0) {return;} if (this.data.corpus.p.pages === 0) {return;}
let pageElement; let pageElement;
// First page button. Disables first page button if on first page // First page button. Disables first page button if on first page
pageElement = Utils.HTMLToElement( pageElement = nopaque.Utils.HTMLToElement(
` `
<li class="${this.data.corpus.p.page === 1 ? 'disabled' : 'waves-effect'}"> <li class="${this.data.corpus.p.page === 1 ? 'disabled' : 'waves-effect'}">
<a class="corpus-analysis-action pagination-trigger" ${this.data.corpus.p.page === 1 ? '' : 'data-target="1"'}> <a class="corpus-analysis-action pagination-trigger" ${this.data.corpus.p.page === 1 ? '' : 'data-target="1"'}>
@ -119,7 +123,7 @@ class CorpusAnalysisReader {
); );
this.elements.corpusPagination.appendChild(pageElement); this.elements.corpusPagination.appendChild(pageElement);
// Previous page button. Disables previous page button if on first page // Previous page button. Disables previous page button if on first page
pageElement = Utils.HTMLToElement( pageElement = nopaque.Utils.HTMLToElement(
` `
<li class="${this.data.corpus.p.has_prev ? 'waves-effect' : 'disabled'}"> <li class="${this.data.corpus.p.has_prev ? 'waves-effect' : 'disabled'}">
<a class="corpus-analysis-action pagination-trigger" ${this.data.corpus.p.has_prev ? 'data-target="' + this.data.corpus.p.prev_num + '"' : ''}> <a class="corpus-analysis-action pagination-trigger" ${this.data.corpus.p.has_prev ? 'data-target="' + this.data.corpus.p.prev_num + '"' : ''}>
@ -131,7 +135,7 @@ class CorpusAnalysisReader {
this.elements.corpusPagination.appendChild(pageElement); this.elements.corpusPagination.appendChild(pageElement);
// First page as number. Hides first page button if on first page // First page as number. Hides first page button if on first page
if (this.data.corpus.p.page > 6) { if (this.data.corpus.p.page > 6) {
pageElement = Utils.HTMLToElement( pageElement = nopaque.Utils.HTMLToElement(
` `
<li class="waves-effect"> <li class="waves-effect">
<a class="corpus-analysis-action pagination-trigger" data-target="1">1</a> <a class="corpus-analysis-action pagination-trigger" data-target="1">1</a>
@ -139,14 +143,14 @@ class CorpusAnalysisReader {
` `
); );
this.elements.corpusPagination.appendChild(pageElement); this.elements.corpusPagination.appendChild(pageElement);
pageElement = Utils.HTMLToElement("<li style='margin-top: 5px;'>&hellip;</li>"); pageElement = nopaque.Utils.HTMLToElement("<li style='margin-top: 5px;'>&hellip;</li>");
this.elements.corpusPagination.appendChild(pageElement); this.elements.corpusPagination.appendChild(pageElement);
} }
// render page buttons (5 before and 5 after current page) // render page buttons (5 before and 5 after current page)
for (let i = this.data.corpus.p.page -5; i <= this.data.corpus.p.page; i++) { for (let i = this.data.corpus.p.page - this.settings.pagination.innerWindow; i <= this.data.corpus.p.page; i++) {
if (i <= 0) {continue;} if (i <= 0) {continue;}
pageElement = Utils.HTMLToElement( pageElement = nopaque.Utils.HTMLToElement(
` `
<li class="${i === this.data.corpus.p.page ? 'active' : 'waves-effect'}"> <li class="${i === this.data.corpus.p.page ? 'active' : 'waves-effect'}">
<a class="corpus-analysis-action pagination-trigger" ${i === this.data.corpus.p.page ? '' : 'data-target="' + i + '"'}>${i}</a> <a class="corpus-analysis-action pagination-trigger" ${i === this.data.corpus.p.page ? '' : 'data-target="' + i + '"'}>${i}</a>
@ -155,9 +159,9 @@ class CorpusAnalysisReader {
); );
this.elements.corpusPagination.appendChild(pageElement); this.elements.corpusPagination.appendChild(pageElement);
}; };
for (let i = this.data.corpus.p.page +1; i <= this.data.corpus.p.page +5; i++) { for (let i = this.data.corpus.p.page +1; i <= this.data.corpus.p.page + this.settings.pagination.innerWindow; i++) {
if (i > this.data.corpus.p.pages) {break;} if (i > this.data.corpus.p.pages) {break;}
pageElement = Utils.HTMLToElement( pageElement = nopaque.Utils.HTMLToElement(
` `
<li class="${i === this.data.corpus.p.page ? 'active' : 'waves-effect'}"> <li class="${i === this.data.corpus.p.page ? 'active' : 'waves-effect'}">
<a class="corpus-analysis-action pagination-trigger" ${i === this.data.corpus.p.page ? '' : 'data-target="' + i + '"'}>${i}</a> <a class="corpus-analysis-action pagination-trigger" ${i === this.data.corpus.p.page ? '' : 'data-target="' + i + '"'}>${i}</a>
@ -168,9 +172,9 @@ class CorpusAnalysisReader {
}; };
// Last page as number. Hides last page button if on last page // Last page as number. Hides last page button if on last page
if (this.data.corpus.p.page < this.data.corpus.p.pages - 6) { if (this.data.corpus.p.page < this.data.corpus.p.pages - 6) {
pageElement = Utils.HTMLToElement("<li style='margin-top: 5px;'>&hellip;</li>"); pageElement = nopaque.Utils.HTMLToElement("<li style='margin-top: 5px;'>&hellip;</li>");
this.elements.corpusPagination.appendChild(pageElement); this.elements.corpusPagination.appendChild(pageElement);
pageElement = Utils.HTMLToElement( pageElement = nopaque.Utils.HTMLToElement(
` `
<li class="waves-effect"> <li class="waves-effect">
<a class="corpus-analysis-action pagination-trigger" data-target="${this.data.corpus.p.pages}">${this.data.corpus.p.pages}</a> <a class="corpus-analysis-action pagination-trigger" data-target="${this.data.corpus.p.pages}">${this.data.corpus.p.pages}</a>
@ -180,7 +184,7 @@ class CorpusAnalysisReader {
this.elements.corpusPagination.appendChild(pageElement); this.elements.corpusPagination.appendChild(pageElement);
} }
// Next page button. Disables next page button if on last page // Next page button. Disables next page button if on last page
pageElement = Utils.HTMLToElement( pageElement = nopaque.Utils.HTMLToElement(
` `
<li class="${this.data.corpus.p.has_next ? 'waves-effect' : 'disabled'}"> <li class="${this.data.corpus.p.has_next ? 'waves-effect' : 'disabled'}">
<a class="corpus-analysis-action pagination-trigger" ${this.data.corpus.p.has_next ? 'data-target="' + this.data.corpus.p.next_num + '"' : ''}> <a class="corpus-analysis-action pagination-trigger" ${this.data.corpus.p.has_next ? 'data-target="' + this.data.corpus.p.next_num + '"' : ''}>
@ -191,7 +195,7 @@ class CorpusAnalysisReader {
); );
this.elements.corpusPagination.appendChild(pageElement); this.elements.corpusPagination.appendChild(pageElement);
// Last page button. Disables last page button if on last page // Last page button. Disables last page button if on last page
pageElement = Utils.HTMLToElement( pageElement = nopaque.Utils.HTMLToElement(
` `
<li class="${this.data.corpus.p.page === this.data.corpus.p.pages ? 'disabled' : 'waves-effect'}"> <li class="${this.data.corpus.p.page === this.data.corpus.p.pages ? 'disabled' : 'waves-effect'}">
<a class="corpus-analysis-action pagination-trigger" ${this.data.corpus.p.page === this.data.corpus.p.pages ? '' : 'data-target="' + this.data.corpus.p.pages + '"'}> <a class="corpus-analysis-action pagination-trigger" ${this.data.corpus.p.page === this.data.corpus.p.pages ? '' : 'data-target="' + this.data.corpus.p.pages + '"'}>
@ -203,7 +207,7 @@ class CorpusAnalysisReader {
this.elements.corpusPagination.appendChild(pageElement); this.elements.corpusPagination.appendChild(pageElement);
for (let paginateTriggerElement of this.elements.corpusPagination.querySelectorAll('.pagination-trigger[data-target]')) { for (let paginateTriggerElement of this.elements.corpusPagination.querySelectorAll('.pagination-trigger[data-target]')) {
paginateTriggerElement.addEventListener('click', event => { paginateTriggerElement.addEventListener('click', (event) => {
event.preventDefault(); event.preventDefault();
let page = parseInt(paginateTriggerElement.dataset.target); let page = parseInt(paginateTriggerElement.dataset.target);
this.page(page); this.page(page);
@ -247,7 +251,7 @@ class CorpusAnalysisReader {
this.elements.progress.classList.remove('hide'); this.elements.progress.classList.remove('hide');
this.data.corpus.o.paginate(pageNum, this.settings.perPage) this.data.corpus.o.paginate(pageNum, this.settings.perPage)
.then( .then(
paginatedCorpus => { (paginatedCorpus) => {
this.data.corpus.p = paginatedCorpus; this.data.corpus.p = paginatedCorpus;
this.renderCorpus(); this.renderCorpus();
this.renderCorpusPagination(); this.renderCorpusPagination();

View File

@ -0,0 +1,446 @@
/**
 * Corpus analysis extension that renders pre-computed ("static") corpus
 * statistics: general corpus counts, a per-text info list, a text proportions
 * plot, a token frequency list/plot and a modal for editing stopword lists.
 *
 * All figures are read from `this.app.data.corpus.o.staticData`. Plots are
 * drawn with Plotly, modal/select/chips widgets are Materialize components.
 */
nopaque.corpus_analysis.StaticVisualizationExtension = class StaticVisualizationExtension {
  name = 'Static Visualization (beta)';

  /**
   * @param {object} app - Corpus analysis app; the extension registers itself with it.
   */
  constructor(app) {
    this.app = app;
    this.data = {
      // Working copy of the stopword lists (language -> array of words, plus
      // 'user_stopwords'); undefined until getStopwords() has resolved.
      stopwords: undefined,
      // Unmodified lists as delivered by the server, used by "reset".
      originalStopwords: {},
      // Snapshot taken when the settings modal opens, restored on "cancel".
      stopwordCache: {},
      promises: {getStopwords: undefined},
      tokenSet: new Set(),
      // Controls the listeners registered by the most recent render of the
      // stopword settings modal.
      stopwordModalAbortController: undefined
    };
    this.app.registerExtension(this);
  }

  /**
   * Render all static views once and wire up the UI event listeners.
   */
  init() {
    // Init data
    this.data.corpus = this.app.data.corpus;
    this.renderGeneralCorpusInfo();
    this.renderTextInfoList();
    this.renderTextProportionsGraphic();
    this.renderTokenList();
    // Add event listeners
    let frequenciesStopwordSettingModal = document.querySelector('#frequencies-stopwords-setting-modal');
    let frequenciesStopwordSettingModalButton = document.querySelector('#frequencies-stopwords-setting-modal-button');
    frequenciesStopwordSettingModalButton.addEventListener('click', () => {
      // Snapshot the current lists so that "cancel" can restore them.
      this.data.stopwordCache = structuredClone(this.data.stopwords);
      this.renderStopwordSettingsModal(this.data.stopwords);
      M.Modal.init(frequenciesStopwordSettingModal, {dismissible: false});
    });
    // The graph mode button carrying the "disabled" class marks the active
    // mode; the render methods read the graph type from that button.
    let textProportionsGraphModeButtons = document.querySelectorAll('.text-proportions-graph-mode-button');
    textProportionsGraphModeButtons.forEach(graphModeButton => {
      graphModeButton.addEventListener('click', (event) => {
        textProportionsGraphModeButtons.forEach(btn => {
          btn.classList.remove('disabled');
        });
        event.target.closest('.text-proportions-graph-mode-button').classList.add('disabled');
        this.renderTextProportionsGraphic();
      });
    });
    let frequenciesTokenCategoryDropdownElement = document.querySelector('[data-target="frequencies-token-category-dropdown"]');
    let frequenciesTokenCategoryDropdownListElement = document.querySelector("#frequencies-token-category-dropdown");
    frequenciesTokenCategoryDropdownListElement.addEventListener('click', (event) => {
      // The dropdown trigger's first child text doubles as the storage for the
      // selected token category (see filterData()).
      frequenciesTokenCategoryDropdownElement.firstChild.textContent = event.target.innerHTML;
      this.renderTokenList();
    });
    let frequenciesGraphModeButtons = document.querySelectorAll('.frequencies-graph-mode-button');
    frequenciesGraphModeButtons.forEach(graphModeButton => {
      graphModeButton.addEventListener('click', (event) => {
        frequenciesGraphModeButtons.forEach(btn => {
          btn.classList.remove('disabled');
        });
        event.target.closest('.frequencies-graph-mode-button').classList.add('disabled');
        this.renderFrequenciesGraphic(this.data.tokenSet);
      });
    });
    for (let actionButton of document.querySelectorAll('.frequencies-stopword-setting-modal-action-buttons')) {
      actionButton.addEventListener('click', (event) => {
        let action = event.target.closest('.frequencies-stopword-setting-modal-action-buttons').dataset.action;
        if (action === 'submit') {
          this.renderTokenList();
        } else if (action === 'cancel') {
          // Discard the edits made while the modal was open.
          this.data.stopwords = structuredClone(this.data.stopwordCache);
        }
      });
    }
  }

  /**
   * Request the stopword lists and store them as both the pristine and the
   * working copy.
   *
   * The request promise is chained directly, so a failing request or an
   * unparsable response rejects the returned promise instead of leaving it
   * pending forever.
   *
   * @returns {Promise<object>} Resolves with the working copy (`this.data.stopwords`).
   */
  getStopwords() {
    this.data.promises.getStopwords = nopaque.requests.corpora.entity.getStopwords()
      .then((response) => response.json())
      .then((json) => {
        this.data.originalStopwords = structuredClone(json);
        this.data.stopwords = structuredClone(json);
        return this.data.stopwords;
      });
    return this.data.promises.getStopwords;
  }

  /**
   * Write the corpus-wide counts (tokens, sentences, unique words, lemmas,
   * pos and simple_pos) into their elements.
   */
  renderGeneralCorpusInfo() {
    let corpusData = this.data.corpus.o.staticData;
    document.querySelector('.corpus-num-tokens').innerHTML = corpusData.corpus.bounds[1] - corpusData.corpus.bounds[0];
    document.querySelector('.corpus-num-s').innerHTML = corpusData.s_attrs.s.lexicon.length;
    document.querySelector('.corpus-num-unique-words').innerHTML = Object.entries(corpusData.corpus.freqs.word).length;
    document.querySelector('.corpus-num-unique-lemmas').innerHTML = Object.entries(corpusData.corpus.freqs.lemma).length;
    document.querySelector('.corpus-num-unique-pos').innerHTML = Object.entries(corpusData.corpus.freqs.pos).length;
    document.querySelector('.corpus-num-unique-simple-pos').innerHTML = Object.entries(corpusData.corpus.freqs.simple_pos).length;
  }

  /**
   * Fill the text info list with one entry per text of the corpus and update
   * the text count chip.
   */
  renderTextInfoList() {
    let corpusData = this.data.corpus.o.staticData;
    let corpusTextInfoListElement = document.querySelector('.corpus-text-info-list');
    let corpusTextInfoList = new nopaque.resource_lists.CorpusTextInfoList(corpusTextInfoListElement);
    let texts = corpusData.s_attrs.text.lexicon;
    let textData = [];
    for (let i = 0; i < Object.entries(texts).length; i++) {
      let text = texts[i];
      let textValues = corpusData.values.s_attrs.text[i];
      let resource = {
        title: textValues.title,
        publishing_year: textValues.publishing_year,
        num_tokens: text.bounds[1] - text.bounds[0],
        // Count the sentences whose bounds lie completely within the text.
        num_sentences: corpusData.s_attrs.s.lexicon.filter((s) => {
          return s.bounds[0] >= text.bounds[0] && s.bounds[1] <= text.bounds[1];
        }).length,
        num_unique_words: Object.entries(text.freqs.word).length,
        num_unique_lemmas: Object.entries(text.freqs.lemma).length,
        num_unique_pos: Object.entries(text.freqs.pos).length,
        num_unique_simple_pos: Object.entries(text.freqs.simple_pos).length
      };
      textData.push(resource);
    }
    corpusTextInfoList.add(textData);
    let textCountChipElement = document.querySelector('.text-count-chip');
    textCountChipElement.innerHTML = `Text count: ${corpusData.s_attrs.text.lexicon.length}`;
  }

  /**
   * Plot the size of every text, using the graph type of the currently
   * selected (i.e. "disabled") text proportions graph mode button.
   */
  renderTextProportionsGraphic() {
    let corpusData = this.data.corpus.o.staticData;
    let textProportionsGraphicElement = document.querySelector('#text-proportions-graphic');
    let texts = Object.entries(corpusData.s_attrs.text.lexicon);
    let graphtype = document.querySelector('.text-proportions-graph-mode-button.disabled').dataset.graphType;
    let textProportionsTitleElement = document.querySelector('#text-proportions-title-element');
    if (graphtype === 'bar') {
      textProportionsTitleElement.innerHTML = 'Bounds';
    } else if (graphtype === 'pie') {
      textProportionsTitleElement.innerHTML = 'Proportions';
    }
    let graphData = this.createTextProportionsGraphData(texts, graphtype);
    let graphLayout = {
      barmode: graphtype === 'bar' ? 'relative' : '',
      type: graphtype,
      showgrid: false,
      height: 447,
      margin: {
        l: 10,
        r: 10,
        b: graphtype === 'bar' ? 80 : 10,
        t: graphtype === 'bar' ? 80 : 10,
      },
      legend: {
        "orientation": "h",
        font: {
          size: 10
        }
      },
      xaxis: {
        rangemode: 'nonnegative',
        autorange: true
      },
      yaxis: {
        autorange: true,
        showticklabels: false
      }
    };
    let config = {
      responsive: true,
      modeBarButtonsToRemove: ['zoom2d', 'select2d', 'lasso2d', 'zoomIn2d', 'zoomOut2d', 'autoScale2d', 'resetScale2d'],
      displaylogo: false
    };
    Plotly.newPlot(textProportionsGraphicElement, graphData, graphLayout, config);
  }

  /**
   * Build the Plotly traces for the text proportions plot.
   *
   * @param {Array} texts - `Object.entries()` of the text lexicon, i.e. [id, text] pairs.
   * @param {string} graphtype - 'bar' yields one horizontal bar trace per text;
   *   any other value yields a single trace of that type covering all texts.
   * @returns {Array<object>} Plotly trace objects.
   */
  createTextProportionsGraphData(texts, graphtype) {
    let corpusData = this.data.corpus.o.staticData;
    let graphData = [];
    switch (graphtype) {
      case 'bar':
        for (let text of texts) {
          let textData = {
            type: 'bar',
            orientation: 'h',
            x: [text[1].bounds[1] - text[1].bounds[0]],
            y: [0.5],
            text: [`${text[1].bounds[0]} - ${text[1].bounds[1]}`],
            name: `${corpusData.values.s_attrs.text[text[0]].title} (${corpusData.values.s_attrs.text[text[0]].publishing_year})`,
            hovertemplate: `${text[1].bounds[0]} - ${text[1].bounds[1]}`,
          };
          graphData.push(textData);
        }
        break;
      default:
        graphData = [
          {
            values: texts.map(text => text[1].bounds[1] - text[1].bounds[0]),
            labels: texts.map(text => `${corpusData.values.s_attrs.text[text[0]].title} (${corpusData.values.s_attrs.text[text[0]].publishing_year})`),
            type: graphtype
          }
        ];
        break;
    }
    return graphData;
  }

  /**
   * Fill the token list with every term of the selected token category that
   * is not a stopword. The four most frequent of these terms are flagged with
   * `mostFrequent: true`.
   *
   * Loads the stopword lists first if they have not been fetched yet.
   *
   * @returns {Promise<void>}
   */
  async renderTokenList() {
    let corpusTokenListElement = document.querySelector('.corpus-token-list');
    let corpusTokenList = new nopaque.resource_lists.CorpusTokenList(corpusTokenListElement);
    let filteredData = this.filterData();
    let stopwords = this.data.stopwords;
    if (stopwords === undefined) {
      stopwords = await this.getStopwords();
    }
    // A Set gives constant-time lookups; the token list can be large.
    let stopwordSet = new Set(Object.values(stopwords).flat());
    let mostFrequent = Object.entries(filteredData)
      .sort((a, b) => b[1].count - a[1].count)
      .filter(item => !stopwordSet.has(item[0].toLowerCase()))
      .slice(0, 4)
      .map(item => item[0]);
    let tokenData = Object.entries(filteredData)
      .filter(item => !stopwordSet.has(item[0]))
      .map(item => {
        return {
          term: item[0],
          count: item[1].count,
          mostFrequent: mostFrequent.includes(item[0])
        };
      });
    corpusTokenList.add(tokenData);
  }

  /**
   * Aggregate the corpus-wide frequencies of the selected token category by
   * lowercased term.
   *
   * @returns {Object<string, {count: number, originalIds: number[]}>} Maps
   *   each lowercased term to its summed count and to the ids of the original
   *   (case-sensitive) values merged into it. The object has no prototype, so
   *   terms such as "constructor" or "__proto__" are stored like any other.
   */
  filterData() {
    let frequenciesTokenCategoryDropdownElement = document.querySelector('[data-target="frequencies-token-category-dropdown"]');
    let tokenCategory = frequenciesTokenCategoryDropdownElement.firstChild.textContent.toLowerCase();
    let corpusData = this.data.corpus.o.staticData;
    let freqs = corpusData.corpus.freqs[tokenCategory];
    // Determine the number of ids once instead of on every loop iteration.
    let numIds = Object.values(freqs).length;
    let filteredData = Object.create(null);
    for (let i = 0; i < numIds; i++) {
      let term = corpusData.values.p_attrs[tokenCategory][i].toLowerCase();
      let count = freqs[i];
      let entry = filteredData[term];
      if (entry === undefined) {
        filteredData[term] = {
          count: count,
          originalIds: [i]
        };
      } else {
        entry.count += count;
        entry.originalIds.push(i);
      }
    }
    return filteredData;
  }

  /**
   * Plot the per-text frequencies of the given tokens, using the graph type
   * of the currently selected (i.e. "disabled") frequencies graph mode button.
   *
   * @param {Iterable<string>} tokenSet - Terms to plot; remembered in
   *   `this.data.tokenSet` so the plot can be redrawn on a graph mode change.
   */
  renderFrequenciesGraphic(tokenSet) {
    this.data.tokenSet = tokenSet;
    let corpusData = this.data.corpus.o.staticData;
    let frequenciesTokenCategoryDropdownElement = document.querySelector('[data-target="frequencies-token-category-dropdown"]');
    let frequenciesGraphicElement = document.querySelector('#frequencies-graphic');
    let texts = Object.entries(corpusData.s_attrs.text.lexicon);
    let graphtype = document.querySelector('.frequencies-graph-mode-button.disabled').dataset.graphType;
    let tokenCategory = frequenciesTokenCategoryDropdownElement.firstChild.textContent.toLowerCase();
    let graphData = this.createFrequenciesGraphData(tokenCategory, texts, graphtype, tokenSet);
    let graphLayout = {
      barmode: graphtype === 'bar' ? 'stack' : '',
      yaxis: {
        showticklabels: graphtype === 'markers' ? false : true
      },
      height: 627,
      margin: {
        l: 33
      }
    };
    let config = {
      responsive: true,
      modeBarButtonsToRemove: ['zoom2d', 'select2d', 'lasso2d', 'zoomIn2d', 'zoomOut2d', 'autoScale2d', 'resetScale2d'],
      displaylogo: false
    };
    Plotly.newPlot(frequenciesGraphicElement, graphData, graphLayout, config);
  }

  /**
   * Build one Plotly trace per token with the token's frequency in each text.
   *
   * Tokens that do not occur in the selected token category are skipped.
   *
   * @param {string} tokenCategory - Key into the per-text `freqs` objects.
   * @param {Array} texts - `Object.entries()` of the text lexicon, i.e. [id, text] pairs.
   * @param {string} graphtype - 'markers' yields bubble traces (marker size =
   *   count); any other value is passed through as the trace type.
   * @param {Iterable<string>} tokenSet - Lowercased terms to plot.
   * @returns {Array<object>} Plotly trace objects.
   */
  createFrequenciesGraphData(tokenCategory, texts, graphtype, tokenSet) {
    let corpusData = this.data.corpus.o.staticData;
    let graphData = [];
    let filteredData = this.filterData();
    // The x axis labels are identical for all tokens, so build them once.
    let textTitles = texts.map(text => `${corpusData.values.s_attrs.text[text[0]].title} (${corpusData.values.s_attrs.text[text[0]].publishing_year})`);
    for (let item of tokenSet) {
      let tokenInfo = filteredData[item];
      if (tokenInfo === undefined) {continue;}
      // Sum the counts of all original (case-sensitive) values per text.
      let tokenCountPerText = texts.map(text => {
        let textFreqs = text[1].freqs[tokenCategory];
        return tokenInfo.originalIds.reduce((sum, originalId) => sum + (textFreqs[originalId] || 0), 0);
      });
      if (graphtype === 'markers') {
        graphData.push({
          x: [...textTitles],
          y: texts.map(() => item),
          name: item,
          // Hover text shows the count for the respective text only.
          text: tokenCountPerText.map(count => `${item}<br>${count}`),
          mode: 'markers',
          marker: {
            size: tokenCountPerText,
            sizeref: 0.4
          }
        });
      } else {
        graphData.push({
          x: [...textTitles],
          y: tokenCountPerText,
          name: item,
          type: graphtype
        });
      }
    }
    return graphData;
  }

  /**
   * Create a stopword chip element.
   *
   * The word is inserted as a text node, so characters that are special in
   * HTML are displayed literally instead of being parsed as markup.
   *
   * @param {string} word - Stopword shown on the chip.
   * @param {function(string): void} onRemove - Called with the word when the chip is clicked.
   * @returns {Element} The chip element.
   */
  createStopwordChipElement(word, onRemove) {
    let chipElement = nopaque.Utils.HTMLToElement('<div class="chip"><i class="close material-icons">close</i></div>');
    chipElement.prepend(document.createTextNode(word));
    chipElement.addEventListener('click', () => {
      onRemove(word);
    });
    return chipElement;
  }

  /**
   * Render the content of the stopword settings modal: the language
   * selection, the user defined stopwords, the stopwords of the selected
   * language and the delete/reset buttons.
   *
   * May be called repeatedly. Listeners registered by an earlier call are
   * removed first, so they do not pile up each time the modal is opened.
   *
   * @param {object} stopwords - Stopword lists (language -> array of words, plus 'user_stopwords').
   */
  renderStopwordSettingsModal(stopwords) {
    let stopwordInputField = document.querySelector('#stopword-input-field');
    let userStopwordListContainer = document.querySelector('#user-stopword-list-container');
    let stopwordLanguageSelection = document.querySelector('#stopword-language-selection');
    let stopwordLanguageChipList = document.querySelector('#stopword-language-chip-list');
    let deleteLanguageStopwordListEntriesButton = document.querySelector('#delete-language-stopword-list-entries-button');
    let resetLanguageStopwordListEntriesButton = document.querySelector('#reset-language-stopword-list-entries-button');
    // Remove the listeners of the previous render before adding new ones.
    this.data.stopwordModalAbortController?.abort();
    this.data.stopwordModalAbortController = new AbortController();
    let listenerOptions = {signal: this.data.stopwordModalAbortController.signal};
    stopwordLanguageChipList.innerHTML = '';
    userStopwordListContainer.innerHTML = '';
    stopwordInputField.value = '';
    // Render stopword language selection. Set english as default language. Filter out user_stopwords.
    if (stopwordLanguageSelection.children.length === 0) {
      Object.keys(stopwords).forEach(language => {
        if (language !== 'user_stopwords') {
          let optionElement = nopaque.Utils.HTMLToElement(`<option value="${language}" ${language === 'english' ? 'selected' : ''}>${language}</option>`);
          stopwordLanguageSelection.appendChild(optionElement);
        }
      });
    }
    // Render user stopwords over input field.
    for (let word of this.data.stopwords['user_stopwords']) {
      let chipElement = this.createStopwordChipElement(word, (removedWord) => {
        this.data.stopwords['user_stopwords'] = this.data.stopwords['user_stopwords'].filter(item => item !== removedWord);
      });
      userStopwordListContainer.appendChild(chipElement);
    }
    // Render english stopwords as default ...
    let selectedLanguage = stopwordLanguageSelection.value;
    this.renderStopwordLanguageChipList(selectedLanguage, stopwords[selectedLanguage]);
    // ... or render selected language stopwords.
    stopwordLanguageSelection.addEventListener('change', (event) => {
      this.renderStopwordLanguageChipList(event.target.value, stopwords[event.target.value]);
    }, listenerOptions);
    // Eventlistener for deleting all stopwords of a language.
    deleteLanguageStopwordListEntriesButton.addEventListener('click', () => {
      let selectedLanguage = stopwordLanguageSelection.value;
      this.data.stopwords[selectedLanguage] = [];
      stopwordLanguageChipList.innerHTML = '';
      this.buttonRendering();
    }, listenerOptions);
    // Eventlistener for resetting all stopwords of a language to the original stopwords.
    resetLanguageStopwordListEntriesButton.addEventListener('click', () => {
      let selectedLanguage = stopwordLanguageSelection.value;
      this.data.stopwords[selectedLanguage] = structuredClone(this.data.originalStopwords[selectedLanguage]);
      this.renderStopwordLanguageChipList(selectedLanguage, this.data.stopwords[selectedLanguage]);
    }, listenerOptions);
    // Initialize Materialize components.
    M.Chips.init(
      stopwordInputField,
      {
        placeholder: 'Add stopwords',
        onChipAdd: (event) => {
          // Store every chip of the input field (lowercased) that is not yet a user stopword.
          for (let word of event[0].M_Chips.chipsData) {
            if (!this.data.stopwords['user_stopwords'].includes(word.tag.toLowerCase())) {
              this.data.stopwords['user_stopwords'].push(word.tag.toLowerCase());
            }
          }
        }
      }
    );
    M.FormSelect.init(stopwordLanguageSelection);
  }

  /**
   * Enable/disable the delete and reset buttons of the stopword settings
   * modal for the selected language: delete is disabled when the list is
   * empty, reset is disabled when the list has as many entries as the
   * original list.
   */
  buttonRendering() {
    let deleteLanguageStopwordListEntriesButton = document.querySelector('#delete-language-stopword-list-entries-button');
    let resetLanguageStopwordListEntriesButton = document.querySelector('#reset-language-stopword-list-entries-button');
    let selectedLanguage = document.querySelector('#stopword-language-selection').value;
    let stopwordLength = this.data.stopwords[selectedLanguage].length;
    let originalStopwordListLength = this.data.originalStopwords[selectedLanguage].length;
    deleteLanguageStopwordListEntriesButton.classList.toggle('disabled', stopwordLength === 0);
    resetLanguageStopwordListEntriesButton.classList.toggle('disabled', stopwordLength === originalStopwordListLength);
  }

  /**
   * Render the stopwords of a language as chips. Clicking a chip removes its
   * word from the working copy of that language's stopword list.
   *
   * @param {string} language - Key of the list in `this.data.stopwords`.
   * @param {string[]} stopwords - Words to render.
   */
  renderStopwordLanguageChipList(language, stopwords) {
    let stopwordLanguageChipList = document.querySelector('#stopword-language-chip-list');
    stopwordLanguageChipList.innerHTML = '';
    for (let word of stopwords) {
      let chipElement = this.createStopwordChipElement(word, (removedWord) => {
        this.data.stopwords[language] = this.data.stopwords[language].filter(item => item !== removedWord);
        this.buttonRendering();
      });
      stopwordLanguageChipList.appendChild(chipElement);
    }
    this.buttonRendering();
  }
}

View File

@ -1 +0,0 @@
// Namespace object for the CQi API classes (e.g. cqi.api.APIClient).
cqi.api = {};

View File

@ -1,57 +0,0 @@
/**
 * Entry point for talking to a CQi server: wraps an API client and exposes
 * the control commands plus access to the corpus collection.
 */
cqi.CQiClient = class CQiClient {
  /**
   * @param {string} host
   * @param {string} corpusId
   * @param {string} [version=0.1] version
   */
  constructor(host, corpusId, version = '0.1') {
    const {APIClient} = cqi.api;
    /** @type {cqi.api.APIClient} */
    this.api = new APIClient(host, corpusId, version);
  }

  /**
   * A fresh corpus collection bound to this client (created on every access).
   *
   * @returns {cqi.models.corpora.CorpusCollection}
   */
  get corpora() {
    const {CorpusCollection} = cqi.models.corpora;
    return new CorpusCollection(this);
  }

  /**
   * Send the "bye" control command.
   *
   * @returns {Promise<cqi.status.StatusByeOk>}
   */
  async bye() {
    const status = await this.api.ctrl_bye();
    return status;
  }

  /**
   * Send the "connect" control command.
   *
   * @param {string} username
   * @param {string} password
   * @returns {Promise<cqi.status.StatusConnectOk>}
   */
  async connect(username, password) {
    const status = await this.api.ctrl_connect(username, password);
    return status;
  }

  /**
   * Send the "ping" control command.
   *
   * @returns {Promise<cqi.status.StatusPingOk>}
   */
  async ping() {
    const status = await this.api.ctrl_ping();
    return status;
  }

  /**
   * Send the "user abort" control command.
   *
   * @returns {Promise<null>}
   */
  async userAbort() {
    const result = await this.api.ctrl_user_abort();
    return result;
  }

  /**
   * Alias for the "bye" method; sends the same control command.
   *
   * @returns {Promise<cqi.status.StatusByeOk>}
   */
  async disconnect() {
    const status = await this.api.ctrl_bye();
    return status;
  }
};

View File

@ -1,185 +0,0 @@
// Namespace object that holds every error class of the cqi package.
cqi.errors = {};

/**
 * Root of the cqi error hierarchy; all other error classes in this namespace
 * derive from it. Catch this class to handle any error the CQi package
 * might throw.
 */
cqi.errors.CQiError = class CQiError extends Error {
  /**
   * @param {string} message - Error message, forwarded to `Error`.
   */
  constructor(message) {
    super(message);
    // Both fields start out unset; subclasses assign concrete values.
    this.code = undefined;
    this.description = undefined;
  }
};
/**
 * Parent class of the error classes with codes 513-516; carries code 2
 * itself.
 */
cqi.errors.Error = class Error extends cqi.errors.CQiError {
  /**
   * @param {string} message - Error message, forwarded to the parent class.
   */
  constructor(message) {
    super(message);
    // Replaces the `undefined` code assigned by the parent constructor.
    this.code = 2;
  }
};
/**
 * Error carrying code 513 (0x0201).
 */
cqi.errors.ErrorGeneralError = class ErrorGeneralError extends cqi.errors.Error {
  /**
   * @param {string} message - Error message, forwarded to the parent class.
   */
  constructor(message) {
    super(message);
    // Replaces the code assigned by the parent constructor.
    this.code = 513;
  }
};
/**
 * Error carrying code 514 (0x0202).
 */
cqi.errors.ErrorConnectRefused = class ErrorConnectRefused extends cqi.errors.Error {
  /**
   * @param {string} message - Error message, forwarded to the parent class.
   */
  constructor(message) {
    super(message);
    // Replaces the code assigned by the parent constructor.
    this.code = 514;
  }
};
/**
 * Error carrying code 515 (0x0203).
 */
cqi.errors.ErrorUserAbort = class ErrorUserAbort extends cqi.errors.Error {
  /**
   * @param {string} message - Error message, forwarded to the parent class.
   */
  constructor(message) {
    super(message);
    // Replaces the code assigned by the parent constructor.
    this.code = 515;
  }
};
/**
 * Error carrying code 516 (0x0204).
 */
cqi.errors.ErrorSyntaxError = class ErrorSyntaxError extends cqi.errors.Error {
  /**
   * @param {string} message - Error message, forwarded to the parent class.
   */
  constructor(message) {
    super(message);
    // Replaces the code assigned by the parent constructor.
    this.code = 516;
  }
};
/**
 * Parent class of the error classes with codes 1025-1031; carries code 4
 * itself.
 *
 * The class expression is named `CLError` (it used to be named `Error`), so
 * that `constructor.name` of an instance reports the real class name.
 */
cqi.errors.CLError = class CLError extends cqi.errors.CQiError {
  /**
   * @param {string} message - Error message, forwarded to the parent class.
   */
  constructor(message) {
    super(message);
    // Replaces the `undefined` code assigned by the parent constructor.
    this.code = 4;
  }
};
/**
 * Error carrying code 1025 (0x0401) and a fixed description.
 */
cqi.errors.CLErrorNoSuchAttribute = class CLErrorNoSuchAttribute extends cqi.errors.CLError {
  /**
   * @param {string} message - Error message, forwarded to the parent class.
   */
  constructor(message) {
    super(message);
    // Replace the values assigned by the parent constructors.
    this.code = 1025;
    this.description = "CQi server couldn't open attribute";
  }
};
/**
 * Error carrying code 1026 (0x0402).
 */
cqi.errors.CLErrorWrongAttributeType = class CLErrorWrongAttributeType extends cqi.errors.CLError {
  /**
   * @param {string} message - Error message, forwarded to the parent class.
   */
  constructor(message) {
    super(message);
    // Replaces the code assigned by the parent constructor.
    this.code = 1026;
  }
};
/**
 * Error carrying code 1027 (0x0403).
 */
cqi.errors.CLErrorOutOfRange = class CLErrorOutOfRange extends cqi.errors.CLError {
  /**
   * @param {string} message - Error message, forwarded to the parent class.
   */
  constructor(message) {
    super(message);
    // Replaces the code assigned by the parent constructor.
    this.code = 1027;
  }
};
/**
 * Error carrying code 1028 (0x0404).
 */
cqi.errors.CLErrorRegex = class CLErrorRegex extends cqi.errors.CLError {
  /**
   * @param {string} message - Error message, forwarded to the parent class.
   */
  constructor(message) {
    super(message);
    // Replaces the code assigned by the parent constructor.
    this.code = 1028;
  }
};
/**
 * Error carrying code 1029 (0x0405).
 */
cqi.errors.CLErrorCorpusAccess = class CLErrorCorpusAccess extends cqi.errors.CLError {
  /**
   * @param {string} message - Error message, forwarded to the parent class.
   */
  constructor(message) {
    super(message);
    // Replaces the code assigned by the parent constructor.
    this.code = 1029;
  }
};
/**
 * Error carrying code 1030 (0x0406) and a fixed description.
 */
cqi.errors.CLErrorOutOfMemory = class CLErrorOutOfMemory extends cqi.errors.CLError {
  /**
   * @param {string} message - Error message, forwarded to the parent class.
   */
  constructor(message) {
    super(message);
    // Replace the values assigned by the parent constructors.
    this.code = 1030;
    this.description = 'CQi server has run out of memory; try discarding some other corpora and/or subcorpora';
  }
};
/**
 * Error carrying code 1031 (0x0407) and a fixed description.
 */
cqi.errors.CLErrorInternal = class CLErrorInternal extends cqi.errors.CLError {
  /**
   * @param {string} message - Error message, forwarded to the parent class.
   */
  constructor(message) {
    super(message);
    // Replace the values assigned by the parent constructors.
    this.code = 1031;
    this.description = "The classical 'please contact technical support' error";
  }
};
/**
 * Parent class of the error classes with codes 1281-1284; carries code 5
 * itself.
 *
 * The class expression is named `CQPError` (it used to be named `Error`), so
 * that `constructor.name` of an instance reports the real class name.
 */
cqi.errors.CQPError = class CQPError extends cqi.errors.CQiError {
  /**
   * @param {string} message - Error message, forwarded to the parent class.
   */
  constructor(message) {
    super(message);
    // Replaces the `undefined` code assigned by the parent constructor.
    this.code = 5;
  }
};
/**
 * Error carrying code 1281 (0x0501).
 */
cqi.errors.CQPErrorGeneral = class CQPErrorGeneral extends cqi.errors.CQPError {
  /**
   * @param {string} message - Error message, forwarded to the parent class.
   */
  constructor(message) {
    super(message);
    // Replaces the code assigned by the parent constructor.
    this.code = 1281;
  }
};
/**
 * Error carrying code 1282 (0x0502).
 */
cqi.errors.CQPErrorNoSuchCorpus = class CQPErrorNoSuchCorpus extends cqi.errors.CQPError {
  /**
   * @param {string} message - Error message, forwarded to the parent class.
   */
  constructor(message) {
    super(message);
    // Replaces the code assigned by the parent constructor.
    this.code = 1282;
  }
};
/**
 * Error carrying code 1283 (0x0503).
 */
cqi.errors.CQPErrorInvalidField = class CQPErrorInvalidField extends cqi.errors.CQPError {
  /**
   * @param {string} message - Error message, forwarded to the parent class.
   */
  constructor(message) {
    super(message);
    // Replaces the code assigned by the parent constructor.
    this.code = 1283;
  }
};
/**
 * Error carrying code 1284 (0x0504) and a fixed description.
 */
cqi.errors.CQPErrorOutOfRange = class CQPErrorOutOfRange extends cqi.errors.CQPError {
  /**
   * @param {string} message - Error message, forwarded to the parent class.
   */
  constructor(message) {
    super(message);
    // Replace the values assigned by the parent constructors.
    this.code = 1284;
    this.description = 'A number is out of range';
  }
};
/**
 * Maps a numeric error code to the error class that carries this code.
 * Entries are grouped by parent class; each group starts with the parent.
 */
cqi.errors.lookup = {
  // cqi.errors.Error and its subclasses
  2: cqi.errors.Error,
  513: cqi.errors.ErrorGeneralError,
  514: cqi.errors.ErrorConnectRefused,
  515: cqi.errors.ErrorUserAbort,
  516: cqi.errors.ErrorSyntaxError,

  // cqi.errors.CLError and its subclasses
  4: cqi.errors.CLError,
  1025: cqi.errors.CLErrorNoSuchAttribute,
  1026: cqi.errors.CLErrorWrongAttributeType,
  1027: cqi.errors.CLErrorOutOfRange,
  1028: cqi.errors.CLErrorRegex,
  1029: cqi.errors.CLErrorCorpusAccess,
  1030: cqi.errors.CLErrorOutOfMemory,
  1031: cqi.errors.CLErrorInternal,

  // cqi.errors.CQPError and its subclasses
  5: cqi.errors.CQPError,
  1281: cqi.errors.CQPErrorGeneral,
  1282: cqi.errors.CQPErrorNoSuchCorpus,
  1283: cqi.errors.CQPErrorInvalidField,
  1284: cqi.errors.CQPErrorOutOfRange
};

View File

@ -1,127 +0,0 @@
// Namespace object for the corpus model and its collection.
cqi.models.corpora = {};

/**
 * Model of a single corpus. The getters read from `this.attrs`; the
 * collection getters and the async methods work through `this.client`.
 */
cqi.models.corpora.Corpus = class Corpus extends cqi.models.resource.Model {
  /**
   * @returns {string}
   */
  get apiName() {
    const { api_name: apiName } = this.attrs;
    return apiName;
  }

  /**
   * @returns {string}
   */
  get name() {
    const { name } = this.attrs;
    return name;
  }

  /**
   * @returns {number}
   */
  get size() {
    const { size } = this.attrs;
    return size;
  }

  /**
   * @returns {string}
   */
  get charset() {
    const { charset } = this.attrs;
    return charset;
  }

  /**
   * Unlike the other getters, this one yields `undefined` instead of
   * throwing when `this.attrs` is `null` or `undefined`.
   *
   * @returns {string[]}
   */
  get properties() {
    const attrs = this.attrs;
    if (attrs === null || attrs === undefined) {
      return undefined;
    }
    return attrs.properties;
  }

  /**
   * A new collection object is created on every access.
   *
   * @returns {cqi.models.attributes.AlignmentAttributeCollection}
   */
  get alignmentAttributes() {
    const collection = new cqi.models.attributes.AlignmentAttributeCollection(this.client, this);
    return collection;
  }

  /**
   * A new collection object is created on every access.
   *
   * @returns {cqi.models.attributes.PositionalAttributeCollection}
   */
  get positionalAttributes() {
    const collection = new cqi.models.attributes.PositionalAttributeCollection(this.client, this);
    return collection;
  }

  /**
   * A new collection object is created on every access.
   *
   * @returns {cqi.models.attributes.StructuralAttributeCollection}
   */
  get structuralAttributes() {
    const collection = new cqi.models.attributes.StructuralAttributeCollection(this.client, this);
    return collection;
  }

  /**
   * A new collection object is created on every access.
   *
   * @returns {cqi.models.subcorpora.SubcorpusCollection}
   */
  get subcorpora() {
    const collection = new cqi.models.subcorpora.SubcorpusCollection(this.client, this);
    return collection;
  }

  /**
   * Forwards to `corpus_drop_corpus` with this corpus' API name.
   *
   * @returns {Promise<cqi.status.StatusOk>}
   */
  async drop() {
    const status = await this.client.api.corpus_drop_corpus(this.apiName);
    return status;
  }

  /**
   * Forwards to `cqp_query` with this corpus' API name.
   *
   * @param {string} subcorpusName
   * @param {string} query
   * @returns {Promise<cqi.status.StatusOk>}
   */
  async query(subcorpusName, query) {
    const status = await this.client.api.cqp_query(this.apiName, subcorpusName, query);
    return status;
  }
};
/**
 * Collection that fetches corpus attributes through `this.client.api` and
 * turns them into Corpus models via `prepareModel`.
 */
cqi.models.corpora.CorpusCollection = class CorpusCollection extends cqi.models.resource.Collection {
  /** @type {typeof cqi.models.corpora.Corpus} */
  static model = cqi.models.corpora.Corpus;

  /**
   * Collect the raw attributes of one corpus. The API calls are issued one
   * after another: charset, properties, then the size of the
   * `<corpusName>.word` attribute.
   *
   * @param {string} corpusName
   * @returns {Promise<object>}
   */
  async _get(corpusName) {
    const charset = await this.client.api.corpus_charset(corpusName);
    const properties = await this.client.api.corpus_properties(corpusName);
    const size = await this.client.api.cl_attribute_size(`${corpusName}.word`);
    // `full_name` and `info` are deliberately not fetched (the corresponding
    // corpus_full_name / corpus_info calls were disabled in the original).
    return {
      api_name: corpusName,
      charset: charset,
      name: corpusName,
      properties: properties,
      size: size
    };
  }

  /**
   * @param {string} corpusName
   * @returns {Promise<cqi.models.corpora.Corpus>}
   */
  async get(corpusName) {
    const attrs = await this._get(corpusName);
    return this.prepareModel(attrs);
  }

  /**
   * Fetch every corpus reported by `corpus_list_corpora`, one at a time and
   * in the reported order.
   *
   * @returns {Promise<cqi.models.corpora.Corpus[]>}
   */
  async list() {
    /** @type {string[]} */
    const names = await this.client.api.corpus_list_corpora();
    /** @type {cqi.models.corpora.Corpus[]} */
    const result = [];
    for (const name of names) {
      const corpus = await this.get(name);
      result.push(corpus);
    }
    return result;
  }
};

View File

@ -1 +0,0 @@
// Namespace object for the model layer of the cqi package; sub-namespaces
// such as cqi.models.corpora and cqi.models.subcorpora are attached to it.
cqi.models = {};

View File

@ -1,155 +0,0 @@
// Namespace object for the subcorpus model and its collection.
cqi.models.subcorpora = {};

/**
 * Model of a single subcorpus. The getters read from `this.attrs`; the async
 * methods forward to `this.client.api` using this subcorpus' API name.
 */
cqi.models.subcorpora.Subcorpus = class Subcorpus extends cqi.models.resource.Model {
  /**
   * @returns {string}
   */
  get apiName() {
    const { api_name: apiName } = this.attrs;
    return apiName;
  }

  /**
   * @returns {object}
   */
  get fields() {
    const { fields } = this.attrs;
    return fields;
  }

  /**
   * @returns {string}
   */
  get name() {
    const { name } = this.attrs;
    return name;
  }

  /**
   * @returns {number}
   */
  get size() {
    const { size } = this.attrs;
    return size;
  }

  /**
   * Forwards to `cqp_drop_subcorpus`.
   *
   * @returns {Promise<cqi.status.StatusOk>}
   */
  async drop() {
    const status = await this.client.api.cqp_drop_subcorpus(this.apiName);
    return status;
  }

  /**
   * Forwards to `cqp_dump_subcorpus`.
   *
   * @param {number} field
   * @param {number} first
   * @param {number} last
   * @returns {Promise<number[]>}
   */
  async dump(field, first, last) {
    const dumped = await this.client.api.cqp_dump_subcorpus(this.apiName, field, first, last);
    return dumped;
  }

  /**
   * Forwards to `cqp_fdist_1`, passing the attribute by its API name.
   *
   * @param {number} cutoff
   * @param {number} field
   * @param {cqi.models.attributes.PositionalAttribute} attribute
   * @returns {Promise<number[]>}
   */
  async fdist1(cutoff, field, attribute) {
    const distribution = await this.client.api.cqp_fdist_1(this.apiName, cutoff, field, attribute.apiName);
    return distribution;
  }

  /**
   * Forwards to `cqp_fdist_2`, passing both attributes by their API names.
   *
   * @param {number} cutoff
   * @param {number} field1
   * @param {cqi.models.attributes.PositionalAttribute} attribute1
   * @param {number} field2
   * @param {cqi.models.attributes.PositionalAttribute} attribute2
   * @returns {Promise<number[]>}
   */
  async fdist2(cutoff, field1, attribute1, field2, attribute2) {
    const distribution = await this.client.api.cqp_fdist_2(this.apiName, cutoff, field1, attribute1.apiName, field2, attribute2.apiName);
    return distribution;
  }
};
/**
 * Collection of the subcorpora that belong to one corpus.
 */
cqi.models.subcorpora.SubcorpusCollection = class SubcorpusCollection extends cqi.models.resource.Collection {
  /** @type {typeof cqi.models.subcorpora.Subcorpus} */
  static model = cqi.models.subcorpora.Subcorpus;

  /**
   * @param {cqi.CQiClient} client
   * @param {cqi.models.corpora.Corpus} corpus
   */
  constructor(client, corpus) {
    super(client);
    /** @type {cqi.models.corpora.Corpus} */
    this.corpus = corpus;
  }

  /**
   * Collect the raw attributes of one subcorpus. The API name has the form
   * `<corpus API name>:<subcorpus name>`. The four known fields are probed
   * one after another, and only those the server reports as present end up
   * in `fields`; the size is queried last.
   *
   * @param {string} subcorpusName
   * @returns {Promise<object>}
   */
  async _get(subcorpusName) {
    /** @type {string} */
    const apiName = `${this.corpus.apiName}:${subcorpusName}`;
    // Probe order matters for the key order of `fields`.
    const knownFields = [
      ['match', cqi.CONST_FIELD_MATCH],
      ['matchend', cqi.CONST_FIELD_MATCHEND],
      ['target', cqi.CONST_FIELD_TARGET],
      ['keyword', cqi.CONST_FIELD_KEYWORD]
    ];
    /** @type {object} */
    const fields = {};
    for (const [fieldName, fieldCode] of knownFields) {
      const isPresent = await this.client.api.cqp_subcorpus_has_field(apiName, fieldCode);
      if (isPresent) {
        fields[fieldName] = fieldCode;
      }
    }
    const size = await this.client.api.cqp_subcorpus_size(apiName);
    return {
      api_name: apiName,
      fields: fields,
      name: subcorpusName,
      size: size
    };
  }

  /**
   * @param {string} subcorpusName
   * @returns {Promise<cqi.models.subcorpora.Subcorpus>}
   */
  async get(subcorpusName) {
    const attrs = await this._get(subcorpusName);
    return this.prepareModel(attrs);
  }

  /**
   * Fetch every subcorpus reported by `cqp_list_subcorpora` for the owning
   * corpus, one at a time and in the reported order.
   *
   * @returns {Promise<cqi.models.subcorpora.Subcorpus[]>}
   */
  async list() {
    /** @type {string[]} */
    const names = await this.client.api.cqp_list_subcorpora(this.corpus.apiName);
    /** @type {cqi.models.subcorpora.Subcorpus[]} */
    const result = [];
    for (const name of names) {
      const subcorpus = await this.get(name);
      result.push(subcorpus);
    }
    return result;
  }
};

View File

@ -1,6 +0,0 @@
// Root namespace of the cqi package, created together with the numeric
// identifiers of the subcorpus fields. `var` is kept on purpose so the
// binding stays a plain global for the other script files.
var cqi = {
  CONST_FIELD_KEYWORD: 9,
  CONST_FIELD_MATCH: 16,
  CONST_FIELD_MATCHEND: 17,
  CONST_FIELD_TARGET: 0
};

View File

@ -1,51 +0,0 @@
// Namespace object that holds every status class of the cqi package.
cqi.status = {};

/**
 * Root of the status hierarchy; all other status classes in this namespace
 * derive from it.
 */
cqi.status.CQiStatus = class CQiStatus {
  constructor() {
    // Starts out unset; subclasses assign a concrete value.
    this.code = undefined;
  }
};
/**
 * Status carrying code 257 (0x0101).
 */
cqi.status.StatusOk = class StatusOk extends cqi.status.CQiStatus {
  constructor() {
    super();
    // Replaces the `undefined` code assigned by the parent constructor.
    this.code = 257;
  }
};
/**
 * Status carrying code 258 (0x0102).
 */
cqi.status.StatusConnectOk = class StatusConnectOk extends cqi.status.CQiStatus {
  constructor() {
    super();
    // Replaces the `undefined` code assigned by the parent constructor.
    this.code = 258;
  }
};
/**
 * Status carrying code 259 (0x0103).
 */
cqi.status.StatusByeOk = class StatusByeOk extends cqi.status.CQiStatus {
  constructor() {
    super();
    // Replaces the `undefined` code assigned by the parent constructor.
    this.code = 259;
  }
};
/**
 * Status carrying code 260 (0x0104).
 */
cqi.status.StatusPingOk = class StatusPingOk extends cqi.status.CQiStatus {
  constructor() {
    super();
    // Replaces the `undefined` code assigned by the parent constructor.
    this.code = 260;
  }
};
/**
 * Maps a numeric status code to the status class that carries this code.
 */
cqi.status.lookup = {
  257: cqi.status.StatusOk,
  258: cqi.status.StatusConnectOk,
  259: cqi.status.StatusByeOk,
  260: cqi.status.StatusPingOk
};

Some files were not shown because too many files have changed in this diff Show More