56 Commits

Author SHA1 Message Date
48fe7c0702 Slight changes to services description 2024-06-20 12:53:48 +02:00
5a2723b617 updates and restructuring 2024-03-26 15:29:26 +01:00
4425d50140 more manual updates 2024-03-19 17:33:37 +01:00
39113a6f17 manual sections 01, 02, 06 2024-03-14 17:07:53 +01:00
a53f1d216b manual v.a. dashboard, services 2024-03-14 09:35:22 +01:00
ffd7a3ad91 Manual Ergänzungen Intro /Getting Started 2024-03-05 15:41:17 +01:00
5dce269736 Version number + original slogan font 2023-12-18 12:49:30 +01:00
13369296d3 rename docker-entrypoint.sh to docker-nopaque-entrypoint.sh 2023-12-15 13:56:03 +01:00
4f6e1c121f Add nopaque version config variable 2023-12-15 08:47:59 +01:00
438a257fe3 Update CI script 2023-12-15 08:47:46 +01:00
2e88d7d035 Merge branch 'query-builder' of gitlab.ub.uni-bielefeld.de:sfb1288inf/nopaque into query-builder 2023-12-15 08:37:02 +01:00
b338c33d42 Bump cwb version 2023-12-15 08:36:50 +01:00
d6cebddd92 Updated query builder gifs and instructions 2023-12-12 14:56:08 +01:00
07fda0e95a Merge branch 'query-builder' of gitlab.ub.uni-bielefeld.de:sfb1288inf/nopaque into query-builder 2023-12-07 22:35:41 +01:00
3927d9e4cd Edits in structural attributes section and others 2023-12-07 22:34:00 +01:00
8f5d5ffdec Merge branch 'query-builder' of gitlab.ub.uni-bielefeld.de:sfb1288inf/nopaque into query-builder 2023-12-07 12:46:48 +01:00
f02d1619e2 Try to implement anchor tags 2023-12-07 12:46:37 +01:00
892f1f799e Merge branch 'query-builder' of gitlab.ub.uni-bielefeld.de:sfb1288inf/nopaque into query-builder 2023-12-05 15:00:49 +01:00
f5e98ae655 Add badges to README 2023-12-05 15:00:21 +01:00
f790106e0e Merge branch 'query-builder' of gitlab.ub.uni-bielefeld.de:sfb1288inf/nopaque into query-builder 2023-12-05 14:54:05 +01:00
c57acc73d2 Manual changes 2023-12-05 14:42:38 +01:00
678a0767b7 Change Manual icon 2023-11-30 11:21:39 +01:00
17a9338d9f Fix job deletion from job page 2023-11-29 16:11:14 +01:00
a7cbce1eda Fix wrong spacy-nlp-pipeline version number 2023-11-29 10:45:35 +01:00
fa28c875e1 Merge branch 'query-builder' of gitlab.ub.uni-bielefeld.de:sfb1288inf/nopaque into query-builder 2023-11-28 12:40:05 +01:00
0927edcceb Bug Fixes 2023-11-28 12:39:54 +01:00
9c22370eea Implement force download parameter in model insert_defaults methods 2023-11-28 12:10:55 +01:00
bdcc80a66f Add new tesseract-ocr-pipeline version. Remove redundant spacy-nlp-pipeline version. 2023-11-28 10:34:30 +01:00
9be5ce6014 link logo to homepage 2023-11-23 13:32:54 +01:00
00e4c3ade3 Add logo to sidenav 2023-11-23 13:26:19 +01:00
79a16cae83 Add links to my profile page 2023-11-23 13:16:21 +01:00
c5aea0be94 Merge branch 'query-builder' of gitlab.ub.uni-bielefeld.de:sfb1288inf/nopaque into query-builder 2023-11-22 12:50:18 +01:00
afcb890ccf Element Target drag&drop + small improvements 2023-11-22 12:50:08 +01:00
9627708950 rename manual files to fit new naming convention 2023-11-21 12:31:10 +01:00
1bb1408988 make the workshops package fit the new file scheme 2023-11-21 10:11:49 +01:00
79bafdea89 Switch back to older settings and extension .vscode setup 2023-11-20 15:26:22 +01:00
a2d617718b Update .vscode directory contents 2023-11-20 11:05:56 +01:00
691b2de5b2 Bug Fix: lock chips after switch to QB 2023-11-20 09:48:06 +01:00
eb0e7c9ba1 Fix error on not authenticated users 2023-11-20 09:35:53 +01:00
ab132746e7 Add TODO in migration scripts 2023-11-17 10:42:55 +01:00
ae5646512d Merge branch 'query-builder' of gitlab.ub.uni-bielefeld.de:sfb1288inf/nopaque into query-builder 2023-11-17 10:15:50 +01:00
fc66327920 Make double quotation marks escapable again 2023-11-17 10:15:39 +01:00
9bfc96ad41 minor codestyle fix 2023-11-16 17:22:07 +01:00
008938b46b Avatar in top right corner 2023-11-16 15:57:27 +01:00
4f24e9f9da Erase meta data logic from struc attribute builder 2023-11-14 09:48:38 +01:00
d0fe4360bb Merge branch 'query-builder' of gitlab.ub.uni-bielefeld.de:sfb1288inf/nopaque into query-builder 2023-11-13 15:37:26 +01:00
1c18806c9c Merge branch 'query-builder' of gitlab.ub.uni-bielefeld.de:sfb1288inf/nopaque into query-builder 2023-11-13 15:53:17 +01:00
9487aa7a60 Restructure modals and base template 2023-11-13 15:53:14 +01:00
6559051fd5 Delete condition logic in token builder 2023-11-13 15:37:19 +01:00
0882e085a3 Function renaming 2023-11-13 14:46:19 +01:00
ff1bcb40f3 update query builder code to fit the new style 2023-11-13 14:20:19 +01:00
d298b200dc Move javascript files to fit new style 2023-11-13 12:59:36 +01:00
660d7ebc99 Fix sidenav profile entries 2023-11-13 12:46:48 +01:00
df33c7b36d Fix old Utils references in js 2023-11-13 10:30:24 +01:00
bf8b22fb58 Merge branch 'query-builder' of gitlab.ub.uni-bielefeld.de:sfb1288inf/nopaque into query-builder 2023-11-13 09:43:03 +01:00
b216ad8a40 QB parts as extensions 2023-11-13 09:42:56 +01:00
88 changed files with 1689 additions and 1435 deletions

View File

@ -8,6 +8,6 @@
!.flaskenv !.flaskenv
!boot.sh !boot.sh
!config.py !config.py
!docker-entrypoint.sh !docker-nopaque-entrypoint.sh
!nopaque.py !nopaque.py
!requirements.txt !requirements.txt

View File

@ -1,3 +1,37 @@
include:
- template: Security/Container-Scanning.gitlab-ci.yml
##############################################################################
# Pipeline stages in order of execution #
##############################################################################
stages:
- build
- publish
- sca
##############################################################################
# Pipeline behavior #
##############################################################################
workflow:
rules:
# Run the pipeline on commits to the default branch
- if: $CI_COMMIT_BRANCH == $CI_DEFAULT_BRANCH
variables:
# Set the Docker image tag to `latest`
DOCKER_IMAGE: $CI_REGISTRY_IMAGE:latest
when: always
# Run the pipeline on tag creation
- if: $CI_COMMIT_TAG
variables:
# Set the Docker image tag to the Git tag name
DOCKER_IMAGE: $CI_REGISTRY_IMAGE:$CI_COMMIT_REF_NAME
when: always
# Don't run the pipeline on all other occasions
- when: never
##############################################################################
# Default values for pipeline jobs #
##############################################################################
default: default:
image: docker:24.0.6 image: docker:24.0.6
services: services:
@ -5,38 +39,46 @@ default:
tags: tags:
- docker - docker
##############################################################################
# CI/CD variables for all jobs in the pipeline #
##############################################################################
variables: variables:
DOCKER_TLS_CERTDIR: /certs DOCKER_TLS_CERTDIR: /certs
DOCKER_BUILD_PATH: .
DOCKERFILE: Dockerfile
build_image: ##############################################################################
# Pipeline jobs #
##############################################################################
build:
stage: build stage: build
rules:
- if: $CI_COMMIT_BRANCH == $CI_DEFAULT_BRANCH
when: on_success
variables:
IMAGE_TAG: $CI_REGISTRY_IMAGE:latest
- if: $CI_COMMIT_TAG
when: "on_success"
variables:
IMAGE_TAG: $CI_REGISTRY_IMAGE:$CI_COMMIT_REF_NAME
- when: never
before_script:
- docker login -u gitlab-ci-token -p $CI_JOB_TOKEN $CI_REGISTRY
script: script:
- docker build -t $IMAGE_TAG . - docker build --tag $DOCKER_IMAGE --file $DOCKERFILE $DOCKER_BUILD_PATH
- docker push $IMAGE_TAG - docker save $DOCKER_IMAGE > docker_image.tar
artifacts:
paths:
- docker_image.tar
include: publish:
- template: Security/Container-Scanning.gitlab-ci.yml stage: publish
before_script:
- docker login --username gitlab-ci-token --password $CI_JOB_TOKEN $CI_REGISTRY
script:
- docker load --input docker_image.tar
- docker push $DOCKER_IMAGE
after_script:
- docker logout $CI_REGISTRY
container_scanning: container_scanning:
stage: sca
rules: rules:
# Run the job on commits to the default branch
- if: $CI_COMMIT_BRANCH == $CI_DEFAULT_BRANCH - if: $CI_COMMIT_BRANCH == $CI_DEFAULT_BRANCH
when: on_success when: always
variables: # Run the job on tag creation
CS_IMAGE: $CI_REGISTRY_IMAGE:latest
- if: $CI_COMMIT_TAG - if: $CI_COMMIT_TAG
when: on_success when: always
variables: # Don't run the job on all other occasions
CS_IMAGE: ${CI_REGISTRY_IMAGE}:${CI_COMMIT_REF_NAME}
- when: never - when: never
variables:
CS_IMAGE: $DOCKER_IMAGE

View File

@ -1,7 +1,8 @@
{ {
"recommendations": [ "recommendations": [
"samuelcolvin.jinjahtml", "irongeek.vscode-env",
"ms-azuretools.vscode-docker", "ms-azuretools.vscode-docker",
"ms-python.python" "ms-python.python",
"samuelcolvin.jinjahtml"
] ]
} }

View File

@ -1,13 +1,9 @@
{ {
"editor.rulers": [79], "editor.rulers": [79],
"files.insertFinalNewline": true, "files.insertFinalNewline": true,
"python.terminal.activateEnvironment": false,
"[css]": { "[css]": {
"editor.tabSize": 2 "editor.tabSize": 2
}, },
"[scss]": {
"editor.tabSize": 2
},
"[html]": { "[html]": {
"editor.tabSize": 2 "editor.tabSize": 2
}, },
@ -17,7 +13,7 @@
"[jinja-html]": { "[jinja-html]": {
"editor.tabSize": 2 "editor.tabSize": 2
}, },
"[jinja-js]": { "[scss]": {
"editor.tabSize": 2 "editor.tabSize": 2
} }
} }

View File

@ -17,9 +17,6 @@ RUN apt-get update \
&& rm --recursive /var/lib/apt/lists/* && rm --recursive /var/lib/apt/lists/*
COPY docker-entrypoint.sh /usr/local/bin/
RUN useradd --create-home --no-log-init nopaque \ RUN useradd --create-home --no-log-init nopaque \
&& groupadd docker \ && groupadd docker \
&& usermod --append --groups docker nopaque && usermod --append --groups docker nopaque
@ -47,7 +44,10 @@ RUN python3 -m pip install --requirement requirements.txt \
USER root USER root
COPY docker-nopaque-entrypoint.sh /usr/local/bin/
EXPOSE 5000 EXPOSE 5000
ENTRYPOINT ["docker-entrypoint.sh"] ENTRYPOINT ["docker-nopaque-entrypoint.sh"]

View File

@ -1,5 +1,8 @@
# nopaque # nopaque
![release badge](https://gitlab.ub.uni-bielefeld.de/sfb1288inf/nopaque/-/badges/release.svg)
![pipeline badge](https://gitlab.ub.uni-bielefeld.de/sfb1288inf/nopaque/badges/master/pipeline.svg?ignore_skipped=true)
nopaque bundles various tools and services that provide humanities scholars with DH methods and thus can support their various individual research processes. Using nopaque, researchers can subject digitized sources to Optical Character Recognition (OCR). The resulting text files can then be used as a data basis for Natural Language Processing (NLP). The texts are automatically subjected to various linguistic annotations. The data processed via NLP can then be summarized in the web application as corpora and analyzed by means of an information retrieval system through complex search queries. The range of functions of the web application will be successively extended according to the needs of the researchers. nopaque bundles various tools and services that provide humanities scholars with DH methods and thus can support their various individual research processes. Using nopaque, researchers can subject digitized sources to Optical Character Recognition (OCR). The resulting text files can then be used as a data basis for Natural Language Processing (NLP). The texts are automatically subjected to various linguistic annotations. The data processed via NLP can then be summarized in the web application as corpora and analyzed by means of an information retrieval system through complex search queries. The range of functions of the web application will be successively extended according to the needs of the researchers.
## Prerequisites and requirements ## Prerequisites and requirements

View File

@ -120,7 +120,6 @@
version: '3.4.0' version: '3.4.0'
compatible_service_versions: compatible_service_versions:
- '0.1.1' - '0.1.1'
- '0.1.2'
- title: 'German' - title: 'German'
description: 'German pipeline optimized for CPU. Components: tok2vec, tagger, morphologizer, parser, lemmatizer (trainable_lemmatizer), senter, ner.' description: 'German pipeline optimized for CPU. Components: tok2vec, tagger, morphologizer, parser, lemmatizer (trainable_lemmatizer), senter, ner.'
url: 'https://github.com/explosion/spacy-models/releases/download/de_core_news_md-3.4.0/de_core_news_md-3.4.0.tar.gz' url: 'https://github.com/explosion/spacy-models/releases/download/de_core_news_md-3.4.0/de_core_news_md-3.4.0.tar.gz'
@ -132,7 +131,6 @@
version: '3.4.0' version: '3.4.0'
compatible_service_versions: compatible_service_versions:
- '0.1.1' - '0.1.1'
- '0.1.2'
- title: 'Greek' - title: 'Greek'
description: 'Greek pipeline optimized for CPU. Components: tok2vec, morphologizer, parser, lemmatizer (trainable_lemmatizer), senter, ner, attribute_ruler.' description: 'Greek pipeline optimized for CPU. Components: tok2vec, morphologizer, parser, lemmatizer (trainable_lemmatizer), senter, ner, attribute_ruler.'
url: 'https://github.com/explosion/spacy-models/releases/download/el_core_news_md-3.4.0/el_core_news_md-3.4.0.tar.gz' url: 'https://github.com/explosion/spacy-models/releases/download/el_core_news_md-3.4.0/el_core_news_md-3.4.0.tar.gz'
@ -144,7 +142,6 @@
version: '3.4.0' version: '3.4.0'
compatible_service_versions: compatible_service_versions:
- '0.1.1' - '0.1.1'
- '0.1.2'
- title: 'English' - title: 'English'
description: 'English pipeline optimized for CPU. Components: tok2vec, tagger, parser, senter, ner, attribute_ruler, lemmatizer.' description: 'English pipeline optimized for CPU. Components: tok2vec, tagger, parser, senter, ner, attribute_ruler, lemmatizer.'
url: 'https://github.com/explosion/spacy-models/releases/download/en_core_web_md-3.4.1/en_core_web_md-3.4.1.tar.gz' url: 'https://github.com/explosion/spacy-models/releases/download/en_core_web_md-3.4.1/en_core_web_md-3.4.1.tar.gz'
@ -156,7 +153,6 @@
version: '3.4.1' version: '3.4.1'
compatible_service_versions: compatible_service_versions:
- '0.1.1' - '0.1.1'
- '0.1.2'
- title: 'Spanish' - title: 'Spanish'
description: 'Spanish pipeline optimized for CPU. Components: tok2vec, morphologizer, parser, senter, ner, attribute_ruler, lemmatizer.' description: 'Spanish pipeline optimized for CPU. Components: tok2vec, morphologizer, parser, senter, ner, attribute_ruler, lemmatizer.'
url: 'https://github.com/explosion/spacy-models/releases/download/es_core_news_md-3.4.0/es_core_news_md-3.4.0.tar.gz' url: 'https://github.com/explosion/spacy-models/releases/download/es_core_news_md-3.4.0/es_core_news_md-3.4.0.tar.gz'
@ -168,7 +164,6 @@
version: '3.4.0' version: '3.4.0'
compatible_service_versions: compatible_service_versions:
- '0.1.1' - '0.1.1'
- '0.1.2'
- title: 'French' - title: 'French'
description: 'French pipeline optimized for CPU. Components: tok2vec, morphologizer, parser, senter, ner, attribute_ruler, lemmatizer.' description: 'French pipeline optimized for CPU. Components: tok2vec, morphologizer, parser, senter, ner, attribute_ruler, lemmatizer.'
url: 'https://github.com/explosion/spacy-models/releases/download/fr_core_news_md-3.4.0/fr_core_news_md-3.4.0.tar.gz' url: 'https://github.com/explosion/spacy-models/releases/download/fr_core_news_md-3.4.0/fr_core_news_md-3.4.0.tar.gz'
@ -180,7 +175,6 @@
version: '3.4.0' version: '3.4.0'
compatible_service_versions: compatible_service_versions:
- '0.1.1' - '0.1.1'
- '0.1.2'
- title: 'Italian' - title: 'Italian'
description: 'Italian pipeline optimized for CPU. Components: tok2vec, morphologizer, tagger, parser, lemmatizer (trainable_lemmatizer), senter, ner' description: 'Italian pipeline optimized for CPU. Components: tok2vec, morphologizer, tagger, parser, lemmatizer (trainable_lemmatizer), senter, ner'
url: 'https://github.com/explosion/spacy-models/releases/download/it_core_news_md-3.4.0/it_core_news_md-3.4.0.tar.gz' url: 'https://github.com/explosion/spacy-models/releases/download/it_core_news_md-3.4.0/it_core_news_md-3.4.0.tar.gz'
@ -192,7 +186,6 @@
version: '3.4.0' version: '3.4.0'
compatible_service_versions: compatible_service_versions:
- '0.1.1' - '0.1.1'
- '0.1.2'
- title: 'Polish' - title: 'Polish'
description: 'Polish pipeline optimized for CPU. Components: tok2vec, morphologizer, parser, lemmatizer (trainable_lemmatizer), tagger, senter, ner.' description: 'Polish pipeline optimized for CPU. Components: tok2vec, morphologizer, parser, lemmatizer (trainable_lemmatizer), tagger, senter, ner.'
url: 'https://github.com/explosion/spacy-models/releases/download/pl_core_news_md-3.4.0/pl_core_news_md-3.4.0.tar.gz' url: 'https://github.com/explosion/spacy-models/releases/download/pl_core_news_md-3.4.0/pl_core_news_md-3.4.0.tar.gz'
@ -204,7 +197,6 @@
version: '3.4.0' version: '3.4.0'
compatible_service_versions: compatible_service_versions:
- '0.1.1' - '0.1.1'
- '0.1.2'
- title: 'Russian' - title: 'Russian'
description: 'Russian pipeline optimized for CPU. Components: tok2vec, morphologizer, parser, senter, ner, attribute_ruler, lemmatizer.' description: 'Russian pipeline optimized for CPU. Components: tok2vec, morphologizer, parser, senter, ner, attribute_ruler, lemmatizer.'
url: 'https://github.com/explosion/spacy-models/releases/download/ru_core_news_md-3.4.0/ru_core_news_md-3.4.0.tar.gz' url: 'https://github.com/explosion/spacy-models/releases/download/ru_core_news_md-3.4.0/ru_core_news_md-3.4.0.tar.gz'
@ -216,7 +208,6 @@
version: '3.4.0' version: '3.4.0'
compatible_service_versions: compatible_service_versions:
- '0.1.1' - '0.1.1'
- '0.1.2'
- title: 'Chinese' - title: 'Chinese'
description: 'Chinese pipeline optimized for CPU. Components: tok2vec, tagger, parser, senter, ner, attribute_ruler.' description: 'Chinese pipeline optimized for CPU. Components: tok2vec, tagger, parser, senter, ner, attribute_ruler.'
url: 'https://github.com/explosion/spacy-models/releases/download/zh_core_web_md-3.4.0/zh_core_web_md-3.4.0.tar.gz' url: 'https://github.com/explosion/spacy-models/releases/download/zh_core_web_md-3.4.0/zh_core_web_md-3.4.0.tar.gz'
@ -228,4 +219,3 @@
version: '3.4.0' version: '3.4.0'
compatible_service_versions: compatible_service_versions:
- '0.1.1' - '0.1.1'
- '0.1.2'

View File

@ -9,6 +9,7 @@
# compatible_service_versions: # compatible_service_versions:
# - '0.1.0' # - '0.1.0'
# - '0.1.1' # - '0.1.1'
# - '0.1.2'
# - title: 'Amharic' # - title: 'Amharic'
# description: '' # description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/amh.traineddata' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/amh.traineddata'
@ -20,6 +21,7 @@
# compatible_service_versions: # compatible_service_versions:
# - '0.1.0' # - '0.1.0'
# - '0.1.1' # - '0.1.1'
# - '0.1.2'
- title: 'Arabic' - title: 'Arabic'
description: '' description: ''
url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/ara.traineddata' url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/ara.traineddata'
@ -31,6 +33,7 @@
compatible_service_versions: compatible_service_versions:
- '0.1.0' - '0.1.0'
- '0.1.1' - '0.1.1'
- '0.1.2'
# - title: 'Assamese' # - title: 'Assamese'
# description: '' # description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/asm.traineddata' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/asm.traineddata'
@ -42,6 +45,7 @@
# compatible_service_versions: # compatible_service_versions:
# - '0.1.0' # - '0.1.0'
# - '0.1.1' # - '0.1.1'
# - '0.1.2'
# - title: 'Azerbaijani' # - title: 'Azerbaijani'
# description: '' # description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/aze.traineddata' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/aze.traineddata'
@ -53,6 +57,7 @@
# compatible_service_versions: # compatible_service_versions:
# - '0.1.0' # - '0.1.0'
# - '0.1.1' # - '0.1.1'
# - '0.1.2'
# - title: 'Azerbaijani - Cyrillic' # - title: 'Azerbaijani - Cyrillic'
# description: '' # description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/aze_cyrl.traineddata' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/aze_cyrl.traineddata'
@ -64,6 +69,7 @@
# compatible_service_versions: # compatible_service_versions:
# - '0.1.0' # - '0.1.0'
# - '0.1.1' # - '0.1.1'
# - '0.1.2'
# - title: 'Belarusian' # - title: 'Belarusian'
# description: '' # description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/bel.traineddata' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/bel.traineddata'
@ -75,6 +81,7 @@
# compatible_service_versions: # compatible_service_versions:
# - '0.1.0' # - '0.1.0'
# - '0.1.1' # - '0.1.1'
# - '0.1.2'
# - title: 'Bengali' # - title: 'Bengali'
# description: '' # description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/ben.traineddata' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/ben.traineddata'
@ -86,6 +93,7 @@
# compatible_service_versions: # compatible_service_versions:
# - '0.1.0' # - '0.1.0'
# - '0.1.1' # - '0.1.1'
# - '0.1.2'
# - title: 'Tibetan' # - title: 'Tibetan'
# description: '' # description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/bod.traineddata' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/bod.traineddata'
@ -97,6 +105,7 @@
# compatible_service_versions: # compatible_service_versions:
# - '0.1.0' # - '0.1.0'
# - '0.1.1' # - '0.1.1'
# - '0.1.2'
# - title: 'Bosnian' # - title: 'Bosnian'
# description: '' # description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/bos.traineddata' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/bos.traineddata'
@ -108,6 +117,7 @@
# compatible_service_versions: # compatible_service_versions:
# - '0.1.0' # - '0.1.0'
# - '0.1.1' # - '0.1.1'
# - '0.1.2'
# - title: 'Bulgarian' # - title: 'Bulgarian'
# description: '' # description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/bul.traineddata' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/bul.traineddata'
@ -119,6 +129,7 @@
# compatible_service_versions: # compatible_service_versions:
# - '0.1.0' # - '0.1.0'
# - '0.1.1' # - '0.1.1'
# - '0.1.2'
# - title: 'Catalan; Valencian' # - title: 'Catalan; Valencian'
# description: '' # description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/cat.traineddata' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/cat.traineddata'
@ -130,6 +141,7 @@
# compatible_service_versions: # compatible_service_versions:
# - '0.1.0' # - '0.1.0'
# - '0.1.1' # - '0.1.1'
# - '0.1.2'
# - title: 'Cebuano' # - title: 'Cebuano'
# description: '' # description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/ceb.traineddata' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/ceb.traineddata'
@ -141,6 +153,7 @@
# compatible_service_versions: # compatible_service_versions:
# - '0.1.0' # - '0.1.0'
# - '0.1.1' # - '0.1.1'
# - '0.1.2'
# - title: 'Czech' # - title: 'Czech'
# description: '' # description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/ces.traineddata' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/ces.traineddata'
@ -152,6 +165,7 @@
# compatible_service_versions: # compatible_service_versions:
# - '0.1.0' # - '0.1.0'
# - '0.1.1' # - '0.1.1'
# - '0.1.2'
# - title: 'Chinese - Simplified' # - title: 'Chinese - Simplified'
# description: '' # description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/chi_sim.traineddata' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/chi_sim.traineddata'
@ -163,6 +177,7 @@
# compatible_service_versions: # compatible_service_versions:
# - '0.1.0' # - '0.1.0'
# - '0.1.1' # - '0.1.1'
# - '0.1.2'
- title: 'Chinese - Traditional' - title: 'Chinese - Traditional'
description: '' description: ''
url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/chi_tra.traineddata' url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/chi_tra.traineddata'
@ -174,6 +189,7 @@
compatible_service_versions: compatible_service_versions:
- '0.1.0' - '0.1.0'
- '0.1.1' - '0.1.1'
- '0.1.2'
# - title: 'Cherokee' # - title: 'Cherokee'
# description: '' # description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/chr.traineddata' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/chr.traineddata'
@ -185,6 +201,7 @@
# compatible_service_versions: # compatible_service_versions:
# - '0.1.0' # - '0.1.0'
# - '0.1.1' # - '0.1.1'
# - '0.1.2'
# - title: 'Welsh' # - title: 'Welsh'
# description: '' # description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/cym.traineddata' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/cym.traineddata'
@ -196,6 +213,7 @@
# compatible_service_versions: # compatible_service_versions:
# - '0.1.0' # - '0.1.0'
# - '0.1.1' # - '0.1.1'
# - '0.1.2'
- title: 'Danish' - title: 'Danish'
description: '' description: ''
url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/dan.traineddata' url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/dan.traineddata'
@ -207,6 +225,7 @@
compatible_service_versions: compatible_service_versions:
- '0.1.0' - '0.1.0'
- '0.1.1' - '0.1.1'
- '0.1.2'
- title: 'German' - title: 'German'
description: '' description: ''
url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/deu.traineddata' url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/deu.traineddata'
@ -218,6 +237,7 @@
compatible_service_versions: compatible_service_versions:
- '0.1.0' - '0.1.0'
- '0.1.1' - '0.1.1'
- '0.1.2'
# - title: 'Dzongkha' # - title: 'Dzongkha'
# description: '' # description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/dzo.traineddata' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/dzo.traineddata'
@ -229,6 +249,7 @@
# compatible_service_versions: # compatible_service_versions:
# - '0.1.0' # - '0.1.0'
# - '0.1.1' # - '0.1.1'
# - '0.1.2'
- title: 'Greek, Modern (1453-)' - title: 'Greek, Modern (1453-)'
description: '' description: ''
url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/ell.traineddata' url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/ell.traineddata'
@ -240,6 +261,7 @@
compatible_service_versions: compatible_service_versions:
- '0.1.0' - '0.1.0'
- '0.1.1' - '0.1.1'
- '0.1.2'
- title: 'English' - title: 'English'
description: '' description: ''
url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/eng.traineddata' url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/eng.traineddata'
@ -251,6 +273,7 @@
compatible_service_versions: compatible_service_versions:
- '0.1.0' - '0.1.0'
- '0.1.1' - '0.1.1'
- '0.1.2'
- title: 'English, Middle (1100-1500)' - title: 'English, Middle (1100-1500)'
description: '' description: ''
url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/enm.traineddata' url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/enm.traineddata'
@ -262,6 +285,7 @@
compatible_service_versions: compatible_service_versions:
- '0.1.0' - '0.1.0'
- '0.1.1' - '0.1.1'
- '0.1.2'
# - title: 'Esperanto' # - title: 'Esperanto'
# description: '' # description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/epo.traineddata' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/epo.traineddata'
@ -273,6 +297,7 @@
# compatible_service_versions: # compatible_service_versions:
# - '0.1.0' # - '0.1.0'
# - '0.1.1' # - '0.1.1'
# - '0.1.2'
# - title: 'Estonian' # - title: 'Estonian'
# description: '' # description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/est.traineddata' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/est.traineddata'
@ -284,6 +309,7 @@
# compatible_service_versions: # compatible_service_versions:
# - '0.1.0' # - '0.1.0'
# - '0.1.1' # - '0.1.1'
# - '0.1.2'
# - title: 'Basque' # - title: 'Basque'
# description: '' # description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/eus.traineddata' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/eus.traineddata'
@ -295,6 +321,7 @@
# compatible_service_versions: # compatible_service_versions:
# - '0.1.0' # - '0.1.0'
# - '0.1.1' # - '0.1.1'
# - '0.1.2'
# - title: 'Persian' # - title: 'Persian'
# description: '' # description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/fas.traineddata' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/fas.traineddata'
@ -306,6 +333,7 @@
# compatible_service_versions: # compatible_service_versions:
# - '0.1.0' # - '0.1.0'
# - '0.1.1' # - '0.1.1'
# - '0.1.2'
# - title: 'Finnish' # - title: 'Finnish'
# description: '' # description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/fin.traineddata' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/fin.traineddata'
@ -317,6 +345,7 @@
# compatible_service_versions: # compatible_service_versions:
# - '0.1.0' # - '0.1.0'
# - '0.1.1' # - '0.1.1'
# - '0.1.2'
- title: 'French' - title: 'French'
description: '' description: ''
url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/fra.traineddata' url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/fra.traineddata'
@ -328,6 +357,7 @@
compatible_service_versions: compatible_service_versions:
- '0.1.0' - '0.1.0'
- '0.1.1' - '0.1.1'
- '0.1.2'
- title: 'German Fraktur' - title: 'German Fraktur'
description: '' description: ''
url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/frk.traineddata' url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/frk.traineddata'
@ -339,6 +369,7 @@
compatible_service_versions: compatible_service_versions:
- '0.1.0' - '0.1.0'
- '0.1.1' - '0.1.1'
- '0.1.2'
- title: 'French, Middle (ca. 1400-1600)' - title: 'French, Middle (ca. 1400-1600)'
description: '' description: ''
url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/frm.traineddata' url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/frm.traineddata'
@ -350,6 +381,7 @@
compatible_service_versions: compatible_service_versions:
- '0.1.0' - '0.1.0'
- '0.1.1' - '0.1.1'
- '0.1.2'
# - title: 'Irish' # - title: 'Irish'
# description: '' # description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/gle.traineddata' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/gle.traineddata'
@ -361,6 +393,7 @@
# compatible_service_versions: # compatible_service_versions:
# - '0.1.0' # - '0.1.0'
# - '0.1.1' # - '0.1.1'
# - '0.1.2'
# - title: 'Galician' # - title: 'Galician'
# description: '' # description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/glg.traineddata' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/glg.traineddata'
@ -372,6 +405,7 @@
# compatible_service_versions: # compatible_service_versions:
# - '0.1.0' # - '0.1.0'
# - '0.1.1' # - '0.1.1'
# - '0.1.2'
- title: 'Greek, Ancient (-1453)' - title: 'Greek, Ancient (-1453)'
description: '' description: ''
url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/grc.traineddata' url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/grc.traineddata'
@ -383,6 +417,7 @@
compatible_service_versions: compatible_service_versions:
- '0.1.0' - '0.1.0'
- '0.1.1' - '0.1.1'
- '0.1.2'
# - title: 'Gujarati' # - title: 'Gujarati'
# description: '' # description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/guj.traineddata' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/guj.traineddata'
@ -394,6 +429,7 @@
# compatible_service_versions: # compatible_service_versions:
# - '0.1.0' # - '0.1.0'
# - '0.1.1' # - '0.1.1'
# - '0.1.2'
# - title: 'Haitian; Haitian Creole' # - title: 'Haitian; Haitian Creole'
# description: '' # description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/hat.traineddata' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/hat.traineddata'
@ -405,6 +441,7 @@
# compatible_service_versions: # compatible_service_versions:
# - '0.1.0' # - '0.1.0'
# - '0.1.1' # - '0.1.1'
# - '0.1.2'
# - title: 'Hebrew' # - title: 'Hebrew'
# description: '' # description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/heb.traineddata' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/heb.traineddata'
@ -416,6 +453,7 @@
# compatible_service_versions: # compatible_service_versions:
# - '0.1.0' # - '0.1.0'
# - '0.1.1' # - '0.1.1'
# - '0.1.2'
# - title: 'Hindi' # - title: 'Hindi'
# description: '' # description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/hin.traineddata' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/hin.traineddata'
@ -427,6 +465,7 @@
# compatible_service_versions: # compatible_service_versions:
# - '0.1.0' # - '0.1.0'
# - '0.1.1' # - '0.1.1'
# - '0.1.2'
# - title: 'Croatian' # - title: 'Croatian'
# description: '' # description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/hrv.traineddata' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/hrv.traineddata'
@ -438,6 +477,7 @@
# compatible_service_versions: # compatible_service_versions:
# - '0.1.0' # - '0.1.0'
# - '0.1.1' # - '0.1.1'
# - '0.1.2'
# - title: 'Hungarian' # - title: 'Hungarian'
# description: '' # description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/hun.traineddata' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/hun.traineddata'
@ -449,6 +489,7 @@
# compatible_service_versions: # compatible_service_versions:
# - '0.1.0' # - '0.1.0'
# - '0.1.1' # - '0.1.1'
# - '0.1.2'
# - title: 'Inuktitut' # - title: 'Inuktitut'
# description: '' # description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/iku.traineddata' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/iku.traineddata'
@ -460,6 +501,7 @@
# compatible_service_versions: # compatible_service_versions:
# - '0.1.0' # - '0.1.0'
# - '0.1.1' # - '0.1.1'
# - '0.1.2'
# - title: 'Indonesian' # - title: 'Indonesian'
# description: '' # description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/ind.traineddata' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/ind.traineddata'
@ -471,6 +513,7 @@
# compatible_service_versions: # compatible_service_versions:
# - '0.1.0' # - '0.1.0'
# - '0.1.1' # - '0.1.1'
# - '0.1.2'
# - title: 'Icelandic' # - title: 'Icelandic'
# description: '' # description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/isl.traineddata' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/isl.traineddata'
@ -482,6 +525,7 @@
# compatible_service_versions: # compatible_service_versions:
# - '0.1.0' # - '0.1.0'
# - '0.1.1' # - '0.1.1'
# - '0.1.2'
- title: 'Italian' - title: 'Italian'
description: '' description: ''
url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/ita.traineddata' url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/ita.traineddata'
@ -493,6 +537,7 @@
compatible_service_versions: compatible_service_versions:
- '0.1.0' - '0.1.0'
- '0.1.1' - '0.1.1'
- '0.1.2'
- title: 'Italian - Old' - title: 'Italian - Old'
description: '' description: ''
url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/ita_old.traineddata' url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/ita_old.traineddata'
@ -504,6 +549,7 @@
compatible_service_versions: compatible_service_versions:
- '0.1.0' - '0.1.0'
- '0.1.1' - '0.1.1'
- '0.1.2'
# - title: 'Javanese' # - title: 'Javanese'
# description: '' # description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/jav.traineddata' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/jav.traineddata'
@ -515,6 +561,7 @@
# compatible_service_versions: # compatible_service_versions:
# - '0.1.0' # - '0.1.0'
# - '0.1.1' # - '0.1.1'
# - '0.1.2'
# - title: 'Japanese' # - title: 'Japanese'
# description: '' # description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/jpn.traineddata' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/jpn.traineddata'
@ -526,6 +573,7 @@
# compatible_service_versions: # compatible_service_versions:
# - '0.1.0' # - '0.1.0'
# - '0.1.1' # - '0.1.1'
# - '0.1.2'
# - title: 'Kannada' # - title: 'Kannada'
# description: '' # description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/kan.traineddata' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/kan.traineddata'
@ -537,6 +585,7 @@
# compatible_service_versions: # compatible_service_versions:
# - '0.1.0' # - '0.1.0'
# - '0.1.1' # - '0.1.1'
# - '0.1.2'
# - title: 'Georgian' # - title: 'Georgian'
# description: '' # description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/kat.traineddata' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/kat.traineddata'
@ -548,6 +597,7 @@
# compatible_service_versions: # compatible_service_versions:
# - '0.1.0' # - '0.1.0'
# - '0.1.1' # - '0.1.1'
# - '0.1.2'
# - title: 'Georgian - Old' # - title: 'Georgian - Old'
# description: '' # description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/kat_old.traineddata' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/kat_old.traineddata'
@ -559,6 +609,7 @@
# compatible_service_versions: # compatible_service_versions:
# - '0.1.0' # - '0.1.0'
# - '0.1.1' # - '0.1.1'
# - '0.1.2'
# - title: 'Kazakh' # - title: 'Kazakh'
# description: '' # description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/kaz.traineddata' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/kaz.traineddata'
@ -570,6 +621,7 @@
# compatible_service_versions: # compatible_service_versions:
# - '0.1.0' # - '0.1.0'
# - '0.1.1' # - '0.1.1'
# - '0.1.2'
# - title: 'Central Khmer' # - title: 'Central Khmer'
# description: '' # description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/khm.traineddata' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/khm.traineddata'
@ -581,6 +633,7 @@
# compatible_service_versions: # compatible_service_versions:
# - '0.1.0' # - '0.1.0'
# - '0.1.1' # - '0.1.1'
# - '0.1.2'
# - title: 'Kirghiz; Kyrgyz' # - title: 'Kirghiz; Kyrgyz'
# description: '' # description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/kir.traineddata' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/kir.traineddata'
@ -592,6 +645,7 @@
# compatible_service_versions: # compatible_service_versions:
# - '0.1.0' # - '0.1.0'
# - '0.1.1' # - '0.1.1'
# - '0.1.2'
# - title: 'Korean' # - title: 'Korean'
# description: '' # description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/kor.traineddata' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/kor.traineddata'
@ -603,6 +657,7 @@
# compatible_service_versions: # compatible_service_versions:
# - '0.1.0' # - '0.1.0'
# - '0.1.1' # - '0.1.1'
# - '0.1.2'
# - title: 'Kurdish' # - title: 'Kurdish'
# description: '' # description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/kur.traineddata' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/kur.traineddata'
@ -614,6 +669,7 @@
# compatible_service_versions: # compatible_service_versions:
# - '0.1.0' # - '0.1.0'
# - '0.1.1' # - '0.1.1'
# - '0.1.2'
# - title: 'Lao' # - title: 'Lao'
# description: '' # description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/lao.traineddata' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/lao.traineddata'
@ -625,6 +681,7 @@
# compatible_service_versions: # compatible_service_versions:
# - '0.1.0' # - '0.1.0'
# - '0.1.1' # - '0.1.1'
# - '0.1.2'
# - title: 'Latin' # - title: 'Latin'
# description: '' # description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/lat.traineddata' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/lat.traineddata'
@ -636,6 +693,7 @@
# compatible_service_versions: # compatible_service_versions:
# - '0.1.0' # - '0.1.0'
# - '0.1.1' # - '0.1.1'
# - '0.1.2'
# - title: 'Latvian' # - title: 'Latvian'
# description: '' # description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/lav.traineddata' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/lav.traineddata'
@ -647,6 +705,7 @@
# compatible_service_versions: # compatible_service_versions:
# - '0.1.0' # - '0.1.0'
# - '0.1.1' # - '0.1.1'
# - '0.1.2'
# - title: 'Lithuanian' # - title: 'Lithuanian'
# description: '' # description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/lit.traineddata' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/lit.traineddata'
@ -658,6 +717,7 @@
# compatible_service_versions: # compatible_service_versions:
# - '0.1.0' # - '0.1.0'
# - '0.1.1' # - '0.1.1'
# - '0.1.2'
# - title: 'Malayalam' # - title: 'Malayalam'
# description: '' # description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/mal.traineddata' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/mal.traineddata'
@ -669,6 +729,7 @@
# compatible_service_versions: # compatible_service_versions:
# - '0.1.0' # - '0.1.0'
# - '0.1.1' # - '0.1.1'
# - '0.1.2'
# - title: 'Marathi' # - title: 'Marathi'
# description: '' # description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/mar.traineddata' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/mar.traineddata'
@ -680,6 +741,7 @@
# compatible_service_versions: # compatible_service_versions:
# - '0.1.0' # - '0.1.0'
# - '0.1.1' # - '0.1.1'
# - '0.1.2'
# - title: 'Macedonian' # - title: 'Macedonian'
# description: '' # description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/mkd.traineddata' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/mkd.traineddata'
@ -691,6 +753,7 @@
# compatible_service_versions: # compatible_service_versions:
# - '0.1.0' # - '0.1.0'
# - '0.1.1' # - '0.1.1'
# - '0.1.2'
# - title: 'Maltese' # - title: 'Maltese'
# description: '' # description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/mlt.traineddata' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/mlt.traineddata'
@ -702,6 +765,7 @@
# compatible_service_versions: # compatible_service_versions:
# - '0.1.0' # - '0.1.0'
# - '0.1.1' # - '0.1.1'
# - '0.1.2'
# - title: 'Malay' # - title: 'Malay'
# description: '' # description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/msa.traineddata' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/msa.traineddata'
@ -713,6 +777,7 @@
# compatible_service_versions: # compatible_service_versions:
# - '0.1.0' # - '0.1.0'
# - '0.1.1' # - '0.1.1'
# - '0.1.2'
# - title: 'Burmese' # - title: 'Burmese'
# description: '' # description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/mya.traineddata' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/mya.traineddata'
@ -724,6 +789,7 @@
# compatible_service_versions: # compatible_service_versions:
# - '0.1.0' # - '0.1.0'
# - '0.1.1' # - '0.1.1'
# - '0.1.2'
# - title: 'Nepali' # - title: 'Nepali'
# description: '' # description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/nep.traineddata' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/nep.traineddata'
@ -735,6 +801,7 @@
# compatible_service_versions: # compatible_service_versions:
# - '0.1.0' # - '0.1.0'
# - '0.1.1' # - '0.1.1'
# - '0.1.2'
# - title: 'Dutch; Flemish' # - title: 'Dutch; Flemish'
# description: '' # description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/nld.traineddata' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/nld.traineddata'
@ -746,6 +813,7 @@
# compatible_service_versions: # compatible_service_versions:
# - '0.1.0' # - '0.1.0'
# - '0.1.1' # - '0.1.1'
# - '0.1.2'
# - title: 'Norwegian' # - title: 'Norwegian'
# description: '' # description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/nor.traineddata' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/nor.traineddata'
@ -757,6 +825,7 @@
# compatible_service_versions: # compatible_service_versions:
# - '0.1.0' # - '0.1.0'
# - '0.1.1' # - '0.1.1'
# - '0.1.2'
# - title: 'Oriya' # - title: 'Oriya'
# description: '' # description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/ori.traineddata' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/ori.traineddata'
@ -768,6 +837,7 @@
# compatible_service_versions: # compatible_service_versions:
# - '0.1.0' # - '0.1.0'
# - '0.1.1' # - '0.1.1'
# - '0.1.2'
# - title: 'Panjabi; Punjabi' # - title: 'Panjabi; Punjabi'
# description: '' # description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/pan.traineddata' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/pan.traineddata'
@ -779,6 +849,7 @@
# compatible_service_versions: # compatible_service_versions:
# - '0.1.0' # - '0.1.0'
# - '0.1.1' # - '0.1.1'
# - '0.1.2'
# - title: 'Polish' # - title: 'Polish'
# description: '' # description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/pol.traineddata' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/pol.traineddata'
@ -790,6 +861,7 @@
# compatible_service_versions: # compatible_service_versions:
# - '0.1.0' # - '0.1.0'
# - '0.1.1' # - '0.1.1'
# - '0.1.2'
- title: 'Portuguese' - title: 'Portuguese'
description: '' description: ''
url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/por.traineddata' url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/por.traineddata'
@ -801,6 +873,7 @@
compatible_service_versions: compatible_service_versions:
- '0.1.0' - '0.1.0'
- '0.1.1' - '0.1.1'
- '0.1.2'
# - title: 'Pushto; Pashto' # - title: 'Pushto; Pashto'
# description: '' # description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/pus.traineddata' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/pus.traineddata'
@ -812,6 +885,7 @@
# compatible_service_versions: # compatible_service_versions:
# - '0.1.0' # - '0.1.0'
# - '0.1.1' # - '0.1.1'
# - '0.1.2'
# - title: 'Romanian; Moldavian; Moldovan' # - title: 'Romanian; Moldavian; Moldovan'
# description: '' # description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/ron.traineddata' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/ron.traineddata'
@ -823,6 +897,7 @@
# compatible_service_versions: # compatible_service_versions:
# - '0.1.0' # - '0.1.0'
# - '0.1.1' # - '0.1.1'
# - '0.1.2'
- title: 'Russian' - title: 'Russian'
description: '' description: ''
url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/rus.traineddata' url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/rus.traineddata'
@ -834,6 +909,7 @@
compatible_service_versions: compatible_service_versions:
- '0.1.0' - '0.1.0'
- '0.1.1' - '0.1.1'
- '0.1.2'
# - title: 'Sanskrit' # - title: 'Sanskrit'
# description: '' # description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/san.traineddata' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/san.traineddata'
@ -845,6 +921,7 @@
# compatible_service_versions: # compatible_service_versions:
# - '0.1.0' # - '0.1.0'
# - '0.1.1' # - '0.1.1'
# - '0.1.2'
# - title: 'Sinhala; Sinhalese' # - title: 'Sinhala; Sinhalese'
# description: '' # description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/sin.traineddata' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/sin.traineddata'
@ -856,6 +933,7 @@
# compatible_service_versions: # compatible_service_versions:
# - '0.1.0' # - '0.1.0'
# - '0.1.1' # - '0.1.1'
# - '0.1.2'
# - title: 'Slovak' # - title: 'Slovak'
# description: '' # description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/slk.traineddata' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/slk.traineddata'
@ -867,6 +945,7 @@
# compatible_service_versions: # compatible_service_versions:
# - '0.1.0' # - '0.1.0'
# - '0.1.1' # - '0.1.1'
# - '0.1.2'
# - title: 'Slovenian' # - title: 'Slovenian'
# description: '' # description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/slv.traineddata' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/slv.traineddata'
@ -878,6 +957,7 @@
# compatible_service_versions: # compatible_service_versions:
# - '0.1.0' # - '0.1.0'
# - '0.1.1' # - '0.1.1'
# - '0.1.2'
- title: 'Spanish; Castilian' - title: 'Spanish; Castilian'
description: '' description: ''
url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/spa.traineddata' url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/spa.traineddata'
@ -889,6 +969,7 @@
compatible_service_versions: compatible_service_versions:
- '0.1.0' - '0.1.0'
- '0.1.1' - '0.1.1'
- '0.1.2'
- title: 'Spanish; Castilian - Old' - title: 'Spanish; Castilian - Old'
description: '' description: ''
url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/spa_old.traineddata' url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/spa_old.traineddata'
@ -900,6 +981,7 @@
compatible_service_versions: compatible_service_versions:
- '0.1.0' - '0.1.0'
- '0.1.1' - '0.1.1'
- '0.1.2'
# - title: 'Albanian' # - title: 'Albanian'
# description: '' # description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/sqi.traineddata' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/sqi.traineddata'
@ -911,6 +993,7 @@
# compatible_service_versions: # compatible_service_versions:
# - '0.1.0' # - '0.1.0'
# - '0.1.1' # - '0.1.1'
# - '0.1.2'
# - title: 'Serbian' # - title: 'Serbian'
# description: '' # description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/srp.traineddata' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/srp.traineddata'
@ -922,6 +1005,7 @@
# compatible_service_versions: # compatible_service_versions:
# - '0.1.0' # - '0.1.0'
# - '0.1.1' # - '0.1.1'
# - '0.1.2'
# - title: 'Serbian - Latin' # - title: 'Serbian - Latin'
# description: '' # description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/srp_latn.traineddata' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/srp_latn.traineddata'
@ -933,6 +1017,7 @@
# compatible_service_versions: # compatible_service_versions:
# - '0.1.0' # - '0.1.0'
# - '0.1.1' # - '0.1.1'
# - '0.1.2'
# - title: 'Swahili' # - title: 'Swahili'
# description: '' # description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/swa.traineddata' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/swa.traineddata'
@ -944,6 +1029,7 @@
# compatible_service_versions: # compatible_service_versions:
# - '0.1.0' # - '0.1.0'
# - '0.1.1' # - '0.1.1'
# - '0.1.2'
# - title: 'Swedish' # - title: 'Swedish'
# description: '' # description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/swe.traineddata' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/swe.traineddata'
@ -955,6 +1041,7 @@
# compatible_service_versions: # compatible_service_versions:
# - '0.1.0' # - '0.1.0'
# - '0.1.1' # - '0.1.1'
# - '0.1.2'
# - title: 'Syriac' # - title: 'Syriac'
# description: '' # description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/syr.traineddata' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/syr.traineddata'
@ -966,6 +1053,7 @@
# compatible_service_versions: # compatible_service_versions:
# - '0.1.0' # - '0.1.0'
# - '0.1.1' # - '0.1.1'
# - '0.1.2'
# - title: 'Tamil' # - title: 'Tamil'
# description: '' # description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/tam.traineddata' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/tam.traineddata'
@ -977,6 +1065,7 @@
# compatible_service_versions: # compatible_service_versions:
# - '0.1.0' # - '0.1.0'
# - '0.1.1' # - '0.1.1'
# - '0.1.2'
# - title: 'Telugu' # - title: 'Telugu'
# description: '' # description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/tel.traineddata' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/tel.traineddata'
@ -988,6 +1077,7 @@
# compatible_service_versions: # compatible_service_versions:
# - '0.1.0' # - '0.1.0'
# - '0.1.1' # - '0.1.1'
# - '0.1.2'
# - title: 'Tajik' # - title: 'Tajik'
# description: '' # description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/tgk.traineddata' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/tgk.traineddata'
@ -999,6 +1089,7 @@
# compatible_service_versions: # compatible_service_versions:
# - '0.1.0' # - '0.1.0'
# - '0.1.1' # - '0.1.1'
# - '0.1.2'
# - title: 'Tagalog' # - title: 'Tagalog'
# description: '' # description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/tgl.traineddata' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/tgl.traineddata'
@ -1010,6 +1101,7 @@
# compatible_service_versions: # compatible_service_versions:
# - '0.1.0' # - '0.1.0'
# - '0.1.1' # - '0.1.1'
# - '0.1.2'
# - title: 'Thai' # - title: 'Thai'
# description: '' # description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/tha.traineddata' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/tha.traineddata'
@ -1021,6 +1113,7 @@
# compatible_service_versions: # compatible_service_versions:
# - '0.1.0' # - '0.1.0'
# - '0.1.1' # - '0.1.1'
# - '0.1.2'
# - title: 'Tigrinya' # - title: 'Tigrinya'
# description: '' # description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/tir.traineddata' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/tir.traineddata'
@ -1032,6 +1125,7 @@
# compatible_service_versions: # compatible_service_versions:
# - '0.1.0' # - '0.1.0'
# - '0.1.1' # - '0.1.1'
# - '0.1.2'
# - title: 'Turkish' # - title: 'Turkish'
# description: '' # description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/tur.traineddata' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/tur.traineddata'
@ -1043,6 +1137,7 @@
# compatible_service_versions: # compatible_service_versions:
# - '0.1.0' # - '0.1.0'
# - '0.1.1' # - '0.1.1'
# - '0.1.2'
# - title: 'Uighur; Uyghur' # - title: 'Uighur; Uyghur'
# description: '' # description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/uig.traineddata' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/uig.traineddata'
@ -1054,6 +1149,7 @@
# compatible_service_versions: # compatible_service_versions:
# - '0.1.0' # - '0.1.0'
# - '0.1.1' # - '0.1.1'
# - '0.1.2'
# - title: 'Ukrainian' # - title: 'Ukrainian'
# description: '' # description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/ukr.traineddata' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/ukr.traineddata'
@ -1065,6 +1161,7 @@
# compatible_service_versions: # compatible_service_versions:
# - '0.1.0' # - '0.1.0'
# - '0.1.1' # - '0.1.1'
# - '0.1.2'
# - title: 'Urdu' # - title: 'Urdu'
# description: '' # description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/urd.traineddata' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/urd.traineddata'
@ -1076,6 +1173,7 @@
# compatible_service_versions: # compatible_service_versions:
# - '0.1.0' # - '0.1.0'
# - '0.1.1' # - '0.1.1'
# - '0.1.2'
# - title: 'Uzbek' # - title: 'Uzbek'
# description: '' # description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/uzb.traineddata' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/uzb.traineddata'
@ -1087,6 +1185,7 @@
# compatible_service_versions: # compatible_service_versions:
# - '0.1.0' # - '0.1.0'
# - '0.1.1' # - '0.1.1'
# - '0.1.2'
# - title: 'Uzbek - Cyrillic' # - title: 'Uzbek - Cyrillic'
# description: '' # description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/uzb_cyrl.traineddata' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/uzb_cyrl.traineddata'
@ -1098,6 +1197,7 @@
# compatible_service_versions: # compatible_service_versions:
# - '0.1.0' # - '0.1.0'
# - '0.1.1' # - '0.1.1'
# - '0.1.2'
# - title: 'Vietnamese' # - title: 'Vietnamese'
# description: '' # description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/vie.traineddata' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/vie.traineddata'
@ -1109,6 +1209,7 @@
# compatible_service_versions: # compatible_service_versions:
# - '0.1.0' # - '0.1.0'
# - '0.1.1' # - '0.1.1'
# - '0.1.2'
# - title: 'Yiddish' # - title: 'Yiddish'
# description: '' # description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/yid.traineddata' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/yid.traineddata'
@ -1120,3 +1221,4 @@
# compatible_service_versions: # compatible_service_versions:
# - '0.1.0' # - '0.1.0'
# - '0.1.1' # - '0.1.1'
# - '0.1.2'

View File

@ -45,7 +45,7 @@ def _create_build_corpus_service(corpus):
''' ## Constraints ## ''' ''' ## Constraints ## '''
constraints = ['node.role==worker'] constraints = ['node.role==worker']
''' ## Image ## ''' ''' ## Image ## '''
image = f'{current_app.config["NOPAQUE_DOCKER_IMAGE_PREFIX"]}cwb:r1853' image = f'{current_app.config["NOPAQUE_DOCKER_IMAGE_PREFIX"]}cwb:r1879'
''' ## Labels ## ''' ''' ## Labels ## '''
labels = { labels = {
'origin': current_app.config['SERVER_NAME'], 'origin': current_app.config['SERVER_NAME'],
@ -139,7 +139,7 @@ def _create_cqpserver_container(corpus):
''' ## Entrypoint ## ''' ''' ## Entrypoint ## '''
entrypoint = ['bash', '-c'] entrypoint = ['bash', '-c']
''' ## Image ## ''' ''' ## Image ## '''
image = f'{current_app.config["NOPAQUE_DOCKER_IMAGE_PREFIX"]}cwb:r1853' image = f'{current_app.config["NOPAQUE_DOCKER_IMAGE_PREFIX"]}cwb:r1879'
''' ## Name ## ''' ''' ## Name ## '''
name = f'cqpserver_{corpus.id}' name = f'cqpserver_{corpus.id}'
''' ## Network ## ''' ''' ## Network ## '''

View File

@ -45,12 +45,6 @@ def dashboard():
) )
# @bp.route('/user_manual')
# @register_breadcrumb(bp, '.user_manual', '<i class="material-icons left">help</i>User manual')
# def user_manual():
# return render_template('main/user_manual.html.j2', title='User manual')
@bp.route('/news') @bp.route('/news')
@register_breadcrumb(bp, '.news', '<i class="material-icons left">email</i>News') @register_breadcrumb(bp, '.news', '<i class="material-icons left">email</i>News')
def news(): def news():

View File

@ -953,7 +953,7 @@ class TesseractOCRPipelineModel(FileMixin, HashidMixin, db.Model):
return self.user.hashid return self.user.hashid
@staticmethod @staticmethod
def insert_defaults(): def insert_defaults(force_download=False):
nopaque_user = User.query.filter_by(username='nopaque').first() nopaque_user = User.query.filter_by(username='nopaque').first()
defaults_file = os.path.join( defaults_file = os.path.join(
os.path.dirname(os.path.abspath(__file__)), os.path.dirname(os.path.abspath(__file__)),
@ -966,6 +966,7 @@ class TesseractOCRPipelineModel(FileMixin, HashidMixin, db.Model):
if model is not None: if model is not None:
model.compatible_service_versions = m['compatible_service_versions'] model.compatible_service_versions = m['compatible_service_versions']
model.description = m['description'] model.description = m['description']
model.filename = f'{model.id}.traineddata'
model.publisher = m['publisher'] model.publisher = m['publisher']
model.publisher_url = m['publisher_url'] model.publisher_url = m['publisher_url']
model.publishing_url = m['publishing_url'] model.publishing_url = m['publishing_url']
@ -973,7 +974,7 @@ class TesseractOCRPipelineModel(FileMixin, HashidMixin, db.Model):
model.is_public = True model.is_public = True
model.title = m['title'] model.title = m['title']
model.version = m['version'] model.version = m['version']
continue else:
model = TesseractOCRPipelineModel( model = TesseractOCRPipelineModel(
compatible_service_versions=m['compatible_service_versions'], compatible_service_versions=m['compatible_service_versions'],
description=m['description'], description=m['description'],
@ -990,6 +991,7 @@ class TesseractOCRPipelineModel(FileMixin, HashidMixin, db.Model):
db.session.flush(objects=[model]) db.session.flush(objects=[model])
db.session.refresh(model) db.session.refresh(model)
model.filename = f'{model.id}.traineddata' model.filename = f'{model.id}.traineddata'
if not os.path.exists(model.path) or force_download:
r = requests.get(m['url'], stream=True) r = requests.get(m['url'], stream=True)
pbar = tqdm( pbar = tqdm(
desc=f'{model.title} ({model.filename})', desc=f'{model.title} ({model.filename})',
@ -1080,7 +1082,7 @@ class SpaCyNLPPipelineModel(FileMixin, HashidMixin, db.Model):
return self.user.hashid return self.user.hashid
@staticmethod @staticmethod
def insert_defaults(): def insert_defaults(force_download=False):
nopaque_user = User.query.filter_by(username='nopaque').first() nopaque_user = User.query.filter_by(username='nopaque').first()
defaults_file = os.path.join( defaults_file = os.path.join(
os.path.dirname(os.path.abspath(__file__)), os.path.dirname(os.path.abspath(__file__)),
@ -1093,6 +1095,7 @@ class SpaCyNLPPipelineModel(FileMixin, HashidMixin, db.Model):
if model is not None: if model is not None:
model.compatible_service_versions = m['compatible_service_versions'] model.compatible_service_versions = m['compatible_service_versions']
model.description = m['description'] model.description = m['description']
model.filename = m['url'].split('/')[-1]
model.publisher = m['publisher'] model.publisher = m['publisher']
model.publisher_url = m['publisher_url'] model.publisher_url = m['publisher_url']
model.publishing_url = m['publishing_url'] model.publishing_url = m['publishing_url']
@ -1101,10 +1104,11 @@ class SpaCyNLPPipelineModel(FileMixin, HashidMixin, db.Model):
model.title = m['title'] model.title = m['title']
model.version = m['version'] model.version = m['version']
model.pipeline_name = m['pipeline_name'] model.pipeline_name = m['pipeline_name']
continue else:
model = SpaCyNLPPipelineModel( model = SpaCyNLPPipelineModel(
compatible_service_versions=m['compatible_service_versions'], compatible_service_versions=m['compatible_service_versions'],
description=m['description'], description=m['description'],
filename=m['url'].split('/')[-1],
publisher=m['publisher'], publisher=m['publisher'],
publisher_url=m['publisher_url'], publisher_url=m['publisher_url'],
publishing_url=m['publishing_url'], publishing_url=m['publishing_url'],
@ -1118,7 +1122,7 @@ class SpaCyNLPPipelineModel(FileMixin, HashidMixin, db.Model):
db.session.add(model) db.session.add(model)
db.session.flush(objects=[model]) db.session.flush(objects=[model])
db.session.refresh(model) db.session.refresh(model)
model.filename = m['url'].split('/')[-1] if not os.path.exists(model.path) or force_download:
r = requests.get(m['url'], stream=True) r = requests.get(m['url'], stream=True)
pbar = tqdm( pbar = tqdm(
desc=f'{model.title} ({model.filename})', desc=f'{model.title} ({model.filename})',

View File

@ -10,7 +10,7 @@ file-setup-pipeline:
tesseract-ocr-pipeline: tesseract-ocr-pipeline:
name: 'Tesseract OCR Pipeline' name: 'Tesseract OCR Pipeline'
publisher: 'Bielefeld University - CRC 1288 - INF' publisher: 'Bielefeld University - CRC 1288 - INF'
latest_version: '0.1.1' latest_version: '0.1.2'
versions: versions:
0.1.0: 0.1.0:
methods: methods:
@ -23,6 +23,12 @@ tesseract-ocr-pipeline:
- 'ocropus_nlbin_threshold' - 'ocropus_nlbin_threshold'
publishing_year: 2022 publishing_year: 2022
url: 'https://gitlab.ub.uni-bielefeld.de/sfb1288inf/tesseract-ocr-pipeline/-/releases/v0.1.1' url: 'https://gitlab.ub.uni-bielefeld.de/sfb1288inf/tesseract-ocr-pipeline/-/releases/v0.1.1'
0.1.2:
methods:
- 'binarization'
- 'ocropus_nlbin_threshold'
publishing_year: 2023
url: 'https://gitlab.ub.uni-bielefeld.de/sfb1288inf/tesseract-ocr-pipeline/-/releases/v0.1.2'
transkribus-htr-pipeline: transkribus-htr-pipeline:
name: 'Transkribus HTR Pipeline' name: 'Transkribus HTR Pipeline'
publisher: 'Bielefeld University - CRC 1288 - INF' publisher: 'Bielefeld University - CRC 1288 - INF'
@ -41,7 +47,7 @@ transkribus-htr-pipeline:
spacy-nlp-pipeline: spacy-nlp-pipeline:
name: 'SpaCy NLP Pipeline' name: 'SpaCy NLP Pipeline'
publisher: 'Bielefeld University - CRC 1288 - INF' publisher: 'Bielefeld University - CRC 1288 - INF'
latest_version: '0.1.2' latest_version: '0.1.1'
versions: versions:
0.1.0: 0.1.0:
methods: methods:
@ -53,8 +59,3 @@ spacy-nlp-pipeline:
- 'encoding_detection' - 'encoding_detection'
publishing_year: 2022 publishing_year: 2022
url: 'https://gitlab.ub.uni-bielefeld.de/sfb1288inf/spacy-nlp-pipeline/-/releases/v0.1.1' url: 'https://gitlab.ub.uni-bielefeld.de/sfb1288inf/spacy-nlp-pipeline/-/releases/v0.1.1'
0.1.2:
methods:
- 'encoding_detection'
publishing_year: 2022
url: 'https://gitlab.ub.uni-bielefeld.de/sfb1288inf/spacy-nlp-pipeline/-/releases/v0.1.2'

Binary file not shown.

Before

Width:  |  Height:  |  Size: 222 KiB

After

Width:  |  Height:  |  Size: 123 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 378 KiB

After

Width:  |  Height:  |  Size: 402 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 720 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 854 KiB

After

Width:  |  Height:  |  Size: 589 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 436 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 189 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 511 KiB

After

Width:  |  Height:  |  Size: 381 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 1009 KiB

After

Width:  |  Height:  |  Size: 759 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 903 KiB

After

Width:  |  Height:  |  Size: 750 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 413 KiB

After

Width:  |  Height:  |  Size: 524 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 23 KiB

View File

@ -1,495 +0,0 @@
/**
 * Shared behaviour of the concordance query builder: creating, editing,
 * deleting and re-ordering query chips inside the query input field and
 * rendering the resulting CQL preview.
 *
 * All DOM references are read from (and cached state is written to) the
 * `elements` object handed to the constructor.
 */
class GeneralQueryBuilderFunctions {
  /**
   * @param {object} elements Shared element-reference/state object.
   */
  constructor(elements) {
    this.elements = elements;
  }

  /**
   * Applies a classList operation to every `[data-toggle-area]` element named in `elements`.
   * @param {string[]} elements Values of the `data-toggle-area` attribute.
   * @param {string} className Class to add/remove/toggle.
   * @param {string} action Name of the classList method ('add', 'remove', 'toggle').
   */
  toggleClass(elements, className, action) {
    elements.forEach(element => {
      document.querySelector(`[data-toggle-area="${element}"]`).classList[action](className);
    });
  }

  /** Empties the query input field, shows the placeholder and refreshes chip list and preview. */
  resetQueryInputField() {
    this.elements.queryInputField.innerHTML = '';
    this.addPlaceholder();
    this.updateChipList();
    this.queryPreviewBuilder();
  }

  /** Re-reads all `.query-component` chips of the input field into `elements.queryChipElements`. */
  updateChipList() {
    this.elements.queryChipElements = this.elements.queryInputField.querySelectorAll('.query-component');
  }

  /** Clears the query input field if it currently contains the placeholder. */
  removePlaceholder() {
    let placeholder = this.elements.queryInputField.querySelector('#corpus-analysis-concordance-query-builder-input-field-placeholder');
    if (placeholder) {
      this.elements.queryInputField.innerHTML = '';
    }
  }

  /** Appends the placeholder hint to the query input field. */
  addPlaceholder() {
    let placeholder = nopaque.Utils.HTMLToElement('<span id="corpus-analysis-concordance-query-builder-input-field-placeholder">Click on a button to add a query component</span>');
    this.elements.queryInputField.appendChild(placeholder);
  }

  /**
   * Returns the `<option>` below `selectElement` whose `value` attribute equals
   * `value`, or null. Compares attribute values directly instead of building a
   * CSS selector, because values such as the POS tag `PRP$` are not valid
   * unquoted attribute-selector values and made `querySelector` throw.
   * @param {Element} selectElement
   * @param {string} value
   * @returns {Element|null}
   */
  _findOption(selectElement, value) {
    let options = Array.from(selectElement.querySelectorAll('option'));
    return options.find(option => option.getAttribute('value') === value) || null;
  }

  /**
   * Selects the option with the given value (if present) in every passed
   * select element and re-initialises its Materialize FormSelect.
   * @param {Element[]} selectionElements
   * @param {string} [value="default"]
   */
  resetMaterializeSelection(selectionElements, value = "default") {
    selectionElements.forEach(selectionElement => {
      let option = this._findOption(selectionElement, value);
      if (option !== null) {
        option.selected = true;
      }
      let instance = M.FormSelect.getInstance(selectionElement);
      // There is no instance yet if the select was never initialised.
      if (instance) {
        instance.destroy();
      }
      M.FormSelect.init(selectionElement);
    });
  }

  /**
   * In editing mode overwrites the chip that is being edited, otherwise
   * creates a new chip via queryChipFactory.
   */
  submitQueryChipElement(dataType = undefined, prettyQueryText = undefined, queryText = undefined, index = null, isClosingTag = false, isEditable = false) {
    if (this.elements.editingModusOn) {
      let editedQueryChipElement = this.elements.queryChipElements[this.elements.editedQueryChipElementIndex];
      editedQueryChipElement.dataset.type = dataType;
      editedQueryChipElement.dataset.query = queryText;
      editedQueryChipElement.firstChild.textContent = prettyQueryText;
      this.updateChipList();
      this.queryPreviewBuilder();
    } else {
      this.queryChipFactory(dataType, prettyQueryText, queryText, index, isClosingTag, isEditable);
    }
  }

  /**
   * Creates a new query chip element, adds event listeners for selection,
   * deletion and drag and drop and inserts it into the query input field.
   */
  queryChipFactory(dataType, prettyQueryText, queryText, index = null, isClosingTag = false, isEditable = false) {
    queryText = nopaque.Utils.escape(queryText);
    prettyQueryText = nopaque.Utils.escape(prettyQueryText);
    let queryChipElement = nopaque.Utils.HTMLToElement(
      `
<span class="chip query-component" data-type="${dataType}" data-query="${queryText}" draggable="true" data-closing-tag="${isClosingTag}">
${prettyQueryText}${isEditable ? '<i class="material-icons chip-action-button" data-chip-action="edit" style="padding-left:5px; font-size:18px; cursor:pointer;">edit</i>': ''}
${isClosingTag ? '<i class="material-icons chip-action-button" data-chip-action="lock" style="padding-top:5px; font-size:20px; cursor:pointer;">lock_open</i>' : '<i class="material-icons close chip-action-button" data-chip-action="delete">close</i>'}
</span>
`
    );
    this.actionListeners(queryChipElement);
    queryChipElement.addEventListener('dragstart', this.handleDragStart.bind(this, queryChipElement));
    queryChipElement.addEventListener('dragend', this.handleDragEnd);
    // Ensures that metadata is always at the end of the query and if an index is given, inserts the query chip at the given index and if there is a closing tag, inserts the query chip before the closing tag.
    this.removePlaceholder();
    // lastElementChild (not lastChild): a trailing text node has no dataset.
    let lastChild = this.elements.queryInputField.lastElementChild;
    let isLastChildTextAnnotation = lastChild && lastChild.dataset.type === 'text-annotation';
    if (!index) {
      let closingTagElement = this.elements.queryInputField.querySelector('[data-closing-tag="true"]');
      if (closingTagElement) {
        index = Array.from(this.elements.queryInputField.children).indexOf(closingTagElement);
      }
    }
    if (dataType !== 'text-annotation' && index) {
      this.elements.queryInputField.insertBefore(queryChipElement, this.elements.queryChipElements[index]);
    } else if (dataType !== 'text-annotation' && isLastChildTextAnnotation) {
      this.elements.queryInputField.insertBefore(queryChipElement, lastChild);
    } else {
      this.elements.queryInputField.appendChild(queryChipElement);
    }
    this.updateChipList();
    this.queryPreviewBuilder();
  }

  /**
   * Registers the click listeners of a chip: selecting the chip itself and
   * the delete / edit / lock action icons inside it.
   */
  actionListeners(queryChipElement) {
    let notQuantifiableDataTypes = ['start-sentence', 'end-sentence', 'start-entity', 'start-empty-entity', 'end-entity', 'token-incidence-modifier'];
    queryChipElement.addEventListener('click', (event) => {
      // Only clicks on the chip itself (not on its action icons) select it.
      if (event.target.classList.contains('chip')) {
        if (!notQuantifiableDataTypes.includes(queryChipElement.dataset.type)) {
          this.selectChipElement(queryChipElement);
        }
      }
    });
    let chipActionButtons = queryChipElement.querySelectorAll('.chip-action-button');
    for (let button of chipActionButtons) {
      button.addEventListener('click', (event) => {
        switch (event.target.dataset.chipAction) {
          case 'delete':
            this.deleteChipElement(queryChipElement);
            break;
          case 'edit':
            this.editChipElement(queryChipElement);
            break;
          case 'lock':
            this.lockClosingChipElement(queryChipElement);
            break;
          default:
            break;
        }
      });
    }
  }

  /** Switches to editing mode and opens the editor matching the chip's data type. */
  editChipElement(queryChipElement) {
    this.elements.editingModusOn = true;
    this.elements.editedQueryChipElementIndex = Array.from(this.elements.queryInputField.children).indexOf(queryChipElement);
    switch (queryChipElement.dataset.type) {
      case 'start-entity':
        this.editStartEntityChipElement(queryChipElement);
        break;
      case 'text-annotation':
        this.editTextAnnotationChipElement(queryChipElement);
        break;
      case 'token': {
        let queryElementsContent = this.prepareQueryElementsContent(queryChipElement);
        this.editTokenChipElement(queryElementsContent);
        break;
      }
      default:
        break;
    }
  }

  /** Opens the structural attribute modal with the chip's entity type preselected. */
  editStartEntityChipElement(queryChipElement) {
    this.elements.structuralAttrModal.open();
    this.toggleClass(['entity-builder'], 'hide', 'remove');
    this.toggleEditingAreaStructuralAttrModal('add');
    let entType = queryChipElement.dataset.query.replace(/<ent_type="|">/g, '');
    let isEnglishEntType = this._findOption(this.elements.englishEntTypeSelection, entType) !== null;
    let selection = isEnglishEntType ? this.elements.englishEntTypeSelection : this.elements.germanEntTypeSelection;
    this.resetMaterializeSelection([selection], entType);
  }

  /** Opens the structural attribute modal prefilled with the chip's text annotation. */
  editTextAnnotationChipElement(queryChipElement) {
    this.elements.structuralAttrModal.open();
    this.toggleClass(['text-annotation-builder'], 'hide', 'remove');
    // This class defines toggleEditingAreaStructuralAttrModal itself; it has
    // no `structuralAttributeBuilderFunctions` property to go through.
    this.toggleEditingAreaStructuralAttrModal('add');
    let annotation = queryChipElement.dataset.query.replace(/:: ?match\.text_|"/g, '');
    // Split at the first "=" only, so values that contain "=" stay intact.
    let separatorIndex = annotation.indexOf('=');
    let textAnnotationSelection = separatorIndex === -1 ? annotation : annotation.slice(0, separatorIndex);
    let textAnnotationContent = separatorIndex === -1 ? '' : annotation.slice(separatorIndex + 1);
    this.resetMaterializeSelection([this.elements.textAnnotationSelection], textAnnotationSelection);
    this.elements.textAnnotationInput.value = textAnnotationContent;
  }

  /**
   * Splits the CQL of a token chip into its attribute conditions.
   * @param {{dataset: {query: string}}} queryChipElement
   * @returns {{tokenAttr: string, tokenValue: string, ignoreCase: boolean, condition: (string|undefined)}[]}
   */
  prepareQueryElementsContent(queryChipElement) {
    // Searches for word, lemma, pos or simple_pos = "any string within single or double quotes",
    // followed by an optional ignore case marker (%c) and an optional condition character (& or |).
    let regex = /(word|lemma|pos|simple_pos)=(("[^"]+")|('[^']+')) ?(%c)? ?(&|\|)?/gm;
    let m;
    let queryElementsContent = [];
    while ((m = regex.exec(queryChipElement.dataset.query)) !== null) {
      // this is necessary to avoid infinite loops with zero-width matches
      if (m.index === regex.lastIndex) {
        regex.lastIndex++;
      }
      let tokenAttr = m[1];
      // Passes english-pos by default so that the template is added. In editTokenChipElement it is then checked whether it is english-pos or german-pos.
      if (tokenAttr === 'pos') {
        tokenAttr = 'english-pos';
      }
      let tokenValue = m[2].replace(/"|'/g, '');
      let ignoreCase = m[5] === '%c';
      let condition = undefined;
      if (m[6] === '&') {
        condition = 'and';
      } else if (m[6] === '|') {
        condition = 'or';
      }
      queryElementsContent.push({tokenAttr: tokenAttr, tokenValue: tokenValue, ignoreCase: ignoreCase, condition: condition});
    }
    return queryElementsContent;
  }

  /**
   * Opens the positional attribute modal and restores every condition of the edited token.
   * NOTE(review): `conditionHandler` is not defined in this class — presumably
   * provided by a subclass; confirm it exists on every instance that creates editable token chips.
   */
  editTokenChipElement(queryElementsContent) {
    this.elements.positionalAttrModal.open();
    queryElementsContent.forEach((queryElement) => {
      this.resetMaterializeSelection([this.elements.positionalAttrSelection], queryElement.tokenAttr);
      this.preparePositionalAttrModal();
      switch (queryElement.tokenAttr) {
        case 'word':
        case 'lemma':
          this.elements.tokenBuilderContent.querySelector('input').value = queryElement.tokenValue;
          break;
        case 'english-pos': {
          // English-pos is selected by default. Then it is checked whether the passed token value occurs in the english-pos selection. If not, the selection is reset and changed to german-pos.
          let selection = this.elements.tokenBuilderContent.querySelector('select');
          queryElement.tokenAttr = this._findOption(selection, queryElement.tokenValue) !== null ? 'english-pos' : 'german-pos';
          this.resetMaterializeSelection([this.elements.positionalAttrSelection], queryElement.tokenAttr);
          this.preparePositionalAttrModal();
          this.resetMaterializeSelection([this.elements.tokenBuilderContent.querySelector('select')], queryElement.tokenValue);
          break;
        }
        case 'simple_pos':
          this.resetMaterializeSelection([this.elements.tokenBuilderContent.querySelector('select')], queryElement.tokenValue);
          break;
        default:
          break;
      }
      if (queryElement.ignoreCase) {
        this.elements.ignoreCaseCheckbox.checked = true;
      }
      if (queryElement.condition !== undefined) {
        this.conditionHandler(queryElement.condition, true);
      }
    });
  }

  /** Marks a closing-tag chip as locked so that new chips are no longer inserted before it. */
  lockClosingChipElement(queryChipElement) {
    queryChipElement.dataset.closingTag = 'false';
    let lockIcon = queryChipElement.querySelector('[data-chip-action="lock"]');
    lockIcon.textContent = 'lock';
    //TODO: Write unlock-Function?
    lockIcon.dataset.chipAction = 'unlock';
  }

  /**
   * Removes a chip. Opening tags also remove their next closing tag, tokens
   * also remove a directly following incidence modifier.
   */
  deleteChipElement(attr) {
    let elementIndex = Array.from(this.elements.queryInputField.children).indexOf(attr);
    switch (attr.dataset.type) {
      case 'start-sentence':
        this.deletingClosingTagHandler(elementIndex, 'end-sentence');
        break;
      case 'start-entity':
        this.deletingClosingTagHandler(elementIndex, 'end-entity');
        break;
      case 'token': {
        let nextElement = Array.from(this.elements.queryInputField.children)[elementIndex+1];
        if (nextElement !== undefined && nextElement.dataset.type === 'token-incidence-modifier') {
          this.deleteChipElement(nextElement);
        }
        break;
      }
      default:
        break;
    }
    this.elements.queryInputField.removeChild(attr);
    if (this.elements.queryInputField.children.length === 0) {
      this.addPlaceholder();
    }
    this.updateChipList();
    this.queryPreviewBuilder();
  }

  /** Deletes the first chip of type `closingTagType` located after `elementIndex`. */
  deletingClosingTagHandler(elementIndex, closingTagType) {
    let closingTags = this.elements.queryInputField.querySelectorAll(`[data-type="${closingTagType}"]`);
    for (let i = 0; i < closingTags.length; i++) {
      let closingTag = closingTags[i];
      if (Array.from(this.elements.queryInputField.children).indexOf(closingTag) > elementIndex) {
        this.deleteChipElement(closingTag);
        break;
      }
    }
  }

  /**
   * Is called when a query chip is dragged. Creates drop zones (in form of
   * chips) for the dragged chip and adds them to the query input field.
   */
  handleDragStart(queryChipElement, event) {
    let queryChips = this.elements.queryInputField.querySelectorAll('.query-component');
    // Incidence modifiers may only be dropped next to tokens.
    if (queryChipElement.dataset.type === 'token-incidence-modifier') {
      queryChips = this.elements.queryInputField.querySelectorAll('.query-component[data-type="token"]');
    }
    setTimeout(() => {
      let targetChipElement = nopaque.Utils.HTMLToElement('<span class="chip drop-target">Drop here</span>');
      for (let element of queryChips) {
        // The very first chip additionally gets a drop zone in front of it.
        if (element === this.elements.queryInputField.querySelectorAll('.query-component')[0]) {
          let secondTargetChipClone = targetChipElement.cloneNode(true);
          element.insertAdjacentElement('beforebegin', secondTargetChipClone);
          this.addDragDropListeners(secondTargetChipClone, queryChipElement);
        }
        if (element === queryChipElement || element.nextSibling === queryChipElement) {continue;}
        let targetChipClone = targetChipElement.cloneNode(true);
        element.insertAdjacentElement('afterend', targetChipClone);
        this.addDragDropListeners(targetChipClone, queryChipElement);
      }
    }, 0);
  }

  /** Removes all drop zones once dragging has ended. */
  handleDragEnd(event) {
    document.querySelectorAll('.drop-target').forEach(target => target.remove());
  }

  /** Wires a drop zone so that dropping replaces it with the dragged chip. */
  addDragDropListeners(targetChipClone, queryChipElement) {
    targetChipClone.addEventListener('dragover', (event) => {
      event.preventDefault();
    });
    targetChipClone.addEventListener('dragenter', (event) => {
      event.preventDefault();
      event.target.style.borderStyle = 'solid dotted';
    });
    targetChipClone.addEventListener('dragleave', (event) => {
      event.preventDefault();
      event.target.style.borderStyle = 'hidden';
    });
    targetChipClone.addEventListener('drop', (event) => {
      let dropzone = event.target;
      dropzone.parentElement.replaceChild(queryChipElement, dropzone);
      this.updateChipList();
      this.queryPreviewBuilder();
    });
  }

  /** Builds the query preview in the form of pure CQL and displays it in the query preview field. */
  queryPreviewBuilder() {
    let queryPreview = document.querySelector('#corpus-analysis-concordance-query-preview');
    let queryInputFieldContent = [];
    this.elements.queryChipElements.forEach(element => {
      let queryElement = element.dataset.query;
      if (queryElement !== undefined) {
        queryElement = nopaque.Utils.escape(queryElement);
      }
      queryInputFieldContent.push(queryElement);
    });
    let queryString = queryInputFieldContent.join(' ');
    let replacements = {
      ' +': '+',
      ' *': '*',
      ' ?': '?',
      ' {': '{'
    };
    for (let [key, replacement] of Object.entries(replacements)) {
      // split/join replaces every occurrence; String.replace with a string
      // pattern would only attach the first modifier to its token.
      queryString = queryString.split(key).join(replacement);
    }
    queryString += ';';
    queryPreview.innerHTML = queryString;
    queryPreview.parentNode.classList.toggle('hide', queryString === ';');
  }

  /** Toggles the selection highlight of a chip and the token incidence modifier controls. */
  selectChipElement(attr) {
    document.querySelectorAll('.chip.teal').forEach(element => {
      if (element !== attr) {
        // Remove exactly the classes that are toggled on below.
        element.classList.remove('teal', 'lighten-5');
        this.toggleClass(['token-incidence-modifiers'], 'disabled', 'add');
      }
    });
    this.toggleClass(['token-incidence-modifiers'], 'disabled', 'toggle');
    attr.classList.toggle('teal');
    attr.classList.toggle('lighten-5');
  }

  /** Adds a token incidence modifier chip directly after the currently selected chip. */
  tokenIncidenceModifierHandler(incidenceModifier, incidenceModifierPretty) {
    let selectedChip = this.elements.queryInputField.querySelector('.chip.teal');
    let selectedChipIndex = Array.from(this.elements.queryInputField.children).indexOf(selectedChip);
    this.submitQueryChipElement('token-incidence-modifier', incidenceModifierPretty, incidenceModifier, selectedChipIndex+1);
    this.selectChipElement(selectedChip);
  }

  /** Adds a token incidence modifier (exactly n or between n and m) to the query input field. */
  tokenNMSubmitHandler(modalId) {
    let modal = document.querySelector(`#${modalId}`);
    let input_n = modal.querySelector('.n-m-input[data-value-type="n"]').value;
    // The "exactly n" modal has no m input.
    let mInputElement = modal.querySelector('.n-m-input[data-value-type="m"]');
    let input_m = mInputElement ? mInputElement.value : '';
    let input = `{${input_n}${input_m !== '' ? ',' : ''}${input_m}}`;
    let pretty_input = `between ${input_n} and ${input_m} (${input})`;
    if (input_m === '') {
      pretty_input = `exactly ${input_n} (${input})`;
    }
    let instance = M.Modal.getInstance(modal);
    instance.close();
    this.tokenIncidenceModifierHandler(input, pretty_input);
  }

  //#region Functions from other classes
  //TODO: Move these functions back to their og classes and make it work.

  /** If the user edits a query chip element, the other structural attribute buttons are disabled (or re-enabled). */
  toggleEditingAreaStructuralAttrModal(action) {
    this.toggleClass(['sentence-button', 'entity-button', 'text-annotation-button', 'any-type-entity-button'], 'disabled', action);
  }

  /** Renders the input template matching the selected positional attribute and updates the option controls. */
  preparePositionalAttrModal() {
    let selection = this.elements.positionalAttrSelection.value;
    if (selection !== 'empty-token') {
      let selectionTemplate = document.querySelector(`.token-builder-section[data-token-builder-section="${selection}"]`);
      let selectionTemplateClone = selectionTemplate.content.cloneNode(true);
      this.elements.tokenBuilderContent.innerHTML = '';
      this.elements.tokenBuilderContent.appendChild(selectionTemplateClone);
      if (this.elements.tokenBuilderContent.querySelector('select') !== null) {
        let selectElement = this.elements.tokenBuilderContent.querySelector('select');
        M.FormSelect.init(selectElement);
        selectElement.addEventListener('change', () => {this.optionToggleHandler();});
      } else {
        this.elements.tokenBuilderContent.querySelector('input').addEventListener('input', () => {this.optionToggleHandler();});
      }
    }
    this.optionToggleHandler();
    if (selection === 'word' || selection === 'lemma') {
      this.toggleClass(['input-field-options'], 'hide', 'remove');
    } else if (selection === 'empty-token'){
      // An empty token needs no further input and is added right away.
      this.addTokenToQuery();
    } else {
      this.toggleClass(['input-field-options'], 'hide', 'add');
    }
  }

  /** Returns the `<select>` inside `elem` if there is one, otherwise its `<input>`. */
  tokenInputCheck(elem) {
    return elem.querySelector('select') !== null ? elem.querySelector('select') : elem.querySelector('input');
  }

  /** Enables or disables the incidence modifier / "or" / "and" controls depending on the current input. */
  optionToggleHandler() {
    let input = this.tokenInputCheck(this.elements.tokenBuilderContent);
    if (input.value === '' && this.elements.editingModusOn === false) {
      this.toggleClass(['incidence-modifiers', 'or', 'and'], 'disabled', 'add');
    } else if (this.elements.positionalAttrSelection.querySelectorAll('option').length === 1) {
      this.toggleClass(['and'], 'disabled', 'add');
      this.toggleClass(['or'], 'disabled', 'remove');
    } else {
      this.toggleClass(['incidence-modifiers', 'or', 'and'], 'disabled', 'remove');
    }
  }

  /** Builds the pretty text and the CQL of the token from the modal and submits it as a chip. */
  addTokenToQuery() {
    let tokenQueryPrettyText = '';
    let tokenQueryCQLText = '';
    let input;
    let kindOfToken = this.kindOfTokenCheck(this.elements.positionalAttrSelection.value);
    // Takes all rows of the token query (if there is a query concatenation).
    // Adds their contents to tokenQueryPrettyText and tokenQueryCQLText, which will later be expanded with the current input field.
    let tokenQueryRows = this.elements.tokenQuery.querySelectorAll('.row');
    tokenQueryRows.forEach(row => {
      let ignoreCaseCheckbox = row.querySelector('input[type="checkbox"]');
      let c = ignoreCaseCheckbox !== null && ignoreCaseCheckbox.checked ? ' %c' : '';
      let tokenQueryRowInput = this.tokenInputCheck(row.querySelector('.token-query-template-content'));
      let tokenQueryKindOfToken = this.kindOfTokenCheck(tokenQueryRowInput.closest('.input-field').dataset.kindOfToken);
      let tokenConditionPrettyText = row.querySelector('[data-condition-pretty-text]').dataset.conditionPrettyText;
      let tokenConditionCQLText = row.querySelector('[data-condition-cql-text]').dataset.conditionCqlText;
      tokenQueryPrettyText += `${tokenQueryKindOfToken}=${tokenQueryRowInput.value}${c} ${tokenConditionPrettyText} `;
      tokenQueryCQLText += `${tokenQueryKindOfToken}="${tokenQueryRowInput.value}"${c} ${tokenConditionCQLText}`;
    });
    if (kindOfToken === 'empty-token') {
      tokenQueryPrettyText += 'empty token';
    } else {
      let c = this.elements.ignoreCaseCheckbox.checked ? ' %c' : '';
      input = this.tokenInputCheck(this.elements.tokenBuilderContent);
      tokenQueryPrettyText += `${kindOfToken}=${input.value}${c}`;
      tokenQueryCQLText += `${kindOfToken}="${input.value}"${c}`;
    }
    // If the input field/dropdown is empty (and the token is not an empty token), no token is added.
    if (this.elements.positionalAttrSelection.value !== 'empty-token' && input.value === '') {
      this.disableTokenSubmit();
    } else {
      tokenQueryCQLText = `[${tokenQueryCQLText}]`;
      this.submitQueryChipElement('token', tokenQueryPrettyText, tokenQueryCQLText, null, false, kindOfToken === 'empty-token' ? false : true);
      this.elements.positionalAttrModal.close();
    }
  }

  /** Maps the UI kinds 'english-pos' and 'german-pos' to the CQL attribute 'pos'; other kinds are returned unchanged. */
  kindOfTokenCheck(kindOfToken) {
    return kindOfToken === 'english-pos' || kindOfToken === 'german-pos' ? 'pos' : kindOfToken;
  }

  /** Briefly flashes the submit button red and shows the "no value" message for three seconds. */
  disableTokenSubmit() {
    this.elements.tokenSubmitButton.classList.add('red');
    this.elements.noValueMessage.classList.remove('hide');
    setTimeout(() => {
      this.elements.tokenSubmitButton.classList.remove('red');
    }, 500);
    setTimeout(() => {
      this.elements.noValueMessage.classList.add('hide');
    }, 3000);
  }
  //#endregion Functions from other classes
}

View File

@ -1,192 +0,0 @@
/**
 * Entry point of the concordance query builder. Creates the helper objects,
 * wires the incidence modifier controls and converts between the chip based
 * query builder and the free-text expert mode.
 */
class ConcordanceQueryBuilder {
  constructor() {
    this.elements = new ElementReferencesQueryBuilder();
    this.generalFunctions = new GeneralQueryBuilderFunctions(this.elements);
    this.tokenAttributeBuilderFunctions = new TokenAttributeBuilderFunctions(this.elements);
    this.structuralAttributeBuilderFunctions = new StructuralAttributeBuilderFunctions(this.elements);
    this.incidenceModifierEventListeners();
    this.nAndMInputSubmitEventListeners();
    let queryBuilderDisplay = document.querySelector("#corpus-analysis-concordance-query-builder-display");
    let expertModeDisplay = document.querySelector("#corpus-analysis-concordance-expert-mode-display");
    let expertModeSwitch = document.querySelector("#corpus-analysis-concordance-expert-mode-switch");
    // Show exactly one of the two displays and carry the query over to it.
    expertModeSwitch.addEventListener("change", () => {
      const isChecked = expertModeSwitch.checked;
      if (isChecked) {
        queryBuilderDisplay.classList.add("hide");
        expertModeDisplay.classList.remove("hide");
        this.switchToExpertModeParser();
      } else {
        queryBuilderDisplay.classList.remove("hide");
        expertModeDisplay.classList.add("hide");
        this.switchToQueryBuilderParser();
      }
    });
  }

  /**
   * Event listeners for the incidence modifiers. There are two different
   * types of incidence modifiers: token and character incidence modifiers.
   */
  incidenceModifierEventListeners() {
    document.querySelectorAll('.incidence-modifier-selection').forEach(button => {
      let dropdownId = button.parentNode.parentNode.id;
      if (dropdownId === 'corpus-analysis-concordance-token-incidence-modifiers-dropdown') {
        button.addEventListener('click', () => this.generalFunctions.tokenIncidenceModifierHandler(button.dataset.token, button.innerHTML));
      } else if (dropdownId === 'corpus-analysis-concordance-character-incidence-modifiers-dropdown') {
        button.addEventListener('click', () => this.tokenAttributeBuilderFunctions.characterIncidenceModifierHandler(button));
      }
    });
  }

  /**
   * Event listeners for the submit of n- and m-values of the incidence
   * modifier modals for "exactly n" or "between n and m".
   */
  nAndMInputSubmitEventListeners() {
    document.querySelectorAll('.n-m-submit-button').forEach(button => {
      let modalId = button.dataset.modalId;
      if (modalId === 'corpus-analysis-concordance-exactly-n-token-modal' || modalId === 'corpus-analysis-concordance-between-nm-token-modal') {
        button.addEventListener('click', () => this.generalFunctions.tokenNMSubmitHandler(modalId));
      } else if (modalId === 'corpus-analysis-concordance-exactly-n-character-modal' || modalId === 'corpus-analysis-concordance-between-nm-character-modal') {
        button.addEventListener('click', () => this.tokenAttributeBuilderFunctions.characterNMSubmitHandler(modalId));
      }
    });
  }

  /** Copies the query shown in the query preview into the expert mode input field. */
  switchToExpertModeParser() {
    let expertModeInputField = document.querySelector('#corpus-analysis-concordance-form-query');
    expertModeInputField.value = '';
    // textContent already yields the unescaped query text, so no separate
    // unescape helper is needed for the preview's HTML.
    let queryBuilderInputFieldValue = document.querySelector('#corpus-analysis-concordance-query-preview').textContent.trim();
    if (queryBuilderInputFieldValue !== "" && queryBuilderInputFieldValue !== ";") {
      expertModeInputField.value = queryBuilderInputFieldValue;
    }
  }

  /** Rebuilds the query chips from the text entered in the expert mode input field. */
  switchToQueryBuilderParser() {
    this.generalFunctions.resetQueryInputField();
    let expertModeInputFieldValue = document.querySelector('#corpus-analysis-concordance-form-query').value;
    let chipElements = this.parseTextToChip(expertModeInputFieldValue);
    let closingTagElements = ['end-sentence', 'end-entity'];
    let editableElements = ['start-entity', 'text-annotation', 'token'];
    for (let chipElement of chipElements) {
      let isClosingTag = closingTagElements.includes(chipElement['type']);
      let isEditable = editableElements.includes(chipElement['type']);
      // Empty tokens have nothing to edit.
      if (chipElement['query'] === '[]'){
        isEditable = false;
      }
      this.generalFunctions.submitQueryChipElement(chipElement['type'], chipElement['pretty'], chipElement['query'], null, isClosingTag, isEditable);
    }
  }

  /**
   * Splits a CQL query string into chip descriptions.
   * @param {string} query CQL query text.
   * @returns {{type: string, pretty: string, query: string}[]} One entry per
   *   recognised query component, in order of appearance.
   */
  parseTextToChip(query) {
    // Maps a regex source (tried in insertion order) to chip type and default pretty text.
    const parsingElementDict = {
      '<s>': {
        pretty: 'Sentence Start',
        type: 'start-sentence'
      },
      '<\/s>': {
        pretty: 'Sentence End',
        type: 'end-sentence'
      },
      '<ent>': {
        pretty: 'Entity Start',
        type: 'start-empty-entity'
      },
      '<ent_type="([A-Z]+)">': {
        pretty: '',
        type: 'start-entity'
      },
      '<\\\/ent(_type)?>': {
        pretty: 'Entity End',
        type: 'end-entity'
      },
      ':: ?match\\.text_[A-Za-z]+="[^"]+"': {
        pretty: '',
        type: 'text-annotation'
      },
      '\\[(word|lemma|pos|simple_pos)=(("[^"]+")|(\\u0027[^\\u0027]+\\u0027)) ?(%c)? ?((\\&|\\|) ?(word|lemma|pos|simple_pos)=(("[^"]+")|(\\u0027[^\\u0027]+\\u0027)) ?(%c)? ?)*\\]': {
        pretty: '',
        type: 'token'
      },
      '\\[\\]': {
        pretty: 'Empty Token',
        type: 'token'
      },
      '(?<!\\[) ?\\+ ?(?![^\\]]\\])': {
        pretty: ' one or more (+)',
        type: 'token-incidence-modifier'
      },
      '(?<!\\[) ?\\* ?(?![^\\]]\\])': {
        pretty: 'zero or more (*)',
        type: 'token-incidence-modifier'
      },
      '(?<!\\[) ?\\? ?(?![^\\]]\\])': {
        pretty: 'zero or one (?)',
        type: 'token-incidence-modifier'
      },
      '(?<!\\[) ?\\{[0-9]+} ?(?![^\\]]\\])': {
        pretty: '',
        type: 'token-incidence-modifier'
      },
      '(?<!\\[) ?\\{[0-9]+(,[0-9]+)?} ?(?![^\\]]\\])': {
        pretty: '',
        type: 'token-incidence-modifier'
      }
    }
    // One anchored matcher per pattern, compiled once. Anchoring makes sure a
    // fragment is classified by the pattern that matches it as a whole, not by
    // a pattern that merely occurs inside it (e.g. "<s>" inside a token value).
    const classifiers = Object.entries(parsingElementDict).map(([pattern, chipElement]) => ({
      pattern: pattern,
      chipElement: chipElement,
      regex: new RegExp(`^(?:${pattern})$`, 'i')
    }));
    let chipElements = [];
    let regexPattern = Object.keys(parsingElementDict).map(pattern => `(${pattern})`).join('|');
    const regex = new RegExp(regexPattern, 'gi');
    let match;
    while ((match = regex.exec(query)) !== null) {
      // this is necessary to avoid infinite loops with zero-width matches
      if (match.index === regex.lastIndex) {
        regex.lastIndex++;
      }
      let stringElement = match[0];
      let classifier = classifiers.find(candidate => candidate.regex.test(stringElement));
      if (classifier === undefined) {
        continue;
      }
      // Creating the pretty text for the chip element
      let prettyText;
      switch (classifier.pattern) {
        case '<ent_type="([A-Z]+)">':
          prettyText = `Entity Type=${stringElement.replace(/<ent_type="|">/g, '')}`;
          break;
        case ':: ?match\\.text_[A-Za-z]+="[^"]+"':
          prettyText = stringElement.replace(/:: ?match\.text_|"|"/g, '');
          break;
        case '\\[(word|lemma|pos|simple_pos)=(("[^"]+")|(\\u0027[^\\u0027]+\\u0027)) ?(%c)? ?((\\&|\\|) ?(word|lemma|pos|simple_pos)=(("[^"]+")|(\\u0027[^\\u0027]+\\u0027)) ?(%c)? ?)*\\]': {
          let doubleQuotes = /(word|lemma|pos|simple_pos)="[^"]+"/gi;
          let singleQuotes = /(word|lemma|pos|simple_pos)='[^']+'/gi;
          if (doubleQuotes.exec(stringElement)) {
            prettyText = stringElement.replace(/^\[|\]$|"/g, '');
          } else if (singleQuotes.exec(stringElement)) {
            prettyText = stringElement.replace(/^\[|\]$|'/g, '');
          }
          prettyText = prettyText.replace(/\&/g, ' and ').replace(/\|/g, ' or ');
          break;
        }
        case '(?<!\\[) ?\\{[0-9]+} ?(?![^\\]]\\])':
          prettyText = `exactly ${stringElement.replace(/{|}/g, '')} (${stringElement})`;
          break;
        case '(?<!\\[) ?\\{[0-9]+(,[0-9]+)?} ?(?![^\\]]\\])':
          prettyText = `between${stringElement.replace(/{|}/g, ' ').replace(',', ' and ')}(${stringElement})`;
          break;
        default:
          prettyText = classifier.chipElement.pretty;
          break;
      }
      chipElements.push({
        type: classifier.chipElement.type,
        pretty: prettyText,
        query: stringElement
      });
    }
    return chipElements;
  }
}

View File

@ -1,98 +0,0 @@
/**
 * Behaviour of the structural attribute modal of the query builder:
 * sentence tags, entity tags and text annotation (meta data) chips.
 */
class StructuralAttributeBuilderFunctions extends GeneralQueryBuilderFunctions {
  /**
   * Registers all listeners of the modal and initialises the Materialize
   * modal, storing the instance on `elements.structuralAttrModal`.
   * @param {object} elements Shared element-reference/state object.
   */
  constructor(elements) {
    super(elements);
    this.structuralAttrModalEventlisteners();
    const textAnnotationSubmitButton = document.querySelector('#corpus-analysis-concordance-text-annotation-submit');
    textAnnotationSubmitButton.addEventListener('click', () => this.textAnnotationSubmitHandler());
    const modalElement = document.querySelector('#corpus-analysis-concordance-structural-attr-modal');
    const modalOptions = {
      // Every time the modal starts closing, its inputs and the editing state are reset.
      onCloseStart: () => {
        this.resetStructuralAttrModal();
      }
    };
    this.elements.structuralAttrModal = M.Modal.init(modalElement, modalOptions);
  }

  /** Wires the action buttons and the three entity type selections of the modal. */
  structuralAttrModalEventlisteners() {
    for (const actionButton of document.querySelectorAll('[data-structural-attr-modal-action-button]')) {
      actionButton.addEventListener('click', () => {
        this.actionButtonInStrucAttrModalHandler(actionButton.dataset.structuralAttrModalActionButton);
      });
    }

    // Entity of any type: an untyped opening tag plus its closing tag.
    const anyEntityButton = document.querySelector('.ent-type-selection-action[data-ent-type="any"]');
    anyEntityButton.addEventListener('click', () => {
      this.submitQueryChipElement('start-empty-entity', 'Entity Start', '<ent>');
      this.submitQueryChipElement('end-entity', 'Entity End', '</ent>', null, true);
      this.elements.structuralAttrModal.close();
    });

    // The english and the german entity type selection behave identically.
    const onEntityTypeChange = (event) => {
      const entityType = event.target.value;
      this.submitQueryChipElement('start-entity', `Entity Type=${entityType}`, `<ent_type="${entityType}">`, null, false, true);
      // While editing, the closing tag already exists and must not be added again.
      if (!this.elements.editingModusOn) {
        this.submitQueryChipElement('end-entity', 'Entity End', '</ent_type>', null, true);
      }
      this.elements.structuralAttrModal.close();
    };
    for (const language of ['english', 'german']) {
      document.querySelector(`.ent-type-selection-action[data-ent-type="${language}"]`).addEventListener('change', onEntityTypeChange);
    }
  }

  /** Restores the initial state of the modal and leaves editing mode. */
  resetStructuralAttrModal() {
    const entityTypeSelections = [this.elements.englishEntTypeSelection, this.elements.germanEntTypeSelection];
    this.resetMaterializeSelection(entityTypeSelections);
    this.resetMaterializeSelection([this.elements.textAnnotationSelection], 'address');
    this.elements.textAnnotationInput.value = '';
    this.toggleClass(['entity-builder', 'text-annotation-builder'], 'hide', 'add');
    this.toggleEditingAreaStructuralAttrModal('remove');
    this.elements.editingModusOn = false;
    this.elements.editedQueryChipElementIndex = undefined;
  }

  /**
   * Reacts to one of the modal's action buttons.
   * @param {string} action 'sentence', 'entity' or 'meta-data'; anything else is ignored.
   */
  actionButtonInStrucAttrModalHandler(action) {
    if (action === 'sentence') {
      this.submitQueryChipElement('start-sentence', 'Sentence Start', '<s>');
      this.submitQueryChipElement('end-sentence', 'Sentence End', '</s>', null, true);
      this.elements.structuralAttrModal.close();
    } else if (action === 'entity') {
      // Show/hide the entity builder and make sure the other builder is hidden.
      this.toggleClass(['entity-builder'], 'hide', 'toggle');
      this.toggleClass(['text-annotation-builder'], 'hide', 'add');
    } else if (action === 'meta-data') {
      this.toggleClass(['text-annotation-builder'], 'hide', 'toggle');
      this.toggleClass(['entity-builder'], 'hide', 'add');
    }
  }

  /**
   * Submits the text annotation as a chip. With an empty input the submit
   * button flashes red and a hint is shown for three seconds instead.
   */
  textAnnotationSubmitHandler() {
    const hintMessage = document.querySelector('#corpus-analysis-concordance-no-value-metadata-message');
    const submitButton = document.querySelector('#corpus-analysis-concordance-text-annotation-submit');
    const annotationInput = document.querySelector('#corpus-analysis-concordance-text-annotation-input');
    const annotationOptions = document.querySelector('#corpus-analysis-concordance-text-annotation-options');

    if (annotationInput.value !== '') {
      const annotationName = annotationOptions.value;
      const annotationValue = annotationInput.value;
      const queryText = `:: match.text_${annotationName}="${annotationValue}"`;
      this.submitQueryChipElement('text-annotation', `${annotationName}=${annotationValue}`, queryText, null, false, true);
      this.elements.structuralAttrModal.close();
      return;
    }

    submitButton.classList.add('red');
    hintMessage.classList.remove('hide');
    setTimeout(() => {
      submitButton.classList.remove('red');
    }, 500);
    setTimeout(() => {
      hintMessage.classList.add('hide');
    }, 3000);
  }
}

View File

@ -1,182 +0,0 @@
class TokenAttributeBuilderFunctions extends GeneralQueryBuilderFunctions {
constructor(elements) {
super(elements);
this.elements.positionalAttrSelection.addEventListener('change', () => {
this.preparePositionalAttrModal();
});
// Options for positional attribute selection
document.querySelectorAll('.positional-attr-options-action-button[data-options-action]').forEach(button => {
button.addEventListener('click', () => {this.actionButtonInOptionSectionHandler(button.dataset.optionsAction);});
});
this.elements.tokenSubmitButton.addEventListener('click', () => {this.addTokenToQuery();});
this.elements.positionalAttrModal = M.Modal.init(
document.querySelector('#corpus-analysis-concordance-positional-attr-modal'),
{
onOpenStart: () => {
this.preparePositionalAttrModal();
},
onCloseStart: () => {
this.resetPositionalAttrModal();
}
}
);
}
resetPositionalAttrModal() {
let originalSelectionList =
`
<option value="word" selected>word</option>
<option value="lemma" >lemma</option>
<option value="english-pos">english pos</option>
<option value="german-pos">german pos</option>
<option value="simple_pos">simple_pos</option>
<option value="empty-token">empty token</option>
`;
this.elements.positionalAttrSelection.innerHTML = originalSelectionList;
this.elements.tokenQuery.innerHTML = '';
this.elements.tokenBuilderContent.innerHTML = '';
this.toggleClass(['input-field-options'], 'hide', 'remove');
this.toggleClass(['incidence-modifiers', 'or', 'and'], 'disabled', 'add');
this.resetMaterializeSelection([this.elements.positionalAttrSelection], "word");
this.elements.ignoreCaseCheckbox.checked = false;
this.elements.editingModusOn = false;
this.elements.editedQueryChipElementIndex = undefined;
}
actionButtonInOptionSectionHandler(elem) {
let input = this.tokenInputCheck(this.elements.tokenBuilderContent);
switch (elem) {
case 'option-group':
input.value += '(option1|option2)';
let firstIndex = input.value.indexOf('option1');
let lastIndex = firstIndex + 'option1'.length;
input.focus();
input.setSelectionRange(firstIndex, lastIndex);
break;
case 'wildcard-char':
input.value += '.';
break;
case 'and':
this.conditionHandler('and');
break;
case 'or':
this.conditionHandler('or');
break;
default:
break;
}
this.optionToggleHandler();
}
characterIncidenceModifierHandler(elem) {
let input = this.tokenInputCheck(this.elements.tokenBuilderContent);
input.value += elem.dataset.token;
}
characterNMSubmitHandler(modalId) {
let modal = document.querySelector(`#${modalId}`);
let input_n = modal.querySelector('.n-m-input[data-value-type="n"]').value;
let input_m = modal.querySelector('.n-m-input[data-value-type="m"]') || undefined;
input_m = input_m !== undefined ? ',' + input_m.value : '';
let input = `${input_n}${input_m}`;
let instance = M.Modal.getInstance(modal);
instance.close();
let tokenInput = this.tokenInputCheck(this.elements.tokenBuilderContent);
tokenInput.value += '{' + input + '}';
}
conditionHandler(conditionText) {
let tokenQueryTemplateClone = this.elements.tokenQueryTemplate.content.cloneNode(true);
tokenQueryTemplateClone.querySelector('.token-query-template-content').appendChild(this.elements.tokenBuilderContent.firstElementChild);
let notSelectedButton = tokenQueryTemplateClone.querySelector(`[data-condition-pretty-text]:not([data-condition-pretty-text="${conditionText}"])`);
let deleteButton = tokenQueryTemplateClone.querySelector(`[data-token-query-content-action="delete"]`);
deleteButton.addEventListener('click', (event) => {
this.deleteTokenQueryRow(event.target);
});
notSelectedButton.parentNode.removeChild(notSelectedButton);
this.elements.tokenQuery.appendChild(tokenQueryTemplateClone);
// Deleting the options which do not make sense in the context of the condition like "word" AND "word". Also sets selection default.
let selectionDefault = "word";
let optionDeleteList = ['empty-token'];
if (conditionText === 'and') {
switch (this.elements.positionalAttrSelection.value) {
case 'english-pos' || 'german-pos':
optionDeleteList.push('english-pos', 'german-pos');
break;
default:
optionDeleteList.push(this.elements.positionalAttrSelection.value);
break;
}
} else {
let originalSelectionList =
`
<option value="word" selected>word</option>
<option value="lemma" >lemma</option>
<option value="english-pos">english pos</option>
<option value="german-pos">german pos</option>
<option value="simple_pos">simple_pos</option>
`;
this.elements.positionalAttrSelection.innerHTML = originalSelectionList;
M.FormSelect.init(this.elements.positionalAttrSelection);
}
let lastTokenQueryRow = this.elements.tokenQuery.lastElementChild;
if(lastTokenQueryRow.querySelector('[data-kind-of-token="word"]') || lastTokenQueryRow.querySelector('[data-kind-of-token="lemma"]')) {
this.appendIgnoreCaseCheckbox(lastTokenQueryRow.querySelector('.token-query-template-content'), this.elements.ignoreCaseCheckbox.checked);
}
this.elements.ignoreCaseCheckbox.checked = false;
this.setTokenSelection(selectionDefault, optionDeleteList);
}
deleteTokenQueryRow(deleteButton) {
let deletedRow = deleteButton.closest('.row');
let condition = deletedRow.querySelector('[data-condition-pretty-text]').dataset.conditionPrettyText;
if (condition === 'and') {
let kindOfToken = deletedRow.querySelector('[data-kind-of-token]').dataset.kindOfToken;
switch (kindOfToken) {
case 'english-pos' || 'german-pos':
this.createOptionElementForPosAttrSelection('english-pos');
this.createOptionElementForPosAttrSelection('german-pos');
break;
default:
this.createOptionElementForPosAttrSelection(kindOfToken);
break;
}
M.FormSelect.init(this.elements.positionalAttrSelection);
}
deletedRow.remove();
}
createOptionElementForPosAttrSelection(kindOfToken) {
let option = document.createElement('option');
option.value = kindOfToken;
option.text = kindOfToken;
this.elements.positionalAttrSelection.appendChild(option);
}
appendIgnoreCaseCheckbox(parentElement, checked = false) {
let ignoreCaseCheckboxClone = document.querySelector('#ignore-case-checkbox-template').content.cloneNode(true);
parentElement.appendChild(ignoreCaseCheckboxClone);
M.Tooltip.init(parentElement.querySelectorAll('.tooltipped'));
if (checked) {
parentElement.querySelector('input[type="checkbox"]').checked = true;
}
}
setTokenSelection(selection, optionDeleteList) {
optionDeleteList.forEach(option => {
if (this.elements.positionalAttrSelection.querySelector(`option[value=${option}]`) !== null) {
this.elements.positionalAttrSelection.querySelector(`option[value=${option}]`).remove();
}
});
this.resetMaterializeSelection([this.elements.positionalAttrSelection], selection);
this.preparePositionalAttrModal();
}
}

View File

@ -168,6 +168,14 @@ nopaque.App = class App {
let manualModalTocElement = document.querySelector('#manual-modal-toc'); let manualModalTocElement = document.querySelector('#manual-modal-toc');
let manualModalToc = M.Tabs.getInstance(manualModalTocElement); let manualModalToc = M.Tabs.getInstance(manualModalTocElement);
manualModalToc.select(modalTriggerElement.dataset.manualModalChapter); manualModalToc.select(modalTriggerElement.dataset.manualModalChapter);
// TODO: Make this work.
// if ('manualModalChapterAnchor' in modalTriggerElement.dataset) {
// let manualModalChapterAnchor = document.querySelector(`#${modalTriggerElement.dataset.manualModalChapterAnchor}`);
// let xCoord = manualModalChapterAnchor.getBoundingClientRect().left;
// let yCoord = manualModalChapterAnchor.getBoundingClientRect().top;
// let modalContentElement = modalElement.querySelector('.modal-content');
// modalContentElement.scroll(xCoord, yCoord);
// }
} }
} }
} }

View File

@ -1,4 +1,4 @@
class CorpusAnalysisApp { nopaque.corpus_analysis.App = class App {
constructor(corpusId) { constructor(corpusId) {
this.corpusId = corpusId; this.corpusId = corpusId;

View File

@ -1,4 +1,4 @@
class CorpusAnalysisConcordance { nopaque.corpus_analysis.ConcordanceExtension = class ConcordanceExtension {
name = 'Concordance'; name = 'Concordance';
constructor(app) { constructor(app) {

View File

@ -1,17 +1,17 @@
class ElementReferencesQueryBuilder { nopaque.corpus_analysis.query_builder.ElementReferences = class ElementReferences {
constructor() { constructor() {
// General Elements // General Elements
this.queryInputField = document.querySelector('#corpus-analysis-concordance-query-builder-input-field'); this.queryInputField = document.querySelector('#corpus-analysis-concordance-query-builder-input-field');
this.queryChipElements = []; this.queryChipElements = [];
this.queryElementTarget = document.querySelector('.query-element-target')
this.editingModusOn = false; this.editingModusOn = false;
this.editedQueryChipElementIndex = undefined; this.editedQueryChipElementIndex = undefined;
this.deleteQueryButton = document.querySelector('#corpus-analysis-concordance-delete-query-button');
// Structural Attribute Builder Elements // Structural Attribute Builder Elements
this.structuralAttrModal = M.Modal.getInstance(document.querySelector('#corpus-analysis-concordance-structural-attr-modal')); this.structuralAttrModal = M.Modal.getInstance(document.querySelector('#corpus-analysis-concordance-structural-attr-modal'));
this.englishEntTypeSelection = document.querySelector('#corpus-analysis-concordance-english-ent-type-selection'); this.englishEntTypeSelection = document.querySelector('#corpus-analysis-concordance-english-ent-type-selection');
this.germanEntTypeSelection = document.querySelector('#corpus-analysis-concordance-german-ent-type-selection'); this.germanEntTypeSelection = document.querySelector('#corpus-analysis-concordance-german-ent-type-selection');
this.textAnnotationSelection = document.querySelector('#corpus-analysis-concordance-text-annotation-options');
this.textAnnotationInput = document.querySelector('#corpus-analysis-concordance-text-annotation-input');
// Token Attribute Builder Elements // Token Attribute Builder Elements
this.positionalAttrModal = M.Modal.getInstance(document.querySelector('#corpus-analysis-concordance-positional-attr-modal')); this.positionalAttrModal = M.Modal.getInstance(document.querySelector('#corpus-analysis-concordance-positional-attr-modal'));
@ -25,4 +25,4 @@ class ElementReferencesQueryBuilder {
this.ignoreCaseCheckbox = document.querySelector('#corpus-analysis-concordance-ignore-case-checkbox'); this.ignoreCaseCheckbox = document.querySelector('#corpus-analysis-concordance-ignore-case-checkbox');
} }
} };

View File

@ -0,0 +1 @@
// Namespace object for the query builder; the query builder classes
// (ElementReferences, QueryBuilder, ...) are attached to it as properties.
nopaque.corpus_analysis.query_builder = {};

View File

@ -0,0 +1,500 @@
nopaque.corpus_analysis.query_builder.QueryBuilder = class QueryBuilder {
constructor() {
this.elements = new nopaque.corpus_analysis.query_builder.ElementReferences();
this.addEventListenersToQueryElementTarget();
this.addEventListenersToIncidenceModifier();
this.addEventListenersToNAndMInputSubmit();
this.elements.deleteQueryButton.addEventListener('click', () => {this.resetQueryInputField()});
this.expertModeQueryBuilderSwitchHandler();
this.extensions = {
structuralAttributeBuilderFunctions: new nopaque.corpus_analysis.query_builder.StructuralAttributeBuilderFunctions(this),
tokenAttributeBuilderFunctions: new nopaque.corpus_analysis.query_builder.TokenAttributeBuilderFunctions(this),
};
this.dropdown = M.Dropdown.init(
document.querySelector('.dropdown-trigger[data-toggle-area="token-incidence-modifiers"]'),
{
onCloseStart: () => {
this.unselectChipElement(this.elements.queryInputField.querySelector('.chip.teal'));
}
}
)
}
addEventListenersToQueryElementTarget() {
this.elements.queryElementTarget.addEventListener('click', () => {
this.elements.positionalAttrModal.open();
});
this.elements.queryElementTarget.addEventListener('dragstart', this.handleDragStart.bind(this, this.elements.queryElementTarget));
this.elements.queryElementTarget.addEventListener('dragend', this.handleDragEnd);
}
addEventListenersToIncidenceModifier() {
// Eventlisteners for the incidence modifiers. There are two different types of incidence modifiers: token and character incidence modifiers.
document.querySelectorAll('.incidence-modifier-selection').forEach(button => {
let dropdownId = button.parentNode.parentNode.id;
if (dropdownId === 'corpus-analysis-concordance-token-incidence-modifiers-dropdown') {
button.addEventListener('click', () => this.tokenIncidenceModifierHandler(button.dataset.token, button.innerHTML));
} else if (dropdownId === 'corpus-analysis-concordance-character-incidence-modifiers-dropdown') {
button.addEventListener('click', () => this.extensions.tokenAttributeBuilderFunctions.characterIncidenceModifierHandler(button));
}
});
}
addEventListenersToNAndMInputSubmit() {
// Eventlisteners for the submit of n- and m-values of the incidence modifier modal for "exactly n" or "between n and m".
document.querySelectorAll('.n-m-submit-button').forEach(button => {
let modalId = button.dataset.modalId;
if (modalId === 'corpus-analysis-concordance-exactly-n-token-modal' || modalId === 'corpus-analysis-concordance-between-nm-token-modal') {
button.addEventListener('click', () => this.tokenNMSubmitHandler(modalId));
} else if (modalId === 'corpus-analysis-concordance-exactly-n-character-modal' || modalId === 'corpus-analysis-concordance-between-nm-character-modal') {
button.addEventListener('click', () => this.extensions.tokenAttributeBuilderFunctions.characterNMSubmitHandler(modalId));
}
});
}
toggleClass(elements, className, action) {
elements.forEach(element => {
document.querySelector(`[data-toggle-area="${element}"]`).classList[action](className);
});
}
resetQueryInputField() {
// Empties the query input field, re-creates the query element target inside
// it and refreshes the chip list and the query preview accordingly.
this.elements.queryInputField.innerHTML = '';
// Clearing innerHTML also removed the target, so it has to be added again.
this.addQueryElementTarget();
this.updateChipList();
this.queryPreviewBuilder();
}
addQueryElementTarget() {
let queryElementTarget = nopaque.Utils.HTMLToElement(
`
<a class="query-element-target btn-floating btn-small blue-grey lighten-4 waves-effect waves-light tooltipped" style="margin-bottom:10px; margin-right:5px;" draggable="true" data-position="bottom" data-tooltip="Add an Element to your query">
<i class="material-icons">add</i>
</a>
`
);
this.elements.queryInputField.appendChild(queryElementTarget);
this.elements.queryElementTarget = queryElementTarget;
this.addEventListenersToQueryElementTarget();
}
updateChipList() {
// Refreshes the cached list of query chips (all '.query-component' elements
// inside the query input field, in document order).
this.elements.queryChipElements = this.elements.queryInputField.querySelectorAll('.query-component');
}
resetMaterializeSelection(selectionElements, value = "default") {
selectionElements.forEach(selectionElement => {
if (selectionElement.querySelector(`option[value=${value}]`) !== null) {
selectionElement.querySelector(`option[value=${value}]`).selected = true;
}
let instance = M.FormSelect.getInstance(selectionElement);
instance.destroy();
M.FormSelect.init(selectionElement);
})
}
submitQueryChipElement(dataType=undefined, prettyQueryText=undefined, queryText=undefined, index=null, isClosingTag=false, isEditable=false) {
  // Either creates a new query chip or, while the editing mode is on,
  // overwrites type, query and label of the chip that is being edited.
  if (!this.elements.editingModusOn) {
    this.queryChipFactory(dataType, prettyQueryText, queryText, index, isClosingTag, isEditable);
    return;
  }
  const editedChip = this.elements.queryChipElements[this.elements.editedQueryChipElementIndex];
  editedChip.dataset.type = dataType;
  editedChip.dataset.query = queryText;
  // Only the first child node is replaced, so the chip's other children stay as they are.
  editedChip.firstChild.textContent = prettyQueryText;
  this.updateChipList();
  this.queryPreviewBuilder();
}
queryChipFactory(dataType, prettyQueryText, queryText, index=null, isClosingTag=false, isEditable=false) {
// Creates a new query chip element, adds Eventlisteners for selection, deletion and drag and drop and appends it to the query input field.
queryText = nopaque.Utils.escape(queryText);
prettyQueryText = nopaque.Utils.escape(prettyQueryText);
let queryChipElement = nopaque.Utils.HTMLToElement(
`
<span class="chip query-component" data-type="${dataType}" data-query="${queryText}" draggable="true"">
${prettyQueryText}${isEditable ? '<i class="material-icons chip-action-button" data-chip-action="edit" style="padding-left:5px; font-size:18px; cursor:pointer;">edit</i>': ''}
${isClosingTag ? '' : '<i class="material-icons close chip-action-button" data-chip-action="delete">close</i>'}
</span>
`
);
this.addActionListeners(queryChipElement);
queryChipElement.addEventListener('dragstart', this.handleDragStart.bind(this, queryChipElement));
queryChipElement.addEventListener('dragend', this.handleDragEnd);
// If an index is given, inserts the query chip after the given index (only relevant for Incidence Modifier) and if there is a closing tag, inserts the query chip before the closing tag.
if (index !== null) {
this.updateChipList();
this.elements.queryChipElements[index].after(queryChipElement);
} else {
this.elements.queryInputField.insertBefore(queryChipElement, this.elements.queryElementTarget);
}
if (isClosingTag) {
this.moveQueryElementTarget(queryChipElement);
}
this.updateChipList();
this.queryPreviewBuilder();
}
moveQueryElementTarget(element) {
// Moves the query element target directly in front of `element` inside the
// query input field.
this.elements.queryInputField.insertBefore(this.elements.queryElementTarget, element);
}
addActionListeners(queryChipElement) {
let notQuantifiableDataTypes = ['start-sentence', 'end-sentence', 'start-entity', 'start-empty-entity', 'end-entity', 'token-incidence-modifier'];
queryChipElement.addEventListener('click', (event) => {
if (event.target.classList.contains('chip')) {
if (!notQuantifiableDataTypes.includes(queryChipElement.dataset.type)) {
this.selectChipElement(queryChipElement);
}
}
});
let chipActionButtons = queryChipElement.querySelectorAll('.chip-action-button');
chipActionButtons.forEach(button => {
button.addEventListener('click', (event) => {
if (event.target.dataset.chipAction === 'delete') {
this.deleteChipElement(queryChipElement);
} else if (event.target.dataset.chipAction === 'edit') {
this.editChipElement(queryChipElement);
}
});
});
}
editChipElement(queryChipElement) {
this.elements.editingModusOn = true;
this.elements.editedQueryChipElementIndex = Array.from(this.elements.queryInputField.children).indexOf(queryChipElement);
switch (queryChipElement.dataset.type) {
case 'start-entity':
this.extensions.structuralAttributeBuilderFunctions.editStartEntityChipElement(queryChipElement);
break;
case 'token':
let queryElementsContent = this.extensions.tokenAttributeBuilderFunctions.prepareTokenQueryElementsContent(queryChipElement);
this.extensions.tokenAttributeBuilderFunctions.editTokenChipElement(queryElementsContent);
break;
default:
break;
}
}
deleteChipElement(attr) {
let elementIndex = Array.from(this.elements.queryInputField.children).indexOf(attr);
switch (attr.dataset.type) {
case 'start-sentence':
this.deleteClosingTagHandler(elementIndex, 'end-sentence');
break;
case 'start-empty-entity':
case 'start-entity':
this.deleteClosingTagHandler(elementIndex, 'end-entity');
break;
case 'token':
let nextElement = Array.from(this.elements.queryInputField.children)[elementIndex+1];
if (nextElement !== undefined && nextElement.dataset.type === 'token-incidence-modifier') {
this.deleteChipElement(nextElement);
}
default:
break;
}
this.elements.queryInputField.removeChild(attr);
this.updateChipList();
this.queryPreviewBuilder();
}
deleteClosingTagHandler(elementIndex, closingTagType) {
let closingTags = this.elements.queryInputField.querySelectorAll(`[data-type="${closingTagType}"]`);
for (let i = 0; i < closingTags.length; i++) {
let closingTag = closingTags[i];
if (Array.from(this.elements.queryInputField.children).indexOf(closingTag) > elementIndex) {
this.deleteChipElement(closingTag);
break;
}
}
}
handleDragStart(queryChipElement) {
// Called when a query chip (or the query element target) starts being dragged.
// Inserts "Drop here" placeholder chips into the query input field; each of
// them is a dropzone for the dragged element `queryChipElement`.
let queryChips = this.elements.queryInputField.querySelectorAll('.query-component');
// A token incidence modifier only gets dropzones next to token chips.
if (queryChipElement.dataset.type === 'token-incidence-modifier') {
queryChips = this.elements.queryInputField.querySelectorAll('.query-component[data-type="token"]');
}
// The dropzones are inserted from a zero-delay timeout, i.e. after this
// handler has returned (presumably so the DOM changes do not interfere with
// the start of the drag - TODO confirm).
setTimeout(() => {
let targetChipElement = nopaque.Utils.HTMLToElement('<span class="chip drop-target">Drop here</span>');
for (let element of queryChips) {
// The very first chip additionally gets a dropzone in front of it.
if (element === this.elements.queryInputField.querySelectorAll('.query-component')[0]) {
let secondTargetChipClone = targetChipElement.cloneNode(true);
element.insertAdjacentElement('beforebegin', secondTargetChipClone);
this.addDragDropListeners(secondTargetChipClone, queryChipElement);
}
// No dropzone after the dragged element itself or after its direct
// predecessor: dropping there would not change the order.
if (element === queryChipElement || element.nextSibling === queryChipElement) {continue;}
let targetChipClone = targetChipElement.cloneNode(true);
element.insertAdjacentElement('afterend', targetChipClone);
//TODO: Change to two different functions for drag and drop
this.addDragDropListeners(targetChipClone, queryChipElement);
}
}, 0);
}
handleDragEnd(event) {
// is called when a query chip is dropped. It removes the dropzones and initializes the tooltips if the dragged element is the query element target.
if (event.target.classList.contains('query-element-target')) {
M.Tooltip.init(event.target);
}
document.querySelectorAll('.drop-target').forEach(target => target.remove());
}
addDragDropListeners(targetChipClone, queryChipElement) {
targetChipClone.addEventListener('dragover', (event) => {
event.preventDefault();
});
targetChipClone.addEventListener('dragenter', (event) => {
event.preventDefault();
event.target.style.borderStyle = 'solid dotted';
});
targetChipClone.addEventListener('dragleave', (event) => {
event.preventDefault();
event.target.style.borderStyle = 'hidden';
});
targetChipClone.addEventListener('drop', (event) => {
let dropzone = event.target;
dropzone.parentElement.replaceChild(queryChipElement, dropzone);
this.updateChipList();
this.queryPreviewBuilder();
});
}
queryPreviewBuilder() {
// Builds the query preview in the form of pure CQL and displays it in the query preview field.
let queryPreview = document.querySelector('#corpus-analysis-concordance-query-preview');
let queryInputFieldContent = [];
this.elements.queryChipElements.forEach(element => {
let queryElement = element.dataset.query;
if (queryElement !== undefined) {
queryElement = nopaque.Utils.escape(queryElement);
}
queryInputFieldContent.push(queryElement);
});
let queryString = queryInputFieldContent.join(' ');
let replacements = {
' +': '+',
' *': '*',
' ?': '?',
' {': '{'
};
for (let key in replacements) {
queryString = queryString.replace(key, replacements[key]);
}
queryString += ';';
queryPreview.innerHTML = queryString;
queryPreview.parentNode.classList.toggle('hide', queryString === ';');
}
selectChipElement(attr) {
if (attr.classList.contains('teal')) {
return;
}
this.toggleClass(['token-incidence-modifiers'], 'disabled', 'toggle');
attr.classList.toggle('teal');
attr.classList.toggle('lighten-5');
M.Dropdown.getInstance(document.querySelector('.dropdown-trigger[data-toggle-area="token-incidence-modifiers"]')).open();
}
unselectChipElement(attr) {
let nModalInstance = M.Modal.getInstance(document.querySelector('#corpus-analysis-concordance-exactly-n-token-modal'));
let nmModalInstance = M.Modal.getInstance(document.querySelector('#corpus-analysis-concordance-between-nm-token-modal'));
if (nModalInstance.isOpen || nmModalInstance.isOpen) {
return;
}
attr.classList.remove('teal', 'lighten-5');
this.toggleClass(['token-incidence-modifiers'], 'disabled', 'add');
}
tokenIncidenceModifierHandler(incidenceModifier, incidenceModifierPretty, nOrNM = false) {
// Adds a token incidence modifier to the query input field.
let selectedChip = this.elements.queryInputField.querySelector('.chip.teal');
let selectedChipIndex = Array.from(this.elements.queryChipElements).indexOf(selectedChip);
if (nOrNM) {
this.unselectChipElement(selectedChip);
}
this.submitQueryChipElement('token-incidence-modifier', incidenceModifierPretty, incidenceModifier, selectedChipIndex);
}
tokenNMSubmitHandler(modalId) {
// Adds a token incidence modifier (exactly n or between n and m) to the query input field.
let modal = document.querySelector(`#${modalId}`);
let input_n = modal.querySelector('.n-m-input[data-value-type="n"]').value;
let input_m = modal.querySelector('.n-m-input[data-value-type="m"]') || undefined;
input_m = input_m !== undefined ? input_m.value : '';
let input = `{${input_n}${input_m !== '' ? ',' : ''}${input_m}}`;
let pretty_input = `between ${input_n} and ${input_m} (${input})`;
if (input_m === '') {
pretty_input = `exactly ${input_n} (${input})`;
}
let instance = M.Modal.getInstance(modal);
instance.close();
this.tokenIncidenceModifierHandler(input, pretty_input, true);
}
expertModeQueryBuilderSwitchHandler() {
let queryBuilderDisplay = document.querySelector("#corpus-analysis-concordance-query-builder-display");
let expertModeDisplay = document.querySelector("#corpus-analysis-concordance-expert-mode-display");
let expertModeSwitch = document.querySelector("#corpus-analysis-concordance-expert-mode-switch");
let submitModal = M.Modal.getInstance(document.querySelector('#corpus-analysis-concordance-switch-to-query-builder-submit-modal'));
let confirmSwitchToQueryBuilderButton = document.querySelector('.switch-action[data-switch-action="confirm"]');
confirmSwitchToQueryBuilderButton.addEventListener("click", () => {
queryBuilderDisplay.classList.remove("hide");
expertModeDisplay.classList.add("hide");
this.switchToQueryBuilderParser();
});
expertModeSwitch.addEventListener("change", () => {
const isChecked = expertModeSwitch.checked;
if (isChecked) {
queryBuilderDisplay.classList.add("hide");
expertModeDisplay.classList.remove("hide");
this.switchToExpertModeParser();
} else {
submitModal.open();
}
});
}
switchToExpertModeParser() {
let expertModeInputField = document.querySelector('#corpus-analysis-concordance-form-query');
expertModeInputField.value = '';
let queryBuilderInputFieldValue = nopaque.Utils.unescape(document.querySelector('#corpus-analysis-concordance-query-preview').innerHTML.trim());
if (queryBuilderInputFieldValue !== "" && queryBuilderInputFieldValue !== ";") {
expertModeInputField.value = queryBuilderInputFieldValue;
}
}
switchToQueryBuilderParser() {
this.resetQueryInputField();
let expertModeInputFieldValue = document.querySelector('#corpus-analysis-concordance-form-query').value;
let chipElements = this.parseTextToChip(expertModeInputFieldValue);
let editableElements = ['start-entity', 'token'];
for (let chipElement of chipElements) {
let isEditable = editableElements.includes(chipElement['type']);
if (chipElement['query'] === '[]'){
isEditable = false;
}
this.submitQueryChipElement(chipElement['type'], chipElement['pretty'], chipElement['query'], null, false, isEditable);
}
}
parseTextToChip(query) {
const parsingElementDict = {
'<s>': {
pretty: 'Sentence Start',
type: 'start-sentence'
},
'<\/s>': {
pretty: 'Sentence End',
type: 'end-sentence'
},
'<ent>': {
pretty: 'Entity Start',
type: 'start-empty-entity'
},
'<ent_type="([A-Z]+)">': {
pretty: '',
type: 'start-entity'
},
'<\\\/ent(_type)?>': {
pretty: 'Entity End',
type: 'end-entity'
},
'\\[(word|lemma|pos|simple_pos)=("(?:[^"\\\\]|\\\\")*") ?(%c)? ?((\\&|\\|) ?(word|lemma|pos|simple_pos)=("(?:[^"\\\\]|\\\\")*") ?(%c)? ?)*\\]': {
pretty: '',
type: 'token'
},
'\\[\\]': {
pretty: 'Empty Token',
type: 'token'
},
'(?<!\\[) ?\\+ ?(?![^\\]]\\])': {
pretty: ' one or more (+)',
type: 'token-incidence-modifier'
},
'(?<!\\[) ?\\* ?(?![^\\]]\\])': {
pretty: 'zero or more (*)',
type: 'token-incidence-modifier'
},
'(?<!\\[) ?\\? ?(?![^\\]]\\])': {
pretty: 'zero or one (?)',
type: 'token-incidence-modifier'
},
'(?<!\\[) ?\\{[0-9]+} ?(?![^\\]]\\])': {
pretty: '',
type: 'token-incidence-modifier'
},
'(?<!\\[) ?\\{[0-9]+(,[0-9]+)?} ?(?![^\\]]\\])': {
pretty: '',
type: 'token-incidence-modifier'
}
}
let chipElements = [];
let regexPattern = Object.keys(parsingElementDict).map(pattern => `(${pattern})`).join('|');
const regex = new RegExp(regexPattern, 'gi');
let match;
while ((match = regex.exec(query)) !== null) {
// this is necessary to avoid infinite loops with zero-width matches
if (match.index === regex.lastIndex) {
regex.lastIndex++;
}
let stringElement = match[0];
for (let [pattern, chipElement] of Object.entries(parsingElementDict)) {
const parsingRegex = new RegExp(pattern, 'gi');
if (parsingRegex.exec(stringElement)) {
// Creating the pretty text for the chip element
let prettyText;
switch (pattern) {
case '<ent_type="([A-Z]+)">':
prettyText = `Entity Type=${stringElement.replace(/<ent_type="|">/g, '')}`;
break;
case ':: ?match\\.text_[A-Za-z]+="[^"]+"':
prettyText = stringElement.replace(/:: ?match\.text_|"|"/g, '');
break;
case '\\[(word|lemma|pos|simple_pos)=("(?:[^"\\\\]|\\\\")*") ?(%c)? ?((\\&|\\|) ?(word|lemma|pos|simple_pos)=("(?:[^"\\\\]|\\\\")*") ?(%c)? ?)*\\]':
prettyText = stringElement.replace(/^\[|\]$|(?<!\\)"/g, '');
prettyText = prettyText.replace(/\&/g, ' and ').replace(/\|/g, ' or ');
break;
case '(?<!\\[) ?\\{[0-9]+} ?(?![^\\]]\\])':
prettyText = `exactly ${stringElement.replace(/{|}/g, '')} (${stringElement})`;
break;
case '(?<!\\[) ?\\{[0-9]+(,[0-9]+)?} ?(?![^\\]]\\])':
prettyText = `between${stringElement.replace(/{|}/g, ' ').replace(',', ' and ')}(${stringElement})`;
break;
default:
prettyText = chipElement.pretty;
break;
}
chipElements.push({
type: chipElement.type,
pretty: prettyText,
query: stringElement
});
break;
}
}
}
return chipElements;
}
};

View File

@ -0,0 +1,82 @@
nopaque.corpus_analysis.query_builder.StructuralAttributeBuilderFunctions = class StructuralAttributeBuilderFunctions {
constructor(app) {
this.app = app;
this.elements = app.elements;
this.structuralAttrModalEventlisteners();
this.elements.structuralAttrModal = M.Modal.init(
document.querySelector('#corpus-analysis-concordance-structural-attr-modal'),
{
onCloseStart: () => {
this.resetStructuralAttrModal();
}
}
);
}
structuralAttrModalEventlisteners() {
document.querySelectorAll('[data-structural-attr-modal-action-button]').forEach(button => {
button.addEventListener('click', () => {
this.actionButtonInStrucAttrModalHandler(button.dataset.structuralAttrModalActionButton);
});
});
document.querySelector('.ent-type-selection-action[data-ent-type="any"]').addEventListener('click', () => {
this.app.submitQueryChipElement('start-empty-entity', 'Entity Start', '<ent>');
this.app.submitQueryChipElement('end-entity', 'Entity End', '</ent>', null, true);
this.elements.structuralAttrModal.close();
});
document.querySelector('.ent-type-selection-action[data-ent-type="english"]').addEventListener('change', (event) => {
this.app.submitQueryChipElement('start-entity', `Entity Type=${event.target.value}`, `<ent_type="${event.target.value}">`, null, false, true);
if (!this.elements.editingModusOn) {
this.app.submitQueryChipElement('end-entity', 'Entity End', '</ent_type>', null, true);
}
this.elements.structuralAttrModal.close();
});
document.querySelector('.ent-type-selection-action[data-ent-type="german"]').addEventListener('change', (event) => {
this.app.submitQueryChipElement('start-entity', `Entity Type=${event.target.value}`, `<ent_type="${event.target.value}">`, null, false, true);
if (!this.elements.editingModusOn) {
this.app.submitQueryChipElement('end-entity', 'Entity End', '</ent_type>', null, true);
}
this.elements.structuralAttrModal.close();
});
}
resetStructuralAttrModal() {
this.app.resetMaterializeSelection([this.elements.englishEntTypeSelection, this.elements.germanEntTypeSelection]);
this.app.toggleClass(['entity-builder'], 'hide', 'add');
this.toggleEditingAreaStructuralAttrModal('remove');
this.elements.editingModusOn = false;
this.elements.editedQueryChipElementIndex = undefined;
}
actionButtonInStrucAttrModalHandler(action) {
switch (action) {
case 'sentence':
this.app.submitQueryChipElement('start-sentence', 'Sentence Start', '<s>');
this.app.submitQueryChipElement('end-sentence', 'Sentence End', '</s>', null, true);
this.elements.structuralAttrModal.close();
break;
case 'entity':
this.app.toggleClass(['entity-builder'], 'hide', 'toggle');
break;
default:
break;
}
}
toggleEditingAreaStructuralAttrModal(action) {
// Adds, removes or toggles (depending on `action`, a classList method name)
// the 'disabled' class on the sentence, entity and any-type-entity buttons.
// Used with 'add' while a query chip element is edited, so that only the
// corresponding editing area can be used, and with 'remove' on reset.
this.app.toggleClass(['sentence-button', 'entity-button', 'any-type-entity-button'], 'disabled', action);
}
editStartEntityChipElement(queryChipElement) {
this.elements.structuralAttrModal.open();
this.app.toggleClass(['entity-builder'], 'hide', 'remove');
this.toggleEditingAreaStructuralAttrModal('add');
let entType = queryChipElement.dataset.query.replace(/<ent_type="|">/g, '');
let isEnglishEntType = this.elements.englishEntTypeSelection.querySelector(`option[value=${entType}]`) !== null;
let selection = isEnglishEntType ? this.elements.englishEntTypeSelection : this.elements.germanEntTypeSelection;
this.app.resetMaterializeSelection([selection], entType);
}
}

View File

@ -0,0 +1,329 @@
nopaque.corpus_analysis.query_builder.TokenAttributeBuilderFunctions = class TokenAttributeBuilderFunctions {
constructor(app) {
this.app = app;
this.elements = app.elements;
this.elements.positionalAttrSelection.addEventListener('change', () => {
this.preparePositionalAttrModal();
});
// Options for positional attribute selection
document.querySelectorAll('.positional-attr-options-action-button[data-options-action]').forEach(button => {
button.addEventListener('click', () => {this.actionButtonInOptionSectionHandler(button.dataset.optionsAction);});
});
this.elements.tokenSubmitButton.addEventListener('click', () => {this.addTokenToQuery();});
this.elements.positionalAttrModal = M.Modal.init(
document.querySelector('#corpus-analysis-concordance-positional-attr-modal'),
{
onOpenStart: () => {
this.preparePositionalAttrModal();
},
onCloseStart: () => {
this.resetPositionalAttrModal();
}
}
);
}
resetPositionalAttrModal() {
let originalSelectionList =
`
<option value="word" selected>word</option>
<option value="lemma" >lemma</option>
<option value="english-pos">english pos</option>
<option value="german-pos">german pos</option>
<option value="simple_pos">simple_pos</option>
<option value="empty-token">empty token</option>
`;
this.elements.positionalAttrSelection.innerHTML = originalSelectionList;
this.elements.tokenQuery.innerHTML = '';
this.elements.tokenBuilderContent.innerHTML = '';
this.app.toggleClass(['input-field-options'], 'hide', 'remove');
this.app.toggleClass(['incidence-modifiers', 'or', 'and'], 'disabled', 'add');
this.app.resetMaterializeSelection([this.elements.positionalAttrSelection], "word");
this.elements.ignoreCaseCheckbox.checked = false;
this.elements.editingModusOn = false;
this.elements.editedQueryChipElementIndex = undefined;
}
actionButtonInOptionSectionHandler(elem) {
let input = this.tokenInputCheck(this.elements.tokenBuilderContent);
switch (elem) {
case 'option-group':
this.cursorPositionInputfieldHandler(input, '(option1|option2)');
let firstIndex = input.value.indexOf('option1');
let lastIndex = firstIndex + 'option1'.length;
input.setSelectionRange(firstIndex, lastIndex);
break;
case 'wildcard-char':
this.cursorPositionInputfieldHandler(input, '.');
input.focus();
break;
case 'and':
this.conditionHandler('and');
break;
case 'or':
this.conditionHandler('or');
break;
default:
break;
}
this.optionToggleHandler();
}
cursorPositionInputfieldHandler(input, addedInput) {
let cursorPosition = input.selectionStart;
let textBeforeCursor = input.value.substring(0, cursorPosition);
let textAfterCursor = input.value.substring(cursorPosition);
let newInputValue = textBeforeCursor + addedInput + textAfterCursor;
input.value = newInputValue;
let newCursorPosition = cursorPosition + addedInput.length;
input.setSelectionRange(newCursorPosition, newCursorPosition);
}
characterIncidenceModifierHandler(elem) {
let input = this.tokenInputCheck(this.elements.tokenBuilderContent);
this.cursorPositionInputfieldHandler(input, elem.dataset.token);
}
characterNMSubmitHandler(modalId) {
let modal = document.querySelector(`#${modalId}`);
let input_n = modal.querySelector('.n-m-input[data-value-type="n"]').value;
let input_m = modal.querySelector('.n-m-input[data-value-type="m"]') || undefined;
input_m = input_m !== undefined ? ',' + input_m.value : '';
let addedInput = `${input_n}${input_m}`;
let instance = M.Modal.getInstance(modal);
instance.close();
let input = this.tokenInputCheck(this.elements.tokenBuilderContent);
this.cursorPositionInputfieldHandler(input, `{${addedInput}}`);
}
conditionHandler(conditionText) {
let tokenQueryTemplateClone = this.elements.tokenQueryTemplate.content.cloneNode(true);
tokenQueryTemplateClone.querySelector('.token-query-template-content').appendChild(this.elements.tokenBuilderContent.firstElementChild);
let notSelectedButton = tokenQueryTemplateClone.querySelector(`[data-condition-pretty-text]:not([data-condition-pretty-text="${conditionText}"])`);
let deleteButton = tokenQueryTemplateClone.querySelector(`[data-token-query-content-action="delete"]`);
deleteButton.addEventListener('click', (event) => {
this.deleteTokenQueryRow(event.target);
});
notSelectedButton.parentNode.removeChild(notSelectedButton);
this.elements.tokenQuery.appendChild(tokenQueryTemplateClone);
let lastTokenQueryRow = this.elements.tokenQuery.lastElementChild;
if(lastTokenQueryRow.querySelector('[data-kind-of-token="word"]') || lastTokenQueryRow.querySelector('[data-kind-of-token="lemma"]')) {
this.appendIgnoreCaseCheckbox(lastTokenQueryRow.querySelector('.token-query-template-content'), this.elements.ignoreCaseCheckbox.checked);
}
this.elements.ignoreCaseCheckbox.checked = false;
this.setTokenSelection();
}
deleteTokenQueryRow(deleteButton) {
let deletedRow = deleteButton.closest('.row');
let condition = deletedRow.querySelector('[data-condition-pretty-text]').dataset.conditionPrettyText;
if (condition === 'and') {
let kindOfToken = deletedRow.querySelector('[data-kind-of-token]').dataset.kindOfToken;
switch (kindOfToken) {
case 'english-pos' || 'german-pos':
this.createOptionElementForPosAttrSelection('english-pos');
this.createOptionElementForPosAttrSelection('german-pos');
break;
default:
this.createOptionElementForPosAttrSelection(kindOfToken);
break;
}
M.FormSelect.init(this.elements.positionalAttrSelection);
}
deletedRow.remove();
}
createOptionElementForPosAttrSelection(kindOfToken) {
let option = document.createElement('option');
option.value = kindOfToken;
option.text = kindOfToken;
this.elements.positionalAttrSelection.appendChild(option);
}
appendIgnoreCaseCheckbox(parentElement, checked=false) {
let ignoreCaseCheckboxClone = document.querySelector('#ignore-case-checkbox-template').content.cloneNode(true);
parentElement.appendChild(ignoreCaseCheckboxClone);
M.Tooltip.init(parentElement.querySelectorAll('.tooltipped'));
if (checked) {
parentElement.querySelector('input[type="checkbox"]').checked = true;
}
}
setTokenSelection(selection="word", optionDeleteList=['empty-token']) {
optionDeleteList.forEach(option => {
if (this.elements.positionalAttrSelection.querySelector(`option[value=${option}]`) !== null) {
this.elements.positionalAttrSelection.querySelector(`option[value=${option}]`).remove();
}
});
this.app.resetMaterializeSelection([this.elements.positionalAttrSelection], selection);
this.preparePositionalAttrModal();
}
preparePositionalAttrModal() {
let selection = this.elements.positionalAttrSelection.value;
if (selection !== 'empty-token') {
let selectionTemplate = document.querySelector(`.token-builder-section[data-token-builder-section="${selection}"]`);
let selectionTemplateClone = selectionTemplate.content.cloneNode(true);
this.elements.tokenBuilderContent.innerHTML = '';
this.elements.tokenBuilderContent.appendChild(selectionTemplateClone);
if (this.elements.tokenBuilderContent.querySelector('select') !== null) {
let selectElement = this.elements.tokenBuilderContent.querySelector('select');
M.FormSelect.init(selectElement);
selectElement.addEventListener('change', () => {this.optionToggleHandler();});
} else {
this.elements.tokenBuilderContent.querySelector('input').addEventListener('input', () => {this.optionToggleHandler();});
}
}
this.optionToggleHandler();
if (selection === 'word' || selection === 'lemma') {
this.app.toggleClass(['input-field-options'], 'hide', 'remove');
} else if (selection === 'empty-token'){
this.addTokenToQuery();
} else {
this.app.toggleClass(['input-field-options'], 'hide', 'add');
}
}
tokenInputCheck(elem) {
return elem.querySelector('select') !== null ? elem.querySelector('select') : elem.querySelector('input');
}
optionToggleHandler() {
let input = this.tokenInputCheck(this.elements.tokenBuilderContent);
if (input.value === '' && this.elements.editingModusOn === false) {
this.app.toggleClass(['incidence-modifiers', 'or', 'and'], 'disabled', 'add');
} else if (this.elements.positionalAttrSelection.querySelectorAll('option').length === 1) {
this.app.toggleClass(['and'], 'disabled', 'add');
this.app.toggleClass(['or'], 'disabled', 'remove');
} else {
this.app.toggleClass(['incidence-modifiers', 'or', 'and'], 'disabled', 'remove');
}
}
addTokenToQuery() {
let tokenQueryPrettyText = '';
let tokenQueryCQLText = '';
let input;
let kindOfToken = this.kindOfTokenCheck(this.elements.positionalAttrSelection.value);
// Takes all rows of the token query (if there is a query concatenation).
// Adds their contents to tokenQueryPrettyText and tokenQueryCQLText, which will later be expanded with the current input field.
let tokenQueryRows = this.elements.tokenQuery.querySelectorAll('.row');
tokenQueryRows.forEach(row => {
let ignoreCaseCheckbox = row.querySelector('input[type="checkbox"]');
let c = ignoreCaseCheckbox !== null && ignoreCaseCheckbox.checked ? ' %c' : '';
let tokenQueryRowInput = this.tokenInputCheck(row.querySelector('.token-query-template-content'));
let tokenQueryKindOfToken = this.kindOfTokenCheck(tokenQueryRowInput.closest('.input-field').dataset.kindOfToken);
let tokenConditionPrettyText = row.querySelector('[data-condition-pretty-text]').dataset.conditionPrettyText;
let tokenConditionCQLText = row.querySelector('[data-condition-cql-text]').dataset.conditionCqlText;
tokenQueryPrettyText += `${tokenQueryKindOfToken}=${tokenQueryRowInput.value}${c} ${tokenConditionPrettyText} `;
tokenQueryCQLText += `${tokenQueryKindOfToken}="${tokenQueryRowInput.value}"${c} ${tokenConditionCQLText}`;
});
if (kindOfToken === 'empty-token') {
tokenQueryPrettyText += 'empty token';
} else {
let c = this.elements.ignoreCaseCheckbox.checked ? ' %c' : '';
input = this.tokenInputCheck(this.elements.tokenBuilderContent);
tokenQueryPrettyText += `${kindOfToken}=${input.value}${c}`;
tokenQueryCQLText += `${kindOfToken}="${input.value}"${c}`;
}
// isTokenQueryInvalid looks if a valid value is passed. If the input fields/dropdowns are empty (isTokenQueryInvalid === true), no token is added.
if (this.elements.positionalAttrSelection.value !== 'empty-token' && input.value === '') {
this.disableTokenSubmit();
} else {
tokenQueryCQLText = `[${tokenQueryCQLText}]`;
this.app.submitQueryChipElement('token', tokenQueryPrettyText, tokenQueryCQLText, null, false, kindOfToken === 'empty-token' ? false : true);
this.elements.positionalAttrModal.close();
}
}
kindOfTokenCheck(kindOfToken) {
return kindOfToken === 'english-pos' || kindOfToken === 'german-pos' ? 'pos' : kindOfToken;
}
disableTokenSubmit() {
this.elements.tokenSubmitButton.classList.add('red');
this.elements.noValueMessage.classList.remove('hide');
setTimeout(() => {
this.elements.tokenSubmitButton.classList.remove('red');
}, 500);
setTimeout(() => {
this.elements.noValueMessage.classList.add('hide');
}, 3000);
}
editTokenChipElement(queryElementsContent) {
this.elements.positionalAttrModal.open();
queryElementsContent.forEach((queryElement) => {
this.app.resetMaterializeSelection([this.elements.positionalAttrSelection], queryElement.tokenAttr);
this.preparePositionalAttrModal();
switch (queryElement.tokenAttr) {
case 'word':
case 'lemma':
this.elements.tokenBuilderContent.querySelector('input').value = queryElement.tokenValue;
break;
case 'english-pos':
// English-pos is selected by default. Then it is checked whether the passed token value occurs in the english-pos selection. If not, the selection is reseted and changed to german-pos.
let selection = this.elements.tokenBuilderContent.querySelector('select');
queryElement.tokenAttr = selection.querySelector(`option[value=${queryElement.tokenValue}]`) ? 'english-pos' : 'german-pos';
this.app.resetMaterializeSelection([this.elements.positionalAttrSelection], queryElement.tokenAttr);
this.preparePositionalAttrModal();
this.app.resetMaterializeSelection([this.elements.tokenBuilderContent.querySelector('select')], queryElement.tokenValue);
break;
case 'simple_pos':
this.app.resetMaterializeSelection([this.elements.tokenBuilderContent.querySelector('select')], queryElement.tokenValue);
default:
break;
}
if (queryElement.ignoreCase) {
this.elements.ignoreCaseCheckbox.checked = true;
}
if (queryElement.condition !== undefined) {
this.conditionHandler(queryElement.condition, true);
}
});
}
prepareTokenQueryElementsContent(queryChipElement) {
//this regex searches for word or lemma or pos or simple_pos="any string (also quotation marks escaped by backslash) within double quotes" followed by one or no ignore case markers, followed by one or no condition characters.
let regex = new RegExp('(word|lemma|pos|simple_pos)=("(?:[^"\\\\]|\\\\")*") ?(%c)? ?(\\&|\\|)?', 'gm');
let m;
let queryElementsContent = [];
while ((m = regex.exec(queryChipElement.dataset.query)) !== null) {
// this is necessary to avoid infinite loops with zero-width matches
if (m.index === regex.lastIndex) {
regex.lastIndex++;
}
let tokenAttr = m[1];
// Passes english-pos by default so that the template is added. In editTokenChipElement it is then checked whether it is english-pos or german-pos.
if (tokenAttr === 'pos') {
tokenAttr = 'english-pos';
}
let tokenValue = m[2].replace(/(?<!\\)"/g, '');
let ignoreCase = false;
let condition = undefined;
m.forEach((match) => {
if (match === "%c") {
ignoreCase = true;
} else if (match === "&") {
condition = "and";
} else if (match === "|") {
condition = "or";
}
});
queryElementsContent.push({tokenAttr: tokenAttr, tokenValue: tokenValue, ignoreCase: ignoreCase, condition: condition});
}
return queryElementsContent;
}
}

View File

@ -1,4 +1,4 @@
class CorpusAnalysisReader { nopaque.corpus_analysis.ReaderExtension = class ReaderExtension {
name = 'Reader'; name = 'Reader';
constructor(app) { constructor(app) {

View File

@ -1,4 +1,4 @@
class CorpusAnalysisStaticVisualization { nopaque.corpus_analysis.StaticVisualizationExtension = class StaticVisualizationExtension {
name = 'Static Visualization (beta)'; name = 'Static Visualization (beta)';
constructor(app) { constructor(app) {

View File

@ -0,0 +1,34 @@
<h3 class="manual-chapter-title">Introduction</h3>
<h4>Introduction</h4>
<p>
Nopaque is a web application that offers different services and tools to support
researchers working with image and text-based data. These services are logically
connected and build upon each other. They include:
</p>
<ol style="list-style-type:disc; margin-left:2em; padding-bottom:0;">
<li><b>File setup</b>, which converts and merges different data (e.g., books, letters)
for further processing.</li>
<li><b>Image-to-text conversion tools:</b></li>
<ol style="list-style-type:circle; margin-left:1em; padding-bottom:0;"><li><b>Optical Character Recognition</b> converts photos and
scans into text data, making them machine-readable.</li>
<li><b>Transkribus HTR (Handwritten Text Recognition) Pipeline</b> (currently deactivated)*
also converts images into text data, making them machine-readable.</li>
</ol>
<li><b>Natural Language Processing</b> extracts information from your text via
computational linguistic data processing (tokenization, lemmatization, part-of-speech
tagging and named-entity recognition).</li>
<li><b>Corpus analysis</b> makes use of CQP Query Language to search through text
corpora with the aid of metadata and Natural Language Processing tags.</li>
</ol>
<p>
Nopaque also features a <b>Social Area</b>, where researchers can create a personal profile, connect with other users and share corpora if desired.
These services can be accessed from the sidebar in nopaque.
All processes are implemented in a specially provided cloud environment with established open-source software.
This always ensures that no personal data of the users is disclosed.
</p>
<p>
*Note: the Transkribus HTR Pipeline is currently
deactivated; we are working on an alternative solution. You can try using Tesseract OCR,
though the results will likely be poor.
</p>

View File

@ -0,0 +1,104 @@
<h3 class="manual-chapter-title">Getting Started</h3>
<h4>Getting Started</h4>
<p>
In this section, we will take you through all the steps you need to start analyzing your data with nopaque.
</p>
<div style="border: 1px solid; padding-left: 20px; margin-right: 400px; margin-bottom: 40px;">
<h5>Content</h5>
<ol style="list-style-type:disc">
<li><a href="#registration-and-login">Registration and login</a></li>
<li><a href="#preparing-files">Preparing files for analysis</a></li>
<li><a href="#converting-a-pdf-into-text">Converting a PDF into text data</a></li>
<li><a href="#extracting-linguistic-data">Extracting linguistic data from text</a></li>
<li><a href="#creating-a-corpus">Creating a corpus</a></li>
<li><a href="#analyzing-a-corpus">Analyzing a corpus</a></li>
</ol>
</div>
<p></p>
<h5 id="registration-and-login">Registration and login</h5>
<p>Before you can begin using nopaque, you will need to create a personal user account.
Open the menu (three dots) at the top right of the screen and choose “Register.” Enter
the required details listed on the registration page (username, password, email address).
After verifying your account via the link sent to your email, you can log in.</p>
<h5 id="preparing-files">Preparing files for analysis</h5>
<p>A few steps need to be taken before images, scans, or other text data are ready for
analysis in nopaque. The SpaCy NLP Pipeline service can only extract linguistic data
from texts in plain text (.txt) format. If your text is already in this format, you
can skip the next steps and go directly to <b>Extracting linguistic data from text</b>.
Otherwise, the next steps assume that you are starting off with image data.</p>
<p>
First, all data needs to be converted into PDF format. Using the <b>File Setup</b> service,
you can bundle images together even of different formats and convert them all into
one PDF file. Note that the File Setup service will sort the images based on their file
name in ascending order. It is thus recommended to name them accordingly, for example:
page-01.png, page-02.jpg, page-03.tiff.
</p>
<p>
Add a title and description to your job and select the File Setup version* you want to use.
After uploading the images and completing the File Setup job, the list of files added
can be seen under “Inputs.” Further below, under “Results,” you can find and download
the PDF output.</p>
<h5 id="converting-a-pdf-into-text">Converting a PDF into text data</h5>
<p>Select an image-to-text conversion tool depending on whether your PDF is primarily
composed of handwritten text or printed text. For printed text, select the <b>Tesseract OCR
Pipeline</b>. For handwritten text, select the <b>Transkribus HTR Pipeline</b>. Select the desired
language model or upload your own. Select the version* of Tesseract OCR you want to use
and click on submit to start the conversion. When the job is finished, various output
files can be seen and downloaded further below, under “Results.” You may want to review
the text output for errors and coherence. (Note: the Transkribus HTR Pipeline is currently
deactivated; we are working on an alternative solution. You can try using Tesseract OCR,
though the results will likely be poor.)
</p>
<h5 id="extracting-linguistic-data">Extracting linguistic data from text</h5>
<p>The <b>SpaCy NLP Pipeline</b> service extracts linguistic information from plain text files
(in .txt format). Select the corresponding .txt file, the language model, and the
version* you want to use. When the job is finished, find and download the files in
<b>.json</b> and <b>.vrt</b> format under “Results.”</p>
<h5 id="creating-a-corpus">Creating a corpus</h5>
<p>Now, using the files in .vrt format, you can create a corpus. This can be done
in the <a href="{{ url_for('main.dashboard') }}">Dashboard</a> or
<a href="{{ url_for('services.corpus_analysis') }}">Corpus Analysis</a> sections under “My Corpora.” Click on “Create corpus”
and add a title and description for your corpus. After submitting, you will automatically
be taken to the corpus overview page (which can be called up again via the corpus lists)
of your new, still empty corpus. </p>
<p>
Further down in the “Corpus files” section, you can add texts in .vrt format
(results of the NLP service) to your new corpus. To do this, use the "Add Corpus File"
button and fill in the form that appears. Here, you can add
metadata to each text. After adding all texts to the corpus, it must
be prepared for analysis. This process can be initiated by clicking on the
"Build" button under "Actions".
On the corpus overview page, you can see information about the current status of
the corpus in the upper right corner. After the build process, the status "built" should be shown here.
Now, your corpus is ready for analysis.</p>
<h5 id="analyzing-a-corpus">Analyzing a corpus</h5>
<p>Navigate to the corpus you would like to analyze and click on the Analyze button.
This will take you to an analysis overview page for your corpus. Here, you can find a
visualization of general linguistic information of your corpus, including tokens,
sentences, unique words, unique lemmas, unique parts of speech and unique simple parts
of speech. You will also find a pie chart of the proportional textual makeup of your
corpus and can view the linguistic information for each individual text file. A more
detailed visualization of token frequencies with a search option is also on this page.</p>
<p>From the corpus analysis overview page, you can navigate to other analysis modules:
the <b>Query Builder</b> (under <b>Concordance</b>) and the <b>Reader</b>. With the Reader, you can read
your corpus texts tokenized with the associated linguistic information. The tokens can
be shown as lemmas, parts of speech, words, and can be displayed in different ways:
visually as plain text with the option of highlighted entities or as chips.</p>
<p>The <b>Concordance</b> module allows for more specific, query-oriented text analyses.
Here, you can filter out text parameters and structural attributes in different
combinations. This is explained in more detail in the Query Builder section of the
manual.</p>
<br>
<br>
*For all services, it is recommended to use the latest version unless you need a model
only available in an earlier version or are looking to reproduce data that was originally generated
using an older version.

View File

@ -1,15 +1,22 @@
<h3 class="manual-chapter-title">Dashboard</h3> <h3 class="manual-chapter-title">Dashboard</h3>
<h4>About the dashboard</h4>
<br>
<div class="row"> <div class="row">
<div class="col s12 m4"> <div class="col s12 m4">
<img alt="Dashboard" class="materialboxed responsive-img" src="{{ url_for('static', filename='images/manual/dashboard.png') }}"> <img alt="Dashboard" class="materialboxed responsive-img" src="{{ url_for('static', filename='images/manual/dashboard.png') }}">
</div> </div>
<div class="col s12 m8"> <div class="col s12 m8">
<p> <p>
The <a href="{{ url_for('main.dashboard') }}">dashboard</a> provides a central overview of all resources assigned to the The <a href="{{ url_for('main.dashboard') }}">dashboard</a> provides a central
user. These are <a href="{{ url_for('main.dashboard', _anchor='corpora') }}">corpora</a> and created <a href="{{ url_for('main.dashboard', _anchor='jobs') }}">jobs</a>. Corpora are freely composable overview of all user-specific resources.
annotated text collections and jobs are the initiated file processing These are <a href="{{ url_for('main.dashboard', _anchor='corpora') }}">corpora</a>,
procedures. Both the job and the corpus listings can be searched using created <a href="{{ url_for('main.dashboard', _anchor='jobs') }}">jobs</a>, and
the search field displayed above them. model <a href="{{ url_for('main.dashboard', _anchor='contributions') }}">contributions</a>.
A <b>corpus</b> is a freely composable annotated text collection.
A <b>job</b> is an initiated file processing procedure.
A <b>model</b> is a mathematical system for pattern recognition based on data examples that have been processed by AI. One can search for jobs as
well as corpus listings using the search field displayed above them on the dashboard.
Uploaded models can be found and edited by clicking on the corresponding service under <b>My Contributions</b>.
</p> </p>
</div> </div>
<div class="col s12">&nbsp;</div> <div class="col s12">&nbsp;</div>
@ -20,10 +27,10 @@
<p> <p>
A corpus is a collection of texts that can be analyzed using the A corpus is a collection of texts that can be analyzed using the
Corpus Analysis service. All texts must be in the verticalized text Corpus Analysis service. All texts must be in the verticalized text
file format, which can be obtained via the Natrual Language file format, which can be obtained via the Natural Language
Processing service. It contains, in addition to the actual text, Processing service. It contains, in addition to the text,
further annotations that are searchable in combination with optional further annotations that are searchable in combination with optional
addable metadata during your analysis. metadata that can be added during your analysis.
</p> </p>
</div> </div>
</div> </div>

View File

@ -0,0 +1,107 @@
<h3 class="manual-chapter-title">Services</h3>
<h4>Services</h4>
<p>
In this section, we will describe the different services nopaque has to offer.
</p>
<div class="row">
<div class="col s12 m4">
<img alt="Services" class="materialboxed responsive-img" src="{{ url_for('static', filename='images/manual/services.png') }}">
</div>
<div class="col s12 m8">
<p>
Nopaque was designed to be modular. Its modules are implemented in
self-contained <b>services</b>, each of which represents a step in the
workflow. The typical workflow involves using services one after another,
consecutively.
The typical workflow order can be taken from the listing of the
services in the left sidebar or from the nopaque manual (accessible via the pink
button in the upper right corner).
The services can also be applied at different starting and ending points,
which allows you to conduct your work flexibly.
All services are versioned, so the data generated with nopaque is always
reproducible.
</p>
<p>For all services, it is recommended to use the latest version (selected
in the drop-down menu on the service page) unless you need a model
only available in an earlier version or are looking to reproduce data that was originally generated
using an older version.</p>
</div>
</div>
<h4>File Setup</h4>
<p>
The <a href="{{ url_for('services.file_setup_pipeline') }}">File Setup Service</a> bundles image data, such as scans and photos,
together in a handy PDF file. To use this service, use the job form to
select the images to be bundled, choose the desired service version, and
specify a title and description.
Note that the File Setup service will sort the images based on their file name in
ascending order. It is thus important and highly recommended to name
them accordingly, for example:
page-01.png, page-02.jpg, page-03.tiff. Generally, you can assume
that the images will be sorted in the order in which the file explorer of
your operating system lists them when you view the files in a folder
sorted in ascending order by file name.
</p>
<h4>Optical Character Recognition (OCR)</h4>
<p>
The <a href="{{ url_for('services.tesseract_ocr_pipeline') }}">Tesseract OCR Pipeline</a>
converts image data - like photos and scans - into text data, making them machine-readable.
This step enables you to proceed with the computational analysis of your documents.
To use this service, use the job form to select the file you want to convert into text data.
Then, choose the language model and service version you would like to use. Enter a title and description for your file and then
submit your job. Once the job is finished, the results can be found and downloaded further below on the page, under
the section labeled "Results."
</p>
<h4>Handwritten Text Recognition (HTR)</h4>
<p>The Transkribus HTR Pipeline is currently
deactivated. We are working on an alternative solution. In the meantime, you can
try using Tesseract OCR, though the results will likely be poor.</p>
<h4>Natural Language Processing (NLP)</h4>
<p>The <a href="{{ url_for('services.spacy_nlp_pipeline') }}">SpaCy NLP Pipeline</a> extracts
information from plain text files (.txt format) via computational linguistic data processing
(tokenization, lemmatization, part-of-speech tagging and named-entity recognition).
To use this service, select the .txt file that you want to extract this information from.
Then select the language model and the version you want to use. Once the job is finished, you can find and download the files in
<b>.json</b> and <b>.vrt</b> format under the section labeled “Results.”</p>
<h4>Corpus Analysis</h4>
<p>
With the <a href="{{ url_for('services.corpus_analysis') }}">Corpus Analysis</a>
service, it is possible to create a text corpus
and then explore it with analytical tools. The analysis session is realized
on the server side by the Open Corpus Workbench software, which enables
efficient and complex searches with the help of the CQP Query Language.</p>
<p>
To use this service, navigate to the corpus you would like to analyze and click on the Analyze button.
This will take you to an analysis overview page for your corpus. Here, you can find
a visualization of general linguistic information of your corpus, including tokens,
sentences, unique words, unique lemmas, unique parts of speech and unique simple
parts of speech. You will also find a pie chart of the proportional textual makeup
of your corpus and can view the linguistic information for each individual text file.
A more detailed visualization of token frequencies with a search option is also on
this page.
</p>
<p>
From the corpus analysis overview page, you can navigate to other analysis modules:
the Query Builder (under Concordance) and the Reader.
</p>
<p>
With the <b>Reader</b>, you can read your corpus texts tokenized with the associated linguistic information. The tokens
can be shown as lemmas, parts of speech, words, and can be displayed in different
ways: visually as plain text with the option of highlighted entities or as chips.
</p>
<p>
The Concordance module allows for more specific, query-oriented text analyses.
Here, you can filter out text parameters and structural attributes in different
combinations. This is explained in more detail in the <b>Query Builder</b> section of the
manual.
</p>

View File

@ -7,7 +7,7 @@
<div class="col s12 m8"> <div class="col s12 m8">
<p> <p>
To <a href="{{ url_for('corpora.create_corpus') }}">create a corpus</a>, you To <a href="{{ url_for('corpora.create_corpus') }}">create a corpus</a>, you
can use the "New Corpus" button, which can be found on both, the Corpus can use the "New Corpus" button, which can be found on both the Corpus
Analysis Service page and the Dashboard below the corpus list. Fill in the input Analysis Service page and the Dashboard below the corpus list. Fill in the input
mask to Create a corpus. After you have completed the input mask, you will mask to Create a corpus. After you have completed the input mask, you will
be automatically taken to the corpus overview page (which can be called up be automatically taken to the corpus overview page (which can be called up
@ -43,5 +43,5 @@
the way of how a token is displayed, by using the text style switch. The the way of how a token is displayed, by using the text style switch. The
concordance module offers some more options regarding the context size of concordance module offers some more options regarding the context size of
search results. If the context does not provide enough information you can search results. If the context does not provide enough information you can
hop into the reader module by using the lupe icon next to a match. hop into the reader module by using the magnifier icon next to a match.
</p> </p>

View File

@ -1,5 +1,22 @@
<h3 class="manual-chapter-title">CQP Query Language</h3> <h3 class="manual-chapter-title">CQP Query Language</h3>
<p>Within the Corpus Query Language, a distinction is made between two types of annotations: positional attributes and structural attributes. Positional attributes refer to a token, e.g. the word "book" is assigned the part-of-speech tag "NN", the lemma "book" and the simplified part-of-speech tag "NOUN" within the token structure. Structural attributes refer to text structure-giving elements such as sentence and entity markup. For example, the markup of a sentence is represented in the background as follows:</p> <h4 id="cqp-query-language">CQP Query Language</h4>
<p>In this section, we will provide some functional explanations of the properties of the Corpus Query Language. This includes
the types of linguistic attributes one can work with and how to use them in your query.</p>
<div style="border: 1px solid; padding-left: 20px; margin-right: 400px; margin-bottom: 40px;">
<h5>Content</h5>
<ol style="list-style-type:disc">
<li><a href="#overview-annotations">Overview of annotation types</a></li>
<li><a href="#positional-attributes">Positional attributes</a></li>
<li><a href="#searching-positional-attributes">How to search for positional attributes</a></li>
<li><a href="#structural-attributes">Structural attributes</a></li>
<li><a href="#searching-structural-attributes">How to search for structural attributes</a></li>
</ol>
</div>
<h4 id="overview-annotations">Overview of annotation types</h4>
<p>Within the Corpus Query Language, a distinction is made between two types of annotations: <b>positional attributes</b> and <b>structural attributes</b>. Positional attributes refer to a token, e.g. the word "book" is assigned the part-of-speech tag "NN", the lemma "book" and the simplified part-of-speech tag "NOUN" within the token structure. Structural attributes refer to text structure-giving elements such as sentence and entity markup. For example, the markup of a sentence is represented in the background as follows:</p>
<pre> <pre>
<code> <code>
<span class="green-text">&lt;s&gt; structural attribute</span> <span class="green-text">&lt;s&gt; structural attribute</span>
@ -13,7 +30,7 @@
</code> </code>
</pre> </pre>
<h4>Positional attributes</h4> <h4 id="positional-attributes">Positional attributes</h4>
<p>Before you can start searching for positional attributes (also called tokens), it is necessary to know what properties they contain.</p> <p>Before you can start searching for positional attributes (also called tokens), it is necessary to know what properties they contain.</p>
<ol> <ol>
<li><span class="blue-text"><b>word</b></span>: The string as it is also found in the original text</li> <li><span class="blue-text"><b>word</b></span>: The string as it is also found in the original text</li>
@ -33,7 +50,7 @@
</li> </li>
</ol> </ol>
<h5>Searching for positional attributes</h5> <h5 id="searching-positional-attributes">How to search for positional attributes</h5>
<div> <div>
<p> <p>
<b>Token with no condition on any property (also called <span class="blue-text">wildcard token</span>)</b><br> <b>Token with no condition on any property (also called <span class="blue-text">wildcard token</span>)</b><br>
@ -118,7 +135,7 @@
<pre style="margin-top: 0;" ><code> ^ ^ the braces indicate the start and end of an option group</code></pre> <pre style="margin-top: 0;" ><code> ^ ^ the braces indicate the start and end of an option group</code></pre>
</div> </div>
<h4>Structural attributes</h4> <h4 id="structural-attributes">Structural attributes</h4>
<p>nopaque provides several structural attributes for query. A distinction is made between attributes with and without value.</p> <p>nopaque provides several structural attributes for query. A distinction is made between attributes with and without value.</p>
<ol> <ol>
<li><span class="green-text"><b>s</b></span>: Annotates a sentence</li> <li><span class="green-text"><b>s</b></span>: Annotates a sentence</li>
@ -153,7 +170,7 @@
</li> </li>
</ol> </ol>
<h5>Searching for structural attributes</h5> <h5 id="searching-structural-attributes">How to search for structural attributes</h5>
<pre><code>&lt;ent&gt; [] &lt;/ent&gt;; A one token long entity of any type</code></pre> <pre><code>&lt;ent&gt; [] &lt;/ent&gt;; A one token long entity of any type</code></pre>
<pre><code>&lt;ent_type="PERSON"&gt; [] &lt;/ent_type&gt;; A one token long entity of type PERSON</code></pre> <pre><code>&lt;ent_type="PERSON"&gt; [] &lt;/ent_type&gt;; A one token long entity of type PERSON</code></pre>
<pre><code>&lt;ent_type="PERSON"&gt; []* &lt;/ent_type&gt;; Entity of any length of type PERSON</code></pre> <pre><code>&lt;ent_type="PERSON"&gt; []* &lt;/ent_type&gt;; Entity of any length of type PERSON</code></pre>

View File

@ -1,26 +1,46 @@
<h3 class="manual-chapter-title">Query Builder Tutorial</h3> <h3 class="manual-chapter-title">Query Builder Tutorial</h3>
<h4>Query Builder</h4>
<p>The query builder helps you to make a query in the form of the Corpus Query <p>In this section, we will provide you with more detailed instructions on how to use the Query Builder -
Language (CQL) to your text. You can use the CQL to filter out various types of nopaque's main user-friendly tool for finding and analyzing different linguistic elements of your texts.</p>
text parameters, for example, a specific word, a lemma, or you can set part-of-speech
tags (pos) that indicate the type of word you are looking for (a noun, an
adjective, etc.). In addition, you can also search for structural attributes,
or specify your query for a token (word, lemma, pos) via entity typing. And of
course everything can be combined. You can find examples for different queries
under the tab "Examples".</p>
<p></p>
<br>
<div style="border: 1px solid; padding-left: 20px; margin-right: 400px; margin-bottom: 40px;"> <div style="border: 1px solid; padding-left: 20px; margin-right: 400px; margin-bottom: 40px;">
<h5>Content</h5> <h5>Content</h5>
<ol style="list-style-type:disc"> <ol style="list-style-type:disc">
<li><a href="#add-new-token-tutorial">Add new token to your query</a></li> <li><a href="#general-overview">General Overview</a></li>
<li><a href="#edit-options-tutorial">Options to edit your query</a></li> <li><a href="#add-new-token-tutorial">Add a new token to your query</a></li>
<li><a href="#add-structural-attribute-tutorial">Add structural Attributes to your query</a></li> <li><a href="#edit-options-tutorial">Options for editing your query</a></li>
<li><a href="#add-structural-attribute-tutorial">Add structural attributes to your query</a></li>
<li><a href="#general-options-query-builder">General options</a></li> <li><a href="#general-options-query-builder">General options</a></li>
</ol> </ol>
</div> </div>
<h4 id="general-overview">General Overview</h4>
<p>The Query Builder can be accessed via <a href="{{ url_for('main.dashboard') }}">My Corpora</a> or <a href="{{ url_for('services.corpus_analysis') }}">Corpus Analysis</a> in the sidebar options.
Click on the corpus you wish to analyze. You will be sent to its corpus overview page.
Here, click on <b>Analyze</b> to reach the analysis page.
The analysis page features different options for analyzing your corpus, including
visualizations and a <b>Reader</b> module. In this case, we want to open the query builder.
To do so, click on the <b>Concordance</b> button on the top of the page.</p>
<p>The query builder uses the <b>Corpus Query Language (CQL)</b> to help you make a query for analyzing your texts.
In this way, it is possible to filter out various types of text parameters, for
example, a specific word, a lemma, or you can set part-of-speech
tags (pos) that indicate the type of word you are looking for (a noun, an
adjective, etc.). In addition, you can also search for structural attributes,
or specify your query for a token (word, lemma, pos) via entity typing. And of
course, the different text parameters can be combined.</p>
<p>Tokens and structural attributes can be added by clicking on the <b>"+"</b> button
(what we call the "input marker") in the input field or the labeled buttons below it. Elements
added are shown as chips. These can be reorganized using drag and drop. The input
marker can also be moved in this way. Its position shows where new elements will be added. <br>
A "translation" of your query into Corpus Query Language (CQL) will be displayed underneath the query field.</p>
<p>For more information, see our <b>manual section for the Corpus Query Language.</b>
<br>
Advanced users can make direct use of CQL by switching to <b>expert mode</b> via the toggle button.
</p>
<p>The entire input field can be cleared using the red trash icon on the right.</p>
<br>
{# Add Token Tutorial #} {# Add Token Tutorial #}
<div> <div>
<hr> <hr>
@ -29,7 +49,7 @@ under the tab "Examples".</p>
<h4 id="add-new-token-tutorial">Add new token to your Query</h4> <h4 id="add-new-token-tutorial">Add new token to your Query</h4>
<p>If you are only looking for a specific token, you can click on the left <p>If you are only looking for a specific token, you can click on the left
button and select the type of token you are looking for from the drop-down menu. button and select the type of token you are looking for from the drop-down menu.
By default "Word" is selected. </p> "Word" is selected by default. </p>
<h5>Word and Lemma</h5> <h5>Word and Lemma</h5>
<p>If you want to search for a specific word or lemma and the respective <p>If you want to search for a specific word or lemma and the respective
@ -46,13 +66,13 @@ under the tab "Examples".</p>
"simple_pos" to search for different parts-of-speech. You can find an overview "simple_pos" to search for different parts-of-speech. You can find an overview
of all tags under the "Tagsets" tab.</p> of all tags under the "Tagsets" tab.</p>
<img src="{{ url_for('static', filename='images/manual/query_builder/pos.gif') }}" alt="part-of-speech-tag explanation" width="100%;" style="margin-bottom:20px;"> <img src="{{ url_for('static', filename='images/manual/query_builder/pos.gif') }}" alt="part-of-speech-tag explanation" width="100%;" style="margin-bottom:20px;">
<br>
<h5>Empty Token</h5> <h5>Empty Token</h5>
<p>Here you can search for an empty token. This selection should never stand <p>Here you can search for a token with unspecified attributes (also called wildcard token). This
alone and should always be extended with an incidence modifier or stand in a selection should never stand alone and should always be extended with an incidence modifier or stand in a
larger query, because otherwise all possible tokens would be searched for and larger query, because otherwise all possible tokens would be searched for and
the program would crash.</p> the program would crash.</p>
<p></p>
<br> <br>
</div> </div>
@ -61,8 +81,8 @@ under the tab "Examples".</p>
<hr> <hr>
<p></p> <p></p>
<br> <br>
<h4 id="edit-options-tutorial">Options to edit your token</h4> <h4 id="edit-options-tutorial">Options for editing your query</h4>
<p>You have the possibility to extend or specify your searched token with <p>You have the possibility to extend or specify the token you are searching for with
certain factors. For this the query builder offers some fixed options. You can certain factors. For this the query builder offers some fixed options. You can
find more information about the options in the Corpus Query Language Tutorial.</p> find more information about the options in the Corpus Query Language Tutorial.</p>
<br> <br>
@ -76,7 +96,6 @@ under the tab "Examples".</p>
variants are not limited, so you can manually enter more options in the same variants are not limited, so you can manually enter more options in the same
format. "Option1" and "option2" must be replaced accordingly. </p> format. "Option1" and "option2" must be replaced accordingly. </p>
<img src="{{ url_for('static', filename='images/manual/query_builder/option_group.gif') }}" alt="option group explanation" width="100%;" style="margin-bottom:20px;"> <img src="{{ url_for('static', filename='images/manual/query_builder/option_group.gif') }}" alt="option group explanation" width="100%;" style="margin-bottom:20px;">
<p></p>
<br> <br>
<h5>Incidence Modifiers</h5> <h5>Incidence Modifiers</h5>
@ -85,7 +104,7 @@ under the tab "Examples".</p>
not at all or once: <br> not at all or once: <br>
[word = "is"] [word="it"] [word="your"] [word="little"]? [word = "dog"] <br> [word = "is"] [word="it"] [word="your"] [word="little"]? [word = "dog"] <br>
Here the word "little" should occur either once or not at all. With Here the word "little" should occur either once or not at all. With
[word="dogs?"] the search is for "dog" or "dogs". </p> [word="dogs?"] the search is for "dog" or "dogs".</p>
<br> <br>
<h5>Ignore Case</h5> <h5>Ignore Case</h5>
@ -101,7 +120,10 @@ under the tab "Examples".</p>
this case. For this you can simply string them together: <br> this case. For this you can simply string them together: <br>
[word="I"] [word="will" & simple_pos="VERB"] [word="go"].</p> [word="I"] [word="will" & simple_pos="VERB"] [word="go"].</p>
<img src="{{ url_for('static', filename='images/manual/query_builder/or_and.gif') }}" alt="OR/AND explanation" width="100%;" style="margin-bottom:20px;"> <img src="{{ url_for('static', filename='images/manual/query_builder/or_and.gif') }}" alt="OR/AND explanation" width="100%;" style="margin-bottom:20px;">
<p></p> <p>Tokens that have already been added can also be modified by clicking on the corresponding
pen icon. Click on the "ignore case" box, for example, and the query builder will
not differentiate between upper- and lower-case letters for that respective token.
New conditions added apply to the most recent token information.</p>
<br> <br>
</div> </div>
@ -120,26 +142,33 @@ under the tab "Examples".</p>
This search can of course be specified if you search for particular tokens or This search can of course be specified if you search for particular tokens or
entities between the sentence tags (&lt;s&gt;&lt;/s&gt;). For example, you can search for entities between the sentence tags (&lt;s&gt;&lt;/s&gt;). For example, you can search for
sentences that contain only a noun, verb, and adjective. <br> sentences that contain only a noun, verb, and adjective. <br>
After clicking on Sentence you will see a <div class="chip" style="background-color:#FD9720;">Sentence Start</div>. Click on Sentence to add the sentence chips: <div class="chip" style="background-color:#FD9720;">Sentence Start</div>
When you are done with your query or the content and <div class="chip" style="background-color:#FD9720;">Sentence End</div>.
between the Sentence tags, you have to click the Sentence button one more time These mark where the sentence starts and ends. Use drag-and-drop to place them accordingly. When
to close it. The corresponding button is called the Sentence attribute is added, the input marker will automatically be
<div class="chip" style="background-color:#FD9720;">Sentence End</div>.<br> moved between the sentence chips. Use drag-and-drop as needed to continue your query
at a different position.
<br> <br>
<h5>Entities</h5> <h5>Entities</h5>
<p>With entities, i.e. units of meaning, you search for text sections that <p>With entities, i.e. units of meaning, you can search for text sections that
follow a certain code. For example, persons, dates, certain events. You can contain more specific information, for example, persons, dates, or events. The
select the codes using the drop-down menus. You can find an explanation of codes for these categories can be selected using the drop-down menus. You can find an explanation of
the respective abbreviations under the tab "Tagsets". <br> these abbreviations under the tab "Tagsets". <br>
You can also search for unspecified entities by selecting "Add entity of any type".</p> You can also search for unspecified entities by selecting "Add entity of any type".</p>
To close the entity query you started, you have to click the entity button one more time. This will make the <div class="chip" style="background-color:#A6E22D;">Entity End</div> element appear in your query. Click on the Entity button to add the entity chips <div class="chip" style="background-color:#A6E22D;">Entity Type=</div> and <div class="chip" style="background-color:#A6E22D;">Entity End</div>.
<p>The entity type can be changed by clicking on the pen symbol on the chip. When
the Entity attribute is added, the input marker will automatically be
moved between the entity chips. Use drag-and-drop as needed to continue your query
at a different position.</p>
<img src="{{ url_for('static', filename='images/manual/query_builder/entity.gif') }}" alt="entity explanation" width="100%;" style="margin-bottom:20px;"> <img src="{{ url_for('static', filename='images/manual/query_builder/entity.gif') }}" alt="entity explanation" width="100%;" style="margin-bottom:20px;">
<p></p> <p></p>
<br> <br>
<h5>Meta Data</h5> <h5>Meta Data (currently unavailable)</h5>
<p>With the meta data you can annotate your text and add specific conditions. <p>The meta data function is being worked on and cannot currently be used!
<br>
With the meta data you can annotate your text and add specific conditions.
You can select a category on the left and enter your desired value on the right. You can select a category on the left and enter your desired value on the right.
The selected metadata will apply to your entire request and will be added at the end.</p> The selected metadata will apply to your entire request and will be added at the end.</p>
<img src="{{ url_for('static', filename='images/manual/query_builder/meta_data.gif') }}" alt="meta data explanation" width="100%;" style="margin-bottom:20px;"> <img src="{{ url_for('static', filename='images/manual/query_builder/meta_data.gif') }}" alt="meta data explanation" width="100%;" style="margin-bottom:20px;">
@ -155,14 +184,39 @@ under the tab "Examples".</p>
<br> <br>
<h4 id="general-options-query-builder">General Options of the query builder</h4> <h4 id="general-options-query-builder">General Options of the query builder</h4>
<p>You have several options to edit your query after adding it to the preview.</p> <p>You have several options to edit your query after adding it to the preview.</p>
<br>
<h5>Editing the elements</h5>
<p>You can edit your query chips by clicking on the pen icon.</p>
<img src="{{ url_for('static', filename='images/manual/query_builder/editing_chips.gif') }}" alt="editing explanation" width="100%;" style="margin-bottom:20px;">
<br>
<h5>Deleting the elements</h5> <h5>Deleting the elements</h5>
<p>You can delete the added elements from the query by clicking the X behind the respective content.</p> <p>You can delete the added elements from the query by clicking the X behind the respective content.</p>
<img src="{{ url_for('static', filename='images/manual/query_builder/delete.gif') }}" alt="delete explanation" width="100%;" style="margin-bottom:20px;"> <img src="{{ url_for('static', filename='images/manual/query_builder/delete.gif') }}" alt="delete explanation" width="100%;" style="margin-bottom:20px;">
<br>
<h5>Move the elements of your query</h5> <h5>Move the elements of your query</h5>
<p>You can drag and drop elements to customize your query.</p> <p>You can drag and drop elements to customize your query.</p>
<img src="{{ url_for('static', filename='images/manual/query_builder/drag_and_drop.gif') }}" alt="Drag&Drop explanation" width="100%;" style="margin-bottom:20px;"> <img src="{{ url_for('static', filename='images/manual/query_builder/drag_and_drop.gif') }}" alt="Drag&Drop explanation" width="100%;" style="margin-bottom:20px;">
<br>
<h5>Setting an incidence modifier</h5>
<p>With the incidence modifier option, you can specify the number of
times a token should appear in your query. This is particularly relevant for empty
tokens (tokens with unspecified attributes). Click on a token (blue chip) and
select the desired option from the list to add an incidence modifier. To
close the list without adding anything, click on the token again.</p>
<img src="{{ url_for('static', filename='images/manual/query_builder/incidence_modifier.gif') }}" alt="incidence modifier explanation" width="100%;" style="margin-bottom:20px;">
<br>
<h5>Switching between Query Builder and Expert mode</h5>
<p>To work with the plain Corpus Query Language instead of using the Query Builder, click on the "expert mode"
switch. Your query can be entered into the input field. All elements previously added will be carried over
into expert mode. Click on the switch again to switch back to the Query Builder if desired. All recognized elements
will be parsed into chips; those not recognized will be deleted from the query.</p>
<img src="{{ url_for('static', filename='images/manual/query_builder/expert_mode.gif') }}" alt="expert mode explanation" width="100%;" style="margin-bottom:20px;">
</div> </div>

View File

@ -3,45 +3,46 @@
<h2>Manual</h2> <h2>Manual</h2>
<ul class="tabs" id="manual-modal-toc"> <ul class="tabs" id="manual-modal-toc">
<li class="tab"><a href="#manual-modal-introduction">Introduction</a></li> <li class="tab"><a href="#manual-modal-introduction">Introduction</a></li>
<li class="tab"><a href="#manual-modal-registration-and-log-in">Registration and Log in</a></li> <li class="tab"><a href="#manual-modal-getting-started">Getting Started</a></li>
<li class="tab"><a href="#manual-modal-dashboard">Dashboard</a></li> <li class="tab"><a href="#manual-modal-dashboard">Dashboard</a></li>
<li class="tab"><a href="#manual-modal-services">Services</a></li> <li class="tab"><a href="#manual-modal-services">Services</a></li>
<li class="tab"><a href="#manual-modal-a-closer-look-at-the-corpus-analysis">A closer look at the Corpus Analysis</a></li> <!-- <li class="tab"><a href="#manual-modal-a-closer-look-at-the-corpus-analysis">A closer look at the Corpus Analysis</a></li> -->
<li class="tab"><a href="#manual-modal-cqp-query-language">CQP Query Language</a></li>
<li class="tab"><a href="#manual-modal-query-builder">Query Builder</a></li> <li class="tab"><a href="#manual-modal-query-builder">Query Builder</a></li>
<li class="tab"><a href="#manual-modal-cqp-query-language">CQP Query Language</a></li>
<li class="tab"><a href="#manual-modal-tagsets">Tagsets</a></li> <li class="tab"><a href="#manual-modal-tagsets">Tagsets</a></li>
</ul> </ul>
<div id="manual-modal-introduction"> <div id="manual-modal-introduction">
<br> <br>
{% include "main/_manual_modal/_01_introduction.html.j2" %} {% include "_base/_modals/_manual/01_introduction.html.j2" %}
</div> </div>
<div id="manual-modal-registration-and-log-in"> <div id="manual-modal-getting-started">
<br> <br>
{% include "main/_manual_modal/_02_registration_and_log_in.html.j2" %} {% include "_base/_modals/_manual/02_getting_started.html.j2" %}
</div> </div>
<div id="manual-modal-dashboard"> <div id="manual-modal-dashboard">
<br> <br>
{% include "main/_manual_modal/_03_dashboard.html.j2" %} {% include "_base/_modals/_manual/03_dashboard.html.j2" %}
</div> </div>
<div id="manual-modal-services"> <div id="manual-modal-services">
<br> <br>
{% include "main/_manual_modal/_06_services.html.j2" %} {% include "_base/_modals/_manual/06_services.html.j2" %}
</div> </div>
<div id="manual-modal-a-closer-look-at-the-corpus-analysis"> <!-- <div id="manual-modal-a-closer-look-at-the-corpus-analysis">
<br> <br>
{% include "main/_manual_modal/_07_a_closer_look_at_the_corpus_analysis.html.j2" %} {% include "_base/_modals/_manual/07_a_closer_look_at_the_corpus_analysis.html.j2" %}
</div> </div> -->
<div id="manual-modal-cqp-query-language"> <div id="manual-modal-cqp-query-language">
<br> <br>
{% include "main/_manual_modal/_08_cqp_query_language.html.j2" %} {% include "_base/_modals/_manual/08_cqp_query_language.html.j2" %}
</div> </div>
<div id="manual-modal-query-builder"> <div id="manual-modal-query-builder">
<br> <br>
{% include "main/_manual_modal/_09_query_builder.html.j2" %} {% include "_base/_modals/_manual/09_query_builder.html.j2" %}
</div> </div>
<div id="manual-modal-tagsets"> <div id="manual-modal-tagsets">
<br> <br>
{% include "main/_manual_modal/_10_tagsets.html.j2" %} {% include "_base/_modals/_manual/10_tagsets.html.j2" %}
</div> </div>
</div> </div>
<div class="modal-footer"> <div class="modal-footer">

View File

@ -27,7 +27,10 @@
<div class="col s12 m3"> <div class="col s12 m3">
<span>© 2020 Bielefeld University</span> <span>© 2020 Bielefeld University</span>
</div> </div>
<div class="col s12 m9 right-align"> <div class="col s12 m2">
<span class="right"><b>Version {{ config.NOPAQUE_VERSION }}</b></span>
</div>
<div class="col s12 m7 right-align">
<a class="btn-small primary-variant-color waves-effect waves-light" href="{{ url_for('main.faq') }}"><i class="left material-icons">info_outline</i>Frequently Asked Questions</a> <a class="btn-small primary-variant-color waves-effect waves-light" href="{{ url_for('main.faq') }}"><i class="left material-icons">info_outline</i>Frequently Asked Questions</a>
<a class="btn-small primary-variant-color waves-effect waves-light" href="mailto:{{ config.NOPAQUE_SERVICE_DESK }}"><i class="left material-icons">mail</i>Report an issue</a> <a class="btn-small primary-variant-color waves-effect waves-light" href="mailto:{{ config.NOPAQUE_SERVICE_DESK }}"><i class="left material-icons">mail</i>Report an issue</a>
<a class="btn-small primary-variant-color waves-effect waves-light" href="https://gitlab.ub.uni-bielefeld.de/sfb1288inf/nopaque" target="_blank"><i class="left material-icons">code</i>GitLab</a> <a class="btn-small primary-variant-color waves-effect waves-light" href="https://gitlab.ub.uni-bielefeld.de/sfb1288inf/nopaque" target="_blank"><i class="left material-icons">code</i>GitLab</a>

View File

@ -0,0 +1,5 @@
{% include "_base/_modals/manual.html.j2" %}
{% if current_user.is_authenticated and not current_user.terms_of_use_accepted %}
{% include "_base/_modals/terms_of_use.html.j2" %}
{% endif %}

View File

@ -5,12 +5,20 @@
<a href="#" data-target="sidenav" class="sidenav-trigger"><i class="material-icons">menu</i></a> <a href="#" data-target="sidenav" class="sidenav-trigger"><i class="material-icons">menu</i></a>
{% endif %} {% endif %}
<a href="{{ url_for('main.index') }}" class="brand-logo" style="height: 100%; overflow: hidden;"> <a href="{{ url_for('main.index') }}" class="brand-logo" style="height: 100%; overflow: hidden;">
<img class="hide-on-small-only" src="{{ url_for('static', filename='images/nopaque_-_logo_name_slogan.svg') }}" style="height: 128px; margin-top: -32px; margin-left: -32px;">
<img class="hide-on-med-and-up" src="{{ url_for('static', filename='images/nopaque_-_logo.svg') }}" style="height: 128px; margin-top: -32px; margin-left: -32px;"> <img class="hide-on-med-and-up" src="{{ url_for('static', filename='images/nopaque_-_logo.svg') }}" style="height: 128px; margin-top: -32px; margin-left: -32px;">
</a> </a>
<ul class="right hide-on-med-and-down"> <ul class="right hide-on-med-and-down">
<li><a href="{{ url_for('main.news') }}"><i class="material-icons left">email</i>News</a></li> <li><a href="{{ url_for('main.news') }}"><i class="material-icons left">email</i>News</a></li>
<li><a class="dropdown-trigger no-autoinit" data-target="nav-more-dropdown" href="#!" id="nav-more-dropdown-trigger"><i class="material-icons">more_vert</i></a></li> <li>
<a class="dropdown-trigger no-autoinit" data-target="nav-more-dropdown" href="#!" id="nav-more-dropdown-trigger">
{% if current_user.is_authenticated %}
<img src="{{ url_for('users.user_avatar', user_id=current_user.id) }}" alt="avatar" class="circle left" style="height: 54px; padding: 10px 10px 0 0;">
{{ current_user.username }} ({{ current_user.email }})
{% else %}
<i class="material-icons left">more_vert</i>
{% endif %}
</a>
</li>
</ul> </ul>
</div> </div>
<div class="nav-content primary-variant-color"> <div class="nav-content primary-variant-color">
@ -22,17 +30,14 @@
{% endif %} {% endif %}
{%- endfor -%} {%- endfor -%}
</ul> </ul>
{# {% if current_user.is_authenticated %} <a class="btn-floating btn-large halfway-fab modal-trigger pink tooltipped waves-effect waves-light" data-tooltip="Manual" href="#manual-modal"><i class="material-icons">school</i></a>
<a class="btn-floating btn-large halfway-fab modal-trigger pink tooltipped waves-effect waves-light" data-tooltip="Roadmap" href="#roadmap-modal"><i class="material-icons">explore</i></a>
{% endif %} #}
<a class="btn-floating btn-large halfway-fab modal-trigger pink tooltipped waves-effect waves-light" data-tooltip="Manual" href="#manual-modal"><i class="material-icons">help</i></a>
</div> </div>
</nav> </nav>
</div> </div>
<ul class="dropdown-content" id="nav-more-dropdown"> <ul class="dropdown-content" id="nav-more-dropdown">
{# <li><a href="{{ url_for('main.user_manual') }}"><i class="material-icons left">help</i>Manual</a></li> #}
{% if current_user.is_authenticated %} {% if current_user.is_authenticated %}
<li><a href="{{ url_for('users.user', user_id=current_user.id) }}"><i class="material-icons left">person</i>My Profile</a></li>
<li><a href="{{ url_for('settings.settings') }}"><i class="material-icons left">settings</i>Settings</a></li> <li><a href="{{ url_for('settings.settings') }}"><i class="material-icons left">settings</i>Settings</a></li>
<li class="divider" tabindex="-1"></li> <li class="divider" tabindex="-1"></li>
<li><a href="{{ url_for('auth.logout') }}">Log out</a></li> <li><a href="{{ url_for('auth.logout') }}">Log out</a></li>

View File

@ -76,28 +76,28 @@
{%- assets {%- assets
filters='rjsmin', filters='rjsmin',
output='gen/corpus-analysis.%(version)s.js', output='gen/corpus-analysis.%(version)s.js',
'js/CorpusAnalysis/index.js', 'js/corpus-analysis/index.js',
'js/CorpusAnalysis/cqi/index.js', 'js/corpus-analysis/cqi/index.js',
'js/CorpusAnalysis/cqi/constants.js', 'js/corpus-analysis/cqi/constants.js',
'js/CorpusAnalysis/cqi/errors.js', 'js/corpus-analysis/cqi/errors.js',
'js/CorpusAnalysis/cqi/status.js', 'js/corpus-analysis/cqi/status.js',
'js/CorpusAnalysis/cqi/api/index.js', 'js/corpus-analysis/cqi/api/index.js',
'js/CorpusAnalysis/cqi/api/client.js', 'js/corpus-analysis/cqi/api/client.js',
'js/CorpusAnalysis/cqi/models/index.js', 'js/corpus-analysis/cqi/models/index.js',
'js/CorpusAnalysis/cqi/models/resource.js', 'js/corpus-analysis/cqi/models/resource.js',
'js/CorpusAnalysis/cqi/models/attributes.js', 'js/corpus-analysis/cqi/models/attributes.js',
'js/CorpusAnalysis/cqi/models/subcorpora.js', 'js/corpus-analysis/cqi/models/subcorpora.js',
'js/CorpusAnalysis/cqi/models/corpora.js', 'js/corpus-analysis/cqi/models/corpora.js',
'js/CorpusAnalysis/cqi/client.js', 'js/corpus-analysis/cqi/client.js',
'js/CorpusAnalysis/query-builder/index.js', 'js/corpus-analysis/query-builder/index.js',
'js/CorpusAnalysis/query-builder/element-references.js', 'js/corpus-analysis/query-builder/element-references.js',
'js/CorpusAnalysis/query-builder/general-query-builder-functions.js', 'js/corpus-analysis/query-builder/query-builder.js',
'js/CorpusAnalysis/query-builder/structural-attribute-builder-functions.js', 'js/corpus-analysis/query-builder/structural-attribute-builder-functions.js',
'js/CorpusAnalysis/query-builder/token-attribute-builder-functions.js', 'js/corpus-analysis/query-builder/token-attribute-builder-functions.js',
'js/CorpusAnalysis/CorpusAnalysisApp.js', 'js/corpus-analysis/app.js',
'js/CorpusAnalysis/CorpusAnalysisConcordance.js', 'js/corpus-analysis/concordance-extension.js',
'js/CorpusAnalysis/CorpusAnalysisReader.js', 'js/corpus-analysis/reader-extension.js',
'js/CorpusAnalysis/CorpusAnalysisStaticVisualization.js' 'js/corpus-analysis/static-visualization-extension.js'
%} %}
<script src="{{ ASSET_URL }}"></script> <script src="{{ ASSET_URL }}"></script>
{%- endassets %} {%- endassets %}

View File

@ -1,42 +1,18 @@
<ul class="sidenav sidenav-fixed" id="sidenav"> <ul class="sidenav sidenav-fixed" id="sidenav">
<li> <li class="primary-color hide-on-small-only">
<div class="user-view" style="padding-top: 1px; padding-left: 20px !important; padding-right: 20px !important; height: 112px;"> <div style="overflow: hidden; height: 64px; width: 250px;">
<div class="background primary-color"></div> <a href="{{ url_for('main.index') }}">
<div class="row"> <img class="hide-on-small-only" src="{{ url_for('static', filename='images/nopaque_-_logo_name_slogan.svg') }}" style="height: 128px; margin-top: -32px;">
<div class="col s5">
<a href="{{ url_for('users.user', user_id=current_user.id) }}">
<img src="{{ url_for('users.user_avatar', user_id=current_user.id) }}" alt="user-image" class="circle responsive-img" style="height:80%; margin-top: 22px;">
</a> </a>
</div> </div>
<div class="col s5" style="word-wrap: break-word; margin-left:-10px;">
<span class="white-text name">
{% if current_user.username|length > 18 %}
{{ current_user.username[:15] + '...' }}
{% else %}
{{ current_user.username }}
{% endif %}
</span>
<span class="white-text email" style="padding-top:5px;">
{% if current_user.email|length > 32 %}
{{ current_user.email[:29] + '...' }}
{% else %}
{{ current_user.email }}
{% endif %}
</span>
</div>
</div>
</div>
</li> </li>
{# <li class="primary-color"> <li class="primary-variant-color center-align hide-on-small-only" style="padding-top: 8px; height:48px;">
<div style="overflow: hidden;height: 64px; width: 250px;"> <img src="{{ url_for('static', filename='images/nopaque_slogan_transparent.png') }}" style="width:85%">
<img class="hide-on-small-only" src="{{ url_for('static', filename='images/nopaque_-_logo_name_slogan.svg') }}" style="height: 128px; margin-top: -32px; margin-left: ;"> </li>
</div> <li class="hide-on-med-and-up"><a class="waves-effect" href="{{ url_for('main.index') }}"><i class="material-icons left">home</i>nopaque</a></li>
</li> #} <li>
{# <li><a href="{{ url_for('main.index') }}">nopaque</a></li> #}
<li class="hide-on-large-only">
<a class="waves-effect" href="{{ url_for('main.news') }}"><i class="material-icons left">email</i>News</a> <a class="waves-effect" href="{{ url_for('main.news') }}"><i class="material-icons left">email</i>News</a>
</li> </li>
{# <li><a href="{{ url_for('main.user_manual') }}"><i class="material-icons">help</i>Manual</a></li> #}
<li> <li>
<a class="waves-effect" class="waves-effect" href="{{ url_for('main.dashboard') }}"><i class="material-icons">dashboard</i>Dashboard</a> <a class="waves-effect" class="waves-effect" href="{{ url_for('main.dashboard') }}"><i class="material-icons">dashboard</i>Dashboard</a>
<ul> <ul>
@ -74,8 +50,9 @@
<li> <li>
<a class="waves-effect" class="waves-effect" href="{{ url_for('main.social_area') }}"><i class="material-icons">rocket_launch</i>Social Area</a> <a class="waves-effect" class="waves-effect" href="{{ url_for('main.social_area') }}"><i class="material-icons">rocket_launch</i>Social Area</a>
<ul> <ul>
<li><a href="{{ url_for('users.user', user_id=current_user.id) }}" style="padding-left: 47px;"><i class="material-icons left">person</i>My Profile</a></li>
<li> <li>
<a class="waves-effect" href="{{ url_for('main.social_area', _anchor='public-users') }}" style="padding-left: 47px;"><i class="material-icons">person</i>Public Users</a> <a class="waves-effect" href="{{ url_for('main.social_area', _anchor='public-users') }}" style="padding-left: 47px;"><i class="material-icons">group</i>Public Users</a>
</li> </li>
<li> <li>
<a class="waves-effect" href="{{ url_for('main.social_area', _anchor='public-corpora') }}" style="padding-left: 47px;"><i class="nopaque-icons">I</i>Public Corpora</a> <a class="waves-effect" href="{{ url_for('main.social_area', _anchor='public-corpora') }}" style="padding-left: 47px;"><i class="nopaque-icons">I</i>Public Corpora</a>

View File

@ -16,16 +16,16 @@
{% block styles %} {% block styles %}
{{ super() }} {{ super() }}
{% include "_styles.html.j2" %} {% include "_base/styles.html.j2" %}
{% endblock styles %} {% endblock styles %}
{% block navbar %} {% block navbar %}
{% include "_navbar.html.j2" %} {% include "_base/navbar.html.j2" %}
{% endblock navbar %} {% endblock navbar %}
{% block sidenav %} {% block sidenav %}
{% if current_user.is_authenticated %} {% if current_user.is_authenticated %}
{% include "_sidenav.html.j2" %} {% include "_base/sidenav.html.j2" %}
{% endif %} {% endif %}
{% endblock sidenav %} {% endblock sidenav %}
@ -34,23 +34,17 @@
{% block page_content %}{% endblock page_content %} {% block page_content %}{% endblock page_content %}
<div id="modals"> <div id="modals">
{% block modals %} {% block modals %}
{% include "main/_manual_modal.html.j2" %} {% include "_base/modals.html.j2" %}
{# {% if current_user.is_authenticated %}
{% include "_roadmap.html.j2" %}
{% endif %} #}
{% if current_user.is_authenticated and not current_user.terms_of_use_accepted %}
{% include "_terms_of_use_modal.html.j2" %}
{% endif %}
{% endblock modals %} {% endblock modals %}
</div> </div>
{% endblock main %} {% endblock main %}
{% block footer_attribs %} class="page-footer primary-variant-color"{% endblock footer_attribs %} {% block footer_attribs %} class="page-footer primary-variant-color"{% endblock footer_attribs %}
{% block footer %} {% block footer %}
{% include "_footer.html.j2" %} {% include "_base/footer.html.j2" %}
{% endblock footer %} {% endblock footer %}
{% block scripts %} {% block scripts %}
{{ super() }} {{ super() }}
{% include "_scripts.html.j2" %} {% include "_base/scripts.html.j2" %}
{% endblock scripts %} {% endblock scripts %}

View File

@ -128,7 +128,7 @@
{# The extension scripts #} {# The extension scripts #}
{% macro scripts() %} {% macro scripts() %}
<script> <script>
const corpusAnalysisConcordance = new CorpusAnalysisConcordance(corpusAnalysisApp); const corpusAnalysisConcordance = new nopaque.corpus_analysis.ConcordanceExtension(corpusAnalysisApp);
const concordanceQueryBuilder = new ConcordanceQueryBuilder(); const concordanceQueryBuilder = new nopaque.corpus_analysis.query_builder.QueryBuilder();
</script> </script>
{% endmacro %} {% endmacro %}

View File

@ -23,4 +23,15 @@
</div> </div>
</form> </form>
</div> </div>
<div id="corpus-analysis-concordance-switch-to-query-builder-submit-modal" class="modal">
<div class="modal-content">
<h4>Switch to Query Builder</h4>
<p>Switching back to the query builder can cause elements the query builder does not recognize to become lost. Continue?</p>
</div>
<div class="modal-footer">
<a class="btn modal-close waves-effect waves-light">Cancel</a>
<a class="btn modal-close red waves-effect waves-light switch-action" data-switch-action="confirm">Switch to Query Builder</a>
</div>
</div>
{% endmacro %} {% endmacro %}

View File

@ -1,11 +1,18 @@
{% macro card_content(id_prefix) %} {% macro card_content(id_prefix) %}
<form id="corpus-analysis-concordance-query-builder-form"> <form id="corpus-analysis-concordance-query-builder-form">
<div class="row"> <div class="row">
<div class="col s9" id="corpus-analysis-concordance-query-builder-input-field-container"> <div class="col s8" id="corpus-analysis-concordance-query-builder-input-field-container">
<div id="corpus-analysis-concordance-query-builder-input-field"> <div id="corpus-analysis-concordance-query-builder-input-field">
<p id="corpus-analysis-concordance-query-builder-input-field-placeholder">Click on the buttons below to build your query.</p> <a class="query-element-target btn-floating btn-small blue-grey lighten-4 waves-effect waves-light tooltipped" style="margin-bottom:10px; margin-right:5px;" draggable="true" data-position="bottom" data-tooltip="Add a token to your query">
<i class="material-icons">add</i>
</a>
</div> </div>
</div> </div>
<div class="col s1 center-align">
<a class="btn-floating btn waves-effect waves-light red" id="corpus-analysis-concordance-delete-query-button" style="margin-top:18px;">
<i class="material-icons">delete</i>
</a>
</div>
<div class="input-field col s3"> <div class="input-field col s3">
<i class="material-icons prefix">arrow_forward</i> <i class="material-icons prefix">arrow_forward</i>
<input class="validate corpus-analysis-action" id="corpus-analysis-concordance-form-subcorpus-name" name="subcorpus-name" type="text" required pattern="^[A-Z][a-z0-9\-]*" value="Last"></input> <input class="validate corpus-analysis-action" id="corpus-analysis-concordance-form-subcorpus-name" name="subcorpus-name" type="text" required pattern="^[A-Z][a-z0-9\-]*" value="Last"></input>
@ -30,9 +37,10 @@
<div class="row"> <div class="row">
<div class="col s12"> <div class="col s12">
<p></p> <p></p>
<a class="btn waves-effect waves-light tooltipped modal-trigger" href="#corpus-analysis-concordance-positional-attr-modal" data-position="bottom" data-tooltip="Search for any token, for example a word, a lemma or a part-of-speech tag">Add new token to your query</a> <a class="btn-small waves-effect waves-light tooltipped modal-trigger" href="#corpus-analysis-concordance-positional-attr-modal" data-position="bottom" data-tooltip="Search for any token, for example a word, a lemma or a part-of-speech tag">Add new token to your query</a>
<a class="btn waves-effect waves-light tooltipped modal-trigger" href="#corpus-analysis-concordance-structural-attr-modal" data-position="bottom" data-tooltip="Structure your query with structural attributes, for example sentences, entities or annotate the text">Add structural attributes to your query</a> <a class="btn-small waves-effect waves-light tooltipped modal-trigger" href="#corpus-analysis-concordance-structural-attr-modal" data-position="bottom" data-tooltip="Structure your query with structural attributes, for example sentences, entities or annotate the text">Add structural attributes to your query</a>
<a class="btn waves-effect waves-light tooltipped dropdown-trigger disabled" data-target="corpus-analysis-concordance-token-incidence-modifiers-dropdown" data-toggle-area="token-incidence-modifiers" data-position="top" data-tooltip="Incidence Modifiers are special characters or patterns, <br>which determine how often a character represented previously should occur.">incidence modifiers</a> <a class="btn-small waves-effect waves-light tooltipped dropdown-trigger disabled" data-target="corpus-analysis-concordance-token-incidence-modifiers-dropdown" data-toggle-area="token-incidence-modifiers" data-position="top" data-tooltip="Incidence Modifiers are special characters or patterns, <br>which determine how often a character represented previously should occur.">incidence modifiers</a>
<a class="modal-trigger" data-manual-modal-chapter="manual-modal-query-builder" href="#manual-modal"><i class="material-icons left" style="color:black">help_outline</i></a>
</div> </div>
</div> </div>
<div class="row"> <div class="row">
@ -59,7 +67,7 @@
<div id="corpus-analysis-concordance-structural-attr-modal" class="modal"> <div id="corpus-analysis-concordance-structural-attr-modal" class="modal">
<div class="modal-content"> <div class="modal-content">
<div class="attr-modal-header"> <div class="attr-modal-header">
<h5>Which structural attribute do you want to add to your query?<a class="modal-trigger" data-manual-modal-chapter="manual-modal-query-builder" href="#manual-modal"><i class="material-icons left" id="corpus-analysis-concordance-add-structural-attribute-tutorial-info-icon">help_outline</i></a></h5> <h5>Which structural attribute do you want to add to your query?<a class="modal-trigger" data-manual-modal-chapter="manual-modal-query-builder" data-manual-modal-chapter-anchor="add-structural-attribute-tutorial" href="#manual-modal"><i class="material-icons left" id="corpus-analysis-concordance-add-structural-attribute-tutorial-info-icon">help_outline</i></a></h5>
</div> </div>
<p></p> <p></p>
<br> <br>
@ -67,7 +75,6 @@
<div class="col s12"> <div class="col s12">
<a class="btn-small waves-effect waves-light" data-structural-attr-modal-action-button="sentence" data-toggle-area="sentence-button">sentence</a> <a class="btn-small waves-effect waves-light" data-structural-attr-modal-action-button="sentence" data-toggle-area="sentence-button">sentence</a>
<a class="btn-small waves-effect waves-light" data-structural-attr-modal-action-button="entity" data-toggle-area="entity-button">entity</a> <a class="btn-small waves-effect waves-light" data-structural-attr-modal-action-button="entity" data-toggle-area="entity-button">entity</a>
<a class="btn-small waves-effect waves-light" data-structural-attr-modal-action-button="meta-data" data-toggle-area="text-annotation-button">Meta Data</a>
</div> </div>
</div> </div>
<div id="corpus-analysis-concordance-entity-builder" data-toggle-area="entity-builder" class="hide"> <div id="corpus-analysis-concordance-entity-builder" data-toggle-area="entity-builder" class="hide">
@ -111,39 +118,6 @@
</div> </div>
</div> </div>
</div> </div>
<div id="corpus-analysis-concordance-text-annotation-builder" data-toggle-area="text-annotation-builder" class="hide">
<p></p>
<br>
<div class="row">
<div class= "input-field col s4 l3">
<select name="text-annotation-options" id="corpus-analysis-concordance-text-annotation-options">
<option class="btn-small waves-effect waves-light" value="address">address</option>
<option class="btn-small waves-effect waves-light" value="author">author</option>
<option class="btn-small waves-effect waves-light" value="booktitle">booktitle</option>
<option class="btn-small waves-effect waves-light" value="chapter">chapter</option>
<option class="btn-small waves-effect waves-light" value="editor">editor</option>
<option class="btn-small waves-effect waves-light" value="institution">institution</option>
<option class="btn-small waves-effect waves-light" value="journal">journal</option>
<option class="btn-small waves-effect waves-light" value="pages">pages</option>
<option class="btn-small waves-effect waves-light" value="publisher">publisher</option>
<option class="btn-small waves-effect waves-light" value="publishing_year">publishing year</option>
<option class="btn-small waves-effect waves-light" value="school">school</option>
<option class="btn-small waves-effect waves-light" value="title">title</option>
</select>
<label>Meta data</label>
</div>
<div class= "input-field col s7 l5">
<i class="material-icons prefix">mode_edit</i>
<input placeholder="Type in your text annotation" type="text" id="corpus-analysis-concordance-text-annotation-input">
</div>
<div class="col s1 l1 center-align">
<p class="btn-floating waves-effect waves-light" id="corpus-analysis-concordance-text-annotation-submit">
<i class="material-icons right">send</i>
</p>
</div>
<div class="hide" id="corpus-analysis-concordance-no-value-metadata-message"><i>No value entered!</i></div>
</div>
</div>
</div> </div>
</div> </div>
{% endmacro %} {% endmacro %}
@ -154,7 +128,7 @@
<div class="row attr-modal-header"> <div class="row attr-modal-header">
<p></p> <p></p>
<div class="col s12"> <div class="col s12">
<h5>Which kind of token are you looking for? <a class="modal-trigger" data-manual-modal-chapter="manual-modal-query-builder" href="#manual-modal"><i class="material-icons left" id="corpus-analysis-concordance-token-tutorial-info-icon">help_outline</i></a></h5> <h5>Which kind of token are you looking for? <a class="modal-trigger" data-manual-modal-chapter="manual-modal-query-builder" data-manual-modal-chapter-anchor="add-new-token-tutorial" href="#manual-modal"><i class="material-icons left" id="corpus-analysis-concordance-token-tutorial-info-icon">help_outline</i></a></h5>
</div> </div>
<div class="input-field col s3" style="margin-left:42px;"> <div class="input-field col s3" style="margin-left:42px;">
<select id="corpus-analysis-concordance-positional-attr-selection"> <select id="corpus-analysis-concordance-positional-attr-selection">
@ -363,7 +337,7 @@
</div> </div>
<div id="corpus-analysis-concordance-token-edit-options" data-toggle-area="input-field-options"> <div id="corpus-analysis-concordance-token-edit-options" data-toggle-area="input-field-options">
<div class="row"> <div class="row">
<h6>Options to edit your token: <a class="modal-trigger" data-manual-modal-chapter="manual-modal-query-builder" href="#manual-modal"><i class="material-icons left" id="corpus-analysis-concordance-edit-options-tutorial-info-icon">help_outline</i></a></h6> <h6>Options to edit your token: <a class="modal-trigger" data-manual-modal-chapter="manual-modal-query-builder" href="#manual-modal" data-manual-modal-chapter-anchor="edit-options-tutorial"><i class="material-icons left" id="corpus-analysis-concordance-edit-options-tutorial-info-icon">help_outline</i></a></h6>
</div> </div>
<p></p> <p></p>
<div class="row"> <div class="row">
@ -436,9 +410,3 @@
</div> </div>
</div> </div>
{% endmacro %} {% endmacro %}
{# {% macro scripts(id_prefix) %}
<script>
const concordanceQueryBuilder = new ConcordanceQueryBuilder();
</script>
{% endmacro %} #}

View File

@ -79,6 +79,6 @@
{# The extension scripts #} {# The extension scripts #}
{% macro scripts() %} {% macro scripts() %}
<script> <script>
const corpusAnalysisReader = new CorpusAnalysisReader(corpusAnalysisApp); const corpusAnalysisReader = new nopaque.corpus_analysis.ReaderExtension(corpusAnalysisApp);
</script> </script>
{% endmacro %} {% endmacro %}

View File

@ -158,7 +158,7 @@
{% macro scripts() %} {% macro scripts() %}
<script> <script>
const corpusAnalysisStaticVisualization = new CorpusAnalysisStaticVisualization(corpusAnalysisApp); const corpusAnalysisStaticVisualization = new nopaque.corpus_analysis.StaticVisualizationExtension(corpusAnalysisApp);
</script> </script>
{% endmacro %} {% endmacro %}

View File

@ -81,7 +81,7 @@
{% block scripts %} {% block scripts %}
{{ super() }} {{ super() }}
<script> <script>
const corpusAnalysisApp = new CorpusAnalysisApp({{ corpus.hashid|tojson }}); const corpusAnalysisApp = new nopaque.corpus_analysis.App({{ corpus.hashid|tojson }});
</script> </script>
{{ concordance_extension.scripts() }} {{ concordance_extension.scripts() }}

View File

@ -153,16 +153,16 @@
let deleteJobRequestElement = document.querySelector('#delete-job-request'); let deleteJobRequestElement = document.querySelector('#delete-job-request');
let restartJobRequestElement = document.querySelector('#restart-job-request'); let restartJobRequestElement = document.querySelector('#restart-job-request');
deleteJobRequestElement.addEventListener('click', (event) => { deleteJobRequestElement.addEventListener('click', (event) => {
requests.jobs.entity.delete({{ job.hashid|tojson }}); nopaque.requests.jobs.entity.delete({{ job.hashid|tojson }});
}); });
restartJobRequestElement.addEventListener('click', (event) => { restartJobRequestElement.addEventListener('click', (event) => {
requests.jobs.entity.restart({{ job.hashid|tojson }}); nopaque.requests.jobs.entity.restart({{ job.hashid|tojson }});
}); });
if ({{ current_user.is_administrator()|tojson }}) { if ({{ current_user.is_administrator()|tojson }}) {
let jobLogButtonElement = document.querySelector('#job-log-button'); let jobLogButtonElement = document.querySelector('#job-log-button');
jobLogButtonElement.addEventListener('click', (event) => { jobLogButtonElement.addEventListener('click', (event) => {
requests.jobs.entity.log({{ job.hashid|tojson }}) nopaque.requests.jobs.entity.log({{ job.hashid|tojson }})
.then( .then(
(response) => { (response) => {
response.json() response.json()

View File

@ -1,9 +0,0 @@
<h3 class="manual-chapter-title">Introduction</h3>
<p>
nopaque is a web-based digital working environment. It implements a
workflow based on the research process in the humanities and supports its
users in processing their data in order to subsequently apply digital
analysis methods to them. All processes are implemented in a specially
provided cloud environment with established open source software. This
always ensures that no personal data of the users is disclosed.
</p>

View File

@ -1,18 +0,0 @@
<h3 class="manual-chapter-title">Registration and Log in</h3>
<div class="row">
<div class="col s12 m4">
<img alt="Registration and Log in" class="materialboxed responsive-img" src="{{ url_for('static', filename='images/manual/registration-and-log-in.png') }}">
</div>
<div class="col s12 m8">
<p>
Before you can start using the web platform, you need to create a user
account. This requires only a few details: just a user name, an e-mail
address and a password are needed. In order to register yourself, fill out
the form on the <a href="{{ url_for('auth.register') }}">registration page</a>. After successful registration, the
created account must be verified. To do this, follow the instructions
given in the automatically sent e-mail. Afterwards, you can log in as
usual with your username/email address and password in the log-in form
located next to the registration button.
</p>
</div>
</div>

View File

@ -1,52 +0,0 @@
<h3 class="manual-chapter-title">Services</h3>
<div class="row">
<div class="col s12 m4">
<img alt="Services" class="materialboxed responsive-img" src="{{ url_for('static', filename='images/manual/services.png') }}">
</div>
<div class="col s12 m8">
<p>
nopaque was designed from the ground up to be modular. This modularity
means that the offered workflow provides variable entry and exit points,
so that different starting points and goals can be flexibly addressed.
Each of these modules is implemented in a self-contained service, each of
which represents a step in the workflow. The services are coordinated in
such a way that they can be used consecutively. The order can either be
taken from the listing of the services in the left sidebar or from the
roadmap (accessible via the pink compass in the upper right corner). All
services are versioned, so the data generated with nopaque is always
reproducible.
</p>
</div>
</div>
<h4 class="manual-chapter-title">File Setup</h4>
<p>
The <a href="{{ url_for('services.file_setup_pipeline') }}">File Setup Service</a> bundles image data, such as scans and photos,
together in a handy PDF file. To use this service, use the job form to
select the images to be bundled, choose the desired service version, and
specify a title and description. Please note that the service sorts the
images into the resulting PDF file based on the file names. So naming the
images correctly is of great importance. It has proven to be a good practice
to name the files according to the following scheme:
page-01.png, page-02.jpg, page-03.tiff, etc. In general, you can assume
that the images will be sorted in the order in which the file explorer of
your operating system lists them when you view the files in a folder
sorted in ascending order by file name.
</p>
<h4>Optical Character Recognition (OCR)</h4>
<p>Coming soon...</p>
<h4>Handwritten Text Recognition (HTR)</h4>
<p>Coming soon...</p>
<h4>Natural Language Processing (NLP)</h4>
<p>Coming soon...</p>
<h4>Corpus Analysis</h4>
<p>
With the corpus analysis service, it is possible to create a text corpus
and then explore it in an analysis session. The analysis session is realized
on the server side by the Open Corpus Workbench software, which enables
efficient and complex searches with the help of the CQP Query Language.
</p>

View File

@ -198,7 +198,7 @@
function mastodonStatusToElement(status) { function mastodonStatusToElement(status) {
let date = new Date(status.created_at).toLocaleString('en-US'); let date = new Date(status.created_at).toLocaleString('en-US');
let newsElement = Utils.HTMLToElement( let newsElement = nopaque.Utils.HTMLToElement(
` `
<div class="row"> <div class="row">
<div class="col s11"> <div class="col s11">
@ -222,7 +222,7 @@
function bisBlogsEntryToElement(entry) { function bisBlogsEntryToElement(entry) {
let date = new Date(entry.published).toLocaleString('en-US'); let date = new Date(entry.published).toLocaleString('en-US');
let newsElement = Utils.HTMLToElement( let newsElement = nopaque.Utils.HTMLToElement(
` `
<div class="row"> <div class="row">
<div class="col s1"> <div class="col s1">

View File

@ -1,43 +0,0 @@
{% extends "base.html.j2" %}
{% block page_content %}
<div class="container">
<div class="row">
<div class="col s12">
<h1 id="title">{{ title }}</h1>
</div>
<div class="col s12 m10">
<div class="section scrollspy" id="introduction">
{% include "main/manual/_01_introduction.html.j2" %}
</div>
<div class="section scrollspy" id="registration-and-log-in">
{% include "main/manual/_02_registration_and_log_in.html.j2" %}
</div>
<div class="section scrollspy" id="dashboard">
{% include "main/manual/_03_dashboard.html.j2" %}
</div>
<div class="section scrollspy" id="services">
{% include "main/manual/_06_services.html.j2" %}
</div>
<div class="section scrollspy" id="a-closer-look-at-the-corpus-analysis">
{% include "main/manual/_07_a_closer_look_at_the_corpus_analysis.html.j2" %}
</div>
<div class="section scrollspy" id="cqp-query-language">
{% include "main/manual/_08_cqp_query_language.html.j2" %}
</div>
</div>
<div class="col m2 hide-on-small-only">
<ul class="section table-of-contents" style="position: fixed !important;">
<li><a href="#introduction">Introduction</a></li>
<li><a href="#registration-and-log-in">Registration and Log in</a></li>
<li><a href="#dashboard">Dashboard</a></li>
<li><a href="#services">Services</a></li>
<li><a href="#a-closer-look-at-the-corpus-analysis">A closer look at the Corpus Analysis</a></li>
<li><a href="#cqp-query-language">CQP Query Language</a></li>
</ul>
</div>
</div>
</div>
{% endblock page_content %}

View File

@ -12,11 +12,11 @@
</div> </div>
<div class="col s12" id="fgho-sommerschule-2023-vorbereitungen"> <div class="col s12" id="fgho-sommerschule-2023-vorbereitungen">
{% include "workshops/_fgho_sommerschule_2023/_vorbereitungen.html.j2" %} {% include "workshops/_fgho_sommerschule_2023/vorbereitungen.html.j2" %}
</div> </div>
<div class="col s12" id="fgho-sommerschule-2023-workshop-aufgaben"> <div class="col s12" id="fgho-sommerschule-2023-workshop-aufgaben">
{% include "workshops/_fgho_sommerschule_2023/_aufgaben.html.j2" %} {% include "workshops/_fgho_sommerschule_2023/aufgaben.html.j2" %}
</div> </div>
</div> </div>
</div> </div>

View File

@ -115,6 +115,8 @@ class Config:
NOPAQUE_READCOOP_USERNAME = os.environ.get('NOPAQUE_READCOOP_USERNAME') NOPAQUE_READCOOP_USERNAME = os.environ.get('NOPAQUE_READCOOP_USERNAME')
NOPAQUE_READCOOP_PASSWORD = os.environ.get('NOPAQUE_READCOOP_PASSWORD') NOPAQUE_READCOOP_PASSWORD = os.environ.get('NOPAQUE_READCOOP_PASSWORD')
NOPAQUE_VERSION='1.0.0'
@staticmethod @staticmethod
def init_app(app: Flask): def init_app(app: Flask):
# Set up logging according to the corresponding (NOPAQUE_LOG_*) # Set up logging according to the corresponding (NOPAQUE_LOG_*)

View File

@ -4,7 +4,6 @@
# More information about the environment variables can be found here: # # More information about the environment variables can be found here: #
# https://hub.docker.com/_/postgres # # https://hub.docker.com/_/postgres #
############################################################################## ##############################################################################
POSTGRES_DB= POSTGRES_DB=
POSTGRES_USER= POSTGRES_USER=

View File

@ -20,6 +20,7 @@ depends_on = None
def upgrade(): def upgrade():
# TODO: Add error handling for sqlalchemy.exc.ProgrammingError
for user in User.query.all(): for user in User.query.all():
spacy_nlp_pipeline_models_dir = os.path.join(user.path, 'spacy_nlp_pipeline_models') spacy_nlp_pipeline_models_dir = os.path.join(user.path, 'spacy_nlp_pipeline_models')
if os.path.exists(spacy_nlp_pipeline_models_dir): if os.path.exists(spacy_nlp_pipeline_models_dir):

View File

@ -17,6 +17,7 @@ depends_on = None
def upgrade(): def upgrade():
# TODO: Add error handling for sqlalchemy.exc.ProgrammingError
for user in User.query.all(): for user in User.query.all():
old_tesseract_ocr_pipeline_model_path = os.path.join(user.path, 'tesseract_ocr_models') old_tesseract_ocr_pipeline_model_path = os.path.join(user.path, 'tesseract_ocr_models')
new_tesseract_ocr_pipeline_model_path = os.path.join(user.path, 'tesseract_ocr_pipeline_models') new_tesseract_ocr_pipeline_model_path = os.path.join(user.path, 'tesseract_ocr_pipeline_models')