50 Commits

SHA1 Message Date
48fe7c0702 Slight changes to services description 2024-06-20 12:53:48 +02:00
5a2723b617 updates and restructuring 2024-03-26 15:29:26 +01:00
4425d50140 more manual updates 2024-03-19 17:33:37 +01:00
39113a6f17 manual sections 01, 02, 06 2024-03-14 17:07:53 +01:00
a53f1d216b manual, esp. dashboard and services 2024-03-14 09:35:22 +01:00
ffd7a3ad91 Manual additions: Intro / Getting Started 2024-03-05 15:41:17 +01:00
5dce269736 Version number + original slogan font 2023-12-18 12:49:30 +01:00
13369296d3 rename docker-entrypoint.sh to docker-nopaque-entrypoint.sh 2023-12-15 13:56:03 +01:00
4f6e1c121f Add nopaque version config variable 2023-12-15 08:47:59 +01:00
438a257fe3 Update CI script 2023-12-15 08:47:46 +01:00
2e88d7d035 Merge branch 'query-builder' of gitlab.ub.uni-bielefeld.de:sfb1288inf/nopaque into query-builder 2023-12-15 08:37:02 +01:00
b338c33d42 Bump cwb version 2023-12-15 08:36:50 +01:00
d6cebddd92 Updated query builder gifs and instructions 2023-12-12 14:56:08 +01:00
07fda0e95a Merge branch 'query-builder' of gitlab.ub.uni-bielefeld.de:sfb1288inf/nopaque into query-builder 2023-12-07 22:35:41 +01:00
3927d9e4cd Edits in structural attributes section and others 2023-12-07 22:34:00 +01:00
8f5d5ffdec Merge branch 'query-builder' of gitlab.ub.uni-bielefeld.de:sfb1288inf/nopaque into query-builder 2023-12-07 12:46:48 +01:00
f02d1619e2 Try to implement anchor tags 2023-12-07 12:46:37 +01:00
892f1f799e Merge branch 'query-builder' of gitlab.ub.uni-bielefeld.de:sfb1288inf/nopaque into query-builder 2023-12-05 15:00:49 +01:00
f5e98ae655 Add badges to README 2023-12-05 15:00:21 +01:00
f790106e0e Merge branch 'query-builder' of gitlab.ub.uni-bielefeld.de:sfb1288inf/nopaque into query-builder 2023-12-05 14:54:05 +01:00
c57acc73d2 Manual changes 2023-12-05 14:42:38 +01:00
678a0767b7 Change Manual icon 2023-11-30 11:21:39 +01:00
17a9338d9f Fix job deletion from job page 2023-11-29 16:11:14 +01:00
a7cbce1eda Fix wrong spacy-nlp-pipeline version number 2023-11-29 10:45:35 +01:00
fa28c875e1 Merge branch 'query-builder' of gitlab.ub.uni-bielefeld.de:sfb1288inf/nopaque into query-builder 2023-11-28 12:40:05 +01:00
0927edcceb Bug Fixes 2023-11-28 12:39:54 +01:00
9c22370eea Implement force download parameter in model insert_defaults methods 2023-11-28 12:10:55 +01:00
bdcc80a66f Add new tesseract-ocr-pipeline version. Remove redundant spacy-nlp-pipeline version. 2023-11-28 10:34:30 +01:00
9be5ce6014 link logo to homepage 2023-11-23 13:32:54 +01:00
00e4c3ade3 Add logo to sidenav 2023-11-23 13:26:19 +01:00
79a16cae83 Add links to my profile page 2023-11-23 13:16:21 +01:00
c5aea0be94 Merge branch 'query-builder' of gitlab.ub.uni-bielefeld.de:sfb1288inf/nopaque into query-builder 2023-11-22 12:50:18 +01:00
afcb890ccf Element Target drag&drop + small improvements 2023-11-22 12:50:08 +01:00
9627708950 rename manual files to fit new naming convention 2023-11-21 12:31:10 +01:00
1bb1408988 make the workshops package fit the new file scheme 2023-11-21 10:11:49 +01:00
79bafdea89 Switch back to older settings and extension .vscode setup 2023-11-20 15:26:22 +01:00
a2d617718b Update .vscode directory contents 2023-11-20 11:05:56 +01:00
691b2de5b2 Bug Fix: lock chips after switch to QB 2023-11-20 09:48:06 +01:00
eb0e7c9ba1 Fix error on not authenticated users 2023-11-20 09:35:53 +01:00
ab132746e7 Add TODO in migration scripts 2023-11-17 10:42:55 +01:00
ae5646512d Merge branch 'query-builder' of gitlab.ub.uni-bielefeld.de:sfb1288inf/nopaque into query-builder 2023-11-17 10:15:50 +01:00
fc66327920 Make double quotation marks escapable again 2023-11-17 10:15:39 +01:00
9bfc96ad41 minor codestyle fix 2023-11-16 17:22:07 +01:00
008938b46b Avatar in top right corner 2023-11-16 15:57:27 +01:00
4f24e9f9da Erase meta data logic from struc attribute builder 2023-11-14 09:48:38 +01:00
d0fe4360bb Merge branch 'query-builder' of gitlab.ub.uni-bielefeld.de:sfb1288inf/nopaque into query-builder 2023-11-13 15:37:26 +01:00
1c18806c9c Merge branch 'query-builder' of gitlab.ub.uni-bielefeld.de:sfb1288inf/nopaque into query-builder 2023-11-13 15:53:17 +01:00
9487aa7a60 Restructure modals and base template 2023-11-13 15:53:14 +01:00
6559051fd5 Delete condition logic in token builder 2023-11-13 15:37:19 +01:00
0882e085a3 Function renaming 2023-11-13 14:46:19 +01:00
61 changed files with 904 additions and 631 deletions

View File

@ -8,6 +8,6 @@
!.flaskenv
!boot.sh
!config.py
!docker-entrypoint.sh
!docker-nopaque-entrypoint.sh
!nopaque.py
!requirements.txt

View File

@ -1,3 +1,37 @@
include:
- template: Security/Container-Scanning.gitlab-ci.yml
##############################################################################
# Pipeline stages in order of execution #
##############################################################################
stages:
- build
- publish
- sca
##############################################################################
# Pipeline behavior #
##############################################################################
workflow:
rules:
# Run the pipeline on commits to the default branch
- if: $CI_COMMIT_BRANCH == $CI_DEFAULT_BRANCH
variables:
# Set the Docker image tag to `latest`
DOCKER_IMAGE: $CI_REGISTRY_IMAGE:latest
when: always
# Run the pipeline on tag creation
- if: $CI_COMMIT_TAG
variables:
# Set the Docker image tag to the Git tag name
DOCKER_IMAGE: $CI_REGISTRY_IMAGE:$CI_COMMIT_REF_NAME
when: always
# Don't run the pipeline on all other occasions
- when: never
##############################################################################
# Default values for pipeline jobs #
##############################################################################
default:
image: docker:24.0.6
services:
@ -5,38 +39,46 @@ default:
tags:
- docker
##############################################################################
# CI/CD variables for all jobs in the pipeline #
##############################################################################
variables:
DOCKER_TLS_CERTDIR: /certs
DOCKER_BUILD_PATH: .
DOCKERFILE: Dockerfile
build_image:
##############################################################################
# Pipeline jobs #
##############################################################################
build:
stage: build
rules:
- if: $CI_COMMIT_BRANCH == $CI_DEFAULT_BRANCH
when: on_success
variables:
IMAGE_TAG: $CI_REGISTRY_IMAGE:latest
- if: $CI_COMMIT_TAG
when: "on_success"
variables:
IMAGE_TAG: $CI_REGISTRY_IMAGE:$CI_COMMIT_REF_NAME
- when: never
before_script:
- docker login -u gitlab-ci-token -p $CI_JOB_TOKEN $CI_REGISTRY
script:
- docker build -t $IMAGE_TAG .
- docker push $IMAGE_TAG
- docker build --tag $DOCKER_IMAGE --file $DOCKERFILE $DOCKER_BUILD_PATH
- docker save $DOCKER_IMAGE > docker_image.tar
artifacts:
paths:
- docker_image.tar
include:
- template: Security/Container-Scanning.gitlab-ci.yml
publish:
stage: publish
before_script:
- docker login --username gitlab-ci-token --password $CI_JOB_TOKEN $CI_REGISTRY
script:
- docker load --input docker_image.tar
- docker push $DOCKER_IMAGE
after_script:
- docker logout $CI_REGISTRY
container_scanning:
stage: sca
rules:
# Run the job on commits to the default branch
- if: $CI_COMMIT_BRANCH == $CI_DEFAULT_BRANCH
when: on_success
variables:
CS_IMAGE: $CI_REGISTRY_IMAGE:latest
when: always
# Run the job on tag creation
- if: $CI_COMMIT_TAG
when: on_success
variables:
CS_IMAGE: ${CI_REGISTRY_IMAGE}:${CI_COMMIT_REF_NAME}
when: always
# Don't run the job on all other occasions
- when: never
variables:
CS_IMAGE: $DOCKER_IMAGE
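For context: the restructured pipeline resolves the image tag once in the workflow rules ($DOCKER_IMAGE becomes ":latest" for default-branch commits and ":<tag name>" for tag pipelines) and reuses it in the build, publish and container_scanning jobs. The following minimal Python sketch only illustrates that resolution logic; the function and the example registry path are not part of the repository.

def resolve_docker_image(registry_image, commit_branch=None, default_branch='master', commit_tag=None):
    # Mirrors the workflow rules above: default-branch commits get ':latest',
    # tag pipelines get ':<tag name>', everything else runs no pipeline at all.
    if commit_branch is not None and commit_branch == default_branch:
        return f'{registry_image}:latest'
    if commit_tag:
        return f'{registry_image}:{commit_tag}'
    return None  # corresponds to '- when: never'

# Hypothetical example values, not the project's actual registry path:
print(resolve_docker_image('registry.example.com/sfb1288inf/nopaque', commit_tag='v1.0.0'))
# -> registry.example.com/sfb1288inf/nopaque:v1.0.0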

View File

@ -1,7 +1,8 @@
{
"recommendations": [
"samuelcolvin.jinjahtml",
"irongeek.vscode-env",
"ms-azuretools.vscode-docker",
"ms-python.python"
"ms-python.python",
"samuelcolvin.jinjahtml"
]
}

View File

@ -1,13 +1,9 @@
{
"editor.rulers": [79],
"files.insertFinalNewline": true,
"python.terminal.activateEnvironment": false,
"[css]": {
"editor.tabSize": 2
},
"[scss]": {
"editor.tabSize": 2
},
"[html]": {
"editor.tabSize": 2
},
@ -17,7 +13,7 @@
"[jinja-html]": {
"editor.tabSize": 2
},
"[jinja-js]": {
"[scss]": {
"editor.tabSize": 2
}
}

View File

@ -17,9 +17,6 @@ RUN apt-get update \
&& rm --recursive /var/lib/apt/lists/*
COPY docker-entrypoint.sh /usr/local/bin/
RUN useradd --create-home --no-log-init nopaque \
&& groupadd docker \
&& usermod --append --groups docker nopaque
@ -47,7 +44,10 @@ RUN python3 -m pip install --requirement requirements.txt \
USER root
COPY docker-nopaque-entrypoint.sh /usr/local/bin/
EXPOSE 5000
ENTRYPOINT ["docker-entrypoint.sh"]
ENTRYPOINT ["docker-nopaque-entrypoint.sh"]

View File

@ -1,5 +1,8 @@
# nopaque
![release badge](https://gitlab.ub.uni-bielefeld.de/sfb1288inf/nopaque/-/badges/release.svg)
![pipeline badge](https://gitlab.ub.uni-bielefeld.de/sfb1288inf/nopaque/badges/master/pipeline.svg?ignore_skipped=true)
nopaque bundles various tools and services that provide humanities scholars with DH methods and thus can support their various individual research processes. Using nopaque, researchers can subject digitized sources to Optical Character Recognition (OCR). The resulting text files can then be used as a data basis for Natural Language Processing (NLP). The texts are automatically subjected to various linguistic annotations. The data processed via NLP can then be summarized in the web application as corpora and analyzed by means of an information retrieval system through complex search queries. The range of functions of the web application will be successively extended according to the needs of the researchers.
## Prerequisites and requirements

View File

@ -8,7 +8,7 @@
pipeline_name: 'ca_core_news_md'
version: '3.2.0'
compatible_service_versions:
- '0.1.0'
- '0.1.0'
- title: 'German'
description: 'German pipeline optimized for CPU. Components: tok2vec, tagger, morphologizer, parser, senter, ner, attribute_ruler, lemmatizer.'
url: 'https://github.com/explosion/spacy-models/releases/download/de_core_news_md-3.2.0/de_core_news_md-3.2.0.tar.gz'
@ -19,7 +19,7 @@
pipeline_name: 'de_core_news_md'
version: '3.2.0'
compatible_service_versions:
- '0.1.0'
- '0.1.0'
- title: 'Greek'
description: 'Greek pipeline optimized for CPU. Components: tok2vec, morphologizer, parser, senter, ner, attribute_ruler, lemmatizer.'
url: 'https://github.com/explosion/spacy-models/releases/download/el_core_news_md-3.2.0/el_core_news_md-3.2.0.tar.gz'
@ -120,7 +120,6 @@
version: '3.4.0'
compatible_service_versions:
- '0.1.1'
- '0.1.2'
- title: 'German'
description: 'German pipeline optimized for CPU. Components: tok2vec, tagger, morphologizer, parser, lemmatizer (trainable_lemmatizer), senter, ner.'
url: 'https://github.com/explosion/spacy-models/releases/download/de_core_news_md-3.4.0/de_core_news_md-3.4.0.tar.gz'
@ -132,7 +131,6 @@
version: '3.4.0'
compatible_service_versions:
- '0.1.1'
- '0.1.2'
- title: 'Greek'
description: 'Greek pipeline optimized for CPU. Components: tok2vec, morphologizer, parser, lemmatizer (trainable_lemmatizer), senter, ner, attribute_ruler.'
url: 'https://github.com/explosion/spacy-models/releases/download/el_core_news_md-3.4.0/el_core_news_md-3.4.0.tar.gz'
@ -144,7 +142,6 @@
version: '3.4.0'
compatible_service_versions:
- '0.1.1'
- '0.1.2'
- title: 'English'
description: 'English pipeline optimized for CPU. Components: tok2vec, tagger, parser, senter, ner, attribute_ruler, lemmatizer.'
url: 'https://github.com/explosion/spacy-models/releases/download/en_core_web_md-3.4.1/en_core_web_md-3.4.1.tar.gz'
@ -156,7 +153,6 @@
version: '3.4.1'
compatible_service_versions:
- '0.1.1'
- '0.1.2'
- title: 'Spanish'
description: 'Spanish pipeline optimized for CPU. Components: tok2vec, morphologizer, parser, senter, ner, attribute_ruler, lemmatizer.'
url: 'https://github.com/explosion/spacy-models/releases/download/es_core_news_md-3.4.0/es_core_news_md-3.4.0.tar.gz'
@ -168,7 +164,6 @@
version: '3.4.0'
compatible_service_versions:
- '0.1.1'
- '0.1.2'
- title: 'French'
description: 'French pipeline optimized for CPU. Components: tok2vec, morphologizer, parser, senter, ner, attribute_ruler, lemmatizer.'
url: 'https://github.com/explosion/spacy-models/releases/download/fr_core_news_md-3.4.0/fr_core_news_md-3.4.0.tar.gz'
@ -180,7 +175,6 @@
version: '3.4.0'
compatible_service_versions:
- '0.1.1'
- '0.1.2'
- title: 'Italian'
description: 'Italian pipeline optimized for CPU. Components: tok2vec, morphologizer, tagger, parser, lemmatizer (trainable_lemmatizer), senter, ner'
url: 'https://github.com/explosion/spacy-models/releases/download/it_core_news_md-3.4.0/it_core_news_md-3.4.0.tar.gz'
@ -192,7 +186,6 @@
version: '3.4.0'
compatible_service_versions:
- '0.1.1'
- '0.1.2'
- title: 'Polish'
description: 'Polish pipeline optimized for CPU. Components: tok2vec, morphologizer, parser, lemmatizer (trainable_lemmatizer), tagger, senter, ner.'
url: 'https://github.com/explosion/spacy-models/releases/download/pl_core_news_md-3.4.0/pl_core_news_md-3.4.0.tar.gz'
@ -204,7 +197,6 @@
version: '3.4.0'
compatible_service_versions:
- '0.1.1'
- '0.1.2'
- title: 'Russian'
description: 'Russian pipeline optimized for CPU. Components: tok2vec, morphologizer, parser, senter, ner, attribute_ruler, lemmatizer.'
url: 'https://github.com/explosion/spacy-models/releases/download/ru_core_news_md-3.4.0/ru_core_news_md-3.4.0.tar.gz'
@ -216,7 +208,6 @@
version: '3.4.0'
compatible_service_versions:
- '0.1.1'
- '0.1.2'
- title: 'Chinese'
description: 'Chinese pipeline optimized for CPU. Components: tok2vec, tagger, parser, senter, ner, attribute_ruler.'
url: 'https://github.com/explosion/spacy-models/releases/download/zh_core_web_md-3.4.0/zh_core_web_md-3.4.0.tar.gz'
@ -228,4 +219,3 @@
version: '3.4.0'
compatible_service_versions:
- '0.1.1'
- '0.1.2'

View File

@ -9,6 +9,7 @@
# compatible_service_versions:
# - '0.1.0'
# - '0.1.1'
# - '0.1.2'
# - title: 'Amharic'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/amh.traineddata'
@ -20,6 +21,7 @@
# compatible_service_versions:
# - '0.1.0'
# - '0.1.1'
# - '0.1.2'
- title: 'Arabic'
description: ''
url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/ara.traineddata'
@ -31,6 +33,7 @@
compatible_service_versions:
- '0.1.0'
- '0.1.1'
- '0.1.2'
# - title: 'Assamese'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/asm.traineddata'
@ -42,6 +45,7 @@
# compatible_service_versions:
# - '0.1.0'
# - '0.1.1'
# - '0.1.2'
# - title: 'Azerbaijani'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/aze.traineddata'
@ -53,6 +57,7 @@
# compatible_service_versions:
# - '0.1.0'
# - '0.1.1'
# - '0.1.2'
# - title: 'Azerbaijani - Cyrillic'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/aze_cyrl.traineddata'
@ -64,6 +69,7 @@
# compatible_service_versions:
# - '0.1.0'
# - '0.1.1'
# - '0.1.2'
# - title: 'Belarusian'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/bel.traineddata'
@ -75,6 +81,7 @@
# compatible_service_versions:
# - '0.1.0'
# - '0.1.1'
# - '0.1.2'
# - title: 'Bengali'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/ben.traineddata'
@ -86,6 +93,7 @@
# compatible_service_versions:
# - '0.1.0'
# - '0.1.1'
# - '0.1.2'
# - title: 'Tibetan'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/bod.traineddata'
@ -97,6 +105,7 @@
# compatible_service_versions:
# - '0.1.0'
# - '0.1.1'
# - '0.1.2'
# - title: 'Bosnian'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/bos.traineddata'
@ -108,6 +117,7 @@
# compatible_service_versions:
# - '0.1.0'
# - '0.1.1'
# - '0.1.2'
# - title: 'Bulgarian'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/bul.traineddata'
@ -119,6 +129,7 @@
# compatible_service_versions:
# - '0.1.0'
# - '0.1.1'
# - '0.1.2'
# - title: 'Catalan; Valencian'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/cat.traineddata'
@ -130,6 +141,7 @@
# compatible_service_versions:
# - '0.1.0'
# - '0.1.1'
# - '0.1.2'
# - title: 'Cebuano'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/ceb.traineddata'
@ -141,6 +153,7 @@
# compatible_service_versions:
# - '0.1.0'
# - '0.1.1'
# - '0.1.2'
# - title: 'Czech'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/ces.traineddata'
@ -152,6 +165,7 @@
# compatible_service_versions:
# - '0.1.0'
# - '0.1.1'
# - '0.1.2'
# - title: 'Chinese - Simplified'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/chi_sim.traineddata'
@ -163,6 +177,7 @@
# compatible_service_versions:
# - '0.1.0'
# - '0.1.1'
# - '0.1.2'
- title: 'Chinese - Traditional'
description: ''
url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/chi_tra.traineddata'
@ -174,6 +189,7 @@
compatible_service_versions:
- '0.1.0'
- '0.1.1'
- '0.1.2'
# - title: 'Cherokee'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/chr.traineddata'
@ -185,6 +201,7 @@
# compatible_service_versions:
# - '0.1.0'
# - '0.1.1'
# - '0.1.2'
# - title: 'Welsh'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/cym.traineddata'
@ -196,6 +213,7 @@
# compatible_service_versions:
# - '0.1.0'
# - '0.1.1'
# - '0.1.2'
- title: 'Danish'
description: ''
url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/dan.traineddata'
@ -207,6 +225,7 @@
compatible_service_versions:
- '0.1.0'
- '0.1.1'
- '0.1.2'
- title: 'German'
description: ''
url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/deu.traineddata'
@ -218,6 +237,7 @@
compatible_service_versions:
- '0.1.0'
- '0.1.1'
- '0.1.2'
# - title: 'Dzongkha'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/dzo.traineddata'
@ -229,6 +249,7 @@
# compatible_service_versions:
# - '0.1.0'
# - '0.1.1'
# - '0.1.2'
- title: 'Greek, Modern (1453-)'
description: ''
url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/ell.traineddata'
@ -240,6 +261,7 @@
compatible_service_versions:
- '0.1.0'
- '0.1.1'
- '0.1.2'
- title: 'English'
description: ''
url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/eng.traineddata'
@ -251,6 +273,7 @@
compatible_service_versions:
- '0.1.0'
- '0.1.1'
- '0.1.2'
- title: 'English, Middle (1100-1500)'
description: ''
url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/enm.traineddata'
@ -262,6 +285,7 @@
compatible_service_versions:
- '0.1.0'
- '0.1.1'
- '0.1.2'
# - title: 'Esperanto'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/epo.traineddata'
@ -273,6 +297,7 @@
# compatible_service_versions:
# - '0.1.0'
# - '0.1.1'
# - '0.1.2'
# - title: 'Estonian'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/est.traineddata'
@ -284,6 +309,7 @@
# compatible_service_versions:
# - '0.1.0'
# - '0.1.1'
# - '0.1.2'
# - title: 'Basque'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/eus.traineddata'
@ -295,6 +321,7 @@
# compatible_service_versions:
# - '0.1.0'
# - '0.1.1'
# - '0.1.2'
# - title: 'Persian'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/fas.traineddata'
@ -306,6 +333,7 @@
# compatible_service_versions:
# - '0.1.0'
# - '0.1.1'
# - '0.1.2'
# - title: 'Finnish'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/fin.traineddata'
@ -317,6 +345,7 @@
# compatible_service_versions:
# - '0.1.0'
# - '0.1.1'
# - '0.1.2'
- title: 'French'
description: ''
url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/fra.traineddata'
@ -328,6 +357,7 @@
compatible_service_versions:
- '0.1.0'
- '0.1.1'
- '0.1.2'
- title: 'German Fraktur'
description: ''
url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/frk.traineddata'
@ -339,6 +369,7 @@
compatible_service_versions:
- '0.1.0'
- '0.1.1'
- '0.1.2'
- title: 'French, Middle (ca. 1400-1600)'
description: ''
url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/frm.traineddata'
@ -350,6 +381,7 @@
compatible_service_versions:
- '0.1.0'
- '0.1.1'
- '0.1.2'
# - title: 'Irish'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/gle.traineddata'
@ -361,6 +393,7 @@
# compatible_service_versions:
# - '0.1.0'
# - '0.1.1'
# - '0.1.2'
# - title: 'Galician'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/glg.traineddata'
@ -372,6 +405,7 @@
# compatible_service_versions:
# - '0.1.0'
# - '0.1.1'
# - '0.1.2'
- title: 'Greek, Ancient (-1453)'
description: ''
url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/grc.traineddata'
@ -383,6 +417,7 @@
compatible_service_versions:
- '0.1.0'
- '0.1.1'
- '0.1.2'
# - title: 'Gujarati'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/guj.traineddata'
@ -394,6 +429,7 @@
# compatible_service_versions:
# - '0.1.0'
# - '0.1.1'
# - '0.1.2'
# - title: 'Haitian; Haitian Creole'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/hat.traineddata'
@ -405,6 +441,7 @@
# compatible_service_versions:
# - '0.1.0'
# - '0.1.1'
# - '0.1.2'
# - title: 'Hebrew'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/heb.traineddata'
@ -416,6 +453,7 @@
# compatible_service_versions:
# - '0.1.0'
# - '0.1.1'
# - '0.1.2'
# - title: 'Hindi'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/hin.traineddata'
@ -427,6 +465,7 @@
# compatible_service_versions:
# - '0.1.0'
# - '0.1.1'
# - '0.1.2'
# - title: 'Croatian'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/hrv.traineddata'
@ -438,6 +477,7 @@
# compatible_service_versions:
# - '0.1.0'
# - '0.1.1'
# - '0.1.2'
# - title: 'Hungarian'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/hun.traineddata'
@ -449,6 +489,7 @@
# compatible_service_versions:
# - '0.1.0'
# - '0.1.1'
# - '0.1.2'
# - title: 'Inuktitut'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/iku.traineddata'
@ -460,6 +501,7 @@
# compatible_service_versions:
# - '0.1.0'
# - '0.1.1'
# - '0.1.2'
# - title: 'Indonesian'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/ind.traineddata'
@ -471,6 +513,7 @@
# compatible_service_versions:
# - '0.1.0'
# - '0.1.1'
# - '0.1.2'
# - title: 'Icelandic'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/isl.traineddata'
@ -482,6 +525,7 @@
# compatible_service_versions:
# - '0.1.0'
# - '0.1.1'
# - '0.1.2'
- title: 'Italian'
description: ''
url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/ita.traineddata'
@ -493,6 +537,7 @@
compatible_service_versions:
- '0.1.0'
- '0.1.1'
- '0.1.2'
- title: 'Italian - Old'
description: ''
url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/ita_old.traineddata'
@ -504,6 +549,7 @@
compatible_service_versions:
- '0.1.0'
- '0.1.1'
- '0.1.2'
# - title: 'Javanese'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/jav.traineddata'
@ -515,6 +561,7 @@
# compatible_service_versions:
# - '0.1.0'
# - '0.1.1'
# - '0.1.2'
# - title: 'Japanese'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/jpn.traineddata'
@ -526,6 +573,7 @@
# compatible_service_versions:
# - '0.1.0'
# - '0.1.1'
# - '0.1.2'
# - title: 'Kannada'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/kan.traineddata'
@ -537,6 +585,7 @@
# compatible_service_versions:
# - '0.1.0'
# - '0.1.1'
# - '0.1.2'
# - title: 'Georgian'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/kat.traineddata'
@ -548,6 +597,7 @@
# compatible_service_versions:
# - '0.1.0'
# - '0.1.1'
# - '0.1.2'
# - title: 'Georgian - Old'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/kat_old.traineddata'
@ -559,6 +609,7 @@
# compatible_service_versions:
# - '0.1.0'
# - '0.1.1'
# - '0.1.2'
# - title: 'Kazakh'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/kaz.traineddata'
@ -570,6 +621,7 @@
# compatible_service_versions:
# - '0.1.0'
# - '0.1.1'
# - '0.1.2'
# - title: 'Central Khmer'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/khm.traineddata'
@ -581,6 +633,7 @@
# compatible_service_versions:
# - '0.1.0'
# - '0.1.1'
# - '0.1.2'
# - title: 'Kirghiz; Kyrgyz'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/kir.traineddata'
@ -592,6 +645,7 @@
# compatible_service_versions:
# - '0.1.0'
# - '0.1.1'
# - '0.1.2'
# - title: 'Korean'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/kor.traineddata'
@ -603,6 +657,7 @@
# compatible_service_versions:
# - '0.1.0'
# - '0.1.1'
# - '0.1.2'
# - title: 'Kurdish'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/kur.traineddata'
@ -614,6 +669,7 @@
# compatible_service_versions:
# - '0.1.0'
# - '0.1.1'
# - '0.1.2'
# - title: 'Lao'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/lao.traineddata'
@ -625,6 +681,7 @@
# compatible_service_versions:
# - '0.1.0'
# - '0.1.1'
# - '0.1.2'
# - title: 'Latin'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/lat.traineddata'
@ -636,6 +693,7 @@
# compatible_service_versions:
# - '0.1.0'
# - '0.1.1'
# - '0.1.2'
# - title: 'Latvian'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/lav.traineddata'
@ -647,6 +705,7 @@
# compatible_service_versions:
# - '0.1.0'
# - '0.1.1'
# - '0.1.2'
# - title: 'Lithuanian'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/lit.traineddata'
@ -658,6 +717,7 @@
# compatible_service_versions:
# - '0.1.0'
# - '0.1.1'
# - '0.1.2'
# - title: 'Malayalam'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/mal.traineddata'
@ -669,6 +729,7 @@
# compatible_service_versions:
# - '0.1.0'
# - '0.1.1'
# - '0.1.2'
# - title: 'Marathi'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/mar.traineddata'
@ -680,6 +741,7 @@
# compatible_service_versions:
# - '0.1.0'
# - '0.1.1'
# - '0.1.2'
# - title: 'Macedonian'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/mkd.traineddata'
@ -691,6 +753,7 @@
# compatible_service_versions:
# - '0.1.0'
# - '0.1.1'
# - '0.1.2'
# - title: 'Maltese'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/mlt.traineddata'
@ -702,6 +765,7 @@
# compatible_service_versions:
# - '0.1.0'
# - '0.1.1'
# - '0.1.2'
# - title: 'Malay'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/msa.traineddata'
@ -713,6 +777,7 @@
# compatible_service_versions:
# - '0.1.0'
# - '0.1.1'
# - '0.1.2'
# - title: 'Burmese'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/mya.traineddata'
@ -724,6 +789,7 @@
# compatible_service_versions:
# - '0.1.0'
# - '0.1.1'
# - '0.1.2'
# - title: 'Nepali'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/nep.traineddata'
@ -735,6 +801,7 @@
# compatible_service_versions:
# - '0.1.0'
# - '0.1.1'
# - '0.1.2'
# - title: 'Dutch; Flemish'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/nld.traineddata'
@ -746,6 +813,7 @@
# compatible_service_versions:
# - '0.1.0'
# - '0.1.1'
# - '0.1.2'
# - title: 'Norwegian'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/nor.traineddata'
@ -757,6 +825,7 @@
# compatible_service_versions:
# - '0.1.0'
# - '0.1.1'
# - '0.1.2'
# - title: 'Oriya'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/ori.traineddata'
@ -768,6 +837,7 @@
# compatible_service_versions:
# - '0.1.0'
# - '0.1.1'
# - '0.1.2'
# - title: 'Panjabi; Punjabi'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/pan.traineddata'
@ -779,6 +849,7 @@
# compatible_service_versions:
# - '0.1.0'
# - '0.1.1'
# - '0.1.2'
# - title: 'Polish'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/pol.traineddata'
@ -790,6 +861,7 @@
# compatible_service_versions:
# - '0.1.0'
# - '0.1.1'
# - '0.1.2'
- title: 'Portuguese'
description: ''
url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/por.traineddata'
@ -801,6 +873,7 @@
compatible_service_versions:
- '0.1.0'
- '0.1.1'
- '0.1.2'
# - title: 'Pushto; Pashto'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/pus.traineddata'
@ -812,6 +885,7 @@
# compatible_service_versions:
# - '0.1.0'
# - '0.1.1'
# - '0.1.2'
# - title: 'Romanian; Moldavian; Moldovan'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/ron.traineddata'
@ -823,6 +897,7 @@
# compatible_service_versions:
# - '0.1.0'
# - '0.1.1'
# - '0.1.2'
- title: 'Russian'
description: ''
url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/rus.traineddata'
@ -834,6 +909,7 @@
compatible_service_versions:
- '0.1.0'
- '0.1.1'
- '0.1.2'
# - title: 'Sanskrit'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/san.traineddata'
@ -845,6 +921,7 @@
# compatible_service_versions:
# - '0.1.0'
# - '0.1.1'
# - '0.1.2'
# - title: 'Sinhala; Sinhalese'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/sin.traineddata'
@ -856,6 +933,7 @@
# compatible_service_versions:
# - '0.1.0'
# - '0.1.1'
# - '0.1.2'
# - title: 'Slovak'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/slk.traineddata'
@ -867,6 +945,7 @@
# compatible_service_versions:
# - '0.1.0'
# - '0.1.1'
# - '0.1.2'
# - title: 'Slovenian'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/slv.traineddata'
@ -878,6 +957,7 @@
# compatible_service_versions:
# - '0.1.0'
# - '0.1.1'
# - '0.1.2'
- title: 'Spanish; Castilian'
description: ''
url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/spa.traineddata'
@ -889,6 +969,7 @@
compatible_service_versions:
- '0.1.0'
- '0.1.1'
- '0.1.2'
- title: 'Spanish; Castilian - Old'
description: ''
url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/spa_old.traineddata'
@ -900,6 +981,7 @@
compatible_service_versions:
- '0.1.0'
- '0.1.1'
- '0.1.2'
# - title: 'Albanian'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/sqi.traineddata'
@ -911,6 +993,7 @@
# compatible_service_versions:
# - '0.1.0'
# - '0.1.1'
# - '0.1.2'
# - title: 'Serbian'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/srp.traineddata'
@ -922,6 +1005,7 @@
# compatible_service_versions:
# - '0.1.0'
# - '0.1.1'
# - '0.1.2'
# - title: 'Serbian - Latin'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/srp_latn.traineddata'
@ -933,6 +1017,7 @@
# compatible_service_versions:
# - '0.1.0'
# - '0.1.1'
# - '0.1.2'
# - title: 'Swahili'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/swa.traineddata'
@ -944,6 +1029,7 @@
# compatible_service_versions:
# - '0.1.0'
# - '0.1.1'
# - '0.1.2'
# - title: 'Swedish'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/swe.traineddata'
@ -955,6 +1041,7 @@
# compatible_service_versions:
# - '0.1.0'
# - '0.1.1'
# - '0.1.2'
# - title: 'Syriac'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/syr.traineddata'
@ -966,6 +1053,7 @@
# compatible_service_versions:
# - '0.1.0'
# - '0.1.1'
# - '0.1.2'
# - title: 'Tamil'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/tam.traineddata'
@ -977,6 +1065,7 @@
# compatible_service_versions:
# - '0.1.0'
# - '0.1.1'
# - '0.1.2'
# - title: 'Telugu'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/tel.traineddata'
@ -988,6 +1077,7 @@
# compatible_service_versions:
# - '0.1.0'
# - '0.1.1'
# - '0.1.2'
# - title: 'Tajik'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/tgk.traineddata'
@ -999,6 +1089,7 @@
# compatible_service_versions:
# - '0.1.0'
# - '0.1.1'
# - '0.1.2'
# - title: 'Tagalog'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/tgl.traineddata'
@ -1010,6 +1101,7 @@
# compatible_service_versions:
# - '0.1.0'
# - '0.1.1'
# - '0.1.2'
# - title: 'Thai'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/tha.traineddata'
@ -1021,6 +1113,7 @@
# compatible_service_versions:
# - '0.1.0'
# - '0.1.1'
# - '0.1.2'
# - title: 'Tigrinya'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/tir.traineddata'
@ -1032,6 +1125,7 @@
# compatible_service_versions:
# - '0.1.0'
# - '0.1.1'
# - '0.1.2'
# - title: 'Turkish'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/tur.traineddata'
@ -1043,6 +1137,7 @@
# compatible_service_versions:
# - '0.1.0'
# - '0.1.1'
# - '0.1.2'
# - title: 'Uighur; Uyghur'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/uig.traineddata'
@ -1054,6 +1149,7 @@
# compatible_service_versions:
# - '0.1.0'
# - '0.1.1'
# - '0.1.2'
# - title: 'Ukrainian'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/ukr.traineddata'
@ -1065,6 +1161,7 @@
# compatible_service_versions:
# - '0.1.0'
# - '0.1.1'
# - '0.1.2'
# - title: 'Urdu'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/urd.traineddata'
@ -1076,6 +1173,7 @@
# compatible_service_versions:
# - '0.1.0'
# - '0.1.1'
# - '0.1.2'
# - title: 'Uzbek'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/uzb.traineddata'
@ -1087,6 +1185,7 @@
# compatible_service_versions:
# - '0.1.0'
# - '0.1.1'
# - '0.1.2'
# - title: 'Uzbek - Cyrillic'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/uzb_cyrl.traineddata'
@ -1098,6 +1197,7 @@
# compatible_service_versions:
# - '0.1.0'
# - '0.1.1'
# - '0.1.2'
# - title: 'Vietnamese'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/vie.traineddata'
@ -1109,6 +1209,7 @@
# compatible_service_versions:
# - '0.1.0'
# - '0.1.1'
# - '0.1.2'
# - title: 'Yiddish'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/yid.traineddata'
@ -1120,3 +1221,4 @@
# compatible_service_versions:
# - '0.1.0'
# - '0.1.1'
# - '0.1.2'

View File

@ -45,7 +45,7 @@ def _create_build_corpus_service(corpus):
''' ## Constraints ## '''
constraints = ['node.role==worker']
''' ## Image ## '''
image = f'{current_app.config["NOPAQUE_DOCKER_IMAGE_PREFIX"]}cwb:r1853'
image = f'{current_app.config["NOPAQUE_DOCKER_IMAGE_PREFIX"]}cwb:r1879'
''' ## Labels ## '''
labels = {
'origin': current_app.config['SERVER_NAME'],
@ -139,7 +139,7 @@ def _create_cqpserver_container(corpus):
''' ## Entrypoint ## '''
entrypoint = ['bash', '-c']
''' ## Image ## '''
image = f'{current_app.config["NOPAQUE_DOCKER_IMAGE_PREFIX"]}cwb:r1853'
image = f'{current_app.config["NOPAQUE_DOCKER_IMAGE_PREFIX"]}cwb:r1879'
''' ## Name ## '''
name = f'cqpserver_{corpus.id}'
''' ## Network ## '''

View File

@ -45,12 +45,6 @@ def dashboard():
)
# @bp.route('/user_manual')
# @register_breadcrumb(bp, '.user_manual', '<i class="material-icons left">help</i>User manual')
# def user_manual():
# return render_template('main/user_manual.html.j2', title='User manual')
@bp.route('/news')
@register_breadcrumb(bp, '.news', '<i class="material-icons left">email</i>News')
def news():

View File

@ -953,7 +953,7 @@ class TesseractOCRPipelineModel(FileMixin, HashidMixin, db.Model):
return self.user.hashid
@staticmethod
def insert_defaults():
def insert_defaults(force_download=False):
nopaque_user = User.query.filter_by(username='nopaque').first()
defaults_file = os.path.join(
os.path.dirname(os.path.abspath(__file__)),
@ -966,6 +966,7 @@ class TesseractOCRPipelineModel(FileMixin, HashidMixin, db.Model):
if model is not None:
model.compatible_service_versions = m['compatible_service_versions']
model.description = m['description']
model.filename = f'{model.id}.traineddata'
model.publisher = m['publisher']
model.publisher_url = m['publisher_url']
model.publishing_url = m['publishing_url']
@ -973,38 +974,39 @@ class TesseractOCRPipelineModel(FileMixin, HashidMixin, db.Model):
model.is_public = True
model.title = m['title']
model.version = m['version']
continue
model = TesseractOCRPipelineModel(
compatible_service_versions=m['compatible_service_versions'],
description=m['description'],
publisher=m['publisher'],
publisher_url=m['publisher_url'],
publishing_url=m['publishing_url'],
publishing_year=m['publishing_year'],
is_public=True,
title=m['title'],
user=nopaque_user,
version=m['version']
)
db.session.add(model)
db.session.flush(objects=[model])
db.session.refresh(model)
model.filename = f'{model.id}.traineddata'
r = requests.get(m['url'], stream=True)
pbar = tqdm(
desc=f'{model.title} ({model.filename})',
unit="B",
unit_scale=True,
unit_divisor=1024,
total=int(r.headers['Content-Length'])
)
pbar.clear()
with open(model.path, 'wb') as f:
for chunk in r.iter_content(chunk_size=1024):
if chunk: # filter out keep-alive new chunks
pbar.update(len(chunk))
f.write(chunk)
pbar.close()
else:
model = TesseractOCRPipelineModel(
compatible_service_versions=m['compatible_service_versions'],
description=m['description'],
publisher=m['publisher'],
publisher_url=m['publisher_url'],
publishing_url=m['publishing_url'],
publishing_year=m['publishing_year'],
is_public=True,
title=m['title'],
user=nopaque_user,
version=m['version']
)
db.session.add(model)
db.session.flush(objects=[model])
db.session.refresh(model)
model.filename = f'{model.id}.traineddata'
if not os.path.exists(model.path) or force_download:
r = requests.get(m['url'], stream=True)
pbar = tqdm(
desc=f'{model.title} ({model.filename})',
unit="B",
unit_scale=True,
unit_divisor=1024,
total=int(r.headers['Content-Length'])
)
pbar.clear()
with open(model.path, 'wb') as f:
for chunk in r.iter_content(chunk_size=1024):
if chunk: # filter out keep-alive new chunks
pbar.update(len(chunk))
f.write(chunk)
pbar.close()
db.session.commit()
def delete(self):
@ -1080,7 +1082,7 @@ class SpaCyNLPPipelineModel(FileMixin, HashidMixin, db.Model):
return self.user.hashid
@staticmethod
def insert_defaults():
def insert_defaults(force_download=False):
nopaque_user = User.query.filter_by(username='nopaque').first()
defaults_file = os.path.join(
os.path.dirname(os.path.abspath(__file__)),
@ -1093,6 +1095,7 @@ class SpaCyNLPPipelineModel(FileMixin, HashidMixin, db.Model):
if model is not None:
model.compatible_service_versions = m['compatible_service_versions']
model.description = m['description']
model.filename = m['url'].split('/')[-1]
model.publisher = m['publisher']
model.publisher_url = m['publisher_url']
model.publishing_url = m['publishing_url']
@ -1101,39 +1104,40 @@ class SpaCyNLPPipelineModel(FileMixin, HashidMixin, db.Model):
model.title = m['title']
model.version = m['version']
model.pipeline_name = m['pipeline_name']
continue
model = SpaCyNLPPipelineModel(
compatible_service_versions=m['compatible_service_versions'],
description=m['description'],
publisher=m['publisher'],
publisher_url=m['publisher_url'],
publishing_url=m['publishing_url'],
publishing_year=m['publishing_year'],
is_public=True,
title=m['title'],
user=nopaque_user,
version=m['version'],
pipeline_name=m['pipeline_name']
)
db.session.add(model)
db.session.flush(objects=[model])
db.session.refresh(model)
model.filename = m['url'].split('/')[-1]
r = requests.get(m['url'], stream=True)
pbar = tqdm(
desc=f'{model.title} ({model.filename})',
unit="B",
unit_scale=True,
unit_divisor=1024,
total=int(r.headers['Content-Length'])
)
pbar.clear()
with open(model.path, 'wb') as f:
for chunk in r.iter_content(chunk_size=1024):
if chunk: # filter out keep-alive new chunks
pbar.update(len(chunk))
f.write(chunk)
pbar.close()
else:
model = SpaCyNLPPipelineModel(
compatible_service_versions=m['compatible_service_versions'],
description=m['description'],
filename=m['url'].split('/')[-1],
publisher=m['publisher'],
publisher_url=m['publisher_url'],
publishing_url=m['publishing_url'],
publishing_year=m['publishing_year'],
is_public=True,
title=m['title'],
user=nopaque_user,
version=m['version'],
pipeline_name=m['pipeline_name']
)
db.session.add(model)
db.session.flush(objects=[model])
db.session.refresh(model)
if not os.path.exists(model.path) or force_download:
r = requests.get(m['url'], stream=True)
pbar = tqdm(
desc=f'{model.title} ({model.filename})',
unit="B",
unit_scale=True,
unit_divisor=1024,
total=int(r.headers['Content-Length'])
)
pbar.clear()
with open(model.path, 'wb') as f:
for chunk in r.iter_content(chunk_size=1024):
if chunk: # filter out keep-alive new chunks
pbar.update(len(chunk))
f.write(chunk)
pbar.close()
db.session.commit()
def delete(self):
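Both insert_defaults methods now follow the same pattern: update an existing record in place, otherwise create it, and download the model file only when it is missing on disk or force_download is set. The following standalone Python sketch condenses that download step; the function name and arguments are illustrative, not part of the application.

import os

import requests
from tqdm import tqdm

def download_model_file(url, path, title, force_download=False):
    # Skip the download when the file is already present, unless forced.
    if os.path.exists(path) and not force_download:
        return
    r = requests.get(url, stream=True)
    pbar = tqdm(
        desc=f'{title} ({os.path.basename(path)})',
        unit='B',
        unit_scale=True,
        unit_divisor=1024,
        total=int(r.headers['Content-Length'])
    )
    pbar.clear()
    with open(path, 'wb') as f:
        for chunk in r.iter_content(chunk_size=1024):
            if chunk:  # filter out keep-alive new chunks
                pbar.update(len(chunk))
                f.write(chunk)
    pbar.close()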

View File

@ -10,7 +10,7 @@ file-setup-pipeline:
tesseract-ocr-pipeline:
name: 'Tesseract OCR Pipeline'
publisher: 'Bielefeld University - CRC 1288 - INF'
latest_version: '0.1.1'
latest_version: '0.1.2'
versions:
0.1.0:
methods:
@ -23,6 +23,12 @@ tesseract-ocr-pipeline:
- 'ocropus_nlbin_threshold'
publishing_year: 2022
url: 'https://gitlab.ub.uni-bielefeld.de/sfb1288inf/tesseract-ocr-pipeline/-/releases/v0.1.1'
0.1.2:
methods:
- 'binarization'
- 'ocropus_nlbin_threshold'
publishing_year: 2023
url: 'https://gitlab.ub.uni-bielefeld.de/sfb1288inf/tesseract-ocr-pipeline/-/releases/v0.1.2'
transkribus-htr-pipeline:
name: 'Transkribus HTR Pipeline'
publisher: 'Bielefeld University - CRC 1288 - INF'
@ -41,7 +47,7 @@ transkribus-htr-pipeline:
spacy-nlp-pipeline:
name: 'SpaCy NLP Pipeline'
publisher: 'Bielefeld University - CRC 1288 - INF'
latest_version: '0.1.2'
latest_version: '0.1.1'
versions:
0.1.0:
methods:
@ -53,8 +59,3 @@ spacy-nlp-pipeline:
- 'encoding_detection'
publishing_year: 2022
url: 'https://gitlab.ub.uni-bielefeld.de/sfb1288inf/spacy-nlp-pipeline/-/releases/v0.1.1'
0.1.2:
methods:
- 'encoding_detection'
publishing_year: 2022
url: 'https://gitlab.ub.uni-bielefeld.de/sfb1288inf/spacy-nlp-pipeline/-/releases/v0.1.2'
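The compatible_service_versions lists in the model definition files above track the service versions defined here; the removed spacy-nlp-pipeline 0.1.2 entries and the added tesseract-ocr-pipeline 0.1.2 entry keep the two files consistent. A hypothetical Python helper, purely for illustration and not taken from the repository, shows how such a compatibility filter could look:

def compatible_models(models, service_version):
    # Keep only the model entries that declare the requested service version.
    return [
        m for m in models
        if service_version in m.get('compatible_service_versions', [])
    ]

# Entries shaped like the YAML model definitions above (values are examples):
models = [
    {'title': 'German', 'compatible_service_versions': ['0.1.0', '0.1.1', '0.1.2']},
    {'title': 'Catalan', 'compatible_service_versions': ['0.1.0']},
]
print(compatible_models(models, '0.1.2'))  # -> only the 'German' entry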

Binary files not shown: 11 image files changed (7 replaced, 4 newly added).

View File

@ -168,6 +168,14 @@ nopaque.App = class App {
let manualModalTocElement = document.querySelector('#manual-modal-toc');
let manualModalToc = M.Tabs.getInstance(manualModalTocElement);
manualModalToc.select(modalTriggerElement.dataset.manualModalChapter);
// TODO: Make this work.
// if ('manualModalChapterAnchor' in modalTriggerElement.dataset) {
// let manualModalChapterAnchor = document.querySelector(`#${modalTriggerElement.dataset.manualModalChapterAnchor}`);
// let xCoord = manualModalChapterAnchor.getBoundingClientRect().left;
// let yCoord = manualModalChapterAnchor.getBoundingClientRect().top;
// let modalContentElement = modalElement.querySelector('.modal-content');
// modalContentElement.scroll(xCoord, yCoord);
// }
}
}
}

View File

@ -3,16 +3,16 @@ nopaque.corpus_analysis.query_builder.ElementReferences = class ElementReference
// General Elements
this.queryInputField = document.querySelector('#corpus-analysis-concordance-query-builder-input-field');
this.queryChipElements = [];
this.queryElementTarget = document.querySelector('.query-element-target')
this.editingModusOn = false;
this.editedQueryChipElementIndex = undefined;
this.deleteQueryButton = document.querySelector('#corpus-analysis-concordance-delete-query-button');
// Structural Attribute Builder Elements
this.structuralAttrModal = M.Modal.getInstance(document.querySelector('#corpus-analysis-concordance-structural-attr-modal'));
this.englishEntTypeSelection = document.querySelector('#corpus-analysis-concordance-english-ent-type-selection');
this.germanEntTypeSelection = document.querySelector('#corpus-analysis-concordance-german-ent-type-selection');
this.textAnnotationSelection = document.querySelector('#corpus-analysis-concordance-text-annotation-options');
this.textAnnotationInput = document.querySelector('#corpus-analysis-concordance-text-annotation-input');
// Token Attribute Builder Elements
this.positionalAttrModal = M.Modal.getInstance(document.querySelector('#corpus-analysis-concordance-positional-attr-modal'));
this.positionalAttrSelection = document.querySelector('#corpus-analysis-concordance-positional-attr-selection');

View File

@ -2,30 +2,58 @@ nopaque.corpus_analysis.query_builder.QueryBuilder = class QueryBuilder {
constructor() {
this.elements = new nopaque.corpus_analysis.query_builder.ElementReferences();
this.incidenceModifierEventListeners();
this.nAndMInputSubmitEventListeners();
this.addEventListenersToQueryElementTarget();
this.addEventListenersToIncidenceModifier();
this.addEventListenersToNAndMInputSubmit();
let queryBuilderDisplay = document.querySelector("#corpus-analysis-concordance-query-builder-display");
let expertModeDisplay = document.querySelector("#corpus-analysis-concordance-expert-mode-display");
let expertModeSwitch = document.querySelector("#corpus-analysis-concordance-expert-mode-switch");
expertModeSwitch.addEventListener("change", () => {
const isChecked = expertModeSwitch.checked;
if (isChecked) {
queryBuilderDisplay.classList.add("hide");
expertModeDisplay.classList.remove("hide");
this.switchToExpertModeParser();
} else {
queryBuilderDisplay.classList.remove("hide");
expertModeDisplay.classList.add("hide");
this.switchToQueryBuilderParser();
}
});
this.elements.deleteQueryButton.addEventListener('click', () => {this.resetQueryInputField()});
this.expertModeQueryBuilderSwitchHandler();
this.extensions = {
structuralAttributeBuilderFunctions: new nopaque.corpus_analysis.query_builder.StructuralAttributeBuilderFunctions(this),
tokenAttributeBuilderFunctions: new nopaque.corpus_analysis.query_builder.TokenAttributeBuilderFunctions(this),
};
this.dropdown = M.Dropdown.init(
document.querySelector('.dropdown-trigger[data-toggle-area="token-incidence-modifiers"]'),
{
onCloseStart: () => {
this.unselectChipElement(this.elements.queryInputField.querySelector('.chip.teal'));
}
}
)
}
addEventListenersToQueryElementTarget() {
this.elements.queryElementTarget.addEventListener('click', () => {
this.elements.positionalAttrModal.open();
});
this.elements.queryElementTarget.addEventListener('dragstart', this.handleDragStart.bind(this, this.elements.queryElementTarget));
this.elements.queryElementTarget.addEventListener('dragend', this.handleDragEnd);
}
addEventListenersToIncidenceModifier() {
// Eventlisteners for the incidence modifiers. There are two different types of incidence modifiers: token and character incidence modifiers.
document.querySelectorAll('.incidence-modifier-selection').forEach(button => {
let dropdownId = button.parentNode.parentNode.id;
if (dropdownId === 'corpus-analysis-concordance-token-incidence-modifiers-dropdown') {
button.addEventListener('click', () => this.tokenIncidenceModifierHandler(button.dataset.token, button.innerHTML));
} else if (dropdownId === 'corpus-analysis-concordance-character-incidence-modifiers-dropdown') {
button.addEventListener('click', () => this.extensions.tokenAttributeBuilderFunctions.characterIncidenceModifierHandler(button));
}
});
}
addEventListenersToNAndMInputSubmit() {
// Eventlisteners for the submit of n- and m-values of the incidence modifier modal for "exactly n" or "between n and m".
document.querySelectorAll('.n-m-submit-button').forEach(button => {
let modalId = button.dataset.modalId;
if (modalId === 'corpus-analysis-concordance-exactly-n-token-modal' || modalId === 'corpus-analysis-concordance-between-nm-token-modal') {
button.addEventListener('click', () => this.tokenNMSubmitHandler(modalId));
} else if (modalId === 'corpus-analysis-concordance-exactly-n-character-modal' || modalId === 'corpus-analysis-concordance-between-nm-character-modal') {
button.addEventListener('click', () => this.extensions.tokenAttributeBuilderFunctions.characterNMSubmitHandler(modalId));
}
});
}
toggleClass(elements, className, action) {
@ -36,26 +64,27 @@ nopaque.corpus_analysis.query_builder.QueryBuilder = class QueryBuilder {
resetQueryInputField() {
this.elements.queryInputField.innerHTML = '';
this.addPlaceholder();
this.addQueryElementTarget();
this.updateChipList();
this.queryPreviewBuilder();
}
addQueryElementTarget() {
let queryElementTarget = nopaque.Utils.HTMLToElement(
`
<a class="query-element-target btn-floating btn-small blue-grey lighten-4 waves-effect waves-light tooltipped" style="margin-bottom:10px; margin-right:5px;" draggable="true" data-position="bottom" data-tooltip="Add an Element to your query">
<i class="material-icons">add</i>
</a>
`
);
this.elements.queryInputField.appendChild(queryElementTarget);
this.elements.queryElementTarget = queryElementTarget;
this.addEventListenersToQueryElementTarget();
}
updateChipList() {
this.elements.queryChipElements = this.elements.queryInputField.querySelectorAll('.query-component');
}
removePlaceholder() {
let placeholder = this.elements.queryInputField.querySelector('#corpus-analysis-concordance-query-builder-input-field-placeholder');
if (placeholder && this.elements.queryInputField !== undefined) {
this.elements.queryInputField.innerHTML = '';
}
}
addPlaceholder() {
let placeholder = nopaque.Utils.HTMLToElement('<span id="corpus-analysis-concordance-query-builder-input-field-placeholder">Click on a button to add a query component</span>');
this.elements.queryInputField.appendChild(placeholder);
}
resetMaterializeSelection(selectionElements, value = "default") {
selectionElements.forEach(selectionElement => {
@ -68,7 +97,7 @@ nopaque.corpus_analysis.query_builder.QueryBuilder = class QueryBuilder {
})
}
submitQueryChipElement(dataType = undefined, prettyQueryText = undefined, queryText = undefined, index = null, isClosingTag = false, isEditable = false) {
submitQueryChipElement(dataType=undefined, prettyQueryText=undefined, queryText=undefined, index=null, isClosingTag=false, isEditable=false) {
if (this.elements.editingModusOn) {
let editedQueryChipElement = this.elements.queryChipElements[this.elements.editedQueryChipElementIndex];
editedQueryChipElement.dataset.type = dataType;
@ -81,45 +110,41 @@ nopaque.corpus_analysis.query_builder.QueryBuilder = class QueryBuilder {
}
}
queryChipFactory(dataType, prettyQueryText, queryText, index = null, isClosingTag = false, isEditable = false) {
queryChipFactory(dataType, prettyQueryText, queryText, index=null, isClosingTag=false, isEditable=false) {
// Creates a new query chip element, adds Eventlisteners for selection, deletion and drag and drop and appends it to the query input field.
queryText = nopaque.Utils.escape(queryText);
prettyQueryText = nopaque.Utils.escape(prettyQueryText);
let queryChipElement = nopaque.Utils.HTMLToElement(
`
<span class="chip query-component" data-type="${dataType}" data-query="${queryText}" draggable="true" data-closing-tag="${isClosingTag}">
<span class="chip query-component" data-type="${dataType}" data-query="${queryText}" draggable="true">
${prettyQueryText}${isEditable ? '<i class="material-icons chip-action-button" data-chip-action="edit" style="padding-left:5px; font-size:18px; cursor:pointer;">edit</i>': ''}
${isClosingTag ? '<i class="material-icons chip-action-button" data-chip-action="lock" style="padding-top:5px; font-size:20px; cursor:pointer;">lock_open</i>' : '<i class="material-icons close chip-action-button" data-chip-action="delete">close</i>'}
${isClosingTag ? '' : '<i class="material-icons close chip-action-button" data-chip-action="delete">close</i>'}
</span>
`
);
this.actionListeners(queryChipElement);
this.addActionListeners(queryChipElement);
queryChipElement.addEventListener('dragstart', this.handleDragStart.bind(this, queryChipElement));
queryChipElement.addEventListener('dragend', this.handleDragEnd);
// Ensures that metadata is always at the end of the query and if an index is given, inserts the query chip at the given index and if there is a closing tag, inserts the query chip before the closing tag.
this.removePlaceholder();
let lastChild = this.elements.queryInputField.lastChild;
let isLastChildTextAnnotation = lastChild && lastChild.dataset.type === 'text-annotation';
if (!index) {
let closingTagElement = this.elements.queryInputField.querySelector('[data-closing-tag="true"]');
if (closingTagElement) {
index = Array.from(this.elements.queryInputField.children).indexOf(closingTagElement);
}
}
if (dataType !== 'text-annotation' && index) {
this.elements.queryInputField.insertBefore(queryChipElement, this.elements.queryChipElements[index]);
} else if (dataType !== 'text-annotation' && isLastChildTextAnnotation) {
this.elements.queryInputField.insertBefore(queryChipElement, lastChild);
// If an index is given, inserts the query chip after the given index (only relevant for Incidence Modifier) and if there is a closing tag, inserts the query chip before the closing tag.
if (index !== null) {
this.updateChipList();
this.elements.queryChipElements[index].after(queryChipElement);
} else {
this.elements.queryInputField.appendChild(queryChipElement);
this.elements.queryInputField.insertBefore(queryChipElement, this.elements.queryElementTarget);
}
if (isClosingTag) {
this.moveQueryElementTarget(queryChipElement);
}
this.updateChipList();
this.queryPreviewBuilder();
}
actionListeners(queryChipElement) {
moveQueryElementTarget(element) {
this.elements.queryInputField.insertBefore(this.elements.queryElementTarget, element);
}
addActionListeners(queryChipElement) {
let notQuantifiableDataTypes = ['start-sentence', 'end-sentence', 'start-entity', 'start-empty-entity', 'end-entity', 'token-incidence-modifier'];
queryChipElement.addEventListener('click', (event) => {
if (event.target.classList.contains('chip')) {
@ -129,19 +154,15 @@ nopaque.corpus_analysis.query_builder.QueryBuilder = class QueryBuilder {
}
});
let chipActionButtons = queryChipElement.querySelectorAll('.chip-action-button');
// chipActionButtons.forEach(button => {
for (let button of chipActionButtons) {
chipActionButtons.forEach(button => {
button.addEventListener('click', (event) => {
if (event.target.dataset.chipAction === 'delete') {
this.deleteChipElement(queryChipElement);
} else if (event.target.dataset.chipAction === 'edit') {
this.editChipElement(queryChipElement);
} else if (event.target.dataset.chipAction === 'lock') {
this.lockClosingChipElement(queryChipElement);
}
});
}
// });
});
}
editChipElement(queryChipElement) {
@ -151,9 +172,6 @@ nopaque.corpus_analysis.query_builder.QueryBuilder = class QueryBuilder {
case 'start-entity':
this.extensions.structuralAttributeBuilderFunctions.editStartEntityChipElement(queryChipElement);
break;
case 'text-annotation':
this.extensions.structuralAttributeBuilderFunctions.editTextAnnotationChipElement(queryChipElement);
break;
case 'token':
let queryElementsContent = this.extensions.tokenAttributeBuilderFunctions.prepareTokenQueryElementsContent(queryChipElement);
this.extensions.tokenAttributeBuilderFunctions.editTokenChipElement(queryElementsContent);
@ -163,22 +181,15 @@ nopaque.corpus_analysis.query_builder.QueryBuilder = class QueryBuilder {
}
}
lockClosingChipElement(queryChipElement) {
queryChipElement.dataset.closingTag = 'false';
let lockIcon = queryChipElement.querySelector('[data-chip-action="lock"]');
lockIcon.textContent = 'lock';
// TODO: Write unlock-Function?
lockIcon.dataset.chipAction = 'unlock';
}
deleteChipElement(attr) {
let elementIndex = Array.from(this.elements.queryInputField.children).indexOf(attr);
switch (attr.dataset.type) {
case 'start-sentence':
this.deletingClosingTagHandler(elementIndex, 'end-sentence');
this.deleteClosingTagHandler(elementIndex, 'end-sentence');
break;
case 'start-empty-entity':
case 'start-entity':
this.deletingClosingTagHandler(elementIndex, 'end-entity');
this.deleteClosingTagHandler(elementIndex, 'end-entity');
break;
case 'token':
let nextElement = Array.from(this.elements.queryInputField.children)[elementIndex+1];
@ -189,14 +200,11 @@ nopaque.corpus_analysis.query_builder.QueryBuilder = class QueryBuilder {
break;
}
this.elements.queryInputField.removeChild(attr);
if (this.elements.queryInputField.children.length === 0) {
this.addPlaceholder();
}
this.updateChipList();
this.queryPreviewBuilder();
}
deletingClosingTagHandler(elementIndex, closingTagType) {
deleteClosingTagHandler(elementIndex, closingTagType) {
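// Removes the closing tag (end-sentence or end-entity) that corresponds to a deleted opening tag.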
let closingTags = this.elements.queryInputField.querySelectorAll(`[data-type="${closingTagType}"]`);
for (let i = 0; i < closingTags.length; i++) {
let closingTag = closingTags[i];
@ -208,7 +216,7 @@ nopaque.corpus_analysis.query_builder.QueryBuilder = class QueryBuilder {
}
}
handleDragStart(queryChipElement, event) {
handleDragStart(queryChipElement) {
// is called when a query chip is dragged. It creates a dropzone (in form of a chip) for the dragged chip and adds it to the query input field.
let queryChips = this.elements.queryInputField.querySelectorAll('.query-component');
if (queryChipElement.dataset.type === 'token-incidence-modifier') {
@ -226,13 +234,17 @@ nopaque.corpus_analysis.query_builder.QueryBuilder = class QueryBuilder {
let targetChipClone = targetChipElement.cloneNode(true);
element.insertAdjacentElement('afterend', targetChipClone);
//TODO: Change to two different functions for drag and drop
this.addDragDropListeners(targetChipClone, queryChipElement);
}
}, 0);
}
handleDragEnd(event) {
// is called when a query chip is dropped. It removes the dropzones and initializes the tooltips if the dragged element is the query element target.
if (event.target.classList.contains('query-element-target')) {
M.Tooltip.init(event.target);
}
document.querySelectorAll('.drop-target').forEach(target => target.remove());
}
@ -286,24 +298,36 @@ nopaque.corpus_analysis.query_builder.QueryBuilder = class QueryBuilder {
}
selectChipElement(attr) {
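// Highlights the clicked chip, removes the highlight from any other chip and opens the token incidence modifier dropdown for it.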
document.querySelectorAll('.chip.teal').forEach(element => {
if (element !== attr) {
element.classList.remove('teal', 'lighten-2');
this.toggleClass(['token-incidence-modifiers'], 'disabled', 'add');
}
});
if (attr.classList.contains('teal')) {
return;
}
this.toggleClass(['token-incidence-modifiers'], 'disabled', 'toggle');
attr.classList.toggle('teal');
attr.classList.toggle('lighten-5');
M.Dropdown.getInstance(document.querySelector('.dropdown-trigger[data-toggle-area="token-incidence-modifiers"]')).open();
}
tokenIncidenceModifierHandler(incidenceModifier, incidenceModifierPretty) {
unselectChipElement(attr) {
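// Removes the highlight from the chip and disables the token incidence modifier options, unless one of the n / n-m modals is still open.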
let nModalInstance = M.Modal.getInstance(document.querySelector('#corpus-analysis-concordance-exactly-n-token-modal'));
let nmModalInstance = M.Modal.getInstance(document.querySelector('#corpus-analysis-concordance-between-nm-token-modal'));
if (nModalInstance.isOpen || nmModalInstance.isOpen) {
return;
}
attr.classList.remove('teal', 'lighten-5');
this.toggleClass(['token-incidence-modifiers'], 'disabled', 'add');
}
tokenIncidenceModifierHandler(incidenceModifier, incidenceModifierPretty, nOrNM = false) {
// Adds a token incidence modifier to the query input field.
let selectedChip = this.elements.queryInputField.querySelector('.chip.teal');
let selectedChipIndex = Array.from(this.elements.queryInputField.children).indexOf(selectedChip);
this.submitQueryChipElement('token-incidence-modifier', incidenceModifierPretty, incidenceModifier, selectedChipIndex+1);
this.selectChipElement(selectedChip);
let selectedChipIndex = Array.from(this.elements.queryChipElements).indexOf(selectedChip);
if (nOrNM) {
this.unselectChipElement(selectedChip);
}
this.submitQueryChipElement('token-incidence-modifier', incidenceModifierPretty, incidenceModifier, selectedChipIndex);
}
tokenNMSubmitHandler(modalId) {
@ -321,29 +345,30 @@ nopaque.corpus_analysis.query_builder.QueryBuilder = class QueryBuilder {
let instance = M.Modal.getInstance(modal);
instance.close();
this.tokenIncidenceModifierHandler(input, pretty_input);
this.tokenIncidenceModifierHandler(input, pretty_input, true);
}
incidenceModifierEventListeners() {
// Eventlisteners for the incidence modifiers. There are two different types of incidence modifiers: token and character incidence modifiers.
document.querySelectorAll('.incidence-modifier-selection').forEach(button => {
let dropdownId = button.parentNode.parentNode.id;
if (dropdownId === 'corpus-analysis-concordance-token-incidence-modifiers-dropdown') {
button.addEventListener('click', () => this.tokenIncidenceModifierHandler(button.dataset.token, button.innerHTML));
} else if (dropdownId === 'corpus-analysis-concordance-character-incidence-modifiers-dropdown') {
button.addEventListener('click', () => this.characterIncidenceModifierHandler(button));
}
expertModeQueryBuilderSwitchHandler() {
let queryBuilderDisplay = document.querySelector("#corpus-analysis-concordance-query-builder-display");
let expertModeDisplay = document.querySelector("#corpus-analysis-concordance-expert-mode-display");
let expertModeSwitch = document.querySelector("#corpus-analysis-concordance-expert-mode-switch");
let submitModal = M.Modal.getInstance(document.querySelector('#corpus-analysis-concordance-switch-to-query-builder-submit-modal'));
let confirmSwitchToQueryBuilderButton = document.querySelector('.switch-action[data-switch-action="confirm"]');
confirmSwitchToQueryBuilderButton.addEventListener("click", () => {
queryBuilderDisplay.classList.remove("hide");
expertModeDisplay.classList.add("hide");
this.switchToQueryBuilderParser();
});
}
nAndMInputSubmitEventListeners() {
// Eventlisteners for the submit of n- and m-values of the incidence modifier modal for "exactly n" or "between n and m".
document.querySelectorAll('.n-m-submit-button').forEach(button => {
let modalId = button.dataset.modalId;
if (modalId === 'corpus-analysis-concordance-exactly-n-token-modal' || modalId === 'corpus-analysis-concordance-between-nm-token-modal') {
button.addEventListener('click', () => this.tokenNMSubmitHandler(modalId));
} else if (modalId === 'corpus-analysis-concordance-exactly-n-character-modal' || modalId === 'corpus-analysis-concordance-between-nm-character-modal') {
button.addEventListener('click', () => this.extensions.tokenAttributeBuilderFunctions.characterNMSubmitHandler(modalId));
expertModeSwitch.addEventListener("change", () => {
const isChecked = expertModeSwitch.checked;
if (isChecked) {
queryBuilderDisplay.classList.add("hide");
expertModeDisplay.classList.remove("hide");
this.switchToExpertModeParser();
} else {
submitModal.open();
}
});
}
@ -361,15 +386,13 @@ nopaque.corpus_analysis.query_builder.QueryBuilder = class QueryBuilder {
this.resetQueryInputField();
let expertModeInputFieldValue = document.querySelector('#corpus-analysis-concordance-form-query').value;
let chipElements = this.parseTextToChip(expertModeInputFieldValue);
let closingTagElements = ['end-sentence', 'end-entity'];
let editableElements = ['start-entity', 'text-annotation', 'token'];
let editableElements = ['start-entity', 'token'];
for (let chipElement of chipElements) {
let isClosingTag = closingTagElements.includes(chipElement['type']);
let isEditable = editableElements.includes(chipElement['type']);
if (chipElement['query'] === '[]'){
isEditable = false;
}
this.submitQueryChipElement(chipElement['type'], chipElement['pretty'], chipElement['query'], null, isClosingTag, isEditable);
this.submitQueryChipElement(chipElement['type'], chipElement['pretty'], chipElement['query'], null, false, isEditable);
}
}
@ -395,11 +418,7 @@ nopaque.corpus_analysis.query_builder.QueryBuilder = class QueryBuilder {
pretty: 'Entity End',
type: 'end-entity'
},
':: ?match\\.text_[A-Za-z]+="[^"]+"': {
pretty: '',
type: 'text-annotation'
},
'\\[(word|lemma|pos|simple_pos)=(("[^"]+")|(\\u0027[^\\u0027]+\\u0027)) ?(%c)? ?((\\&|\\|) ?(word|lemma|pos|simple_pos)=(("[^"]+")|(\\u0027[^\\u0027]+\\u0027)) ?(%c)? ?)*\\]': {
'\\[(word|lemma|pos|simple_pos)=("(?:[^"\\\\]|\\\\")*") ?(%c)? ?((\\&|\\|) ?(word|lemma|pos|simple_pos)=("(?:[^"\\\\]|\\\\")*") ?(%c)? ?)*\\]': {
pretty: '',
type: 'token'
},
@ -452,14 +471,8 @@ nopaque.corpus_analysis.query_builder.QueryBuilder = class QueryBuilder {
case ':: ?match\\.text_[A-Za-z]+="[^"]+"':
prettyText = stringElement.replace(/:: ?match\.text_|"|"/g, '');
break;
case '\\[(word|lemma|pos|simple_pos)=(("[^"]+")|(\\u0027[^\\u0027]+\\u0027)) ?(%c)? ?((\\&|\\|) ?(word|lemma|pos|simple_pos)=(("[^"]+")|(\\u0027[^\\u0027]+\\u0027)) ?(%c)? ?)*\\]':
let doubleQuotes = /(word|lemma|pos|simple_pos)="[^"]+"/gi;
let singleQuotes = /(word|lemma|pos|simple_pos)='[^']+'/gi;
if (doubleQuotes.exec(stringElement)) {
prettyText = stringElement.replace(/^\[|\]$|"/g, '');
} else if (singleQuotes.exec(stringElement)) {
prettyText = stringElement.replace(/^\[|\]$|'/g, '');
}
case '\\[(word|lemma|pos|simple_pos)=("(?:[^"\\\\]|\\\\")*") ?(%c)? ?((\\&|\\|) ?(word|lemma|pos|simple_pos)=("(?:[^"\\\\]|\\\\")*") ?(%c)? ?)*\\]':
prettyText = stringElement.replace(/^\[|\]$|(?<!\\)"/g, '');
prettyText = prettyText.replace(/\&/g, ' and ').replace(/\|/g, ' or ');
break;
case '(?<!\\[) ?\\{[0-9]+} ?(?![^\\]]\\])':


@ -5,8 +5,6 @@ nopaque.corpus_analysis.query_builder.StructuralAttributeBuilderFunctions = clas
this.structuralAttrModalEventlisteners();
document.querySelector('#corpus-analysis-concordance-text-annotation-submit').addEventListener('click', () => this.textAnnotationSubmitHandler());
this.elements.structuralAttrModal = M.Modal.init(
document.querySelector('#corpus-analysis-concordance-structural-attr-modal'),
{
@ -46,10 +44,7 @@ nopaque.corpus_analysis.query_builder.StructuralAttributeBuilderFunctions = clas
resetStructuralAttrModal() {
this.app.resetMaterializeSelection([this.elements.englishEntTypeSelection, this.elements.germanEntTypeSelection]);
this.app.resetMaterializeSelection([this.elements.textAnnotationSelection], 'address');
this.elements.textAnnotationInput.value = '';
this.app.toggleClass(['entity-builder', 'text-annotation-builder'], 'hide', 'add');
this.app.toggleClass(['entity-builder'], 'hide', 'add');
this.toggleEditingAreaStructuralAttrModal('remove');
this.elements.editingModusOn = false;
this.elements.editedQueryChipElementIndex = undefined;
@ -64,11 +59,6 @@ nopaque.corpus_analysis.query_builder.StructuralAttributeBuilderFunctions = clas
break;
case 'entity':
this.app.toggleClass(['entity-builder'], 'hide', 'toggle');
this.app.toggleClass(['text-annotation-builder'], 'hide', 'add');
break;
case 'meta-data':
this.app.toggleClass(['text-annotation-builder'], 'hide', 'toggle');
this.app.toggleClass(['entity-builder'], 'hide', 'add');
break;
default:
break;
@ -77,29 +67,7 @@ nopaque.corpus_analysis.query_builder.StructuralAttributeBuilderFunctions = clas
toggleEditingAreaStructuralAttrModal(action) {
// If the user edits a query chip element, the corresponding editing area is displayed and the other areas are hidden or disabled.
this.app.toggleClass(['sentence-button', 'entity-button', 'text-annotation-button', 'any-type-entity-button'], 'disabled', action);
}
textAnnotationSubmitHandler() {
let noValueMetadataMessage = document.querySelector('#corpus-analysis-concordance-no-value-metadata-message');
let textAnnotationSubmit = document.querySelector('#corpus-analysis-concordance-text-annotation-submit');
let textAnnotationInput = document.querySelector('#corpus-analysis-concordance-text-annotation-input');
let textAnnotationOptions = document.querySelector('#corpus-analysis-concordance-text-annotation-options');
if (textAnnotationInput.value === '') {
textAnnotationSubmit.classList.add('red');
noValueMetadataMessage.classList.remove('hide');
setTimeout(() => {
textAnnotationSubmit.classList.remove('red');
}, 500);
setTimeout(() => {
noValueMetadataMessage.classList.add('hide');
}, 3000);
} else {
let queryText = `:: match.text_${textAnnotationOptions.value}="${textAnnotationInput.value}"`;
this.app.submitQueryChipElement('text-annotation', `${textAnnotationOptions.value}=${textAnnotationInput.value}`, queryText, null, false, true);
this.elements.structuralAttrModal.close();
}
this.app.toggleClass(['sentence-button', 'entity-button', 'any-type-entity-button'], 'disabled', action);
}
editStartEntityChipElement(queryChipElement) {
@ -111,15 +79,4 @@ nopaque.corpus_analysis.query_builder.StructuralAttributeBuilderFunctions = clas
let selection = isEnglishEntType ? this.elements.englishEntTypeSelection : this.elements.germanEntTypeSelection;
this.app.resetMaterializeSelection([selection], entType);
}
editTextAnnotationChipElement(queryChipElement) {
this.elements.structuralAttrModal.open();
this.app.toggleClass(['text-annotation-builder'], 'hide', 'remove');
this.toggleEditingAreaStructuralAttrModal('add');
let [textAnnotationSelection, textAnnotationContent] = queryChipElement.dataset.query
.replace(/:: ?match\.text_|"|"/g, '')
.split('=');
this.app.resetMaterializeSelection([this.elements.textAnnotationSelection], textAnnotationSelection);
this.elements.textAnnotationInput.value = textAnnotationContent;
}
}


@ -52,14 +52,14 @@ nopaque.corpus_analysis.query_builder.TokenAttributeBuilderFunctions = class Tok
let input = this.tokenInputCheck(this.elements.tokenBuilderContent);
switch (elem) {
case 'option-group':
input.value += '(option1|option2)';
this.cursorPositionInputfieldHandler(input, '(option1|option2)');
let firstIndex = input.value.indexOf('option1');
let lastIndex = firstIndex + 'option1'.length;
input.focus();
input.setSelectionRange(firstIndex, lastIndex);
break;
case 'wildcard-char':
input.value += '.';
this.cursorPositionInputfieldHandler(input, '.');
input.focus();
break;
case 'and':
this.conditionHandler('and');
@ -73,9 +73,19 @@ nopaque.corpus_analysis.query_builder.TokenAttributeBuilderFunctions = class Tok
this.optionToggleHandler();
}
cursorPositionInputfieldHandler(input, addedInput) {
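// Inserts addedInput at the current cursor position of the input field and places the cursor right after the inserted text.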
let cursorPosition = input.selectionStart;
let textBeforeCursor = input.value.substring(0, cursorPosition);
let textAfterCursor = input.value.substring(cursorPosition);
let newInputValue = textBeforeCursor + addedInput + textAfterCursor;
input.value = newInputValue;
let newCursorPosition = cursorPosition + addedInput.length;
input.setSelectionRange(newCursorPosition, newCursorPosition);
}
characterIncidenceModifierHandler(elem) {
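// Inserts the selected character incidence modifier at the cursor position of the active token input field.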
let input = this.tokenInputCheck(this.elements.tokenBuilderContent);
input.value += elem.dataset.token;
this.cursorPositionInputfieldHandler(input, elem.dataset.token);
}
characterNMSubmitHandler(modalId) {
@ -83,12 +93,12 @@ nopaque.corpus_analysis.query_builder.TokenAttributeBuilderFunctions = class Tok
let input_n = modal.querySelector('.n-m-input[data-value-type="n"]').value;
let input_m = modal.querySelector('.n-m-input[data-value-type="m"]') || undefined;
input_m = input_m !== undefined ? ',' + input_m.value : '';
let input = `${input_n}${input_m}`;
let addedInput = `${input_n}${input_m}`;
let instance = M.Modal.getInstance(modal);
instance.close();
let tokenInput = this.tokenInputCheck(this.elements.tokenBuilderContent);
tokenInput.value += '{' + input + '}';
let input = this.tokenInputCheck(this.elements.tokenBuilderContent);
this.cursorPositionInputfieldHandler(input, `{${addedInput}}`);
}
conditionHandler(conditionText) {
@ -102,36 +112,12 @@ nopaque.corpus_analysis.query_builder.TokenAttributeBuilderFunctions = class Tok
notSelectedButton.parentNode.removeChild(notSelectedButton);
this.elements.tokenQuery.appendChild(tokenQueryTemplateClone);
// Deleting the options which do not make sense in the context of the condition like "word" AND "word". Also sets selection default.
let selectionDefault = "word";
let optionDeleteList = ['empty-token'];
if (conditionText === 'and') {
switch (this.elements.positionalAttrSelection.value) {
case 'english-pos' || 'german-pos':
optionDeleteList.push('english-pos', 'german-pos');
break;
default:
optionDeleteList.push(this.elements.positionalAttrSelection.value);
break;
}
} else {
let originalSelectionList =
`
<option value="word" selected>word</option>
<option value="lemma" >lemma</option>
<option value="english-pos">english pos</option>
<option value="german-pos">german pos</option>
<option value="simple_pos">simple_pos</option>
`;
this.elements.positionalAttrSelection.innerHTML = originalSelectionList;
M.FormSelect.init(this.elements.positionalAttrSelection);
}
let lastTokenQueryRow = this.elements.tokenQuery.lastElementChild;
if(lastTokenQueryRow.querySelector('[data-kind-of-token="word"]') || lastTokenQueryRow.querySelector('[data-kind-of-token="lemma"]')) {
this.appendIgnoreCaseCheckbox(lastTokenQueryRow.querySelector('.token-query-template-content'), this.elements.ignoreCaseCheckbox.checked);
}
this.elements.ignoreCaseCheckbox.checked = false;
this.setTokenSelection(selectionDefault, optionDeleteList);
this.setTokenSelection();
}
deleteTokenQueryRow(deleteButton) {
@ -160,7 +146,7 @@ nopaque.corpus_analysis.query_builder.TokenAttributeBuilderFunctions = class Tok
this.elements.positionalAttrSelection.appendChild(option);
}
appendIgnoreCaseCheckbox(parentElement, checked = false) {
appendIgnoreCaseCheckbox(parentElement, checked=false) {
let ignoreCaseCheckboxClone = document.querySelector('#ignore-case-checkbox-template').content.cloneNode(true);
parentElement.appendChild(ignoreCaseCheckboxClone);
M.Tooltip.init(parentElement.querySelectorAll('.tooltipped'));
@ -169,7 +155,7 @@ nopaque.corpus_analysis.query_builder.TokenAttributeBuilderFunctions = class Tok
}
}
setTokenSelection(selection, optionDeleteList) {
setTokenSelection(selection="word", optionDeleteList=['empty-token']) {
optionDeleteList.forEach(option => {
if (this.elements.positionalAttrSelection.querySelector(`option[value=${option}]`) !== null) {
this.elements.positionalAttrSelection.querySelector(`option[value=${option}]`).remove();
@ -309,8 +295,8 @@ nopaque.corpus_analysis.query_builder.TokenAttributeBuilderFunctions = class Tok
}
prepareTokenQueryElementsContent(queryChipElement) {
//this regex searches for word or lemma or pos or simple_pos="any string within single or double quotes" followed by one or no ignore case markers, followed by one or no condition characters.
let regex = new RegExp('(word|lemma|pos|simple_pos)=(("[^"]+")|(\\\\u0027[^\\\\u0027]+\\\\u0027)) ?(%c)? ?(\\&|\\|)?', 'gm');
//this regex searches for word or lemma or pos or simple_pos="any string (also quotation marks escaped by backslash) within double quotes" followed by one or no ignore case markers, followed by one or no condition characters.
let regex = new RegExp('(word|lemma|pos|simple_pos)=("(?:[^"\\\\]|\\\\")*") ?(%c)? ?(\\&|\\|)?', 'gm');
let m;
let queryElementsContent = [];
while ((m = regex.exec(queryChipElement.dataset.query)) !== null) {
@ -323,7 +309,7 @@ nopaque.corpus_analysis.query_builder.TokenAttributeBuilderFunctions = class Tok
if (tokenAttr === 'pos') {
tokenAttr = 'english-pos';
}
let tokenValue = m[2].replace(/"|'/g, '');
let tokenValue = m[2].replace(/(?<!\\)"/g, '');
let ignoreCase = false;
let condition = undefined;
m.forEach((match) => {


@ -0,0 +1,34 @@
<h3 class="manual-chapter-title">Introduction</h3>
<h4>Introduction</h4>
<p>
Nopaque is a web application that offers different services and tools to support
researchers working with image and text-based data. These services are logically
connected and build upon each other. They include:
</p>
<ol style="list-style-type:disc; margin-left:2em; padding-bottom:0;">
<li><b>File setup</b>, which converts and merges different data (e.g., books, letters)
for further processing.</li>
<li><b>Image-to-text conversion tools:</b></li>
<ol style="list-style-type:circle; margin-left:1em; padding-bottom:0;"><li><b>Optical Character Recognition</b> converts photos and
scans into text data, making them machine-readable.</li>
<li><b>Transkribus HTR (Handwritten Text Recognition) Pipeline</b> (currently deactivated)*
also converts images into text data, making them machine-readable.</li>
</ol>
<li><b>Natural Language Processing</b> extracts information from your text via
computational linguistic data processing (tokenization, lemmatization, part-of-speech
tagging and named-entity recognition).</li>
<li><b>Corpus analysis</b> makes use of CQP Query Language to search through text
corpora with the aid of metadata and Natural Language Processing tags.</li>
</ol>
<p>
Nopaque also features a <b>Social Area</b>, where researchers can create a personal profile, connect with other users and share corpora if desired.
These services can be accessed from the sidebar in nopaque.
All processing runs in a dedicated cloud environment built on established open-source software,
which ensures that no personal user data is ever disclosed.
</p>
<p>
*Note: the Transkribus HTR Pipeline is currently
deactivated; we are working on an alternative solution. You can try using Tesseract OCR,
though the results will likely be poor.
</p>


@ -0,0 +1,104 @@
<h3 class="manual-chapter-title">Getting Started</h3>
<h4>Getting Started</h4>
<p>
In this section, we will take you through all the steps you need to start analyzing your data with nopaque.
</p>
<div style="border: 1px solid; padding-left: 20px; margin-right: 400px; margin-bottom: 40px;">
<h5>Content</h5>
<ol style="list-style-type:disc">
<li><a href="#registration-and-login">Registration and login</a></li>
<li><a href="#preparing-files">Preparing files for analysis</a></li>
<li><a href="#converting-a-pdf-into-text">Converting a PDF into text data</a></li>
<li><a href="#extracting-linguistic-data">Extracting linguistic data from text</a></li>
<li><a href="#creating-a-corpus">Creating a corpus</a></li>
<li><a href="#analyzing-a-corpus">Analyzing a corpus</a></li>
</ol>
</div>
<p></p>
<h5 id="registration-and-login">Registration and login</h5>
<p>Before you can begin using nopaque, you will need to create a personal user account.
Open the menu (three dots) at the top right of the screen and choose “Register.” Enter
the required details listed on the registration page (username, password, email address).
After verifying your account via the link sent to your email, you can log in.</p>
<h5 id="preparing-files">Preparing files for analysis</h5>
<p>A few steps need to be taken before images, scans, or other text data are ready for
analysis in nopaque. The SpaCy NLP Pipeline service can only extract linguistic data
from texts in plain text (.txt) format. If your text is already in this format, you
can skip the next steps and go directly to <b>Extracting linguistic data from text</b>.
Otherwise, the next steps assume that you are starting off with image data.</p>
<p>
First, all data needs to be converted into PDF format. Using the <b>File Setup</b> service,
you can bundle images, even of different formats, and convert them into a single
PDF file. Note that the File Setup service will sort the images based on their file
name in ascending order. It is thus recommended to name them accordingly, for example:
page-01.png, page-02.jpg, page-03.tiff.
</p>
<p>
Add a title and description to your job and select the File Setup version* you want to use.
After uploading the images and completing the File Setup job, the list of files added
can be seen under “Inputs.” Further below, under “Results,” you can find and download
the PDF output.</p>
<h5 id="converting-a-pdf-into-text">Converting a PDF into text data</h5>
<p>Select an image-to-text conversion tool depending on whether your PDF is primarily
composed of handwritten text or printed text. For printed text, select the <b>Tesseract OCR
Pipeline</b>. For handwritten text, select the <b>Transkribus HTR Pipeline</b>. Select the desired
language model or upload your own. Select the version* of Tesseract OCR you want to use
and click on submit to start the conversion. When the job is finished, various output
files can be seen and downloaded further below, under “Results.” You may want to review
the text output for errors and coherence. (Note: the Transkribus HTR Pipeline is currently
deactivated; we are working on an alternative solution. You can try using Tesseract OCR,
though the results will likely be poor.)
</p>
<h5 id="extracting-linguistic-data">Extracting linguistic data from text</h5>
<p>The <b>SpaCy NLP Pipeline</b> service extracts linguistic information from plain text files
(in .txt format). Select the corresponding .txt file, the language model, and the
version* you want to use. When the job is finished, find and download the files in
<b>.json</b> and <b>.vrt</b> format under “Results.”</p>
<h5 id="creating-a-corpus">Creating a corpus</h5>
<p>Now, using the files in .vrt format, you can create a corpus. This can be done
in the <a href="{{ url_for('main.dashboard') }}">Dashboard</a> or
<a href="{{ url_for('services.corpus_analysis') }}">Corpus Analysis</a> sections under “My Corpora.” Click on “Create corpus”
and add a title and description for your corpus. After submitting, you will automatically
be taken to the overview page of your new, still empty corpus (this page can be
called up again at any time via the corpus lists). </p>
<p>
Further down in the “Corpus files” section, you can add texts in .vrt format
(results of the NLP service) to your new corpus. To do this, use the "Add Corpus File"
button and fill in the form that appears. Here, you can add
metadata to each text. After all texts have been added, the corpus must
be prepared for analysis. This process can be initiated by clicking on the
"Build" button under "Actions".
On the corpus overview page, you can see information about the current status of
the corpus in the upper right corner. After the build process, the status "built" should be shown here.
Now, your corpus is ready for analysis.</p>
<h5 id="analyzing-a-corpus">Analyzing a corpus</h5>
<p>Navigate to the corpus you would like to analyze and click on the Analyze button.
This will take you to an analysis overview page for your corpus. Here, you can find a
visualization of general linguistic information of your corpus, including tokens,
sentences, unique words, unique lemmas, unique parts of speech and unique simple parts
of speech. You will also find a pie chart of the proportional textual makeup of your
corpus and can view the linguistic information for each individual text file. A more
detailed visualization of token frequencies with a search option is also on this page.</p>
<p>From the corpus analysis overview page, you can navigate to other analysis modules:
the <b>Query Builder</b> (under <b>Concordance</b>) and the <b>Reader</b>. With the Reader, you can read
your corpus texts tokenized with the associated linguistic information. The tokens can
be shown as lemmas, parts of speech, words, and can be displayed in different ways:
visually as plain text with the option of highlighted entities or as chips.</p>
<p>The <b>Concordance</b> module allows for more specific, query-oriented text analyses.
Here, you can filter out text parameters and structural attributes in different
combinations. This is explained in more detail in the Query Builder section of the
manual.</p>
<br>
<br>
<p>*For all services, it is recommended to use the latest version unless you need a model
only available in an earlier version or are looking to reproduce data that was originally generated
using an older version.</p>


@ -1,15 +1,22 @@
<h3 class="manual-chapter-title">Dashboard</h3>
<h4>About the dashboard</h4>
<br>
<div class="row">
<div class="col s12 m4">
<img alt="Dashboard" class="materialboxed responsive-img" src="{{ url_for('static', filename='images/manual/dashboard.png') }}">
</div>
<div class="col s12 m8">
<p>
The <a href="{{ url_for('main.dashboard') }}">dashboard</a> provides a central overview of all resources assigned to the
user. These are <a href="{{ url_for('main.dashboard', _anchor='corpora') }}">corpora</a> and created <a href="{{ url_for('main.dashboard', _anchor='jobs') }}">jobs</a>. Corpora are freely composable
annotated text collections and jobs are the initiated file processing
procedures. Both the job and the corpus listings can be searched using
the search field displayed above them.
The <a href="{{ url_for('main.dashboard') }}">dashboard</a> provides a central
overview of all user-specific resources.
These are <a href="{{ url_for('main.dashboard', _anchor='corpora') }}">corpora</a>,
created <a href="{{ url_for('main.dashboard', _anchor='jobs') }}">jobs</a>, and
model <a href="{{ url_for('main.dashboard', _anchor='contributions') }}">contributions</a>.
A <b>corpus</b> is a freely composable annotated text collection.
A <b>job</b> is an initiated file processing procedure.
A <b>model</b> is a mathematical system for pattern recognition, trained on example data by means of machine learning. Both the job and
the corpus listings can be searched using the search field displayed above them on the dashboard.
Uploaded models can be found and edited by clicking on the corresponding service under <b>My Contributions</b>.
</p>
</div>
<div class="col s12">&nbsp;</div>
@ -20,10 +27,10 @@
<p>
A corpus is a collection of texts that can be analyzed using the
Corpus Analysis service. All texts must be in the verticalized text
file format, which can be obtained via the Natrual Language
Processing service. It contains, in addition to the actual text,
file format, which can be obtained via the Natural Language
Processing service. It contains, in addition to the text,
further annotations that are searchable in combination with optional
addable metadata during your analysis.
metadata that can be added during your analysis.
</p>
</div>
</div>


@ -0,0 +1,107 @@
<h3 class="manual-chapter-title">Services</h5>
<h4>Services</h4>
<p>
In this section, we will describe the different services nopaque has to offer.
</p>
<div class="row">
<div class="col s12 m4">
<img alt="Services" class="materialboxed responsive-img" src="{{ url_for('static', filename='images/manual/services.png') }}">
</div>
<div class="col s12 m8">
<p>
Nopaque was designed to be modular. Its modules are implemented in
self-contained <b>services</b>, each of which represents a step in the
workflow. The typical workflow involves using the services one after another.
The typical order can be taken from the listing of the
services in the left sidebar or from the nopaque manual (accessible via the pink
button in the upper right corner).
The workflow can also be entered and left at different points,
which allows you to work flexibly.
All services are versioned, so the data generated with nopaque is always
reproducible.
</p>
<p>For all services, it is recommended to use the latest version (selected
in the drop-down menu on the service page) unless you need a model
only available in an earlier version or are looking to reproduce data that was originally generated
using an older version.</p>
</div>
</div>
<h4>File Setup</h4>
<p>
The <a href="{{ url_for('services.file_setup_pipeline') }}">File Setup Service</a> bundles image data, such as scans and photos,
together in a handy PDF file. To use this service, use the job form to
select the images to be bundled, choose the desired service version, and
specify a title and description.
Note that the File Setup service will sort the images based on their file name in
ascending order. It is therefore highly recommended to name
them accordingly, for example:
page-01.png, page-02.jpg, page-03.tiff. Generally, you can assume
that the images will be sorted in the order in which the file explorer of
your operating system lists them when you view the files in a folder
sorted in ascending order by file name.
</p>
<h4>Optical Character Recognition (OCR)</h4>
<p>
The <a href="{{ url_for('services.tesseract_ocr_pipeline') }}">Tesseract OCR Pipeline</a>
converts image data - like photos and scans - into text data, making them machine-readable.
This step enables you to proceed with the computational analysis of your documents.
To use this service, use the job form to select the file you want to convert into text data.
Then, choose the language model and service version you would like to use. Enter a title and description for your file and then
submit your job. Once the job is finished, the results can be found and downloaded further below on the page, under
the section labeled "Results."
</p>
<h4>Handwritten Text Recognition (HTR)</h4>
<p>The Transkribus HTR Pipeline is currently
deactivated. We are working on an alternative solution. In the meantime, you can
try using Tesseract OCR, though the results will likely be poor.</p>
<h4>Natural Language Processing (NLP)</h4>
<p>The <a href="{{ url_for('services.spacy_nlp_pipeline') }}">SpaCy NLP Pipeline</a> extracts
information from plain text files (.txt format) via computational linguistic data processing
(tokenization, lemmatization, part-of-speech tagging and named-entity recognition).
To use this service, select the .txt file that you want to extract this information from.
Then select the language model and the version you want to use. Once the job is finished, you can find and download the files in
<b>.json</b> and <b>.vrt</b> format under the section labeled “Results.”</p>
<h4>Corpus Analysis</h4>
<p>
With the <a href="{{ url_for('services.corpus_analysis') }}">Corpus Analysis</a>
service, it is possible to create a text corpus
and then explore it with analytical tools. The analysis session is realized
on the server side by the Open Corpus Workbench software, which enables
efficient and complex searches with the help of the CQP Query Language.</p>
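<p>
To give a first impression, a CQP query might look like the following (an illustrative
example; the attribute and tag values available for your corpus are listed under the "Tagsets" tab):
</p>
<pre><code>[lemma="go"] [simple_pos="NOUN"]; all forms of "go" directly followed by a noun</code></pre>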
<p>
To use this service, navigate to the corpus you would like to analyze and click on the Analyze button.
This will take you to an analysis overview page for your corpus. Here, you can find
a visualization of general linguistic information of your corpus, including tokens,
sentences, unique words, unique lemmas, unique parts of speech and unique simple
parts of speech. You will also find a pie chart of the proportional textual makeup
of your corpus and can view the linguistic information for each individual text file.
A more detailed visualization of token frequencies with a search option is also on
this page.
</p>
<p>
From the corpus analysis overview page, you can navigate to other analysis modules:
the Query Builder (under Concordance) and the Reader.
</p>
<p>
With the <b>Reader</b>, you can read your corpus texts tokenized with the associated linguistic information. The tokens
can be shown as lemmas, parts of speech, words, and can be displayed in different
ways: visually as plain text with the option of highlighted entities or as chips.
</p>
<p>
The Concordance module allows for more specific, query-oriented text analyses.
Here, you can filter out text parameters and structural attributes in different
combinations. This is explained in more detail in the <b>Query Builder</b> section of the
manual.
</p>


@ -7,7 +7,7 @@
<div class="col s12 m8">
<p>
To <a href="{{ url_for('corpora.create_corpus') }}">create a corpus</a>, you
can use the "New Corpus" button, which can be found on both, the Corpus
can use the "New Corpus" button, which can be found on both the Corpus
Analysis Service page and the Dashboard below the corpus list. Fill in the input
mask to Create a corpus. After you have completed the input mask, you will
be automatically taken to the corpus overview page (which can be called up
@ -43,5 +43,5 @@
the way a token is displayed by using the text style switch. The
concordance module offers some more options regarding the context size of
search results. If the context does not provide enough information, you can
hop into the reader module by using the lupe icon next to a match.
hop into the reader module by using the magnifier icon next to a match.
</p>


@ -1,5 +1,22 @@
<h3 class="manual-chapter-title">CQP Query Language</h3>
<p>Within the Corpus Query Language, a distinction is made between two types of annotations: positional attributes and structural attributes. Positional attributes refer to a token, e.g. the word "book" is assigned the part-of-speech tag "NN", the lemma "book" and the simplified part-of-speech tag "NOUN" within the token structure. Structural attributes refer to text structure-giving elements such as sentence and entity markup. For example, the markup of a sentence is represented in the background as follows:</p>
<h4 id="cqp-query-language">CQP Query Language</h4>
<p>In this section, we will explain the main features of the Corpus Query Language, including
the types of linguistic attributes you can work with and how to use them in your queries.</p>
<div style="border: 1px solid; padding-left: 20px; margin-right: 400px; margin-bottom: 40px;">
<h5>Content</h5>
<ol style="list-style-type:disc">
<li><a href="#overview-annotations">Overview of annotation types</a></li>
<li><a href="#positional-attributes">Positional attributes</a></li>
<li><a href="#searching-positional-attributes">How to search for positional attributes</a></li>
<li><a href="#structural-attributes">Structural attributes</a></li>
<li><a href="#searching-structural-attributes">How to search for structural attributes</a></li>
</ol>
</div>
<h4 id="overview-annotations">Overview of annotation types</h4>
<p>Within the Corpus Query Language, a distinction is made between two types of annotations: <b>positional attributes</b> and <b>structural attributes</b>. Positional attributes refer to a token, e.g. the word "book" is assigned the part-of-speech tag "NN", the lemma "book" and the simplified part-of-speech tag "NOUN" within the token structure. Structural attributes refer to text structure-giving elements such as sentence and entity markup. For example, the markup of a sentence is represented in the background as follows:</p>
<pre>
<code>
<span class="green-text">&lt;s&gt; structural attribute</span>
@ -13,7 +30,7 @@
</code>
</pre>
<h4>Positional attributes</h4>
<h4 id="positional-attributes">Positional attributes</h4>
<p>Before you can start searching for positional attributes (also called tokens), it is necessary to know what properties they contain.</p>
<ol>
<li><span class="blue-text"><b>word</b></span>: The string as it is also found in the original text</li>
@ -33,7 +50,7 @@
</li>
</ol>
<h5>Searching for positional attributes</h5>
<h5 id="searching-positional-attributes">How to search for positional attributes</h5>
<div>
<p>
<b>Token with no condition on any property (also called <span class="blue-text">wildcard token</span>)</b><br>
@ -118,7 +135,7 @@
<pre style="margin-top: 0;" ><code> ^ ^ the braces indicate the start and end of an option group</code></pre>
</div>
<h4>Structural attributes</h4>
<h4 id="structural-attributes">Structural attributes</h4>
<p>nopaque provides several structural attributes for querying. A distinction is made between attributes with and without value.</p>
<ol>
<li><span class="green-text"><b>s</b></span>: Annotates a sentence</li>
@ -153,7 +170,7 @@
</li>
</ol>
<h5>Searching for structural attributes</h5>
<h5 id="searching-structural-attributes">How to search for structural attributes</h5>
<pre><code>&lt;ent&gt; [] &lt;/ent&gt;; A one token long entity of any type</code></pre>
<pre><code>&lt;ent_type="PERSON"&gt; [] &lt;/ent_type&gt;; A one token long entity of type PERSON</code></pre>
<pre><code>&lt;ent_type="PERSON"&gt; []* &lt;/ent_type&gt;; Entity of any length of type PERSON</code></pre>


@ -1,26 +1,46 @@
<h3 class="manual-chapter-title">Query Builder Tutorial</h3>
<p>The query builder helps you to make a query in the form of the Corpus Query
Language (CQL) to your text. You can use the CQL to filter out various types of
text parameters, for example, a specific word, a lemma, or you can set part-of-speech
tags (pos) that indicate the type of word you are looking for (a noun, an
adjective, etc.). In addition, you can also search for structural attributes,
or specify your query for a token (word, lemma, pos) via entity typing. And of
course everything can be combined. You can find examples for different queries
under the tab "Examples".</p>
<p></p>
<br>
<h4>Query Builder</h4>
<p>In this section, we will provide more detailed instructions on how to use the Query Builder,
nopaque's user-friendly tool for finding and analyzing different linguistic elements of your texts.</p>
<div style="border: 1px solid; padding-left: 20px; margin-right: 400px; margin-bottom: 40px;">
<h5>Content</h5>
<ol style="list-style-type:disc">
<li><a href="#add-new-token-tutorial">Add new token to your query</a></li>
<li><a href="#edit-options-tutorial">Options to edit your query</a></li>
<li><a href="#add-structural-attribute-tutorial">Add structural Attributes to your query</a></li>
<li><a href="#general-overview">General Overview</a></li>
<li><a href="#add-new-token-tutorial">Add a new token to your query</a></li>
<li><a href="#edit-options-tutorial">Options for editing your query</a></li>
<li><a href="#add-structural-attribute-tutorial">Add structural attributes to your query</a></li>
<li><a href="#general-options-query-builder">General options</a></li>
</ol>
</div>
<h4 id="general-overview">General Overview</h4>
<p>The Query Builder can be accessed via <a href=" {{ url_for('main.dashboard') }}">My Corpora</a> or <a href=" {{ url_for('services.corpus_analysis') }}">Corpus Analysis</a> in the sidebar options.
Click on the corpus you wish to analyze. You will be sent to its corpus overview page.
Here, click on <b>Analyze</b> to reach the analysis page.
The analysis page features different options for analyzing your corpus, including
visualizations and a <b>Reader</b> module. In this case, we want to open the query builder.
To do so, click on the <b>Concordance</b> button on the top of the page.</p>
<p>The query builder uses the <b>Corpus Query Language (CQL)</b> to help you make a query for analyzing your texts.
In this way, it is possible to filter out various types of text parameters, for
example, a specific word, a lemma, or you can set part-of-speech
tags (pos) that indicate the type of word you are looking for (a noun, an
adjective, etc.). In addition, you can also search for structural attributes,
or specify your query for a token (word, lemma, pos) via entity typing. And of
course, the different text parameters can be combined.</p>
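<p>As an illustration, a query that combines several of these parameters could look like
this in CQL (the attribute and tag values shown here are only examples; see the "Tagsets"
tab for the values available in your corpus):</p>
<pre><code>[word="the" %c] [lemma="book" & simple_pos="NOUN"]; the word "the" (ignoring case) followed by a token with the lemma "book" and the simple part-of-speech tag NOUN</code></pre>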
<p>Tokens and structural attributes can be added by clicking on the <b>"+"</b> button
(what we call the "input marker") in the input field or the labeled buttons below it. Elements
added are shown as chips. These can be reorganized using drag and drop. The input
marker can also be moved in this way. Its position shows where new elements will be added. <br>
A "translation" of your query into Corpus Query Language (CQL) will be displayed underneath the query field.</p>
<p>For more information, see our <b>manual section for the Corpus Query Language.</b>
<br>
Advanced users can make direct use of CQL by switching to <b>expert mode</b> via the toggle button.
</p>
<p>The entire input field can be cleared using the red trash icon on the right.</p>
<br>
{# Add Token Tutorial #}
<div>
<hr>
@ -29,8 +49,8 @@ under the tab "Examples".</p>
<h4 id="add-new-token-tutorial">Add new token to your Query</h4>
<p>If you are only looking for a specific token, you can click on the left
button and select the type of token you are looking for from the drop-down menu.
By default "Word" is selected. </p>
"Word" is selected by default. </p>
<h5>Word and Lemma</h5>
<p>If you want to search for a specific word or lemma and the respective
category is selected in the drop-down menu, you can type in the word or lemma
@ -46,13 +66,13 @@ under the tab "Examples".</p>
"simple_pos" to search for different parts-of-speech. You can find an overview
of all tags under the "Tagsets" tab.</p>
<img src="{{ url_for('static', filename='images/manual/query_builder/pos.gif') }}" alt="part-of-speech-tag explanation" width="100%;" style="margin-bottom:20px;">
<br>
<h5>Empty Token</h5>
<p>Here you can search for an empty token. This selection should never stand
alone and should always be extended with an incidence modifier or stand in a
<p>Here you can search for a token with unspecified attributes (also called wildcard token). This
selection should never stand alone and should always be extended with an incidence modifier or stand in a
larger query, because otherwise all possible tokens would be searched for and
the program would crash.</p>
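<p>For illustration, a wildcard token embedded in a larger query could look like this:</p>
<pre><code>[word="is"] [] [word="your"]; the word "is", followed by exactly one arbitrary token, followed by "your"</code></pre>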
<p></p>
<br>
</div>
@ -61,8 +81,8 @@ under the tab "Examples".</p>
<hr>
<p></p>
<br>
<h4 id="edit-options-tutorial">Options to edit your token</h4>
<p>You have the possibility to extend or specify your searched token with
<h4 id="edit-options-tutorial">Options for editing your query</h4>
<p>You have the possibility to extend or specify the token you are searching for with
certain factors. For this the query builder offers some fixed options. You can
find more information about the options in the Corpus Query Language Tutorial.</p>
<br>
@ -76,7 +96,6 @@ under the tab "Examples".</p>
variants are not limited, so you can manually enter more options in the same
format. "Option1" and "option2" must be replaced accordingly. </p>
<img src="{{ url_for('static', filename='images/manual/query_builder/option_group.gif') }}" alt="option group explanation" width="100%;" style="margin-bottom:20px;">
<p></p>
<br>
<h5>Incidence Modifiers</h5>
@ -85,7 +104,7 @@ under the tab "Examples".</p>
not at all or once: <br>
[word = "is"] [word="it"] [word="your"] [word="litte"]? [word = "dog"] <br>
Here the word "little" should occur either once or not at all. With
[word="dogs?"] the search is for "dog "or "dogs". </p>
[word="dogs?"] the search is for "dog "or "dogs".</p>
<br>
<h5>Ignore Case</h5>
@ -101,7 +120,10 @@ under the tab "Examples".</p>
this case. For this you can simply string them together: <br>
[word="I"] [word="will" & simple_pos="VERB"] [word="go"].</p>
<img src="{{ url_for('static', filename='images/manual/query_builder/or_and.gif') }}" alt="OR/AND explanation" width="100%;" style="margin-bottom:20px;">
<p></p>
<p>Tokens that have already been added can also be modified by clicking on the corresponding
pen icon. Click on the "ignore case" box, for example, and the query builder will
not differentiate between upper- and lower-case letters for that respective token.
Newly added conditions apply to the most recently added token attribute.</p>
<br>
</div>
@ -120,26 +142,33 @@ under the tab "Examples".</p>
This search can of course be specified if you search for particular tokens or
entities between the sentence tags (<s></s>). For example, you can search for
sentences that contain only a noun, verb, and adjective. <br>
After clicking on Sentence you will see a <div class="chip" style="background-color:#FD9720;">Sentence Start</div>.
When you are done with your query or the content
between the Sentence tags, you have to click the Sentence button one more time
to close it. The corresponding button is called
<div class="chip" style="background-color:#FD9720;">Sentence End</div>.<br>
Click on Sentence to add the sentence chips: <div class="chip" style="background-color:#FD9720;">Sentence Start</div>
and <div class="chip" style="background-color:#FD9720;">Sentence End</div>.
These mark where the sentence starts and ends. Use drag-and-drop to place them accordingly. When
the Sentence attribute is added, the input marker will automatically be
moved between the sentence chips. Use drag-and-drop as needed to continue your query
at a different position.
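As an illustration, the sentence query described above (a sentence consisting only of a noun,
a verb, and an adjective) would roughly correspond to the following CQL expression (the
simple_pos values used here are examples and may differ from the tagset used in your corpus):
<pre><code>&lt;s&gt; [simple_pos="NOUN"] [simple_pos="VERB"] [simple_pos="ADJ"] &lt;/s&gt;; a sentence made up of exactly one noun, one verb and one adjective</code></pre>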
<br>
<h5>Entities</h5>
<p>With entities, i.e. units of meaning, you search for text sections that
follow a certain code. For example, persons, dates, certain events. You can
select the codes using the drop-down menus. You can find an explanation of
the respective abbreviations under the tab "Tagsets". <br>
<p>With entities, i.e. units of meaning, you can search for text sections that
contain more specific information, for example, persons, dates, or events. The
codes for these categories can be selected using the drop-down menus. You can find an explanation of
these abbreviations under the tab "Tagsets". <br>
You can also search for unspecified entities by selecting "Add entity of any type".</p>
To close the entity query you started, you have to click the entity button one more time. This will make the <div class="chip" style="background-color:#A6E22D;">Entity End</div> element appear in your query.
Click on the Entity button to add the entity chips <div class="chip" style="background-color:#A6E22D;">Entity Type=</div> and <div class="chip" style="background-color:#A6E22D;">Entity End</div>.
<p>The entity type can be changed by clicking on the pen symbol on the chip. When
the Entity attribute is added, the input marker will automatically be
moved between the entity chips. Use drag-and-drop as needed to continue your query
at a different position.</p>
<img src="{{ url_for('static', filename='images/manual/query_builder/entity.gif') }}" alt="entity explanation" width="100%;" style="margin-bottom:20px;">
<p></p>
<br>
<h5>Meta Data</h5>
<p>With the meta data you can annotate your text and add specific conditions.
<h5>Meta Data (currently unavailable)</h5>
<p>The meta data function is being worked on and cannot currently be used!
<br>
With the meta data you can annotate your text and add specific conditions.
You can select a category on the left and enter your desired value on the right.
The selected metadata will apply to your entire request and will be added at the end.</p>
<img src="{{ url_for('static', filename='images/manual/query_builder/meta_data.gif') }}" alt="meta data explanation" width="100%;" style="margin-bottom:20px;">
@ -155,14 +184,39 @@ under the tab "Examples".</p>
<br>
<h4 id="general-options-query-builder">General Options of the query builder</h4>
<p>You have several options to edit your query after adding it to the preview.</p>
<br>
<h5>Editing the elements</h5>
<p>You can edit your query chips by clicking on the pen icon.</p>
<img src="{{ url_for('static', filename='images/manual/query_builder/editing_chips.gif') }}" alt="editing explanation" width="100%;" style="margin-bottom:20px;">
<br>
<h5>Deleting the elements</h5>
<p>You can delete the added elements from the query by clicking the X behind the respective content.</p>
<img src="{{ url_for('static', filename='images/manual/query_builder/delete.gif') }}" alt="delete explanation" width="100%;" style="margin-bottom:20px;">
<br>
<h5>Move the elements of your query</h5>
<p>You can drag and drop elements to customize your query.</p>
<img src="{{ url_for('static', filename='images/manual/query_builder/drag_and_drop.gif') }}" alt="Drag&Drop explanation" width="100%;" style="margin-bottom:20px;">
<br>
<h5>Setting an incidence modifier</h5>
<p>With the incidence modifier option, you can specify the number of
times a token should appear in your query. This is particularly relevant for empty
tokens (tokens with unspecified attributes). Click on a token (blue chip) and
select the desired option from the list to add an incidence modifier. To
close the list without adding anything, click on the token again.</p>
<img src="{{ url_for('static', filename='images/manual/query_builder/incidence_modifier.gif') }}" alt="incidence modifier explanation" width="100%;" style="margin-bottom:20px;">
<br>
<h5>Switching between Query Builder and Expert mode</h5>
<p>To work with the plain Corpus Query Language instead of using the Query Builder, click on the "expert mode"
switch. You can then type your query directly into the input field. All elements previously added will be carried over
into expert mode. Click on the switch again to return to the Query Builder if desired. All recognized elements
will be parsed into chips; those not recognized will be deleted from the query.</p>
<img src="{{ url_for('static', filename='images/manual/query_builder/expert_mode.gif') }}" alt="expert mode explanation" width="100%;" style="margin-bottom:20px;">
</div>


@ -3,45 +3,46 @@
<h2>Manual</h2>
<ul class="tabs" id="manual-modal-toc">
<li class="tab"><a href="#manual-modal-introduction">Introduction</a></li>
<li class="tab"><a href="#manual-modal-registration-and-log-in">Registration and Log in</a></li>
<li class="tab"><a href="#manual-modal-getting-started">Getting Started</a></li>
<li class="tab"><a href="#manual-modal-dashboard">Dashboard</a></li>
<li class="tab"><a href="#manual-modal-services">Services</a></li>
<li class="tab"><a href="#manual-modal-a-closer-look-at-the-corpus-analysis">A closer look at the Corpus Analysis</a></li>
<li class="tab"><a href="#manual-modal-cqp-query-language">CQP Query Language</a></li>
<!-- <li class="tab"><a href="#manual-modal-a-closer-look-at-the-corpus-analysis">A closer look at the Corpus Analysis</a></li> -->
<li class="tab"><a href="#manual-modal-query-builder">Query Builder</a></li>
<li class="tab"><a href="#manual-modal-cqp-query-language">CQP Query Language</a></li>
<li class="tab"><a href="#manual-modal-tagsets">Tagsets</a></li>
</ul>
<div id="manual-modal-introduction">
<br>
{% include "main/_manual_modal/_01_introduction.html.j2" %}
{% include "_base/_modals/_manual/01_introduction.html.j2" %}
</div>
<div id="manual-modal-registration-and-log-in">
<div id="manual-modal-getting-started">
<br>
{% include "main/_manual_modal/_02_registration_and_log_in.html.j2" %}
{% include "_base/_modals/_manual/02_getting_started.html.j2" %}
</div>
<div id="manual-modal-dashboard">
<br>
{% include "main/_manual_modal/_03_dashboard.html.j2" %}
{% include "_base/_modals/_manual/03_dashboard.html.j2" %}
</div>
<div id="manual-modal-services">
<br>
{% include "main/_manual_modal/_06_services.html.j2" %}
{% include "_base/_modals/_manual/06_services.html.j2" %}
</div>
<div id="manual-modal-a-closer-look-at-the-corpus-analysis">
<!-- <div id="manual-modal-a-closer-look-at-the-corpus-analysis">
<br>
{% include "main/_manual_modal/_07_a_closer_look_at_the_corpus_analysis.html.j2" %}
</div>
{% include "_base/_modals/_manual/07_a_closer_look_at_the_corpus_analysis.html.j2" %}
</div> -->
<div id="manual-modal-cqp-query-language">
<br>
{% include "main/_manual_modal/_08_cqp_query_language.html.j2" %}
{% include "_base/_modals/_manual/08_cqp_query_language.html.j2" %}
</div>
<div id="manual-modal-query-builder">
<br>
{% include "main/_manual_modal/_09_query_builder.html.j2" %}
{% include "_base/_modals/_manual/09_query_builder.html.j2" %}
</div>
<div id="manual-modal-tagsets">
<br>
{% include "main/_manual_modal/_10_tagsets.html.j2" %}
{% include "_base/_modals/_manual/10_tagsets.html.j2" %}
</div>
</div>
<div class="modal-footer">


@ -27,7 +27,10 @@
<div class="col s12 m3">
<span>© 2020 Bielefeld University</span>
</div>
<div class="col s12 m9 right-align">
<div class="col s12 m2">
<span class="right"><b>Version {{ config.NOPAQUE_VERSION }}</b></span>
</div>
<div class="col s12 m7 right-align">
<a class="btn-small primary-variant-color waves-effect waves-light" href="{{ url_for('main.faq') }}"><i class="left material-icons">info_outline</i>Frequently Asked Questions</a>
<a class="btn-small primary-variant-color waves-effect waves-light" href="mailto:{{ config.NOPAQUE_SERVICE_DESK }}"><i class="left material-icons">mail</i>Report an issue</a>
<a class="btn-small primary-variant-color waves-effect waves-light" href="https://gitlab.ub.uni-bielefeld.de/sfb1288inf/nopaque" target="_blank"><i class="left material-icons">code</i>GitLab</a>


@ -0,0 +1,5 @@
{% include "_base/_modals/manual.html.j2" %}
{% if current_user.is_authenticated and not current_user.terms_of_use_accepted %}
{% include "_base/_modals/terms_of_use.html.j2" %}
{% endif %}


@ -5,12 +5,20 @@
<a href="#" data-target="sidenav" class="sidenav-trigger"><i class="material-icons">menu</i></a>
{% endif %}
<a href="{{ url_for('main.index') }}" class="brand-logo" style="height: 100%; overflow: hidden;">
<img class="hide-on-small-only" src="{{ url_for('static', filename='images/nopaque_-_logo_name_slogan.svg') }}" style="height: 128px; margin-top: -32px; margin-left: -32px;">
<img class="hide-on-med-and-up" src="{{ url_for('static', filename='images/nopaque_-_logo.svg') }}" style="height: 128px; margin-top: -32px; margin-left: -32px;">
</a>
<ul class="right hide-on-med-and-down">
<li><a href="{{ url_for('main.news') }}"><i class="material-icons left">email</i>News</a></li>
<li><a class="dropdown-trigger no-autoinit" data-target="nav-more-dropdown" href="#!" id="nav-more-dropdown-trigger"><i class="material-icons">more_vert</i></a></li>
<li>
<a class="dropdown-trigger no-autoinit" data-target="nav-more-dropdown" href="#!" id="nav-more-dropdown-trigger">
{% if current_user.is_authenticated %}
<img src="{{ url_for('users.user_avatar', user_id=current_user.id) }}" alt="avatar" class="circle left" style="height: 54px; padding: 10px 10px 0 0;">
{{ current_user.username }} ({{ current_user.email }})
{% else %}
<i class="material-icons left">more_vert</i>
{% endif %}
</a>
</li>
</ul>
</div>
<div class="nav-content primary-variant-color">
@ -22,17 +30,14 @@
{% endif %}
{%- endfor -%}
</ul>
{# {% if current_user.is_authenticated %}
<a class="btn-floating btn-large halfway-fab modal-trigger pink tooltipped waves-effect waves-light" data-tooltip="Roadmap" href="#roadmap-modal"><i class="material-icons">explore</i></a>
{% endif %} #}
<a class="btn-floating btn-large halfway-fab modal-trigger pink tooltipped waves-effect waves-light" data-tooltip="Manual" href="#manual-modal"><i class="material-icons">help</i></a>
<a class="btn-floating btn-large halfway-fab modal-trigger pink tooltipped waves-effect waves-light" data-tooltip="Manual" href="#manual-modal"><i class="material-icons">school</i></a>
</div>
</nav>
</div>
<ul class="dropdown-content" id="nav-more-dropdown">
{# <li><a href="{{ url_for('main.user_manual') }}"><i class="material-icons left">help</i>Manual</a></li> #}
{% if current_user.is_authenticated %}
<li><a href="{{ url_for('users.user', user_id=current_user.id) }}"><i class="material-icons left">person</i>My Profile</a></li>
<li><a href="{{ url_for('settings.settings') }}"><i class="material-icons left">settings</i>Settings</a></li>
<li class="divider" tabindex="-1"></li>
<li><a href="{{ url_for('auth.logout') }}">Log out</a></li>

View File

@ -1,34 +1,18 @@
<ul class="sidenav sidenav-fixed" id="sidenav">
<li>
<div class="user-view">
<div class="background primary-color"></div>
<span class="white-text name">
{% if current_user.username|length > 32 %}
{{ current_user.username[:29] + '...' }}
{% else %}
{{ current_user.username }}
{% endif %}
</span>
<span class="white-text email">
{% if current_user.email|length > 32 %}
{{ current_user.email[:29] + '...' }}
{% else %}
{{ current_user.email }}
{% endif %}
</span>
<li class="primary-color hide-on-small-only">
<div style="overflow: hidden; height: 64px; width: 250px;">
<a href="{{ url_for('main.index') }}">
<img class="hide-on-small-only" src="{{ url_for('static', filename='images/nopaque_-_logo_name_slogan.svg') }}" style="height: 128px; margin-top: -32px;">
</a>
</div>
</div>
</li>
{# <li class="primary-color">
<div style="overflow: hidden;height: 64px; width: 250px;">
<img class="hide-on-small-only" src="{{ url_for('static', filename='images/nopaque_-_logo_name_slogan.svg') }}" style="height: 128px; margin-top: -32px; margin-left: ;">
</div>
</li> #}
{# <li><a href="{{ url_for('main.index') }}">nopaque</a></li> #}
<li class="hide-on-large-only">
<li class="primary-variant-color center-align hide-on-small-only" style="padding-top: 8px; height:48px;">
<img src="{{ url_for('static', filename='images/nopaque_slogan_transparent.png') }}" style="width:85%">
</li>
<li class="hide-on-med-and-up"><a class="waves-effect" href="{{ url_for('main.index') }}"><i class="material-icons left">home</i>nopaque</a></li>
<li>
<a class="waves-effect" href="{{ url_for('main.news') }}"><i class="material-icons left">email</i>News</a>
</li>
{# <li><a href="{{ url_for('main.user_manual') }}"><i class="material-icons">help</i>Manual</a></li> #}
<li>
<a class="waves-effect" class="waves-effect" href="{{ url_for('main.dashboard') }}"><i class="material-icons">dashboard</i>Dashboard</a>
<ul>
@ -66,12 +50,13 @@
<li>
<a class="waves-effect" class="waves-effect" href="{{ url_for('main.social_area') }}"><i class="material-icons">rocket_launch</i>Social Area</a>
<ul>
<li>
<a class="waves-effect" href="{{ url_for('main.social_area', _anchor='public-users') }}" style="padding-left: 47px;"><i class="material-icons">person</i>Public Users</a>
</li>
<li>
<a class="waves-effect" href="{{ url_for('main.social_area', _anchor='public-corpora') }}" style="padding-left: 47px;"><i class="nopaque-icons">I</i>Public Corpora</a>
</li>
<li><a href="{{ url_for('users.user', user_id=current_user.id) }}" style="padding-left: 47px;"><i class="material-icons left">person</i>My Profile</a></li>
<li>
<a class="waves-effect" href="{{ url_for('main.social_area', _anchor='public-users') }}" style="padding-left: 47px;"><i class="material-icons">group</i>Public Users</a>
</li>
<li>
<a class="waves-effect" href="{{ url_for('main.social_area', _anchor='public-corpora') }}" style="padding-left: 47px;"><i class="nopaque-icons">I</i>Public Corpora</a>
</li>
</ul>
</li>
<li class="hide-on-large-only"><div class="divider"></div></li>

View File

@ -16,16 +16,16 @@
{% block styles %}
{{ super() }}
{% include "_styles.html.j2" %}
{% include "_base/styles.html.j2" %}
{% endblock styles %}
{% block navbar %}
{% include "_navbar.html.j2" %}
{% include "_base/navbar.html.j2" %}
{% endblock navbar %}
{% block sidenav %}
{% if current_user.is_authenticated %}
{% include "_sidenav.html.j2" %}
{% include "_base/sidenav.html.j2" %}
{% endif %}
{% endblock sidenav %}
@ -34,23 +34,17 @@
{% block page_content %}{% endblock page_content %}
<div id="modals">
{% block modals %}
{% include "main/_manual_modal.html.j2" %}
{# {% if current_user.is_authenticated %}
{% include "_roadmap.html.j2" %}
{% endif %} #}
{% if current_user.is_authenticated and not current_user.terms_of_use_accepted %}
{% include "_terms_of_use_modal.html.j2" %}
{% endif %}
{% include "_base/modals.html.j2" %}
{% endblock modals %}
</div>
{% endblock main %}
{% block footer_attribs %} class="page-footer primary-variant-color"{% endblock footer_attribs %}
{% block footer %}
{% include "_footer.html.j2" %}
{% include "_base/footer.html.j2" %}
{% endblock footer %}
{% block scripts %}
{{ super() }}
{% include "_scripts.html.j2" %}
{% include "_base/scripts.html.j2" %}
{% endblock scripts %}

View File

@ -23,4 +23,15 @@
</div>
</form>
</div>
<div id="corpus-analysis-concordance-switch-to-query-builder-submit-modal" class="modal">
<div class="modal-content">
<h4>Switch to Query Builder</h4>
<p>Switching back to the Query Builder may discard any elements it does not recognize. Do you want to continue?</p>
</div>
<div class="modal-footer">
<a class="btn modal-close waves-effect waves-light">Cancel</a>
<a class="btn modal-close red waves-effect waves-light switch-action" data-switch-action="confirm">Switch to Query Builder</a>
</div>
</div>
{% endmacro %}

View File

@ -1,11 +1,18 @@
{% macro card_content(id_prefix) %}
<form id="corpus-analysis-concordance-query-builder-form">
<div class="row">
<div class="col s9" id="corpus-analysis-concordance-query-builder-input-field-container">
<div class="col s8" id="corpus-analysis-concordance-query-builder-input-field-container">
<div id="corpus-analysis-concordance-query-builder-input-field">
<p id="corpus-analysis-concordance-query-builder-input-field-placeholder">Click on the buttons below to build your query.</p>
<a class="query-element-target btn-floating btn-small blue-grey lighten-4 waves-effect waves-light tooltipped" style="margin-bottom:10px; margin-right:5px;" draggable="true" data-position="bottom" data-tooltip="Add a token to your query">
<i class="material-icons">add</i>
</a>
</div>
</div>
<div class="col s1 center-align">
<a class="btn-floating btn waves-effect waves-light red" id="corpus-analysis-concordance-delete-query-button" style="margin-top:18px;">
<i class="material-icons">delete</i>
</a>
</div>
<div class="input-field col s3">
<i class="material-icons prefix">arrow_forward</i>
<input class="validate corpus-analysis-action" id="corpus-analysis-concordance-form-subcorpus-name" name="subcorpus-name" type="text" required pattern="^[A-Z][a-z0-9\-]*" value="Last"></input>
@ -30,9 +37,10 @@
<div class="row">
<div class="col s12">
<p></p>
<a class="btn waves-effect waves-light tooltipped modal-trigger" href="#corpus-analysis-concordance-positional-attr-modal" data-position="bottom" data-tooltip="Search for any token, for example a word, a lemma or a part-of-speech tag">Add new token to your query</a>
<a class="btn waves-effect waves-light tooltipped modal-trigger" href="#corpus-analysis-concordance-structural-attr-modal" data-position="bottom" data-tooltip="Structure your query with structural attributes, for example sentences, entities or annotate the text">Add structural attributes to your query</a>
<a class="btn waves-effect waves-light tooltipped dropdown-trigger disabled" data-target="corpus-analysis-concordance-token-incidence-modifiers-dropdown" data-toggle-area="token-incidence-modifiers" data-position="top" data-tooltip="Incidence Modifiers are special characters or patterns, <br>which determine how often a character represented previously should occur.">incidence modifiers</a>
<a class="btn-small waves-effect waves-light tooltipped modal-trigger" href="#corpus-analysis-concordance-positional-attr-modal" data-position="bottom" data-tooltip="Search for any token, for example a word, a lemma or a part-of-speech tag">Add new token to your query</a>
<a class="btn-small waves-effect waves-light tooltipped modal-trigger" href="#corpus-analysis-concordance-structural-attr-modal" data-position="bottom" data-tooltip="Structure your query with structural attributes, for example sentences, entities or annotate the text">Add structural attributes to your query</a>
<a class="btn-small waves-effect waves-light tooltipped dropdown-trigger disabled" data-target="corpus-analysis-concordance-token-incidence-modifiers-dropdown" data-toggle-area="token-incidence-modifiers" data-position="top" data-tooltip="Incidence Modifiers are special characters or patterns, <br>which determine how often a character represented previously should occur.">incidence modifiers</a>
<a class="modal-trigger" data-manual-modal-chapter="manual-modal-query-builder" href="#manual-modal"><i class="material-icons left" style="color:black">help_outline</i></a>
</div>
</div>
<div class="row">
@ -59,7 +67,7 @@
<div id="corpus-analysis-concordance-structural-attr-modal" class="modal">
<div class="modal-content">
<div class="attr-modal-header">
<h5>Which structural attribute do you want to add to your query?<a class="modal-trigger" data-manual-modal-chapter="manual-modal-query-builder" href="#manual-modal"><i class="material-icons left" id="corpus-analysis-concordance-add-structural-attribute-tutorial-info-icon">help_outline</i></a></h5>
<h5>Which structural attribute do you want to add to your query?<a class="modal-trigger" data-manual-modal-chapter="manual-modal-query-builder" data-manual-modal-chapter-anchor="add-structural-attribute-tutorial" href="#manual-modal"><i class="material-icons left" id="corpus-analysis-concordance-add-structural-attribute-tutorial-info-icon">help_outline</i></a></h5>
</div>
<p></p>
<br>
@ -67,7 +75,6 @@
<div class="col s12">
<a class="btn-small waves-effect waves-light" data-structural-attr-modal-action-button="sentence" data-toggle-area="sentence-button">sentence</a>
<a class="btn-small waves-effect waves-light" data-structural-attr-modal-action-button="entity" data-toggle-area="entity-button">entity</a>
<a class="btn-small waves-effect waves-light" data-structural-attr-modal-action-button="meta-data" data-toggle-area="text-annotation-button">Meta Data</a>
</div>
</div>
<div id="corpus-analysis-concordance-entity-builder" data-toggle-area="entity-builder" class="hide">
@ -111,39 +118,6 @@
</div>
</div>
</div>
<div id="corpus-analysis-concordance-text-annotation-builder" data-toggle-area="text-annotation-builder" class="hide">
<p></p>
<br>
<div class="row">
<div class= "input-field col s4 l3">
<select name="text-annotation-options" id="corpus-analysis-concordance-text-annotation-options">
<option class="btn-small waves-effect waves-light" value="address">address</option>
<option class="btn-small waves-effect waves-light" value="author">author</option>
<option class="btn-small waves-effect waves-light" value="booktitle">booktitle</option>
<option class="btn-small waves-effect waves-light" value="chapter">chapter</option>
<option class="btn-small waves-effect waves-light" value="editor">editor</option>
<option class="btn-small waves-effect waves-light" value="institution">institution</option>
<option class="btn-small waves-effect waves-light" value="journal">journal</option>
<option class="btn-small waves-effect waves-light" value="pages">pages</option>
<option class="btn-small waves-effect waves-light" value="publisher">publisher</option>
<option class="btn-small waves-effect waves-light" value="publishing_year">publishing year</option>
<option class="btn-small waves-effect waves-light" value="school">school</option>
<option class="btn-small waves-effect waves-light" value="title">title</option>
</select>
<label>Meta data</label>
</div>
<div class= "input-field col s7 l5">
<i class="material-icons prefix">mode_edit</i>
<input placeholder="Type in your text annotation" type="text" id="corpus-analysis-concordance-text-annotation-input">
</div>
<div class="col s1 l1 center-align">
<p class="btn-floating waves-effect waves-light" id="corpus-analysis-concordance-text-annotation-submit">
<i class="material-icons right">send</i>
</p>
</div>
<div class="hide" id="corpus-analysis-concordance-no-value-metadata-message"><i>No value entered!</i></div>
</div>
</div>
</div>
</div>
{% endmacro %}
@ -154,7 +128,7 @@
<div class="row attr-modal-header">
<p></p>
<div class="col s12">
<h5>Which kind of token are you looking for? <a class="modal-trigger" data-manual-modal-chapter="manual-modal-query-builder" href="#manual-modal"><i class="material-icons left" id="corpus-analysis-concordance-token-tutorial-info-icon">help_outline</i></a></h5>
<h5>Which kind of token are you looking for? <a class="modal-trigger" data-manual-modal-chapter="manual-modal-query-builder" data-manual-modal-chapter-anchor="add-new-token-tutorial" href="#manual-modal"><i class="material-icons left" id="corpus-analysis-concordance-token-tutorial-info-icon">help_outline</i></a></h5>
</div>
<div class="input-field col s3" style="margin-left:42px;">
<select id="corpus-analysis-concordance-positional-attr-selection">
@ -363,7 +337,7 @@
</div>
<div id="corpus-analysis-concordance-token-edit-options" data-toggle-area="input-field-options">
<div class="row">
<h6>Options to edit your token: <a class="modal-trigger" data-manual-modal-chapter="manual-modal-query-builder" href="#manual-modal"><i class="material-icons left" id="corpus-analysis-concordance-edit-options-tutorial-info-icon">help_outline</i></a></h6>
<h6>Options to edit your token: <a class="modal-trigger" data-manual-modal-chapter="manual-modal-query-builder" href="#manual-modal" data-manual-modal-chapter-anchor="edit-options-tutorial"><i class="material-icons left" id="corpus-analysis-concordance-edit-options-tutorial-info-icon">help_outline</i></a></h6>
</div>
<p></p>
<div class="row">
@ -436,9 +410,3 @@
</div>
</div>
{% endmacro %}
{# {% macro scripts(id_prefix) %}
<script>
const concordanceQueryBuilder = new ConcordanceQueryBuilder();
</script>
{% endmacro %} #}

View File

@ -153,16 +153,16 @@
let deleteJobRequestElement = document.querySelector('#delete-job-request');
let restartJobRequestElement = document.querySelector('#restart-job-request');
deleteJobRequestElement.addEventListener('click', (event) => {
requests.jobs.entity.delete({{ job.hashid|tojson }});
nopaque.requests.jobs.entity.delete({{ job.hashid|tojson }});
});
restartJobRequestElement.addEventListener('click', (event) => {
requests.jobs.entity.restart({{ job.hashid|tojson }});
nopaque.requests.jobs.entity.restart({{ job.hashid|tojson }});
});
if ({{ current_user.is_administrator()|tojson }}) {
let jobLogButtonElement = document.querySelector('#job-log-button');
jobLogButtonElement.addEventListener('click', (event) => {
requests.jobs.entity.log({{ job.hashid|tojson }})
nopaque.requests.jobs.entity.log({{ job.hashid|tojson }})
.then(
(response) => {
response.json()

View File

@ -1,9 +0,0 @@
<h3 class="manual-chapter-title">Introduction</h3>
<p>
nopaque is a web-based digital working environment. It implements a
workflow modeled on the research process in the humanities and supports its
users in processing their data so that digital analysis methods can
subsequently be applied to it. All processing runs in a dedicated cloud
environment built on established open source software, which ensures that
no personal data of the users is disclosed.
</p>

View File

@ -1,18 +0,0 @@
<h3 class="manual-chapter-title">Registration and Log in</h3>
<div class="row">
<div class="col s12 m4">
<img alt="Registration and Log in" class="materialboxed responsive-img" src="{{ url_for('static', filename='images/manual/registration-and-log-in.png') }}">
</div>
<div class="col s12 m8">
<p>
Before you can start using the web platform, you need to create a user
account. Only a few details are required: a user name, an e-mail address
and a password. To register, fill out the form on the
<a href="{{ url_for('auth.register') }}">registration page</a>. After successful registration, the newly
created account must be verified; to do this, follow the instructions in
the e-mail that is sent to you automatically. Afterwards, you can log in
with your username or e-mail address and your password using the log-in
form located next to the registration button.
</p>
</div>
</div>

View File

@ -1,52 +0,0 @@
<h3 class="manual-chapter-title">Services</h5>
<div class="row">
<div class="col s12 m4">
<img alt="Services" class="materialboxed responsive-img" src="{{ url_for('static', filename='images/manual/services.png') }}">
</div>
<div class="col s12 m8">
<p>
nopaque was designed from the ground up to be modular. This modularity
means that the workflow offers variable entry and exit points, so that
different starting points and goals can be addressed flexibly. Each module
is implemented as a self-contained service, and each service represents
one step in the workflow. The services are coordinated so that they can be
used consecutively. Their order can be taken either from the listing of
the services in the left sidebar or from the roadmap (accessible via the
pink compass in the upper right corner). All services are versioned, so
the data generated with nopaque is always reproducible.
</p>
</div>
</div>
<h4 class="manual-chapter-title">File Setup</h4>
<p>
The <a href="{{ url_for('services.file_setup_pipeline') }}">File Setup Service</a> bundles image data, such as scans and photos,
into a single handy PDF file. To use this service, select the images to be
bundled in the job form, choose the desired service version, and specify a
title and description. Please note that the service sorts the images into
the resulting PDF file based on their file names, so naming the images
correctly is very important. It has proven to be good practice to name the
files according to the following scheme:
page-01.png, page-02.jpg, page-03.tiff, etc. In general, you can assume
that the images will be sorted in the order in which your operating
system's file explorer lists them when the folder is sorted in ascending
order by file name.
</p>
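As an aside, the ordering rule described above can be reproduced in a few lines of Python; the following is only an illustrative sketch (the function name and extension filter are not part of nopaque), assuming a plain ascending sort of the file names:

import os

def file_setup_order(image_dir):
    # Keep only common image files and sort them by name, ascending,
    # the same way a file explorer lists a folder sorted by file name.
    names = [n for n in os.listdir(image_dir)
             if n.lower().endswith(('.png', '.jpg', '.jpeg', '.tif', '.tiff'))]
    return sorted(names)

# Example: ['page-01.png', 'page-02.jpg', 'page-03.tiff'] keeps its intended page order.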
<h4>Optical Character Recognition (OCR)</h4>
<p>Coming soon...</p>
<h4>Handwritten Text Recognition (HTR)</h4>
<p>Coming soon...</p>
<h4>Natural Language Processing (NLP)</h4>
<p>Coming soon...</p>
<h4>Corpus Analysis</h4>
<p>
The corpus analysis service lets you create a text corpus and then explore
it in an analysis session. On the server side, the analysis session is
powered by the Open Corpus Workbench software, which enables efficient and
complex searches with the help of the CQP Query Language.
</p>

View File

@ -1,43 +0,0 @@
{% extends "base.html.j2" %}
{% block page_content %}
<div class="container">
<div class="row">
<div class="col s12">
<h1 id="title">{{ title }}</h1>
</div>
<div class="col s12 m10">
<div class="section scrollspy" id="introduction">
{% include "main/manual/_01_introduction.html.j2" %}
</div>
<div class="section scrollspy" id="registration-and-log-in">
{% include "main/manual/_02_registration_and_log_in.html.j2" %}
</div>
<div class="section scrollspy" id="dashboard">
{% include "main/manual/_03_dashboard.html.j2" %}
</div>
<div class="section scrollspy" id="services">
{% include "main/manual/_06_services.html.j2" %}
</div>
<div class="section scrollspy" id="a-closer-look-at-the-corpus-analysis">
{% include "main/manual/_07_a_closer_look_at_the_corpus_analysis.html.j2" %}
</div>
<div class="section scrollspy" id="cqp-query-language">
{% include "main/manual/_08_cqp_query_language.html.j2" %}
</div>
</div>
<div class="col m2 hide-on-small-only">
<ul class="section table-of-contents" style="position: fixed !important;">
<li><a href="#introduction">Introduction</a></li>
<li><a href="#registration-and-log-in">Registration and Log in</a></li>
<li><a href="#dashboard">Dashboard</a></li>
<li><a href="#services">Services</a></li>
<li><a href="#a-closer-look-at-the-corpus-analysis">A closer look at the Corpus Analysis</a></li>
<li><a href="#cqp-query-language">CQP Query Language</a></li>
</ul>
</div>
</div>
</div>
{% endblock page_content %}

View File

@ -12,11 +12,11 @@
</div>
<div class="col s12" id="fgho-sommerschule-2023-vorbereitungen">
{% include "workshops/_fgho_sommerschule_2023/_vorbereitungen.html.j2" %}
{% include "workshops/_fgho_sommerschule_2023/vorbereitungen.html.j2" %}
</div>
<div class="col s12" id="fgho-sommerschule-2023-workshop-aufgaben">
{% include "workshops/_fgho_sommerschule_2023/_aufgaben.html.j2" %}
{% include "workshops/_fgho_sommerschule_2023/aufgaben.html.j2" %}
</div>
</div>
</div>

View File

@ -115,6 +115,8 @@ class Config:
NOPAQUE_READCOOP_USERNAME = os.environ.get('NOPAQUE_READCOOP_USERNAME')
NOPAQUE_READCOOP_PASSWORD = os.environ.get('NOPAQUE_READCOOP_PASSWORD')
NOPAQUE_VERSION='1.0.0'
@staticmethod
def init_app(app: Flask):
# Set up logging according to the corresponding (NOPAQUE_LOG_*)
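For context on the footer change further up, which renders {{ config.NOPAQUE_VERSION }}: Flask exposes the application config to Jinja templates under the name config, so a value defined in this Config class is available in every template. A minimal, self-contained sketch (the inline template string is only illustrative):

from flask import Flask, render_template_string

app = Flask(__name__)
app.config['NOPAQUE_VERSION'] = '1.0.0'  # mirrors the value added in the Config class above

with app.app_context():
    # Flask injects `config` into the Jinja context, so templates can read it directly.
    print(render_template_string('Version {{ config.NOPAQUE_VERSION }}'))  # -> Version 1.0.0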

View File

@ -4,7 +4,6 @@
# More information about the environment variables can be found here: #
# https://hub.docker.com/_/postgres #
##############################################################################
POSTGRES_DB=
POSTGRES_USER=

View File

@ -20,6 +20,7 @@ depends_on = None
def upgrade():
# TODO: Add error handling for sqlalchemy.exc.ProgrammingError
for user in User.query.all():
spacy_nlp_pipeline_models_dir = os.path.join(user.path, 'spacy_nlp_pipeline_models')
if os.path.exists(spacy_nlp_pipeline_models_dir):
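This migration and the tesseract one below appear to follow the same pattern: build the per-user model directory path, check that it exists, and move it to the new pipeline-style name. The surrounding migration code is not shown here, so the following Python sketch of that rename step rests on that assumption (the helper name is made up):

import os

def rename_model_dir(user_path, old_name, new_name):
    # Move a per-user model directory to its new name, if the old one exists
    # and the new one has not been created yet.
    old_path = os.path.join(user_path, old_name)
    new_path = os.path.join(user_path, new_name)
    if os.path.exists(old_path) and not os.path.exists(new_path):
        os.rename(old_path, new_path)

# e.g. rename_model_dir(user.path, 'tesseract_ocr_models', 'tesseract_ocr_pipeline_models')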

View File

@ -17,6 +17,7 @@ depends_on = None
def upgrade():
# TODO: Add error handling for sqlalchemy.exc.ProgrammingError
for user in User.query.all():
old_tesseract_ocr_pipeline_model_path = os.path.join(user.path, 'tesseract_ocr_models')
new_tesseract_ocr_pipeline_model_path = os.path.join(user.path, 'tesseract_ocr_pipeline_models')