Compare commits
220 Commits
07103ee4e5
...
access-pip
Author | SHA1 | Date | |
---|---|---|---|
2c709e65d0 | |||
71c0ddf515 | |||
5c395d1e06 | |||
82d6f6003f | |||
9da74c1c6f | |||
ec23bd94ee | |||
55a62053b0 | |||
a1e5bd61e0 | |||
cf8c164d60 | |||
05ab204e5a | |||
9f188afd16 | |||
dc77ac7b76 | |||
84276af322 | |||
d9d4067536 | |||
ba65cf5911 | |||
69a1edc51e | |||
32ad8c7359 | |||
8c0843d2d0 | |||
d4c9ab5821 | |||
518a245133 | |||
b6864b355a | |||
0a45e1bb65 | |||
08ca938333 | |||
cfdef8d1fa | |||
5dce269736 | |||
13369296d3 | |||
4f6e1c121f | |||
438a257fe3 | |||
2e88d7d035 | |||
b338c33d42 | |||
d6cebddd92 | |||
07fda0e95a | |||
3927d9e4cd | |||
8f5d5ffdec | |||
f02d1619e2 | |||
892f1f799e | |||
f5e98ae655 | |||
f790106e0e | |||
c57acc73d2 | |||
678a0767b7 | |||
17a9338d9f | |||
a7cbce1eda | |||
fa28c875e1 | |||
0927edcceb | |||
9c22370eea | |||
bdcc80a66f | |||
9be5ce6014 | |||
00e4c3ade3 | |||
79a16cae83 | |||
c5aea0be94 | |||
afcb890ccf | |||
9627708950 | |||
1bb1408988 | |||
79bafdea89 | |||
a2d617718b | |||
691b2de5b2 | |||
eb0e7c9ba1 | |||
ab132746e7 | |||
ae5646512d | |||
fc66327920 | |||
9bfc96ad41 | |||
008938b46b | |||
4f24e9f9da | |||
d0fe4360bb | |||
1c18806c9c | |||
9487aa7a60 | |||
6559051fd5 | |||
0882e085a3 | |||
ff1bcb40f3 | |||
d298b200dc | |||
660d7ebc99 | |||
df33c7b36d | |||
bf8b22fb58 | |||
b216ad8a40 | |||
4822f6ec02 | |||
61be3345be | |||
e9ddb85f03 | |||
e3166ca54c | |||
0565f309f8 | |||
1f40002249 | |||
1ff9c8bfe3 | |||
e8fe67d290 | |||
fbb32ef580 | |||
985e9b406f | |||
0abfe65afa | |||
f4d3415c11 | |||
965f2854b2 | |||
f101a742a9 | |||
c046fbfb1e | |||
8997d3ad67 | |||
bf249193af | |||
c40e428eb2 | |||
4daf3359b9 | |||
d875623a8c | |||
067318bb89 | |||
a9203cc409 | |||
78dd375ef8 | |||
82cd384e5f | |||
c7dab5e502 | |||
d3cfd2cfaf | |||
14c10aeab1 | |||
2dec17b1b9 | |||
9fe38fab52 | |||
e20dd01710 | |||
1b974f0bbc | |||
c6be72d0a7 | |||
d3f2d5648e | |||
7cae84ffdc | |||
1d6834302d | |||
53f4400731 | |||
f36600f06c | |||
068211a72b | |||
f566e276a1 | |||
c605613d86 | |||
d1fc425f48 | |||
b8ae221987 | |||
b50147a66a | |||
18311c8c9c | |||
2dc54f4258 | |||
bcdc3721ef | |||
60bcaa9e01 | |||
af89a5776f | |||
fcbf9c8cb6 | |||
cc6ce6e1f3 | |||
4581367d04 | |||
d7f00f6337 | |||
86947e2cf8 | |||
4a9a03e648 | |||
45369d4c84 | |||
f56e951b71 | |||
d776e11fe5 | |||
9200837e63 | |||
aad347caa0 | |||
9ccab8657a | |||
fe7f69d596 | |||
8a5c94f448 | |||
3d38e550a0 | |||
1387d80a26 | |||
5c00c5740e | |||
04575b78cf | |||
2951fc6966 | |||
bf0213edbc | |||
c843fbb437 | |||
1dc7d2a1c6 | |||
173aea7df4 | |||
f1962b3b47 | |||
dd04623278 | |||
5e8008399d | |||
0d92f221cb | |||
766c5ba27d | |||
661ac7c509 | |||
3b390858ff | |||
ae8e383085 | |||
9ac626c64d | |||
d0c6b2b9e5 | |||
8277e60689 | |||
8b887d79ef | |||
c9ad538bee | |||
983400b925 | |||
37f9e1281d | |||
5eef2292e7 | |||
351da5d4e9 | |||
27fe4a95e4 | |||
0627b27ec7 | |||
adfd229e66 | |||
ae6a7cb86d | |||
2dd6015ba6 | |||
f80b635ca3 | |||
0e8a87d34e | |||
ccf7f449dd | |||
dd05657362 | |||
cef82d9001 | |||
656eef17db | |||
104c2fe468 | |||
d08f95e944 | |||
87e2c2b484 | |||
7a925b6a19 | |||
e4f435c5ee | |||
7721926d6c | |||
691d4757ff | |||
6c744fc3ba | |||
e46f0032bd | |||
9da1a6e987 | |||
8182cccecd | |||
d898cd8516 | |||
4ae4b88a44 | |||
b7483af8e9 | |||
41d8dbad5d | |||
203faa4257 | |||
960f36c740 | |||
c3834ca400 | |||
572fdf3a00 | |||
22b43a689f | |||
deec9e8a76 | |||
688b96ffee | |||
a9973e9c8e | |||
413b6111df | |||
a9f05fffdf | |||
7936ac270b | |||
1eabf18b13 | |||
94dc25750c | |||
beb157092e | |||
1cd9540e5b | |||
912bd7da07 | |||
e21ef2422d | |||
c52c966863 | |||
a7a948908f | |||
3a97b1a07a | |||
315b538c30 | |||
c35b2f8674 | |||
baf70750e8 | |||
525723818e | |||
20c0678d3e | |||
c323c53f37 | |||
2d8cef64e8 | |||
9b9edf501d | |||
903310c17f | |||
bc92fd249f | |||
422415065d | |||
07ec01ae2e |
@ -8,5 +8,6 @@
|
||||
!.flaskenv
|
||||
!boot.sh
|
||||
!config.py
|
||||
!docker-nopaque-entrypoint.sh
|
||||
!nopaque.py
|
||||
!requirements.txt
|
||||
|
210
.env.tpl
@ -1,204 +1,32 @@
|
||||
################################################################################
|
||||
# Docker #
|
||||
################################################################################
|
||||
# DEFAULT: ./data
|
||||
# NOTE: Use `.` as <project-basedir>
|
||||
# HOST_DATA_DIR=
|
||||
|
||||
# Example: 1000
|
||||
##############################################################################
|
||||
# Variables for use in Docker Compose YAML files #
|
||||
##############################################################################
|
||||
# HINT: Use this bash command `id -u`
|
||||
# NOTE: 0 (= root user) is not allowed
|
||||
HOST_UID=
|
||||
|
||||
# Example: 1000
|
||||
# HINT: Use this bash command `id -g`
|
||||
HOST_GID=
|
||||
|
||||
# Example: 999
|
||||
# HINT: Use this bash command `getent group docker | cut -d: -f3`
|
||||
HOST_DOCKER_GID=
|
||||
|
||||
# DEFAULT: ./logs
|
||||
# NOTES: Use `.` as <project-basedir>
|
||||
# HOST_LOG_DIR=
|
||||
# DEFAULT: nopaque
|
||||
# DOCKER_DEFAULT_NETWORK_NAME=
|
||||
|
||||
# DEFAULT: nopaque_default
|
||||
# DOCKER_NETWORK_NAME=
|
||||
# DEFAULT: ./volumes/db/data
|
||||
# NOTE: Use `.` as <project-basedir>
|
||||
# DOCKER_DB_SERVICE_DATA_VOLUME_SOURCE_PATH=
|
||||
|
||||
################################################################################
|
||||
# Flask #
|
||||
# https://flask.palletsprojects.com/en/1.1.x/config/ #
|
||||
################################################################################
|
||||
# CHOOSE ONE: http, https
|
||||
# DEFAULT: http
|
||||
# PREFERRED_URL_SCHEME=
|
||||
# DEFAULT: ./volumes/mq/data
|
||||
# NOTE: Use `.` as <project-basedir>
|
||||
# DOCKER_MQ_SERVICE_DATA_VOLUME_SOURCE_PATH=
|
||||
|
||||
# DEFAULT: hard to guess string
|
||||
# HINT: Use this bash command `python -c "import uuid; print(uuid.uuid4().hex)"`
|
||||
# SECRET_KEY=
|
||||
# NOTE: This must be a network share and it must be available on all
|
||||
# Docker Swarm nodes, mounted to the same path with the same
|
||||
# user and group ownership.
|
||||
DOCKER_NOPAQUE_SERVICE_DATA_VOLUME_SOURCE_PATH=
|
||||
|
||||
# DEFAULT: localhost:5000
|
||||
# Example: nopaque.example.com/nopaque.example.com:5000
|
||||
# HINT: If your instance is publicly available on a port other than 80/443,
|
||||
# you will have to add this to the server name
|
||||
# SERVER_NAME=
|
||||
|
||||
# CHOOSE ONE: False, True
|
||||
# DEFAULT: False
|
||||
# HINT: Set to true if you redirect http to https
|
||||
# SESSION_COOKIE_SECURE=
|
||||
|
||||
|
||||
################################################################################
|
||||
# Flask-Assets #
|
||||
# https://webassets.readthedocs.io/en/latest/ #
|
||||
################################################################################
|
||||
# CHOOSE ONE: False, True
|
||||
# DEFAULT: False
|
||||
# ASSETS_DEBUG=
|
||||
|
||||
|
||||
################################################################################
|
||||
# Flask-Hashids #
|
||||
# https://github.com/Pevtrick/Flask-Hashids #
|
||||
################################################################################
|
||||
# DEFAULT: 16
|
||||
# HASHIDS_MIN_LENGTH=
|
||||
|
||||
# NOTE: Use this bash command `python -c "import uuid; print(uuid.uuid4().hex)"`
|
||||
# It is strongly recommended that this is NEVER the same as the SECRET_KEY
|
||||
HASHIDS_SALT=
|
||||
|
||||
|
||||
################################################################################
|
||||
# Flask-Login #
|
||||
# https://flask-login.readthedocs.io/en/latest/ #
|
||||
################################################################################
|
||||
# CHOOSE ONE: False, True
|
||||
# DEFAULT: False
|
||||
# HINT: Set to true if you redirect http to https
|
||||
# REMEMBER_COOKIE_SECURE=
|
||||
|
||||
|
||||
################################################################################
|
||||
# Flask-Mail #
|
||||
# https://pythonhosted.org/Flask-Mail/ #
|
||||
################################################################################
|
||||
# EXAMPLE: nopaque Admin <nopaque@example.com>
|
||||
MAIL_DEFAULT_SENDER=
|
||||
|
||||
MAIL_PASSWORD=
|
||||
|
||||
# EXAMPLE: smtp.example.com
|
||||
MAIL_SERVER=
|
||||
|
||||
# EXAMPLE: 587
|
||||
MAIL_PORT=
|
||||
|
||||
# CHOOSE ONE: False, True
|
||||
# DEFAULT: False
|
||||
# MAIL_USE_SSL=
|
||||
|
||||
# CHOOSE ONE: False, True
|
||||
# DEFAULT: False
|
||||
# MAIL_USE_TLS=
|
||||
|
||||
# EXAMPLE: nopaque@example.com
|
||||
MAIL_USERNAME=
|
||||
|
||||
|
||||
################################################################################
|
||||
# Flask-SQLAlchemy #
|
||||
# https://flask-sqlalchemy.palletsprojects.com/en/2.x/config/ #
|
||||
################################################################################
|
||||
# DEFAULT: 'sqlite:///<nopaque-basedir>/data.sqlite'
|
||||
# NOTE: Use `.` as <nopaque-basedir>,
|
||||
# Don't use a SQLite database when using Docker
|
||||
# SQLALCHEMY_DATABASE_URI=
|
||||
|
||||
|
||||
################################################################################
|
||||
# nopaque #
|
||||
################################################################################
|
||||
# An account that is registered with this email address gets automatically assigned
|
||||
# the administrator role.
|
||||
# EXAMPLE: admin.nopaque@example.com
|
||||
NOPAQUE_ADMIN=
|
||||
|
||||
# DEFAULT: /mnt/nopaque
|
||||
# NOTE: This must be a network share and it must be available on all Docker
|
||||
# Swarm nodes
|
||||
# NOPAQUE_DATA_DIR=
|
||||
|
||||
# CHOOSE ONE: False, True
|
||||
# DEFAULT: True
|
||||
# NOPAQUE_IS_PRIMARY_INSTANCE=
|
||||
|
||||
# transport://[userid:password]@hostname[:port]/[virtual_host]
|
||||
NOPAQUE_SOCKETIO_MESSAGE_QUEUE_URI=
|
||||
|
||||
# NOTE: Get these from the nopaque development team
|
||||
NOPAQUE_DOCKER_REGISTRY_USERNAME=
|
||||
NOPAQUE_DOCKER_REGISTRY_PASSWORD=
|
||||
|
||||
# DEFAULT: %Y-%m-%d %H:%M:%S
|
||||
# NOPAQUE_LOG_DATE_FORMAT=
|
||||
|
||||
# DEFAULT: [%(asctime)s] %(levelname)s in %(pathname)s (function: %(funcName)s, line: %(lineno)d): %(message)s
|
||||
# NOPAQUE_LOG_FORMAT=
|
||||
|
||||
# DEFAULT: INFO
|
||||
# CHOOSE ONE: CRITICAL, ERROR, WARNING, INFO, DEBUG
|
||||
# NOPAQUE_LOG_LEVEL=
|
||||
|
||||
# CHOOSE ONE: False, True
|
||||
# DEFAULT: True
|
||||
# NOPAQUE_LOG_FILE_ENABLED=
|
||||
|
||||
# DEFAULT: <nopaque-basedir>/logs
|
||||
# NOTE: Use `.` as <nopaque-basedir>
|
||||
# NOPAQUE_LOG_FILE_DIR=
|
||||
|
||||
# DEFAULT: NOPAQUE_LOG_LEVEL
|
||||
# CHOOSE ONE: CRITICAL, ERROR, WARNING, INFO, DEBUG
|
||||
# NOPAQUE_LOG_FILE_LEVEL=
|
||||
|
||||
# CHOOSE ONE: False, True
|
||||
# DEFAULT: False
|
||||
# NOPAQUE_LOG_STDERR_ENABLED=
|
||||
|
||||
# CHOOSE ONE: CRITICAL, ERROR, WARNING, INFO, DEBUG
|
||||
# DEFAULT: NOPAQUE_LOG_LEVEL
|
||||
# NOPAQUE_LOG_STDERR_LEVEL=
|
||||
|
||||
# CHOOSE ONE: False, True
|
||||
# DEFAULT: False
|
||||
# HINT: Set this to True only if you are using a proxy in front of nopaque
|
||||
# NOPAQUE_PROXY_FIX_ENABLED=
|
||||
|
||||
# DEFAULT: 0
|
||||
# Number of values to trust for X-Forwarded-For
|
||||
# NOPAQUE_PROXY_FIX_X_FOR=
|
||||
|
||||
# DEFAULT: 0
|
||||
# Number of values to trust for X-Forwarded-Host
|
||||
# NOPAQUE_PROXY_FIX_X_HOST=
|
||||
|
||||
# DEFAULT: 0
|
||||
# Number of values to trust for X-Forwarded-Port
|
||||
# NOPAQUE_PROXY_FIX_X_PORT=
|
||||
|
||||
# DEFAULT: 0
|
||||
# Number of values to trust for X-Forwarded-Prefix
|
||||
# NOPAQUE_PROXY_FIX_X_PREFIX=
|
||||
|
||||
# DEFAULT: 0
|
||||
# Number of values to trust for X-Forwarded-Proto
|
||||
# NOPAQUE_PROXY_FIX_X_PROTO=
|
||||
|
||||
# CHOOSE ONE: False, True
|
||||
# DEFAULT: False
|
||||
# NOPAQUE_TRANSKRIBUS_ENABLED=
|
||||
|
||||
# READ-COOP account data: https://readcoop.eu/
|
||||
# NOPAQUE_READCOOP_USERNAME=
|
||||
# NOPAQUE_READCOOP_PASSWORD=
|
||||
# DEFAULT: ./volumes/nopaque/logs
|
||||
# NOTE: Use `.` as <project-basedir>
|
||||
# DOCKER_NOPAQUE_SERVICE_LOGS_VOLUME_SOURCE_PATH=.
|
||||
|
2
.gitignore
vendored
@ -1,6 +1,6 @@
|
||||
# nopaque specifics
|
||||
app/static/gen/
|
||||
data/
|
||||
volumes/
|
||||
docker-compose.override.yml
|
||||
logs/
|
||||
!logs/dummy
|
||||
|
84
.gitlab-ci.yml
Normal file
@ -0,0 +1,84 @@
|
||||
include:
|
||||
- template: Security/Container-Scanning.gitlab-ci.yml
|
||||
|
||||
##############################################################################
|
||||
# Pipeline stages in order of execution #
|
||||
##############################################################################
|
||||
stages:
|
||||
- build
|
||||
- publish
|
||||
- sca
|
||||
|
||||
##############################################################################
|
||||
# Pipeline behavior #
|
||||
##############################################################################
|
||||
workflow:
|
||||
rules:
|
||||
# Run the pipeline on commits to the default branch
|
||||
- if: $CI_COMMIT_BRANCH == $CI_DEFAULT_BRANCH
|
||||
variables:
|
||||
# Set the Docker image tag to `latest`
|
||||
DOCKER_IMAGE: $CI_REGISTRY_IMAGE:latest
|
||||
when: always
|
||||
# Run the pipeline on tag creation
|
||||
- if: $CI_COMMIT_TAG
|
||||
variables:
|
||||
# Set the Docker image tag to the Git tag name
|
||||
DOCKER_IMAGE: $CI_REGISTRY_IMAGE:$CI_COMMIT_REF_NAME
|
||||
when: always
|
||||
# Don't run the pipeline on all other occasions
|
||||
- when: never
|
||||
|
||||
##############################################################################
|
||||
# Default values for pipeline jobs #
|
||||
##############################################################################
|
||||
default:
|
||||
image: docker:24.0.6
|
||||
services:
|
||||
- docker:24.0.6-dind
|
||||
tags:
|
||||
- docker
|
||||
|
||||
##############################################################################
|
||||
# CI/CD variables for all jobs in the pipeline #
|
||||
##############################################################################
|
||||
variables:
|
||||
DOCKER_TLS_CERTDIR: /certs
|
||||
DOCKER_BUILD_PATH: .
|
||||
DOCKERFILE: Dockerfile
|
||||
|
||||
##############################################################################
|
||||
# Pipeline jobs #
|
||||
##############################################################################
|
||||
build:
|
||||
stage: build
|
||||
script:
|
||||
- docker build --tag $DOCKER_IMAGE --file $DOCKERFILE $DOCKER_BUILD_PATH
|
||||
- docker save $DOCKER_IMAGE > docker_image.tar
|
||||
artifacts:
|
||||
paths:
|
||||
- docker_image.tar
|
||||
|
||||
publish:
|
||||
stage: publish
|
||||
before_script:
|
||||
- docker login --username gitlab-ci-token --password $CI_JOB_TOKEN $CI_REGISTRY
|
||||
script:
|
||||
- docker load --input docker_image.tar
|
||||
- docker push $DOCKER_IMAGE
|
||||
after_script:
|
||||
- docker logout $CI_REGISTRY
|
||||
|
||||
container_scanning:
|
||||
stage: sca
|
||||
rules:
|
||||
# Run the job on commits to the default branch
|
||||
- if: $CI_COMMIT_BRANCH == $CI_DEFAULT_BRANCH
|
||||
when: always
|
||||
# Run the job on tag creation
|
||||
- if: $CI_COMMIT_TAG
|
||||
when: always
|
||||
# Don't run the job on all other occasions
|
||||
- when: never
|
||||
variables:
|
||||
CS_IMAGE: $DOCKER_IMAGE
|
5
.vscode/extensions.json
vendored
@ -1,7 +1,8 @@
|
||||
{
|
||||
"recommendations": [
|
||||
"samuelcolvin.jinjahtml",
|
||||
"irongeek.vscode-env",
|
||||
"ms-azuretools.vscode-docker",
|
||||
"ms-python.python"
|
||||
"ms-python.python",
|
||||
"samuelcolvin.jinjahtml"
|
||||
]
|
||||
}
|
||||
|
6
.vscode/settings.json
vendored
@ -1,13 +1,9 @@
|
||||
{
|
||||
"editor.rulers": [79],
|
||||
"files.insertFinalNewline": true,
|
||||
"python.terminal.activateEnvironment": false,
|
||||
"[css]": {
|
||||
"editor.tabSize": 2
|
||||
},
|
||||
"[scss]": {
|
||||
"editor.tabSize": 2
|
||||
},
|
||||
"[html]": {
|
||||
"editor.tabSize": 2
|
||||
},
|
||||
@ -17,7 +13,7 @@
|
||||
"[jinja-html]": {
|
||||
"editor.tabSize": 2
|
||||
},
|
||||
"[jinja-js]": {
|
||||
"[scss]": {
|
||||
"editor.tabSize": 2
|
||||
}
|
||||
}
|
||||
|
40
Dockerfile
@ -1,50 +1,60 @@
|
||||
FROM python:3.8.10-slim-buster
|
||||
FROM python:3.10.13-slim-bookworm
|
||||
|
||||
|
||||
LABEL authors="Patrick Jentsch <p.jentsch@uni-bielefeld.de>"
|
||||
|
||||
|
||||
ARG DOCKER_GID
|
||||
ARG UID
|
||||
ARG GID
|
||||
|
||||
|
||||
# Set environment variables
|
||||
ENV LANG="C.UTF-8"
|
||||
ENV PYTHONDONTWRITEBYTECODE="1"
|
||||
ENV PYTHONUNBUFFERED="1"
|
||||
|
||||
|
||||
# Install system dependencies
|
||||
RUN apt-get update \
|
||||
&& apt-get install --no-install-recommends --yes \
|
||||
build-essential \
|
||||
gosu \
|
||||
libpq-dev \
|
||||
&& rm --recursive /var/lib/apt/lists/*
|
||||
|
||||
|
||||
RUN groupadd --gid "${DOCKER_GID}" docker \
|
||||
&& groupadd --gid "${GID}" nopaque \
|
||||
&& useradd --create-home --gid nopaque --groups "${DOCKER_GID}" --no-log-init --uid "${UID}" nopaque
|
||||
# Create a non-root user
|
||||
RUN useradd --create-home --no-log-init nopaque \
|
||||
&& groupadd docker \
|
||||
&& usermod --append --groups docker nopaque
|
||||
|
||||
USER nopaque
|
||||
WORKDIR /home/nopaque
|
||||
|
||||
|
||||
ENV PYTHON3_VENV_PATH="/home/nopaque/venv"
|
||||
RUN python3 -m venv "${PYTHON3_VENV_PATH}"
|
||||
ENV PATH="${PYTHON3_VENV_PATH}/bin:${PATH}"
|
||||
# Create a Python virtual environment
|
||||
ENV NOPAQUE_PYTHON3_VENV_PATH="/home/nopaque/.venv"
|
||||
RUN python3 -m venv "${NOPAQUE_PYTHON3_VENV_PATH}"
|
||||
ENV PATH="${NOPAQUE_PYTHON3_VENV_PATH}/bin:${PATH}"
|
||||
|
||||
|
||||
COPY --chown=nopaque:nopaque requirements.txt .
|
||||
# Install Python dependencies
|
||||
COPY --chown=nopaque:nopaque requirements.txt requirements.txt
|
||||
RUN python3 -m pip install --requirement requirements.txt \
|
||||
&& rm requirements.txt
|
||||
|
||||
|
||||
# Install the application
|
||||
COPY docker-nopaque-entrypoint.sh /usr/local/bin/
|
||||
|
||||
COPY --chown=nopaque:nopaque app app
|
||||
COPY --chown=nopaque:nopaque migrations migrations
|
||||
COPY --chown=nopaque:nopaque tests tests
|
||||
COPY --chown=nopaque:nopaque .flaskenv boot.sh config.py nopaque.py ./
|
||||
COPY --chown=nopaque:nopaque .flaskenv boot.sh config.py nopaque.py requirements.txt ./
|
||||
|
||||
RUN mkdir logs
|
||||
|
||||
|
||||
EXPOSE 5000
|
||||
|
||||
|
||||
ENTRYPOINT ["./boot.sh"]
|
||||
USER root
|
||||
|
||||
|
||||
ENTRYPOINT ["docker-nopaque-entrypoint.sh"]
|
||||
|
@ -1,5 +1,8 @@
|
||||
# nopaque
|
||||
|
||||

|
||||

|
||||
|
||||
nopaque bundles various tools and services that provide humanities scholars with DH methods and thus can support their various individual research processes. Using nopaque, researchers can subject digitized sources to Optical Character Recognition (OCR). The resulting text files can then be used as a data basis for Natural Language Processing (NLP). The texts are automatically subjected to various linguistic annotations. The data processed via NLP can then be summarized in the web application as corpora and analyzed by means of an information retrieval system through complex search queries. The range of functions of the web application will be successively extended according to the needs of the researchers.
|
||||
|
||||
## Prerequisites and requirements
|
||||
|
@ -13,7 +13,6 @@ from flask_paranoid import Paranoid
|
||||
from flask_socketio import SocketIO
|
||||
from flask_sqlalchemy import SQLAlchemy
|
||||
from flask_hashids import Hashids
|
||||
from werkzeug.exceptions import HTTPException
|
||||
|
||||
|
||||
apifairy = APIFairy()
|
||||
@ -58,6 +57,9 @@ def create_app(config: Config = Config) -> Flask:
|
||||
scheduler.init_app(app)
|
||||
socketio.init_app(app, message_queue=app.config['NOPAQUE_SOCKETIO_MESSAGE_QUEUE_URI']) # noqa
|
||||
|
||||
from .models.event_listeners import register_event_listeners
|
||||
register_event_listeners()
|
||||
|
||||
from .admin import bp as admin_blueprint
|
||||
default_breadcrumb_root(admin_blueprint, '.admin')
|
||||
app.register_blueprint(admin_blueprint, url_prefix='/admin')
|
||||
@ -74,8 +76,10 @@ def create_app(config: Config = Config) -> Flask:
|
||||
app.register_blueprint(contributions_blueprint, url_prefix='/contributions')
|
||||
|
||||
from .corpora import bp as corpora_blueprint
|
||||
from .corpora.cqi_over_sio import CQiNamespace
|
||||
default_breadcrumb_root(corpora_blueprint, '.corpora')
|
||||
app.register_blueprint(corpora_blueprint, cli_group='corpus', url_prefix='/corpora')
|
||||
socketio.on_namespace(CQiNamespace('/cqi_over_sio'))
|
||||
|
||||
from .errors import bp as errors_bp
|
||||
app.register_blueprint(errors_bp)
|
||||
@ -98,6 +102,9 @@ def create_app(config: Config = Config) -> Flask:
|
||||
|
||||
from .users import bp as users_blueprint
|
||||
default_breadcrumb_root(users_blueprint, '.users')
|
||||
app.register_blueprint(users_blueprint, url_prefix='/users')
|
||||
app.register_blueprint(users_blueprint, cli_group='user', url_prefix='/users')
|
||||
|
||||
from .workshops import bp as workshops_blueprint
|
||||
app.register_blueprint(workshops_blueprint, url_prefix='/workshops')
|
||||
|
||||
return app
|
||||
|
@ -16,8 +16,8 @@ class CreateSpaCyNLPPipelineModelForm(ContributionBaseForm):
|
||||
)
|
||||
|
||||
def validate_spacy_model_file(self, field):
|
||||
if not field.data.filename.lower().endswith('.tar.gz'):
|
||||
raise ValidationError('.tar.gz files only!')
|
||||
if not field.data.filename.lower().endswith(('.tar.gz', ('.whl'))):
|
||||
raise ValidationError('.tar.gz or .whl files only!')
|
||||
|
||||
def __init__(self, *args, **kwargs):
|
||||
if 'prefix' not in kwargs:
|
||||
|
@ -2,32 +2,34 @@ from flask import current_app
|
||||
from app import db
|
||||
from app.models import User, Corpus, CorpusFile
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
from typing import Dict, List
|
||||
import json
|
||||
import os
|
||||
import shutil
|
||||
|
||||
|
||||
class SandpaperConverter:
|
||||
def __init__(self, json_db_file, data_dir):
|
||||
def __init__(self, json_db_file: Path, data_dir: Path):
|
||||
self.json_db_file = json_db_file
|
||||
self.data_dir = data_dir
|
||||
|
||||
def run(self):
|
||||
with open(self.json_db_file, 'r') as f:
|
||||
json_db = json.loads(f.read())
|
||||
with self.json_db_file.open('r') as f:
|
||||
json_db: List[Dict] = json.load(f)
|
||||
|
||||
for json_user in json_db:
|
||||
if not json_user['confirmed']:
|
||||
current_app.logger.info(f'Skip unconfirmed user {json_user["username"]}')
|
||||
continue
|
||||
user_dir = os.path.join(self.data_dir, str(json_user['id']))
|
||||
user_dir = self.data_dir / f'{json_user["id"]}'
|
||||
self.convert_user(json_user, user_dir)
|
||||
db.session.commit()
|
||||
|
||||
|
||||
def convert_user(self, json_user, user_dir):
|
||||
def convert_user(self, json_user: Dict, user_dir: Path):
|
||||
current_app.logger.info(f'Create User {json_user["username"]}...')
|
||||
user = User(
|
||||
try:
|
||||
user = User.create(
|
||||
confirmed=json_user['confirmed'],
|
||||
email=json_user['email'],
|
||||
last_seen=datetime.fromtimestamp(json_user['last_seen']),
|
||||
@ -35,47 +37,34 @@ class SandpaperConverter:
|
||||
password_hash=json_user['password_hash'], # TODO: Needs to be added manually
|
||||
username=json_user['username']
|
||||
)
|
||||
db.session.add(user)
|
||||
db.session.flush(objects=[user])
|
||||
db.session.refresh(user)
|
||||
try:
|
||||
user.makedirs()
|
||||
except OSError as e:
|
||||
current_app.logger.error(e)
|
||||
db.session.rollback()
|
||||
except OSError:
|
||||
raise Exception('Internal Server Error')
|
||||
for json_corpus in json_user['corpora'].values():
|
||||
if not json_corpus['files'].values():
|
||||
current_app.logger.info(f'Skip empty corpus {json_corpus["title"]}')
|
||||
continue
|
||||
corpus_dir = os.path.join(user_dir, 'corpora', str(json_corpus['id']))
|
||||
corpus_dir = user_dir / 'corpora' / f'{json_corpus["id"]}'
|
||||
self.convert_corpus(json_corpus, user, corpus_dir)
|
||||
current_app.logger.info('Done')
|
||||
|
||||
|
||||
def convert_corpus(self, json_corpus, user, corpus_dir):
|
||||
def convert_corpus(self, json_corpus: Dict, user: User, corpus_dir: Path):
|
||||
current_app.logger.info(f'Create Corpus {json_corpus["title"]}...')
|
||||
corpus = Corpus(
|
||||
try:
|
||||
corpus = Corpus.create(
|
||||
user=user,
|
||||
creation_date=datetime.fromtimestamp(json_corpus['creation_date']),
|
||||
description=json_corpus['description'],
|
||||
title=json_corpus['title']
|
||||
)
|
||||
db.session.add(corpus)
|
||||
db.session.flush(objects=[corpus])
|
||||
db.session.refresh(corpus)
|
||||
try:
|
||||
corpus.makedirs()
|
||||
except OSError as e:
|
||||
current_app.logger.error(e)
|
||||
db.session.rollback()
|
||||
except OSError:
|
||||
raise Exception('Internal Server Error')
|
||||
for json_corpus_file in json_corpus['files'].values():
|
||||
self.convert_corpus_file(json_corpus_file, corpus, corpus_dir)
|
||||
current_app.logger.info('Done')
|
||||
|
||||
|
||||
def convert_corpus_file(self, json_corpus_file, corpus, corpus_dir):
|
||||
def convert_corpus_file(self, json_corpus_file: Dict, corpus: Corpus, corpus_dir: Path):
|
||||
current_app.logger.info(f'Create CorpusFile {json_corpus_file["title"]}...')
|
||||
corpus_file = CorpusFile(
|
||||
corpus=corpus,
|
||||
@ -99,13 +88,13 @@ class SandpaperConverter:
|
||||
db.session.refresh(corpus_file)
|
||||
try:
|
||||
shutil.copy2(
|
||||
os.path.join(corpus_dir, json_corpus_file['filename']),
|
||||
corpus_dir / json_corpus_file['filename'],
|
||||
corpus_file.path
|
||||
)
|
||||
except:
|
||||
current_app.logger.warning(
|
||||
'Can not convert corpus file: '
|
||||
f'{os.path.join(corpus_dir, json_corpus_file["filename"])}'
|
||||
f'{corpus_dir / json_corpus_file["filename"]}'
|
||||
' -> '
|
||||
f'{corpus_file.path}'
|
||||
)
|
||||
|
@ -16,5 +16,4 @@ def before_request():
|
||||
pass
|
||||
|
||||
|
||||
from . import cli, cqi_over_socketio, files, followers, routes, json_routes
|
||||
from . import cqi_over_sio
|
||||
from . import cli, files, followers, routes, json_routes
|
||||
|
@ -1,7 +1,7 @@
|
||||
from app.models import Corpus, CorpusStatus
|
||||
import os
|
||||
from flask import current_app
|
||||
import shutil
|
||||
from app import db
|
||||
from app.models import Corpus, CorpusStatus
|
||||
from . import bp
|
||||
|
||||
|
||||
@ -18,7 +18,17 @@ def reset():
|
||||
]
|
||||
for corpus in [x for x in Corpus.query.all() if x.status in status]:
|
||||
print(f'Resetting corpus {corpus}')
|
||||
shutil.rmtree(os.path.join(corpus.path, 'cwb'), ignore_errors=True)
|
||||
corpus_cwb_dir = corpus.path / 'cwb'
|
||||
corpus_cwb_data_dir = corpus_cwb_dir / 'data'
|
||||
corpus_cwb_registry_dir = corpus_cwb_dir / 'registry'
|
||||
try:
|
||||
shutil.rmtree(corpus.path / 'cwb', ignore_errors=True)
|
||||
corpus_cwb_dir.mkdir()
|
||||
corpus_cwb_data_dir.mkdir()
|
||||
corpus_cwb_registry_dir.mkdir()
|
||||
except OSError as e:
|
||||
current_app.logger.error(e)
|
||||
raise
|
||||
corpus.status = CorpusStatus.UNPREPARED
|
||||
corpus.num_analysis_sessions = 0
|
||||
db.session.commit()
|
||||
|
@ -1,112 +1,206 @@
|
||||
from cqi import CQiClient
|
||||
from cqi.errors import CQiException
|
||||
from flask import session
|
||||
from cqi.status import CQiStatus
|
||||
from docker.models.containers import Container
|
||||
from flask import current_app, session
|
||||
from flask_login import current_user
|
||||
from flask_socketio import ConnectionRefusedError
|
||||
from flask_socketio import Namespace
|
||||
from inspect import signature
|
||||
from threading import Lock
|
||||
from app import db, hashids, socketio
|
||||
from typing import Callable, Dict, List, Optional
|
||||
from app import db, docker_client, hashids, socketio
|
||||
from app.decorators import socketio_login_required
|
||||
from app.models import Corpus, CorpusStatus
|
||||
from . import extensions
|
||||
|
||||
|
||||
'''
|
||||
This package tunnels the Corpus Query interface (CQi) protocol through
|
||||
Socket.IO (SIO) by wrapping each CQi function in a separate SIO event.
|
||||
|
||||
This module only handles the SIO connect/disconnect, which handles the setup
|
||||
and teardown of necessary resources for later use. Each CQi function has a
|
||||
corresponding SIO event. The event handlers are spread across the different
|
||||
modules within this package.
|
||||
Socket.IO (SIO) by tunneling CQi API calls through an event called "exec".
|
||||
|
||||
Basic concept:
|
||||
1. A client connects to the SIO namespace and provides the id of a corpus to be
|
||||
analysed.
|
||||
1. A client connects to the "/cqi_over_sio" namespace.
|
||||
2. The client emits the "init" event and provides a corpus id for the corpus
|
||||
that should be analysed in this session.
|
||||
1.1 The analysis session counter of the corpus is incremented.
|
||||
1.2 A CQiClient and a (Mutex) Lock belonging to it is created.
|
||||
1.3 Wait until the CQP server is running.
|
||||
1.4 Connect the CQiClient to the server.
|
||||
1.5 Save the CQiClient and the Lock in the session for subsequent use.
|
||||
2. A client emits an event and may provide a single json object with necessary
|
||||
arguments for the targeted CQi function.
|
||||
3. A SIO event handler (decorated with cqi_over_socketio) gets executed.
|
||||
- The event handler function defines all arguments. Because the client sends
|
||||
them as a single json object, the decorator decomposes it to fit
|
||||
the functions signature. This also includes type checking and proper
|
||||
use of the lock (acquire/release) mechanism.
|
||||
1.5 Save the CQiClient, the Lock and the corpus id in the session for
|
||||
subsequent use.
|
||||
2. The client emits the "exec" event and provides the name of a CQi API function
|
||||
and its arguments (optional).
|
||||
- The "exec" event handler will execute the function, make sure that the
|
||||
result is serializable and return the result back to the client.
|
||||
4. Wait for more events
|
||||
5. The client disconnects from the SIO namespace
|
||||
5. The client disconnects from the "/cqi_over_sio" namespace
|
||||
1.1 The analysis session counter of the corpus is decremented.
|
||||
1.2 The CQiClient and (Mutex) Lock belonging to it are torn down.
|
||||
'''
|
||||
|
||||
|
||||
NAMESPACE = '/cqi_over_sio'
|
||||
CQI_API_FUNCTION_NAMES: List[str] = [
|
||||
'ask_feature_cl_2_3',
|
||||
'ask_feature_cqi_1_0',
|
||||
'ask_feature_cqp_2_3',
|
||||
'cl_alg2cpos',
|
||||
'cl_attribute_size',
|
||||
'cl_cpos2alg',
|
||||
'cl_cpos2id',
|
||||
'cl_cpos2lbound',
|
||||
'cl_cpos2rbound',
|
||||
'cl_cpos2str',
|
||||
'cl_cpos2struc',
|
||||
'cl_drop_attribute',
|
||||
'cl_id2cpos',
|
||||
'cl_id2freq',
|
||||
'cl_id2str',
|
||||
'cl_idlist2cpos',
|
||||
'cl_lexicon_size',
|
||||
'cl_regex2id',
|
||||
'cl_str2id',
|
||||
'cl_struc2cpos',
|
||||
'cl_struc2str',
|
||||
'corpus_alignment_attributes',
|
||||
'corpus_charset',
|
||||
'corpus_drop_corpus',
|
||||
'corpus_full_name',
|
||||
'corpus_info',
|
||||
'corpus_list_corpora',
|
||||
'corpus_positional_attributes',
|
||||
'corpus_properties',
|
||||
'corpus_structural_attribute_has_values',
|
||||
'corpus_structural_attributes',
|
||||
'cqp_drop_subcorpus',
|
||||
'cqp_dump_subcorpus',
|
||||
'cqp_fdist_1',
|
||||
'cqp_fdist_2',
|
||||
'cqp_list_subcorpora',
|
||||
'cqp_query',
|
||||
'cqp_subcorpus_has_field',
|
||||
'cqp_subcorpus_size',
|
||||
'ctrl_bye',
|
||||
'ctrl_connect',
|
||||
'ctrl_last_general_error',
|
||||
'ctrl_ping',
|
||||
'ctrl_user_abort'
|
||||
]
|
||||
|
||||
|
||||
from .cqi import * # noqa
|
||||
class CQiNamespace(Namespace):
|
||||
@socketio_login_required
|
||||
def on_connect(self):
|
||||
pass
|
||||
|
||||
|
||||
@socketio.on('connect', namespace=NAMESPACE)
|
||||
@socketio_login_required
|
||||
def connect(auth):
|
||||
# the auth variable is used in a hacky way. It contains the corpus id for
|
||||
# which a corpus analysis session should be started.
|
||||
corpus_id = hashids.decode(auth['corpus_id'])
|
||||
corpus = Corpus.query.get(corpus_id)
|
||||
if corpus is None:
|
||||
# return {'code': 404, 'msg': 'Not Found'}
|
||||
raise ConnectionRefusedError('Not Found')
|
||||
if not (corpus.user == current_user
|
||||
or current_user.is_following_corpus(corpus)
|
||||
@socketio_login_required
|
||||
def on_init(self, db_corpus_hashid: str):
|
||||
db_corpus_id: int = hashids.decode(db_corpus_hashid)
|
||||
db_corpus: Optional[Corpus] = Corpus.query.get(db_corpus_id)
|
||||
if db_corpus is None:
|
||||
return {'code': 404, 'msg': 'Not Found'}
|
||||
if not (db_corpus.user == current_user
|
||||
or current_user.is_following_corpus(db_corpus)
|
||||
or current_user.is_administrator()):
|
||||
# return {'code': 403, 'msg': 'Forbidden'}
|
||||
raise ConnectionRefusedError('Forbidden')
|
||||
if corpus.status not in [
|
||||
return {'code': 403, 'msg': 'Forbidden'}
|
||||
if db_corpus.status not in [
|
||||
CorpusStatus.BUILT,
|
||||
CorpusStatus.STARTING_ANALYSIS_SESSION,
|
||||
CorpusStatus.RUNNING_ANALYSIS_SESSION,
|
||||
CorpusStatus.CANCELING_ANALYSIS_SESSION
|
||||
]:
|
||||
# return {'code': 424, 'msg': 'Failed Dependency'}
|
||||
raise ConnectionRefusedError('Failed Dependency')
|
||||
if corpus.num_analysis_sessions is None:
|
||||
corpus.num_analysis_sessions = 0
|
||||
return {'code': 424, 'msg': 'Failed Dependency'}
|
||||
if db_corpus.num_analysis_sessions is None:
|
||||
db_corpus.num_analysis_sessions = 0
|
||||
db.session.commit()
|
||||
corpus.num_analysis_sessions = Corpus.num_analysis_sessions + 1
|
||||
db_corpus.num_analysis_sessions = Corpus.num_analysis_sessions + 1
|
||||
db.session.commit()
|
||||
retry_counter = 20
|
||||
while corpus.status != CorpusStatus.RUNNING_ANALYSIS_SESSION:
|
||||
retry_counter: int = 20
|
||||
while db_corpus.status != CorpusStatus.RUNNING_ANALYSIS_SESSION:
|
||||
if retry_counter == 0:
|
||||
corpus.num_analysis_sessions = Corpus.num_analysis_sessions - 1
|
||||
db_corpus.num_analysis_sessions = Corpus.num_analysis_sessions - 1
|
||||
db.session.commit()
|
||||
return {'code': 408, 'msg': 'Request Timeout'}
|
||||
socketio.sleep(3)
|
||||
retry_counter -= 1
|
||||
db.session.refresh(corpus)
|
||||
cqi_client = CQiClient(f'cqpserver_{corpus_id}')
|
||||
db.session.refresh(db_corpus)
|
||||
# cqi_client: CQiClient = CQiClient(f'cqpserver_{db_corpus_id}')
|
||||
cqpserver_container_name: str = f'cqpserver_{db_corpus_id}'
|
||||
cqpserver_container: Container = docker_client.containers.get(cqpserver_container_name)
|
||||
cqpserver_host: str = cqpserver_container.attrs['NetworkSettings']['Networks'][current_app.config['NOPAQUE_DOCKER_NETWORK_NAME']]['IPAddress']
|
||||
cqi_client: CQiClient = CQiClient(cqpserver_host)
|
||||
session['cqi_over_sio'] = {
|
||||
'corpus_id': corpus_id,
|
||||
'cqi_client': cqi_client,
|
||||
'cqi_client_lock': Lock(),
|
||||
'db_corpus_id': db_corpus_id
|
||||
}
|
||||
# return {'code': 200, 'msg': 'OK'}
|
||||
return {'code': 200, 'msg': 'OK'}
|
||||
|
||||
|
||||
@socketio.on('disconnect', namespace=NAMESPACE)
|
||||
def disconnect():
|
||||
@socketio_login_required
def on_exec(self, fn_name: str, fn_args: Dict = {}):
    '''
    Execute a whitelisted CQi API or extension function for the client.

    The CQiClient and its lock are taken from the Socket.IO session. The
    function is looked up by name, the client supplied arguments are
    validated against the function signature and the call is performed
    while holding the lock.

    :param fn_name: Name of the function, must be listed in
        CQI_API_FUNCTION_NAMES or extensions.CQI_EXTENSION_FUNCTION_NAMES.
    :param fn_args: Keyword arguments for the function. The default dict
        is never mutated.
    :return: A response dict with the keys 'code' and 'msg' and, on
        success or on a CQiException, a 'payload'.
    '''
    try:
        cqi_client: CQiClient = session['cqi_over_sio']['cqi_client']
        cqi_client_lock: Lock = session['cqi_over_sio']['cqi_client_lock']
    except KeyError:
        # No analysis session has been initialized for this client
        return {'code': 424, 'msg': 'Failed Dependency'}
    if fn_name in CQI_API_FUNCTION_NAMES:
        fn: Callable = getattr(cqi_client.api, fn_name)
    elif fn_name in extensions.CQI_EXTENSION_FUNCTION_NAMES:
        fn: Callable = getattr(extensions, fn_name)
    else:
        return {'code': 400, 'msg': 'Bad Request'}
    # The arguments come straight from the client, so anything that is not
    # a mapping of known parameter names is rejected here instead of
    # raising a TypeError when the function gets called.
    if not isinstance(fn_args, dict):
        return {'code': 400, 'msg': 'Bad Request'}
    fn_params = signature(fn).parameters
    if any(arg_name not in fn_params for arg_name in fn_args):
        return {'code': 400, 'msg': 'Bad Request'}
    for param in fn_params.values():
        if param.name not in fn_args:
            if param.default is param.empty:
                # A required argument is missing
                return {'code': 400, 'msg': 'Bad Request'}
            continue
        # Strict type check: the argument type must be exactly the
        # annotated type of the parameter
        if type(fn_args[param.name]) is not param.annotation:
            return {'code': 400, 'msg': 'Bad Request'}
    # The lock is released on every exit path of the with block
    with cqi_client_lock:
        try:
            fn_return_value = fn(**fn_args)
        except BrokenPipeError:
            return {'code': 500, 'msg': 'Internal Server Error'}
        except CQiException as e:
            return {
                'code': 502,
                'msg': 'Bad Gateway',
                'payload': {
                    'code': e.code,
                    'desc': e.description,
                    'msg': e.__class__.__name__
                }
            }
    if isinstance(fn_return_value, CQiStatus):
        # Status objects are reduced to their code and class name
        payload = {
            'code': fn_return_value.code,
            'msg': fn_return_value.__class__.__name__
        }
    else:
        payload = fn_return_value
    return {'code': 200, 'msg': 'OK', 'payload': payload}
|
||||
|
||||
def on_disconnect(self):
|
||||
try:
|
||||
cqi_client: CQiClient = session['cqi_over_sio']['cqi_client']
|
||||
cqi_client_lock: Lock = session['cqi_over_sio']['cqi_client_lock']
|
||||
db_corpus_id: int = session['cqi_over_sio']['db_corpus_id']
|
||||
except KeyError:
|
||||
return
|
||||
cqi_client_lock.acquire()
|
||||
try:
|
||||
session.pop('cqi_over_sio')
|
||||
except KeyError:
|
||||
pass
|
||||
try:
|
||||
cqi_client.api.ctrl_bye()
|
||||
except (BrokenPipeError, CQiException):
|
||||
pass
|
||||
cqi_client_lock.release()
|
||||
corpus = Corpus.query.get(session['cqi_over_sio']['corpus_id'])
|
||||
corpus.num_analysis_sessions = Corpus.num_analysis_sessions - 1
|
||||
db_corpus: Optional[Corpus] = Corpus.query.get(db_corpus_id)
|
||||
if db_corpus is None:
|
||||
return
|
||||
db_corpus.num_analysis_sessions = Corpus.num_analysis_sessions - 1
|
||||
db.session.commit()
|
||||
session.pop('cqi_over_sio')
|
||||
# return {'code': 200, 'msg': 'OK'}
|
||||
|
@ -1,114 +0,0 @@
|
||||
from cqi import CQiClient
|
||||
from cqi.errors import CQiException
|
||||
from cqi.status import CQiStatus
|
||||
from flask import session
|
||||
from inspect import signature
|
||||
from threading import Lock
|
||||
from typing import Callable, Dict, List
|
||||
from app import socketio
|
||||
from app.decorators import socketio_login_required
|
||||
from . import NAMESPACE as ns
|
||||
|
||||
|
||||
CQI_API_FUNCTIONS: List[str] = [
|
||||
'ask_feature_cl_2_3',
|
||||
'ask_feature_cqi_1_0',
|
||||
'ask_feature_cqp_2_3',
|
||||
'cl_alg2cpos',
|
||||
'cl_attribute_size',
|
||||
'cl_cpos2alg',
|
||||
'cl_cpos2id',
|
||||
'cl_cpos2lbound',
|
||||
'cl_cpos2rbound',
|
||||
'cl_cpos2str',
|
||||
'cl_cpos2struc',
|
||||
'cl_drop_attribute',
|
||||
'cl_id2cpos',
|
||||
'cl_id2freq',
|
||||
'cl_id2str',
|
||||
'cl_idlist2cpos',
|
||||
'cl_lexicon_size',
|
||||
'cl_regex2id',
|
||||
'cl_str2id',
|
||||
'cl_struc2cpos',
|
||||
'cl_struc2str',
|
||||
'corpus_alignment_attributes',
|
||||
'corpus_charset',
|
||||
'corpus_drop_corpus',
|
||||
'corpus_full_name',
|
||||
'corpus_info',
|
||||
'corpus_list_corpora',
|
||||
'corpus_positional_attributes',
|
||||
'corpus_properties',
|
||||
'corpus_structural_attribute_has_values',
|
||||
'corpus_structural_attributes',
|
||||
'cqp_drop_subcorpus',
|
||||
'cqp_dump_subcorpus',
|
||||
'cqp_fdist_1',
|
||||
'cqp_fdist_2',
|
||||
'cqp_list_subcorpora',
|
||||
'cqp_query',
|
||||
'cqp_subcorpus_has_field',
|
||||
'cqp_subcorpus_size',
|
||||
'ctrl_bye',
|
||||
'ctrl_connect',
|
||||
'ctrl_last_general_error',
|
||||
'ctrl_ping',
|
||||
'ctrl_user_abort'
|
||||
]
|
||||
|
||||
|
||||
@socketio.on('cqi_client.api', namespace=ns)
|
||||
@socketio_login_required
|
||||
def cqi_over_sio(fn_data):
|
||||
try:
|
||||
fn_name: str = fn_data['fn_name']
|
||||
if fn_name not in CQI_API_FUNCTIONS:
|
||||
raise KeyError
|
||||
except KeyError:
|
||||
return {'code': 400, 'msg': 'Bad Request'}
|
||||
fn_name: str = fn_data['fn_name']
|
||||
fn_args: Dict = fn_data.get('fn_args', {})
|
||||
try:
|
||||
cqi_client: CQiClient = session['cqi_over_sio']['cqi_client']
|
||||
cqi_client_lock: Lock = session['cqi_over_sio']['cqi_client_lock']
|
||||
except KeyError:
|
||||
return {'code': 424, 'msg': 'Failed Dependency'}
|
||||
fn: Callable = getattr(cqi_client.api, fn_name)
|
||||
for param in signature(fn).parameters.values():
|
||||
if param.default is param.empty:
|
||||
if param.name not in fn_args:
|
||||
return {'code': 400, 'msg': 'Bad Request'}
|
||||
else:
|
||||
if param.name not in fn_args:
|
||||
continue
|
||||
if type(fn_args[param.name]) is not param.annotation:
|
||||
return {'code': 400, 'msg': 'Bad Request'}
|
||||
cqi_client_lock.acquire()
|
||||
try:
|
||||
return_value = fn(**fn_args)
|
||||
except BrokenPipeError:
|
||||
return_value = {
|
||||
'code': 500,
|
||||
'msg': 'Internal Server Error'
|
||||
}
|
||||
except CQiException as e:
|
||||
return_value = {
|
||||
'code': 502,
|
||||
'msg': 'Bad Gateway',
|
||||
'payload': {
|
||||
'code': e.code,
|
||||
'desc': e.description,
|
||||
'msg': e.__class__.__name__
|
||||
}
|
||||
}
|
||||
finally:
|
||||
cqi_client_lock.release()
|
||||
if isinstance(return_value, CQiStatus):
|
||||
payload = {
|
||||
'code': return_value.code,
|
||||
'msg': return_value.__class__.__name__
|
||||
}
|
||||
else:
|
||||
payload = return_value
|
||||
return {'code': 200, 'msg': 'OK', 'payload': payload}
|
287
app/corpora/cqi_over_sio/extensions.py
Normal file
@ -0,0 +1,287 @@
|
||||
from collections import Counter
|
||||
from cqi import CQiClient
|
||||
from cqi.models.corpora import Corpus as CQiCorpus
|
||||
from cqi.models.subcorpora import Subcorpus as CQiSubcorpus
|
||||
from cqi.models.attributes import (
|
||||
PositionalAttribute as CQiPositionalAttribute,
|
||||
StructuralAttribute as CQiStructuralAttribute
|
||||
)
|
||||
from cqi.status import StatusOk as CQiStatusOk
|
||||
from flask import session
|
||||
from typing import Dict, List
|
||||
import gzip
|
||||
import json
|
||||
import math
|
||||
from app import db
|
||||
from app.models import Corpus
|
||||
from .utils import lookups_by_cpos, partial_export_subcorpus, export_subcorpus
|
||||
|
||||
|
||||
CQI_EXTENSION_FUNCTION_NAMES: List[str] = [
|
||||
'ext_corpus_update_db',
|
||||
'ext_corpus_static_data',
|
||||
'ext_corpus_paginate_corpus',
|
||||
'ext_cqp_paginate_subcorpus',
|
||||
'ext_cqp_partial_export_subcorpus',
|
||||
'ext_cqp_export_subcorpus',
|
||||
]
|
||||
|
||||
|
||||
def ext_corpus_update_db(corpus: str) -> CQiStatusOk:
    '''
    Store the size of the CQi corpus as the token count of the database
    corpus that belongs to the current analysis session.

    :param corpus: Name of the CQi corpus.
    :return: A CQiStatusOk instance.
    '''
    sio_session = session['cqi_over_sio']
    cqi_client: CQiClient = sio_session['cqi_client']
    db_corpus: Corpus = Corpus.query.get(sio_session['db_corpus_id'])
    db_corpus.num_tokens = cqi_client.corpora.get(corpus).size
    db.session.commit()
    return CQiStatusOk()
|
||||
|
||||
|
||||
def ext_corpus_static_data(corpus: str) -> bytes:
    '''
    Return the gzip compressed JSON document with the static data of a
    corpus.

    The document is cached as 'cwb/static.json.gz' below the path of the
    database corpus. If the file exists its content is returned as is,
    otherwise the data is collected via CQi, written to the file and the
    content of the file is returned.

    :param corpus: Name of the CQi corpus.
    :return: The raw (still gzip compressed) bytes of the cache file.
    '''
    db_corpus_id: int = session['cqi_over_sio']['db_corpus_id']
    db_corpus: Corpus = Corpus.query.get(db_corpus_id)

    static_data_file_path = db_corpus.path / 'cwb' / 'static.json.gz'
    if static_data_file_path.exists():
        with static_data_file_path.open('rb') as f:
            return f.read()

    cqi_client: CQiClient = session['cqi_over_sio']['cqi_client']
    cqi_corpus: CQiCorpus = cqi_client.corpora.get(corpus)
    cqi_p_attrs: List[CQiPositionalAttribute] = cqi_corpus.positional_attributes.list()
    cqi_s_attrs: List[CQiStructuralAttribute] = cqi_corpus.structural_attributes.list()

    static_data = {
        'corpus': {
            'bounds': [0, cqi_corpus.size - 1],
            'freqs': {}
        },
        'p_attrs': {},
        's_attrs': {},
        'values': {'p_attrs': {}, 's_attrs': {}}
    }

    for p_attr in cqi_p_attrs:
        p_attr_id_list: List[int] = list(range(p_attr.lexicon_size))

        print(f'corpus.freqs.{p_attr.name}')
        static_data['corpus']['freqs'][p_attr.name] = list(
            p_attr.freqs_by_ids(p_attr_id_list)
        )

        print(f'p_attrs.{p_attr.name}')
        static_data['p_attrs'][p_attr.name] = list(
            p_attr.ids_by_cpos(list(range(cqi_corpus.size)))
        )

        print(f'values.p_attrs.{p_attr.name}')
        static_data['values']['p_attrs'][p_attr.name] = list(
            p_attr.values_by_ids(p_attr_id_list)
        )
        del p_attr_id_list

    for s_attr in cqi_s_attrs:
        # Attributes with values are handled as part of their parent
        # attribute further down (see sub_s_attrs)
        if s_attr.has_values:
            continue

        lexicon: List[Dict] = []
        static_data['s_attrs'][s_attr.name] = {'lexicon': lexicon, 'values': None}

        if s_attr.name in ['s', 'ent']:
            ##############################################################
            # A faster way to get cpos boundaries for smaller s_attrs    #
            # Note: Needs more testing, don't use it in production       #
            ##############################################################
            cqi_corpus.query('Last', f'<{s_attr.name}> []* </{s_attr.name}>;')
            cqi_subcorpus: CQiSubcorpus = cqi_corpus.subcorpora.get('Last')
            first_match: int = 0
            last_match: int = cqi_subcorpus.size - 1
            match_boundaries = zip(
                range(first_match, last_match + 1),
                cqi_subcorpus.dump(
                    cqi_subcorpus.fields['match'],
                    first_match,
                    last_match
                ),
                cqi_subcorpus.dump(
                    cqi_subcorpus.fields['matchend'],
                    first_match,
                    last_match
                )
            )
            # Both dumps are already fetched at this point, so the
            # subcorpus can be dropped before the results are consumed
            cqi_subcorpus.drop()
            for s_attr_id, lbound, rbound in match_boundaries:
                print(f's_attrs.{s_attr.name}.lexicon.{s_attr_id}.bounds')
                lexicon.append({'bounds': [lbound, rbound]})
            del match_boundaries

        if s_attr.name != 'text':
            continue

        for s_attr_id in range(0, s_attr.size):
            # This is a very slow operation, that's why we only use it
            # for the text attribute
            lbound, rbound = s_attr.cpos_by_id(s_attr_id)
            print(f's_attrs.{s_attr.name}.lexicon.{s_attr_id}.bounds')
            cpos_list: List[int] = list(range(lbound, rbound + 1))
            freqs: Dict = {}
            for p_attr in cqi_p_attrs:
                p_attr_ids = p_attr.ids_by_cpos(cpos_list)
                print(f's_attrs.{s_attr.name}.lexicon.{s_attr_id}.freqs.{p_attr.name}')
                freqs[p_attr.name] = dict(Counter(p_attr_ids))
                del p_attr_ids
            del cpos_list
            lexicon.append({'bounds': [lbound, rbound], 'freqs': freqs})

        sub_s_attrs: List[CQiStructuralAttribute] = cqi_corpus.structural_attributes.list(filters={'part_of': s_attr})
        print(f's_attrs.{s_attr.name}.values')
        # Names of the sub attributes without the '<s_attr.name>_' prefix
        value_names: List[str] = [
            sub_s_attr.name[(len(s_attr.name) + 1):]
            for sub_s_attr in sub_s_attrs
        ]
        static_data['s_attrs'][s_attr.name]['values'] = value_names
        s_attr_id_list: List[int] = list(range(s_attr.size))
        # One list of values per sub attribute, indexed by s_attr id
        sub_s_attr_values: List[List[str]] = [
            list(sub_s_attr.values_by_ids(s_attr_id_list))
            for sub_s_attr in sub_s_attrs
        ]
        del s_attr_id_list
        print(f'values.s_attrs.{s_attr.name}')
        static_data['values']['s_attrs'][s_attr.name] = [
            {
                value_name: values[s_attr_id]
                for value_name, values in zip(value_names, sub_s_attr_values)
            } for s_attr_id in range(0, s_attr.size)
        ]
        del sub_s_attr_values
    print('Saving static data to file')
    # NOTE(review): the cache file is written in place, a request that
    # checks for the file while it is being written may read a partial
    # file - confirm whether concurrent sessions can hit this.
    with gzip.open(static_data_file_path, 'wt') as f:
        json.dump(static_data, f)
    # Release the (potentially large) structure before reading the file
    del static_data
    print('Sending static data to client')
    with static_data_file_path.open('rb') as f:
        return f.read()
|
||||
|
||||
|
||||
def ext_corpus_paginate_corpus(
    corpus: str,
    page: int = 1,
    per_page: int = 20
) -> Dict:
    '''
    Return one page of corpus positions together with their lookups.

    :param corpus: Name of the CQi corpus.
    :param page: Number of the requested page (1 indexed).
    :param per_page: Number of corpus positions per page.
    :return: A pagination dict or a 416 response dict if the requested
        page is out of range.
    '''
    cqi_client: CQiClient = session['cqi_over_sio']['cqi_client']
    cqi_corpus = cqi_client.corpora.get(corpus)
    # Sanity checks
    if per_page < 1 or page < 1:
        return {'code': 416, 'msg': 'Range Not Satisfiable'}
    if cqi_corpus.size > 0 and page > math.ceil(cqi_corpus.size / per_page):
        return {'code': 416, 'msg': 'Range Not Satisfiable'}
    page_start = (page - 1) * per_page
    page_end = min(cqi_corpus.size, page_start + per_page)
    cpos_list = list(range(page_start, page_end))
    lookups = lookups_by_cpos(cqi_corpus, cpos_list)
    total = cqi_corpus.size
    pages = math.ceil(total / per_page)
    # There is no current page if there is nothing to paginate
    current_page = page if pages > 0 else None
    if current_page:
        has_prev = current_page > 1
        has_next = current_page < pages
    else:
        has_prev = False
        has_next = False
    return {
        # the items for the current page
        'items': [cpos_list],
        # the lookups for the items
        'lookups': lookups,
        # the total number of items
        'total': total,
        # the number of items to be displayed on a page
        'per_page': per_page,
        # the total number of pages
        'pages': pages,
        # the current page number (1 indexed)
        'page': current_page,
        # True if a previous page exists
        'has_prev': has_prev,
        # True if a next page exists
        'has_next': has_next,
        # number of the previous page
        'prev_num': current_page - 1 if has_prev else None,
        # number of the next page
        'next_num': current_page + 1 if has_next else None
    }
|
||||
|
||||
|
||||
def ext_cqp_paginate_subcorpus(
    subcorpus: str,
    context: int = 50,
    page: int = 1,
    per_page: int = 20
) -> Dict:
    '''
    Return one page of matches of a subcorpus together with their lookups.

    :param subcorpus: Subcorpus specifier of the form
        '<corpus name>:<subcorpus name>'.
    :param context: Context size that is passed on to export_subcorpus.
    :param page: Number of the requested page (1 indexed).
    :param per_page: Number of matches per page.
    :return: A pagination dict or a 416 response dict if the requested
        page is out of range.
    '''
    corpus_name, subcorpus_name = subcorpus.split(':', 1)
    cqi_client: CQiClient = session['cqi_over_sio']['cqi_client']
    cqi_corpus = cqi_client.corpora.get(corpus_name)
    cqi_subcorpus = cqi_corpus.subcorpora.get(subcorpus_name)
    # Sanity checks
    if per_page < 1 or page < 1:
        return {'code': 416, 'msg': 'Range Not Satisfiable'}
    if (
        cqi_subcorpus.size > 0
        and page > math.ceil(cqi_subcorpus.size / per_page)
    ):
        return {'code': 416, 'msg': 'Range Not Satisfiable'}
    cqi_results_export = export_subcorpus(
        cqi_subcorpus,
        context=context,
        cutoff=per_page,
        offset=(page - 1) * per_page
    )
    # Everything that remains after removing the matches are lookups
    items = cqi_results_export.pop('matches')
    total = cqi_subcorpus.size
    pages = math.ceil(total / per_page)
    # There is no current page if there is nothing to paginate
    current_page = page if pages > 0 else None
    if current_page:
        has_prev = current_page > 1
        has_next = current_page < pages
    else:
        has_prev = False
        has_next = False
    return {
        # the items for the current page
        'items': items,
        # the lookups for the items
        'lookups': cqi_results_export,
        # the total number of items matching the query
        'total': total,
        # the number of items to be displayed on a page
        'per_page': per_page,
        # the total number of pages
        'pages': pages,
        # the current page number (1 indexed)
        'page': current_page,
        # True if a previous page exists
        'has_prev': has_prev,
        # True if a next page exists
        'has_next': has_next,
        # number of the previous page
        'prev_num': current_page - 1 if has_prev else None,
        # number of the next page
        'next_num': current_page + 1 if has_next else None
    }
|
||||
|
||||
|
||||
def ext_cqp_partial_export_subcorpus(
    subcorpus: str,
    match_id_list: list,
    context: int = 50
) -> Dict:
    '''
    Export selected matches of a subcorpus.

    :param subcorpus: Subcorpus specifier of the form
        '<corpus name>:<subcorpus name>'.
    :param match_id_list: Ids of the matches to export.
    :param context: Context size that is passed on to
        partial_export_subcorpus.
    :return: The result of partial_export_subcorpus.
    '''
    corpus_name, subcorpus_name = subcorpus.split(':', 1)
    cqi_client: CQiClient = session['cqi_over_sio']['cqi_client']
    cqi_subcorpus = (
        cqi_client.corpora.get(corpus_name).subcorpora.get(subcorpus_name)
    )
    return partial_export_subcorpus(
        cqi_subcorpus,
        match_id_list,
        context=context
    )
|
||||
|
||||
|
||||
def ext_cqp_export_subcorpus(
    subcorpus: str,
    context: int = 50
) -> Dict:
    '''
    Export all matches of a subcorpus.

    :param subcorpus: Subcorpus specifier of the form
        '<corpus name>:<subcorpus name>'.
    :param context: Context size that is passed on to export_subcorpus.
    :return: The result of export_subcorpus.
    '''
    corpus_name, subcorpus_name = subcorpus.split(':', 1)
    cqi_client: CQiClient = session['cqi_over_sio']['cqi_client']
    cqi_subcorpus = (
        cqi_client.corpora.get(corpus_name).subcorpora.get(subcorpus_name)
    )
    return export_subcorpus(cqi_subcorpus, context=context)
|
131
app/corpora/cqi_over_sio/utils.py
Normal file
@ -0,0 +1,131 @@
|
||||
from cqi.models.corpora import Corpus as CQiCorpus
|
||||
from cqi.models.subcorpora import Subcorpus as CQiSubcorpus
|
||||
from typing import Dict, List
|
||||
|
||||
|
||||
def lookups_by_cpos(corpus: CQiCorpus, cpos_list: List[int]) -> Dict:
    '''
    Collect the attribute data for a list of corpus positions.

    :param corpus: The corpus the positions belong to.
    :param cpos_list: The corpus positions to look up.
    :return: A dict with the key 'cpos_lookup', which maps every corpus
        position to its positional attribute values and to the ids of the
        structural attributes it is part of. For every structural
        attribute that occurs and has sub attributes, there is an
        additional '<attribute name>_lookup' key, which maps the occurring
        attribute ids to their sub attribute values.
    '''
    cpos_lookup: Dict = {cpos: {} for cpos in cpos_list}
    lookups: Dict = {'cpos_lookup': cpos_lookup}
    for attr in corpus.positional_attributes.list():
        cpos_attr_values: List[str] = attr.values_by_cpos(cpos_list)
        for cpos, cpos_attr_value in zip(cpos_list, cpos_attr_values):
            cpos_lookup[cpos][attr.name] = cpos_attr_value
    for attr in corpus.structural_attributes.list():
        # We only want to iterate over non subattributes, identifiable by
        # attr.has_values == False
        if attr.has_values:
            continue
        cpos_attr_ids: List[int] = attr.ids_by_cpos(cpos_list)
        for cpos, cpos_attr_id in zip(cpos_list, cpos_attr_ids):
            # An id of -1 marks a position outside of the attribute
            if cpos_attr_id == -1:
                continue
            cpos_lookup[cpos][attr.name] = cpos_attr_id
        # Sorted to get a deterministic key order in the lookup
        occurred_attr_ids: List[int] = sorted(
            {attr_id for attr_id in cpos_attr_ids if attr_id != -1}
        )
        if len(occurred_attr_ids) == 0:
            continue
        subattrs = corpus.structural_attributes.list(filters={'part_of': attr})
        if len(subattrs) == 0:
            continue
        attr_lookup: Dict = {attr_id: {} for attr_id in occurred_attr_ids}
        for subattr in subattrs:
            # Strip the '<attr.name>_' prefix from the subattribute name
            subattr_name = subattr.name[(len(attr.name) + 1):]
            subattr_values = subattr.values_by_ids(occurred_attr_ids)
            for attr_id, subattr_value in zip(occurred_attr_ids, subattr_values):
                attr_lookup[attr_id][subattr_name] = subattr_value
        lookups[f'{attr.name}_lookup'] = attr_lookup
    return lookups
|
||||
|
||||
|
||||
def partial_export_subcorpus(
    subcorpus: CQiSubcorpus,
    match_id_list: List[int],
    context: int = 25
) -> Dict:
    '''
    Export selected matches of a subcorpus with their context.

    :param subcorpus: The subcorpus to export from.
    :param match_id_list: Ids of the matches to export. Ids outside of
        the subcorpus are skipped.
    :param context: Maximum number of corpus positions to include on each
        side of a match. 0 disables the context.
    :return: A dict with the key 'matches' and, unless the subcorpus is
        empty, the lookups returned by lookups_by_cpos for all exported
        corpus positions. Every match is a dict with the keys 'num',
        'lc', 'c' and 'rc', the latter three are (first, last) corpus
        position tuples, 'lc' and 'rc' may be None.
    '''
    if subcorpus.size == 0:
        return {"matches": []}
    corpus = subcorpus.collection.corpus
    last_cpos = corpus.size - 1
    match_field = subcorpus.fields['match']
    matchend_field = subcorpus.fields['matchend']
    cpos_set = set()
    matches = []
    for match_id in match_id_list:
        if match_id < 0 or match_id >= subcorpus.size:
            continue
        match_start = subcorpus.dump(match_field, match_id, match_id)[0]
        match_end = subcorpus.dump(matchend_field, match_id, match_id)[0]
        if match_start == 0 or context == 0:
            # There is no left context
            lc = None
            cpos_list_lbound = match_start
        else:
            lc = (max(0, (match_start - context)), match_start - 1)
            cpos_list_lbound = lc[0]
        if match_end == last_cpos or context == 0:
            # There is no right context
            rc = None
            cpos_list_rbound = match_end
        else:
            rc = (match_end + 1, min((match_end + context), last_cpos))
            cpos_list_rbound = rc[1]
        matches.append(
            {'num': match_id, 'lc': lc, 'c': (match_start, match_end), 'rc': rc}
        )
        cpos_set.update(range(cpos_list_lbound, cpos_list_rbound + 1))
    # Sorted to pass the corpus positions on in a deterministic order
    lookups = lookups_by_cpos(corpus, sorted(cpos_set))
    return {'matches': matches, **lookups}
|
||||
|
||||
|
||||
def export_subcorpus(
    subcorpus: CQiSubcorpus,
    context: int = 25,
    cutoff: float = float('inf'),
    offset: int = 0
) -> Dict:
    '''
    Export a range of matches of a subcorpus with their context.

    :param subcorpus: The subcorpus to export from.
    :param context: Maximum number of corpus positions to include on each
        side of a match. 0 disables the context.
    :param cutoff: Maximum number of matches to export.
    :param offset: Id of the first match to export.
    :return: A dict with the key 'matches' and, if any match falls into
        the requested range, the lookups returned by lookups_by_cpos for
        all exported corpus positions. Every match is a dict with the
        keys 'num', 'lc', 'c' and 'rc', the latter three are (first, last)
        corpus position tuples, 'lc' and 'rc' may be None.
    '''
    if subcorpus.size == 0:
        return {"matches": []}
    first_match = max(0, offset)
    last_match = min((offset + cutoff - 1), (subcorpus.size - 1))
    if last_match < first_match:
        # The requested range does not contain any match (offset behind
        # the last match or a cutoff below 1), don't request an inverted
        # range from the server
        return {"matches": []}
    # cutoff is a float, make sure range() and dump() receive an int
    last_match = int(last_match)
    match_boundaries = zip(
        range(first_match, last_match + 1),
        subcorpus.dump(subcorpus.fields['match'], first_match, last_match),
        subcorpus.dump(subcorpus.fields['matchend'], first_match, last_match)
    )
    corpus = subcorpus.collection.corpus
    last_cpos = corpus.size - 1
    cpos_set = set()
    matches = []
    for match_num, match_start, match_end in match_boundaries:
        if match_start == 0 or context == 0:
            # There is no left context
            lc = None
            cpos_list_lbound = match_start
        else:
            lc = (max(0, (match_start - context)), match_start - 1)
            cpos_list_lbound = lc[0]
        if match_end == last_cpos or context == 0:
            # There is no right context
            rc = None
            cpos_list_rbound = match_end
        else:
            rc = (match_end + 1, min((match_end + context), last_cpos))
            cpos_list_rbound = rc[1]
        matches.append(
            {'num': match_num, 'lc': lc, 'c': (match_start, match_end), 'rc': rc}
        )
        cpos_set.update(range(cpos_list_lbound, cpos_list_rbound + 1))
    # Sorted to pass the corpus positions on in a deterministic order
    lookups = lookups_by_cpos(corpus, sorted(cpos_set))
    return {'matches': matches, **lookups}
|
@ -1,115 +0,0 @@
|
||||
from flask import session
|
||||
from flask_login import current_user
|
||||
from flask_socketio import ConnectionRefusedError
|
||||
from threading import Lock
|
||||
import cqi
|
||||
from app import db, hashids, socketio
|
||||
from app.decorators import socketio_login_required
|
||||
from app.models import Corpus, CorpusStatus
|
||||
|
||||
|
||||
'''
|
||||
This package tunnels the Corpus Query interface (CQi) protocol through
|
||||
Socket.IO (SIO) by wrapping each CQi function in a separate SIO event.
|
||||
|
||||
This module only handles the SIO connect/disconnect, which handles the setup
|
||||
and teardown of necessary resources for later use. Each CQi function has a
|
||||
corresponding SIO event. The event handlers are spread across the different
|
||||
modules within this package.
|
||||
|
||||
Basic concept:
|
||||
1. A client connects to the SIO namespace and provides the id of a corpus to be
|
||||
analysed.
|
||||
1.1 The analysis session counter of the corpus is incremented.
|
||||
1.2 A CQiClient and a (Mutex) Lock belonging to it is created.
|
||||
1.3 Wait until the CQP server is running.
|
||||
1.4 Connect the CQiClient to the server.
|
||||
1.5 Save the CQiClient and the Lock in the session for subsequent use.
|
||||
2. A client emits an event and may provide a single json object with necessary
|
||||
arguments for the targeted CQi function.
|
||||
3. A SIO event handler (decorated with cqi_over_socketio) gets executed.
|
||||
- The event handler function defines all arguments. Hence the client
|
||||
is sent as a single json object, the decorator decomposes it to fit
|
||||
the functions signature. This also includes type checking and proper
|
||||
use of the lock (acquire/release) mechanism.
|
||||
4. Wait for more events
|
||||
5. The client disconnects from the SIO namespace
|
||||
1.1 The analysis session counter of the corpus is decremented.
|
||||
1.2 The CQiClient and (Mutex) Lock belonging to it are torn down.
|
||||
'''
|
||||
|
||||
|
||||
NAMESPACE = '/corpora/corpus/corpus_analysis'
|
||||
|
||||
|
||||
# Import all CQi over Socket.IO event handlers
|
||||
from .cqi_corpora_corpus_subcorpora import * # noqa
|
||||
from .cqi_corpora_corpus_structural_attributes import * # noqa
|
||||
from .cqi_corpora_corpus_positional_attributes import * # noqa
|
||||
from .cqi_corpora_corpus_alignment_attributes import * # noqa
|
||||
from .cqi_corpora_corpus import * # noqa
|
||||
from .cqi_corpora import * # noqa
|
||||
from .cqi import * # noqa
|
||||
|
||||
|
||||
@socketio.on('connect', namespace=NAMESPACE)
|
||||
@socketio_login_required
|
||||
def connect(auth):
|
||||
# the auth variable is used in a hacky way. It contains the corpus id for
|
||||
# which a corpus analysis session should be started.
|
||||
corpus_id = hashids.decode(auth['corpus_id'])
|
||||
corpus = Corpus.query.get(corpus_id)
|
||||
if corpus is None:
|
||||
# return {'code': 404, 'msg': 'Not Found'}
|
||||
raise ConnectionRefusedError('Not Found')
|
||||
if not (corpus.user == current_user
|
||||
or current_user.is_following_corpus(corpus)
|
||||
or current_user.is_administrator()):
|
||||
# return {'code': 403, 'msg': 'Forbidden'}
|
||||
raise ConnectionRefusedError('Forbidden')
|
||||
if corpus.status not in [
|
||||
CorpusStatus.BUILT,
|
||||
CorpusStatus.STARTING_ANALYSIS_SESSION,
|
||||
CorpusStatus.RUNNING_ANALYSIS_SESSION,
|
||||
CorpusStatus.CANCELING_ANALYSIS_SESSION
|
||||
]:
|
||||
# return {'code': 424, 'msg': 'Failed Dependency'}
|
||||
raise ConnectionRefusedError('Failed Dependency')
|
||||
if corpus.num_analysis_sessions is None:
|
||||
corpus.num_analysis_sessions = 0
|
||||
db.session.commit()
|
||||
corpus.num_analysis_sessions = Corpus.num_analysis_sessions + 1
|
||||
db.session.commit()
|
||||
retry_counter = 20
|
||||
while corpus.status != CorpusStatus.RUNNING_ANALYSIS_SESSION:
|
||||
if retry_counter == 0:
|
||||
corpus.num_analysis_sessions = Corpus.num_analysis_sessions - 1
|
||||
db.session.commit()
|
||||
return {'code': 408, 'msg': 'Request Timeout'}
|
||||
socketio.sleep(3)
|
||||
retry_counter -= 1
|
||||
db.session.refresh(corpus)
|
||||
cqi_client = cqi.CQiClient(f'cqpserver_{corpus_id}')
|
||||
session['d'] = {
|
||||
'corpus_id': corpus_id,
|
||||
'cqi_client': cqi_client,
|
||||
'cqi_client_lock': Lock(),
|
||||
}
|
||||
# return {'code': 200, 'msg': 'OK'}
|
||||
|
||||
|
||||
@socketio.on('disconnect', namespace=NAMESPACE)
|
||||
def disconnect():
|
||||
if 'd' not in session:
|
||||
return
|
||||
session['d']['cqi_client_lock'].acquire()
|
||||
try:
|
||||
session['d']['cqi_client'].disconnect()
|
||||
except (BrokenPipeError, cqi.errors.CQiException):
|
||||
pass
|
||||
session['d']['cqi_client_lock'].release()
|
||||
corpus = Corpus.query.get(session['d']['corpus_id'])
|
||||
corpus.num_analysis_sessions = Corpus.num_analysis_sessions - 1
|
||||
db.session.commit()
|
||||
session.pop('d')
|
||||
# return {'code': 200, 'msg': 'OK'}
|
@ -1,43 +0,0 @@
|
||||
from socket import gaierror
|
||||
import cqi
|
||||
from app import socketio
|
||||
from app.decorators import socketio_login_required
|
||||
from . import NAMESPACE as ns
|
||||
from .utils import cqi_over_socketio
|
||||
|
||||
|
||||
@socketio.on('cqi.connect', namespace=ns)
|
||||
@socketio_login_required
|
||||
@cqi_over_socketio
|
||||
def cqi_connect(cqi_client: cqi.CQiClient):
|
||||
try:
|
||||
cqi_status = cqi_client.connect()
|
||||
except gaierror as e:
|
||||
return {
|
||||
'code': 500,
|
||||
'msg': 'Internal Server Error',
|
||||
'payload': {'code': e.args[0], 'desc': e.args[1]}
|
||||
}
|
||||
payload = {'code': cqi_status.code,
|
||||
'msg': cqi_status.__class__.__name__}
|
||||
return {'code': 200, 'msg': 'OK', 'payload': payload}
|
||||
|
||||
|
||||
@socketio.on('cqi.disconnect', namespace=ns)
@socketio_login_required
@cqi_over_socketio
def cqi_disconnect(cqi_client: cqi.CQiClient):
    """Handle 'cqi.disconnect': disconnect the CQi client and report its status."""
    status = cqi_client.disconnect()
    return {
        'code': 200,
        'msg': 'OK',
        'payload': {'code': status.code, 'msg': status.__class__.__name__}
    }
|
||||
|
||||
|
||||
@socketio.on('cqi.ping', namespace=ns)
@socketio_login_required
@cqi_over_socketio
def cqi_ping(cqi_client: cqi.CQiClient):
    """Handle 'cqi.ping': ping through the CQi client and report its status."""
    status = cqi_client.ping()
    return {
        'code': 200,
        'msg': 'OK',
        'payload': {'code': status.code, 'msg': status.__class__.__name__}
    }
|
@ -1,22 +0,0 @@
|
||||
import cqi
|
||||
from app import socketio
|
||||
from app.decorators import socketio_login_required
|
||||
from . import NAMESPACE as ns
|
||||
from .utils import cqi_over_socketio
|
||||
|
||||
|
||||
@socketio.on('cqi.corpora.get', namespace=ns)
@socketio_login_required
@cqi_over_socketio
def cqi_corpora_get(cqi_client: cqi.CQiClient, corpus_name: str):
    """Handle 'cqi.corpora.get': return a copy of the corpus' ``attrs``."""
    corpus_attrs = cqi_client.corpora.get(corpus_name).attrs
    return {'code': 200, 'msg': 'OK', 'payload': dict(corpus_attrs)}
|
||||
|
||||
|
||||
@socketio.on('cqi.corpora.list', namespace=ns)
@socketio_login_required
@cqi_over_socketio
def cqi_corpora_list(cqi_client: cqi.CQiClient):
    """Handle 'cqi.corpora.list': return the ``attrs`` of every listed corpus."""
    corpora_attrs = []
    for listed_corpus in cqi_client.corpora.list():
        corpora_attrs.append(dict(listed_corpus.attrs))
    return {'code': 200, 'msg': 'OK', 'payload': corpora_attrs}
|
@ -1,199 +0,0 @@
|
||||
from collections import Counter
|
||||
from flask import session
|
||||
import cqi
|
||||
import json
|
||||
import math
|
||||
import os
|
||||
from app import db, socketio
|
||||
from app.decorators import socketio_login_required
|
||||
from app.models import Corpus
|
||||
from . import NAMESPACE as ns
|
||||
from .utils import cqi_over_socketio, lookups_by_cpos
|
||||
|
||||
|
||||
@socketio.on('cqi.corpora.corpus.drop', namespace=ns)
@socketio_login_required
@cqi_over_socketio
def cqi_corpora_corpus_drop(cqi_client: cqi.CQiClient, corpus_name: str):
    """Handle 'cqi.corpora.corpus.drop': drop the corpus and report the status."""
    status = cqi_client.corpora.get(corpus_name).drop()
    return {
        'code': 200,
        'msg': 'OK',
        'payload': {'code': status.code, 'msg': status.__class__.__name__}
    }
|
||||
|
||||
|
||||
@socketio.on('cqi.corpora.corpus.query', namespace=ns)
@socketio_login_required
@cqi_over_socketio
def cqi_corpora_corpus_query(cqi_client: cqi.CQiClient, corpus_name: str, subcorpus_name: str, query: str):  # noqa
    """Handle 'cqi.corpora.corpus.query': run ``query`` on the corpus.

    The query is passed on together with ``subcorpus_name``; the returned
    status is reported in the payload.
    """
    target_corpus = cqi_client.corpora.get(corpus_name)
    status = target_corpus.query(subcorpus_name, query)
    return {
        'code': 200,
        'msg': 'OK',
        'payload': {'code': status.code, 'msg': status.__class__.__name__}
    }
|
||||
|
||||
|
||||
###############################################################################
|
||||
# nopaque specific CQi extensions #
|
||||
###############################################################################
|
||||
@socketio.on('cqi.corpora.corpus.update_db', namespace=ns)
@socketio_login_required
@cqi_over_socketio
def cqi_corpora_corpus_update_db(cqi_client: cqi.CQiClient, corpus_name: str):
    """Handle 'cqi.corpora.corpus.update_db': store the CQi corpus size.

    Writes the size of the CQi corpus to ``num_tokens`` of the database
    corpus referenced by the session and commits. Returns nothing.
    """
    db_corpus = Corpus.query.get(session['d']['corpus_id'])
    db_corpus.num_tokens = cqi_client.corpora.get(corpus_name).size
    db.session.commit()
|
||||
|
||||
|
||||
@socketio.on('cqi.corpora.corpus.get_visualization_data', namespace=ns)
@socketio_login_required
@cqi_over_socketio
def cqi_corpora_corpus_get_visualization_data(cqi_client: cqi.CQiClient, corpus_name: str):
    """Handle 'cqi.corpora.corpus.get_visualization_data'.

    If ``<corpus.path>/cwb/visualization_data.json`` exists, its content is
    returned as the payload. Otherwise the payload is computed from the
    positional and structural attributes of the CQi corpus, written to that
    file and returned.
    """
    db_corpus = Corpus.query.get(session['d']['corpus_id'])
    cache_path = os.path.join(db_corpus.path, 'cwb', 'visualization_data.json')
    if os.path.exists(cache_path):
        with open(cache_path, 'r') as cache_file:
            payload = json.load(cache_file)
        return {'code': 200, 'msg': 'OK', 'payload': payload}
    cqi_corpus = cqi_client.corpora.get(corpus_name)
    # NOTE: A possibly faster way to get cpos boundaries for smaller s_attrs
    # is to query '<s> []* </s>;' and dump the 'match' and 'matchend' fields
    # of the resulting subcorpus.
    cqi_p_attrs = {}
    for positional in cqi_corpus.positional_attributes.list():
        cqi_p_attrs[positional.name] = positional
    cqi_s_attrs = {}
    for structural in cqi_corpus.structural_attributes.list():
        cqi_s_attrs[structural.name] = structural
    payload = {
        'corpus': {
            'bounds': [0, cqi_corpus.size - 1],
            'counts': {'token': cqi_corpus.size},
            'freqs': {}
        },
        'p_attrs': {},
        's_attrs': {},
        'values': {'p_attrs': {}, 's_attrs': {}}
    }
    for p_attr in cqi_p_attrs.values():
        # Frequency per lexicon id
        lexicon_freqs = p_attr.freqs_by_ids(list(range(0, p_attr.lexicon_size)))
        payload['corpus']['freqs'][p_attr.name] = dict(
            zip(range(0, p_attr.lexicon_size), lexicon_freqs)
        )
        # Lexicon id per corpus position
        ids_per_cpos = p_attr.ids_by_cpos(list(range(0, cqi_corpus.size)))
        payload['p_attrs'][p_attr.name] = dict(
            zip(range(0, cqi_corpus.size), ids_per_cpos)
        )
        # Value per lexicon id
        lexicon_values = p_attr.values_by_ids(list(range(0, p_attr.lexicon_size)))
        payload['values']['p_attrs'][p_attr.name] = dict(
            zip(range(0, p_attr.lexicon_size), lexicon_values)
        )
    for s_attr in cqi_s_attrs.values():
        # Attributes that have values are handled as sub attributes below
        if s_attr.has_values:
            continue
        payload['corpus']['counts'][s_attr.name] = s_attr.size
        s_attr_entry = {'lexicon': {}, 'values': None}
        payload['s_attrs'][s_attr.name] = s_attr_entry
        payload['values']['s_attrs'][s_attr.name] = {}
        for s_attr_id in range(0, s_attr.size):
            lbound, rbound = s_attr.cpos_by_id(s_attr_id)
            counts = {'token': rbound - lbound + 1}
            lexicon_entry = {'bounds': [lbound, rbound], 'counts': counts}
            s_attr_entry['lexicon'][s_attr_id] = lexicon_entry
            if s_attr.name not in ['text', 's']:
                continue
            cpos_range = range(lbound, rbound + 1)
            ent_ids = cqi_s_attrs['ent'].ids_by_cpos(list(cpos_range))
            counts['ent'] = len({x for x in ent_ids if x != -1})
            if s_attr.name != 'text':
                continue
            s_ids = cqi_s_attrs['s'].ids_by_cpos(list(cpos_range))
            counts['s'] = len({x for x in s_ids if x != -1})
            lexicon_entry['freqs'] = {}
            for p_attr in cqi_p_attrs.values():
                lexicon_entry['freqs'][p_attr.name] = dict(
                    Counter(p_attr.ids_by_cpos(list(cpos_range)))
                )
        sub_s_attrs = cqi_corpus.structural_attributes.list(filters={'part_of': s_attr})
        # Sub attribute names are '<s_attr name>_<value name>'
        value_names = []
        for sub_s_attr in sub_s_attrs:
            value_names.append(sub_s_attr.name[(len(s_attr.name) + 1):])
        value_columns = []
        for sub_s_attr in sub_s_attrs:
            value_columns.append(
                sub_s_attr.values_by_ids(list(range(0, s_attr.size)))
            )
        s_attr_entry['values'] = value_names
        payload['values']['s_attrs'][s_attr.name] = {
            s_attr_id: {
                value_name: value_column[s_attr_id]
                for value_name, value_column in zip(value_names, value_columns)
            }
            for s_attr_id in range(0, s_attr.size)
        }
    with open(cache_path, 'w') as cache_file:
        json.dump(payload, cache_file)
    return {'code': 200, 'msg': 'OK', 'payload': payload}
|
||||
|
||||
|
||||
@socketio.on('cqi.corpora.corpus.paginate', namespace=ns)
@socketio_login_required
@cqi_over_socketio
def cqi_corpora_corpus_paginate(cqi_client: cqi.CQiClient, corpus_name: str, page: int = 1, per_page: int = 20):  # noqa
    """Handle 'cqi.corpora.corpus.paginate': return one page of corpus positions.

    Answers 416 when ``per_page`` or ``page`` is below 1, or when ``page``
    lies beyond the last page of a non-empty corpus. Otherwise the payload
    holds the page's corpus positions, their lookups and pagination
    metadata.
    """
    cqi_corpus = cqi_client.corpora.get(corpus_name)
    # Sanity checks (per_page is validated first, it is used as a divisor)
    out_of_range = {'code': 416, 'msg': 'Range Not Satisfiable'}
    if per_page < 1 or page < 1:
        return out_of_range
    if cqi_corpus.size > 0 and page > math.ceil(cqi_corpus.size / per_page):
        return out_of_range
    first_cpos = (page - 1) * per_page
    last_cpos = min(cqi_corpus.size, first_cpos + per_page)
    cpos_list = list(range(first_cpos, last_cpos))
    lookups = lookups_by_cpos(cqi_corpus, cpos_list)
    total = cqi_corpus.size
    pages = math.ceil(total / per_page)
    # The current page number (1 indexed), None if there are no pages
    current_page = page if pages > 0 else None
    if current_page:
        has_prev = current_page > 1
        has_next = current_page < pages
    else:
        has_prev = False
        has_next = False
    payload = {
        # the items for the current page
        'items': [cpos_list],
        # the lookups for the items
        'lookups': lookups,
        # the total number of items
        'total': total,
        # the number of items to be displayed on a page
        'per_page': per_page,
        # the total number of pages
        'pages': pages,
        'page': current_page,
        'has_prev': has_prev,
        'has_next': has_next,
        # numbers of the neighbouring pages, None if they do not exist
        'prev_num': current_page - 1 if has_prev else None,
        'next_num': current_page + 1 if has_next else None
    }
    return {'code': 200, 'msg': 'OK', 'payload': payload}
|
@ -1,24 +0,0 @@
|
||||
import cqi
|
||||
from app import socketio
|
||||
from app.decorators import socketio_login_required
|
||||
from . import NAMESPACE as ns
|
||||
from .utils import cqi_over_socketio
|
||||
|
||||
|
||||
@socketio.on('cqi.corpora.corpus.alignment_attributes.get', namespace=ns)
@socketio_login_required
@cqi_over_socketio
def cqi_corpora_corpus_alignment_attributes_get(cqi_client: cqi.CQiClient, corpus_name: str, alignment_attribute_name: str):  # noqa
    """Return a copy of the named alignment attribute's ``attrs``."""
    attributes = cqi_client.corpora.get(corpus_name).alignment_attributes
    attribute = attributes.get(alignment_attribute_name)
    return {'code': 200, 'msg': 'OK', 'payload': dict(attribute.attrs)}
|
||||
|
||||
|
||||
@socketio.on('cqi.corpora.corpus.alignment_attributes.list', namespace=ns)
@socketio_login_required
@cqi_over_socketio
def cqi_corpora_corpus_alignment_attributes_list(cqi_client: cqi.CQiClient, corpus_name: str):  # noqa
    """Return the ``attrs`` of every alignment attribute of the corpus."""
    attributes = cqi_client.corpora.get(corpus_name).alignment_attributes
    attrs_list = []
    for attribute in attributes.list():
        attrs_list.append(dict(attribute.attrs))
    return {'code': 200, 'msg': 'OK', 'payload': attrs_list}
|
@ -1,24 +0,0 @@
|
||||
import cqi
|
||||
from app import socketio
|
||||
from app.decorators import socketio_login_required
|
||||
from . import NAMESPACE as ns
|
||||
from .utils import cqi_over_socketio
|
||||
|
||||
|
||||
@socketio.on('cqi.corpora.corpus.positional_attributes.get', namespace=ns)
@socketio_login_required
@cqi_over_socketio
def cqi_corpora_corpus_positional_attributes_get(cqi_client: cqi.CQiClient, corpus_name: str, positional_attribute_name: str):  # noqa
    """Return a copy of the named positional attribute's ``attrs``."""
    attributes = cqi_client.corpora.get(corpus_name).positional_attributes
    attribute = attributes.get(positional_attribute_name)
    return {'code': 200, 'msg': 'OK', 'payload': dict(attribute.attrs)}
|
||||
|
||||
|
||||
@socketio.on('cqi.corpora.corpus.positional_attributes.list', namespace=ns)
@socketio_login_required
@cqi_over_socketio
def cqi_corpora_corpus_positional_attributes_list(cqi_client: cqi.CQiClient, corpus_name: str):  # noqa
    """Return the ``attrs`` of every positional attribute of the corpus."""
    attributes = cqi_client.corpora.get(corpus_name).positional_attributes
    attrs_list = []
    for attribute in attributes.list():
        attrs_list.append(dict(attribute.attrs))
    return {'code': 200, 'msg': 'OK', 'payload': attrs_list}
|
@ -1,24 +0,0 @@
|
||||
import cqi
|
||||
from app import socketio
|
||||
from app.decorators import socketio_login_required
|
||||
from . import NAMESPACE as ns
|
||||
from .utils import cqi_over_socketio
|
||||
|
||||
|
||||
@socketio.on('cqi.corpora.corpus.structural_attributes.get', namespace=ns)
@socketio_login_required
@cqi_over_socketio
def cqi_corpora_corpus_structural_attributes_get(cqi_client: cqi.CQiClient, corpus_name: str, structural_attribute_name: str):  # noqa
    """Return a copy of the named structural attribute's ``attrs``."""
    attributes = cqi_client.corpora.get(corpus_name).structural_attributes
    attribute = attributes.get(structural_attribute_name)
    return {'code': 200, 'msg': 'OK', 'payload': dict(attribute.attrs)}
|
||||
|
||||
|
||||
@socketio.on('cqi.corpora.corpus.structural_attributes.list', namespace=ns)
@socketio_login_required
@cqi_over_socketio
def cqi_corpora_corpus_structural_attributes_list(cqi_client: cqi.CQiClient, corpus_name: str):  # noqa
    """Return the ``attrs`` of every structural attribute of the corpus."""
    attributes = cqi_client.corpora.get(corpus_name).structural_attributes
    attrs_list = []
    for attribute in attributes.list():
        attrs_list.append(dict(attribute.attrs))
    return {'code': 200, 'msg': 'OK', 'payload': attrs_list}
|
@ -1,140 +0,0 @@
|
||||
import cqi
|
||||
import math
|
||||
from app import socketio
|
||||
from app.decorators import socketio_login_required
|
||||
from . import NAMESPACE as ns
|
||||
from .utils import cqi_over_socketio, export_subcorpus, partial_export_subcorpus
|
||||
|
||||
|
||||
@socketio.on('cqi.corpora.corpus.subcorpora.get', namespace=ns)
@socketio_login_required
@cqi_over_socketio
def cqi_corpora_corpus_subcorpora_get(cqi_client: cqi.CQiClient, corpus_name: str, subcorpus_name: str):  # noqa
    """Return a copy of the named subcorpus' ``attrs``."""
    subcorpora = cqi_client.corpora.get(corpus_name).subcorpora
    subcorpus = subcorpora.get(subcorpus_name)
    return {'code': 200, 'msg': 'OK', 'payload': dict(subcorpus.attrs)}
|
||||
|
||||
|
||||
@socketio.on('cqi.corpora.corpus.subcorpora.list', namespace=ns)
@socketio_login_required
@cqi_over_socketio
def cqi_corpora_corpus_subcorpora_list(cqi_client: cqi.CQiClient, corpus_name: str):  # noqa
    """Return the ``attrs`` of every subcorpus of the corpus."""
    subcorpora = cqi_client.corpora.get(corpus_name).subcorpora
    attrs_list = []
    for subcorpus in subcorpora.list():
        attrs_list.append(dict(subcorpus.attrs))
    return {'code': 200, 'msg': 'OK', 'payload': attrs_list}
|
||||
|
||||
|
||||
@socketio.on('cqi.corpora.corpus.subcorpora.subcorpus.drop', namespace=ns)
@socketio_login_required
@cqi_over_socketio
def cqi_corpora_corpus_subcorpora_subcorpus_drop(cqi_client: cqi.CQiClient, corpus_name: str, subcorpus_name: str):  # noqa
    """Drop the named subcorpus and report the returned status."""
    subcorpora = cqi_client.corpora.get(corpus_name).subcorpora
    status = subcorpora.get(subcorpus_name).drop()
    return {
        'code': 200,
        'msg': 'OK',
        'payload': {'code': status.code, 'msg': status.__class__.__name__}
    }
|
||||
|
||||
|
||||
@socketio.on('cqi.corpora.corpus.subcorpora.subcorpus.dump', namespace=ns)
@socketio_login_required
@cqi_over_socketio
def cqi_corpora_corpus_subcorpora_subcorpus_dump(cqi_client: cqi.CQiClient, corpus_name: str, subcorpus_name: str, field: int, first: int, last: int):  # noqa
    """Return the subcorpus' ``dump(field, first, last)`` result as payload."""
    subcorpora = cqi_client.corpora.get(corpus_name).subcorpora
    subcorpus = subcorpora.get(subcorpus_name)
    dumped = subcorpus.dump(field, first, last)
    return {'code': 200, 'msg': 'OK', 'payload': dumped}
|
||||
|
||||
|
||||
@socketio.on('cqi.corpora.corpus.subcorpora.subcorpus.fdist_1', namespace=ns)
@socketio_login_required
@cqi_over_socketio
def cqi_corpora_corpus_subcorpora_subcorpus_fdist_1(cqi_client: cqi.CQiClient, corpus_name: str, subcorpus_name: str, cutoff: int, field_name: str, positional_attribute_name: str):  # noqa
    """Return the subcorpus' ``fdist_1`` result as payload.

    ``field_name`` is resolved through the subcorpus' ``fields`` mapping and
    ``positional_attribute_name`` through the corpus' positional attributes.
    """
    parent_corpus = cqi_client.corpora.get(corpus_name)
    subcorpus = parent_corpus.subcorpora.get(subcorpus_name)
    resolved_field = subcorpus.fields[field_name]
    resolved_attr = parent_corpus.positional_attributes.get(positional_attribute_name)
    distribution = subcorpus.fdist_1(cutoff, resolved_field, resolved_attr)
    return {'code': 200, 'msg': 'OK', 'payload': distribution}
|
||||
|
||||
|
||||
@socketio.on('cqi.corpora.corpus.subcorpora.subcorpus.fdist_2', namespace=ns)
@socketio_login_required
@cqi_over_socketio
def cqi_corpora_corpus_subcorpora_subcorpus_fdist_2(cqi_client: cqi.CQiClient, corpus_name: str, subcorpus_name: str, cutoff: int, field_1_name: str, positional_attribute_1_name: str, field_2_name: str, positional_attribute_2_name: str):  # noqa
    """Return the subcorpus' ``fdist_2`` result as payload.

    Field names are resolved through the subcorpus' ``fields`` mapping and
    attribute names through the corpus' positional attributes.
    """
    parent_corpus = cqi_client.corpora.get(corpus_name)
    subcorpus = parent_corpus.subcorpora.get(subcorpus_name)
    positional_attributes = parent_corpus.positional_attributes
    first_field = subcorpus.fields[field_1_name]
    first_attr = positional_attributes.get(positional_attribute_1_name)
    second_field = subcorpus.fields[field_2_name]
    second_attr = positional_attributes.get(positional_attribute_2_name)
    distribution = subcorpus.fdist_2(
        cutoff, first_field, first_attr, second_field, second_attr
    )
    return {'code': 200, 'msg': 'OK', 'payload': distribution}
|
||||
|
||||
|
||||
###############################################################################
|
||||
# nopaque specific CQi extensions #
|
||||
###############################################################################
|
||||
@socketio.on('cqi.corpora.corpus.subcorpora.subcorpus.paginate', namespace=ns)
@socketio_login_required
@cqi_over_socketio
def cqi_corpora_corpus_subcorpora_subcorpus_paginate(cqi_client: cqi.CQiClient, corpus_name: str, subcorpus_name: str, context: int = 50, page: int = 1, per_page: int = 20):  # noqa
    """Return one page of subcorpus matches together with their lookups.

    Answers 416 when ``per_page`` or ``page`` is below 1, or when ``page``
    lies beyond the last page of a non-empty subcorpus. Otherwise the
    matches of the page are exported via ``export_subcorpus`` and returned
    with pagination metadata.
    """
    cqi_corpus = cqi_client.corpora.get(corpus_name)
    cqi_subcorpus = cqi_corpus.subcorpora.get(subcorpus_name)
    # Sanity checks (per_page is validated first, it is used as a divisor)
    out_of_range = {'code': 416, 'msg': 'Range Not Satisfiable'}
    if per_page < 1 or page < 1:
        return out_of_range
    if (
        cqi_subcorpus.attrs['size'] > 0
        and page > math.ceil(cqi_subcorpus.attrs['size'] / per_page)
    ):
        return out_of_range
    export = export_subcorpus(
        cqi_subcorpus,
        context=context,
        cutoff=per_page,
        offset=(page - 1) * per_page
    )
    # The matches become the items, everything left over are the lookups
    items = export.pop('matches')
    total = cqi_subcorpus.attrs['size']
    pages = math.ceil(total / per_page)
    # The current page number (1 indexed), None if there are no pages
    current_page = page if pages > 0 else None
    if current_page:
        has_prev = current_page > 1
        has_next = current_page < pages
    else:
        has_prev = False
        has_next = False
    payload = {
        # the items for the current page
        'items': items,
        # the lookups for the items
        'lookups': export,
        # the total number of items matching the query
        'total': total,
        # the number of items to be displayed on a page
        'per_page': per_page,
        # the total number of pages
        'pages': pages,
        'page': current_page,
        'has_prev': has_prev,
        'has_next': has_next,
        # numbers of the neighbouring pages, None if they do not exist
        'prev_num': current_page - 1 if has_prev else None,
        'next_num': current_page + 1 if has_next else None
    }
    return {'code': 200, 'msg': 'OK', 'payload': payload}
|
||||
|
||||
|
||||
@socketio.on('cqi.corpora.corpus.subcorpora.subcorpus.partial_export', namespace=ns)
@socketio_login_required
@cqi_over_socketio
def cqi_corpora_corpus_subcorpora_subcorpus_partial_export(cqi_client: cqi.CQiClient, corpus_name: str, subcorpus_name: str, match_id_list: list, context: int = 50):  # noqa
    """Return ``partial_export_subcorpus`` of the named subcorpus as payload."""
    subcorpora = cqi_client.corpora.get(corpus_name).subcorpora
    subcorpus = subcorpora.get(subcorpus_name)
    exported = partial_export_subcorpus(subcorpus, match_id_list, context=context)
    return {'code': 200, 'msg': 'OK', 'payload': exported}
|
||||
|
||||
|
||||
@socketio.on('cqi.corpora.corpus.subcorpora.subcorpus.export', namespace=ns)
@socketio_login_required
@cqi_over_socketio
def cqi_corpora_corpus_subcorpora_subcorpus_export(cqi_client: cqi.CQiClient, corpus_name: str, subcorpus_name: str, context: int = 50):  # noqa
    """Return ``export_subcorpus`` of the named subcorpus as payload."""
    subcorpora = cqi_client.corpora.get(corpus_name).subcorpora
    subcorpus = subcorpora.get(subcorpus_name)
    exported = export_subcorpus(subcorpus, context=context)
    return {'code': 200, 'msg': 'OK', 'payload': exported}
|
@ -1,178 +0,0 @@
|
||||
from flask import session
|
||||
from functools import wraps
|
||||
from inspect import signature
|
||||
import cqi
|
||||
|
||||
|
||||
def cqi_over_socketio(f):
    """Decorate ``f`` so it can be used as a CQi Socket.IO event handler.

    The wrapper
      * answers 424 when the session holds no analysis session data,
      * fills the parameters ``corpus_name`` and ``cqi_client`` from the
        session,
      * takes all other parameters from the first positional argument (a
        dict sent by the client), answering 400 when a parameter without a
        default is missing or a provided value is not exactly of the
        annotated type,
      * calls ``f`` while holding the session's CQi client lock and turns
        ``BrokenPipeError`` and ``cqi.errors.CQiException`` into 500
        responses.

    An event emitted without data, or with a payload that is not a dict, is
    treated as carrying no arguments instead of raising.
    """
    @wraps(f)
    def wrapped(*args):
        if 'd' not in session:
            return {'code': 424, 'msg': 'Failed Dependency'}
        # The client may send no data at all or something that is not an
        # object. Neither can contain named arguments.
        if args and isinstance(args[0], dict):
            client_args = args[0]
        else:
            client_args = {}
        f_args = {}
        # Check for missing args and if all provided args are of the right type
        for param in signature(f).parameters.values():
            if param.name == 'corpus_name':
                f_args[param.name] = f'NOPAQUE_{session["d"]["corpus_id"]}'
                continue
            if param.name == 'cqi_client':
                f_args[param.name] = session['d']['cqi_client']
                continue
            if param.name not in client_args:
                if param.default is param.empty:
                    # A required argument is missing
                    return {'code': 400, 'msg': 'Bad Request'}
                # An optional argument is missing, f uses its default
                continue
            arg = client_args[param.name]
            if type(arg) is not param.annotation:
                return {'code': 400, 'msg': 'Bad Request'}
            f_args[param.name] = arg
        session['d']['cqi_client_lock'].acquire()
        try:
            return_value = f(**f_args)
        except BrokenPipeError:
            return_value = {
                'code': 500,
                'msg': 'Internal Server Error'
            }
        except cqi.errors.CQiException as e:
            return_value = {
                'code': 500,
                'msg': 'Internal Server Error',
                'payload': {
                    'code': e.code,
                    'desc': e.description,
                    'msg': e.__class__.__name__
                }
            }
        finally:
            session['d']['cqi_client_lock'].release()
        return return_value
    return wrapped
|
||||
|
||||
|
||||
def lookups_by_cpos(corpus, cpos_list):
    """Build lookup tables for the given corpus positions.

    The result always contains ``'cpos_lookup'``, mapping every cpos to its
    positional attribute values and to the ids of the structural attributes
    (those without values) it is part of. For every such structural
    attribute that occurs at least once and has sub attributes, a
    ``'<name>_lookup'`` entry maps the occurring ids to their sub attribute
    values.
    """
    cpos_lookup = {cpos: {} for cpos in cpos_list}
    lookups = {'cpos_lookup': cpos_lookup}
    for p_attr in corpus.positional_attributes.list():
        values = p_attr.values_by_cpos(cpos_list)
        for idx, cpos in enumerate(cpos_list):
            cpos_lookup[cpos][p_attr.attrs['name']] = values[idx]
    for s_attr in corpus.structural_attributes.list():
        # Sub attributes (attrs['has_values'] is truthy) are handled together
        # with the attribute they are part of
        if s_attr.attrs['has_values']:
            continue
        ids = s_attr.ids_by_cpos(cpos_list)
        for idx, cpos in enumerate(cpos_list):
            # -1 means that the cpos is not part of this attribute
            if ids[idx] != -1:
                cpos_lookup[cpos][s_attr.attrs['name']] = ids[idx]
        occurring_ids = [x for x in set(ids) if x != -1]
        if not occurring_ids:
            continue
        sub_attrs = corpus.structural_attributes.list(filters={'part_of': s_attr})
        if not sub_attrs:
            continue
        id_lookup = {attr_id: {} for attr_id in occurring_ids}
        lookups[f'{s_attr.attrs["name"]}_lookup'] = id_lookup
        for sub_attr in sub_attrs:
            # Strip the '<attribute name>_' prefix
            short_name = sub_attr.attrs['name'][(len(s_attr.attrs['name']) + 1):]
            sub_values = sub_attr.values_by_ids(occurring_ids)
            for idx, sub_value in enumerate(sub_values):
                id_lookup[occurring_ids[idx]][short_name] = sub_value
    return lookups
|
||||
|
||||
|
||||
def partial_export_subcorpus(subcorpus, match_id_list, context=25):
    """Export the matches with the given ids together with their lookups.

    Ids outside ``0 .. size - 1`` are skipped. Every match is described by
    its number ``num``, the match span ``c`` and the left/right context
    spans ``lc``/``rc`` of at most ``context`` positions (``None`` when
    ``context`` is 0 or the match touches the respective corpus boundary).
    An empty subcorpus yields ``{"matches": []}`` without lookups.
    """
    if subcorpus.attrs['size'] == 0:
        return {"matches": []}
    match_boundaries = []
    for match_id in match_id_list:
        if not 0 <= match_id < subcorpus.attrs['size']:
            continue
        match_start = subcorpus.dump(subcorpus.attrs['fields']['match'], match_id, match_id)[0]
        match_end = subcorpus.dump(subcorpus.attrs['fields']['matchend'], match_id, match_id)[0]
        match_boundaries.append((match_id, match_start, match_end))
    matches = []
    cpos_set = set()
    for match_num, match_start, match_end in match_boundaries:
        last_cpos = subcorpus.collection.corpus.attrs['size'] - 1
        # Left context
        if context == 0 or match_start == 0:
            lc = None
            lbound = match_start
        else:
            lbound = max(0, match_start - context)
            lc = (lbound, match_start - 1)
        # Right context
        if context == 0 or match_end == last_cpos:
            rc = None
            rbound = match_end
        else:
            rbound = min(match_end + context, last_cpos)
            rc = (match_end + 1, rbound)
        matches.append(
            {'num': match_num, 'lc': lc, 'c': (match_start, match_end), 'rc': rc}
        )
        # Remember every cpos covered by the match and its context
        cpos_set.update(range(lbound, rbound + 1))
    lookups = lookups_by_cpos(subcorpus.collection.corpus, list(cpos_set))
    return {'matches': matches, **lookups}
|
||||
|
||||
|
||||
def export_subcorpus(subcorpus, context=25, cutoff=float('inf'), offset=0):
    """Export a range of subcorpus matches together with their lookups.

    Exports the matches from ``max(0, offset)`` up to
    ``min(offset + cutoff - 1, size - 1)``. Every match is described by its
    number ``num``, the match span ``c`` and the left/right context spans
    ``lc``/``rc`` of at most ``context`` positions (``None`` when
    ``context`` is 0 or the match touches the respective corpus boundary).
    An empty subcorpus yields ``{"matches": []}`` without lookups.
    """
    if subcorpus.attrs['size'] == 0:
        return {"matches": []}
    first_match = max(0, offset)
    last_match = min((offset + cutoff - 1), (subcorpus.attrs['size'] - 1))
    match_nums = list(range(first_match, last_match + 1))
    match_starts = subcorpus.dump(subcorpus.attrs['fields']['match'], first_match, last_match)
    match_ends = subcorpus.dump(subcorpus.attrs['fields']['matchend'], first_match, last_match)
    matches = []
    cpos_set = set()
    for match_num, match_start, match_end in zip(match_nums, match_starts, match_ends):
        last_cpos = subcorpus.collection.corpus.attrs['size'] - 1
        # Left context
        if context == 0 or match_start == 0:
            lc = None
            lbound = match_start
        else:
            lbound = max(0, match_start - context)
            lc = (lbound, match_start - 1)
        # Right context
        if context == 0 or match_end == last_cpos:
            rc = None
            rbound = match_end
        else:
            rbound = min(match_end + context, last_cpos)
            rc = (match_end + 1, rbound)
        matches.append(
            {'num': match_num, 'lc': lc, 'c': (match_start, match_end), 'rc': rc}
        )
        # Remember every cpos covered by the match and its context
        cpos_set.update(range(lbound, rbound + 1))
    lookups = lookups_by_cpos(subcorpus.collection.corpus, list(cpos_set))
    return {'matches': matches, **lookups}
|
@ -7,7 +7,6 @@ from flask import (
|
||||
url_for
|
||||
)
|
||||
from flask_breadcrumbs import register_breadcrumb
|
||||
import os
|
||||
from app import db
|
||||
from app.models import Corpus, CorpusFile, CorpusStatus
|
||||
from ..decorators import corpus_follower_permission_required
|
||||
@ -92,8 +91,8 @@ def corpus_file(corpus_id, corpus_file_id):
|
||||
def download_corpus_file(corpus_id, corpus_file_id):
|
||||
corpus_file = CorpusFile.query.filter_by(corpus_id=corpus_id, id=corpus_file_id).first_or_404()
|
||||
return send_from_directory(
|
||||
os.path.dirname(corpus_file.path),
|
||||
os.path.basename(corpus_file.path),
|
||||
corpus_file.path.parent,
|
||||
corpus_file.path.name,
|
||||
as_attachment=True,
|
||||
attachment_filename=corpus_file.filename,
|
||||
mimetype=corpus_file.mimetype
|
||||
|
@ -12,65 +12,65 @@ from ..decorators import corpus_follower_permission_required
|
||||
from . import bp
|
||||
|
||||
|
||||
# @bp.route('/<hashid:corpus_id>/followers', methods=['POST'])
|
||||
# @corpus_follower_permission_required('MANAGE_FOLLOWERS')
|
||||
# @content_negotiation(consumes='application/json', produces='application/json')
|
||||
# def create_corpus_followers(corpus_id):
|
||||
# usernames = request.json
|
||||
# if not (isinstance(usernames, list) or all(isinstance(u, str) for u in usernames)):
|
||||
# abort(400)
|
||||
# corpus = Corpus.query.get_or_404(corpus_id)
|
||||
# for username in usernames:
|
||||
# user = User.query.filter_by(username=username, is_public=True).first_or_404()
|
||||
# user.follow_corpus(corpus)
|
||||
# db.session.commit()
|
||||
# response_data = {
|
||||
# 'message': f'Users are now following "{corpus.title}"',
|
||||
# 'category': 'corpus'
|
||||
# }
|
||||
# return response_data, 200
|
||||
@bp.route('/<hashid:corpus_id>/followers', methods=['POST'])
@corpus_follower_permission_required('MANAGE_FOLLOWERS')
@content_negotiation(consumes='application/json', produces='application/json')
def create_corpus_followers(corpus_id):
    """Make the users named in the JSON body followers of the corpus.

    The body must be a JSON list of username strings, otherwise the request
    is aborted with 400. Every username is looked up among the users with
    ``is_public=True`` via ``first_or_404``. On success a message is
    returned with status 200.
    """
    usernames = request.json
    # Both conditions must hold: the body is a list AND it only contains
    # strings. With ``or`` a list of non-strings was accepted and a
    # non-iterable body raised a TypeError instead of answering 400.
    if not (isinstance(usernames, list) and all(isinstance(u, str) for u in usernames)):
        abort(400)
    corpus = Corpus.query.get_or_404(corpus_id)
    for username in usernames:
        user = User.query.filter_by(username=username, is_public=True).first_or_404()
        user.follow_corpus(corpus)
    db.session.commit()
    response_data = {
        'message': f'Users are now following "{corpus.title}"',
        'category': 'corpus'
    }
    return response_data, 200
|
||||
|
||||
|
||||
# @bp.route('/<hashid:corpus_id>/followers/<hashid:follower_id>/role', methods=['PUT'])
|
||||
# @corpus_follower_permission_required('MANAGE_FOLLOWERS')
|
||||
# @content_negotiation(consumes='application/json', produces='application/json')
|
||||
# def update_corpus_follower_role(corpus_id, follower_id):
|
||||
# role_name = request.json
|
||||
# if not isinstance(role_name, str):
|
||||
# abort(400)
|
||||
# cfr = CorpusFollowerRole.query.filter_by(name=role_name).first()
|
||||
# if cfr is None:
|
||||
# abort(400)
|
||||
# cfa = CorpusFollowerAssociation.query.filter_by(corpus_id=corpus_id, follower_id=follower_id).first_or_404()
|
||||
# cfa.role = cfr
|
||||
# db.session.commit()
|
||||
# response_data = {
|
||||
# 'message': f'User "{cfa.follower.username}" is now {cfa.role.name}',
|
||||
# 'category': 'corpus'
|
||||
# }
|
||||
# return response_data, 200
|
||||
@bp.route('/<hashid:corpus_id>/followers/<hashid:follower_id>/role', methods=['PUT'])
@corpus_follower_permission_required('MANAGE_FOLLOWERS')
@content_negotiation(consumes='application/json', produces='application/json')
def update_corpus_follower_role(corpus_id, follower_id):
    """Assign the role named in the JSON body to a follower of the corpus.

    Aborts with 400 when the body is not a string or no role with that name
    exists. The follower association is looked up via ``first_or_404``. On
    success a message is returned with status 200.
    """
    role_name = request.json
    if not isinstance(role_name, str):
        abort(400)
    new_role = CorpusFollowerRole.query.filter_by(name=role_name).first()
    if new_role is None:
        abort(400)
    association_query = CorpusFollowerAssociation.query.filter_by(
        corpus_id=corpus_id,
        follower_id=follower_id
    )
    association = association_query.first_or_404()
    association.role = new_role
    db.session.commit()
    message = f'User "{association.follower.username}" is now {association.role.name}'
    return {'message': message, 'category': 'corpus'}, 200
|
||||
|
||||
|
||||
# @bp.route('/<hashid:corpus_id>/followers/<hashid:follower_id>', methods=['DELETE'])
|
||||
# def delete_corpus_follower(corpus_id, follower_id):
|
||||
# cfa = CorpusFollowerAssociation.query.filter_by(corpus_id=corpus_id, follower_id=follower_id).first_or_404()
|
||||
# if not (
|
||||
# current_user.id == follower_id
|
||||
# or current_user == cfa.corpus.user
|
||||
# or CorpusFollowerAssociation.query.filter_by(corpus_id=corpus_id, follower_id=current_user.id).first().role.has_permission('MANAGE_FOLLOWERS')
|
||||
# or current_user.is_administrator()):
|
||||
# abort(403)
|
||||
# if current_user.id == follower_id:
|
||||
# flash(f'You are no longer following "{cfa.corpus.title}"', 'corpus')
|
||||
# response = make_response()
|
||||
# response.status_code = 204
|
||||
# else:
|
||||
# response_data = {
|
||||
# 'message': f'"{cfa.follower.username}" is not following "{cfa.corpus.title}" anymore',
|
||||
# 'category': 'corpus'
|
||||
# }
|
||||
# response = jsonify(response_data)
|
||||
# response.status_code = 200
|
||||
# cfa.follower.unfollow_corpus(cfa.corpus)
|
||||
# db.session.commit()
|
||||
# return response
|
||||
@bp.route('/<hashid:corpus_id>/followers/<hashid:follower_id>', methods=['DELETE'])
def delete_corpus_follower(corpus_id, follower_id):
    """Remove a follower from a corpus.

    The request is allowed when the current user is the follower itself,
    the owner of the corpus, a follower whose role grants
    ``MANAGE_FOLLOWERS``, or an administrator; otherwise it is answered
    with 403. A missing follower association yields 404.

    When a user removes themself a message is flashed and an empty 204
    response is returned; otherwise a JSON message with status 200 is
    returned.
    """
    cfa = CorpusFollowerAssociation.query.filter_by(corpus_id=corpus_id, follower_id=follower_id).first_or_404()

    def current_user_manages_followers():
        # The current user may not follow this corpus at all (e.g. the
        # owner or an administrator). ``first()`` then returns None, which
        # must count as "no permission" instead of raising AttributeError.
        own_cfa = CorpusFollowerAssociation.query.filter_by(
            corpus_id=corpus_id,
            follower_id=current_user.id
        ).first()
        return own_cfa is not None and own_cfa.role.has_permission('MANAGE_FOLLOWERS')

    if not (
        current_user.id == follower_id
        or current_user == cfa.corpus.user
        or current_user_manages_followers()
        or current_user.is_administrator()
    ):
        abort(403)
    if current_user.id == follower_id:
        flash(f'You are no longer following "{cfa.corpus.title}"', 'corpus')
        response = make_response()
        response.status_code = 204
    else:
        response_data = {
            'message': f'"{cfa.follower.username}" is not following "{cfa.corpus.title}" anymore',
            'category': 'corpus'
        }
        response = jsonify(response_data)
        response.status_code = 200
    # The response is built first because it reads follower and corpus
    # through the association that is removed here.
    cfa.follower.unfollow_corpus(cfa.corpus)
    db.session.commit()
    return response
|
||||
|
@ -7,6 +7,8 @@ from app.decorators import content_negotiation
|
||||
from app.models import Corpus, CorpusFollowerRole
|
||||
from . import bp
|
||||
from .decorators import corpus_follower_permission_required, corpus_owner_or_admin_required
|
||||
import nltk
|
||||
from string import punctuation
|
||||
|
||||
|
||||
@bp.route('/<hashid:corpus_id>', methods=['DELETE'])
|
||||
@ -56,56 +58,68 @@ def build_corpus(corpus_id):
|
||||
}
|
||||
return response_data, 202
|
||||
|
||||
@bp.route('/stopwords')
@content_negotiation(produces='application/json')
def get_stopwords():
    """Return the NLTK stopword lists for a fixed set of languages.

    The response maps each language name to its stopword list and adds a
    ``punctuation`` list plus an empty ``user_stopwords`` list.
    """
    # Fetches the NLTK 'stopwords' resource on every call (quietly).
    nltk.download('stopwords', quiet=True)
    supported_languages = (
        "german", "english", "catalan", "greek", "spanish",
        "french", "italian", "russian", "chinese",
    )
    response_data = {
        language: nltk.corpus.stopwords.words(language)
        for language in supported_languages
    }
    response_data['punctuation'] = [*punctuation, '—', '|', '–', '“', '„', '--']
    response_data['user_stopwords'] = []
    # NOTE(review): 202 is returned although the data is delivered in this
    # response — confirm whether clients rely on it before changing to 200.
    return response_data, 202
|
||||
|
||||
# @bp.route('/<hashid:corpus_id>/generate-share-link', methods=['POST'])
|
||||
# @corpus_follower_permission_required('MANAGE_FOLLOWERS')
|
||||
# @content_negotiation(consumes='application/json', produces='application/json')
|
||||
# def generate_corpus_share_link(corpus_id):
|
||||
# data = request.json
|
||||
# if not isinstance(data, dict):
|
||||
# abort(400)
|
||||
# expiration = data.get('expiration')
|
||||
# if not isinstance(expiration, str):
|
||||
# abort(400)
|
||||
# role_name = data.get('role')
|
||||
# if not isinstance(role_name, str):
|
||||
# abort(400)
|
||||
# expiration_date = datetime.strptime(expiration, '%b %d, %Y')
|
||||
# cfr = CorpusFollowerRole.query.filter_by(name=role_name).first()
|
||||
# if cfr is None:
|
||||
# abort(400)
|
||||
# corpus = Corpus.query.get_or_404(corpus_id)
|
||||
# token = current_user.generate_follow_corpus_token(corpus.hashid, role_name, expiration_date)
|
||||
# corpus_share_link = url_for(
|
||||
# 'corpora.follow_corpus',
|
||||
# corpus_id=corpus_id,
|
||||
# token=token,
|
||||
# _external=True
|
||||
# )
|
||||
# response_data = {
|
||||
# 'message': 'Corpus share link generated',
|
||||
# 'category': 'corpus',
|
||||
# 'corpusShareLink': corpus_share_link
|
||||
# }
|
||||
# return response_data, 200
|
||||
@bp.route('/<hashid:corpus_id>/generate-share-link', methods=['POST'])
@corpus_follower_permission_required('MANAGE_FOLLOWERS')
@content_negotiation(consumes='application/json', produces='application/json')
def generate_corpus_share_link(corpus_id):
    """Create an external link that lets its recipient follow the corpus.

    The JSON request body must be an object with two string members:
    ``expiration`` (a date in the format ``'%b %d, %Y'``) and ``role``
    (the name of an existing corpus follower role). Malformed input is
    answered with 400, an unknown corpus with 404.

    Returns a ``(response_data, 200)`` tuple whose ``corpusShareLink``
    member holds the generated URL.
    """
    data = request.json
    if not isinstance(data, dict):
        abort(400)
    expiration = data.get('expiration')
    if not isinstance(expiration, str):
        abort(400)
    role_name = data.get('role')
    if not isinstance(role_name, str):
        abort(400)
    try:
        expiration_date = datetime.strptime(expiration, '%b %d, %Y')
    except ValueError:
        # A date string that does not match the format is a client error,
        # not a server error.
        abort(400)
    cfr = CorpusFollowerRole.query.filter_by(name=role_name).first()
    if cfr is None:
        abort(400)
    corpus = Corpus.query.get_or_404(corpus_id)
    token = current_user.generate_follow_corpus_token(corpus.hashid, role_name, expiration_date)
    corpus_share_link = url_for(
        'corpora.follow_corpus',
        corpus_id=corpus_id,
        token=token,
        _external=True
    )
    response_data = {
        'message': 'Corpus share link generated',
        'category': 'corpus',
        'corpusShareLink': corpus_share_link
    }
    return response_data, 200
|
||||
|
||||
|
||||
|
||||
# @bp.route('/<hashid:corpus_id>/is_public', methods=['PUT'])
|
||||
# @corpus_owner_or_admin_required
|
||||
# @content_negotiation(consumes='application/json', produces='application/json')
|
||||
# def update_corpus_is_public(corpus_id):
|
||||
# is_public = request.json
|
||||
# if not isinstance(is_public, bool):
|
||||
# abort(400)
|
||||
# corpus = Corpus.query.get_or_404(corpus_id)
|
||||
# corpus.is_public = is_public
|
||||
# db.session.commit()
|
||||
# response_data = {
|
||||
# 'message': (
|
||||
# f'Corpus "{corpus.title}" is now'
|
||||
# f' {"public" if is_public else "private"}'
|
||||
# ),
|
||||
# 'category': 'corpus'
|
||||
# }
|
||||
# return response_data, 200
|
||||
@bp.route('/<hashid:corpus_id>/is_public', methods=['PUT'])
@corpus_owner_or_admin_required
@content_negotiation(consumes='application/json', produces='application/json')
def update_corpus_is_public(corpus_id):
    """Switch a corpus between public and private.

    The JSON request body is a bare boolean; any other type is rejected
    with 400. An unknown corpus yields 404.
    """
    is_public = request.json
    if not isinstance(is_public, bool):
        abort(400)
    corpus = Corpus.query.get_or_404(corpus_id)
    corpus.is_public = is_public
    db.session.commit()
    visibility = 'public' if is_public else 'private'
    return {
        'message': f'Corpus "{corpus.title}" is now {visibility}',
        'category': 'corpus'
    }, 200
|
||||
|
@ -68,20 +68,19 @@ def corpus(corpus_id):
|
||||
corpus=corpus,
|
||||
cfr=cfr,
|
||||
cfrs=cfrs,
|
||||
users = users
|
||||
users=users
|
||||
)
|
||||
if (current_user.is_following_corpus(corpus) or corpus.is_public):
|
||||
abort(404)
|
||||
# cfas = CorpusFollowerAssociation.query.filter(Corpus.id == corpus_id, CorpusFollowerAssociation.follower_id != corpus.user.id).all()
|
||||
# return render_template(
|
||||
# 'corpora/public_corpus.html.j2',
|
||||
# title=corpus.title,
|
||||
# corpus=corpus,
|
||||
# cfrs=cfrs,
|
||||
# cfr=cfr,
|
||||
# cfas=cfas,
|
||||
# users = users
|
||||
# )
|
||||
cfas = CorpusFollowerAssociation.query.filter(Corpus.id == corpus_id, CorpusFollowerAssociation.follower_id != corpus.user.id).all()
|
||||
return render_template(
|
||||
'corpora/public_corpus.html.j2',
|
||||
title=corpus.title,
|
||||
corpus=corpus,
|
||||
cfrs=cfrs,
|
||||
cfr=cfr,
|
||||
cfas=cfas,
|
||||
users=users
|
||||
)
|
||||
abort(403)
|
||||
|
||||
|
||||
@ -98,14 +97,14 @@ def analysis(corpus_id):
|
||||
)
|
||||
|
||||
|
||||
# @bp.route('/<hashid:corpus_id>/follow/<token>')
|
||||
# def follow_corpus(corpus_id, token):
|
||||
# corpus = Corpus.query.get_or_404(corpus_id)
|
||||
# if current_user.follow_corpus_by_token(token):
|
||||
# db.session.commit()
|
||||
# flash(f'You are following "{corpus.title}" now', category='corpus')
|
||||
# return redirect(url_for('corpora.corpus', corpus_id=corpus_id))
|
||||
# abort(403)
|
||||
@bp.route('/<hashid:corpus_id>/follow/<token>')
def follow_corpus(corpus_id, token):
    """Let the current user follow a corpus by means of a share token.

    Responds with 404 for an unknown corpus and with 403 when
    ``follow_corpus_by_token`` does not accept the token. On success the
    user is redirected to the corpus page.
    """
    corpus = Corpus.query.get_or_404(corpus_id)
    if not current_user.follow_corpus_by_token(token):
        abort(403)
    db.session.commit()
    flash(f'You are following "{corpus.title}" now', category='corpus')
    return redirect(url_for('corpora.corpus', corpus_id=corpus_id))
|
||||
|
||||
|
||||
@bp.route('/import', methods=['GET', 'POST'])
|
||||
|
@ -28,24 +28,24 @@ def _create_build_corpus_service(corpus):
|
||||
''' ## Command ## '''
|
||||
command = ['bash', '-c']
|
||||
command.append(
|
||||
f'mkdir /corpora/data/nopaque_{corpus.id}'
|
||||
f'mkdir /corpora/data/nopaque-{corpus.hashid.lower()}'
|
||||
' && '
|
||||
'cwb-encode'
|
||||
' -c utf8'
|
||||
f' -d /corpora/data/nopaque_{corpus.id}'
|
||||
f' -d /corpora/data/nopaque-{corpus.hashid.lower()}'
|
||||
' -f /root/files/corpus.vrt'
|
||||
f' -R /usr/local/share/cwb/registry/nopaque_{corpus.id}'
|
||||
f' -R /usr/local/share/cwb/registry/nopaque-{corpus.hashid.lower()}'
|
||||
' -P pos -P lemma -P simple_pos'
|
||||
' -S ent:0+type -S s:0'
|
||||
' -S text:0+address+author+booktitle+chapter+editor+institution+journal+pages+publisher+publishing_year+school+title'
|
||||
' -xsB -9'
|
||||
' && '
|
||||
f'cwb-make -V NOPAQUE_{corpus.id}'
|
||||
f'cwb-make -V NOPAQUE-{corpus.hashid.upper()}'
|
||||
)
|
||||
''' ## Constraints ## '''
|
||||
constraints = ['node.role==worker']
|
||||
''' ## Image ## '''
|
||||
image = f'{current_app.config["NOPAQUE_DOCKER_IMAGE_PREFIX"]}cwb:r1702'
|
||||
image = f'{current_app.config["NOPAQUE_DOCKER_IMAGE_PREFIX"]}cwb:r1879'
|
||||
''' ## Labels ## '''
|
||||
labels = {
|
||||
'origin': current_app.config['SERVER_NAME'],
|
||||
@ -139,21 +139,25 @@ def _create_cqpserver_container(corpus):
|
||||
''' ## Entrypoint ## '''
|
||||
entrypoint = ['bash', '-c']
|
||||
''' ## Image ## '''
|
||||
image = f'{current_app.config["NOPAQUE_DOCKER_IMAGE_PREFIX"]}cwb:r1702'
|
||||
image = f'{current_app.config["NOPAQUE_DOCKER_IMAGE_PREFIX"]}cwb:r1879'
|
||||
''' ## Name ## '''
|
||||
name = f'cqpserver_{corpus.id}'
|
||||
''' ## Network ## '''
|
||||
network = f'{current_app.config["DOCKER_NETWORK_NAME"]}'
|
||||
network = f'{current_app.config["NOPAQUE_DOCKER_NETWORK_NAME"]}'
|
||||
''' ## Volumes ## '''
|
||||
volumes = []
|
||||
''' ### Corpus data volume ### '''
|
||||
data_volume_source = os.path.join(corpus.path, 'cwb', 'data')
|
||||
data_volume_target = '/corpora/data'
|
||||
# data_volume_source = os.path.join(corpus.path, 'cwb', 'data', f'nopaque_{corpus.id}')
|
||||
# data_volume_target = f'/corpora/data/nopaque_{corpus.hashid.lower()}'
|
||||
data_volume = f'{data_volume_source}:{data_volume_target}:rw'
|
||||
volumes.append(data_volume)
|
||||
''' ### Corpus registry volume ### '''
|
||||
registry_volume_source = os.path.join(corpus.path, 'cwb', 'registry')
|
||||
registry_volume_target = '/usr/local/share/cwb/registry'
|
||||
# registry_volume_source = os.path.join(corpus.path, 'cwb', 'registry', f'nopaque_{corpus.id}')
|
||||
# registry_volume_target = f'/usr/local/share/cwb/registry/nopaque_{corpus.hashid.lower()}'
|
||||
registry_volume = f'{registry_volume_source}:{registry_volume_target}:rw'
|
||||
volumes.append(registry_volume)
|
||||
# Check if a cqpserver container already exists. If this is the case,
|
||||
|
2
app/ext/flask_sqlalchemy/__init__.py
Normal file
@ -0,0 +1,2 @@
|
||||
from .container_column import ContainerColumn
|
||||
from .int_enum_column import IntEnumColumn
|
21
app/ext/flask_sqlalchemy/container_column.py
Normal file
@ -0,0 +1,21 @@
|
||||
import json
|
||||
from app import db
|
||||
|
||||
|
||||
class ContainerColumn(db.TypeDecorator):
    """Column type that stores a Python container as a JSON string.

    ``container_type`` is the type (or tuple of types) accepted for the
    column, e.g. ``list`` or ``dict``. Values are serialized with
    ``json.dumps`` on the way into the database and parsed with
    ``json.loads`` on the way out.
    """
    impl = db.String

    def __init__(self, container_type, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.container_type = container_type

    def process_bind_param(self, value, dialect):
        """Convert ``value`` to the JSON string that is stored.

        Accepts an instance of ``container_type`` or a JSON string that
        decodes to one. ``None`` is passed through as NULL.

        Raises:
            TypeError: if ``value`` is neither of the accepted forms.
            ValueError: if a string value is not valid JSON (raised by
                ``json.loads``).
        """
        if value is None:
            return None
        if isinstance(value, self.container_type):
            return json.dumps(value)
        if isinstance(value, str) and isinstance(json.loads(value), self.container_type):
            return value
        # The exception has to be raised; returning the instance would
        # hand a TypeError object to the database driver as the value.
        raise TypeError(
            f'value must be {self.container_type!r} or a JSON string '
            f'decoding to it, got {type(value).__name__}'
        )

    def process_result_value(self, value, dialect):
        """Parse the stored JSON string; NULL is returned as ``None``."""
        if value is None:
            return None
        return json.loads(value)
|
22
app/ext/flask_sqlalchemy/int_enum_column.py
Normal file
@ -0,0 +1,22 @@
|
||||
from app import db
|
||||
|
||||
|
||||
class IntEnumColumn(db.TypeDecorator):
    """Column type that stores the members of an enum as integers.

    ``enum_type`` is the enum class whose members are stored by their
    integer ``value`` and restored via ``enum_type(value)``.
    """
    impl = db.Integer

    def __init__(self, enum_type, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.enum_type = enum_type

    def process_bind_param(self, value, dialect):
        """Convert ``value`` to the integer that is stored.

        Accepts a member of ``enum_type`` with an int value, an int that
        is the value of a member, or a str that is the name of a member.
        ``None`` is passed through as NULL.

        Raises:
            TypeError: if ``value`` has none of the accepted types.
            ValueError: if an int matches no member value.
            KeyError: if a str matches no member name.
        """
        if value is None:
            return None
        if isinstance(value, self.enum_type) and isinstance(value.value, int):
            return value.value
        if isinstance(value, int):
            # Round trip through the enum to reject unknown values.
            return self.enum_type(value).value
        if isinstance(value, str):
            return self.enum_type[value].value
        # The exception has to be raised; returning the instance would
        # hand a TypeError object to the database driver as the value.
        raise TypeError(
            f'value must be {self.enum_type.__name__}, int or str, '
            f'got {type(value).__name__}'
        )

    def process_result_value(self, value, dialect):
        """Return the enum member for a stored integer; NULL gives ``None``."""
        if value is None:
            return None
        return self.enum_type(value)
|
@ -1,7 +1,6 @@
|
||||
from flask import abort, current_app
|
||||
from flask_login import current_user
|
||||
from threading import Thread
|
||||
import os
|
||||
from app import db
|
||||
from app.decorators import admin_required, content_negotiation
|
||||
from app.models import Job, JobStatus
|
||||
@ -39,10 +38,9 @@ def job_log(job_id):
|
||||
if job.status not in [JobStatus.COMPLETED, JobStatus.FAILED]:
|
||||
response = {'errors': {'message': 'Job status is not completed or failed'}}
|
||||
return response, 409
|
||||
with open(os.path.join(job.path, 'pipeline_data', 'logs', 'pyflow_log.txt')) as log_file:
|
||||
with open(job.path / 'pipeline_data' / 'logs' / 'pyflow_log.txt') as log_file:
|
||||
log = log_file.read()
|
||||
response_data = {
|
||||
'message': '',
|
||||
'jobLog': log
|
||||
}
|
||||
return response_data, 200
|
||||
|
@ -7,7 +7,6 @@ from flask import (
|
||||
)
|
||||
from flask_breadcrumbs import register_breadcrumb
|
||||
from flask_login import current_user
|
||||
import os
|
||||
from app.models import Job, JobInput, JobResult
|
||||
from . import bp
|
||||
from .utils import job_dynamic_list_constructor as job_dlc
|
||||
@ -38,8 +37,8 @@ def download_job_input(job_id, job_input_id):
|
||||
if not (job_input.job.user == current_user or current_user.is_administrator()):
|
||||
abort(403)
|
||||
return send_from_directory(
|
||||
os.path.dirname(job_input.path),
|
||||
os.path.basename(job_input.path),
|
||||
job_input.path.parent,
|
||||
job_input.path.name,
|
||||
as_attachment=True,
|
||||
attachment_filename=job_input.filename,
|
||||
mimetype=job_input.mimetype
|
||||
@ -52,8 +51,8 @@ def download_job_result(job_id, job_result_id):
|
||||
if not (job_result.job.user == current_user or current_user.is_administrator()):
|
||||
abort(403)
|
||||
return send_from_directory(
|
||||
os.path.dirname(job_result.path),
|
||||
os.path.basename(job_result.path),
|
||||
job_result.path.parent,
|
||||
job_result.path.name,
|
||||
as_attachment=True,
|
||||
attachment_filename=job_result.filename,
|
||||
mimetype=job_result.mimetype
|
||||
|
@ -1,6 +1,7 @@
|
||||
from flask import current_app
|
||||
from flask_migrate import upgrade
|
||||
import os
|
||||
from pathlib import Path
|
||||
from typing import List
|
||||
from app.models import (
|
||||
CorpusFollowerRole,
|
||||
Role,
|
||||
@ -17,16 +18,15 @@ def deploy():
|
||||
# Make default directories
|
||||
print('Make default directories')
|
||||
base_dir = current_app.config['NOPAQUE_DATA_DIR']
|
||||
default_dirs = [
|
||||
os.path.join(base_dir, 'tmp'),
|
||||
os.path.join(base_dir, 'users')
|
||||
default_dirs: List[Path] = [
|
||||
base_dir / 'tmp',
|
||||
base_dir / 'users'
|
||||
]
|
||||
for dir in default_dirs:
|
||||
if os.path.exists(dir):
|
||||
if not os.path.isdir(dir):
|
||||
raise NotADirectoryError(f'{dir} is not a directory')
|
||||
else:
|
||||
os.mkdir(dir)
|
||||
for default_dir in default_dirs:
|
||||
if not default_dir.exists():
|
||||
default_dir.mkdir()
|
||||
if not default_dir.is_dir():
|
||||
raise NotADirectoryError(f'{default_dir} is not a directory')
|
||||
|
||||
# migrate database to latest revision
|
||||
print('Migrate database to latest revision')
|
||||
@ -43,3 +43,5 @@ def deploy():
|
||||
SpaCyNLPPipelineModel.insert_defaults()
|
||||
print('Insert/Update default TesseractOCRPipelineModels')
|
||||
TesseractOCRPipelineModel.insert_defaults()
|
||||
|
||||
# TODO: Implement checks for if the nopaque network exists
|
||||
|
@ -45,12 +45,6 @@ def dashboard():
|
||||
)
|
||||
|
||||
|
||||
# @bp.route('/user_manual')
|
||||
# @register_breadcrumb(bp, '.user_manual', '<i class="material-icons left">help</i>User manual')
|
||||
# def user_manual():
|
||||
# return render_template('main/user_manual.html.j2', title='User manual')
|
||||
|
||||
|
||||
@bp.route('/news')
|
||||
@register_breadcrumb(bp, '.news', '<i class="material-icons left">email</i>News')
|
||||
def news():
|
||||
@ -78,15 +72,17 @@ def terms_of_use():
|
||||
)
|
||||
|
||||
|
||||
# @bp.route('/social-area')
|
||||
# @register_breadcrumb(bp, '.social_area', '<i class="material-icons left">group</i>Social Area')
|
||||
# @login_required
|
||||
# def social_area():
|
||||
# corpora = Corpus.query.filter(Corpus.is_public == True, Corpus.user != current_user).all()
|
||||
# users = User.query.filter(User.is_public == True, User.id != current_user.id).all()
|
||||
# return render_template(
|
||||
# 'main/social_area.html.j2',
|
||||
# title='Social Area',
|
||||
# corpora=corpora,
|
||||
# users=users
|
||||
# )
|
||||
@bp.route('/social-area')
@register_breadcrumb(bp, '.social_area', '<i class="material-icons left">group</i>Social Area')
@login_required
def social_area():
    """Render the social area listing public corpora and public users.

    Corpora owned by the current user and the current user itself are
    excluded from the listings.
    """
    # ``== True`` is intentional: these are SQLAlchemy column expressions
    # that are compiled to SQL, not Python truth tests.
    corpora = Corpus.query.filter(Corpus.is_public == True, Corpus.user != current_user).all()
    users = User.query.filter(User.is_public == True, User.id != current_user.id).all()
    return render_template(
        'main/social_area.html.j2',
        title='Social Area',
        corpora=corpora,
        users=users
    )
|
||||
|
1810
app/models.py
19
app/models/__init__.py
Normal file
@ -0,0 +1,19 @@
|
||||
from .avatar import *
|
||||
from .corpus_file import *
|
||||
from .corpus_follower_association import *
|
||||
from .corpus_follower_role import *
|
||||
from .corpus import *
|
||||
from .job_input import *
|
||||
from .job_result import *
|
||||
from .job import *
|
||||
from .role import *
|
||||
from .spacy_nlp_pipeline_model import *
|
||||
from .tesseract_ocr_pipeline_model import *
|
||||
from .token import *
|
||||
from .user import *
|
||||
from app import login
|
||||
|
||||
|
||||
@login.user_loader
def load_user(user_id):
    """User loader callback: fetch the user whose primary key is ``user_id``."""
    primary_key = int(user_id)
    return User.query.get(primary_key)
|
40
app/models/avatar.py
Normal file
@ -0,0 +1,40 @@
|
||||
from flask import current_app
|
||||
from flask_hashids import HashidMixin
|
||||
from pathlib import Path
|
||||
from app import db
|
||||
from .file_mixin import FileMixin
|
||||
|
||||
|
||||
class Avatar(HashidMixin, FileMixin, db.Model):
    """Database model of the avatar file that belongs to a user."""
    __tablename__ = 'avatars'
    # Primary key
    id = db.Column(db.Integer, primary_key=True)
    # Foreign keys
    user_id = db.Column(db.Integer, db.ForeignKey('users.id'))
    # Relationships
    user = db.relationship('User', back_populates='avatar')

    @property
    def path(self) -> Path:
        """Location of the avatar file inside the owning user's directory."""
        return self.user.path / 'avatar'

    def delete(self):
        """Remove the avatar file and mark the row for deletion.

        A file that is already gone is not an error; any other OSError is
        logged and re-raised before the session is touched.
        """
        avatar_file = self.path
        try:
            avatar_file.unlink(missing_ok=True)
        except OSError as e:
            current_app.logger.error(e)
            raise
        db.session.delete(self)

    def to_json_serializeable(self, backrefs=False, relationships=False):
        """Build a JSON serializable dict describing this avatar.

        With ``backrefs`` the owning user is embedded under ``'user'``.
        ``relationships`` adds nothing for this model.
        """
        serialized = {'id': self.hashid}
        serialized.update(self.file_mixin_to_json_serializeable())
        if backrefs:
            serialized['user'] = self.user.to_json_serializeable(backrefs=True)
        return serialized
|
200
app/models/corpus.py
Normal file
@ -0,0 +1,200 @@
|
||||
from datetime import datetime
|
||||
from enum import IntEnum
|
||||
from flask import current_app, url_for
|
||||
from flask_hashids import HashidMixin
|
||||
from sqlalchemy.ext.associationproxy import association_proxy
|
||||
from typing import Union
|
||||
from pathlib import Path
|
||||
import shutil
|
||||
import xml.etree.ElementTree as ET
|
||||
from app import db
|
||||
from app.converters.vrt import normalize_vrt_file
|
||||
from app.ext.flask_sqlalchemy import IntEnumColumn
|
||||
from .corpus_follower_association import CorpusFollowerAssociation
|
||||
|
||||
|
||||
class CorpusStatus(IntEnum):
    """Life cycle states of a corpus, stored as integers."""
    UNPREPARED = 1
    SUBMITTED = 2
    QUEUED = 3
    BUILDING = 4
    BUILT = 5
    FAILED = 6
    STARTING_ANALYSIS_SESSION = 7
    RUNNING_ANALYSIS_SESSION = 8
    CANCELING_ANALYSIS_SESSION = 9

    @staticmethod
    def get(corpus_status: Union['CorpusStatus', int, str]) -> 'CorpusStatus':
        """Coerce a member, a member value or a member name to a member.

        Raises:
            TypeError: if ``corpus_status`` is not a CorpusStatus, int or str.
            ValueError: if an int matches no member value.
            KeyError: if a str matches no member name.
        """
        if isinstance(corpus_status, str):
            return CorpusStatus[corpus_status]
        if isinstance(corpus_status, int):
            # Members are ints themselves; looking a member up by value
            # returns that very member, so no separate branch is needed.
            return CorpusStatus(corpus_status)
        raise TypeError('corpus_status must be CorpusStatus, int, or str')
|
||||
|
||||
|
||||
class Corpus(HashidMixin, db.Model):
    '''
    Class to define a corpus.

    Besides the database columns, a corpus owns a directory below its
    user's directory (see ``path``) with a ``files`` and a ``cwb``
    subdirectory; ``create``, ``build`` and ``delete`` keep that
    directory in step with the database row.
    '''
    __tablename__ = 'corpora'
    # Primary key
    id = db.Column(db.Integer, primary_key=True)
    # Foreign keys
    user_id = db.Column(db.Integer, db.ForeignKey('users.id'))
    # Fields
    creation_date = db.Column(db.DateTime(), default=datetime.utcnow)
    description = db.Column(db.String(255))
    status = db.Column(
        IntEnumColumn(CorpusStatus),
        default=CorpusStatus.UNPREPARED
    )
    title = db.Column(db.String(32))
    num_analysis_sessions = db.Column(db.Integer, default=0)
    num_tokens = db.Column(db.Integer, default=0)
    is_public = db.Column(db.Boolean, default=False)
    # Relationships
    files = db.relationship(
        'CorpusFile',
        back_populates='corpus',
        lazy='dynamic',
        cascade='all, delete-orphan'
    )
    corpus_follower_associations = db.relationship(
        'CorpusFollowerAssociation',
        back_populates='corpus',
        cascade='all, delete-orphan'
    )
    followers = association_proxy(
        'corpus_follower_associations',
        'follower',
        creator=lambda u: CorpusFollowerAssociation(follower=u)
    )
    user = db.relationship('User', back_populates='corpora')
    # "static" attributes
    max_num_tokens = 2_147_483_647

    def __repr__(self):
        return f'<Corpus {self.title}>'

    @property
    def analysis_url(self):
        '''URL of the analysis view of this corpus.'''
        return url_for('corpora.analysis', corpus_id=self.id)

    @property
    def jsonpatch_path(self):
        '''Path of this corpus below its user's jsonpatch path.'''
        return f'{self.user.jsonpatch_path}/corpora/{self.hashid}'

    @property
    def path(self) -> Path:
        '''Directory of this corpus inside the owning user's directory.'''
        return self.user.path / 'corpora' / f'{self.id}'

    @property
    def url(self):
        '''URL of the corpus page.'''
        return url_for('corpora.corpus', corpus_id=self.id)

    @property
    def user_hashid(self):
        '''Hashid of the owning user.'''
        return self.user.hashid

    @staticmethod
    def create(**kwargs):
        '''
        Create a corpus row together with its directory layout.

        The row is added and flushed (not committed) so that ``id`` and
        therefore ``path`` are available. If a directory cannot be
        created, the error is logged, the session is rolled back and the
        OSError is re-raised.
        '''
        corpus = Corpus(**kwargs)
        db.session.add(corpus)
        db.session.flush(objects=[corpus])
        db.session.refresh(corpus)
        corpus_files_dir = corpus.path / 'files'
        corpus_cwb_dir = corpus.path / 'cwb'
        corpus_cwb_data_dir = corpus_cwb_dir / 'data'
        corpus_cwb_registry_dir = corpus_cwb_dir / 'registry'
        try:
            corpus.path.mkdir()
            corpus_files_dir.mkdir()
            corpus_cwb_dir.mkdir()
            corpus_cwb_data_dir.mkdir()
            corpus_cwb_registry_dir.mkdir()
        except OSError as e:
            # TODO: Potential leftover cleanup
            current_app.logger.error(e)
            db.session.rollback()
            raise
        return corpus

    def build(self):
        '''
        Assemble ``cwb/corpus.vrt`` from the files of this corpus.

        The ``cwb`` directory is recreated from scratch, every corpus
        file is normalized and wrapped with its metadata as attributes,
        and the result is written as one ``<corpus>`` document. On
        success the status becomes SUBMITTED. If the directory cannot be
        recreated the status becomes FAILED and the OSError is re-raised;
        if a file cannot be normalized the status becomes FAILED and the
        method returns without raising.
        '''
        corpus_cwb_dir = self.path / 'cwb'
        corpus_cwb_data_dir = corpus_cwb_dir / 'data'
        corpus_cwb_registry_dir = corpus_cwb_dir / 'registry'
        try:
            shutil.rmtree(corpus_cwb_dir, ignore_errors=True)
            corpus_cwb_dir.mkdir()
            corpus_cwb_data_dir.mkdir()
            corpus_cwb_registry_dir.mkdir()
        except OSError as e:
            current_app.logger.error(e)
            self.status = CorpusStatus.FAILED
            raise
        corpus_element = ET.fromstring('<corpus>\n</corpus>')
        for corpus_file in self.files:
            normalized_vrt_path = corpus_cwb_dir / f'{corpus_file.id}.norm.vrt'
            try:
                normalize_vrt_file(corpus_file.path, normalized_vrt_path)
            except Exception as e:
                # Still best effort (no re-raise), but the cause is logged
                # and KeyboardInterrupt/SystemExit are no longer swallowed.
                current_app.logger.error(e)
                self.status = CorpusStatus.FAILED
                return
            element_tree = ET.parse(normalized_vrt_path)
            text_element = element_tree.getroot()
            text_element.set('author', corpus_file.author)
            text_element.set('title', corpus_file.title)
            text_element.set(
                'publishing_year',
                f'{corpus_file.publishing_year}'
            )
            text_element.set('address', corpus_file.address or 'NULL')
            text_element.set('booktitle', corpus_file.booktitle or 'NULL')
            text_element.set('chapter', corpus_file.chapter or 'NULL')
            text_element.set('editor', corpus_file.editor or 'NULL')
            text_element.set('institution', corpus_file.institution or 'NULL')
            text_element.set('journal', corpus_file.journal or 'NULL')
            # Test the value itself: formatting it first turned a missing
            # value into the truthy string 'None', so the 'NULL' fallback
            # used for the other optional attributes was never applied.
            text_element.set('pages', corpus_file.pages or 'NULL')
            text_element.set('publisher', corpus_file.publisher or 'NULL')
            text_element.set('school', corpus_file.school or 'NULL')
            text_element.tail = '\n'
            corpus_element.append(text_element)
        ET.ElementTree(corpus_element).write(
            corpus_cwb_dir / 'corpus.vrt',
            encoding='utf-8'
        )
        self.status = CorpusStatus.SUBMITTED

    def delete(self):
        '''Remove the corpus directory (ignoring errors) and mark the row for deletion.'''
        shutil.rmtree(self.path, ignore_errors=True)
        db.session.delete(self)

    def to_json_serializeable(self, backrefs=False, relationships=False):
        '''
        Build a JSON serializable dict describing this corpus.

        With ``backrefs`` the owning user is embedded under ``'user'``;
        with ``relationships`` the follower associations and the files
        are embedded, each keyed by hashid.
        '''
        json_serializeable = {
            'id': self.hashid,
            'creation_date': f'{self.creation_date.isoformat()}Z',
            'description': self.description,
            'max_num_tokens': self.max_num_tokens,
            'num_analysis_sessions': self.num_analysis_sessions,
            'num_tokens': self.num_tokens,
            'status': self.status.name,
            'title': self.title,
            'is_public': self.is_public
        }
        if backrefs:
            json_serializeable['user'] = \
                self.user.to_json_serializeable(backrefs=True)
        if relationships:
            json_serializeable['corpus_follower_associations'] = {
                x.hashid: x.to_json_serializeable()
                for x in self.corpus_follower_associations
            }
            json_serializeable['files'] = {
                x.hashid: x.to_json_serializeable(relationships=True)
                for x in self.files
            }
        return json_serializeable
|
102
app/models/corpus_file.py
Normal file
@ -0,0 +1,102 @@
|
||||
from flask import current_app, url_for
|
||||
from flask_hashids import HashidMixin
|
||||
from pathlib import Path
|
||||
from app import db
|
||||
from .corpus import CorpusStatus
|
||||
from .file_mixin import FileMixin
|
||||
|
||||
|
||||
class CorpusFile(FileMixin, HashidMixin, db.Model):
    """Database model of a single file that belongs to a corpus."""
    __tablename__ = 'corpus_files'
    # Primary key
    id = db.Column(db.Integer, primary_key=True)
    # Foreign keys
    corpus_id = db.Column(db.Integer, db.ForeignKey('corpora.id'))
    # Fields
    author = db.Column(db.String(255))
    description = db.Column(db.String(255))
    publishing_year = db.Column(db.Integer)
    title = db.Column(db.String(255))
    address = db.Column(db.String(255))
    booktitle = db.Column(db.String(255))
    chapter = db.Column(db.String(255))
    editor = db.Column(db.String(255))
    institution = db.Column(db.String(255))
    journal = db.Column(db.String(255))
    pages = db.Column(db.String(255))
    publisher = db.Column(db.String(255))
    school = db.Column(db.String(255))
    # Relationships
    corpus = db.relationship('Corpus', back_populates='files')

    @property
    def download_url(self):
        """URL from which this file can be downloaded."""
        route_args = {
            'corpus_id': self.corpus_id,
            'corpus_file_id': self.id
        }
        return url_for('corpora.download_corpus_file', **route_args)

    @property
    def jsonpatch_path(self):
        """Path of this file below its corpus' jsonpatch path."""
        return f'{self.corpus.jsonpatch_path}/files/{self.hashid}'

    @property
    def path(self) -> Path:
        """Location of the file inside the ``files`` directory of its corpus."""
        return self.corpus.path / 'files' / f'{self.id}'

    @property
    def url(self):
        """URL of the page of this corpus file."""
        route_args = {
            'corpus_id': self.corpus_id,
            'corpus_file_id': self.id
        }
        return url_for('corpora.corpus_file', **route_args)

    @property
    def user_hashid(self):
        """Hashid of the user owning the corpus."""
        return self.corpus.user.hashid

    @property
    def user_id(self):
        """Id of the user owning the corpus."""
        return self.corpus.user_id

    def delete(self):
        """Remove the file from disk and mark the row for deletion.

        A file that is already gone is not an error; any other OSError is
        logged and re-raised before the session is touched. Afterwards
        the corpus is set back to UNPREPARED.
        """
        stored_file = self.path
        try:
            stored_file.unlink(missing_ok=True)
        except OSError as e:
            current_app.logger.error(e)
            raise
        db.session.delete(self)
        self.corpus.status = CorpusStatus.UNPREPARED

    def to_json_serializeable(self, backrefs=False, relationships=False):
        """Build a JSON serializable dict describing this corpus file.

        With ``backrefs`` the corpus is embedded under ``'corpus'``.
        ``relationships`` is only forwarded to the file mixin.
        """
        # Listed in the order in which the keys appear in the result.
        metadata_fields = (
            'address', 'author', 'description', 'booktitle', 'chapter',
            'editor', 'institution', 'journal', 'pages', 'publisher',
            'publishing_year', 'school', 'title',
        )
        serialized = {'id': self.hashid}
        for field_name in metadata_fields:
            serialized[field_name] = getattr(self, field_name)
        serialized.update(
            self.file_mixin_to_json_serializeable(
                backrefs=backrefs,
                relationships=relationships
            )
        )
        if backrefs:
            serialized['corpus'] = self.corpus.to_json_serializeable(backrefs=True)
        return serialized
|
47
app/models/corpus_follower_association.py
Normal file
@ -0,0 +1,47 @@
|
||||
from flask_hashids import HashidMixin
|
||||
from app import db
|
||||
from .corpus_follower_role import CorpusFollowerRole
|
||||
|
||||
|
||||
class CorpusFollowerAssociation(HashidMixin, db.Model):
    """Row linking a follower (user) to a corpus together with a role."""

    __tablename__ = 'corpus_follower_associations'
    # Primary key
    id = db.Column(db.Integer, primary_key=True)
    # Foreign keys
    corpus_id = db.Column(db.Integer, db.ForeignKey('corpora.id'))
    follower_id = db.Column(db.Integer, db.ForeignKey('users.id'))
    role_id = db.Column(db.Integer, db.ForeignKey('corpus_follower_roles.id'))
    # Relationships
    corpus = db.relationship(
        'Corpus',
        back_populates='corpus_follower_associations'
    )
    follower = db.relationship(
        'User',
        back_populates='corpus_follower_associations'
    )
    role = db.relationship(
        'CorpusFollowerRole',
        back_populates='corpus_follower_associations'
    )

    def __init__(self, **kwargs):
        """Create the association, defaulting the role when none is given.

        The role flagged ``default=True`` is only looked up when the caller
        passed neither ``role`` nor a non-None ``role_id``. Injecting the
        default ``role`` object next to an explicit ``role_id`` would let
        the relationship override the requested foreign key.
        """
        if 'role' not in kwargs and kwargs.get('role_id') is None:
            kwargs['role'] = CorpusFollowerRole.query.filter_by(default=True).first()
        super().__init__(**kwargs)

    def __repr__(self):
        return f'<CorpusFollowerAssociation {self.follower!r} ~ {self.role!r} ~ {self.corpus!r}>'

    def to_json_serializeable(self, backrefs=False, relationships=False):
        """Return a dict with the hashid and the serialized corpus, follower and role.

        The corpus is always serialized with ``backrefs=True``; follower and
        role use their serializers' defaults. ``backrefs`` and
        ``relationships`` are accepted for signature parity with the other
        models but currently add nothing.
        """
        json_serializeable = {
            'id': self.hashid,
            'corpus': self.corpus.to_json_serializeable(backrefs=True),
            'follower': self.follower.to_json_serializeable(),
            'role': self.role.to_json_serializeable()
        }
        return json_serializeable
|
107
app/models/corpus_follower_role.py
Normal file
@ -0,0 +1,107 @@
|
||||
from flask_hashids import HashidMixin
|
||||
from enum import IntEnum
|
||||
from typing import Union
|
||||
from app import db
|
||||
|
||||
|
||||
class CorpusFollowerPermission(IntEnum):
    """Permission bits a corpus follower role can hold (powers of two)."""

    VIEW = 1
    MANAGE_FILES = 2
    MANAGE_FOLLOWERS = 4
    MANAGE_CORPUS = 8

    @staticmethod
    def get(corpus_follower_permission: Union['CorpusFollowerPermission', int, str]) -> 'CorpusFollowerPermission':
        """Coerce a member, an integer value or a member name to a member.

        Raises ``TypeError`` for any other type. An unknown integer value
        raises ``ValueError`` and an unknown name raises ``KeyError``, as
        produced by the enum lookup itself.
        """
        candidate = corpus_follower_permission
        if isinstance(candidate, str):
            # Lookup by member name.
            return CorpusFollowerPermission[candidate]
        if not isinstance(candidate, int):
            raise TypeError('corpus_follower_permission must be CorpusFollowerPermission, int, or str')
        # Members are ints as well; calling the enum with a member hands the
        # same member back, so one lookup covers both remaining cases.
        return CorpusFollowerPermission(candidate)
|
||||
|
||||
|
||||
class CorpusFollowerRole(HashidMixin, db.Model):
    """Named role bundling CorpusFollowerPermission bits for corpus followers."""

    __tablename__ = 'corpus_follower_roles'
    # Primary key
    id = db.Column(db.Integer, primary_key=True)
    # Fields
    name = db.Column(db.String(64), unique=True)
    default = db.Column(db.Boolean, default=False, index=True)
    # Bitmask of CorpusFollowerPermission values
    permissions = db.Column(db.Integer, default=0)
    # Relationships
    corpus_follower_associations = db.relationship(
        'CorpusFollowerAssociation',
        back_populates='role'
    )

    def __repr__(self):
        return f'<CorpusFollowerRole {self.name}>'

    def has_permission(self, permission: Union[CorpusFollowerPermission, int, str]):
        """Return True if every bit of ``permission`` is set on this role."""
        perm = CorpusFollowerPermission.get(permission)
        # ``permissions`` is None until the column default is applied, so
        # treat None as "no permissions" instead of failing on ``None & int``.
        return (self.permissions or 0) & perm.value == perm.value

    def add_permission(self, permission: Union[CorpusFollowerPermission, int, str]):
        """Set the bit(s) of ``permission``; no-op if already present."""
        perm = CorpusFollowerPermission.get(permission)
        if not self.has_permission(perm):
            self.permissions = (self.permissions or 0) | perm.value

    def remove_permission(self, permission: Union[CorpusFollowerPermission, int, str]):
        """Clear the bit(s) of ``permission``; no-op if not present."""
        perm = CorpusFollowerPermission.get(permission)
        if self.has_permission(perm):
            self.permissions = (self.permissions or 0) & ~perm.value

    def reset_permissions(self):
        """Drop all permissions."""
        self.permissions = 0

    def to_json_serializeable(self, backrefs=False, relationships=False):
        """Return a dict with hashid, default flag, name and permission names.

        :param backrefs: accepted for signature parity; adds nothing.
        :param relationships: when true, add the role's associations keyed
            by hashid under 'corpus_follower_association'.
        """
        json_serializeable = {
            'id': self.hashid,
            'default': self.default,
            'name': self.name,
            'permissions': [
                x.name
                for x in CorpusFollowerPermission
                if self.has_permission(x)
            ]
        }
        if relationships:
            # The relationship attribute is plural; the singular name that
            # was read here before does not exist on this model.
            json_serializeable['corpus_follower_association'] = {
                x.hashid: x.to_json_serializeable(relationships=True)
                for x in self.corpus_follower_associations
            }
        return json_serializeable

    @staticmethod
    def insert_defaults():
        """Create or update the built-in roles and commit them.

        Each role's permissions are reset and rebuilt from the table below;
        only the role named 'Viewer' is flagged as default.
        """
        roles = {
            'Anonymous': [],
            'Viewer': [
                CorpusFollowerPermission.VIEW
            ],
            'Contributor': [
                CorpusFollowerPermission.VIEW,
                CorpusFollowerPermission.MANAGE_FILES
            ],
            'Administrator': [
                CorpusFollowerPermission.VIEW,
                CorpusFollowerPermission.MANAGE_FILES,
                CorpusFollowerPermission.MANAGE_FOLLOWERS,
                CorpusFollowerPermission.MANAGE_CORPUS
            ]
        }
        default_role_name = 'Viewer'
        for role_name, permissions in roles.items():
            role = CorpusFollowerRole.query.filter_by(name=role_name).first()
            if role is None:
                role = CorpusFollowerRole(name=role_name)
            role.reset_permissions()
            for permission in permissions:
                role.add_permission(permission)
            role.default = role.name == default_role_name
            db.session.add(role)
        db.session.commit()
|
@ -9,6 +9,7 @@
|
||||
# compatible_service_versions:
|
||||
# - '0.1.0'
|
||||
# - '0.1.1'
|
||||
# - '0.1.2'
|
||||
# - title: 'Amharic'
|
||||
# description: ''
|
||||
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/amh.traineddata'
|
||||
@ -20,6 +21,7 @@
|
||||
# compatible_service_versions:
|
||||
# - '0.1.0'
|
||||
# - '0.1.1'
|
||||
# - '0.1.2'
|
||||
- title: 'Arabic'
|
||||
description: ''
|
||||
url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/ara.traineddata'
|
||||
@ -31,6 +33,7 @@
|
||||
compatible_service_versions:
|
||||
- '0.1.0'
|
||||
- '0.1.1'
|
||||
- '0.1.2'
|
||||
# - title: 'Assamese'
|
||||
# description: ''
|
||||
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/asm.traineddata'
|
||||
@ -42,6 +45,7 @@
|
||||
# compatible_service_versions:
|
||||
# - '0.1.0'
|
||||
# - '0.1.1'
|
||||
# - '0.1.2'
|
||||
# - title: 'Azerbaijani'
|
||||
# description: ''
|
||||
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/aze.traineddata'
|
||||
@ -53,6 +57,7 @@
|
||||
# compatible_service_versions:
|
||||
# - '0.1.0'
|
||||
# - '0.1.1'
|
||||
# - '0.1.2'
|
||||
# - title: 'Azerbaijani - Cyrillic'
|
||||
# description: ''
|
||||
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/aze_cyrl.traineddata'
|
||||
@ -64,6 +69,7 @@
|
||||
# compatible_service_versions:
|
||||
# - '0.1.0'
|
||||
# - '0.1.1'
|
||||
# - '0.1.2'
|
||||
# - title: 'Belarusian'
|
||||
# description: ''
|
||||
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/bel.traineddata'
|
||||
@ -75,6 +81,7 @@
|
||||
# compatible_service_versions:
|
||||
# - '0.1.0'
|
||||
# - '0.1.1'
|
||||
# - '0.1.2'
|
||||
# - title: 'Bengali'
|
||||
# description: ''
|
||||
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/ben.traineddata'
|
||||
@ -86,6 +93,7 @@
|
||||
# compatible_service_versions:
|
||||
# - '0.1.0'
|
||||
# - '0.1.1'
|
||||
# - '0.1.2'
|
||||
# - title: 'Tibetan'
|
||||
# description: ''
|
||||
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/bod.traineddata'
|
||||
@ -97,6 +105,7 @@
|
||||
# compatible_service_versions:
|
||||
# - '0.1.0'
|
||||
# - '0.1.1'
|
||||
# - '0.1.2'
|
||||
# - title: 'Bosnian'
|
||||
# description: ''
|
||||
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/bos.traineddata'
|
||||
@ -108,6 +117,7 @@
|
||||
# compatible_service_versions:
|
||||
# - '0.1.0'
|
||||
# - '0.1.1'
|
||||
# - '0.1.2'
|
||||
# - title: 'Bulgarian'
|
||||
# description: ''
|
||||
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/bul.traineddata'
|
||||
@ -119,6 +129,7 @@
|
||||
# compatible_service_versions:
|
||||
# - '0.1.0'
|
||||
# - '0.1.1'
|
||||
# - '0.1.2'
|
||||
# - title: 'Catalan; Valencian'
|
||||
# description: ''
|
||||
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/cat.traineddata'
|
||||
@ -130,6 +141,7 @@
|
||||
# compatible_service_versions:
|
||||
# - '0.1.0'
|
||||
# - '0.1.1'
|
||||
# - '0.1.2'
|
||||
# - title: 'Cebuano'
|
||||
# description: ''
|
||||
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/ceb.traineddata'
|
||||
@ -141,6 +153,7 @@
|
||||
# compatible_service_versions:
|
||||
# - '0.1.0'
|
||||
# - '0.1.1'
|
||||
# - '0.1.2'
|
||||
# - title: 'Czech'
|
||||
# description: ''
|
||||
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/ces.traineddata'
|
||||
@ -152,6 +165,7 @@
|
||||
# compatible_service_versions:
|
||||
# - '0.1.0'
|
||||
# - '0.1.1'
|
||||
# - '0.1.2'
|
||||
# - title: 'Chinese - Simplified'
|
||||
# description: ''
|
||||
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/chi_sim.traineddata'
|
||||
@ -163,6 +177,7 @@
|
||||
# compatible_service_versions:
|
||||
# - '0.1.0'
|
||||
# - '0.1.1'
|
||||
# - '0.1.2'
|
||||
- title: 'Chinese - Traditional'
|
||||
description: ''
|
||||
url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/chi_tra.traineddata'
|
||||
@ -174,6 +189,7 @@
|
||||
compatible_service_versions:
|
||||
- '0.1.0'
|
||||
- '0.1.1'
|
||||
- '0.1.2'
|
||||
# - title: 'Cherokee'
|
||||
# description: ''
|
||||
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/chr.traineddata'
|
||||
@ -185,6 +201,7 @@
|
||||
# compatible_service_versions:
|
||||
# - '0.1.0'
|
||||
# - '0.1.1'
|
||||
# - '0.1.2'
|
||||
# - title: 'Welsh'
|
||||
# description: ''
|
||||
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/cym.traineddata'
|
||||
@ -196,6 +213,7 @@
|
||||
# compatible_service_versions:
|
||||
# - '0.1.0'
|
||||
# - '0.1.1'
|
||||
# - '0.1.2'
|
||||
- title: 'Danish'
|
||||
description: ''
|
||||
url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/dan.traineddata'
|
||||
@ -207,6 +225,7 @@
|
||||
compatible_service_versions:
|
||||
- '0.1.0'
|
||||
- '0.1.1'
|
||||
- '0.1.2'
|
||||
- title: 'German'
|
||||
description: ''
|
||||
url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/deu.traineddata'
|
||||
@ -218,6 +237,7 @@
|
||||
compatible_service_versions:
|
||||
- '0.1.0'
|
||||
- '0.1.1'
|
||||
- '0.1.2'
|
||||
# - title: 'Dzongkha'
|
||||
# description: ''
|
||||
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/dzo.traineddata'
|
||||
@ -229,6 +249,7 @@
|
||||
# compatible_service_versions:
|
||||
# - '0.1.0'
|
||||
# - '0.1.1'
|
||||
# - '0.1.2'
|
||||
- title: 'Greek, Modern (1453-)'
|
||||
description: ''
|
||||
url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/ell.traineddata'
|
||||
@ -240,6 +261,7 @@
|
||||
compatible_service_versions:
|
||||
- '0.1.0'
|
||||
- '0.1.1'
|
||||
- '0.1.2'
|
||||
- title: 'English'
|
||||
description: ''
|
||||
url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/eng.traineddata'
|
||||
@ -251,6 +273,7 @@
|
||||
compatible_service_versions:
|
||||
- '0.1.0'
|
||||
- '0.1.1'
|
||||
- '0.1.2'
|
||||
- title: 'English, Middle (1100-1500)'
|
||||
description: ''
|
||||
url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/enm.traineddata'
|
||||
@ -262,6 +285,7 @@
|
||||
compatible_service_versions:
|
||||
- '0.1.0'
|
||||
- '0.1.1'
|
||||
- '0.1.2'
|
||||
# - title: 'Esperanto'
|
||||
# description: ''
|
||||
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/epo.traineddata'
|
||||
@ -273,6 +297,7 @@
|
||||
# compatible_service_versions:
|
||||
# - '0.1.0'
|
||||
# - '0.1.1'
|
||||
# - '0.1.2'
|
||||
# - title: 'Estonian'
|
||||
# description: ''
|
||||
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/est.traineddata'
|
||||
@ -284,6 +309,7 @@
|
||||
# compatible_service_versions:
|
||||
# - '0.1.0'
|
||||
# - '0.1.1'
|
||||
# - '0.1.2'
|
||||
# - title: 'Basque'
|
||||
# description: ''
|
||||
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/eus.traineddata'
|
||||
@ -295,6 +321,7 @@
|
||||
# compatible_service_versions:
|
||||
# - '0.1.0'
|
||||
# - '0.1.1'
|
||||
# - '0.1.2'
|
||||
# - title: 'Persian'
|
||||
# description: ''
|
||||
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/fas.traineddata'
|
||||
@ -306,6 +333,7 @@
|
||||
# compatible_service_versions:
|
||||
# - '0.1.0'
|
||||
# - '0.1.1'
|
||||
# - '0.1.2'
|
||||
# - title: 'Finnish'
|
||||
# description: ''
|
||||
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/fin.traineddata'
|
||||
@ -317,6 +345,7 @@
|
||||
# compatible_service_versions:
|
||||
# - '0.1.0'
|
||||
# - '0.1.1'
|
||||
# - '0.1.2'
|
||||
- title: 'French'
|
||||
description: ''
|
||||
url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/fra.traineddata'
|
||||
@ -328,6 +357,7 @@
|
||||
compatible_service_versions:
|
||||
- '0.1.0'
|
||||
- '0.1.1'
|
||||
- '0.1.2'
|
||||
- title: 'German Fraktur'
|
||||
description: ''
|
||||
url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/frk.traineddata'
|
||||
@ -339,6 +369,7 @@
|
||||
compatible_service_versions:
|
||||
- '0.1.0'
|
||||
- '0.1.1'
|
||||
- '0.1.2'
|
||||
- title: 'French, Middle (ca. 1400-1600)'
|
||||
description: ''
|
||||
url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/frm.traineddata'
|
||||
@ -350,6 +381,7 @@
|
||||
compatible_service_versions:
|
||||
- '0.1.0'
|
||||
- '0.1.1'
|
||||
- '0.1.2'
|
||||
# - title: 'Irish'
|
||||
# description: ''
|
||||
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/gle.traineddata'
|
||||
@ -361,6 +393,7 @@
|
||||
# compatible_service_versions:
|
||||
# - '0.1.0'
|
||||
# - '0.1.1'
|
||||
# - '0.1.2'
|
||||
# - title: 'Galician'
|
||||
# description: ''
|
||||
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/glg.traineddata'
|
||||
@ -372,6 +405,7 @@
|
||||
# compatible_service_versions:
|
||||
# - '0.1.0'
|
||||
# - '0.1.1'
|
||||
# - '0.1.2'
|
||||
- title: 'Greek, Ancient (-1453)'
|
||||
description: ''
|
||||
url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/grc.traineddata'
|
||||
@ -383,6 +417,7 @@
|
||||
compatible_service_versions:
|
||||
- '0.1.0'
|
||||
- '0.1.1'
|
||||
- '0.1.2'
|
||||
# - title: 'Gujarati'
|
||||
# description: ''
|
||||
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/guj.traineddata'
|
||||
@ -394,6 +429,7 @@
|
||||
# compatible_service_versions:
|
||||
# - '0.1.0'
|
||||
# - '0.1.1'
|
||||
# - '0.1.2'
|
||||
# - title: 'Haitian; Haitian Creole'
|
||||
# description: ''
|
||||
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/hat.traineddata'
|
||||
@ -405,6 +441,7 @@
|
||||
# compatible_service_versions:
|
||||
# - '0.1.0'
|
||||
# - '0.1.1'
|
||||
# - '0.1.2'
|
||||
# - title: 'Hebrew'
|
||||
# description: ''
|
||||
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/heb.traineddata'
|
||||
@ -416,6 +453,7 @@
|
||||
# compatible_service_versions:
|
||||
# - '0.1.0'
|
||||
# - '0.1.1'
|
||||
# - '0.1.2'
|
||||
# - title: 'Hindi'
|
||||
# description: ''
|
||||
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/hin.traineddata'
|
||||
@ -427,6 +465,7 @@
|
||||
# compatible_service_versions:
|
||||
# - '0.1.0'
|
||||
# - '0.1.1'
|
||||
# - '0.1.2'
|
||||
# - title: 'Croatian'
|
||||
# description: ''
|
||||
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/hrv.traineddata'
|
||||
@ -438,6 +477,7 @@
|
||||
# compatible_service_versions:
|
||||
# - '0.1.0'
|
||||
# - '0.1.1'
|
||||
# - '0.1.2'
|
||||
# - title: 'Hungarian'
|
||||
# description: ''
|
||||
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/hun.traineddata'
|
||||
@ -449,6 +489,7 @@
|
||||
# compatible_service_versions:
|
||||
# - '0.1.0'
|
||||
# - '0.1.1'
|
||||
# - '0.1.2'
|
||||
# - title: 'Inuktitut'
|
||||
# description: ''
|
||||
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/iku.traineddata'
|
||||
@ -460,6 +501,7 @@
|
||||
# compatible_service_versions:
|
||||
# - '0.1.0'
|
||||
# - '0.1.1'
|
||||
# - '0.1.2'
|
||||
# - title: 'Indonesian'
|
||||
# description: ''
|
||||
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/ind.traineddata'
|
||||
@ -471,6 +513,7 @@
|
||||
# compatible_service_versions:
|
||||
# - '0.1.0'
|
||||
# - '0.1.1'
|
||||
# - '0.1.2'
|
||||
# - title: 'Icelandic'
|
||||
# description: ''
|
||||
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/isl.traineddata'
|
||||
@ -482,6 +525,7 @@
|
||||
# compatible_service_versions:
|
||||
# - '0.1.0'
|
||||
# - '0.1.1'
|
||||
# - '0.1.2'
|
||||
- title: 'Italian'
|
||||
description: ''
|
||||
url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/ita.traineddata'
|
||||
@ -493,6 +537,7 @@
|
||||
compatible_service_versions:
|
||||
- '0.1.0'
|
||||
- '0.1.1'
|
||||
- '0.1.2'
|
||||
- title: 'Italian - Old'
|
||||
description: ''
|
||||
url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/ita_old.traineddata'
|
||||
@ -504,6 +549,7 @@
|
||||
compatible_service_versions:
|
||||
- '0.1.0'
|
||||
- '0.1.1'
|
||||
- '0.1.2'
|
||||
# - title: 'Javanese'
|
||||
# description: ''
|
||||
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/jav.traineddata'
|
||||
@ -515,6 +561,7 @@
|
||||
# compatible_service_versions:
|
||||
# - '0.1.0'
|
||||
# - '0.1.1'
|
||||
# - '0.1.2'
|
||||
# - title: 'Japanese'
|
||||
# description: ''
|
||||
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/jpn.traineddata'
|
||||
@ -526,6 +573,7 @@
|
||||
# compatible_service_versions:
|
||||
# - '0.1.0'
|
||||
# - '0.1.1'
|
||||
# - '0.1.2'
|
||||
# - title: 'Kannada'
|
||||
# description: ''
|
||||
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/kan.traineddata'
|
||||
@ -537,6 +585,7 @@
|
||||
# compatible_service_versions:
|
||||
# - '0.1.0'
|
||||
# - '0.1.1'
|
||||
# - '0.1.2'
|
||||
# - title: 'Georgian'
|
||||
# description: ''
|
||||
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/kat.traineddata'
|
||||
@ -548,6 +597,7 @@
|
||||
# compatible_service_versions:
|
||||
# - '0.1.0'
|
||||
# - '0.1.1'
|
||||
# - '0.1.2'
|
||||
# - title: 'Georgian - Old'
|
||||
# description: ''
|
||||
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/kat_old.traineddata'
|
||||
@ -559,6 +609,7 @@
|
||||
# compatible_service_versions:
|
||||
# - '0.1.0'
|
||||
# - '0.1.1'
|
||||
# - '0.1.2'
|
||||
# - title: 'Kazakh'
|
||||
# description: ''
|
||||
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/kaz.traineddata'
|
||||
@ -570,6 +621,7 @@
|
||||
# compatible_service_versions:
|
||||
# - '0.1.0'
|
||||
# - '0.1.1'
|
||||
# - '0.1.2'
|
||||
# - title: 'Central Khmer'
|
||||
# description: ''
|
||||
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/khm.traineddata'
|
||||
@ -581,6 +633,7 @@
|
||||
# compatible_service_versions:
|
||||
# - '0.1.0'
|
||||
# - '0.1.1'
|
||||
# - '0.1.2'
|
||||
# - title: 'Kirghiz; Kyrgyz'
|
||||
# description: ''
|
||||
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/kir.traineddata'
|
||||
@ -592,6 +645,7 @@
|
||||
# compatible_service_versions:
|
||||
# - '0.1.0'
|
||||
# - '0.1.1'
|
||||
# - '0.1.2'
|
||||
# - title: 'Korean'
|
||||
# description: ''
|
||||
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/kor.traineddata'
|
||||
@ -603,6 +657,7 @@
|
||||
# compatible_service_versions:
|
||||
# - '0.1.0'
|
||||
# - '0.1.1'
|
||||
# - '0.1.2'
|
||||
# - title: 'Kurdish'
|
||||
# description: ''
|
||||
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/kur.traineddata'
|
||||
@ -614,6 +669,7 @@
|
||||
# compatible_service_versions:
|
||||
# - '0.1.0'
|
||||
# - '0.1.1'
|
||||
# - '0.1.2'
|
||||
# - title: 'Lao'
|
||||
# description: ''
|
||||
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/lao.traineddata'
|
||||
@ -625,6 +681,7 @@
|
||||
# compatible_service_versions:
|
||||
# - '0.1.0'
|
||||
# - '0.1.1'
|
||||
# - '0.1.2'
|
||||
# - title: 'Latin'
|
||||
# description: ''
|
||||
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/lat.traineddata'
|
||||
@ -636,6 +693,7 @@
|
||||
# compatible_service_versions:
|
||||
# - '0.1.0'
|
||||
# - '0.1.1'
|
||||
# - '0.1.2'
|
||||
# - title: 'Latvian'
|
||||
# description: ''
|
||||
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/lav.traineddata'
|
||||
@ -647,6 +705,7 @@
|
||||
# compatible_service_versions:
|
||||
# - '0.1.0'
|
||||
# - '0.1.1'
|
||||
# - '0.1.2'
|
||||
# - title: 'Lithuanian'
|
||||
# description: ''
|
||||
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/lit.traineddata'
|
||||
@ -658,6 +717,7 @@
|
||||
# compatible_service_versions:
|
||||
# - '0.1.0'
|
||||
# - '0.1.1'
|
||||
# - '0.1.2'
|
||||
# - title: 'Malayalam'
|
||||
# description: ''
|
||||
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/mal.traineddata'
|
||||
@ -669,6 +729,7 @@
|
||||
# compatible_service_versions:
|
||||
# - '0.1.0'
|
||||
# - '0.1.1'
|
||||
# - '0.1.2'
|
||||
# - title: 'Marathi'
|
||||
# description: ''
|
||||
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/mar.traineddata'
|
||||
@ -680,6 +741,7 @@
|
||||
# compatible_service_versions:
|
||||
# - '0.1.0'
|
||||
# - '0.1.1'
|
||||
# - '0.1.2'
|
||||
# - title: 'Macedonian'
|
||||
# description: ''
|
||||
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/mkd.traineddata'
|
||||
@ -691,6 +753,7 @@
|
||||
# compatible_service_versions:
|
||||
# - '0.1.0'
|
||||
# - '0.1.1'
|
||||
# - '0.1.2'
|
||||
# - title: 'Maltese'
|
||||
# description: ''
|
||||
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/mlt.traineddata'
|
||||
@ -702,6 +765,7 @@
|
||||
# compatible_service_versions:
|
||||
# - '0.1.0'
|
||||
# - '0.1.1'
|
||||
# - '0.1.2'
|
||||
# - title: 'Malay'
|
||||
# description: ''
|
||||
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/msa.traineddata'
|
||||
@ -713,6 +777,7 @@
|
||||
# compatible_service_versions:
|
||||
# - '0.1.0'
|
||||
# - '0.1.1'
|
||||
# - '0.1.2'
|
||||
# - title: 'Burmese'
|
||||
# description: ''
|
||||
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/mya.traineddata'
|
||||
@ -724,6 +789,7 @@
|
||||
# compatible_service_versions:
|
||||
# - '0.1.0'
|
||||
# - '0.1.1'
|
||||
# - '0.1.2'
|
||||
# - title: 'Nepali'
|
||||
# description: ''
|
||||
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/nep.traineddata'
|
||||
@ -735,6 +801,7 @@
|
||||
# compatible_service_versions:
|
||||
# - '0.1.0'
|
||||
# - '0.1.1'
|
||||
# - '0.1.2'
|
||||
# - title: 'Dutch; Flemish'
|
||||
# description: ''
|
||||
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/nld.traineddata'
|
||||
@ -746,6 +813,7 @@
|
||||
# compatible_service_versions:
|
||||
# - '0.1.0'
|
||||
# - '0.1.1'
|
||||
# - '0.1.2'
|
||||
# - title: 'Norwegian'
|
||||
# description: ''
|
||||
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/nor.traineddata'
|
||||
@ -757,6 +825,7 @@
|
||||
# compatible_service_versions:
|
||||
# - '0.1.0'
|
||||
# - '0.1.1'
|
||||
# - '0.1.2'
|
||||
# - title: 'Oriya'
|
||||
# description: ''
|
||||
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/ori.traineddata'
|
||||
@ -768,6 +837,7 @@
|
||||
# compatible_service_versions:
|
||||
# - '0.1.0'
|
||||
# - '0.1.1'
|
||||
# - '0.1.2'
|
||||
# - title: 'Panjabi; Punjabi'
|
||||
# description: ''
|
||||
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/pan.traineddata'
|
||||
@ -779,6 +849,7 @@
|
||||
# compatible_service_versions:
|
||||
# - '0.1.0'
|
||||
# - '0.1.1'
|
||||
# - '0.1.2'
|
||||
# - title: 'Polish'
|
||||
# description: ''
|
||||
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/pol.traineddata'
|
||||
@ -790,6 +861,7 @@
|
||||
# compatible_service_versions:
|
||||
# - '0.1.0'
|
||||
# - '0.1.1'
|
||||
# - '0.1.2'
|
||||
- title: 'Portuguese'
|
||||
description: ''
|
||||
url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/por.traineddata'
|
||||
@ -801,6 +873,7 @@
|
||||
compatible_service_versions:
|
||||
- '0.1.0'
|
||||
- '0.1.1'
|
||||
- '0.1.2'
|
||||
# - title: 'Pushto; Pashto'
|
||||
# description: ''
|
||||
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/pus.traineddata'
|
||||
@ -812,6 +885,7 @@
|
||||
# compatible_service_versions:
|
||||
# - '0.1.0'
|
||||
# - '0.1.1'
|
||||
# - '0.1.2'
|
||||
# - title: 'Romanian; Moldavian; Moldovan'
|
||||
# description: ''
|
||||
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/ron.traineddata'
|
||||
@ -823,6 +897,7 @@
|
||||
# compatible_service_versions:
|
||||
# - '0.1.0'
|
||||
# - '0.1.1'
|
||||
# - '0.1.2'
|
||||
- title: 'Russian'
|
||||
description: ''
|
||||
url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/rus.traineddata'
|
||||
@ -834,6 +909,7 @@
|
||||
compatible_service_versions:
|
||||
- '0.1.0'
|
||||
- '0.1.1'
|
||||
- '0.1.2'
|
||||
# - title: 'Sanskrit'
|
||||
# description: ''
|
||||
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/san.traineddata'
|
||||
@ -845,6 +921,7 @@
|
||||
# compatible_service_versions:
|
||||
# - '0.1.0'
|
||||
# - '0.1.1'
|
||||
# - '0.1.2'
|
||||
# - title: 'Sinhala; Sinhalese'
|
||||
# description: ''
|
||||
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/sin.traineddata'
|
||||
@ -856,6 +933,7 @@
|
||||
# compatible_service_versions:
|
||||
# - '0.1.0'
|
||||
# - '0.1.1'
|
||||
# - '0.1.2'
|
||||
# - title: 'Slovak'
|
||||
# description: ''
|
||||
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/slk.traineddata'
|
||||
@ -867,6 +945,7 @@
|
||||
# compatible_service_versions:
|
||||
# - '0.1.0'
|
||||
# - '0.1.1'
|
||||
# - '0.1.2'
|
||||
# - title: 'Slovenian'
|
||||
# description: ''
|
||||
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/slv.traineddata'
|
||||
@ -878,6 +957,7 @@
|
||||
# compatible_service_versions:
|
||||
# - '0.1.0'
|
||||
# - '0.1.1'
|
||||
# - '0.1.2'
|
||||
- title: 'Spanish; Castilian'
|
||||
description: ''
|
||||
url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/spa.traineddata'
|
||||
@ -889,6 +969,7 @@
|
||||
compatible_service_versions:
|
||||
- '0.1.0'
|
||||
- '0.1.1'
|
||||
- '0.1.2'
|
||||
- title: 'Spanish; Castilian - Old'
|
||||
description: ''
|
||||
url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/spa_old.traineddata'
|
||||
@ -900,6 +981,7 @@
|
||||
compatible_service_versions:
|
||||
- '0.1.0'
|
||||
- '0.1.1'
|
||||
- '0.1.2'
|
||||
# - title: 'Albanian'
|
||||
# description: ''
|
||||
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/sqi.traineddata'
|
||||
@ -911,6 +993,7 @@
|
||||
# compatible_service_versions:
|
||||
# - '0.1.0'
|
||||
# - '0.1.1'
|
||||
# - '0.1.2'
|
||||
# - title: 'Serbian'
|
||||
# description: ''
|
||||
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/srp.traineddata'
|
||||
@ -922,6 +1005,7 @@
|
||||
# compatible_service_versions:
|
||||
# - '0.1.0'
|
||||
# - '0.1.1'
|
||||
# - '0.1.2'
|
||||
# - title: 'Serbian - Latin'
|
||||
# description: ''
|
||||
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/srp_latn.traineddata'
|
||||
@ -933,6 +1017,7 @@
|
||||
# compatible_service_versions:
|
||||
# - '0.1.0'
|
||||
# - '0.1.1'
|
||||
# - '0.1.2'
|
||||
# - title: 'Swahili'
|
||||
# description: ''
|
||||
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/swa.traineddata'
|
||||
@ -944,6 +1029,7 @@
|
||||
# compatible_service_versions:
|
||||
# - '0.1.0'
|
||||
# - '0.1.1'
|
||||
# - '0.1.2'
|
||||
# - title: 'Swedish'
|
||||
# description: ''
|
||||
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/swe.traineddata'
|
||||
@ -955,6 +1041,7 @@
|
||||
# compatible_service_versions:
|
||||
# - '0.1.0'
|
||||
# - '0.1.1'
|
||||
# - '0.1.2'
|
||||
# - title: 'Syriac'
|
||||
# description: ''
|
||||
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/syr.traineddata'
|
||||
@ -966,6 +1053,7 @@
|
||||
# compatible_service_versions:
|
||||
# - '0.1.0'
|
||||
# - '0.1.1'
|
||||
# - '0.1.2'
|
||||
# - title: 'Tamil'
|
||||
# description: ''
|
||||
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/tam.traineddata'
|
||||
@ -977,6 +1065,7 @@
|
||||
# compatible_service_versions:
|
||||
# - '0.1.0'
|
||||
# - '0.1.1'
|
||||
# - '0.1.2'
|
||||
# - title: 'Telugu'
|
||||
# description: ''
|
||||
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/tel.traineddata'
|
||||
@ -988,6 +1077,7 @@
|
||||
# compatible_service_versions:
|
||||
# - '0.1.0'
|
||||
# - '0.1.1'
|
||||
# - '0.1.2'
|
||||
# - title: 'Tajik'
|
||||
# description: ''
|
||||
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/tgk.traineddata'
|
||||
@ -999,6 +1089,7 @@
|
||||
# compatible_service_versions:
|
||||
# - '0.1.0'
|
||||
# - '0.1.1'
|
||||
# - '0.1.2'
|
||||
# - title: 'Tagalog'
|
||||
# description: ''
|
||||
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/tgl.traineddata'
|
||||
@ -1010,6 +1101,7 @@
|
||||
# compatible_service_versions:
|
||||
# - '0.1.0'
|
||||
# - '0.1.1'
|
||||
# - '0.1.2'
|
||||
# - title: 'Thai'
|
||||
# description: ''
|
||||
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/tha.traineddata'
|
||||
@ -1021,6 +1113,7 @@
|
||||
# compatible_service_versions:
|
||||
# - '0.1.0'
|
||||
# - '0.1.1'
|
||||
# - '0.1.2'
|
||||
# - title: 'Tigrinya'
|
||||
# description: ''
|
||||
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/tir.traineddata'
|
||||
@ -1032,6 +1125,7 @@
|
||||
# compatible_service_versions:
|
||||
# - '0.1.0'
|
||||
# - '0.1.1'
|
||||
# - '0.1.2'
|
||||
# - title: 'Turkish'
|
||||
# description: ''
|
||||
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/tur.traineddata'
|
||||
@ -1043,6 +1137,7 @@
|
||||
# compatible_service_versions:
|
||||
# - '0.1.0'
|
||||
# - '0.1.1'
|
||||
# - '0.1.2'
|
||||
# - title: 'Uighur; Uyghur'
|
||||
# description: ''
|
||||
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/uig.traineddata'
|
||||
@ -1054,6 +1149,7 @@
|
||||
# compatible_service_versions:
|
||||
# - '0.1.0'
|
||||
# - '0.1.1'
|
||||
# - '0.1.2'
|
||||
# - title: 'Ukrainian'
|
||||
# description: ''
|
||||
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/ukr.traineddata'
|
||||
@ -1065,6 +1161,7 @@
|
||||
# compatible_service_versions:
|
||||
# - '0.1.0'
|
||||
# - '0.1.1'
|
||||
# - '0.1.2'
|
||||
# - title: 'Urdu'
|
||||
# description: ''
|
||||
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/urd.traineddata'
|
||||
@ -1076,6 +1173,7 @@
|
||||
# compatible_service_versions:
|
||||
# - '0.1.0'
|
||||
# - '0.1.1'
|
||||
# - '0.1.2'
|
||||
# - title: 'Uzbek'
|
||||
# description: ''
|
||||
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/uzb.traineddata'
|
||||
@ -1087,6 +1185,7 @@
|
||||
# compatible_service_versions:
|
||||
# - '0.1.0'
|
||||
# - '0.1.1'
|
||||
# - '0.1.2'
|
||||
# - title: 'Uzbek - Cyrillic'
|
||||
# description: ''
|
||||
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/uzb_cyrl.traineddata'
|
||||
@ -1098,6 +1197,7 @@
|
||||
# compatible_service_versions:
|
||||
# - '0.1.0'
|
||||
# - '0.1.1'
|
||||
# - '0.1.2'
|
||||
# - title: 'Vietnamese'
|
||||
# description: ''
|
||||
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/vie.traineddata'
|
||||
@ -1109,6 +1209,7 @@
|
||||
# compatible_service_versions:
|
||||
# - '0.1.0'
|
||||
# - '0.1.1'
|
||||
# - '0.1.2'
|
||||
# - title: 'Yiddish'
|
||||
# description: ''
|
||||
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/yid.traineddata'
|
||||
@ -1120,3 +1221,4 @@
|
||||
# compatible_service_versions:
|
||||
# - '0.1.0'
|
||||
# - '0.1.1'
|
||||
# - '0.1.2'
|
133
app/models/event_listeners.py
Normal file
@ -0,0 +1,133 @@
|
||||
from datetime import datetime
|
||||
from enum import Enum
|
||||
from app import db, mail, socketio
|
||||
from app.email import create_message
|
||||
from .corpus_file import CorpusFile
|
||||
from .corpus_follower_association import CorpusFollowerAssociation
|
||||
from .corpus import Corpus
|
||||
from .job_input import JobInput
|
||||
from .job_result import JobResult
|
||||
from .job import Job, JobStatus
|
||||
from .spacy_nlp_pipeline_model import SpaCyNLPPipelineModel
|
||||
from .tesseract_ocr_pipeline_model import TesseractOCRPipelineModel
|
||||
from .user import UserSettingJobStatusMailNotificationLevel
|
||||
|
||||
|
||||
def register_event_listeners():
    '''
    Attach this module's handlers to the database models via
    ``db.event.listen``.

    Every model in ``patched_models`` gets the three generic
    ``resource_after_*`` handlers. CorpusFollowerAssociation gets its own
    delete/insert handlers, and Job additionally gets ``job_after_update``
    (registered after the generic update handler).
    '''
    patched_models = (
        Corpus,
        CorpusFile,
        Job,
        JobInput,
        JobResult,
        SpaCyNLPPipelineModel,
        TesseractOCRPipelineModel
    )
    generic_handlers = (
        ('after_delete', resource_after_delete),
        ('after_insert', resource_after_insert),
        ('after_update', resource_after_update)
    )
    for model in patched_models:
        for event_name, handler in generic_handlers:
            db.event.listen(model, event_name, handler)

    association_handlers = (
        ('after_delete', cfa_after_delete),
        ('after_insert', cfa_after_insert)
    )
    for event_name, handler in association_handlers:
        db.event.listen(CorpusFollowerAssociation, event_name, handler)

    db.event.listen(Job, 'after_update', job_after_update)
|
||||
|
||||
|
||||
def resource_after_delete(mapper, connection, resource):
    '''
    Emit a JSON Patch "remove" operation for a deleted resource.

    The patch targets ``resource.jsonpatch_path`` and is sent as a 'PATCH'
    event to the room ``/users/<resource.user_hashid>``.
    '''
    remove_operation = {
        'op': 'remove',
        'path': resource.jsonpatch_path
    }
    user_room = f'/users/{resource.user_hashid}'
    socketio.emit('PATCH', [remove_operation], room=user_room)
|
||||
|
||||
|
||||
def cfa_after_delete(mapper, connection, cfa):
    '''
    Emit a JSON Patch "remove" operation for a deleted
    CorpusFollowerAssociation.

    The patch path is built from the hashids of the corpus owner, the
    corpus and the association; the 'PATCH' event goes to the corpus
    owner's ``/users/<hashid>`` room.
    '''
    owner_room = f'/users/{cfa.corpus.user.hashid}'
    association_path = (
        f'{owner_room}/corpora/{cfa.corpus.hashid}'
        f'/corpus_follower_associations/{cfa.hashid}'
    )
    socketio.emit(
        'PATCH',
        [{'op': 'remove', 'path': association_path}],
        room=owner_room
    )
|
||||
|
||||
|
||||
def resource_after_insert(mapper, connection, resource):
    '''
    Emit a JSON Patch "add" operation for a newly inserted resource.

    The value is ``resource.to_json_serializeable()`` with every
    relationship key of the mapper set to an empty dict. The 'PATCH' event
    is sent to the room ``/users/<resource.user_hashid>``.
    '''
    payload = resource.to_json_serializeable()
    # Relationships are always announced empty, overriding any value the
    # serializer may have produced under the same key.
    payload.update({relationship.key: {} for relationship in mapper.relationships})
    add_operation = {
        'op': 'add',
        'path': resource.jsonpatch_path,
        'value': payload
    }
    user_room = f'/users/{resource.user_hashid}'
    socketio.emit('PATCH', [add_operation], room=user_room)
|
||||
|
||||
|
||||
def cfa_after_insert(mapper, connection, cfa):
    '''
    Emit a JSON Patch "add" operation for a newly inserted
    CorpusFollowerAssociation.

    The value is ``cfa.to_json_serializeable()``; the path and the target
    room are derived from the hashids of the corpus owner, the corpus and
    the association.
    '''
    payload = cfa.to_json_serializeable()
    owner_room = f'/users/{cfa.corpus.user.hashid}'
    association_path = (
        f'{owner_room}/corpora/{cfa.corpus.hashid}'
        f'/corpus_follower_associations/{cfa.hashid}'
    )
    add_operation = {
        'op': 'add',
        'path': association_path,
        'value': payload
    }
    socketio.emit('PATCH', [add_operation], room=owner_room)
|
||||
|
||||
|
||||
def resource_after_update(mapper, connection, resource):
    '''
    Emit JSON Patch "replace" operations for the changed attributes of an
    updated resource.

    Relationship attributes and attributes without changes in their load
    history are skipped. Nothing is emitted when no operation remains.
    '''
    def as_patch_value(value):
        # datetimes become ISO strings with a trailing "Z", enums are
        # represented by their member name, anything else is sent as is.
        if isinstance(value, datetime):
            return f'{value.isoformat()}Z'
        if isinstance(value, Enum):
            return value.name
        return value

    operations = [
        {
            'op': 'replace',
            'path': f'{resource.jsonpatch_path}/{attr.key}',
            'value': as_patch_value(attr.value)
        }
        for attr in db.inspect(resource).attrs
        if attr.key not in mapper.relationships
        and attr.load_history().has_changes()
    ]
    if not operations:
        return
    user_room = f'/users/{resource.user_hashid}'
    socketio.emit('PATCH', operations, room=user_room)
|
||||
|
||||
|
||||
def job_after_update(mapper, connection, job):
    '''
    Send a status notification mail when a job's ``status`` changed.

    No mail is sent when the status has no pending changes, when the
    user's notification level is NONE, or when the level is END and the
    job is neither COMPLETED nor FAILED.
    '''
    status_attr = next(
        (attr for attr in db.inspect(job).attrs if attr.key == 'status'),
        None
    )
    if status_attr is None or not status_attr.load_history().has_changes():
        return

    notification_level = job.user.setting_job_status_mail_notification_level
    if notification_level == UserSettingJobStatusMailNotificationLevel.NONE:
        return
    if (
        notification_level == UserSettingJobStatusMailNotificationLevel.END
        and job.status not in (JobStatus.COMPLETED, JobStatus.FAILED)
    ):
        return

    message = create_message(
        job.user.email,
        f'Status update for your Job "{job.title}"',
        'tasks/email/notification',
        job=job
    )
    mail.send(message)
|
40
app/models/file_mixin.py
Normal file
@ -0,0 +1,40 @@
|
||||
from datetime import datetime
|
||||
from flask import current_app
|
||||
from werkzeug.utils import secure_filename
|
||||
from app import db
|
||||
|
||||
|
||||
class FileMixin:
    '''
    Mixin for db.Model classes. All file related models should use this.

    Provides the ``creation_date``, ``filename`` and ``mimetype`` columns,
    a serializer for them and a ``create`` factory that also stores the
    uploaded file at ``obj.path`` (which the concrete model must provide).
    '''
    creation_date = db.Column(db.DateTime, default=datetime.utcnow)
    filename = db.Column(db.String(255))
    mimetype = db.Column(db.String(255))

    def file_mixin_to_json_serializeable(self, backrefs=False, relationships=False):
        ''' Return the mixin's columns as a JSON serializable dict. '''
        serialized = {}
        serialized['creation_date'] = f'{self.creation_date.isoformat()}Z'
        serialized['filename'] = self.filename
        serialized['mimetype'] = self.mimetype
        return serialized

    @classmethod
    def create(cls, file_storage, **kwargs):
        '''
        Create a database object for ``file_storage`` and save the file.

        ``filename`` and ``mimetype`` may be given as keyword arguments and
        otherwise default to the attributes of ``file_storage``. All other
        keyword arguments are forwarded to the model constructor. When
        saving fails the session is rolled back and the error is re-raised.
        '''
        raw_filename = kwargs.pop('filename', file_storage.filename)
        content_type = kwargs.pop('mimetype', file_storage.mimetype)
        kwargs['filename'] = secure_filename(raw_filename)
        kwargs['mimetype'] = content_type
        obj = cls(**kwargs)
        db.session.add(obj)
        # Flush and refresh before saving: obj.path is only read afterwards.
        db.session.flush(objects=[obj])
        db.session.refresh(obj)
        try:
            file_storage.save(obj.path)
        except (AttributeError, OSError) as e:
            current_app.logger.error(e)
            db.session.rollback()
            raise e
        return obj
|
172
app/models/job.py
Normal file
@ -0,0 +1,172 @@
|
||||
from datetime import datetime
|
||||
from enum import IntEnum
|
||||
from flask import current_app, url_for
|
||||
from flask_hashids import HashidMixin
|
||||
from time import sleep
|
||||
from typing import Union
|
||||
from pathlib import Path
|
||||
import shutil
|
||||
from app import db
|
||||
from app.ext.flask_sqlalchemy import ContainerColumn, IntEnumColumn
|
||||
|
||||
|
||||
class JobStatus(IntEnum):
    ''' Status values of a Job, stored as integers. '''
    INITIALIZING = 1
    SUBMITTED = 2
    QUEUED = 3
    RUNNING = 4
    CANCELING = 5
    CANCELED = 6
    COMPLETED = 7
    FAILED = 8

    @staticmethod
    def get(job_status: Union['JobStatus', int, str]) -> 'JobStatus':
        '''
        Coerce ``job_status`` into a JobStatus member.

        Strings are looked up by member name (KeyError if unknown),
        integers by value (ValueError if unknown); members are returned
        unchanged. Any other type raises TypeError.
        '''
        if isinstance(job_status, str):
            return JobStatus[job_status]
        if not isinstance(job_status, int):
            raise TypeError('job_status must be JobStatus, int, or str')
        # Members are ints too; looking a member up by value yields itself.
        return JobStatus(job_status)
|
||||
|
||||
|
||||
class Job(HashidMixin, db.Model):
    '''
    Class to define Jobs.
    '''
    __tablename__ = 'jobs'
    # Primary key
    id = db.Column(db.Integer, primary_key=True)
    # Foreign keys
    user_id = db.Column(db.Integer, db.ForeignKey('users.id'))
    # Fields
    creation_date = db.Column(db.DateTime(), default=datetime.utcnow)
    description = db.Column(db.String(255))
    end_date = db.Column(db.DateTime())
    service = db.Column(db.String(64))
    service_args = db.Column(ContainerColumn(dict, 255))
    service_version = db.Column(db.String(16))
    status = db.Column(
        IntEnumColumn(JobStatus),
        default=JobStatus.INITIALIZING
    )
    title = db.Column(db.String(32))
    # Relationships
    inputs = db.relationship(
        'JobInput',
        back_populates='job',
        cascade='all, delete-orphan',
        lazy='dynamic'
    )
    results = db.relationship(
        'JobResult',
        back_populates='job',
        cascade='all, delete-orphan',
        lazy='dynamic'
    )
    user = db.relationship(
        'User',
        back_populates='jobs'
    )

    def __repr__(self):
        return f'<Job {self.title}>'

    @property
    def jsonpatch_path(self):
        ''' JSON Patch path of this job below its user's path. '''
        return f'{self.user.jsonpatch_path}/jobs/{self.hashid}'

    @property
    def path(self) -> Path:
        ''' Directory of this job below the user's directory. '''
        return self.user.path / 'jobs' / f'{self.id}'

    @property
    def url(self):
        return url_for('jobs.job', job_id=self.id)

    @property
    def user_hashid(self):
        return self.user.hashid

    @staticmethod
    def create(**kwargs):
        '''
        Create a Job from ``kwargs`` and its directory layout on disk.

        The job directory gets the subdirectories ``inputs``,
        ``pipeline_data`` and ``results``. On an OSError the session is
        rolled back and the error is re-raised.
        '''
        job = Job(**kwargs)
        db.session.add(job)
        db.session.flush(objects=[job])
        db.session.refresh(job)
        directories = [job.path]
        directories.extend(
            job.path / name
            for name in ('inputs', 'pipeline_data', 'results')
        )
        try:
            for directory in directories:
                directory.mkdir()
        except OSError as e:
            # TODO: Potential leftover cleanup
            current_app.logger.error(e)
            db.session.rollback()
            raise
        return job

    def delete(self):
        ''' Delete the job and its inputs and results from the database. '''
        finished_states = (JobStatus.COMPLETED, JobStatus.FAILED)
        if self.status not in finished_states:  # noqa
            self.status = JobStatus.CANCELING
            db.session.commit()
            # Poll once per second until the status reads CANCELED.
            while self.status != JobStatus.CANCELED:
                # In case the daemon handled a job in any way
                if self.status != JobStatus.CANCELING:
                    self.status = JobStatus.CANCELING
                    db.session.commit()
                sleep(1)
                db.session.refresh(self)
        try:
            shutil.rmtree(self.path)
        except OSError as e:
            current_app.logger.error(e)
            db.session.rollback()
            raise e
        db.session.delete(self)

    def restart(self):
        ''' Restart a job - only if the status is failed '''
        if self.status != JobStatus.FAILED:
            raise Exception('Job status is not "failed"')
        for leftover in ('results', 'pyflow.data'):
            shutil.rmtree(self.path / leftover, ignore_errors=True)
        for result in self.results:
            db.session.delete(result)
        self.end_date = None
        self.status = JobStatus.SUBMITTED

    def to_json_serializeable(self, backrefs=False, relationships=False):
        '''
        Return the job as a JSON serializable dict.

        ``backrefs`` adds the serialized user, ``relationships`` adds the
        serialized inputs and results keyed by their hashids.
        '''
        end_date = self.end_date
        serialized = {
            'id': self.hashid,
            'creation_date': f'{self.creation_date.isoformat()}Z',
            'description': self.description,
            'end_date': (
                f'{end_date.isoformat()}Z' if end_date is not None
                else None
            ),
            'service': self.service,
            'service_args': self.service_args,
            'service_version': self.service_version,
            'status': self.status.name,
            'title': self.title
        }
        if backrefs:
            serialized['user'] = \
                self.user.to_json_serializeable(backrefs=True)
        if relationships:
            serialized['inputs'] = {
                job_input.hashid: job_input.to_json_serializeable(relationships=True)
                for job_input in self.inputs
            }
            serialized['results'] = {
                job_result.hashid: job_result.to_json_serializeable(relationships=True)
                for job_result in self.results
            }
        return serialized
|
65
app/models/job_input.py
Normal file
@ -0,0 +1,65 @@
|
||||
from flask import url_for
|
||||
from flask_hashids import HashidMixin
|
||||
from pathlib import Path
|
||||
from app import db
|
||||
from .file_mixin import FileMixin
|
||||
|
||||
|
||||
class JobInput(FileMixin, HashidMixin, db.Model):
    ''' Input file of a Job. '''
    __tablename__ = 'job_inputs'
    # Primary key
    id = db.Column(db.Integer, primary_key=True)
    # Foreign keys
    job_id = db.Column(db.Integer, db.ForeignKey('jobs.id'))
    # Relationships
    job = db.relationship('Job', back_populates='inputs')

    def __repr__(self):
        return f'<JobInput {self.filename}>'

    @property
    def content_url(self):
        ''' URL of the ``jobs.download_job_input`` endpoint for this input. '''
        url_values = {'job_id': self.job.id, 'job_input_id': self.id}
        return url_for('jobs.download_job_input', **url_values)

    @property
    def jsonpatch_path(self):
        return f'{self.job.jsonpatch_path}/inputs/{self.hashid}'

    @property
    def path(self) -> Path:
        ''' Location of the file inside the job's ``inputs`` directory. '''
        return self.job.path / 'inputs' / f'{self.id}'

    @property
    def url(self):
        ''' URL of the job page, anchored at this input. '''
        anchor = f'job-{self.job.hashid}-input-{self.hashid}'
        return url_for('jobs.job', job_id=self.job_id, _anchor=anchor)

    @property
    def user_hashid(self):
        return self.job.user.hashid

    @property
    def user_id(self):
        return self.job.user.id

    def to_json_serializeable(self, backrefs=False, relationships=False):
        '''
        Return the input as a JSON serializable dict.

        ``backrefs`` adds the serialized job; ``relationships`` adds
        nothing because this model has no child relationships.
        '''
        serialized = {'id': self.hashid}
        serialized.update(self.file_mixin_to_json_serializeable())
        if backrefs:
            serialized['job'] = \
                self.job.to_json_serializeable(backrefs=True)
        return serialized
|
71
app/models/job_result.py
Normal file
@ -0,0 +1,71 @@
|
||||
from flask import url_for
|
||||
from flask_hashids import HashidMixin
|
||||
from pathlib import Path
|
||||
from app import db
|
||||
from .file_mixin import FileMixin
|
||||
|
||||
|
||||
class JobResult(FileMixin, HashidMixin, db.Model):
    ''' Result file of a Job. '''
    __tablename__ = 'job_results'
    # Primary key
    id = db.Column(db.Integer, primary_key=True)
    # Foreign keys
    job_id = db.Column(db.Integer, db.ForeignKey('jobs.id'))
    # Fields
    description = db.Column(db.String(255))
    # Relationships
    job = db.relationship('Job', back_populates='results')

    def __repr__(self):
        return f'<JobResult {self.filename}>'

    @property
    def download_url(self):
        ''' URL of the ``jobs.download_job_result`` endpoint for this result. '''
        url_values = {'job_id': self.job_id, 'job_result_id': self.id}
        return url_for('jobs.download_job_result', **url_values)

    @property
    def jsonpatch_path(self):
        return f'{self.job.jsonpatch_path}/results/{self.hashid}'

    @property
    def path(self) -> Path:
        ''' Location of the file inside the job's ``results`` directory. '''
        return self.job.path / 'results' / f'{self.id}'

    @property
    def url(self):
        ''' URL of the job page, anchored at this result. '''
        anchor = f'job-{self.job.hashid}-result-{self.hashid}'
        return url_for('jobs.job', job_id=self.job_id, _anchor=anchor)

    @property
    def user_hashid(self):
        return self.job.user.hashid

    @property
    def user_id(self):
        return self.job.user.id

    def to_json_serializeable(self, backrefs=False, relationships=False):
        '''
        Return the result as a JSON serializable dict.

        ``backrefs`` adds the serialized job; ``relationships`` adds
        nothing because this model has no child relationships.
        '''
        serialized = {
            'id': self.hashid,
            'description': self.description
        }
        serialized.update(
            self.file_mixin_to_json_serializeable(
                backrefs=backrefs,
                relationships=relationships
            )
        )
        if backrefs:
            serialized['job'] = \
                self.job.to_json_serializeable(backrefs=True)
        return serialized
|
100
app/models/role.py
Normal file
@ -0,0 +1,100 @@
|
||||
from enum import IntEnum
|
||||
from flask_hashids import HashidMixin
|
||||
from typing import Union
|
||||
from app import db
|
||||
|
||||
|
||||
class Permission(IntEnum):
    '''
    Defines User permissions as integers by the power of 2. User permission
    can be evaluated using the bitwise operator &.
    '''
    ADMINISTRATE = 1
    CONTRIBUTE = 2
    USE_API = 4

    @staticmethod
    def get(permission: Union['Permission', int, str]) -> 'Permission':
        '''
        Coerce ``permission`` into a Permission member.

        Strings are looked up by member name (KeyError if unknown),
        integers by value (ValueError if unknown); members are returned
        unchanged. Any other type raises TypeError.
        '''
        if isinstance(permission, str):
            return Permission[permission]
        if not isinstance(permission, int):
            raise TypeError('permission must be Permission, int, or str')
        # Members are ints too; looking a member up by value yields itself.
        return Permission(permission)
|
||||
|
||||
|
||||
class Role(HashidMixin, db.Model):
    ''' A named set of permissions, stored as an integer bit field. '''
    __tablename__ = 'roles'
    # Primary key
    id = db.Column(db.Integer, primary_key=True)
    # Fields
    name = db.Column(db.String(64), unique=True)
    default = db.Column(db.Boolean, default=False, index=True)
    permissions = db.Column(db.Integer, default=0)
    # Relationships
    users = db.relationship('User', back_populates='role', lazy='dynamic')

    def __repr__(self):
        return f'<Role {self.name}>'

    def has_permission(self, permission: Union[Permission, int, str]):
        ''' Return whether the bit of ``permission`` is set for this role. '''
        flag = Permission.get(permission).value
        return (self.permissions & flag) == flag

    def add_permission(self, permission: Union[Permission, int, str]):
        ''' Set the bit of ``permission`` unless it is already set. '''
        member = Permission.get(permission)
        if self.has_permission(member):
            return
        self.permissions |= member.value

    def remove_permission(self, permission: Union[Permission, int, str]):
        ''' Clear the bit of ``permission`` if it is currently set. '''
        member = Permission.get(permission)
        if not self.has_permission(member):
            return
        self.permissions &= ~member.value

    def reset_permissions(self):
        self.permissions = 0

    def to_json_serializeable(self, backrefs=False, relationships=False):
        '''
        Return the role as a JSON serializable dict.

        Permissions are listed by member name. ``relationships`` adds the
        serialized users keyed by hashid; ``backrefs`` adds nothing.
        '''
        granted = [
            member.name
            for member in Permission
            if self.has_permission(member.value)
        ]
        serialized = {
            'id': self.hashid,
            'default': self.default,
            'name': self.name,
            'permissions': granted
        }
        if relationships:
            serialized['users'] = {
                user.hashid: user.to_json_serializeable(relationships=True)
                for user in self.users
            }
        return serialized

    @staticmethod
    def insert_defaults():
        '''
        Create or update the built-in roles and commit the session.

        Existing roles are matched by name; their permissions are reset
        and assigned again. Only the role named 'User' is marked default.
        '''
        default_role_name = 'User'
        role_permissions = (
            ('User', ()),
            ('API user', (Permission.USE_API,)),
            ('Contributor', (Permission.CONTRIBUTE,)),
            (
                'Administrator',
                (
                    Permission.ADMINISTRATE,
                    Permission.CONTRIBUTE,
                    Permission.USE_API
                )
            ),
            ('System user', ())
        )
        for role_name, granted in role_permissions:
            role = Role.query.filter_by(name=role_name).first()
            if role is None:
                role = Role(name=role_name)
            role.reset_permissions()
            for permission in granted:
                role.add_permission(permission)
            role.default = role.name == default_role_name
            db.session.add(role)
        db.session.commit()
|
136
app/models/spacy_nlp_pipeline_model.py
Normal file
@ -0,0 +1,136 @@
|
||||
from flask import current_app, url_for
|
||||
from flask_hashids import HashidMixin
|
||||
from tqdm import tqdm
|
||||
from pathlib import Path
|
||||
import requests
|
||||
import yaml
|
||||
from app import db
|
||||
from app.ext.flask_sqlalchemy import ContainerColumn
|
||||
from .file_mixin import FileMixin
|
||||
from .user import User
|
||||
|
||||
|
||||
class SpaCyNLPPipelineModel(FileMixin, HashidMixin, db.Model):
    ''' spaCy NLP pipeline model file owned by a user. '''
    __tablename__ = 'spacy_nlp_pipeline_models'
    # Primary key
    id = db.Column(db.Integer, primary_key=True)
    # Foreign keys
    user_id = db.Column(db.Integer, db.ForeignKey('users.id'))
    # Fields
    title = db.Column(db.String(64))
    description = db.Column(db.String(255))
    version = db.Column(db.String(16))
    compatible_service_versions = db.Column(ContainerColumn(list, 255))
    publisher = db.Column(db.String(128))
    publisher_url = db.Column(db.String(512))
    publishing_url = db.Column(db.String(512))
    publishing_year = db.Column(db.Integer)
    pipeline_name = db.Column(db.String(64))
    is_public = db.Column(db.Boolean, default=False)
    # Relationships
    user = db.relationship('User', back_populates='spacy_nlp_pipeline_models')

    @property
    def path(self) -> Path:
        ''' Location of the model file below the user's directory. '''
        return self.user.path / 'spacy_nlp_pipeline_models' / f'{self.id}'

    @property
    def jsonpatch_path(self):
        return f'{self.user.jsonpatch_path}/spacy_nlp_pipeline_models/{self.hashid}'

    @property
    def url(self):
        return url_for(
            'contributions.spacy_nlp_pipeline_model',
            spacy_nlp_pipeline_model_id=self.id
        )

    @property
    def user_hashid(self):
        return self.user.hashid

    @staticmethod
    def _download_file(url, destination, description):
        '''
        Stream ``url`` into ``destination`` while showing a progress bar.

        The data is written to a ``.part`` file next to ``destination`` and
        only moved into place after the download finished, so an aborted
        download never leaves a truncated file that a later run would
        mistake for a complete model. Raises ``requests.HTTPError`` for
        error responses instead of storing the error page.
        '''
        partial_file = destination.with_name(f'{destination.name}.part')
        try:
            with requests.get(url, stream=True) as response:
                response.raise_for_status()
                # Content-Length is optional; tqdm accepts an unknown total.
                content_length = response.headers.get('Content-Length')
                with tqdm(
                    desc=description,
                    unit="B",
                    unit_scale=True,
                    unit_divisor=1024,
                    total=None if content_length is None else int(content_length)
                ) as pbar:
                    pbar.clear()
                    with partial_file.open('wb') as f:
                        for chunk in response.iter_content(chunk_size=1024):
                            if chunk:  # filter out keep-alive new chunks
                                pbar.update(len(chunk))
                                f.write(chunk)
            partial_file.replace(destination)
        finally:
            # No-op after a successful replace; removes leftovers on failure.
            partial_file.unlink(missing_ok=True)

    @staticmethod
    def insert_defaults(force_download=False):
        '''
        Create or update the default models listed in
        ``default_records/spacy_nlp_pipeline_model.yml`` and commit.

        Records are matched by title and version. New models are assigned
        to the user named 'nopaque'. A model file is downloaded when it
        does not exist yet or when ``force_download`` is true.
        '''
        nopaque_user = User.query.filter_by(username='nopaque').first()
        default_records_file = Path(__file__).parent / 'default_records' / 'spacy_nlp_pipeline_model.yml'
        with default_records_file.open('r') as f:
            default_records = yaml.safe_load(f)
        for m in default_records:
            fields = {
                'compatible_service_versions': m['compatible_service_versions'],
                'description': m['description'],
                'filename': m['url'].split('/')[-1],
                'publisher': m['publisher'],
                'publisher_url': m['publisher_url'],
                'publishing_url': m['publishing_url'],
                'publishing_year': m['publishing_year'],
                'is_public': True,
                'title': m['title'],
                'version': m['version'],
                'pipeline_name': m['pipeline_name']
            }
            model = SpaCyNLPPipelineModel.query.filter_by(title=m['title'], version=m['version']).first()  # noqa
            if model is not None:
                for field_name, field_value in fields.items():
                    setattr(model, field_name, field_value)
            else:
                model = SpaCyNLPPipelineModel(user=nopaque_user, **fields)
                db.session.add(model)
                # The id is needed to build model.path below.
                db.session.flush(objects=[model])
                db.session.refresh(model)
            if not model.path.exists() or force_download:
                SpaCyNLPPipelineModel._download_file(
                    m['url'],
                    model.path,
                    f'{model.title} ({model.filename})'
                )
        db.session.commit()

    def delete(self):
        ''' Remove the model file (if present) and delete the database row. '''
        try:
            self.path.unlink(missing_ok=True)
        except OSError as e:
            current_app.logger.error(e)
            raise
        db.session.delete(self)

    def to_json_serializeable(self, backrefs=False, relationships=False):
        '''
        Return the model as a JSON serializable dict.

        ``backrefs`` adds the serialized user; ``relationships`` adds
        nothing because this model has no child relationships.
        '''
        json_serializeable = {
            'id': self.hashid,
            'compatible_service_versions': self.compatible_service_versions,
            'description': self.description,
            'publisher': self.publisher,
            'publisher_url': self.publisher_url,
            'publishing_url': self.publishing_url,
            'publishing_year': self.publishing_year,
            'pipeline_name': self.pipeline_name,
            'is_public': self.is_public,
            'title': self.title,
            'version': self.version,
            **self.file_mixin_to_json_serializeable()
        }
        if backrefs:
            json_serializeable['user'] = \
                self.user.to_json_serializeable(backrefs=True)
        return json_serializeable
|
132
app/models/tesseract_ocr_pipeline_model.py
Normal file
@ -0,0 +1,132 @@
|
||||
from flask import current_app, url_for
|
||||
from flask_hashids import HashidMixin
|
||||
from tqdm import tqdm
|
||||
from pathlib import Path
|
||||
import requests
|
||||
import yaml
|
||||
from app import db
|
||||
from app.ext.flask_sqlalchemy import ContainerColumn
|
||||
from .file_mixin import FileMixin
|
||||
from .user import User
|
||||
|
||||
|
||||
class TesseractOCRPipelineModel(FileMixin, HashidMixin, db.Model):
    ''' Tesseract OCR pipeline model file owned by a user. '''
    __tablename__ = 'tesseract_ocr_pipeline_models'
    # Primary key
    id = db.Column(db.Integer, primary_key=True)
    # Foreign keys
    user_id = db.Column(db.Integer, db.ForeignKey('users.id'))
    # Fields
    title = db.Column(db.String(64))
    description = db.Column(db.String(255))
    version = db.Column(db.String(16))
    compatible_service_versions = db.Column(ContainerColumn(list, 255))
    publisher = db.Column(db.String(128))
    publisher_url = db.Column(db.String(512))
    publishing_url = db.Column(db.String(512))
    publishing_year = db.Column(db.Integer)
    is_public = db.Column(db.Boolean, default=False)
    # Relationships
    user = db.relationship('User', back_populates='tesseract_ocr_pipeline_models')

    @property
    def path(self) -> Path:
        ''' Location of the model file below the user's directory. '''
        return self.user.path / 'tesseract_ocr_pipeline_models' / f'{self.id}'

    @property
    def jsonpatch_path(self):
        return f'{self.user.jsonpatch_path}/tesseract_ocr_pipeline_models/{self.hashid}'

    @property
    def url(self):
        return url_for(
            'contributions.tesseract_ocr_pipeline_model',
            tesseract_ocr_pipeline_model_id=self.id
        )

    @property
    def user_hashid(self):
        return self.user.hashid

    @staticmethod
    def _download_file(url, destination, description):
        '''
        Stream ``url`` into ``destination`` while showing a progress bar.

        The data is written to a ``.part`` file next to ``destination`` and
        only moved into place after the download finished, so an aborted
        download never leaves a truncated file that a later run would
        mistake for a complete model. Raises ``requests.HTTPError`` for
        error responses instead of storing the error page.
        '''
        partial_file = destination.with_name(f'{destination.name}.part')
        try:
            with requests.get(url, stream=True) as response:
                response.raise_for_status()
                # Content-Length is optional; tqdm accepts an unknown total.
                content_length = response.headers.get('Content-Length')
                with tqdm(
                    desc=description,
                    unit="B",
                    unit_scale=True,
                    unit_divisor=1024,
                    total=None if content_length is None else int(content_length)
                ) as pbar:
                    pbar.clear()
                    with partial_file.open('wb') as f:
                        for chunk in response.iter_content(chunk_size=1024):
                            if chunk:  # filter out keep-alive new chunks
                                pbar.update(len(chunk))
                                f.write(chunk)
            partial_file.replace(destination)
        finally:
            # No-op after a successful replace; removes leftovers on failure.
            partial_file.unlink(missing_ok=True)

    @staticmethod
    def insert_defaults(force_download=False):
        '''
        Create or update the default models listed in
        ``default_records/tesseract_ocr_pipeline_model.yml`` and commit.

        Records are matched by title and version. New models are assigned
        to the user named 'nopaque'. The filename is always
        ``<model id>.traineddata``. A model file is downloaded when it does
        not exist yet or when ``force_download`` is true.
        '''
        nopaque_user = User.query.filter_by(username='nopaque').first()
        default_records_file = Path(__file__).parent / 'default_records' / 'tesseract_ocr_pipeline_model.yml'
        with default_records_file.open('r') as f:
            default_records = yaml.safe_load(f)
        for m in default_records:
            fields = {
                'compatible_service_versions': m['compatible_service_versions'],
                'description': m['description'],
                'publisher': m['publisher'],
                'publisher_url': m['publisher_url'],
                'publishing_url': m['publishing_url'],
                'publishing_year': m['publishing_year'],
                'is_public': True,
                'title': m['title'],
                'version': m['version']
            }
            model = TesseractOCRPipelineModel.query.filter_by(title=m['title'], version=m['version']).first()  # noqa
            if model is not None:
                for field_name, field_value in fields.items():
                    setattr(model, field_name, field_value)
            else:
                model = TesseractOCRPipelineModel(user=nopaque_user, **fields)
                db.session.add(model)
                # The id is needed for the filename and model.path below.
                db.session.flush(objects=[model])
                db.session.refresh(model)
            model.filename = f'{model.id}.traineddata'
            if not model.path.exists() or force_download:
                TesseractOCRPipelineModel._download_file(
                    m['url'],
                    model.path,
                    f'{model.title} ({model.filename})'
                )
        db.session.commit()

    def delete(self):
        ''' Remove the model file (if present) and delete the database row. '''
        try:
            self.path.unlink(missing_ok=True)
        except OSError as e:
            current_app.logger.error(e)
            raise
        db.session.delete(self)

    def to_json_serializeable(self, backrefs=False, relationships=False):
        '''
        Return the model as a JSON serializable dict.

        ``backrefs`` adds the serialized user; ``relationships`` adds
        nothing because this model has no child relationships.
        '''
        json_serializeable = {
            'id': self.hashid,
            'compatible_service_versions': self.compatible_service_versions,
            'description': self.description,
            'publisher': self.publisher,
            'publisher_url': self.publisher_url,
            'publishing_url': self.publishing_url,
            'publishing_year': self.publishing_year,
            'is_public': self.is_public,
            'title': self.title,
            'version': self.version,
            **self.file_mixin_to_json_serializeable()
        }
        if backrefs:
            json_serializeable['user'] = \
                self.user.to_json_serializeable(backrefs=True)
        return json_serializeable
|
48
app/models/token.py
Normal file
@ -0,0 +1,48 @@
|
||||
from datetime import datetime, timedelta
|
||||
from app import db
|
||||
|
||||
|
||||
class Token(db.Model):
    ''' Access/refresh token pair belonging to a user. '''
    __tablename__ = 'tokens'
    # Primary key
    id = db.Column(db.Integer, primary_key=True)
    # Foreign keys
    user_id = db.Column(db.Integer, db.ForeignKey('users.id'))
    # Fields
    access_token = db.Column(db.String(64), index=True)
    access_expiration = db.Column(db.DateTime)
    refresh_token = db.Column(db.String(64), index=True)
    refresh_expiration = db.Column(db.DateTime)
    # Relationships
    user = db.relationship('User', back_populates='tokens')

    def expire(self):
        ''' Mark both the access and the refresh token as expired now. '''
        # One timestamp for both fields so they expire at the same instant.
        now = datetime.utcnow()
        self.access_expiration = now
        self.refresh_expiration = now

    def to_json_serializeable(self, backrefs=False, relationships=False):
        '''
        Return the token as a JSON serializable dict.

        ``backrefs`` adds the serialized user; ``relationships`` adds
        nothing because this model has no child relationships.
        '''
        json_serializeable = {
            # NOTE(review): this class does not mix in HashidMixin, so
            # ``self.hashid`` is not defined here and raised an
            # AttributeError. Fall back to the primary key until the class
            # gets a hashid - confirm which representation clients expect.
            'id': getattr(self, 'hashid', self.id),
            'access_token': self.access_token,
            'access_expiration': (
                None if self.access_expiration is None
                else f'{self.access_expiration.isoformat()}Z'
            ),
            'refresh_token': self.refresh_token,
            'refresh_expiration': (
                None if self.refresh_expiration is None
                else f'{self.refresh_expiration.isoformat()}Z'
            )
        }
        if backrefs:
            json_serializeable['user'] = \
                self.user.to_json_serializeable(backrefs=True)
        return json_serializeable

    @staticmethod
    def clean():
        """Remove any tokens that have been expired for more than a day."""
        yesterday = datetime.utcnow() - timedelta(days=1)
        Token.query.filter(Token.refresh_expiration < yesterday).delete()
|
452
app/models/user.py
Normal file
@ -0,0 +1,452 @@
|
||||
from datetime import datetime, timedelta
|
||||
from enum import IntEnum
|
||||
from flask import current_app, url_for
|
||||
from flask_hashids import HashidMixin
|
||||
from flask_login import UserMixin
|
||||
from sqlalchemy.ext.associationproxy import association_proxy
|
||||
from pathlib import Path
|
||||
from typing import Union
|
||||
from werkzeug.security import generate_password_hash, check_password_hash
|
||||
import jwt
|
||||
import re
|
||||
import secrets
|
||||
import shutil
|
||||
from app import db, hashids
|
||||
from app.ext.flask_sqlalchemy import IntEnumColumn
|
||||
from .corpus import Corpus
|
||||
from .corpus_follower_association import CorpusFollowerAssociation
|
||||
from .corpus_follower_role import CorpusFollowerRole
|
||||
from .role import Permission, Role
|
||||
from .token import Token
|
||||
|
||||
|
||||
class ProfilePrivacySettings(IntEnum):
    ''' Profile privacy flags; the values are distinct powers of 2. '''
    SHOW_EMAIL = 1
    SHOW_LAST_SEEN = 2
    SHOW_MEMBER_SINCE = 4

    @staticmethod
    def get(profile_privacy_setting: Union['ProfilePrivacySettings', int, str]) -> 'ProfilePrivacySettings':
        '''
        Coerce ``profile_privacy_setting`` into a ProfilePrivacySettings
        member.

        Strings are looked up by member name (KeyError if unknown),
        integers by value (ValueError if unknown); members are returned
        unchanged. Any other type raises TypeError.
        '''
        if isinstance(profile_privacy_setting, str):
            return ProfilePrivacySettings[profile_privacy_setting]
        if not isinstance(profile_privacy_setting, int):
            raise TypeError('profile_privacy_setting must be ProfilePrivacySettings, int, or str')
        # Members are ints too; looking a member up by value yields itself.
        return ProfilePrivacySettings(profile_privacy_setting)
|
||||
|
||||
|
||||
class UserSettingJobStatusMailNotificationLevel(IntEnum):
    ''' Per-user level for job status notification mails. '''
    # No notification mails at all.
    NONE = 1
    # Only when a job reaches COMPLETED or FAILED.
    END = 2
    # On every status change.
    ALL = 3
|
||||
|
||||
|
||||
class User(HashidMixin, UserMixin, db.Model):
    """A user account stored in the ``users`` table.

    The model owns the user's avatar, corpora, corpus follower associations,
    jobs, pipeline models and auth tokens (all configured with a
    ``delete-orphan`` cascade) and provides helpers for JWT based account
    confirmation, password reset and corpus sharing.
    """
    __tablename__ = 'users'
    # Primary key
    id = db.Column(db.Integer, primary_key=True)
    # Foreign keys
    role_id = db.Column(db.Integer, db.ForeignKey('roles.id'))
    # Fields
    email = db.Column(db.String(254), index=True, unique=True)
    username = db.Column(db.String(64), index=True, unique=True)
    # Not a column: pattern describing the characters allowed in usernames.
    username_pattern = re.compile(r'^[A-Za-zÄÖÜäöüß0-9_.]*$')
    password_hash = db.Column(db.String(128))
    confirmed = db.Column(db.Boolean, default=False)
    terms_of_use_accepted = db.Column(db.Boolean, default=False)
    member_since = db.Column(db.DateTime(), default=datetime.utcnow)
    setting_job_status_mail_notification_level = db.Column(
        IntEnumColumn(UserSettingJobStatusMailNotificationLevel),
        default=UserSettingJobStatusMailNotificationLevel.END
    )
    last_seen = db.Column(db.DateTime())
    full_name = db.Column(db.String(64))
    about_me = db.Column(db.String(256))
    location = db.Column(db.String(64))
    website = db.Column(db.String(128))
    organization = db.Column(db.String(128))
    is_public = db.Column(db.Boolean, default=False)
    # Bit field combining ProfilePrivacySettings values.
    profile_privacy_settings = db.Column(db.Integer(), default=0)
    # Relationships
    avatar = db.relationship(
        'Avatar',
        back_populates='user',
        cascade='all, delete-orphan',
        uselist=False
    )
    corpora = db.relationship(
        'Corpus',
        back_populates='user',
        cascade='all, delete-orphan',
        lazy='dynamic'
    )
    corpus_follower_associations = db.relationship(
        'CorpusFollowerAssociation',
        back_populates='follower',
        cascade='all, delete-orphan'
    )
    followed_corpora = association_proxy(
        'corpus_follower_associations',
        'corpus',
        creator=lambda c: CorpusFollowerAssociation(corpus=c)
    )
    jobs = db.relationship(
        'Job',
        back_populates='user',
        cascade='all, delete-orphan',
        lazy='dynamic'
    )
    role = db.relationship(
        'Role',
        back_populates='users'
    )
    spacy_nlp_pipeline_models = db.relationship(
        'SpaCyNLPPipelineModel',
        back_populates='user',
        cascade='all, delete-orphan',
        lazy='dynamic'
    )
    tesseract_ocr_pipeline_models = db.relationship(
        'TesseractOCRPipelineModel',
        back_populates='user',
        cascade='all, delete-orphan',
        lazy='dynamic'
    )
    tokens = db.relationship(
        'Token',
        back_populates='user',
        cascade='all, delete-orphan',
        lazy='dynamic'
    )

    def __init__(self, **kwargs):
        """Create a user, assigning a role when none is given.

        Without an explicit ``role`` the user becomes 'Administrator' if the
        e-mail equals the configured ``NOPAQUE_ADMIN`` address, otherwise the
        role flagged as default is used.
        """
        if 'role' not in kwargs:
            # ``email`` is optional; a missing e-mail must neither raise nor
            # accidentally match an unset NOPAQUE_ADMIN value.
            email = kwargs.get('email')
            is_admin = (
                email is not None
                and email == current_app.config['NOPAQUE_ADMIN']
            )
            kwargs['role'] = (
                Role.query.filter_by(name='Administrator').first()
                if is_admin
                else Role.query.filter_by(default=True).first()
            )
        super().__init__(**kwargs)

    def __repr__(self):
        return f'<User {self.username}>'

    @property
    def jsonpatch_path(self):
        """JSON pointer under which this user appears in JSON patches."""
        return f'/users/{self.hashid}'

    @property
    def password(self):
        """Write-only attribute; reading it always raises AttributeError."""
        raise AttributeError('password is not a readable attribute')

    @password.setter
    def password(self, password):
        # Only the hash is ever stored.
        self.password_hash = generate_password_hash(password)

    @property
    def path(self) -> Path:
        """Directory of this user below ``NOPAQUE_DATA_DIR``."""
        return current_app.config.get('NOPAQUE_DATA_DIR') / 'users' / f'{self.id}'

    @staticmethod
    def create(**kwargs):
        """Add a new user to the session and create its data directories.

        The user is flushed (not committed) so that its id, which is part of
        the directory path, is available. If creating a directory fails, the
        error is logged, the session is rolled back and the OSError is
        re-raised.
        """
        user = User(**kwargs)
        db.session.add(user)
        db.session.flush(objects=[user])
        db.session.refresh(user)
        user_spacy_nlp_pipeline_models_dir = user.path / 'spacy_nlp_pipeline_models'
        user_tesseract_ocr_pipeline_models_dir = user.path / 'tesseract_ocr_pipeline_models'
        user_corpora_dir = user.path / 'corpora'
        user_jobs_dir = user.path / 'jobs'
        try:
            user.path.mkdir()
            user_spacy_nlp_pipeline_models_dir.mkdir()
            user_tesseract_ocr_pipeline_models_dir.mkdir()
            user_corpora_dir.mkdir()
            user_jobs_dir.mkdir()
        except OSError as e:
            # TODO: Potential leftover cleanup
            current_app.logger.error(e)
            db.session.rollback()
            raise
        return user

    @staticmethod
    def insert_defaults():
        """Ensure the 'nopaque' user exists with the 'System user' role."""
        nopaque_user = User.query.filter_by(username='nopaque').first()
        system_user_role = Role.query.filter_by(name='System user').first()
        if nopaque_user is None:
            nopaque_user = User.create(
                username='nopaque',
                role=system_user_role
            )
            db.session.add(nopaque_user)
        elif nopaque_user.role != system_user_role:
            nopaque_user.role = system_user_role
        db.session.commit()

    @staticmethod
    def reset_password(token, new_password):
        """Set a new password for the user identified by a reset token.

        Returns True on success and False if the token is invalid, was
        issued for another purpose or refers to an unknown user. The change
        is added to the session but not committed.
        """
        try:
            payload = jwt.decode(
                token,
                current_app.config['SECRET_KEY'],
                algorithms=['HS256'],
                issuer=current_app.config['SERVER_NAME'],
                options={'require': ['exp', 'iat', 'iss', 'purpose', 'sub']}
            )
        except jwt.PyJWTError:
            return False
        if payload.get('purpose') != 'User.reset_password':
            return False
        user_hashid = payload.get('sub')
        user_id = hashids.decode(user_hashid)
        user = User.query.get(user_id)
        if user is None:
            return False
        user.password = new_password
        db.session.add(user)
        return True

    @staticmethod
    def verify_access_token(access_token, refresh_token=None):
        """Return the user owning a valid access token, else None.

        A matching, unexpired token updates the user's ``last_seen`` and
        commits the session. Users with the 'System user' role are never
        returned. ``refresh_token`` is accepted but not used.
        """
        token = Token.query.filter(Token.access_token == access_token).first()
        if token is not None:
            if token.access_expiration > datetime.utcnow():
                token.user.ping()
                db.session.commit()
                if token.user.role.name != 'System user':
                    return token.user

    @staticmethod
    def verify_refresh_token(refresh_token, access_token):
        """Return the token matching both values if it can still be refreshed.

        If the pair is found but its refresh expiration has passed, all
        tokens of the owning user are revoked and None is returned.
        """
        token = Token.query.filter((Token.refresh_token == refresh_token) & (Token.access_token == access_token)).first()
        if token is not None:
            if token.refresh_expiration > datetime.utcnow():
                return token
            # someone tried to refresh with an expired token
            # revoke all tokens from this user as a precaution
            token.user.revoke_auth_tokens()
            db.session.commit()

    def can(self, permission):
        """Tell whether the user's role grants ``permission``."""
        return self.role is not None and self.role.has_permission(permission)

    def confirm(self, confirmation_token):
        """Mark the account as confirmed if the token was issued for it.

        Returns False for invalid tokens, tokens with another purpose and
        tokens issued for a different user. The change is not committed.
        """
        try:
            payload = jwt.decode(
                confirmation_token,
                current_app.config['SECRET_KEY'],
                algorithms=['HS256'],
                issuer=current_app.config['SERVER_NAME'],
                options={'require': ['exp', 'iat', 'iss', 'purpose', 'sub']}
            )
        except jwt.PyJWTError:
            return False
        if payload.get('purpose') != 'user.confirm':
            return False
        if payload.get('sub') != self.hashid:
            return False
        self.confirmed = True
        db.session.add(self)
        return True

    def delete(self):
        """Remove the user's data directory and mark the row for deletion."""
        shutil.rmtree(self.path, ignore_errors=True)
        db.session.delete(self)

    def generate_auth_token(self):
        """Build a new, not yet persisted, access/refresh token pair.

        The access token is valid for 15 minutes, the refresh token for
        7 days.
        """
        return Token(
            access_token=secrets.token_urlsafe(),
            access_expiration=datetime.utcnow() + timedelta(minutes=15),
            refresh_token=secrets.token_urlsafe(),
            refresh_expiration=datetime.utcnow() + timedelta(days=7),
            user=self
        )

    def generate_confirm_token(self, expiration=3600):
        """Return a signed JWT confirming this account.

        ``expiration`` is the validity period in seconds.
        """
        now = datetime.utcnow()
        payload = {
            'exp': now + timedelta(seconds=expiration),
            'iat': now,
            'iss': current_app.config['SERVER_NAME'],
            'purpose': 'user.confirm',
            'sub': self.hashid
        }
        return jwt.encode(
            payload,
            current_app.config['SECRET_KEY'],
            algorithm='HS256'
        )

    def generate_reset_password_token(self, expiration=3600):
        """Return a signed JWT allowing a password reset for this account.

        ``expiration`` is the validity period in seconds.
        """
        now = datetime.utcnow()
        payload = {
            'exp': now + timedelta(seconds=expiration),
            'iat': now,
            'iss': current_app.config['SERVER_NAME'],
            'purpose': 'User.reset_password',
            'sub': self.hashid
        }
        return jwt.encode(
            payload,
            current_app.config['SECRET_KEY'],
            algorithm='HS256'
        )

    def is_administrator(self):
        return self.can(Permission.ADMINISTRATE)

    def ping(self):
        """Record the current UTC time as ``last_seen``."""
        self.last_seen = datetime.utcnow()

    def revoke_auth_tokens(self):
        """Mark every auth token of this user for deletion."""
        for token in self.tokens:
            db.session.delete(token)

    def verify_password(self, password):
        """Check ``password`` against the stored hash.

        Always False for users with the 'System user' role and for accounts
        that have no password set.
        """
        if self.role.name == 'System user':
            return False
        if self.password_hash is None:
            # No password was ever set; there is nothing to compare against.
            return False
        return check_password_hash(self.password_hash, password)

    #region Profile Privacy settings
    def has_profile_privacy_setting(self, setting):
        """Tell whether ``setting`` is enabled in the privacy bit field."""
        s = ProfilePrivacySettings.get(setting)
        return self.profile_privacy_settings & s.value == s.value

    def add_profile_privacy_setting(self, setting):
        """Enable ``setting``; a no-op if it is already enabled."""
        s = ProfilePrivacySettings.get(setting)
        # Setting the bit is idempotent, unlike adding the value.
        self.profile_privacy_settings |= s.value

    def remove_profile_privacy_setting(self, setting):
        """Disable ``setting``; a no-op if it is not enabled."""
        s = ProfilePrivacySettings.get(setting)
        # Clearing the bit is idempotent, unlike subtracting the value.
        self.profile_privacy_settings &= ~s.value

    def reset_profile_privacy_settings(self):
        self.profile_privacy_settings = 0
    #endregion Profile Privacy settings

    def follow_corpus(self, corpus, role=None):
        """Follow ``corpus`` with ``role`` or update the role if following.

        Without ``role`` the corpus follower role flagged as default is used.
        """
        if role is None:
            cfr = CorpusFollowerRole.query.filter_by(default=True).first()
        else:
            cfr = role
        if self.is_following_corpus(corpus):
            cfa = CorpusFollowerAssociation.query.filter_by(corpus=corpus, follower=self).first()
            if cfa.role != cfr:
                cfa.role = cfr
        else:
            cfa = CorpusFollowerAssociation(corpus=corpus, role=cfr, follower=self)
            db.session.add(cfa)

    def unfollow_corpus(self, corpus):
        if not self.is_following_corpus(corpus):
            return
        self.followed_corpora.remove(corpus)

    def is_following_corpus(self, corpus):
        return corpus in self.followed_corpora

    def generate_follow_corpus_token(self, corpus_hashid, role_name, expiration=7):
        """Return a signed JWT that lets its bearer follow a corpus.

        ``expiration`` may be a ``datetime`` (absolute expiry, used as is),
        a ``timedelta`` (added to the current time) or a number, which is
        interpreted as days from now.

        NOTE(review): the unit of a numeric ``expiration`` is not stated
        anywhere in this module; days is assumed from the default of 7.
        Previously the raw number was written into the ``exp`` claim, which
        PyJWT reads as seconds since the epoch, so tokens created with the
        default were already expired.
        """
        now = datetime.utcnow()
        if isinstance(expiration, datetime):
            expires_at = expiration
        elif isinstance(expiration, timedelta):
            expires_at = now + expiration
        else:
            expires_at = now + timedelta(days=expiration)
        payload = {
            'exp': expires_at,
            'iat': now,
            'iss': current_app.config['SERVER_NAME'],
            'purpose': 'User.follow_corpus',
            'role_name': role_name,
            'sub': corpus_hashid
        }
        return jwt.encode(
            payload,
            current_app.config['SECRET_KEY'],
            algorithm='HS256'
        )

    def follow_corpus_by_token(self, token):
        """Follow the corpus named in a share token.

        Returns True on success and False if the token is invalid, was
        issued for another purpose or names an unknown corpus or role.
        """
        try:
            payload = jwt.decode(
                token,
                current_app.config['SECRET_KEY'],
                algorithms=['HS256'],
                issuer=current_app.config['SERVER_NAME'],
                options={'require': ['exp', 'iat', 'iss', 'purpose', 'role_name', 'sub']}
            )
        except jwt.PyJWTError:
            return False
        if payload.get('purpose') != 'User.follow_corpus':
            return False
        corpus_hashid = payload.get('sub')
        corpus_id = hashids.decode(corpus_hashid)
        # Plain ``get`` so that an unknown corpus is reported through the
        # return value like every other failure instead of aborting the
        # request with a 404 from inside the model.
        corpus = Corpus.query.get(corpus_id)
        if corpus is None:
            return False
        role_name = payload.get('role_name')
        role = CorpusFollowerRole.query.filter_by(name=role_name).first()
        if role is None:
            return False
        self.follow_corpus(corpus, role)
        return True

    def to_json_serializeable(self, backrefs=False, relationships=False, filter_by_privacy_settings=False):
        """Return a JSON serializable dict describing the user.

        Args:
            backrefs: also include the user's role.
            relationships: also include follower associations, corpora,
                jobs and pipeline models, each keyed by hashid.
            filter_by_privacy_settings: drop ``email``, ``last_seen`` and
                ``member_since`` unless the matching privacy setting is
                enabled.
        """
        json_serializeable = {
            'id': self.hashid,
            'confirmed': self.confirmed,
            'avatar': url_for('users.user_avatar', user_id=self.id),
            'email': self.email,
            'last_seen': (
                None if self.last_seen is None
                else f'{self.last_seen.isoformat()}Z'
            ),
            'member_since': f'{self.member_since.isoformat()}Z',
            'username': self.username,
            'full_name': self.full_name,
            'about_me': self.about_me,
            'website': self.website,
            'location': self.location,
            'organization': self.organization,
            'job_status_mail_notification_level': \
                self.setting_job_status_mail_notification_level.name,
            'profile_privacy_settings': {
                'is_public': self.is_public,
                'show_email': self.has_profile_privacy_setting(ProfilePrivacySettings.SHOW_EMAIL),
                'show_last_seen': self.has_profile_privacy_setting(ProfilePrivacySettings.SHOW_LAST_SEEN),
                'show_member_since': self.has_profile_privacy_setting(ProfilePrivacySettings.SHOW_MEMBER_SINCE)
            }
        }
        if backrefs:
            json_serializeable['role'] = \
                self.role.to_json_serializeable(backrefs=True)
        if relationships:
            json_serializeable['corpus_follower_associations'] = {
                x.hashid: x.to_json_serializeable()
                for x in self.corpus_follower_associations
            }
            json_serializeable['corpora'] = {
                x.hashid: x.to_json_serializeable(relationships=True)
                for x in self.corpora
            }
            json_serializeable['jobs'] = {
                x.hashid: x.to_json_serializeable(relationships=True)
                for x in self.jobs
            }
            json_serializeable['tesseract_ocr_pipeline_models'] = {
                x.hashid: x.to_json_serializeable(relationships=True)
                for x in self.tesseract_ocr_pipeline_models
            }
            json_serializeable['spacy_nlp_pipeline_models'] = {
                x.hashid: x.to_json_serializeable(relationships=True)
                for x in self.spacy_nlp_pipeline_models
            }

        if filter_by_privacy_settings:
            if not self.has_profile_privacy_setting(ProfilePrivacySettings.SHOW_EMAIL):
                json_serializeable.pop('email')
            if not self.has_profile_privacy_setting(ProfilePrivacySettings.SHOW_LAST_SEEN):
                json_serializeable.pop('last_seen')
            if not self.has_profile_privacy_setting(ProfilePrivacySettings.SHOW_MEMBER_SINCE):
                json_serializeable.pop('member_since')
        return json_serializeable
|
@ -1,12 +1,11 @@
|
||||
from flask import Blueprint
|
||||
from flask_login import login_required
|
||||
import os
|
||||
from pathlib import Path
|
||||
import yaml
|
||||
|
||||
|
||||
services_file = \
|
||||
os.path.join(os.path.dirname(os.path.abspath(__file__)), 'services.yml')
|
||||
with open(services_file, 'r') as f:
|
||||
services_file = Path(__file__).parent / 'services.yml'
|
||||
with services_file.open('r') as f:
|
||||
SERVICES = yaml.safe_load(f)
|
||||
|
||||
bp = Blueprint('services', __name__)
|
||||
|
@ -6,6 +6,7 @@ from app import db, hashids
|
||||
from app.models import (
|
||||
Job,
|
||||
JobInput,
|
||||
JobResult,
|
||||
JobStatus,
|
||||
TesseractOCRPipelineModel,
|
||||
SpaCyNLPPipelineModel
|
||||
@ -74,6 +75,8 @@ def tesseract_ocr_pipeline():
|
||||
version = request.args.get('version', service_manifest['latest_version'])
|
||||
if version not in service_manifest['versions']:
|
||||
abort(404)
|
||||
job_results = JobResult.query.all()
|
||||
choosable_job_ids = [job_result.job.hashid for job_result in job_results if job_result.job.service == "file-setup-pipeline" and job_result.filename.endswith('.pdf')]
|
||||
form = CreateTesseractOCRPipelineJobForm(prefix='create-job-form', version=version)
|
||||
if form.is_submitted():
|
||||
if not form.validate():
|
||||
@ -111,6 +114,7 @@ def tesseract_ocr_pipeline():
|
||||
return render_template(
|
||||
'services/tesseract_ocr_pipeline.html.j2',
|
||||
title=service_manifest['name'],
|
||||
choosable_job_ids=choosable_job_ids,
|
||||
form=form,
|
||||
tesseract_ocr_pipeline_models=tesseract_ocr_pipeline_models,
|
||||
user_tesseract_ocr_pipeline_models_count=user_tesseract_ocr_pipeline_models_count
|
||||
|
@ -10,7 +10,7 @@ file-setup-pipeline:
|
||||
tesseract-ocr-pipeline:
|
||||
name: 'Tesseract OCR Pipeline'
|
||||
publisher: 'Bielefeld University - CRC 1288 - INF'
|
||||
latest_version: '0.1.1'
|
||||
latest_version: '0.1.2'
|
||||
versions:
|
||||
0.1.0:
|
||||
methods:
|
||||
@ -23,6 +23,12 @@ tesseract-ocr-pipeline:
|
||||
- 'ocropus_nlbin_threshold'
|
||||
publishing_year: 2022
|
||||
url: 'https://gitlab.ub.uni-bielefeld.de/sfb1288inf/tesseract-ocr-pipeline/-/releases/v0.1.1'
|
||||
0.1.2:
|
||||
methods:
|
||||
- 'binarization'
|
||||
- 'ocropus_nlbin_threshold'
|
||||
publishing_year: 2023
|
||||
url: 'https://gitlab.ub.uni-bielefeld.de/sfb1288inf/tesseract-ocr-pipeline/-/releases/v0.1.2'
|
||||
transkribus-htr-pipeline:
|
||||
name: 'Transkribus HTR Pipeline'
|
||||
publisher: 'Bielefeld University - CRC 1288 - INF'
|
||||
@ -41,7 +47,7 @@ transkribus-htr-pipeline:
|
||||
spacy-nlp-pipeline:
|
||||
name: 'SpaCy NLP Pipeline'
|
||||
publisher: 'Bielefeld University - CRC 1288 - INF'
|
||||
latest_version: '0.1.2'
|
||||
latest_version: '0.1.1'
|
||||
versions:
|
||||
0.1.0:
|
||||
methods:
|
||||
@ -56,5 +62,5 @@ spacy-nlp-pipeline:
|
||||
0.1.2:
|
||||
methods:
|
||||
- 'encoding_detection'
|
||||
publishing_year: 2022
|
||||
publishing_year: 2024
|
||||
url: 'https://gitlab.ub.uni-bielefeld.de/sfb1288inf/spacy-nlp-pipeline/-/releases/v0.1.2'
|
||||
|
@ -1,132 +1,108 @@
|
||||
.modal-conent {
|
||||
#corpus-analysis-concordance-query-builder-input-field {
|
||||
border-bottom: #9E9E9E 1px solid;
|
||||
min-height: 38px;
|
||||
margin-top: 23px;
|
||||
}
|
||||
|
||||
#corpus-analysis-concordance-query-builder-input-field-placeholder {
|
||||
color: #9E9E9E;
|
||||
}
|
||||
|
||||
.modal-content {
|
||||
overflow-x: hidden;
|
||||
}
|
||||
|
||||
#concordance-query-builder {
|
||||
#corpus-analysis-concordance-positional-attr-modal, #corpus-analysis-concordance-corpus-analysis-concordance-structural-attr-modal {
|
||||
width: 70%;
|
||||
}
|
||||
|
||||
#concordance-query-builder nav {
|
||||
background-color: #6B3F89;
|
||||
margin-top: -25px;
|
||||
margin-left: -25px;
|
||||
width: 105%;
|
||||
}
|
||||
|
||||
#query-builder-nav{
|
||||
padding-left: 15px;
|
||||
}
|
||||
|
||||
#close-query-builder {
|
||||
margin-right: 50px;
|
||||
cursor: pointer;
|
||||
}
|
||||
|
||||
#general-options-query-builder-tutorial-info-icon {
|
||||
#corpus-analysis-concordance-general-options-query-builder-tutorial-info-icon {
|
||||
color: black;
|
||||
}
|
||||
|
||||
#your-query {
|
||||
border-bottom-style: solid;
|
||||
border-bottom-width: 1px;
|
||||
}
|
||||
|
||||
#insert-query-button {
|
||||
#corpus-analysis-concordance-insert-query-button {
|
||||
background-color: #00426f;
|
||||
text-align: center;
|
||||
}
|
||||
|
||||
#structural-attr h6 {
|
||||
margin-left: 15px;
|
||||
}
|
||||
|
||||
#add-structural-attribute-tutorial-info-icon {
|
||||
color: black;
|
||||
}
|
||||
|
||||
#sentence {
|
||||
background-color:#FD9720;
|
||||
}
|
||||
|
||||
#entity {
|
||||
background-color: #A6E22D;
|
||||
}
|
||||
|
||||
#text-annotation {
|
||||
background-color: #2FBBAB;
|
||||
}
|
||||
|
||||
#no-value-metadata-message {
|
||||
padding-top: 25px;
|
||||
margin-left: -20px;
|
||||
}
|
||||
|
||||
#token-kind-selector {
|
||||
.attr-modal-header {
|
||||
background-color: #f2eff7;
|
||||
padding: 15px;
|
||||
border-top-style: solid;
|
||||
border-color: #6B3F89;
|
||||
padding-left: 25px;
|
||||
border-top: 10px solid #6B3F89;
|
||||
margin-left: -24px;
|
||||
margin-top: -24px;
|
||||
margin-right: -24px;
|
||||
}
|
||||
|
||||
#token-kind-selector.s5 {
|
||||
margin-top: 15px;
|
||||
}
|
||||
|
||||
#token-kind-selector h6 {
|
||||
.attr-modal-header h6 {
|
||||
margin-left: 15px;
|
||||
}
|
||||
|
||||
#token-tutorial-info-icon {
|
||||
#corpus-analysis-concordance-add-structural-attribute-tutorial-info-icon {
|
||||
color: black;
|
||||
}
|
||||
|
||||
#no-value-message {
|
||||
[data-structural-attr-modal-action-button="sentence"]{
|
||||
background-color:#FD9720 !important;
|
||||
}
|
||||
|
||||
[data-structural-attr-modal-action-button="entity"]{
|
||||
background-color: #A6E22D !important;
|
||||
}
|
||||
|
||||
[data-structural-attr-modal-action-button="meta-data"]{
|
||||
background-color: #2FBBAB !important;
|
||||
}
|
||||
|
||||
#corpus-analysis-concordance-no-value-metadata-message {
|
||||
padding-top: 25px;
|
||||
margin-left: -20px;
|
||||
}
|
||||
|
||||
#token-edit-options h6 {
|
||||
margin-left: 15px;
|
||||
.attr-modal-header.input-field {
|
||||
margin-left: 41px;
|
||||
}
|
||||
|
||||
#edit-options-tutorial-info-icon {
|
||||
#corpus-analysis-concordance-token-attr {
|
||||
margin-left: 41px;
|
||||
}
|
||||
|
||||
#corpus-analysis-concordance-token-tutorial-info-icon {
|
||||
color: black;
|
||||
}
|
||||
|
||||
#incidence-modifiers-button a{
|
||||
background-color: #2FBBAB;
|
||||
#corpus-analysis-concordance-no-value-message {
|
||||
padding-top: 25px;
|
||||
margin-left: -20px;
|
||||
}
|
||||
|
||||
#incidence-modifiers a{
|
||||
background-color: white;
|
||||
#corpus-analysis-concordance-token-edit-options h6 {
|
||||
margin-left: 15px;
|
||||
}
|
||||
|
||||
#ignore-case {
|
||||
margin-left: 5px;
|
||||
#corpus-analysis-concordance-edit-options-tutorial-info-icon {
|
||||
color: black;
|
||||
}
|
||||
|
||||
#or, #and {
|
||||
background-color: #fc0;
|
||||
[data-toggle-area="input-field-options"] a {
|
||||
margin-right: 10px;
|
||||
}
|
||||
|
||||
#betweenNM {
|
||||
width: 60%;
|
||||
[data-target="corpus-analysis-concordance-character-incidence-modifiers-dropdown"], [data-target="corpus-analysis-concordance-token-incidence-modifiers-dropdown"] {
|
||||
background-color: #2FBBAB !important;
|
||||
}
|
||||
|
||||
#query-builder-tutorial-modal {
|
||||
width: 60%;
|
||||
#corpus-analysis-concordance-exactly-n-token-modal, #corpus-analysis-concordance-between-nm-token-modal {
|
||||
width: 30%;
|
||||
}
|
||||
|
||||
#query-builder-tutorial-modal ul {
|
||||
margin-top: 10px;
|
||||
[data-modal-id="corpus-analysis-concordance-exactly-n-token-modal"], [data-modal-id="corpus-analysis-concordance-between-nm-token-modal"] {
|
||||
margin-top: 15px !important;
|
||||
}
|
||||
|
||||
#query-builder-tutorial {
|
||||
padding:15px;
|
||||
}
|
||||
|
||||
#scroll-up-button-query-builder-tutorial {
|
||||
background-color: #28B3D1;
|
||||
[data-options-action="and"], [data-options-action="or"] {
|
||||
background-color: #fc0 !important;
|
||||
}
|
||||
|
||||
[data-type="start-sentence"], [data-type="end-sentence"] {
|
||||
@ -134,13 +110,18 @@
|
||||
}
|
||||
|
||||
[data-type="start-empty-entity"], [data-type="start-entity"], [data-type="end-entity"] {
|
||||
background-color: #A6E22D;
|
||||
background-color: #a6e22d;
|
||||
}
|
||||
|
||||
[data-type="start-text-annotation"]{
|
||||
[data-type="text-annotation"]{
|
||||
background-color: #2FBBAB;
|
||||
}
|
||||
|
||||
[data-type="token"] {
|
||||
background-color: #28B3D1;
|
||||
}
|
||||
|
||||
[data-type="token-incidence-modifier"] {
|
||||
background-color: #4db6ac;
|
||||
color: white;
|
||||
}
|
||||
|
Before Width: | Height: | Size: 222 KiB After Width: | Height: | Size: 123 KiB |
Before Width: | Height: | Size: 378 KiB After Width: | Height: | Size: 402 KiB |
BIN
app/static/images/manual/query_builder/editing_chips.gif
Normal file
After Width: | Height: | Size: 720 KiB |
Before Width: | Height: | Size: 854 KiB After Width: | Height: | Size: 589 KiB |
BIN
app/static/images/manual/query_builder/expert_mode.gif
Normal file
After Width: | Height: | Size: 436 KiB |
BIN
app/static/images/manual/query_builder/incidence_modifier.gif
Normal file
After Width: | Height: | Size: 189 KiB |
Before Width: | Height: | Size: 511 KiB After Width: | Height: | Size: 381 KiB |
Before Width: | Height: | Size: 1009 KiB After Width: | Height: | Size: 759 KiB |
Before Width: | Height: | Size: 903 KiB After Width: | Height: | Size: 750 KiB |
Before Width: | Height: | Size: 413 KiB After Width: | Height: | Size: 524 KiB |
BIN
app/static/images/nopaque_slogan_transparent.png
Normal file
After Width: | Height: | Size: 23 KiB |
Before Width: | Height: | Size: 34 KiB After Width: | Height: | Size: 30 KiB |
After Width: | Height: | Size: 160 KiB |
BIN
app/static/images/workshops/fgho_sommerschule_2023/corpus.png
Normal file
After Width: | Height: | Size: 182 KiB |
After Width: | Height: | Size: 155 KiB |
BIN
app/static/images/workshops/fgho_sommerschule_2023/dashboard.png
Normal file
After Width: | Height: | Size: 104 KiB |
After Width: | Height: | Size: 115 KiB |
@ -1,104 +0,0 @@
|
||||
/**
 * Client-side application state.
 *
 * Holds user data fetched over a Socket.IO connection in `this.data.users`
 * and keeps it up to date by applying the JSON patches the server emits as
 * 'PATCH' events.
 */
class App {
  constructor() {
    this.data = {
      // Pending/settled request promises, keyed by user id.
      promises: {getUser: {}, subscribeUser: {}},
      users: {},
    };
    this.socket = io({transports: ['websocket'], upgrade: false});
    this.socket.on('PATCH', (patch) => {this.onPatch(patch);});
  }

  /**
   * Escape a value for safe interpolation into HTML text or attributes.
   *
   * @param {*} value - Value to escape; converted to a string first.
   * @returns {string} The escaped string.
   */
  static escapeHtml(value) {
    return String(value)
      .replace(/&/g, '&amp;')
      .replace(/</g, '&lt;')
      .replace(/>/g, '&gt;')
      .replace(/"/g, '&quot;')
      .replace(/'/g, '&#39;');
  }

  /**
   * Fetch a user and store it in `this.data.users`.
   *
   * Requests are cached per user id, so repeated calls share one promise.
   * A failed request is removed from the cache so that it can be retried.
   *
   * @param {string} userId
   * @param {boolean} [backrefs=true]
   * @param {boolean} [relationships=true]
   * @returns {Promise<Object>} Resolves with the user data; rejects with the
   *   response if its status is not 200.
   */
  getUser(userId, backrefs=true, relationships=true) {
    if (userId in this.data.promises.getUser) {
      return this.data.promises.getUser[userId];
    }

    const promise = new Promise((resolve, reject) => {
      this.socket.emit('GET /users/<user_id>', userId, backrefs, relationships, (response) => {
        if (response.status !== 200) {
          reject(response);
          return;
        }
        this.data.users[userId] = response.body;
        resolve(this.data.users[userId]);
      });
    });
    this.data.promises.getUser[userId] = promise;
    // Do not keep a rejected promise around forever.
    promise.catch(() => {
      if (this.data.promises.getUser[userId] === promise) {
        delete this.data.promises.getUser[userId];
      }
    });

    return promise;
  }

  /**
   * Subscribe to patches for a user.
   *
   * Requests are cached per user id; a failed request is removed from the
   * cache so that it can be retried.
   *
   * @param {string} userId
   * @returns {Promise<Object>} Resolves with the response; rejects with it
   *   if its status is not 200.
   */
  subscribeUser(userId) {
    if (userId in this.data.promises.subscribeUser) {
      return this.data.promises.subscribeUser[userId];
    }

    const promise = new Promise((resolve, reject) => {
      this.socket.emit('SUBSCRIBE /users/<user_id>', userId, (response) => {
        if (response.status !== 200) {
          reject(response);
          return;
        }
        resolve(response);
      });
    });
    this.data.promises.subscribeUser[userId] = promise;
    // Do not keep a rejected promise around forever.
    promise.catch(() => {
      if (this.data.promises.subscribeUser[userId] === promise) {
        delete this.data.promises.subscribeUser[userId];
      }
    });

    return promise;
  }

  /**
   * Show a toast with a category icon and a close button.
   *
   * @param {string} message - Inserted as HTML; callers must escape any
   *   untrusted parts themselves.
   * @param {string} [category] - 'corpus', 'error', 'job' or 'settings';
   *   anything else gets the generic notification icon.
   */
  flash(message, category) {
    let iconPrefix = '';
    switch (category) {
      case 'corpus': {
        iconPrefix = '<i class="left material-icons">book</i>';
        break;
      }
      case 'error': {
        iconPrefix = '<i class="error-color-text left material-icons">error</i>';
        break;
      }
      case 'job': {
        iconPrefix = '<i class="left nopaque-icons">J</i>';
        break;
      }
      case 'settings': {
        iconPrefix = '<i class="left material-icons">settings</i>';
        break;
      }
      default: {
        iconPrefix = '<i class="left material-icons">notifications</i>';
        break;
      }
    }
    const toast = M.toast(
      {
        html: `
          <span>${iconPrefix}${message}</span>
          <button class="action-button btn-flat toast-action white-text" data-action="close">
            <i class="material-icons">close</i>
          </button>
        `.trim()
      }
    );
    const toastCloseActionElement = toast.el.querySelector('.action-button[data-action="close"]');
    toastCloseActionElement.addEventListener('click', () => {toast.dismiss();});
  }

  /**
   * Apply a server-sent JSON patch to the local data.
   *
   * Only operations on users that have been loaded are applied. Every
   * replaced job status additionally triggers a toast.
   *
   * @param {Array<Object>} patch - JSON patch operations.
   */
  onPatch(patch) {
    // Filter Patch to only include operations on users that are initialized.
    // The id is compared as a whole path segment: a prefix match would let
    // "abc" accept "/users/abcd", and an empty user list would accept
    // every path.
    const knownUserIds = new Set(Object.keys(this.data.users));
    const userPathRegExp = /^\/users\/([^/]+)(?:\/|$)/;
    const filteredPatch = patch.filter((operation) => {
      const match = userPathRegExp.exec(operation.path);
      return match !== null && knownUserIds.has(match[1]);
    });

    // Handle job status updates
    const jobStatusRegExp = /^\/users\/([A-Za-z0-9]*)\/jobs\/([A-Za-z0-9]*)\/status$/;
    for (const operation of filteredPatch) {
      if (operation.op !== 'replace') {continue;}
      const match = jobStatusRegExp.exec(operation.path);
      if (match === null) {continue;}
      const [, userId, jobId] = match;
      // The user may have been loaded without jobs; fall back to the id.
      const jobs = this.data.users[userId].jobs;
      const job = jobs ? jobs[jobId] : undefined;
      const title = job ? job.title : jobId;
      // Title and status are data, not markup, so they are escaped.
      this.flash(`[<a href="/jobs/${jobId}">${App.escapeHtml(title)}</a>] New status: <span class="job-status-text" data-status="${App.escapeHtml(operation.value)}"></span>`, 'job');
    }

    // Apply Patch
    jsonpatch.applyPatch(this.data, filteredPatch);
  }
}
|
@ -1,245 +0,0 @@
|
||||
class CorpusAnalysisApp {
|
||||
constructor(corpusId) {
|
||||
this.data = {};
|
||||
|
||||
// HTML elements
|
||||
this.elements = {
|
||||
container: document.querySelector('#corpus-analysis-app-container'),
|
||||
extensionTabs: document.querySelector('#corpus-analysis-app-extension-tabs'),
|
||||
initModal: document.querySelector('#corpus-analysis-app-init-modal'),
|
||||
overview: document.querySelector('#corpus-analysis-app-overview')
|
||||
};
|
||||
// Materialize elements
|
||||
this.elements.m = {
|
||||
extensionTabs: M.Tabs.init(this.elements.extensionTabs),
|
||||
initModal: M.Modal.init(this.elements.initModal, {dismissible: false})
|
||||
};
|
||||
|
||||
this.extensions = {};
|
||||
|
||||
this.settings = {
|
||||
corpusId: corpusId
|
||||
};
|
||||
}
|
||||
|
||||
init() {
|
||||
this.disableActionElements();
|
||||
this.elements.m.initModal.open();
|
||||
// Init data
|
||||
this.data.cQiClient = new CQiClient(this.settings.corpusId);
|
||||
this.data.cQiClient.connect()
|
||||
.then(cQiStatus => {
|
||||
return this.data.cQiClient.corpora.get(`NOPAQUE_${this.settings.corpusId}`);
|
||||
})
|
||||
.then(
|
||||
cQiCorpus => {
|
||||
this.data.corpus = {o: cQiCorpus};
|
||||
this.data.corpus.o.getVisualizationData()
|
||||
.then(
|
||||
(data) => {
|
||||
console.log(data);
|
||||
this.renderGeneralCorpusInfo(data);
|
||||
this.renderTextInfoList(data);
|
||||
this.renderTextProportionsGraphic(data);
|
||||
this.renderFrequenciesGraphic(data);
|
||||
this.renderBoundsGraphic(data);
|
||||
}
|
||||
);
|
||||
// this.data.corpus.o.getCorpusData()
|
||||
// .then(corpusData => {
|
||||
// console.log(corpusData);
|
||||
// this.renderGeneralCorpusInfo(corpusData);
|
||||
// this.renderTextInfoList(corpusData);
|
||||
// this.renderTextProportionsGraphic(corpusData);
|
||||
// this.renderFrequenciesGraphic(corpusData);
|
||||
// this.renderBoundsGraphic(corpusData);
|
||||
// });
|
||||
// TODO: Don't do this hgere
|
||||
cQiCorpus.updateDb();
|
||||
this.enableActionElements();
|
||||
for (let extension of Object.values(this.extensions)) {extension.init();}
|
||||
this.elements.m.initModal.close();
|
||||
},
|
||||
cQiError => {
|
||||
let errorsElement = this.elements.initModal.querySelector('.errors');
|
||||
let progressElement = this.elements.initModal.querySelector('.progress');
|
||||
errorsElement.innerText = JSON.stringify(cQiError);
|
||||
errorsElement.classList.remove('hide');
|
||||
progressElement.classList.add('hide');
|
||||
if ('payload' in cQiError && 'code' in cQiError.payload && 'msg' in cQiError.payload) {
|
||||
app.flash(`${cQiError.payload.code}: ${cQiError.payload.msg}`, 'error');
|
||||
}
|
||||
}
|
||||
);
|
||||
|
||||
|
||||
// Add event listeners
|
||||
for (let extensionSelectorElement of this.elements.overview.querySelectorAll('.extension-selector')) {
|
||||
extensionSelectorElement.addEventListener('click', () => {
|
||||
this.elements.m.extensionTabs.select(extensionSelectorElement.dataset.target);
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
registerExtension(extension) {
|
||||
if (extension.name in this.extensions) {
|
||||
console.error(`Can't register extension ${extension.name}: Already registered`);
|
||||
return;
|
||||
}
|
||||
this.extensions[extension.name] = extension;
|
||||
if ('cQiClient' in this.data && this.data.cQiClient.connected) {extension.init();}
|
||||
}
|
||||
|
||||
disableActionElements() {
|
||||
let actionElements = this.elements.container.querySelectorAll('.corpus-analysis-action');
|
||||
for (let actionElement of actionElements) {
|
||||
if (actionElement.nodeName === 'INPUT') {
|
||||
actionElement.disabled = true;
|
||||
} else if (actionElement.nodeName === 'SELECT') {
|
||||
actionElement.parentNode.querySelector('input.select-dropdown').disabled = true;
|
||||
} else {
|
||||
actionElement.classList.add('disabled');
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
enableActionElements() {
|
||||
let actionElements = this.elements.container.querySelectorAll('.corpus-analysis-action');
|
||||
for (let actionElement of actionElements) {
|
||||
if (actionElement.nodeName === 'INPUT') {
|
||||
actionElement.disabled = false;
|
||||
} else if (actionElement.nodeName === 'SELECT') {
|
||||
actionElement.parentNode.querySelector('input.select-dropdown').disabled = false;
|
||||
} else {
|
||||
actionElement.classList.remove('disabled');
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
renderGeneralCorpusInfo(corpusData) {
|
||||
document.querySelector('.corpus-num-tokens').innerHTML = corpusData.corpus.counts.token;
|
||||
document.querySelector('.corpus-num-s').innerHTML = corpusData.corpus.counts.s;
|
||||
document.querySelector('.corpus-num-unique-words').innerHTML = Object.entries(corpusData.corpus.freqs.word).length;
|
||||
document.querySelector('.corpus-num-unique-lemmas').innerHTML = Object.entries(corpusData.corpus.freqs.lemma).length;
|
||||
document.querySelector('.corpus-num-unique-pos').innerHTML = Object.entries(corpusData.corpus.freqs.pos).length;
|
||||
document.querySelector('.corpus-num-unique-simple-pos').innerHTML = Object.entries(corpusData.corpus.freqs.simple_pos).length;
|
||||
}
|
||||
|
||||
renderTextInfoList(corpusData) {
|
||||
let corpusTextInfoListElement = document.querySelector('.corpus-text-info-list');
|
||||
let corpusTextInfoList = new CorpusTextInfoList(corpusTextInfoListElement);
|
||||
let texts = corpusData.s_attrs.text.lexicon;
|
||||
let textData = [];
|
||||
for (let i = 0; i < Object.entries(texts).length; i++) {
|
||||
let resource = {
|
||||
title: corpusData.values.s_attrs.text[i].title,
|
||||
publishing_year: corpusData.values.s_attrs.text[i].publishing_year,
|
||||
num_tokens: corpusData.s_attrs.text.lexicon[i].counts.token,
|
||||
num_sentences: corpusData.s_attrs.text.lexicon[i].counts.s,
|
||||
num_unique_words: Object.entries(corpusData.s_attrs.text.lexicon[i].freqs.word).length,
|
||||
num_unique_lemmas: Object.entries(corpusData.s_attrs.text.lexicon[i].freqs.lemma).length,
|
||||
num_unique_pos: Object.entries(corpusData.s_attrs.text.lexicon[i].freqs.pos).length,
|
||||
num_unique_simple_pos: Object.entries(corpusData.s_attrs.text.lexicon[i].freqs.simple_pos).length
|
||||
};
|
||||
|
||||
textData.push(resource);
|
||||
}
|
||||
|
||||
corpusTextInfoList.add(textData);
|
||||
|
||||
let textCountChipElement = document.querySelector('.text-count-chip');
|
||||
textCountChipElement.innerHTML = `Text count: ${corpusData.corpus.counts.text}`;
|
||||
}
|
||||
|
||||
renderTextProportionsGraphic(corpusData) {
|
||||
let textProportionsGraphicElement = document.querySelector('#text-proportions-graphic');
|
||||
let texts = Object.entries(corpusData.s_attrs.text.lexicon);
|
||||
let graphData = [
|
||||
{
|
||||
values: texts.map(text => text[1].counts.token),
|
||||
labels: texts.map(text => `${corpusData.values.s_attrs.text[text[0]].title} (${corpusData.values.s_attrs.text[text[0]].publishing_year})`),
|
||||
type: 'pie'
|
||||
}
|
||||
];
|
||||
let config = {responsive: true};
|
||||
|
||||
Plotly.newPlot(textProportionsGraphicElement, graphData, config);
|
||||
}
|
||||
|
||||
renderFrequenciesGraphic(corpusData) {
|
||||
let frequenciesTokenCategoryDropdownElement = document.querySelector('[data-target="frequencies-token-category-dropdown"]');
|
||||
let frequenciesTokenCategoryDropdownListElement = document.querySelector("#frequencies-token-category-dropdown");
|
||||
let frequenciesGraphicElement = document.querySelector('#frequencies-graphic');
|
||||
let texts = Object.entries(corpusData.s_attrs.text.lexicon);
|
||||
|
||||
|
||||
frequenciesTokenCategoryDropdownListElement.addEventListener('click', (event) => {
|
||||
frequenciesTokenCategoryDropdownElement.firstChild.textContent = event.target.innerHTML;
|
||||
this.renderFrequenciesGraphic(corpusData);
|
||||
});
|
||||
|
||||
let tokenCategory = frequenciesTokenCategoryDropdownElement.firstChild.textContent.toLowerCase();
|
||||
|
||||
let graphData = this.createFrequenciesGraphData(tokenCategory, texts, corpusData);
|
||||
let graphLayout = {
|
||||
barmode: 'stack',
|
||||
type: 'bar'
|
||||
};
|
||||
let config = {responsive: true};
|
||||
|
||||
Plotly.newPlot(frequenciesGraphicElement, graphData, graphLayout, config);
|
||||
}
|
||||
|
||||
createFrequenciesGraphData(category, texts, corpusData) {
|
||||
let graphData = [];
|
||||
let sortedData = Object.entries(corpusData.corpus.freqs[category]).sort((a, b) => b[1] - a[1]).slice(0, 5);
|
||||
|
||||
for (let item of sortedData) {
|
||||
let data = {
|
||||
x: texts.map(text => `${corpusData.values.s_attrs.text[text[0]].title} (${corpusData.values.s_attrs.text[text[0]].publishing_year})`),
|
||||
y: texts.map(text => text[1].freqs[category][item[0]]),
|
||||
name: corpusData.values.p_attrs[category][item[0]],
|
||||
type: 'bar'
|
||||
};
|
||||
graphData.push(data);
|
||||
}
|
||||
|
||||
return graphData;
|
||||
}
|
||||
|
||||
renderBoundsGraphic(corpusData) {
|
||||
let boundsGraphicElement = document.querySelector('#bounds-graphic');
|
||||
|
||||
let graphData = [];
|
||||
let texts = Object.entries(corpusData.s_attrs.text.lexicon);
|
||||
|
||||
graphData = [{
|
||||
type: 'bar',
|
||||
x: texts.map(text => text[1].bounds[1] - text[1].bounds[0]),
|
||||
y: texts.map(text => corpusData.values.s_attrs.text[text[0]].title),
|
||||
base: texts.map(text => text[1].bounds[0]),
|
||||
text: texts.map(text => `${corpusData.values.s_attrs.text[text[0]].title} (${corpusData.values.s_attrs.text[text[0]].publishing_year})`),
|
||||
orientation: 'h',
|
||||
hovertemplate: '%{base} - %{x} <br>%{y}',
|
||||
showlegend: false
|
||||
}];
|
||||
|
||||
let graphLayout = {
|
||||
barmode: 'stack',
|
||||
type: 'bar',
|
||||
showgrid: false,
|
||||
xaxis: {
|
||||
rangemode: 'nonnegative',
|
||||
autorange: true
|
||||
},
|
||||
yaxis: {
|
||||
autorange: true,
|
||||
showticklabels: false
|
||||
}
|
||||
};
|
||||
|
||||
let config = {responsive: true};
|
||||
|
||||
Plotly.newPlot(boundsGraphicElement, graphData, graphLayout, config);
|
||||
}
|
||||
}
|
@ -1,972 +0,0 @@
|
||||
class ConcordanceQueryBuilder {
|
||||
|
||||
constructor() {
|
||||
|
||||
|
||||
this.elements = {
|
||||
|
||||
counter: 0,
|
||||
yourQueryContent: [],
|
||||
queryContent:[],
|
||||
concordanceQueryBuilder: document.querySelector('#concordance-query-builder'),
|
||||
concordanceQueryBuilderButton: document.querySelector('#concordance-query-builder-button'),
|
||||
closeQueryBuilder: document.querySelector('#close-query-builder'),
|
||||
queryBuilderTutorialModal: document.querySelector('#query-builder-tutorial-modal'),
|
||||
valueValidator: true,
|
||||
|
||||
|
||||
//#region QueryBuilder Elements
|
||||
|
||||
positionalAttrButton: document.querySelector('#positional-attr-button'),
|
||||
positionalAttrArea: document.querySelector('#positional-attr'),
|
||||
positionalAttr: document.querySelector('#token-attr'),
|
||||
structuralAttrButton: document.querySelector('#structural-attr-button'),
|
||||
structuralAttrArea: document.querySelector('#structural-attr'),
|
||||
queryContainer: document.querySelector('#query-container'),
|
||||
buttonPreparer: document.querySelector('#button-preparer'),
|
||||
yourQuery: document.querySelector('#your-query'),
|
||||
insertQueryButton: document.querySelector('#insert-query-button'),
|
||||
queryPreview: document.querySelector('#query-preview'),
|
||||
tokenQuery: document.querySelector('#token-query'),
|
||||
tokenBuilderContent: document.querySelector('#token-builder-content'),
|
||||
tokenSubmitButton: document.querySelector('#token-submit'),
|
||||
extFormQuery: document.querySelector('#concordance-extension-form-query'),
|
||||
dropButton: '',
|
||||
|
||||
queryBuilderTutorialInfoIcon: document.querySelector('#query-builder-tutorial-info-icon'),
|
||||
tokenTutorialInfoIcon: document.querySelector('#token-tutorial-info-icon'),
|
||||
editTokenTutorialInfoIcon: document.querySelector('#edit-options-tutorial-info-icon'),
|
||||
structuralAttributeTutorialInfoIcon: document.querySelector('#add-structural-attribute-tutorial-info-icon'),
|
||||
generalOptionsQueryBuilderTutorialInfoIcon: document.querySelector('#general-options-query-builder-tutorial-info-icon'),
|
||||
|
||||
|
||||
//#endregion QueryBuilder Elements
|
||||
|
||||
//#region Strucutral Attributes
|
||||
|
||||
sentence:document.querySelector('#sentence'),
|
||||
entity: document.querySelector('#entity'),
|
||||
textAnnotation: document.querySelector('#text-annotation'),
|
||||
|
||||
entityBuilder: document.querySelector('#entity-builder'),
|
||||
englishEntType: document.querySelector('#english-ent-type'),
|
||||
germanEntType: document.querySelector('#german-ent-type'),
|
||||
emptyEntity: document.querySelector('#empty-entity'),
|
||||
entityAnyType: false,
|
||||
|
||||
textAnnotationBuilder: document.querySelector('#text-annotation-builder'),
|
||||
textAnnotationOptions: document.querySelector('#text-annotation-options'),
|
||||
textAnnotationInput: document.querySelector('#text-annotation-input'),
|
||||
textAnnotationSubmit: document.querySelector('#text-annotation-submit'),
|
||||
noValueMetadataMessage: document.querySelector('#no-value-metadata-message'),
|
||||
//#endregion Structural Attributes
|
||||
|
||||
//#region Token Attributes
|
||||
tokenQueryFilled: false,
|
||||
|
||||
lemma: document.querySelector('#lemma'),
|
||||
emptyToken: document.querySelector('#empty-token'),
|
||||
word: document.querySelector('#word'),
|
||||
lemma: document.querySelector('#lemma'),
|
||||
pos: document.querySelector('#pos'),
|
||||
simplePosButton: document.querySelector('#simple-pos-button'),
|
||||
incidenceModifiers: document.querySelector('[data-target="incidence-modifiers"]'),
|
||||
or: document.querySelector('#or'),
|
||||
and: document.querySelector('#and'),
|
||||
|
||||
//#region Word and Lemma Elements
|
||||
wordBuilder: document.querySelector('#word-builder'),
|
||||
lemmaBuilder: document.querySelector('#lemma-builder'),
|
||||
inputOptions: document.querySelector('#input-options'),
|
||||
incidenceModifiersButton: document.querySelector('#incidence-modifiers-button'),
|
||||
conditionContainer: document.querySelector('#condition-container'),
|
||||
wordInput: document.querySelector('#word-input'),
|
||||
lemmaInput: document.querySelector('#lemma-input'),
|
||||
ignoreCaseCheckbox : document.querySelector('#ignore-case-checkbox'),
|
||||
ignoreCase: document.querySelector('input[type="checkbox"]'),
|
||||
wildcardChar: document.querySelector('#wildcard-char'),
|
||||
optionGroup: document.querySelector('#option-group'),
|
||||
//#endregion Word and Lemma Elements
|
||||
|
||||
//#region posBuilder Elements
|
||||
englishPosBuilder: document.querySelector('#english-pos-builder'),
|
||||
englishPos: document.querySelector('#english-pos'),
|
||||
germanPosBuilder: document.querySelector('#german-pos-builder'),
|
||||
germanPos: document.querySelector('#german-pos'),
|
||||
//#endregion posBuilder Elements
|
||||
|
||||
//#region simple_posBuilder Elements
|
||||
simplePosBuilder: document.querySelector('#simplepos-builder'),
|
||||
simplePos: document.querySelector('#simple-pos'),
|
||||
//#endregion simple_posBuilder Elements
|
||||
|
||||
//#region incidence modifiers
|
||||
oneOrMore: document.querySelector('#one-or-more'),
|
||||
zeroOrMore: document.querySelector('#zero-or-more'),
|
||||
zeroOrOne: document.querySelector('#zero-or-one'),
|
||||
exactlyN: document.querySelector('#exactlyN'),
|
||||
betweenNM: document.querySelector('#betweenNM'),
|
||||
nInput: document.querySelector('#n-input'),
|
||||
nSubmit: document.querySelector('#n-submit'),
|
||||
nmInput: document.querySelector('#n-m-input'),
|
||||
mInput: document.querySelector('#m-input'),
|
||||
nmSubmit: document.querySelector('#n-m-submit'),
|
||||
//#endregion incidence modifiers
|
||||
|
||||
cancelBool: false,
|
||||
noValueMessage: document.querySelector('#no-value-message'),
|
||||
//#endregion Token Attributes
|
||||
}
|
||||
|
||||
this.elements.closeQueryBuilder.addEventListener('click', () => {this.closeQueryBuilderModal(this.elements.concordanceQueryBuilder);});
|
||||
this.elements.concordanceQueryBuilderButton.addEventListener('click', () => {this.clearAll();});
|
||||
this.elements.insertQueryButton.addEventListener('click', () => {this.insertQuery();});
|
||||
this.elements.positionalAttrButton.addEventListener('click', () => {this.showPositionalAttrArea();});
|
||||
this.elements.structuralAttrButton.addEventListener('click', () => {this.showStructuralAttrArea();});
|
||||
|
||||
//#region Structural Attribute Event Listeners
|
||||
this.elements.sentence.addEventListener('click', () => {this.addSentence();});
|
||||
this.elements.entity.addEventListener('click', () => {this.addEntity();});
|
||||
this.elements.textAnnotation.addEventListener('click', () => {this.addTextAnnotation();});
|
||||
|
||||
this.elements.englishEntType.addEventListener('change', () => {this.englishEntTypeHandler();});
|
||||
this.elements.germanEntType.addEventListener('change', () => {this.germanEntTypeHandler();});
|
||||
this.elements.emptyEntity.addEventListener('click', () => {this.emptyEntityButton();});
|
||||
|
||||
this.elements.textAnnotationSubmit.addEventListener('click', () => {this.textAnnotationSubmitHandler();});
|
||||
|
||||
//#endregion
|
||||
|
||||
//#region Token Attribute Event Listeners
|
||||
this.elements.queryBuilderTutorialInfoIcon.addEventListener('click', () => {this.tutorialIconHandler('#query-builder-tutorial-start');});
|
||||
this.elements.tokenTutorialInfoIcon.addEventListener('click', () => {this.tutorialIconHandler('#add-new-token-tutorial');});
|
||||
this.elements.editTokenTutorialInfoIcon.addEventListener('click', () => {this.tutorialIconHandler('#edit-options-tutorial');});
|
||||
this.elements.structuralAttributeTutorialInfoIcon.addEventListener('click', () => {this.tutorialIconHandler('#add-structural-attribute-tutorial');});
|
||||
this.elements.generalOptionsQueryBuilderTutorialInfoIcon.addEventListener('click', () => {this.tutorialIconHandler('#general-options-query-builder');});
|
||||
|
||||
this.elements.positionalAttr.addEventListener('change', () => {this.tokenTypeSelector();});
|
||||
this.elements.tokenSubmitButton.addEventListener('click', () => {this.addTokenToQuery();});
|
||||
|
||||
this.elements.wordInput.addEventListener('input', () => {this.inputFieldHandler();});
|
||||
this.elements.lemmaInput.addEventListener('input', () => {this.inputFieldHandler();});
|
||||
this.elements.ignoreCase.addEventListener('change', () => {this.inputOptionHandler(this.elements.ignoreCase);});
|
||||
this.elements.wildcardChar.addEventListener('click', () => {this.inputOptionHandler(this.elements.wildcardChar);});
|
||||
this.elements.optionGroup.addEventListener('click', () => {this.inputOptionHandler(this.elements.optionGroup);});
|
||||
|
||||
this.elements.oneOrMore.addEventListener('click', () => {this.incidenceModifiersHandler(this.elements.oneOrMore);});
|
||||
this.elements.zeroOrMore.addEventListener('click', () => {this.incidenceModifiersHandler(this.elements.zeroOrMore);});
|
||||
this.elements.zeroOrOne.addEventListener('click', () => {this.incidenceModifiersHandler(this.elements.zeroOrOne);});
|
||||
this.elements.nSubmit.addEventListener('click', () => {this.nSubmitHandler();});
|
||||
this.elements.nmSubmit.addEventListener('click', () => {this.nmSubmitHandler();});
|
||||
|
||||
this.elements.or.addEventListener('click', () => {this.orHandler();});
|
||||
this.elements.and.addEventListener('click', () => {this.andHandler();});
|
||||
|
||||
|
||||
//#endregion Token Attribute Event Listeners
|
||||
|
||||
}
|
||||
|
||||
|
||||
// ##########################################################################
|
||||
// #################### General Functions ###################################
|
||||
// ##########################################################################
|
||||
|
||||
//#region General Functions
|
||||
|
||||
closeQueryBuilderModal(closeInstance) {
|
||||
let instance = M.Modal.getInstance(closeInstance);
|
||||
instance.close();
|
||||
|
||||
}
|
||||
|
||||
showPositionalAttrArea() {
|
||||
this.elements.positionalAttrArea.classList.remove('hide');
|
||||
this.wordBuilder();
|
||||
|
||||
this.elements.tokenQueryFilled = false;
|
||||
|
||||
window.location.href = '#token-builder-content';
|
||||
}
|
||||
|
||||
showStructuralAttrArea() {
|
||||
this.elements.positionalAttrArea.classList.add('hide');
|
||||
this.elements.structuralAttrArea.classList.remove('hide');
|
||||
}
|
||||
|
||||
queryChipFactory(dataType, prettyQueryText, queryText) {
|
||||
window.location.href = '#query-container';
|
||||
queryText = Utils.escape(queryText);
|
||||
prettyQueryText = Utils.escape(prettyQueryText);
|
||||
let queryChipElement = Utils.HTMLToElement(
|
||||
`
|
||||
<span class="chip query-component" data-type="${dataType}" data-query="${queryText}" draggable="true">
|
||||
${prettyQueryText}
|
||||
<i class="material-icons close">close</i>
|
||||
</span>
|
||||
`
|
||||
);
|
||||
queryChipElement.addEventListener('click', () => {this.deleteAttr(queryChipElement);});
|
||||
queryChipElement.addEventListener('dragstart', (event) => {
|
||||
// selects all nodes without target class
|
||||
let queryChips = this.elements.yourQuery.querySelectorAll('.query-component');
|
||||
|
||||
// Adds a target chip in front of all draggable childnodes
|
||||
setTimeout(() => {
|
||||
let targetChipElement = Utils.HTMLToElement('<span class="chip drop-target">Drop here</span>');
|
||||
for (let element of queryChips) {
|
||||
if (element === queryChipElement.nextSibling) {continue;}
|
||||
let targetChipClone = targetChipElement.cloneNode(true);
|
||||
if (element === queryChipElement) {
|
||||
// If the dragged element is not at the very end, a target chip is also inserted at the end
|
||||
if (queryChips[queryChips.length - 1] !== element) {
|
||||
queryChips[queryChips.length - 1].insertAdjacentElement('afterend', targetChipClone);
|
||||
}
|
||||
} else {
|
||||
element.insertAdjacentElement('beforebegin', targetChipClone);
|
||||
}
|
||||
targetChipClone.addEventListener('dragover', (event) => {
|
||||
event.preventDefault();
|
||||
});
|
||||
targetChipClone.addEventListener('dragenter', (event) => {
|
||||
event.preventDefault();
|
||||
event.target.style.borderStyle = 'solid dotted';
|
||||
});
|
||||
targetChipClone.addEventListener('dragleave', (event) => {
|
||||
event.preventDefault();
|
||||
event.target.style.borderStyle = 'hidden';
|
||||
});
|
||||
targetChipClone.addEventListener('drop', (event) => {
|
||||
let dropzone = event.target;
|
||||
dropzone.parentElement.replaceChild(queryChipElement, dropzone);
|
||||
this.queryPreviewBuilder();
|
||||
});
|
||||
}
|
||||
}, 0);
|
||||
});
|
||||
|
||||
queryChipElement.addEventListener('dragend', (event) => {
|
||||
let targets = document.querySelectorAll('.drop-target');
|
||||
for (let target of targets) {
|
||||
target.remove();
|
||||
}
|
||||
});
|
||||
|
||||
// Ensures that metadata is always at the end of the query:
|
||||
const lastChild = this.elements.yourQuery.lastChild;
|
||||
const isLastChildTextAnnotation = lastChild && lastChild.dataset.type === 'text-annotation';
|
||||
|
||||
if (!isLastChildTextAnnotation) {
|
||||
this.elements.yourQuery.appendChild(queryChipElement);
|
||||
} else {
|
||||
this.elements.yourQuery.insertBefore(queryChipElement, lastChild);
|
||||
}
|
||||
|
||||
this.elements.queryContainer.classList.remove('hide');
|
||||
this.queryPreviewBuilder();
|
||||
|
||||
// Shows a hint about possible functions for editing the query at the first added element in the query
|
||||
if (this.elements.yourQuery.childNodes.length === 1) {
|
||||
app.flash('You can edit your query by deleting individual elements or moving them via drag and drop.');
|
||||
}
|
||||
}
|
||||
|
||||
queryPreviewBuilder() {
|
||||
this.elements.yourQueryContent = [];
|
||||
for (let element of this.elements.yourQuery.childNodes) {
|
||||
let queryElement = decodeURI(element.dataset.query);
|
||||
queryElement = Utils.escape(queryElement);
|
||||
if (queryElement !== 'undefined') {
|
||||
this.elements.yourQueryContent.push(queryElement);
|
||||
}
|
||||
}
|
||||
|
||||
let queryString = this.elements.yourQueryContent.join(' ');
|
||||
queryString += ';';
|
||||
this.elements.queryPreview.innerHTML = queryString;
|
||||
}
|
||||
|
||||
|
||||
deleteAttr(attr) {
|
||||
this.elements.yourQuery.removeChild(attr);
|
||||
if (attr.dataset.type === "start-sentence") {
|
||||
this.elements.sentence.innerHTML = 'Sentence';
|
||||
} else if (attr.dataset.type === "start-entity" || attr.dataset.type === "start-empty-entity") {
|
||||
this.elements.entity.innerHTML = 'Entity';
|
||||
}
|
||||
this.elements.counter -= 1;
|
||||
if (this.elements.counter === 0) {
|
||||
this.elements.queryContainer.classList.add('hide');
|
||||
}
|
||||
this.queryPreviewBuilder();
|
||||
}
|
||||
|
||||
insertQuery() {
|
||||
this.elements.yourQueryContent = [];
|
||||
this.validateValue();
|
||||
if (this.elements.valueValidator) {
|
||||
for (let element of this.elements.yourQuery.childNodes) {
|
||||
let queryElement = decodeURI(element.dataset.query);
|
||||
if (queryElement !== 'undefined') {
|
||||
this.elements.yourQueryContent.push(queryElement);
|
||||
}
|
||||
}
|
||||
|
||||
let queryString = this.elements.yourQueryContent.join(' ');
|
||||
queryString += ';';
|
||||
|
||||
this.elements.concordanceQueryBuilder.classList.add('modal-close');
|
||||
this.elements.extFormQuery.value = queryString;
|
||||
}
|
||||
}
|
||||
|
||||
validateValue() {
|
||||
this.elements.valueValidator = true;
|
||||
let sentenceCounter = 0;
|
||||
let sentenceEndCounter = 0;
|
||||
let entityCounter = 0;
|
||||
let entityEndCounter = 0;
|
||||
for (let element of this.elements.yourQuery.childNodes) {
|
||||
if (element.dataset.type === 'start-sentence') {
|
||||
sentenceCounter += 1;
|
||||
}else if (element.dataset.type === 'end-sentence') {
|
||||
sentenceEndCounter += 1;
|
||||
}else if (element.dataset.type === 'start-entity' || element.dataset.type === 'start-empty-entity') {
|
||||
entityCounter += 1;
|
||||
}else if (element.dataset.type === 'end-entity') {
|
||||
entityEndCounter += 1;
|
||||
}
|
||||
}
|
||||
// Checks if the same number of opening and closing tags (entity and sentence) are present. Depending on what is missing, the corresponding error message is ejected
|
||||
if (sentenceCounter > sentenceEndCounter) {
|
||||
app.flash('Please add the closing sentence tag', 'error');
|
||||
this.elements.valueValidator = false;
|
||||
} else if (sentenceCounter < sentenceEndCounter) {
|
||||
app.flash('Please remove the closing sentence tag', 'error');
|
||||
this.elements.valueValidator = false;
|
||||
}
|
||||
if (entityCounter > entityEndCounter) {
|
||||
app.flash('Please add the closing entity tag', 'error');
|
||||
this.elements.valueValidator = false;
|
||||
} else if (entityCounter < entityEndCounter) {
|
||||
app.flash('Please remove the closing entity tag', 'error');
|
||||
this.elements.valueValidator = false;
|
||||
}
|
||||
}
|
||||
|
||||
clearAll() {
|
||||
// Everything is reset.
|
||||
let instance = M.Tooltip.getInstance(this.elements.queryBuilderTutorialInfoIcon);
|
||||
|
||||
this.hideEverything();
|
||||
this.elements.counter = 0;
|
||||
this.elements.concordanceQueryBuilder.classList.remove('modal-close');
|
||||
this.elements.positionalAttrArea.classList.add('hide');
|
||||
this.elements.structuralAttrArea.classList.add('hide');
|
||||
this.elements.yourQuery.innerHTML = '';
|
||||
this.elements.queryContainer.classList.add('hide');
|
||||
this.elements.entity.innerHTML = 'Entity';
|
||||
this.elements.sentence.innerHTML = 'Sentence';
|
||||
|
||||
// If the Modal is open after 5 seconds for 5 seconds (with 'instance'), a message is displayed indicating that further information can be obtained via the question mark icon
|
||||
instance.tooltipEl.style.background = '#98ACD2';
|
||||
instance.tooltipEl.style.borderTop = 'solid 4px #0064A3';
|
||||
instance.tooltipEl.style.padding = '10px';
|
||||
instance.tooltipEl.style.color = 'black';
|
||||
|
||||
setTimeout(() => {
|
||||
let modalInstance = M.Modal.getInstance(this.elements.concordanceQueryBuilder);
|
||||
if (modalInstance.isOpen) {
|
||||
instance.open();
|
||||
setTimeout(() => {
|
||||
instance.close();
|
||||
}, 5000);
|
||||
}
|
||||
}, 5000);
|
||||
|
||||
}
|
||||
|
||||
tutorialIconHandler(id) {
|
||||
setTimeout(() => {
|
||||
window.location.href= id;
|
||||
}, 0);
|
||||
|
||||
}
|
||||
|
||||
//#endregion General Functions
|
||||
|
||||
|
||||
// ##########################################################################
|
||||
// ############## Token Attribute Builder Functions #########################
|
||||
// ##########################################################################
|
||||
|
||||
//#region Token Attribute Builder Functions
|
||||
|
||||
//#region General functions of the Token Builder
|
||||
tokenTypeSelector() {
|
||||
this.hideEverything();
|
||||
switch (this.elements.positionalAttr.value) {
|
||||
case 'word':
|
||||
this.wordBuilder();
|
||||
break;
|
||||
case 'lemma':
|
||||
this.lemmaBuilder();
|
||||
break;
|
||||
case 'english-pos':
|
||||
this.englishPosHandler();
|
||||
break;
|
||||
case 'german-pos':
|
||||
this.germanPosHandler();
|
||||
break;
|
||||
case 'simple-pos-button':
|
||||
this.simplePosBuilder();
|
||||
break;
|
||||
case 'empty-token':
|
||||
this.emptyTokenHandler();
|
||||
break;
|
||||
default:
|
||||
this.wordBuilder();
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
hideEverything() {
|
||||
|
||||
this.elements.wordBuilder.classList.add('hide');
|
||||
this.elements.lemmaBuilder.classList.add('hide');
|
||||
this.elements.ignoreCaseCheckbox.classList.add('hide');
|
||||
this.elements.inputOptions.classList.add('hide');
|
||||
this.elements.incidenceModifiersButton.classList.add('hide');
|
||||
this.elements.conditionContainer.classList.add('hide');
|
||||
this.elements.englishPosBuilder.classList.add('hide');
|
||||
this.elements.germanPosBuilder.classList.add('hide');
|
||||
this.elements.simplePosBuilder.classList.add('hide');
|
||||
this.elements.entityBuilder.classList.add('hide');
|
||||
this.elements.textAnnotationBuilder.classList.add('hide');
|
||||
|
||||
}
|
||||
|
||||
tokenChipFactory(prettyQueryText, tokenText) {
|
||||
tokenText = encodeURI(tokenText);
|
||||
let builderElement;
|
||||
let queryChipElement;
|
||||
builderElement = document.createElement('div');
|
||||
builderElement.innerHTML = `
|
||||
<div class='chip col s2 l2' style='margin-top:20px;' data-tokentext='${tokenText}'>
|
||||
${prettyQueryText}
|
||||
<i class='material-icons close'>close</i>
|
||||
</div>`;
|
||||
queryChipElement = builderElement.firstElementChild;
|
||||
queryChipElement.addEventListener('click', () => {this.deleteTokenAttr(queryChipElement);});
|
||||
this.elements.tokenQuery.appendChild(queryChipElement);
|
||||
}
|
||||
|
||||
deleteTokenAttr(attr) {
|
||||
if (this.elements.tokenQuery.childNodes.length < 2) {
|
||||
this.elements.tokenQuery.removeChild(attr);
|
||||
this.wordBuilder();
|
||||
} else {
|
||||
this.elements.tokenQuery.removeChild(attr);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
addTokenToQuery() {
|
||||
let c;
|
||||
let tokenQueryContent = ''; //for ButtonFactory(prettyQueryText)
|
||||
let tokenQueryText = ''; //for ButtonFactory(queryText)
|
||||
this.elements.cancelBool = false;
|
||||
let tokenIsEmpty = false;
|
||||
|
||||
if (this.elements.ignoreCase.checked) {
|
||||
c = ' %c';
|
||||
} else {
|
||||
c = '';
|
||||
}
|
||||
|
||||
for (let element of this.elements.tokenQuery.childNodes) {
|
||||
tokenQueryContent += ' ' + element.firstChild.data + ' ';
|
||||
tokenQueryText += decodeURI(element.dataset.tokentext);
|
||||
if (element.innerText.indexOf('empty token') !== -1) {
|
||||
tokenIsEmpty = true;
|
||||
}
|
||||
}
|
||||
|
||||
if (this.elements.tokenQueryFilled === false) {
|
||||
switch (this.elements.positionalAttr.value) {
|
||||
case 'word':
|
||||
if (this.elements.wordInput.value === '') {
|
||||
this.disableTokenSubmit();
|
||||
} else {
|
||||
tokenQueryContent += `word=${this.elements.wordInput.value}${c}`;
|
||||
tokenQueryText += `word="${this.elements.wordInput.value}"${c}`;
|
||||
this.elements.wordInput.value = '';
|
||||
}
|
||||
break;
|
||||
case 'lemma':
|
||||
if (this.elements.lemmaInput.value === '') {
|
||||
this.disableTokenSubmit();
|
||||
} else {
|
||||
tokenQueryContent += `lemma=${this.elements.lemmaInput.value}${c}`;
|
||||
tokenQueryText += `lemma="${this.elements.lemmaInput.value}"${c}`;
|
||||
this.elements.lemmaInput.value = '';
|
||||
}
|
||||
break;
|
||||
case 'english-pos':
|
||||
if (this.elements.englishPos.value === 'default') {
|
||||
this.disableTokenSubmit();
|
||||
} else {
|
||||
tokenQueryContent += `pos=${this.elements.englishPos.value}`;
|
||||
tokenQueryText += `pos="${this.elements.englishPos.value}"`;
|
||||
this.elements.englishPos.value = '';
|
||||
}
|
||||
break;
|
||||
case 'german-pos':
|
||||
if (this.elements.germanPos.value === 'default') {
|
||||
this.disableTokenSubmit();
|
||||
} else {
|
||||
tokenQueryContent += `pos=${this.elements.germanPos.value}`;
|
||||
tokenQueryText += `pos="${this.elements.germanPos.value}"`;
|
||||
this.elements.germanPos.value = '';
|
||||
}
|
||||
break;
|
||||
case 'simple-pos-button':
|
||||
if (this.elements.simplePos.value === 'default') {
|
||||
this.disableTokenSubmit();
|
||||
} else {
|
||||
tokenQueryContent += `simple_pos=${this.elements.simplePos.value}`;
|
||||
tokenQueryText += `simple_pos="${this.elements.simplePos.value}"`;
|
||||
this.elements.simplePos.value = '';
|
||||
}
|
||||
break;
|
||||
default:
|
||||
this.wordBuilder();
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// cancelBool looks in disableTokenSubmit() whether a value is passed. If the input fields/dropdowns are empty (cancelBool === true), no token is added.
|
||||
if (this.elements.cancelBool === false) {
|
||||
// Square brackets are added only if it is not an empty token (where they are already present).
|
||||
if (tokenIsEmpty === false) {
|
||||
tokenQueryText = '[' + tokenQueryText + ']';
|
||||
}
|
||||
this.queryChipFactory('token', tokenQueryContent, tokenQueryText);
|
||||
this.hideEverything();
|
||||
this.elements.positionalAttrArea.classList.add('hide');
|
||||
this.elements.tokenQuery.innerHTML = '';
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
disableTokenSubmit() {
|
||||
this.elements.cancelBool = true;
|
||||
this.elements.tokenSubmitButton.classList.add('red');
|
||||
this.elements.noValueMessage.classList.remove('hide');
|
||||
setTimeout(() => {
|
||||
this.elements.tokenSubmitButton.classList.remove('red');
|
||||
}, 500);
|
||||
setTimeout(() => {
|
||||
this.elements.noValueMessage.classList.add('hide');
|
||||
}, 3000);
|
||||
}
|
||||
|
||||
inputFieldHandler() {
|
||||
let input;
|
||||
|
||||
if (this.elements.wordBuilder.classList.contains('hide') === false) {
|
||||
input = this.elements.wordInput;
|
||||
} else {
|
||||
input = this.elements.lemmaInput;
|
||||
}
|
||||
|
||||
if (input.value === '') {
|
||||
this.elements.incidenceModifiersButton.firstElementChild.classList.add('disabled');
|
||||
this.elements.or.classList.add('disabled');
|
||||
this.elements.and.classList.add('disabled');
|
||||
} else {
|
||||
this.elements.incidenceModifiersButton.firstElementChild.classList.remove('disabled');
|
||||
this.elements.or.classList.remove('disabled');
|
||||
this.elements.and.classList.remove('disabled');
|
||||
}
|
||||
}
|
||||
|
||||
//#endregion General functions of the Token Builder
|
||||
|
||||
//#region Dropdown Select Handler
|
||||
wordBuilder() {
|
||||
this.hideEverything();
|
||||
this.elements.wordInput.value = '';
|
||||
this.elements.wordBuilder.classList.remove('hide');
|
||||
this.elements.inputOptions.classList.remove('hide');
|
||||
this.elements.incidenceModifiersButton.classList.remove('hide');
|
||||
this.elements.conditionContainer.classList.remove('hide');
|
||||
this.elements.ignoreCaseCheckbox.classList.remove('hide');
|
||||
|
||||
this.elements.incidenceModifiersButton.firstElementChild.classList.add('disabled');
|
||||
this.elements.or.classList.add('disabled');
|
||||
this.elements.and.classList.add('disabled');
|
||||
|
||||
// Resets materialize select field to default value
|
||||
let SelectInstance = M.FormSelect.getInstance(this.elements.positionalAttr);
|
||||
SelectInstance.input.value = 'word';
|
||||
this.elements.positionalAttr.value = 'word';
|
||||
|
||||
}
|
||||
|
||||
lemmaBuilder() {
|
||||
this.hideEverything();
|
||||
this.elements.lemmaInput.value = '';
|
||||
this.elements.lemmaBuilder.classList.remove('hide');
|
||||
this.elements.inputOptions.classList.remove('hide');
|
||||
this.elements.incidenceModifiersButton.classList.remove('hide');
|
||||
this.elements.incidenceModifiersButton.firstElementChild.classList.add('disabled');
|
||||
this.elements.conditionContainer.classList.remove('hide');
|
||||
this.elements.ignoreCaseCheckbox.classList.remove('hide');
|
||||
|
||||
this.elements.incidenceModifiersButton.firstElementChild.classList.add('disabled');
|
||||
this.elements.or.classList.add('disabled');
|
||||
this.elements.and.classList.add('disabled');
|
||||
}
|
||||
|
||||
englishPosHandler() {
|
||||
this.hideEverything();
|
||||
this.elements.englishPosBuilder.classList.remove('hide');
|
||||
// this.elements.incidenceModifiersButton.classList.remove('hide');
|
||||
this.elements.conditionContainer.classList.remove('hide');
|
||||
|
||||
// Resets materialize select dropdown
|
||||
let selectInstance = M.FormSelect.getInstance(this.elements.englishPos);
|
||||
selectInstance.input.value = 'English pos tagset';
|
||||
this.elements.englishPos.value = 'default';
|
||||
}
|
||||
|
||||
germanPosHandler() {
|
||||
this.hideEverything();
|
||||
this.elements.germanPosBuilder.classList.remove('hide');
|
||||
// this.elements.incidenceModifiersButton.classList.remove('hide');
|
||||
this.elements.conditionContainer.classList.remove('hide');
|
||||
|
||||
// Resets materialize select dropdown
|
||||
let selectInstance = M.FormSelect.getInstance(this.elements.germanPos);
|
||||
selectInstance.input.value = 'German pos tagset';
|
||||
this.elements.germanPos.value = 'default';
|
||||
}
|
||||
|
||||
simplePosBuilder() {
|
||||
this.hideEverything();
|
||||
this.elements.simplePosBuilder.classList.remove('hide');
|
||||
// this.elements.incidenceModifiersButton.classList.remove('hide');
|
||||
this.elements.conditionContainer.classList.remove('hide');
|
||||
this.elements.simplePos.selectedIndex = 0;
|
||||
|
||||
// Resets materialize select dropdown
|
||||
let selectInstance = M.FormSelect.getInstance(this.elements.simplePos);
|
||||
selectInstance.input.value = 'simple_pos tagset';
|
||||
this.elements.simplePos.value = 'default';
|
||||
}
|
||||
|
||||
emptyTokenHandler() {
|
||||
this.tokenChipFactory('empty token', '[]');
|
||||
this.elements.tokenQueryFilled = true;
|
||||
this.hideEverything();
|
||||
this.elements.incidenceModifiersButton.classList.remove('hide');
|
||||
}
|
||||
//#endregion Dropdown Select Handler
|
||||
|
||||
//#region Options to edit your token - Wildcard Character, Option Group, Incidence Modifiers, Ignore Case, 'and', 'or'
|
||||
|
||||
inputOptionHandler(elem) {
|
||||
let input;
|
||||
|
||||
if (this.elements.wordBuilder.classList.contains('hide') === false) {
|
||||
input = this.elements.wordInput;
|
||||
} else {
|
||||
input = this.elements.lemmaInput;
|
||||
}
|
||||
|
||||
if (elem === this.elements.optionGroup) {
|
||||
input.value += '( option1 | option2 )';
|
||||
let firstIndex = input.value.indexOf('option1');
|
||||
let lastIndex = firstIndex + 'option1'.length;
|
||||
input.focus();
|
||||
input.setSelectionRange(firstIndex, lastIndex);
|
||||
} else if (elem === this.elements.wildcardChar) {
|
||||
input.value += '.';
|
||||
}
|
||||
}
|
||||
|
||||
nSubmitHandler() {
|
||||
let instance = M.Modal.getInstance(this.elements.exactlyN);
|
||||
instance.close();
|
||||
|
||||
switch (this.elements.positionalAttr.value) {
|
||||
case 'word':
|
||||
this.elements.wordInput.value += ' {' + this.elements.nInput.value + '}';
|
||||
break;
|
||||
case 'lemma':
|
||||
this.elements.lemmaInput.value += ' {' + this.elements.nInput.value + '}';
|
||||
break;
|
||||
case 'english-pos':
|
||||
this.elements.tokenQueryFilled = true;
|
||||
this.tokenChipFactory(`pos=${this.elements.englishPos.value}`, `pos="${this.elements.englishPos.value}"`);
|
||||
this.tokenChipFactory('{' + this.elements.nInput.value + '}', '{' + this.elements.nInput.value + '}');
|
||||
this.elements.englishPosBuilder.classList.add('hide');
|
||||
this.elements.incidenceModifiersButton.classList.add('hide');
|
||||
break;
|
||||
case 'german-pos':
|
||||
this.elements.tokenQueryFilled = true;
|
||||
this.tokenChipFactory(`pos=${this.elements.germanPos.value}`, `pos="${this.elements.germanPos.value}"`);
|
||||
this.tokenChipFactory('{' + this.elements.nInput.value + '}', '{' + this.elements.nInput.value + '}');
|
||||
this.elements.germanPosBuilder.classList.add('hide');
|
||||
this.elements.incidenceModifiersButton.classList.add('hide');
|
||||
break;
|
||||
case 'simple-pos-button':
|
||||
this.elements.tokenQueryFilled = true;
|
||||
this.tokenChipFactory(`simple_pos=${this.elements.simplePos.value}`, `simple_pos="${this.elements.simplePos.value}"`);
|
||||
this.tokenChipFactory('{' + this.elements.nInput.value + '}', '{' + this.elements.nInput.value + '}');
|
||||
this.elements.simplePosBuilder.classList.add('hide');
|
||||
this.elements.incidenceModifiersButton.classList.add('hide');
|
||||
break;
|
||||
case 'empty-token':
|
||||
this.tokenChipFactory('{' + this.elements.nInput.value + '}', '{' + this.elements.nInput.value + '}');
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
nmSubmitHandler() {
|
||||
let instance = M.Modal.getInstance(this.elements.betweenNM);
|
||||
instance.close();
|
||||
|
||||
switch (this.elements.positionalAttr.value) {
|
||||
case 'word':
|
||||
this.elements.wordInput.value += `{${this.elements.nmInput.value}, ${this.elements.mInput.value}}`;
|
||||
break;
|
||||
case 'lemma':
|
||||
this.elements.lemmaInput.value += `{${this.elements.nmInput.value}, ${this.elements.mInput.value}}`;
|
||||
break;
|
||||
case 'english-pos':
|
||||
this.elements.tokenQueryFilled = true;
|
||||
this.tokenChipFactory(`pos=${this.elements.englishPos.value}`, `pos="${this.elements.englishPos.value}"`);
|
||||
this.tokenChipFactory(`{${this.elements.nmInput.value}, ${this.elements.mInput.value}}`, `{${this.elements.nmInput.value}, ${this.elements.mInput.value}}`);
|
||||
this.elements.englishPosBuilder.classList.add('hide');
|
||||
this.elements.incidenceModifiersButton.classList.add('hide');
|
||||
break;
|
||||
case 'german-pos':
|
||||
this.elements.tokenQueryFilled = true;
|
||||
this.tokenChipFactory(`pos=${this.elements.germanPos.value}`, `pos="${this.elements.germanPos.value}"`);
|
||||
this.tokenChipFactory(`{${this.elements.nmInput.value}, ${this.elements.mInput.value}}`, `{${this.elements.nmInput.value}, ${this.elements.mInput.value}}`);
|
||||
this.elements.germanPosBuilder.classList.add('hide');
|
||||
this.elements.incidenceModifiersButton.classList.add('hide');
|
||||
break;
|
||||
case 'simple-pos-button':
|
||||
this.elements.tokenQueryFilled = true;
|
||||
this.tokenChipFactory(`simple_pos=${this.elements.simplePos.value}`, `simple_pos="${this.elements.simplePos.value}"`);
|
||||
this.tokenChipFactory(`{${this.elements.nmInput.value}, ${this.elements.mInput.value}}`, `{${this.elements.nmInput.value}, ${this.elements.mInput.value}}`);
|
||||
this.elements.simplePosBuilder.classList.add('hide');
|
||||
this.elements.incidenceModifiersButton.classList.add('hide');
|
||||
break;
|
||||
case 'empty-token':
|
||||
this.tokenChipFactory(`{${this.elements.nmInput.value}, ${this.elements.mInput.value}}`, `{${this.elements.nmInput.value}, ${this.elements.mInput.value}}`);
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
incidenceModifiersHandler(elem) {
|
||||
// For word and lemma, the incidence modifiers are inserted in the input field. For the others, one or two chips are created which contain the respective value of the token and the incidence modifier.
|
||||
if (this.elements.positionalAttr.value === 'empty-token') {
|
||||
this.tokenChipFactory(elem.innerText, elem.dataset.token);
|
||||
} else if (this.elements.positionalAttr.value === 'english-pos') {
|
||||
this.tokenChipFactory(`pos=${this.elements.englishPos.value}`, `pos="${this.elements.englishPos.value}"`);
|
||||
this.tokenChipFactory(elem.innerText, elem.dataset.token);
|
||||
this.elements.englishPosBuilder.classList.add('hide');
|
||||
this.elements.incidenceModifiersButton.classList.add('hide');
|
||||
this.elements.tokenQueryFilled = true;
|
||||
} else if (this.elements.positionalAttr.value === 'german-pos') {
|
||||
this.tokenChipFactory(`pos=${this.elements.germanPos.value}`, `pos="${this.elements.germanPos.value}"`);
|
||||
this.tokenChipFactory(elem.innerText, elem.dataset.token);
|
||||
this.elements.germanPosBuilder.classList.add('hide');
|
||||
this.elements.incidenceModifiersButton.classList.add('hide');
|
||||
this.elements.tokenQueryFilled = true;
|
||||
} else if (this.elements.positionalAttr.value === 'simple-pos-button') {
|
||||
this.tokenChipFactory(`simple_pos=${this.elements.simplePos.value}`, `simple_pos="${this.elements.simplePos.value}"`);
|
||||
this.tokenChipFactory(elem.innerText, elem.dataset.token);
|
||||
this.elements.simplePosBuilder.classList.add('hide');
|
||||
this.elements.incidenceModifiersButton.classList.add('hide');
|
||||
this.elements.tokenQueryFilled = true;
|
||||
} else {
|
||||
let input;
|
||||
|
||||
if (this.elements.wordBuilder.classList.contains('hide') === false) {
|
||||
input = this.elements.wordInput;
|
||||
} else {
|
||||
input = this.elements.lemmaInput;
|
||||
}
|
||||
|
||||
input.value += ' ' + elem.dataset.token;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
orHandler() {
|
||||
this.conditionHandler('or', ' | ');
|
||||
}
|
||||
|
||||
andHandler() {
|
||||
this.conditionHandler('and', ' & ');
|
||||
}
|
||||
|
||||
conditionHandler(conditionText, conditionQueryContent) {
|
||||
this.hideEverything();
|
||||
let tokenQueryContent;
|
||||
let tokenQueryText;
|
||||
let c;
|
||||
|
||||
if (this.elements.ignoreCase.checked) {
|
||||
c = ' %c';
|
||||
} else {
|
||||
c = '';
|
||||
}
|
||||
|
||||
switch (this.elements.positionalAttr.value) {
|
||||
case 'word':
|
||||
tokenQueryContent = `word=${this.elements.wordInput.value}${c}`;
|
||||
tokenQueryText = `word="${this.elements.wordInput.value}"${c}`;
|
||||
this.elements.wordInput.value = '';
|
||||
break;
|
||||
case 'lemma':
|
||||
tokenQueryContent = `lemma=${this.elements.lemmaInput.value}${c}`;
|
||||
tokenQueryText = `lemma="${this.elements.lemmaInput.value}"${c}`;
|
||||
this.elements.lemmaInput.value = '';
|
||||
break;
|
||||
case 'english-pos':
|
||||
tokenQueryContent = `pos=${this.elements.englishPos.value}`;
|
||||
tokenQueryText = `pos="${this.elements.englishPos.value}"`;
|
||||
this.elements.englishPos.value = '';
|
||||
break;
|
||||
case 'german-pos':
|
||||
tokenQueryContent = `pos=${this.elements.germanPos.value}`;
|
||||
tokenQueryText = `pos="${this.elements.germanPos.value}"`;
|
||||
this.elements.germanPos.value = '';
|
||||
break;
|
||||
case 'simple-pos-button':
|
||||
tokenQueryContent = `simple_pos=${this.elements.simplePos.value}`;
|
||||
tokenQueryText = `simple_pos="${this.elements.simplePos.value}"`;
|
||||
this.elements.simplePos.value = '';
|
||||
break;
|
||||
default:
|
||||
this.wordBuilder();
|
||||
break;
|
||||
}
|
||||
|
||||
this.tokenChipFactory(tokenQueryContent, tokenQueryText);
|
||||
this.tokenChipFactory(conditionText, conditionQueryContent);
|
||||
this.wordBuilder();
|
||||
}
|
||||
|
||||
//#endregion Options to edit your token - Wildcard Character, Option Group, Incidence Modifiers, Ignore Case, 'and', 'or'
|
||||
|
||||
//#endregion Token Attribute Builder Functions
|
||||
|
||||
|
||||
// ##########################################################################
|
||||
// ############ Structural Attribute Builder Functions ######################
|
||||
// ##########################################################################
|
||||
|
||||
//#region Structural Attribute Builder Functions
|
||||
addSentence() {
|
||||
this.hideEverything();
|
||||
if (this.elements.sentence.text === 'End Sentence') {
|
||||
this.queryChipFactory('end-sentence', 'Sentence End', '</s>');
|
||||
this.elements.sentence.innerHTML = 'Sentence';
|
||||
} else {
|
||||
this.queryChipFactory('start-sentence', 'Sentence Start', '<s>');
|
||||
this.elements.queryContent.push('sentence');
|
||||
this.elements.sentence.innerHTML = 'End Sentence';
|
||||
}
|
||||
}
|
||||
|
||||
addEntity() {
|
||||
if (this.elements.entity.text === 'End Entity') {
|
||||
let queryText;
|
||||
if (this.elements.entityAnyType === false) {
|
||||
queryText = '</ent_type>';
|
||||
} else {
|
||||
queryText = '</ent>';
|
||||
}
|
||||
this.queryChipFactory('end-entity', 'Entity End', queryText);
|
||||
this.elements.entity.innerHTML = 'Entity';
|
||||
} else {
|
||||
this.hideEverything();
|
||||
this.elements.entityBuilder.classList.remove('hide');
|
||||
window.location.href = '#entity-builder';
|
||||
}
|
||||
}
|
||||
|
||||
englishEntTypeHandler() {
|
||||
this.queryChipFactory('start-entity', 'Entity Type=' + this.elements.englishEntType.value, '<ent_type="' + this.elements.englishEntType.value + '">');
|
||||
this.elements.entity.innerHTML = 'End Entity';
|
||||
this.hideEverything();
|
||||
this.elements.entityAnyType = false;
|
||||
|
||||
// Resets materialize select dropdown
|
||||
let SelectInstance = M.FormSelect.getInstance(this.elements.englishEntType);
|
||||
SelectInstance.input.value = 'English ent_type';
|
||||
this.elements.englishEntType.value = 'default';
|
||||
}
|
||||
|
||||
germanEntTypeHandler() {
|
||||
this.queryChipFactory('start-entity', 'Entity Type=' + this.elements.germanEntType.value, '<ent_type="' + this.elements.germanEntType.value + '">');
|
||||
this.elements.entity.innerHTML = 'End Entity';
|
||||
this.hideEverything();
|
||||
this.elements.entityAnyType = false;
|
||||
|
||||
// Resets materialize select dropdown
|
||||
let SelectInstance = M.FormSelect.getInstance(this.elements.germanEntType);
|
||||
SelectInstance.input.value = 'German ent_type';
|
||||
this.elements.germanEntType.value = 'default';
|
||||
}
|
||||
|
||||
emptyEntityButton() {
|
||||
this.queryChipFactory('start-empty-entity', 'Entity Start', '<ent>');
|
||||
this.elements.entity.innerHTML = 'End Entity';
|
||||
this.hideEverything();
|
||||
this.elements.entityAnyType = true;
|
||||
}
|
||||
|
||||
addTextAnnotation() {
|
||||
this.hideEverything();
|
||||
this.elements.textAnnotationBuilder.classList.remove('hide');
|
||||
window.location.href = '#text-annotation-builder';
|
||||
|
||||
// Resets materialize select dropdown
|
||||
let SelectInstance = M.FormSelect.getInstance(this.elements.textAnnotationOptions);
|
||||
SelectInstance.input.value = 'address';
|
||||
this.elements.textAnnotationOptions.value = 'address';
|
||||
this.elements.textAnnotationInput.value= '';
|
||||
}
|
||||
|
||||
textAnnotationSubmitHandler() {
|
||||
if (this.elements.textAnnotationInput.value === '') {
|
||||
this.elements.textAnnotationSubmit.classList.add('red');
|
||||
this.elements.noValueMetadataMessage.classList.remove('hide');
|
||||
setTimeout(() => {
|
||||
this.elements.textAnnotationSubmit.classList.remove('red');
|
||||
}, 500);
|
||||
setTimeout(() => {
|
||||
this.elements.noValueMetadataMessage.classList.add('hide');
|
||||
}, 3000);
|
||||
} else {
|
||||
let queryText = `:: match.text_${this.elements.textAnnotationOptions.value}="${this.elements.textAnnotationInput.value}"`;
|
||||
this.queryChipFactory('text-annotation', `${this.elements.textAnnotationOptions.value}=${this.elements.textAnnotationInput.value}`, queryText);
|
||||
this.hideEverything();
|
||||
}
|
||||
}
|
||||
//#endregion Structural Attribute Builder Functions
|
||||
|
||||
}
|
@ -1,18 +0,0 @@
|
||||
/**
 * Form for creating a contribution. After a 'requestLoad' event whose target
 * reports status 201, the browser is redirected to the URL given in the
 * response's Location header.
 */
class CreateContributionForm extends Form {
  /** Instantiates the form for every `.create-contribution-form` element. */
  static autoInit() {
    document.querySelectorAll('.create-contribution-form').forEach((formElement) => {
      new CreateContributionForm(formElement);
    });
  }

  /**
   * @param formElement - The form element handed to the Form base class.
   */
  constructor(formElement) {
    super(formElement);

    this.addEventListener('requestLoad', (event) => {
      const request = event.target;
      if (request.status !== 201) {return;}
      window.location.href = request.getResponseHeader('Location');
    });
  }
}
|
@ -1,18 +0,0 @@
|
||||
/**
 * Form for creating a corpus file. After a 'requestLoad' event whose target
 * reports status 201, the browser is redirected to the URL given in the
 * response's Location header.
 */
class CreateCorpusFileForm extends Form {
  /** Instantiates the form for every `.create-corpus-file-form` element. */
  static autoInit() {
    document.querySelectorAll('.create-corpus-file-form').forEach((formElement) => {
      new CreateCorpusFileForm(formElement);
    });
  }

  /**
   * @param formElement - The form element handed to the Form base class.
   */
  constructor(formElement) {
    super(formElement);

    this.addEventListener('requestLoad', (event) => {
      const request = event.target;
      if (request.status !== 201) {return;}
      window.location.href = request.getResponseHeader('Location');
    });
  }
}
|
@ -1,20 +0,0 @@
|
||||
/*****************************************************************************
|
||||
* Admin *
|
||||
* Fetch requests for /admin routes *
|
||||
*****************************************************************************/
|
||||
Requests.admin = {};

Requests.admin.users = {};

Requests.admin.users.entity = {};

Requests.admin.users.entity.confirmed = {};

/**
 * PUT /admin/users/<userId>/confirmed with `value` JSON-encoded as the body.
 * @param userId - Identifier interpolated into the request path.
 * @param value - New value; serialized with JSON.stringify.
 * @returns Whatever Requests.JSONfetch returns for this request.
 */
Requests.admin.users.entity.confirmed.update = (userId, value) => Requests.JSONfetch(
  `/admin/users/${userId}/confirmed`,
  {method: 'PUT', body: JSON.stringify(value)}
);
|
@ -1,5 +0,0 @@
|
||||
/*****************************************************************************
|
||||
* Contributions *
|
||||
* Fetch requests for /contributions routes *
|
||||
*****************************************************************************/
|
||||
// Namespace object that groups the request helpers for /contributions routes.
Requests.contributions = {};
|
@ -1,26 +0,0 @@
|
||||
/*****************************************************************************
|
||||
* SpaCy NLP Pipeline Models *
|
||||
* Fetch requests for /contributions/spacy-nlp-pipeline-models routes *
|
||||
*****************************************************************************/
|
||||
Requests.contributions.spacy_nlp_pipeline_models = {};

Requests.contributions.spacy_nlp_pipeline_models.entity = {};

/**
 * DELETE /contributions/spacy-nlp-pipeline-models/<id>.
 * @param spacyNlpPipelineModelId - Identifier interpolated into the path.
 * @returns Whatever Requests.JSONfetch returns for this request.
 */
Requests.contributions.spacy_nlp_pipeline_models.entity.delete = (spacyNlpPipelineModelId) => Requests.JSONfetch(
  `/contributions/spacy-nlp-pipeline-models/${spacyNlpPipelineModelId}`,
  {method: 'DELETE'}
);

Requests.contributions.spacy_nlp_pipeline_models.entity.isPublic = {};

/**
 * PUT /contributions/spacy-nlp-pipeline-models/<id>/is_public with `value`
 * JSON-encoded as the body.
 * @param spacyNlpPipelineModelId - Identifier interpolated into the path.
 * @param value - New value; serialized with JSON.stringify.
 * @returns Whatever Requests.JSONfetch returns for this request.
 */
Requests.contributions.spacy_nlp_pipeline_models.entity.isPublic.update = (spacyNlpPipelineModelId, value) => Requests.JSONfetch(
  `/contributions/spacy-nlp-pipeline-models/${spacyNlpPipelineModelId}/is_public`,
  {method: 'PUT', body: JSON.stringify(value)}
);
|
@ -1,26 +0,0 @@
|
||||
/*****************************************************************************
|
||||
* Tesseract OCR Pipeline Models *
|
||||
* Fetch requests for /contributions/tesseract-ocr-pipeline-models routes *
|
||||
*****************************************************************************/
|
||||
Requests.contributions.tesseract_ocr_pipeline_models = {};

Requests.contributions.tesseract_ocr_pipeline_models.entity = {};

/**
 * DELETE /contributions/tesseract-ocr-pipeline-models/<id>.
 * @param tesseractOcrPipelineModelId - Identifier interpolated into the path.
 * @returns Whatever Requests.JSONfetch returns for this request.
 */
Requests.contributions.tesseract_ocr_pipeline_models.entity.delete = (tesseractOcrPipelineModelId) => Requests.JSONfetch(
  `/contributions/tesseract-ocr-pipeline-models/${tesseractOcrPipelineModelId}`,
  {method: 'DELETE'}
);

Requests.contributions.tesseract_ocr_pipeline_models.entity.isPublic = {};

/**
 * PUT /contributions/tesseract-ocr-pipeline-models/<id>/is_public with
 * `value` JSON-encoded as the body.
 * @param tesseractOcrPipelineModelId - Identifier interpolated into the path.
 * @param value - New value; serialized with JSON.stringify.
 * @returns Whatever Requests.JSONfetch returns for this request.
 */
Requests.contributions.tesseract_ocr_pipeline_models.entity.isPublic.update = (tesseractOcrPipelineModelId, value) => Requests.JSONfetch(
  `/contributions/tesseract-ocr-pipeline-models/${tesseractOcrPipelineModelId}/is_public`,
  {method: 'PUT', body: JSON.stringify(value)}
);
|
@ -1,46 +0,0 @@
|
||||
/*****************************************************************************
|
||||
* Corpora *
|
||||
* Fetch requests for /corpora routes *
|
||||
*****************************************************************************/
|
||||
Requests.corpora = {};

Requests.corpora.entity = {};

/**
 * DELETE /corpora/<corpusId>.
 * @param corpusId - Identifier interpolated into the request path.
 * @returns Whatever Requests.JSONfetch returns for this request.
 */
Requests.corpora.entity.delete = (corpusId) => Requests.JSONfetch(
  `/corpora/${corpusId}`,
  {method: 'DELETE'}
);

/**
 * POST /corpora/<corpusId>/build (no request body).
 * @param corpusId - Identifier interpolated into the request path.
 * @returns Whatever Requests.JSONfetch returns for this request.
 */
Requests.corpora.entity.build = (corpusId) => Requests.JSONfetch(
  `/corpora/${corpusId}/build`,
  {method: 'POST'}
);

/**
 * POST /corpora/<corpusId>/generate-share-link with `{role, expiration}`
 * JSON-encoded as the body.
 * @param corpusId - Identifier interpolated into the request path.
 * @param role - Sent as the `role` property of the body.
 * @param expiration - Sent as the `expiration` property of the body.
 * @returns Whatever Requests.JSONfetch returns for this request.
 */
Requests.corpora.entity.generateShareLink = (corpusId, role, expiration) => Requests.JSONfetch(
  `/corpora/${corpusId}/generate-share-link`,
  {method: 'POST', body: JSON.stringify({role, expiration})}
);

Requests.corpora.entity.isPublic = {};

/**
 * PUT /corpora/<corpusId>/is_public with `isPublic` JSON-encoded as the body.
 * @param corpusId - Identifier interpolated into the request path.
 * @param isPublic - New value; serialized with JSON.stringify.
 * @returns Whatever Requests.JSONfetch returns for this request.
 */
Requests.corpora.entity.isPublic.update = (corpusId, isPublic) => Requests.JSONfetch(
  `/corpora/${corpusId}/is_public`,
  {method: 'PUT', body: JSON.stringify(isPublic)}
);
|
||||
|
||||
|
||||
|
@ -1,15 +0,0 @@
|
||||
/*****************************************************************************
|
||||
* Corpora *
|
||||
* Fetch requests for /corpora/<entity>/files routes *
|
||||
*****************************************************************************/
|
||||
Requests.corpora.entity.files = {};

Requests.corpora.entity.files.ent = {};

/**
 * DELETE /corpora/<corpusId>/files/<corpusFileId>.
 * @param corpusId - Corpus identifier interpolated into the request path.
 * @param corpusFileId - File identifier interpolated into the request path.
 * @returns Whatever Requests.JSONfetch returns for this request.
 */
Requests.corpora.entity.files.ent.delete = (corpusId, corpusFileId) => Requests.JSONfetch(
  `/corpora/${corpusId}/files/${corpusFileId}`,
  {method: 'DELETE'}
);
|
@ -1,35 +0,0 @@
|
||||
/*****************************************************************************
|
||||
* Corpora *
|
||||
* Fetch requests for /corpora/<entity>/followers routes *
|
||||
*****************************************************************************/
|
||||
Requests.corpora.entity.followers = {};

/**
 * POST /corpora/<corpusId>/followers with `usernames` JSON-encoded as the body.
 * @param corpusId - Corpus identifier interpolated into the request path.
 * @param usernames - Payload; serialized with JSON.stringify.
 * @returns Whatever Requests.JSONfetch returns for this request.
 */
Requests.corpora.entity.followers.add = (corpusId, usernames) => Requests.JSONfetch(
  `/corpora/${corpusId}/followers`,
  {method: 'POST', body: JSON.stringify(usernames)}
);

Requests.corpora.entity.followers.entity = {};

/**
 * DELETE /corpora/<corpusId>/followers/<followerId>.
 * @param corpusId - Corpus identifier interpolated into the request path.
 * @param followerId - Follower identifier interpolated into the request path.
 * @returns Whatever Requests.JSONfetch returns for this request.
 */
Requests.corpora.entity.followers.entity.delete = (corpusId, followerId) => Requests.JSONfetch(
  `/corpora/${corpusId}/followers/${followerId}`,
  {method: 'DELETE'}
);

Requests.corpora.entity.followers.entity.role = {};

/**
 * PUT /corpora/<corpusId>/followers/<followerId>/role with `value`
 * JSON-encoded as the body.
 * @param corpusId - Corpus identifier interpolated into the request path.
 * @param followerId - Follower identifier interpolated into the request path.
 * @param value - New value; serialized with JSON.stringify.
 * @returns Whatever Requests.JSONfetch returns for this request.
 */
Requests.corpora.entity.followers.entity.role.update = (corpusId, followerId, value) => Requests.JSONfetch(
  `/corpora/${corpusId}/followers/${followerId}/role`,
  {method: 'PUT', body: JSON.stringify(value)}
);
|
@ -1,31 +0,0 @@
|
||||
/*****************************************************************************
|
||||
* Jobs *
|
||||
* Fetch requests for /jobs routes *
|
||||
*****************************************************************************/
|
||||
Requests.jobs = {};

Requests.jobs.entity = {};

/**
 * DELETE /jobs/<jobId>.
 * @param jobId - Identifier interpolated into the request path.
 * @returns Whatever Requests.JSONfetch returns for this request.
 */
Requests.jobs.entity.delete = (jobId) => Requests.JSONfetch(
  `/jobs/${jobId}`,
  {method: 'DELETE'}
);

/**
 * GET /jobs/<jobId>/log.
 * @param jobId - Identifier interpolated into the request path.
 * @returns Whatever Requests.JSONfetch returns for this request.
 */
Requests.jobs.entity.log = (jobId) => Requests.JSONfetch(
  `/jobs/${jobId}/log`,
  {method: 'GET'}
);

/**
 * POST /jobs/<jobId>/restart (no request body).
 * @param jobId - Identifier interpolated into the request path.
 * @returns Whatever Requests.JSONfetch returns for this request.
 */
Requests.jobs.entity.restart = (jobId) => Requests.JSONfetch(
  `/jobs/${jobId}/restart`,
  {method: 'POST'}
);
|
@ -1,17 +0,0 @@
|
||||
/*****************************************************************************
|
||||
* Settings *
|
||||
* Fetch requests for /users/<entity>/settings routes *
|
||||
*****************************************************************************/
|
||||
Requests.users.entity.settings = {};

Requests.users.entity.settings.profilePrivacy = {};

/**
 * PUT /users/<userId>/settings/profile-privacy/<profilePrivacySetting> with
 * `enabled` JSON-encoded as the body.
 * @param userId - User identifier interpolated into the request path.
 * @param profilePrivacySetting - Setting name interpolated into the path.
 * @param enabled - New value; serialized with JSON.stringify.
 * @returns Whatever Requests.JSONfetch returns for this request.
 */
Requests.users.entity.settings.profilePrivacy.update = (userId, profilePrivacySetting, enabled) => Requests.JSONfetch(
  `/users/${userId}/settings/profile-privacy/${profilePrivacySetting}`,
  {method: 'PUT', body: JSON.stringify(enabled)}
);
|
||||
|
@ -1,35 +0,0 @@
|
||||
/*****************************************************************************
|
||||
* Users *
|
||||
* Fetch requests for /users routes *
|
||||
*****************************************************************************/
|
||||
Requests.users = {};

Requests.users.entity = {};

/**
 * DELETE /users/<userId>.
 * @param userId - Identifier interpolated into the request path.
 * @returns Whatever Requests.JSONfetch returns for this request.
 */
Requests.users.entity.delete = (userId) => Requests.JSONfetch(
  `/users/${userId}`,
  {method: 'DELETE'}
);

/**
 * POST /users/accept-terms-of-use (no parameters, no request body).
 * @returns Whatever Requests.JSONfetch returns for this request.
 */
Requests.users.entity.acceptTermsOfUse = () => Requests.JSONfetch(
  `/users/accept-terms-of-use`,
  {method: 'POST'}
);

Requests.users.entity.avatar = {};

/**
 * DELETE /users/<userId>/avatar.
 * @param userId - Identifier interpolated into the request path.
 * @returns Whatever Requests.JSONfetch returns for this request.
 */
Requests.users.entity.avatar.delete = (userId) => Requests.JSONfetch(
  `/users/${userId}/avatar`,
  {method: 'DELETE'}
);
|
||||
|
204
app/static/js/app.js
Normal file
@ -0,0 +1,204 @@
|
||||
nopaque.App = class App {
|
||||
constructor() {
|
||||
this.data = {
|
||||
promises: {getUser: {}, subscribeUser: {}},
|
||||
users: {},
|
||||
};
|
||||
this.socket = io({transports: ['websocket'], upgrade: false});
|
||||
this.socket.on('PATCH', (patch) => {this.onPatch(patch);});
|
||||
}
|
||||
|
||||
getUser(userId) {
|
||||
if (userId in this.data.promises.getUser) {
|
||||
return this.data.promises.getUser[userId];
|
||||
}
|
||||
|
||||
this.data.promises.getUser[userId] = new Promise((resolve, reject) => {
|
||||
this.socket.emit('GET /users/<user_id>', userId, (response) => {
|
||||
if (response.status === 200) {
|
||||
this.data.users[userId] = response.body;
|
||||
resolve(this.data.users[userId]);
|
||||
} else {
|
||||
reject(`[${response.status}] ${response.statusText}`);
|
||||
}
|
||||
});
|
||||
});
|
||||
|
||||
return this.data.promises.getUser[userId];
|
||||
}
|
||||
|
||||
subscribeUser(userId) {
|
||||
if (userId in this.data.promises.subscribeUser) {
|
||||
return this.data.promises.subscribeUser[userId];
|
||||
}
|
||||
|
||||
this.data.promises.subscribeUser[userId] = new Promise((resolve, reject) => {
|
||||
this.socket.emit('SUBSCRIBE /users/<user_id>', userId, (response) => {
|
||||
if (response.status !== 200) {
|
||||
reject(response);
|
||||
return;
|
||||
}
|
||||
resolve(response);
|
||||
});
|
||||
});
|
||||
|
||||
return this.data.promises.subscribeUser[userId];
|
||||
}
|
||||
|
||||
flash(message, category) {
|
||||
let iconPrefix = '';
|
||||
switch (category) {
|
||||
case 'corpus': {
|
||||
iconPrefix = '<i class="left material-icons">book</i>';
|
||||
break;
|
||||
}
|
||||
case 'error': {
|
||||
iconPrefix = '<i class="error-color-text left material-icons">error</i>';
|
||||
break;
|
||||
}
|
||||
case 'job': {
|
||||
iconPrefix = '<i class="left nopaque-icons">J</i>';
|
||||
break;
|
||||
}
|
||||
case 'settings': {
|
||||
iconPrefix = '<i class="left material-icons">settings</i>';
|
||||
break;
|
||||
}
|
||||
default: {
|
||||
iconPrefix = '<i class="left material-icons">notifications</i>';
|
||||
break;
|
||||
}
|
||||
}
|
||||
let toast = M.toast(
|
||||
{
|
||||
html: `
|
||||
<span>${iconPrefix}${message}</span>
|
||||
<button class="action-button btn-flat toast-action white-text" data-action="close">
|
||||
<i class="material-icons">close</i>
|
||||
</button>
|
||||
`.trim()
|
||||
}
|
||||
);
|
||||
let toastCloseActionElement = toast.el.querySelector('.action-button[data-action="close"]');
|
||||
toastCloseActionElement.addEventListener('click', () => {toast.dismiss();});
|
||||
}
|
||||
|
||||
onPatch(patch) {
|
||||
// Filter Patch to only include operations on users that are initialized
|
||||
let regExp = new RegExp(`^/users/(${Object.keys(this.data.users).join('|')})`);
|
||||
let filteredPatch = patch.filter(operation => regExp.test(operation.path));
|
||||
|
||||
// Handle job status updates
|
||||
let subRegExp = new RegExp(`^/users/([A-Za-z0-9]*)/jobs/([A-Za-z0-9]*)/status$`);
|
||||
let subFilteredPatch = filteredPatch
|
||||
.filter((operation) => {return operation.op === 'replace';})
|
||||
.filter((operation) => {return subRegExp.test(operation.path);});
|
||||
for (let operation of subFilteredPatch) {
|
||||
let [match, userId, jobId] = operation.path.match(subRegExp);
|
||||
this.flash(`[<a href="/jobs/${jobId}">${this.data.users[userId].jobs[jobId].title}</a>] New status: <span class="job-status-text" data-status="${operation.value}"></span>`, 'job');
|
||||
}
|
||||
|
||||
// Apply Patch
|
||||
jsonpatch.applyPatch(this.data, filteredPatch);
|
||||
}
|
||||
|
||||
init() {
|
||||
this.initUi();
|
||||
}
|
||||
|
||||
initUi() {
|
||||
/* Pre-Initialization fixes */
|
||||
// #region
|
||||
|
||||
// Flask-WTF sets the standard HTML maxlength Attribute on input/textarea
|
||||
// elements to specify their maximum length (in characters). Unfortunately
|
||||
// Materialize won't recognize the maxlength Attribute, instead it uses
|
||||
// the data-length Attribute. It's conversion time :)
|
||||
for (let elem of document.querySelectorAll('input[maxlength], textarea[maxlength]')) {
|
||||
elem.dataset.length = elem.getAttribute('maxlength');
|
||||
elem.removeAttribute('maxlength');
|
||||
}
|
||||
|
||||
// To work around some limitations with the Form setup of Flask-WTF.
|
||||
// HTML option elements with an empty value are considered as placeholder
|
||||
// elements. The user should not be able to actively select these options.
|
||||
// So they get the disabled attribute.
|
||||
for (let optionElement of document.querySelectorAll('option[value=""]')) {
|
||||
optionElement.disabled = true;
|
||||
}
|
||||
|
||||
// TODO: Check why we are doing this.
|
||||
for (let optgroupElement of document.querySelectorAll('optgroup[label=""]')) {
|
||||
for (let c of optgroupElement.children) {
|
||||
optgroupElement.parentElement.insertAdjacentElement('afterbegin', c);
|
||||
}
|
||||
optgroupElement.remove();
|
||||
}
|
||||
// #endregion
|
||||
|
||||
|
||||
/* Initialize Materialize Components */
|
||||
// #region
|
||||
|
||||
// Automatically initialize Materialize Components that do not require
|
||||
// additional configuration.
|
||||
M.AutoInit();
|
||||
|
||||
// CharacterCounters
|
||||
// Materialize didn't include the CharacterCounter plugin within the
|
||||
// AutoInit method (maybe they forgot it?). Anyway... We do it here. :)
|
||||
M.CharacterCounter.init(document.querySelectorAll('input[data-length]:not(.no-autoinit), textarea[data-length]:not(.no-autoinit)'));
|
||||
|
||||
// Header navigation "more" Dropdown.
|
||||
M.Dropdown.init(
|
||||
document.querySelector('#nav-more-dropdown-trigger'),
|
||||
{
|
||||
alignment: 'right',
|
||||
constrainWidth: false,
|
||||
coverTrigger: false
|
||||
}
|
||||
);
|
||||
|
||||
// Manual modal
|
||||
M.Modal.init(
|
||||
document.querySelector('#manual-modal'),
|
||||
{
|
||||
onOpenStart: (modalElement, modalTriggerElement) => {
|
||||
if ('manualModalChapter' in modalTriggerElement.dataset) {
|
||||
let manualModalTocElement = document.querySelector('#manual-modal-toc');
|
||||
let manualModalToc = M.Tabs.getInstance(manualModalTocElement);
|
||||
manualModalToc.select(modalTriggerElement.dataset.manualModalChapter);
|
||||
// TODO: Make this work.
|
||||
// if ('manualModalChapterAnchor' in modalTriggerElement.dataset) {
|
||||
// let manualModalChapterAnchor = document.querySelector(`#${modalTriggerElement.dataset.manualModalChapterAnchor}`);
|
||||
// let xCoord = manualModalChapterAnchor.getBoundingClientRect().left;
|
||||
// let yCoord = manualModalChapterAnchor.getBoundingClientRect().top;
|
||||
// let modalContentElement = modalElement.querySelector('.modal-content');
|
||||
// modalContentElement.scroll(xCoord, yCoord);
|
||||
// }
|
||||
}
|
||||
}
|
||||
}
|
||||
);
|
||||
|
||||
// Terms of use modal
|
||||
M.Modal.init(
|
||||
document.querySelector('#terms-of-use-modal'),
|
||||
{
|
||||
dismissible: false,
|
||||
onCloseEnd: (modalElement) => {
|
||||
nopaque.requests.users.entity.acceptTermsOfUse();
|
||||
}
|
||||
}
|
||||
);
|
||||
// #endregion
|
||||
|
||||
|
||||
/* Initialize nopaque Components */
|
||||
// #region
|
||||
nopaque.resource_displays.AutoInit();
|
||||
nopaque.resource_lists.AutoInit();
|
||||
nopaque.forms.AutoInit();
|
||||
// #endregion
|
||||
}
|
||||
};
|
119
app/static/js/corpus-analysis/app.js
Normal file
@ -0,0 +1,119 @@
|
||||
/**
 * Corpus analysis application. It owns the CQi connection for one corpus,
 * keeps references to the page elements it controls and initializes all
 * extensions that registered themselves via registerExtension().
 */
nopaque.corpus_analysis.App = class App {
  /**
   * @param {string} corpusId id of the corpus to analyse
   */
  constructor(corpusId) {
    this.corpusId = corpusId;

    this.data = {};

    // HTML elements
    this.elements = {
      container: document.querySelector('#corpus-analysis-container'),
      extensionCards: document.querySelector('#corpus-analysis-extension-cards'),
      extensionTabs: document.querySelector('#corpus-analysis-extension-tabs'),
      initModal: document.querySelector('#corpus-analysis-init-modal')
    };
    // Materialize elements
    this.elements.m = {
      extensionTabs: M.Tabs.init(this.elements.extensionTabs),
      initModal: M.Modal.init(this.elements.initModal, {dismissible: false})
    };

    // Registered extensions, keyed by extension name
    this.extensions = {};

    this.settings = {};
  }

  /**
   * Connect to the CQP server, load the corpus and initialize all registered
   * extensions. Progress is shown in the init modal. If any step fails, the
   * error is displayed in the modal, which then stays open, and the action
   * elements stay disabled.
   *
   * @returns {Promise<void>}
   */
  async init() {
    this.disableActionElements();
    this.elements.m.initModal.open();

    try {
      // Setup CQi over SocketIO connection and gather data from the CQPServer
      const statusTextElement = this.elements.initModal.querySelector('.status-text');
      statusTextElement.innerText = 'Creating CQi over SocketIO client...';
      const cqiClient = new nopaque.corpus_analysis.cqi.Client('/cqi_over_sio');
      statusTextElement.innerText += ' Done';
      statusTextElement.innerText = 'Waiting for the CQP server...';
      const response = await cqiClient.api.socket.emitWithAck('init', this.corpusId);
      if (response.code !== 200) {
        // Keep the server's answer on the error so that the error handler
        // below can show the actual reason instead of a bare "Error".
        // NOTE(review): assumes the init response carries `msg` like the
        // other CQi over SocketIO responses - confirm.
        const initError = new Error(`CQi over SocketIO initialization failed with code ${response.code}`);
        initError.code = response.code;
        initError.description = response.msg;
        throw initError;
      }
      statusTextElement.innerText += ' Done';
      statusTextElement.innerText = 'Connecting to the CQP server...';
      await cqiClient.connect('anonymous', '');
      statusTextElement.innerText += ' Done';
      statusTextElement.innerText = 'Building and receiving corpus data cache from the server (This may take a while)...';
      const cqiCorpus = await cqiClient.corpora.get(`NOPAQUE-${this.corpusId.toUpperCase()}`);
      statusTextElement.innerText += ' Done';
      // TODO: Don't do this here
      await cqiCorpus.updateDb();
      this.data.cqiClient = cqiClient;
      this.data.cqiCorpus = cqiCorpus;
      this.data.corpus = {o: cqiCorpus};  // legacy
      // Initialize extensions
      for (const extension of Object.values(this.extensions)) {
        statusTextElement.innerText = `Initializing ${extension.name} extension...`;
        await extension.init();
        statusTextElement.innerText += ' Done';
      }
    } catch (error) {
      // Build "[code] ErrorClass: description" from whatever is available.
      let errorString = '';
      if (error?.code != null) {
        errorString += `[${error.code}] `;
      }
      errorString += `${error?.constructor?.name ?? 'Error'}`;
      if (error?.description != null) {
        errorString += `: ${error.description}`;
      }
      const errorsElement = this.elements.initModal.querySelector('.errors');
      const progressElement = this.elements.initModal.querySelector('.progress');
      errorsElement.innerText = errorString;
      errorsElement.classList.remove('hide');
      progressElement.classList.add('hide');
      return;
    }

    // Let the extension cards switch to the tab of their extension
    for (const extensionSelectorElement of this.elements.extensionCards.querySelectorAll('.extension-selector')) {
      extensionSelectorElement.addEventListener('click', () => {
        this.elements.m.extensionTabs.select(extensionSelectorElement.dataset.target);
      });
    }

    this.enableActionElements();
    this.elements.m.initModal.close();
  }

  /**
   * Register an extension under its name. A second registration with an
   * already known name is ignored.
   *
   * @param {object} extension object providing at least `name` and `init()`
   */
  registerExtension(extension) {
    if (extension.name in this.extensions) {return;}
    this.extensions[extension.name] = extension;
  }

  /**
   * Disable all elements marked with the "corpus-analysis-action" class.
   */
  disableActionElements() {
    this.#setActionElementsDisabled(true);
  }

  /**
   * Enable all elements marked with the "corpus-analysis-action" class.
   */
  enableActionElements() {
    this.#setActionElementsDisabled(false);
  }

  /**
   * Shared implementation of disableActionElements()/enableActionElements().
   *
   * @param {boolean} disabled whether the action elements should be disabled
   */
  #setActionElementsDisabled(disabled) {
    const actionElements = this.elements.container.querySelectorAll('.corpus-analysis-action');
    for (const actionElement of actionElements) {
      switch (actionElement.nodeName) {
        case 'INPUT':
          actionElement.disabled = disabled;
          break;
        case 'SELECT': {
          // For select elements the sibling "select-dropdown" input is
          // toggled. It may be missing, in which case there is nothing to do.
          const dropdownInputElement = actionElement.parentNode.querySelector('input.select-dropdown');
          if (dropdownInputElement !== null) {dropdownInputElement.disabled = disabled;}
          break;
        }
        default:
          actionElement.classList.toggle('disabled', disabled);
      }
    }
  }
};
|
@ -1,4 +1,4 @@
|
||||
class CorpusAnalysisConcordance {
|
||||
nopaque.corpus_analysis.ConcordanceExtension = class ConcordanceExtension {
|
||||
name = 'Concordance';
|
||||
|
||||
constructor(app) {
|
||||
@ -7,56 +7,50 @@ class CorpusAnalysisConcordance {
|
||||
this.data = {};
|
||||
|
||||
this.elements = {
|
||||
// TODO: Prefix elements with "corpus-analysis-app-"
|
||||
container: document.querySelector('#concordance-extension-container'),
|
||||
error: document.querySelector('#concordance-extension-error'),
|
||||
form: document.querySelector('#concordance-extension-form'),
|
||||
progress: document.querySelector('#concordance-extension-progress'),
|
||||
subcorpusInfo: document.querySelector('#concordance-extension-subcorpus-info'),
|
||||
subcorpusActions: document.querySelector('#concordance-extension-subcorpus-actions'),
|
||||
subcorpusItems: document.querySelector('#concordance-extension-subcorpus-items'),
|
||||
subcorpusList: document.querySelector('#concordance-extension-subcorpus-list'),
|
||||
subcorpusPagination: document.querySelector('#concordance-extension-subcorpus-pagination')
|
||||
container: document.querySelector(`#corpus-analysis-concordance-container`),
|
||||
error: document.querySelector(`#corpus-analysis-concordance-error`),
|
||||
userInterfaceForm: document.querySelector(`#corpus-analysis-concordance-user-interface-form`),
|
||||
expertModeForm: document.querySelector(`#corpus-analysis-concordance-expert-mode-form`),
|
||||
queryBuilderForm: document.querySelector(`#corpus-analysis-concordance-query-builder-form`),
|
||||
progress: document.querySelector(`#corpus-analysis-concordance-progress`),
|
||||
subcorpusInfo: document.querySelector(`#corpus-analysis-concordance-subcorpus-info`),
|
||||
subcorpusActions: document.querySelector(`#corpus-analysis-concordance-subcorpus-actions`),
|
||||
subcorpusItems: document.querySelector(`#corpus-analysis-concordance-subcorpus-items`),
|
||||
subcorpusList: document.querySelector(`#corpus-analysis-concordance-subcorpus-list`),
|
||||
subcorpusPagination: document.querySelector(`#corpus-analysis-concordance-subcorpus-pagination`)
|
||||
};
|
||||
|
||||
this.settings = {
|
||||
context: parseInt(this.elements.form['context'].value),
|
||||
perPage: parseInt(this.elements.form['per-page'].value),
|
||||
context: parseInt(this.elements.userInterfaceForm['context'].value),
|
||||
perPage: parseInt(this.elements.userInterfaceForm['per-page'].value),
|
||||
selectedSubcorpus: undefined,
|
||||
textStyle: parseInt(this.elements.form['text-style'].value),
|
||||
tokenRepresentation: this.elements.form['token-representation'].value
|
||||
textStyle: parseInt(this.elements.userInterfaceForm['text-style'].value),
|
||||
tokenRepresentation: this.elements.userInterfaceForm['token-representation'].value
|
||||
};
|
||||
|
||||
this.app.registerExtension(this);
|
||||
}
|
||||
|
||||
init() {
|
||||
// Init data
|
||||
this.data.corpus = this.app.data.corpus;
|
||||
this.data.subcorpora = {};
|
||||
// Add event listeners
|
||||
this.elements.form.addEventListener('submit', event => {
|
||||
event.preventDefault();
|
||||
async submitForm(queryModeId) {
|
||||
this.app.disableActionElements();
|
||||
let query = this.elements.form.query.value.trim();
|
||||
let subcorpusName = this.elements.form['subcorpus-name'].value;
|
||||
let queryBuilderQuery = nopaque.Utils.unescape(document.querySelector('#corpus-analysis-concordance-query-preview').innerHTML.trim());
|
||||
let expertModeQuery = this.elements.expertModeForm.query.value.trim();
|
||||
let query = queryModeId === 'corpus-analysis-concordance-expert-mode-form' ? expertModeQuery : queryBuilderQuery;
|
||||
let form = queryModeId === 'corpus-analysis-concordance-expert-mode-form' ? this.elements.expertModeForm : this.elements.queryBuilderForm;
|
||||
|
||||
let subcorpusName = form['subcorpus-name'].value;
|
||||
this.elements.error.innerText = '';
|
||||
this.elements.error.classList.add('hide');
|
||||
this.elements.progress.classList.remove('hide');
|
||||
let subcorpus = {};
|
||||
this.data.corpus.o.query(subcorpusName, query)
|
||||
.then(cQiStatus => {
|
||||
try {
|
||||
const subcorpus = {};
|
||||
subcorpus.q = query;
|
||||
subcorpus.selectedItems = new Set();
|
||||
await this.data.corpus.o.query(subcorpusName, query);
|
||||
if (subcorpusName !== 'Last') {this.data.subcorpora.Last = subcorpus;}
|
||||
return this.data.corpus.o.subcorpora.get(subcorpusName);
|
||||
})
|
||||
.then(cQiSubcorpus => {
|
||||
subcorpus.o = cQiSubcorpus;
|
||||
return cQiSubcorpus.paginate(1, this.settings.perPage, this.settings.context);
|
||||
})
|
||||
.then(
|
||||
paginatedSubcorpus => {
|
||||
const cqiSubcorpus = await this.data.corpus.o.subcorpora.get(subcorpusName);
|
||||
subcorpus.o = cqiSubcorpus;
|
||||
const paginatedSubcorpus = await cqiSubcorpus.paginate(this.settings.context, 1, this.settings.perPage);
|
||||
subcorpus.p = paginatedSubcorpus;
|
||||
this.data.subcorpora[subcorpusName] = subcorpus;
|
||||
this.settings.selectedSubcorpus = subcorpusName;
|
||||
@ -66,34 +60,46 @@ class CorpusAnalysisConcordance {
|
||||
this.renderSubcorpusItems();
|
||||
this.renderSubcorpusPagination();
|
||||
this.elements.progress.classList.add('hide');
|
||||
this.app.enableActionElements();
|
||||
},
|
||||
cQiError => {
|
||||
this.elements.error.innerText = JSON.stringify(cQiError);
|
||||
} catch (error) {
|
||||
let errorString = '';
|
||||
if ('code' in error) {errorString += `[${error.code}] `;}
|
||||
errorString += `${error.constructor.name}`;
|
||||
this.elements.error.innerText = errorString;
|
||||
this.elements.error.classList.remove('hide');
|
||||
if ('payload' in cQiError && 'code' in cQiError.payload && 'msg' in cQiError.payload) {
|
||||
app.flash(`${cQiError.payload.code}: ${cQiError.payload.msg}`, 'error');
|
||||
}
|
||||
app.flash(errorString, 'error');
|
||||
this.elements.progress.classList.add('hide');
|
||||
}
|
||||
this.app.enableActionElements();
|
||||
}
|
||||
);
|
||||
|
||||
async init() {
|
||||
// Init data
|
||||
this.data.corpus = this.app.data.corpus;
|
||||
this.data.subcorpora = {};
|
||||
// Add event listeners
|
||||
this.elements.expertModeForm.addEventListener('submit', (event) => {
|
||||
event.preventDefault();
|
||||
this.submitForm(this.elements.expertModeForm.id);
|
||||
});
|
||||
this.elements.form.addEventListener('change', event => {
|
||||
if (event.target === this.elements.form['context']) {
|
||||
this.settings.context = parseInt(this.elements.form['context'].value);
|
||||
this.elements.form.submit.click();
|
||||
this.elements.queryBuilderForm.addEventListener('submit', (event) => {
|
||||
event.preventDefault();
|
||||
this.submitForm(this.elements.queryBuilderForm.id);
|
||||
});
|
||||
this.elements.userInterfaceForm.addEventListener('change', (event) => {
|
||||
if (event.target === this.elements.userInterfaceForm['context']) {
|
||||
this.settings.context = parseInt(this.elements.userInterfaceForm['context'].value);
|
||||
this.submitForm();
|
||||
}
|
||||
if (event.target === this.elements.form['per-page']) {
|
||||
this.settings.perPage = parseInt(this.elements.form['per-page'].value);
|
||||
this.elements.form.submit.click();
|
||||
if (event.target === this.elements.userInterfaceForm['per-page']) {
|
||||
this.settings.perPage = parseInt(this.elements.userInterfaceForm['per-page'].value);
|
||||
this.submitForm();
|
||||
}
|
||||
if (event.target === this.elements.form['text-style']) {
|
||||
this.settings.textStyle = parseInt(this.elements.form['text-style'].value);
|
||||
if (event.target === this.elements.userInterfaceForm['text-style']) {
|
||||
this.settings.textStyle = parseInt(this.elements.userInterfaceForm['text-style'].value);
|
||||
this.setTextStyle();
|
||||
}
|
||||
if (event.target === this.elements.form['token-representation']) {
|
||||
this.settings.tokenRepresentation = this.elements.form['token-representation'].value;
|
||||
if (event.target === this.elements.userInterfaceForm['token-representation']) {
|
||||
this.settings.tokenRepresentation = this.elements.userInterfaceForm['token-representation'].value;
|
||||
this.setTokenRepresentation();
|
||||
}
|
||||
});
|
||||
@ -162,14 +168,14 @@ class CorpusAnalysisConcordance {
|
||||
</a>
|
||||
`.trim();
|
||||
M.Tooltip.init(this.elements.subcorpusActions.querySelectorAll('.tooltipped'));
|
||||
this.elements.subcorpusActions.querySelector('.subcorpus-export-trigger').addEventListener('click', event => {
|
||||
this.elements.subcorpusActions.querySelector('.subcorpus-export-trigger').addEventListener('click', (event) => {
|
||||
event.preventDefault();
|
||||
let subcorpus = this.data.subcorpora[this.settings.selectedSubcorpus];
|
||||
let modalElementId = Utils.generateElementId('export-subcorpus-modal-');
|
||||
let exportFormatSelectElementId = Utils.generateElementId('export-format-select-');
|
||||
let exportSelectedMatchesOnlyCheckboxElementId = Utils.generateElementId('export-selected-matches-only-checkbox-');
|
||||
let exportFileNameInputElementId = Utils.generateElementId('export-file-name-input-');
|
||||
let modalElement = Utils.HTMLToElement(
|
||||
let modalElementId = nopaque.Utils.generateElementId('export-subcorpus-modal-');
|
||||
let exportFormatSelectElementId = nopaque.Utils.generateElementId('export-format-select-');
|
||||
let exportSelectedMatchesOnlyCheckboxElementId = nopaque.Utils.generateElementId('export-selected-matches-only-checkbox-');
|
||||
let exportFileNameInputElementId = nopaque.Utils.generateElementId('export-file-name-input-');
|
||||
let modalElement = nopaque.Utils.HTMLToElement(
|
||||
`
|
||||
<div class="modal" id="${modalElementId}">
|
||||
<div class="modal-content">
|
||||
@ -219,7 +225,7 @@ class CorpusAnalysisConcordance {
|
||||
}
|
||||
}
|
||||
);
|
||||
exportButton.addEventListener('click', event => {
|
||||
exportButton.addEventListener('click', (event) => {
|
||||
event.preventDefault();
|
||||
this.app.disableActionElements();
|
||||
this.elements.progress.classList.remove('hide');
|
||||
@ -236,12 +242,12 @@ class CorpusAnalysisConcordance {
|
||||
app.flash('No matches selected', 'error');
|
||||
return;
|
||||
}
|
||||
promise = subcorpus.o.partial_export([...subcorpus.selectedItems], 50);
|
||||
promise = subcorpus.o.partialExport([...subcorpus.selectedItems], 50);
|
||||
} else {
|
||||
promise = subcorpus.o.export(50);
|
||||
}
|
||||
promise.then(
|
||||
data => {
|
||||
(data) => {
|
||||
let blob;
|
||||
if (exportFormat === 'csv') {
|
||||
let csvContent = 'sep=,\r\n';
|
||||
@ -287,11 +293,11 @@ class CorpusAnalysisConcordance {
|
||||
});
|
||||
modal.open();
|
||||
});
|
||||
this.elements.subcorpusActions.querySelector('.subcorpus-delete-trigger').addEventListener('click', event => {
|
||||
this.elements.subcorpusActions.querySelector('.subcorpus-delete-trigger').addEventListener('click', (event) => {
|
||||
event.preventDefault();
|
||||
let subcorpus = this.data.subcorpora[this.settings.selectedSubcorpus];
|
||||
subcorpus.o.drop().then(
|
||||
cQiStatus => {
|
||||
(cQiStatus) => {
|
||||
app.flash(`${subcorpus.o.name} deleted`, 'corpus');
|
||||
delete this.data.subcorpora[subcorpus.o.name];
|
||||
this.settings.selectedSubcorpus = undefined;
|
||||
@ -312,8 +318,9 @@ class CorpusAnalysisConcordance {
|
||||
this.clearSubcorpusPagination();
|
||||
}
|
||||
},
|
||||
cQiError => {
|
||||
app.flash(`${cQiError.payload.code}: ${cQiError.payload.msg}`, 'error');
|
||||
(cqiError) => {
|
||||
let errorString = `${cqiError.code}: ${cqiError.constructor.name}`;
|
||||
app.flash(errorString, 'error');
|
||||
}
|
||||
);
|
||||
});
|
||||
@ -362,7 +369,7 @@ class CorpusAnalysisConcordance {
|
||||
this.setTextStyle();
|
||||
this.setTokenRepresentation();
|
||||
for (let gotoReaderTriggerElement of this.elements.subcorpusItems.querySelectorAll('.goto-reader-trigger')) {
|
||||
gotoReaderTriggerElement.addEventListener('click', event => {
|
||||
gotoReaderTriggerElement.addEventListener('click', (event) => {
|
||||
event.preventDefault();
|
||||
let corpusAnalysisReader = this.app.extensions.Reader;
|
||||
let itemId = parseInt(gotoReaderTriggerElement.closest('.item').dataset.id);
|
||||
@ -380,11 +387,13 @@ class CorpusAnalysisConcordance {
|
||||
document.getSelection().removeAllRanges();
|
||||
document.getSelection().addRange(range);
|
||||
});
|
||||
this.app.elements.m.extensionTabs.select('reader-extension-container');
|
||||
this.app.elements.m.extensionTabs.select(
|
||||
this.app.extensions.Reader.elements.container.id
|
||||
);
|
||||
});
|
||||
}
|
||||
for (let selectTriggerElement of this.elements.subcorpusItems.querySelectorAll('.select-trigger')) {
|
||||
selectTriggerElement.addEventListener('click', event => {
|
||||
selectTriggerElement.addEventListener('click', (event) => {
|
||||
event.preventDefault();
|
||||
let itemElement = selectTriggerElement.closest('.item');
|
||||
let itemId = parseInt(itemElement.dataset.id);
|
||||
@ -446,14 +455,14 @@ class CorpusAnalysisConcordance {
|
||||
</li>
|
||||
`.trim();
|
||||
for (let paginationTriggerElement of this.elements.subcorpusPagination.querySelectorAll('.pagination-trigger[data-target]')) {
|
||||
paginationTriggerElement.addEventListener('click', event => {
|
||||
paginationTriggerElement.addEventListener('click', (event) => {
|
||||
event.preventDefault();
|
||||
this.app.disableActionElements();
|
||||
this.elements.progress.classList.remove('hide');
|
||||
let page = parseInt(paginationTriggerElement.dataset.target);
|
||||
subcorpus.o.paginate(page, this.settings.perPage, this.settings.context)
|
||||
subcorpus.o.paginate(this.settings.context, page, this.settings.perPage)
|
||||
.then(
|
||||
paginatedSubcorpus => {
|
||||
(paginatedSubcorpus) => {
|
||||
subcorpus.p = paginatedSubcorpus;
|
||||
this.renderSubcorpusItems();
|
||||
this.renderSubcorpusPagination();
|
@ -1,11 +1,16 @@
|
||||
cqi.api.APIClient = class APIClient {
|
||||
constructor(host, corpus_id, version = '0.1') {
|
||||
nopaque.corpus_analysis.cqi.api.Client = class Client {
|
||||
/**
|
||||
* @param {string} host
|
||||
* @param {number} [timeout=60] timeout
|
||||
* @param {string} [version=0.1] version
|
||||
*/
|
||||
constructor(host, timeout = 60, version = '0.1') {
|
||||
this.host = host;
|
||||
this.timeout = timeout * 1000; // convert seconds to milliseconds
|
||||
this.version = version;
|
||||
this.socket = io(
|
||||
this.host,
|
||||
{
|
||||
auth: {corpus_id: corpus_id},
|
||||
transports: ['websocket'],
|
||||
upgrade: false
|
||||
}
|
||||
@ -15,43 +20,43 @@ cqi.api.APIClient = class APIClient {
|
||||
/**
|
||||
* @param {string} fn_name
|
||||
* @param {object} [fn_args={}]
|
||||
* @returns {Promise<cqi.status.StatusConnectOk>}
|
||||
* @returns {Promise}
|
||||
*/
|
||||
#request(fn_name, fn_args = {}) {
|
||||
return new Promise((resolve, reject) => {
|
||||
this.socket.emit('cqi_client.api', {fn_name: fn_name, fn_args: fn_args}, (response) => {
|
||||
async #request(fn_name, fn_args = {}) {
|
||||
// TODO: implement timeout
|
||||
let response = await this.socket.emitWithAck('exec', fn_name, fn_args);
|
||||
if (response.code === 200) {
|
||||
resolve(response.payload);
|
||||
return response.payload;
|
||||
} else if (response.code === 500) {
|
||||
throw new Error(`[${response.code}] ${response.msg}`);
|
||||
} else if (response.code === 502) {
|
||||
if (response.payload.code in nopaque.corpus_analysis.cqi.errors.lookup) {
|
||||
throw new nopaque.corpus_analysis.cqi.errors.lookup[response.payload.code]();
|
||||
} else {
|
||||
throw new nopaque.corpus_analysis.cqi.errors.CQiError();
|
||||
}
|
||||
if (response.code === 500) {
|
||||
reject(new Error(`[${response.code}] ${response.msg}`));
|
||||
}
|
||||
if (response.code === 502) {
|
||||
reject(new cqi.errors.lookup[response.payload.code]());
|
||||
}
|
||||
});
|
||||
});
|
||||
}
|
||||
|
||||
/**
|
||||
* @param {string} username
|
||||
* @param {string} password
|
||||
* @returns {Promise<cqi.status.StatusConnectOk>}
|
||||
* @returns {Promise<nopaque.corpus_analysis.cqi.status.StatusConnectOk>}
|
||||
*/
|
||||
async ctrl_connect(username, password) {
|
||||
const fn_name = 'ctrl_connect';
|
||||
const fn_args = {username: username, password: password};
|
||||
let payload = await this.#request(fn_name, fn_args);
|
||||
return new cqi.status.lookup[payload.code]();
|
||||
return new nopaque.corpus_analysis.cqi.status.lookup[payload.code]();
|
||||
}
|
||||
|
||||
/**
|
||||
* @returns {Promise<cqi.status.StatusByeOk>}
|
||||
* @returns {Promise<nopaque.corpus_analysis.cqi.status.StatusByeOk>}
|
||||
*/
|
||||
async ctrl_bye() {
|
||||
const fn_name = 'ctrl_bye';
|
||||
let payload = await this.#request(fn_name);
|
||||
return new cqi.status.lookup[payload.code]();
|
||||
return new nopaque.corpus_analysis.cqi.status.lookup[payload.code]();
|
||||
}
|
||||
|
||||
/**
|
||||
@ -63,12 +68,12 @@ cqi.api.APIClient = class APIClient {
|
||||
}
|
||||
|
||||
/**
|
||||
* @returns {Promise<cqi.status.StatusPingOk>}
|
||||
* @returns {Promise<nopaque.corpus_analysis.cqi.status.StatusPingOk>}
|
||||
*/
|
||||
async ctrl_ping() {
|
||||
const fn_name = 'ctrl_ping';
|
||||
let payload = await this.#request(fn_name);
|
||||
return new cqi.status.lookup[payload.code]();
|
||||
return new nopaque.corpus_analysis.cqi.status.lookup[payload.code]();
|
||||
}
|
||||
|
||||
/**
|
||||
@ -203,13 +208,13 @@ cqi.api.APIClient = class APIClient {
|
||||
* try to unload a corpus and all its attributes from memory
|
||||
*
|
||||
* @param {string} corpus
|
||||
* @returns {Promise<cqi.status.StatusOk>}
|
||||
* @returns {Promise<nopaque.corpus_analysis.cqi.status.StatusOk>}
|
||||
*/
|
||||
async corpus_drop_corpus(corpus) {
|
||||
const fn_name = 'corpus_drop_corpus';
|
||||
const fn_args = {corpus: corpus};
|
||||
let payload = await this.#request(fn_name, fn_args);
|
||||
return new cqi.status.lookup[payload.code]();
|
||||
return new nopaque.corpus_analysis.cqi.status.lookup[payload.code]();
|
||||
}
|
||||
|
||||
/**
|
||||
@ -245,13 +250,13 @@ cqi.api.APIClient = class APIClient {
|
||||
* unload attribute from memory
|
||||
*
|
||||
* @param {string} attribute
|
||||
* @returns {Promise<cqi.status.StatusOk>}
|
||||
* @returns {Promise<nopaque.corpus_analysis.cqi.status.StatusOk>}
|
||||
*/
|
||||
async cl_drop_attribute(attribute) {
|
||||
const fn_name = 'cl_drop_attribute';
|
||||
const fn_args = {attribute: attribute};
|
||||
let payload = await this.#request(fn_name, fn_args);
|
||||
return new cqi.status.lookup[payload.code]();
|
||||
return new nopaque.corpus_analysis.cqi.status.lookup[payload.code]();
|
||||
}
|
||||
|
||||
/**
|
||||
@ -477,13 +482,13 @@ cqi.api.APIClient = class APIClient {
|
||||
* @param {string} mother_corpus
|
||||
* @param {string} subcorpus_name
|
||||
* @param {string} query
|
||||
* @returns {Promise<cqi.status.StatusOk>}
|
||||
* @returns {Promise<nopaque.corpus_analysis.cqi.status.StatusOk>}
|
||||
*/
|
||||
async cqp_query(mother_corpus, subcorpus_name, query) {
|
||||
const fn_name = 'cqp_query';
|
||||
const fn_args = {mother_corpus: mother_corpus, subcorpus_name: subcorpus_name, query: query};
|
||||
let payload = await this.#request(fn_name, fn_args);
|
||||
return new cqi.status.lookup[payload.code]();
|
||||
return new nopaque.corpus_analysis.cqi.status.lookup[payload.code]();
|
||||
}
|
||||
|
||||
/**
|
||||
@ -519,7 +524,7 @@ cqi.api.APIClient = class APIClient {
|
||||
|
||||
/**
|
||||
* Dump the values of <field> for match ranges <first> .. <last>
|
||||
* in <subcorpus>. <field> is one of the CQI_CONST_FIELD_* constants.
|
||||
* in <subcorpus>. <field> is one of the nopaque.corpus_analysis.cqi.constants.FIELD_* constants.
|
||||
*
|
||||
* @param {string} subcorpus
|
||||
* @param {number} field
|
||||
@ -537,13 +542,13 @@ cqi.api.APIClient = class APIClient {
|
||||
* delete a subcorpus from memory
|
||||
*
|
||||
* @param {string} subcorpus
|
||||
* @returns {Promise<cqi.status.StatusOk>}
|
||||
* @returns {Promise<nopaque.corpus_analysis.cqi.status.StatusOk>}
|
||||
*/
|
||||
async cqp_drop_subcorpus(subcorpus) {
|
||||
const fn_name = 'cqp_drop_subcorpus';
|
||||
const fn_args = {subcorpus: subcorpus};
|
||||
let payload = await this.#request(fn_name, fn_args);
|
||||
return new cqi.status.lookup[payload.code]();
|
||||
return new nopaque.corpus_analysis.cqi.status.lookup[payload.code]();
|
||||
}
|
||||
|
||||
/**
|
||||
@ -556,9 +561,9 @@ cqi.api.APIClient = class APIClient {
|
||||
*
|
||||
* returns <n> (id, frequency) pairs flattened into a list of size 2*<n>
|
||||
* field is one of
|
||||
* - CQI_CONST_FIELD_MATCH
|
||||
* - CQI_CONST_FIELD_TARGET
|
||||
* - CQI_CONST_FIELD_KEYWORD
|
||||
* - nopaque.corpus_analysis.cqi.constants.FIELD_MATCH
|
||||
* - nopaque.corpus_analysis.cqi.constants.FIELD_TARGET
|
||||
* - nopaque.corpus_analysis.cqi.constants.FIELD_KEYWORD
|
||||
*
|
||||
* NB: pairs are sorted by frequency desc.
|
||||
*
|
||||
@ -595,4 +600,89 @@ cqi.api.APIClient = class APIClient {
|
||||
const fn_args = {subcorpus: subcorpus, cutoff: cutoff, field1: field1, attribute1: attribute1, field2: field2, attribute2: attribute2};
|
||||
return await this.#request(fn_name, fn_args);
|
||||
}
|
||||
|
||||
/**************************************************************************
|
||||
* NOTE: The following is not included in the CQi specification. *
|
||||
**************************************************************************/
|
||||
/**************************************************************************
|
||||
* Custom additions for nopaque *
|
||||
**************************************************************************/
|
||||
|
||||
/**
|
||||
* @param {string} corpus
|
||||
* @returns {Promise<nopaque.corpus_analysis.cqi.status.StatusOk>}
|
||||
*/
|
||||
async ext_corpus_update_db(corpus) {
|
||||
const fn_name = 'ext_corpus_update_db';
|
||||
const fn_args = {corpus: corpus};
|
||||
let payload = await this.#request(fn_name, fn_args);
|
||||
return new nopaque.corpus_analysis.cqi.status.lookup[payload.code]();
|
||||
}
|
||||
|
||||
/**
|
||||
* @param {string} corpus
|
||||
* @returns {Promise<object>}
|
||||
*/
|
||||
async ext_corpus_static_data(corpus) {
|
||||
const fn_name = 'ext_corpus_static_data';
|
||||
const fn_args = {corpus: corpus};
|
||||
let compressedEncodedData = await this.#request(fn_name, fn_args);
|
||||
let data = pako.inflate(compressedEncodedData, {to: 'string'});
|
||||
return JSON.parse(data);
|
||||
}
|
||||
|
||||
/**
|
||||
* @param {string} corpus
|
||||
* @param {number=} page
|
||||
* @param {number=} per_page
|
||||
* @returns {Promise<object>}
|
||||
*/
|
||||
async ext_corpus_paginate_corpus(corpus, page, per_page) {
|
||||
const fn_name = 'ext_corpus_paginate_corpus';
|
||||
const fn_args = {corpus: corpus}
|
||||
if (page !== undefined) {fn_args.page = page;}
|
||||
if (per_page !== undefined) {fn_args.per_page = per_page;}
|
||||
return await this.#request(fn_name, fn_args);
|
||||
}
|
||||
|
||||
/**
|
||||
* @param {string} subcorpus
|
||||
* @param {number=} context
|
||||
* @param {number=} page
|
||||
* @param {number=} per_page
|
||||
* @returns {Promise<object>}
|
||||
*/
|
||||
async ext_cqp_paginate_subcorpus(subcorpus, context, page, per_page) {
|
||||
const fn_name = 'ext_cqp_paginate_subcorpus';
|
||||
const fn_args = {subcorpus: subcorpus}
|
||||
if (context !== undefined) {fn_args.context = context;}
|
||||
if (page !== undefined) {fn_args.page = page;}
|
||||
if (per_page !== undefined) {fn_args.per_page = per_page;}
|
||||
return await this.#request(fn_name, fn_args);
|
||||
}
|
||||
|
||||
/**
|
||||
* @param {string} subcorpus
|
||||
* @param {number[]} match_id_list
|
||||
* @param {number=} context
|
||||
* @returns {Promise<object>}
|
||||
*/
|
||||
async ext_cqp_partial_export_subcorpus(subcorpus, match_id_list, context) {
|
||||
const fn_name = 'ext_cqp_partial_export_subcorpus';
|
||||
const fn_args = {subcorpus: subcorpus, match_id_list: match_id_list};
|
||||
if (context !== undefined) {fn_args.context = context;}
|
||||
return await this.#request(fn_name, fn_args);
|
||||
}
|
||||
|
||||
/**
|
||||
* @param {string} subcorpus
|
||||
* @param {number=} context
|
||||
* @returns {Promise<object>}
|
||||
*/
|
||||
async ext_cqp_export_subcorpus(subcorpus, context) {
|
||||
const fn_name = 'ext_cqp_export_subcorpus';
|
||||
const fn_args = {subcorpus: subcorpus};
|
||||
if (context !== undefined) {fn_args.context = context;}
|
||||
return await this.#request(fn_name, fn_args);
|
||||
}
|
||||
};
|
1
app/static/js/corpus-analysis/cqi/api/index.js
Normal file
@ -0,0 +1 @@
|
||||
// Namespace object for the CQi API layer. It starts out empty; members such
// as `api.Client` (instantiated by `cqi.Client`) are presumably attached by
// other scripts loaded after this one — verify the script load order.
nopaque.corpus_analysis.cqi.api = {};
|
57
app/static/js/corpus-analysis/cqi/client.js
Normal file
@ -0,0 +1,57 @@
|
||||
/**
 * High-level CQi client.
 *
 * Wraps a `nopaque.corpus_analysis.cqi.api.Client` (exposed as `api`) and
 * forwards the control commands (connect, ping, bye, user abort) to it.
 */
nopaque.corpus_analysis.cqi.Client = class Client {
  /**
   * Create the client together with its underlying API client.
   *
   * @param {string} host
   * @param {number} [timeout=60]
   * @param {string} [version=0.1]
   */
  constructor(host, timeout = 60, version = '0.1') {
    const {Client: ApiClient} = nopaque.corpus_analysis.cqi.api;
    /** @type {nopaque.corpus_analysis.cqi.api.Client} */
    this.api = new ApiClient(host, timeout, version);
  }

  /**
   * A corpus collection bound to this client. A new collection object is
   * created on every access.
   *
   * @returns {nopaque.corpus_analysis.cqi.models.corpora.CorpusCollection}
   */
  get corpora() {
    const {CorpusCollection} = nopaque.corpus_analysis.cqi.models.corpora;
    return new CorpusCollection(this);
  }

  /**
   * Forward to the API client's `ctrl_bye`.
   *
   * @returns {Promise<nopaque.corpus_analysis.cqi.status.StatusByeOk>}
   */
  async bye() {
    const status = await this.api.ctrl_bye();
    return status;
  }

  /**
   * Forward the credentials to the API client's `ctrl_connect`.
   *
   * @param {string} username
   * @param {string} password
   * @returns {Promise<nopaque.corpus_analysis.cqi.status.StatusConnectOk>}
   */
  async connect(username, password) {
    const status = await this.api.ctrl_connect(username, password);
    return status;
  }

  /**
   * Forward to the API client's `ctrl_ping`.
   *
   * @returns {Promise<nopaque.corpus_analysis.cqi.status.StatusPingOk>}
   */
  async ping() {
    const status = await this.api.ctrl_ping();
    return status;
  }

  /**
   * Forward to the API client's `ctrl_user_abort`.
   *
   * @returns {Promise<null>}
   */
  async userAbort() {
    const result = await this.api.ctrl_user_abort();
    return result;
  }

  /**
   * Alias for the "bye" method: issues the same `ctrl_bye` call on the
   * API client.
   *
   * @returns {Promise<nopaque.corpus_analysis.cqi.status.StatusByeOk>}
   */
  async disconnect() {
    const status = await this.api.ctrl_bye();
    return status;
  }
};
|