mirror of
https://gitlab.ub.uni-bielefeld.de/sfb1288inf/nopaque.git
synced 2025-07-12 15:43:16 +00:00
Compare commits
1 Commits
1.1.0
...
05bccc7f88
Author | SHA1 | Date | |
---|---|---|---|
05bccc7f88 |
@ -5,9 +5,8 @@
|
|||||||
!app
|
!app
|
||||||
!migrations
|
!migrations
|
||||||
!tests
|
!tests
|
||||||
|
!.flaskenv
|
||||||
!boot.sh
|
!boot.sh
|
||||||
!config.py
|
!config.py
|
||||||
!docker-nopaque-entrypoint.sh
|
!nopaque.py
|
||||||
!requirements.txt
|
!requirements.txt
|
||||||
!requirements.freezed.txt
|
|
||||||
!wsgi.py
|
|
||||||
|
204
.env.tpl
204
.env.tpl
@ -1,20 +1,204 @@
|
|||||||
##############################################################################
|
################################################################################
|
||||||
# Environment variables used by Docker Compose config files. #
|
# Docker #
|
||||||
##############################################################################
|
################################################################################
|
||||||
|
# DEFAULT: ./data
|
||||||
|
# NOTE: Use `.` as <project-basedir>
|
||||||
|
# HOST_DATA_DIR=
|
||||||
|
|
||||||
|
# Example: 1000
|
||||||
# HINT: Use this bash command `id -u`
|
# HINT: Use this bash command `id -u`
|
||||||
# NOTE: 0 (= root user) is not allowed
|
|
||||||
HOST_UID=
|
HOST_UID=
|
||||||
|
|
||||||
|
# Example: 1000
|
||||||
# HINT: Use this bash command `id -g`
|
# HINT: Use this bash command `id -g`
|
||||||
# NOTE: 0 (= root group) is not allowed
|
|
||||||
HOST_GID=
|
HOST_GID=
|
||||||
|
|
||||||
|
# Example: 999
|
||||||
# HINT: Use this bash command `getent group docker | cut -d: -f3`
|
# HINT: Use this bash command `getent group docker | cut -d: -f3`
|
||||||
HOST_DOCKER_GID=
|
HOST_DOCKER_GID=
|
||||||
|
|
||||||
# DEFAULT: nopaque
|
# DEFAULT: ./logs
|
||||||
NOPAQUE_DOCKER_NETWORK_NAME=nopaque
|
# NOTES: Use `.` as <project-basedir>
|
||||||
|
# HOST_LOG_DIR=
|
||||||
|
|
||||||
# NOTE: This must be a network share and it must be available on all
|
# DEFAULT: nopaque_default
|
||||||
# Docker Swarm nodes, mounted to the same path.
|
# DOCKER_NETWORK_NAME=
|
||||||
HOST_NOPAQUE_DATA_PATH=/mnt/nopaque
|
|
||||||
|
################################################################################
|
||||||
|
# Flask #
|
||||||
|
# https://flask.palletsprojects.com/en/1.1.x/config/ #
|
||||||
|
################################################################################
|
||||||
|
# CHOOSE ONE: http, https
|
||||||
|
# DEFAULT: http
|
||||||
|
# PREFERRED_URL_SCHEME=
|
||||||
|
|
||||||
|
# DEFAULT: hard to guess string
|
||||||
|
# HINT: Use this bash command `python -c "import uuid; print(uuid.uuid4().hex)"`
|
||||||
|
# SECRET_KEY=
|
||||||
|
|
||||||
|
# DEFAULT: localhost:5000
|
||||||
|
# Example: nopaque.example.com/nopaque.example.com:5000
|
||||||
|
# HINT: If your instance is publicly available on a different Port then 80/443,
|
||||||
|
# you will have to add this to the server name
|
||||||
|
# SERVER_NAME=
|
||||||
|
|
||||||
|
# CHOOSE ONE: False, True
|
||||||
|
# DEFAULT: False
|
||||||
|
# HINT: Set to true if you redirect http to https
|
||||||
|
# SESSION_COOKIE_SECURE=
|
||||||
|
|
||||||
|
|
||||||
|
################################################################################
|
||||||
|
# Flask-Assets #
|
||||||
|
# https://webassets.readthedocs.io/en/latest/ #
|
||||||
|
################################################################################
|
||||||
|
# CHOOSE ONE: False, True
|
||||||
|
# DEFAULT: False
|
||||||
|
# ASSETS_DEBUG=
|
||||||
|
|
||||||
|
|
||||||
|
################################################################################
|
||||||
|
# Flask-Hashids #
|
||||||
|
# https://github.com/Pevtrick/Flask-Hashids #
|
||||||
|
################################################################################
|
||||||
|
# DEFAULT: 16
|
||||||
|
# HASHIDS_MIN_LENGTH=
|
||||||
|
|
||||||
|
# NOTE: Use this bash command `python -c "import uuid; print(uuid.uuid4().hex)"`
|
||||||
|
# It is strongly recommended that this is NEVER the same as the SECRET_KEY
|
||||||
|
HASHIDS_SALT=
|
||||||
|
|
||||||
|
|
||||||
|
################################################################################
|
||||||
|
# Flask-Login #
|
||||||
|
# https://flask-login.readthedocs.io/en/latest/ #
|
||||||
|
################################################################################
|
||||||
|
# CHOOSE ONE: False, True
|
||||||
|
# DEFAULT: False
|
||||||
|
# HINT: Set to true if you redirect http to https
|
||||||
|
# REMEMBER_COOKIE_SECURE=
|
||||||
|
|
||||||
|
|
||||||
|
################################################################################
|
||||||
|
# Flask-Mail #
|
||||||
|
# https://pythonhosted.org/Flask-Mail/ #
|
||||||
|
################################################################################
|
||||||
|
# EXAMPLE: nopaque Admin <nopaque@example.com>
|
||||||
|
MAIL_DEFAULT_SENDER=
|
||||||
|
|
||||||
|
MAIL_PASSWORD=
|
||||||
|
|
||||||
|
# EXAMPLE: smtp.example.com
|
||||||
|
MAIL_SERVER=
|
||||||
|
|
||||||
|
# EXAMPLE: 587
|
||||||
|
MAIL_PORT=
|
||||||
|
|
||||||
|
# CHOOSE ONE: False, True
|
||||||
|
# DEFAULT: False
|
||||||
|
# MAIL_USE_SSL=
|
||||||
|
|
||||||
|
# CHOOSE ONE: False, True
|
||||||
|
# DEFAULT: False
|
||||||
|
# MAIL_USE_TLS=
|
||||||
|
|
||||||
|
# EXAMPLE: nopaque@example.com
|
||||||
|
MAIL_USERNAME=
|
||||||
|
|
||||||
|
|
||||||
|
################################################################################
|
||||||
|
# Flask-SQLAlchemy #
|
||||||
|
# https://flask-sqlalchemy.palletsprojects.com/en/2.x/config/ #
|
||||||
|
################################################################################
|
||||||
|
# DEFAULT: 'sqlite:///<nopaque-basedir>/data.sqlite'
|
||||||
|
# NOTE: Use `.` as <nopaque-basedir>,
|
||||||
|
# Don't use a SQLite database when using Docker
|
||||||
|
# SQLALCHEMY_DATABASE_URI=
|
||||||
|
|
||||||
|
|
||||||
|
################################################################################
|
||||||
|
# nopaque #
|
||||||
|
################################################################################
|
||||||
|
# An account is registered with this email adress gets automatically assigned
|
||||||
|
# the administrator role.
|
||||||
|
# EXAMPLE: admin.nopaque@example.com
|
||||||
|
NOPAQUE_ADMIN=
|
||||||
|
|
||||||
|
# DEFAULT: /mnt/nopaque
|
||||||
|
# NOTE: This must be a network share and it must be available on all Docker
|
||||||
|
# Swarm nodes
|
||||||
|
# NOPAQUE_DATA_DIR=
|
||||||
|
|
||||||
|
# CHOOSE ONE: False, True
|
||||||
|
# DEFAULT: True
|
||||||
|
# NOPAQUE_IS_PRIMARY_INSTANCE=
|
||||||
|
|
||||||
|
# transport://[userid:password]@hostname[:port]/[virtual_host]
|
||||||
|
NOPAQUE_SOCKETIO_MESSAGE_QUEUE_URI=
|
||||||
|
|
||||||
|
# NOTE: Get these from the nopaque development team
|
||||||
|
NOPAQUE_DOCKER_REGISTRY_USERNAME=
|
||||||
|
NOPAQUE_DOCKER_REGISTRY_PASSWORD=
|
||||||
|
|
||||||
|
# DEFAULT: %Y-%m-%d %H:%M:%S
|
||||||
|
# NOPAQUE_LOG_DATE_FORMAT=
|
||||||
|
|
||||||
|
# DEFAULT: [%(asctime)s] %(levelname)s in %(pathname)s (function: %(funcName)s, line: %(lineno)d): %(message)s
|
||||||
|
# NOPAQUE_LOG_FORMAT=
|
||||||
|
|
||||||
|
# DEFAULT: INFO
|
||||||
|
# CHOOSE ONE: CRITICAL, ERROR, WARNING, INFO, DEBUG
|
||||||
|
# NOPAQUE_LOG_LEVEL=
|
||||||
|
|
||||||
|
# CHOOSE ONE: False, True
|
||||||
|
# DEFAULT: True
|
||||||
|
# NOPAQUE_LOG_FILE_ENABLED=
|
||||||
|
|
||||||
|
# DEFAULT: <nopaque-basedir>/logs
|
||||||
|
# NOTE: Use `.` as <nopaque-basedir>
|
||||||
|
# NOPAQUE_LOG_FILE_DIR=
|
||||||
|
|
||||||
|
# DEFAULT: NOPAQUE_LOG_LEVEL
|
||||||
|
# CHOOSE ONE: CRITICAL, ERROR, WARNING, INFO, DEBUG
|
||||||
|
# NOPAQUE_LOG_FILE_LEVEL=
|
||||||
|
|
||||||
|
# CHOOSE ONE: False, True
|
||||||
|
# DEFAULT: False
|
||||||
|
# NOPAQUE_LOG_STDERR_ENABLED=
|
||||||
|
|
||||||
|
# CHOOSE ONE: CRITICAL, ERROR, WARNING, INFO, DEBUG
|
||||||
|
# DEFAULT: NOPAQUE_LOG_LEVEL
|
||||||
|
# NOPAQUE_LOG_STDERR_LEVEL=
|
||||||
|
|
||||||
|
# CHOOSE ONE: False, True
|
||||||
|
# DEFAULT: False
|
||||||
|
# HINT: Set this to True only if you are using a proxy in front of nopaque
|
||||||
|
# NOPAQUE_PROXY_FIX_ENABLED=
|
||||||
|
|
||||||
|
# DEFAULT: 0
|
||||||
|
# Number of values to trust for X-Forwarded-For
|
||||||
|
# NOPAQUE_PROXY_FIX_X_FOR=
|
||||||
|
|
||||||
|
# DEFAULT: 0
|
||||||
|
# Number of values to trust for X-Forwarded-Host
|
||||||
|
# NOPAQUE_PROXY_FIX_X_HOST=
|
||||||
|
|
||||||
|
# DEFAULT: 0
|
||||||
|
# Number of values to trust for X-Forwarded-Port
|
||||||
|
# NOPAQUE_PROXY_FIX_X_PORT=
|
||||||
|
|
||||||
|
# DEFAULT: 0
|
||||||
|
# Number of values to trust for X-Forwarded-Prefix
|
||||||
|
# NOPAQUE_PROXY_FIX_X_PREFIX=
|
||||||
|
|
||||||
|
# DEFAULT: 0
|
||||||
|
# Number of values to trust for X-Forwarded-Proto
|
||||||
|
# NOPAQUE_PROXY_FIX_X_PROTO=
|
||||||
|
|
||||||
|
# CHOOSE ONE: False, True
|
||||||
|
# DEFAULT: False
|
||||||
|
# NOPAQUE_TRANSKRIBUS_ENABLED=
|
||||||
|
|
||||||
|
# READ-COOP account data: https://readcoop.eu/
|
||||||
|
# NOPAQUE_READCOOP_USERNAME=
|
||||||
|
# NOPAQUE_READCOOP_PASSWORD=
|
||||||
|
4
.gitignore
vendored
4
.gitignore
vendored
@ -1,7 +1,9 @@
|
|||||||
# nopaque specifics
|
# nopaque specifics
|
||||||
app/static/gen/
|
app/static/gen/
|
||||||
volumes/
|
data/
|
||||||
docker-compose.override.yml
|
docker-compose.override.yml
|
||||||
|
logs/
|
||||||
|
!logs/dummy
|
||||||
*.env
|
*.env
|
||||||
|
|
||||||
*.pjentsch-testing
|
*.pjentsch-testing
|
||||||
|
@ -1,84 +0,0 @@
|
|||||||
include:
|
|
||||||
- template: Security/Container-Scanning.gitlab-ci.yml
|
|
||||||
|
|
||||||
##############################################################################
|
|
||||||
# Pipeline stages in order of execution #
|
|
||||||
##############################################################################
|
|
||||||
stages:
|
|
||||||
- build
|
|
||||||
- publish
|
|
||||||
- sca
|
|
||||||
|
|
||||||
##############################################################################
|
|
||||||
# Pipeline behavior #
|
|
||||||
##############################################################################
|
|
||||||
workflow:
|
|
||||||
rules:
|
|
||||||
# Run the pipeline on commits to the default branch
|
|
||||||
- if: $CI_COMMIT_BRANCH == $CI_DEFAULT_BRANCH
|
|
||||||
variables:
|
|
||||||
# Set the Docker image tag to `latest`
|
|
||||||
DOCKER_IMAGE: $CI_REGISTRY_IMAGE:latest
|
|
||||||
when: always
|
|
||||||
# Run the pipeline on tag creation
|
|
||||||
- if: $CI_COMMIT_TAG
|
|
||||||
variables:
|
|
||||||
# Set the Docker image tag to the Git tag name
|
|
||||||
DOCKER_IMAGE: $CI_REGISTRY_IMAGE:$CI_COMMIT_REF_NAME
|
|
||||||
when: always
|
|
||||||
# Don't run the pipeline on all other occasions
|
|
||||||
- when: never
|
|
||||||
|
|
||||||
##############################################################################
|
|
||||||
# Default values for pipeline jobs #
|
|
||||||
##############################################################################
|
|
||||||
default:
|
|
||||||
image: docker:24.0.6
|
|
||||||
services:
|
|
||||||
- docker:24.0.6-dind
|
|
||||||
tags:
|
|
||||||
- docker
|
|
||||||
|
|
||||||
##############################################################################
|
|
||||||
# CI/CD variables for all jobs in the pipeline #
|
|
||||||
##############################################################################
|
|
||||||
variables:
|
|
||||||
DOCKER_TLS_CERTDIR: /certs
|
|
||||||
DOCKER_BUILD_PATH: .
|
|
||||||
DOCKERFILE: Dockerfile
|
|
||||||
|
|
||||||
##############################################################################
|
|
||||||
# Pipeline jobs #
|
|
||||||
##############################################################################
|
|
||||||
build:
|
|
||||||
stage: build
|
|
||||||
script:
|
|
||||||
- docker build --tag $DOCKER_IMAGE --file $DOCKERFILE $DOCKER_BUILD_PATH
|
|
||||||
- docker save $DOCKER_IMAGE > docker_image.tar
|
|
||||||
artifacts:
|
|
||||||
paths:
|
|
||||||
- docker_image.tar
|
|
||||||
|
|
||||||
publish:
|
|
||||||
stage: publish
|
|
||||||
before_script:
|
|
||||||
- docker login --username gitlab-ci-token --password $CI_JOB_TOKEN $CI_REGISTRY
|
|
||||||
script:
|
|
||||||
- docker load --input docker_image.tar
|
|
||||||
- docker push $DOCKER_IMAGE
|
|
||||||
after_script:
|
|
||||||
- docker logout $CI_REGISTRY
|
|
||||||
|
|
||||||
container_scanning:
|
|
||||||
stage: sca
|
|
||||||
rules:
|
|
||||||
# Run the job on commits to the default branch
|
|
||||||
- if: $CI_COMMIT_BRANCH == $CI_DEFAULT_BRANCH
|
|
||||||
when: always
|
|
||||||
# Run the job on tag creation
|
|
||||||
- if: $CI_COMMIT_TAG
|
|
||||||
when: always
|
|
||||||
# Don't run the job on all other occasions
|
|
||||||
- when: never
|
|
||||||
variables:
|
|
||||||
CS_IMAGE: $DOCKER_IMAGE
|
|
5
.vscode/extensions.json
vendored
5
.vscode/extensions.json
vendored
@ -1,8 +1,7 @@
|
|||||||
{
|
{
|
||||||
"recommendations": [
|
"recommendations": [
|
||||||
"irongeek.vscode-env",
|
"samuelcolvin.jinjahtml",
|
||||||
"ms-azuretools.vscode-docker",
|
"ms-azuretools.vscode-docker",
|
||||||
"ms-python.python",
|
"ms-python.python"
|
||||||
"samuelcolvin.jinjahtml"
|
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
|
21
.vscode/settings.json
vendored
21
.vscode/settings.json
vendored
@ -1,17 +1,13 @@
|
|||||||
{
|
{
|
||||||
"editor.rulers": [79],
|
"editor.rulers": [79],
|
||||||
"editor.tabSize": 4,
|
|
||||||
"emmet.includeLanguages": {
|
|
||||||
"jinja-html": "html"
|
|
||||||
},
|
|
||||||
"files.associations": {
|
|
||||||
".flaskenv": "env",
|
|
||||||
"*.env.tpl": "env",
|
|
||||||
"*.txt.j2": "jinja"
|
|
||||||
},
|
|
||||||
"files.insertFinalNewline": true,
|
"files.insertFinalNewline": true,
|
||||||
"files.trimFinalNewlines": true,
|
"python.terminal.activateEnvironment": false,
|
||||||
"files.trimTrailingWhitespace": true,
|
"[css]": {
|
||||||
|
"editor.tabSize": 2
|
||||||
|
},
|
||||||
|
"[scss]": {
|
||||||
|
"editor.tabSize": 2
|
||||||
|
},
|
||||||
"[html]": {
|
"[html]": {
|
||||||
"editor.tabSize": 2
|
"editor.tabSize": 2
|
||||||
},
|
},
|
||||||
@ -20,5 +16,8 @@
|
|||||||
},
|
},
|
||||||
"[jinja-html]": {
|
"[jinja-html]": {
|
||||||
"editor.tabSize": 2
|
"editor.tabSize": 2
|
||||||
|
},
|
||||||
|
"[jinja-js]": {
|
||||||
|
"editor.tabSize": 2
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
41
Dockerfile
41
Dockerfile
@ -1,57 +1,50 @@
|
|||||||
FROM python:3.10.13-slim-bookworm
|
FROM python:3.8.10-slim-buster
|
||||||
|
|
||||||
|
|
||||||
LABEL authors="Patrick Jentsch <p.jentsch@uni-bielefeld.de>"
|
LABEL authors="Patrick Jentsch <p.jentsch@uni-bielefeld.de>"
|
||||||
|
|
||||||
|
|
||||||
# Set environment variables
|
ARG DOCKER_GID
|
||||||
|
ARG UID
|
||||||
|
ARG GID
|
||||||
|
|
||||||
|
|
||||||
ENV LANG="C.UTF-8"
|
ENV LANG="C.UTF-8"
|
||||||
ENV PYTHONDONTWRITEBYTECODE="1"
|
ENV PYTHONDONTWRITEBYTECODE="1"
|
||||||
ENV PYTHONUNBUFFERED="1"
|
ENV PYTHONUNBUFFERED="1"
|
||||||
|
|
||||||
|
|
||||||
# Install system dependencies
|
|
||||||
RUN apt-get update \
|
RUN apt-get update \
|
||||||
&& apt-get install --no-install-recommends --yes \
|
&& apt-get install --no-install-recommends --yes \
|
||||||
build-essential \
|
build-essential \
|
||||||
gosu \
|
|
||||||
libpq-dev \
|
libpq-dev \
|
||||||
&& rm --recursive /var/lib/apt/lists/*
|
&& rm --recursive /var/lib/apt/lists/*
|
||||||
|
|
||||||
|
|
||||||
# Create a non-root user
|
RUN groupadd --gid "${DOCKER_GID}" docker \
|
||||||
RUN useradd --create-home --no-log-init nopaque \
|
&& groupadd --gid "${GID}" nopaque \
|
||||||
&& groupadd docker \
|
&& useradd --create-home --gid nopaque --groups "${DOCKER_GID}" --no-log-init --uid "${UID}" nopaque
|
||||||
&& usermod --append --groups docker nopaque
|
|
||||||
|
|
||||||
USER nopaque
|
USER nopaque
|
||||||
WORKDIR /home/nopaque
|
WORKDIR /home/nopaque
|
||||||
|
|
||||||
|
|
||||||
# Create a Python virtual environment
|
ENV PYTHON3_VENV_PATH="/home/nopaque/venv"
|
||||||
ENV NOPAQUE_PYTHON3_VENV_PATH="/home/nopaque/.venv"
|
RUN python3 -m venv "${PYTHON3_VENV_PATH}"
|
||||||
RUN python3 -m venv "${NOPAQUE_PYTHON3_VENV_PATH}"
|
ENV PATH="${PYTHON3_VENV_PATH}/bin:${PATH}"
|
||||||
ENV PATH="${NOPAQUE_PYTHON3_VENV_PATH}/bin:${PATH}"
|
|
||||||
|
|
||||||
|
|
||||||
# Install Python dependencies
|
COPY --chown=nopaque:nopaque requirements.txt .
|
||||||
COPY --chown=nopaque:nopaque requirements.freezed.txt requirements.freezed.txt
|
RUN python3 -m pip install --requirement requirements.txt \
|
||||||
RUN python3 -m pip install --requirement requirements.freezed.txt \
|
&& rm requirements.txt
|
||||||
&& rm requirements.freezed.txt
|
|
||||||
|
|
||||||
|
|
||||||
# Install the application
|
|
||||||
COPY docker-nopaque-entrypoint.sh /usr/local/bin/
|
|
||||||
COPY --chown=nopaque:nopaque app app
|
COPY --chown=nopaque:nopaque app app
|
||||||
COPY --chown=nopaque:nopaque migrations migrations
|
COPY --chown=nopaque:nopaque migrations migrations
|
||||||
COPY --chown=nopaque:nopaque tests tests
|
COPY --chown=nopaque:nopaque tests tests
|
||||||
COPY --chown=nopaque:nopaque boot.sh config.py wsgi.py ./
|
COPY --chown=nopaque:nopaque .flaskenv boot.sh config.py nopaque.py ./
|
||||||
|
|
||||||
|
|
||||||
EXPOSE 5000
|
EXPOSE 5000
|
||||||
|
|
||||||
|
|
||||||
USER root
|
ENTRYPOINT ["./boot.sh"]
|
||||||
|
|
||||||
|
|
||||||
ENTRYPOINT ["docker-nopaque-entrypoint.sh"]
|
|
||||||
|
@ -1,8 +1,5 @@
|
|||||||
# nopaque
|
# nopaque
|
||||||
|
|
||||||

|
|
||||||

|
|
||||||
|
|
||||||
nopaque bundles various tools and services that provide humanities scholars with DH methods and thus can support their various individual research processes. Using nopaque, researchers can subject digitized sources to Optical Character Recognition (OCR). The resulting text files can then be used as a data basis for Natural Language Processing (NLP). The texts are automatically subjected to various linguistic annotations. The data processed via NLP can then be summarized in the web application as corpora and analyzed by means of an information retrieval system through complex search queries. The range of functions of the web application will be successively extended according to the needs of the researchers.
|
nopaque bundles various tools and services that provide humanities scholars with DH methods and thus can support their various individual research processes. Using nopaque, researchers can subject digitized sources to Optical Character Recognition (OCR). The resulting text files can then be used as a data basis for Natural Language Processing (NLP). The texts are automatically subjected to various linguistic annotations. The data processed via NLP can then be summarized in the web application as corpora and analyzed by means of an information retrieval system through complex search queries. The range of functions of the web application will be successively extended according to the needs of the researchers.
|
||||||
|
|
||||||
## Prerequisites and requirements
|
## Prerequisites and requirements
|
||||||
@ -35,7 +32,7 @@ username@hostname:~$ sudo mount --types cifs --options gid=${USER},password=nopa
|
|||||||
# Clone the nopaque repository
|
# Clone the nopaque repository
|
||||||
username@hostname:~$ git clone https://gitlab.ub.uni-bielefeld.de/sfb1288inf/nopaque.git
|
username@hostname:~$ git clone https://gitlab.ub.uni-bielefeld.de/sfb1288inf/nopaque.git
|
||||||
# Create data directories
|
# Create data directories
|
||||||
username@hostname:~$ mkdir -p volumes/{db,mq}
|
username@hostname:~$ mkdir data/{db,logs,mq}
|
||||||
username@hostname:~$ cp db.env.tpl db.env
|
username@hostname:~$ cp db.env.tpl db.env
|
||||||
username@hostname:~$ cp .env.tpl .env
|
username@hostname:~$ cp .env.tpl .env
|
||||||
# Fill out the variables within these files.
|
# Fill out the variables within these files.
|
||||||
|
@ -9,8 +9,6 @@
|
|||||||
# compatible_service_versions:
|
# compatible_service_versions:
|
||||||
# - '0.1.0'
|
# - '0.1.0'
|
||||||
# - '0.1.1'
|
# - '0.1.1'
|
||||||
# - '0.1.2'
|
|
||||||
# - '0.1.3b'
|
|
||||||
# - title: 'Amharic'
|
# - title: 'Amharic'
|
||||||
# description: ''
|
# description: ''
|
||||||
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/amh.traineddata'
|
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/amh.traineddata'
|
||||||
@ -22,8 +20,6 @@
|
|||||||
# compatible_service_versions:
|
# compatible_service_versions:
|
||||||
# - '0.1.0'
|
# - '0.1.0'
|
||||||
# - '0.1.1'
|
# - '0.1.1'
|
||||||
# - '0.1.2'
|
|
||||||
# - '0.1.3b'
|
|
||||||
- title: 'Arabic'
|
- title: 'Arabic'
|
||||||
description: ''
|
description: ''
|
||||||
url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/ara.traineddata'
|
url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/ara.traineddata'
|
||||||
@ -35,8 +31,6 @@
|
|||||||
compatible_service_versions:
|
compatible_service_versions:
|
||||||
- '0.1.0'
|
- '0.1.0'
|
||||||
- '0.1.1'
|
- '0.1.1'
|
||||||
- '0.1.2'
|
|
||||||
- '0.1.3b'
|
|
||||||
# - title: 'Assamese'
|
# - title: 'Assamese'
|
||||||
# description: ''
|
# description: ''
|
||||||
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/asm.traineddata'
|
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/asm.traineddata'
|
||||||
@ -48,8 +42,6 @@
|
|||||||
# compatible_service_versions:
|
# compatible_service_versions:
|
||||||
# - '0.1.0'
|
# - '0.1.0'
|
||||||
# - '0.1.1'
|
# - '0.1.1'
|
||||||
# - '0.1.2'
|
|
||||||
# - '0.1.3b'
|
|
||||||
# - title: 'Azerbaijani'
|
# - title: 'Azerbaijani'
|
||||||
# description: ''
|
# description: ''
|
||||||
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/aze.traineddata'
|
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/aze.traineddata'
|
||||||
@ -61,8 +53,6 @@
|
|||||||
# compatible_service_versions:
|
# compatible_service_versions:
|
||||||
# - '0.1.0'
|
# - '0.1.0'
|
||||||
# - '0.1.1'
|
# - '0.1.1'
|
||||||
# - '0.1.2'
|
|
||||||
# - '0.1.3b'
|
|
||||||
# - title: 'Azerbaijani - Cyrillic'
|
# - title: 'Azerbaijani - Cyrillic'
|
||||||
# description: ''
|
# description: ''
|
||||||
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/aze_cyrl.traineddata'
|
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/aze_cyrl.traineddata'
|
||||||
@ -74,8 +64,6 @@
|
|||||||
# compatible_service_versions:
|
# compatible_service_versions:
|
||||||
# - '0.1.0'
|
# - '0.1.0'
|
||||||
# - '0.1.1'
|
# - '0.1.1'
|
||||||
# - '0.1.2'
|
|
||||||
# - '0.1.3b'
|
|
||||||
# - title: 'Belarusian'
|
# - title: 'Belarusian'
|
||||||
# description: ''
|
# description: ''
|
||||||
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/bel.traineddata'
|
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/bel.traineddata'
|
||||||
@ -87,8 +75,6 @@
|
|||||||
# compatible_service_versions:
|
# compatible_service_versions:
|
||||||
# - '0.1.0'
|
# - '0.1.0'
|
||||||
# - '0.1.1'
|
# - '0.1.1'
|
||||||
# - '0.1.2'
|
|
||||||
# - '0.1.3b'
|
|
||||||
# - title: 'Bengali'
|
# - title: 'Bengali'
|
||||||
# description: ''
|
# description: ''
|
||||||
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/ben.traineddata'
|
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/ben.traineddata'
|
||||||
@ -100,8 +86,6 @@
|
|||||||
# compatible_service_versions:
|
# compatible_service_versions:
|
||||||
# - '0.1.0'
|
# - '0.1.0'
|
||||||
# - '0.1.1'
|
# - '0.1.1'
|
||||||
# - '0.1.2'
|
|
||||||
# - '0.1.3b'
|
|
||||||
# - title: 'Tibetan'
|
# - title: 'Tibetan'
|
||||||
# description: ''
|
# description: ''
|
||||||
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/bod.traineddata'
|
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/bod.traineddata'
|
||||||
@ -113,8 +97,6 @@
|
|||||||
# compatible_service_versions:
|
# compatible_service_versions:
|
||||||
# - '0.1.0'
|
# - '0.1.0'
|
||||||
# - '0.1.1'
|
# - '0.1.1'
|
||||||
# - '0.1.2'
|
|
||||||
# - '0.1.3b'
|
|
||||||
# - title: 'Bosnian'
|
# - title: 'Bosnian'
|
||||||
# description: ''
|
# description: ''
|
||||||
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/bos.traineddata'
|
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/bos.traineddata'
|
||||||
@ -126,8 +108,6 @@
|
|||||||
# compatible_service_versions:
|
# compatible_service_versions:
|
||||||
# - '0.1.0'
|
# - '0.1.0'
|
||||||
# - '0.1.1'
|
# - '0.1.1'
|
||||||
# - '0.1.2'
|
|
||||||
# - '0.1.3b'
|
|
||||||
# - title: 'Bulgarian'
|
# - title: 'Bulgarian'
|
||||||
# description: ''
|
# description: ''
|
||||||
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/bul.traineddata'
|
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/bul.traineddata'
|
||||||
@ -139,8 +119,6 @@
|
|||||||
# compatible_service_versions:
|
# compatible_service_versions:
|
||||||
# - '0.1.0'
|
# - '0.1.0'
|
||||||
# - '0.1.1'
|
# - '0.1.1'
|
||||||
# - '0.1.2'
|
|
||||||
# - '0.1.3b'
|
|
||||||
# - title: 'Catalan; Valencian'
|
# - title: 'Catalan; Valencian'
|
||||||
# description: ''
|
# description: ''
|
||||||
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/cat.traineddata'
|
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/cat.traineddata'
|
||||||
@ -152,8 +130,6 @@
|
|||||||
# compatible_service_versions:
|
# compatible_service_versions:
|
||||||
# - '0.1.0'
|
# - '0.1.0'
|
||||||
# - '0.1.1'
|
# - '0.1.1'
|
||||||
# - '0.1.2'
|
|
||||||
# - '0.1.3b'
|
|
||||||
# - title: 'Cebuano'
|
# - title: 'Cebuano'
|
||||||
# description: ''
|
# description: ''
|
||||||
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/ceb.traineddata'
|
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/ceb.traineddata'
|
||||||
@ -165,8 +141,6 @@
|
|||||||
# compatible_service_versions:
|
# compatible_service_versions:
|
||||||
# - '0.1.0'
|
# - '0.1.0'
|
||||||
# - '0.1.1'
|
# - '0.1.1'
|
||||||
# - '0.1.2'
|
|
||||||
# - '0.1.3b'
|
|
||||||
# - title: 'Czech'
|
# - title: 'Czech'
|
||||||
# description: ''
|
# description: ''
|
||||||
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/ces.traineddata'
|
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/ces.traineddata'
|
||||||
@ -178,8 +152,6 @@
|
|||||||
# compatible_service_versions:
|
# compatible_service_versions:
|
||||||
# - '0.1.0'
|
# - '0.1.0'
|
||||||
# - '0.1.1'
|
# - '0.1.1'
|
||||||
# - '0.1.2'
|
|
||||||
# - '0.1.3b'
|
|
||||||
# - title: 'Chinese - Simplified'
|
# - title: 'Chinese - Simplified'
|
||||||
# description: ''
|
# description: ''
|
||||||
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/chi_sim.traineddata'
|
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/chi_sim.traineddata'
|
||||||
@ -191,8 +163,6 @@
|
|||||||
# compatible_service_versions:
|
# compatible_service_versions:
|
||||||
# - '0.1.0'
|
# - '0.1.0'
|
||||||
# - '0.1.1'
|
# - '0.1.1'
|
||||||
# - '0.1.2'
|
|
||||||
# - '0.1.3b'
|
|
||||||
- title: 'Chinese - Traditional'
|
- title: 'Chinese - Traditional'
|
||||||
description: ''
|
description: ''
|
||||||
url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/chi_tra.traineddata'
|
url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/chi_tra.traineddata'
|
||||||
@ -204,8 +174,6 @@
|
|||||||
compatible_service_versions:
|
compatible_service_versions:
|
||||||
- '0.1.0'
|
- '0.1.0'
|
||||||
- '0.1.1'
|
- '0.1.1'
|
||||||
- '0.1.2'
|
|
||||||
- '0.1.3b'
|
|
||||||
# - title: 'Cherokee'
|
# - title: 'Cherokee'
|
||||||
# description: ''
|
# description: ''
|
||||||
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/chr.traineddata'
|
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/chr.traineddata'
|
||||||
@ -217,8 +185,6 @@
|
|||||||
# compatible_service_versions:
|
# compatible_service_versions:
|
||||||
# - '0.1.0'
|
# - '0.1.0'
|
||||||
# - '0.1.1'
|
# - '0.1.1'
|
||||||
# - '0.1.2'
|
|
||||||
# - '0.1.3b'
|
|
||||||
# - title: 'Welsh'
|
# - title: 'Welsh'
|
||||||
# description: ''
|
# description: ''
|
||||||
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/cym.traineddata'
|
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/cym.traineddata'
|
||||||
@ -230,8 +196,6 @@
|
|||||||
# compatible_service_versions:
|
# compatible_service_versions:
|
||||||
# - '0.1.0'
|
# - '0.1.0'
|
||||||
# - '0.1.1'
|
# - '0.1.1'
|
||||||
# - '0.1.2'
|
|
||||||
# - '0.1.3b'
|
|
||||||
- title: 'Danish'
|
- title: 'Danish'
|
||||||
description: ''
|
description: ''
|
||||||
url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/dan.traineddata'
|
url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/dan.traineddata'
|
||||||
@ -243,8 +207,6 @@
|
|||||||
compatible_service_versions:
|
compatible_service_versions:
|
||||||
- '0.1.0'
|
- '0.1.0'
|
||||||
- '0.1.1'
|
- '0.1.1'
|
||||||
- '0.1.2'
|
|
||||||
- '0.1.3b'
|
|
||||||
- title: 'German'
|
- title: 'German'
|
||||||
description: ''
|
description: ''
|
||||||
url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/deu.traineddata'
|
url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/deu.traineddata'
|
||||||
@ -256,8 +218,6 @@
|
|||||||
compatible_service_versions:
|
compatible_service_versions:
|
||||||
- '0.1.0'
|
- '0.1.0'
|
||||||
- '0.1.1'
|
- '0.1.1'
|
||||||
- '0.1.2'
|
|
||||||
- '0.1.3b'
|
|
||||||
# - title: 'Dzongkha'
|
# - title: 'Dzongkha'
|
||||||
# description: ''
|
# description: ''
|
||||||
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/dzo.traineddata'
|
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/dzo.traineddata'
|
||||||
@ -269,8 +229,6 @@
|
|||||||
# compatible_service_versions:
|
# compatible_service_versions:
|
||||||
# - '0.1.0'
|
# - '0.1.0'
|
||||||
# - '0.1.1'
|
# - '0.1.1'
|
||||||
# - '0.1.2'
|
|
||||||
# - '0.1.3b'
|
|
||||||
- title: 'Greek, Modern (1453-)'
|
- title: 'Greek, Modern (1453-)'
|
||||||
description: ''
|
description: ''
|
||||||
url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/ell.traineddata'
|
url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/ell.traineddata'
|
||||||
@ -282,8 +240,6 @@
|
|||||||
compatible_service_versions:
|
compatible_service_versions:
|
||||||
- '0.1.0'
|
- '0.1.0'
|
||||||
- '0.1.1'
|
- '0.1.1'
|
||||||
- '0.1.2'
|
|
||||||
- '0.1.3b'
|
|
||||||
- title: 'English'
|
- title: 'English'
|
||||||
description: ''
|
description: ''
|
||||||
url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/eng.traineddata'
|
url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/eng.traineddata'
|
||||||
@ -295,8 +251,6 @@
|
|||||||
compatible_service_versions:
|
compatible_service_versions:
|
||||||
- '0.1.0'
|
- '0.1.0'
|
||||||
- '0.1.1'
|
- '0.1.1'
|
||||||
- '0.1.2'
|
|
||||||
- '0.1.3b'
|
|
||||||
- title: 'English, Middle (1100-1500)'
|
- title: 'English, Middle (1100-1500)'
|
||||||
description: ''
|
description: ''
|
||||||
url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/enm.traineddata'
|
url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/enm.traineddata'
|
||||||
@ -308,8 +262,6 @@
|
|||||||
compatible_service_versions:
|
compatible_service_versions:
|
||||||
- '0.1.0'
|
- '0.1.0'
|
||||||
- '0.1.1'
|
- '0.1.1'
|
||||||
- '0.1.2'
|
|
||||||
- '0.1.3b'
|
|
||||||
# - title: 'Esperanto'
|
# - title: 'Esperanto'
|
||||||
# description: ''
|
# description: ''
|
||||||
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/epo.traineddata'
|
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/epo.traineddata'
|
||||||
@ -321,8 +273,6 @@
|
|||||||
# compatible_service_versions:
|
# compatible_service_versions:
|
||||||
# - '0.1.0'
|
# - '0.1.0'
|
||||||
# - '0.1.1'
|
# - '0.1.1'
|
||||||
# - '0.1.2'
|
|
||||||
# - '0.1.3b'
|
|
||||||
# - title: 'Estonian'
|
# - title: 'Estonian'
|
||||||
# description: ''
|
# description: ''
|
||||||
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/est.traineddata'
|
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/est.traineddata'
|
||||||
@ -334,8 +284,6 @@
|
|||||||
# compatible_service_versions:
|
# compatible_service_versions:
|
||||||
# - '0.1.0'
|
# - '0.1.0'
|
||||||
# - '0.1.1'
|
# - '0.1.1'
|
||||||
# - '0.1.2'
|
|
||||||
# - '0.1.3b'
|
|
||||||
# - title: 'Basque'
|
# - title: 'Basque'
|
||||||
# description: ''
|
# description: ''
|
||||||
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/eus.traineddata'
|
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/eus.traineddata'
|
||||||
@ -347,8 +295,6 @@
|
|||||||
# compatible_service_versions:
|
# compatible_service_versions:
|
||||||
# - '0.1.0'
|
# - '0.1.0'
|
||||||
# - '0.1.1'
|
# - '0.1.1'
|
||||||
# - '0.1.2'
|
|
||||||
# - '0.1.3b'
|
|
||||||
# - title: 'Persian'
|
# - title: 'Persian'
|
||||||
# description: ''
|
# description: ''
|
||||||
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/fas.traineddata'
|
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/fas.traineddata'
|
||||||
@ -360,8 +306,6 @@
|
|||||||
# compatible_service_versions:
|
# compatible_service_versions:
|
||||||
# - '0.1.0'
|
# - '0.1.0'
|
||||||
# - '0.1.1'
|
# - '0.1.1'
|
||||||
# - '0.1.2'
|
|
||||||
# - '0.1.3b'
|
|
||||||
# - title: 'Finnish'
|
# - title: 'Finnish'
|
||||||
# description: ''
|
# description: ''
|
||||||
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/fin.traineddata'
|
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/fin.traineddata'
|
||||||
@ -373,8 +317,6 @@
|
|||||||
# compatible_service_versions:
|
# compatible_service_versions:
|
||||||
# - '0.1.0'
|
# - '0.1.0'
|
||||||
# - '0.1.1'
|
# - '0.1.1'
|
||||||
# - '0.1.2'
|
|
||||||
# - '0.1.3b'
|
|
||||||
- title: 'French'
|
- title: 'French'
|
||||||
description: ''
|
description: ''
|
||||||
url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/fra.traineddata'
|
url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/fra.traineddata'
|
||||||
@ -386,8 +328,6 @@
|
|||||||
compatible_service_versions:
|
compatible_service_versions:
|
||||||
- '0.1.0'
|
- '0.1.0'
|
||||||
- '0.1.1'
|
- '0.1.1'
|
||||||
- '0.1.2'
|
|
||||||
- '0.1.3b'
|
|
||||||
- title: 'German Fraktur'
|
- title: 'German Fraktur'
|
||||||
description: ''
|
description: ''
|
||||||
url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/frk.traineddata'
|
url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/frk.traineddata'
|
||||||
@ -399,8 +339,6 @@
|
|||||||
compatible_service_versions:
|
compatible_service_versions:
|
||||||
- '0.1.0'
|
- '0.1.0'
|
||||||
- '0.1.1'
|
- '0.1.1'
|
||||||
- '0.1.2'
|
|
||||||
- '0.1.3b'
|
|
||||||
- title: 'French, Middle (ca. 1400-1600)'
|
- title: 'French, Middle (ca. 1400-1600)'
|
||||||
description: ''
|
description: ''
|
||||||
url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/frm.traineddata'
|
url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/frm.traineddata'
|
||||||
@ -412,8 +350,6 @@
|
|||||||
compatible_service_versions:
|
compatible_service_versions:
|
||||||
- '0.1.0'
|
- '0.1.0'
|
||||||
- '0.1.1'
|
- '0.1.1'
|
||||||
- '0.1.2'
|
|
||||||
- '0.1.3b'
|
|
||||||
# - title: 'Irish'
|
# - title: 'Irish'
|
||||||
# description: ''
|
# description: ''
|
||||||
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/gle.traineddata'
|
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/gle.traineddata'
|
||||||
@ -425,8 +361,6 @@
|
|||||||
# compatible_service_versions:
|
# compatible_service_versions:
|
||||||
# - '0.1.0'
|
# - '0.1.0'
|
||||||
# - '0.1.1'
|
# - '0.1.1'
|
||||||
# - '0.1.2'
|
|
||||||
# - '0.1.3b'
|
|
||||||
# - title: 'Galician'
|
# - title: 'Galician'
|
||||||
# description: ''
|
# description: ''
|
||||||
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/glg.traineddata'
|
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/glg.traineddata'
|
||||||
@ -438,8 +372,6 @@
|
|||||||
# compatible_service_versions:
|
# compatible_service_versions:
|
||||||
# - '0.1.0'
|
# - '0.1.0'
|
||||||
# - '0.1.1'
|
# - '0.1.1'
|
||||||
# - '0.1.2'
|
|
||||||
# - '0.1.3b'
|
|
||||||
- title: 'Greek, Ancient (-1453)'
|
- title: 'Greek, Ancient (-1453)'
|
||||||
description: ''
|
description: ''
|
||||||
url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/grc.traineddata'
|
url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/grc.traineddata'
|
||||||
@ -451,8 +383,6 @@
|
|||||||
compatible_service_versions:
|
compatible_service_versions:
|
||||||
- '0.1.0'
|
- '0.1.0'
|
||||||
- '0.1.1'
|
- '0.1.1'
|
||||||
- '0.1.2'
|
|
||||||
- '0.1.3b'
|
|
||||||
# - title: 'Gujarati'
|
# - title: 'Gujarati'
|
||||||
# description: ''
|
# description: ''
|
||||||
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/guj.traineddata'
|
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/guj.traineddata'
|
||||||
@ -464,8 +394,6 @@
|
|||||||
# compatible_service_versions:
|
# compatible_service_versions:
|
||||||
# - '0.1.0'
|
# - '0.1.0'
|
||||||
# - '0.1.1'
|
# - '0.1.1'
|
||||||
# - '0.1.2'
|
|
||||||
# - '0.1.3b'
|
|
||||||
# - title: 'Haitian; Haitian Creole'
|
# - title: 'Haitian; Haitian Creole'
|
||||||
# description: ''
|
# description: ''
|
||||||
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/hat.traineddata'
|
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/hat.traineddata'
|
||||||
@ -477,8 +405,6 @@
|
|||||||
# compatible_service_versions:
|
# compatible_service_versions:
|
||||||
# - '0.1.0'
|
# - '0.1.0'
|
||||||
# - '0.1.1'
|
# - '0.1.1'
|
||||||
# - '0.1.2'
|
|
||||||
# - '0.1.3b'
|
|
||||||
# - title: 'Hebrew'
|
# - title: 'Hebrew'
|
||||||
# description: ''
|
# description: ''
|
||||||
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/heb.traineddata'
|
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/heb.traineddata'
|
||||||
@ -490,8 +416,6 @@
|
|||||||
# compatible_service_versions:
|
# compatible_service_versions:
|
||||||
# - '0.1.0'
|
# - '0.1.0'
|
||||||
# - '0.1.1'
|
# - '0.1.1'
|
||||||
# - '0.1.2'
|
|
||||||
# - '0.1.3b'
|
|
||||||
# - title: 'Hindi'
|
# - title: 'Hindi'
|
||||||
# description: ''
|
# description: ''
|
||||||
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/hin.traineddata'
|
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/hin.traineddata'
|
||||||
@ -503,8 +427,6 @@
|
|||||||
# compatible_service_versions:
|
# compatible_service_versions:
|
||||||
# - '0.1.0'
|
# - '0.1.0'
|
||||||
# - '0.1.1'
|
# - '0.1.1'
|
||||||
# - '0.1.2'
|
|
||||||
# - '0.1.3b'
|
|
||||||
# - title: 'Croatian'
|
# - title: 'Croatian'
|
||||||
# description: ''
|
# description: ''
|
||||||
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/hrv.traineddata'
|
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/hrv.traineddata'
|
||||||
@ -516,8 +438,6 @@
|
|||||||
# compatible_service_versions:
|
# compatible_service_versions:
|
||||||
# - '0.1.0'
|
# - '0.1.0'
|
||||||
# - '0.1.1'
|
# - '0.1.1'
|
||||||
# - '0.1.2'
|
|
||||||
# - '0.1.3b'
|
|
||||||
# - title: 'Hungarian'
|
# - title: 'Hungarian'
|
||||||
# description: ''
|
# description: ''
|
||||||
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/hun.traineddata'
|
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/hun.traineddata'
|
||||||
@ -529,8 +449,6 @@
|
|||||||
# compatible_service_versions:
|
# compatible_service_versions:
|
||||||
# - '0.1.0'
|
# - '0.1.0'
|
||||||
# - '0.1.1'
|
# - '0.1.1'
|
||||||
# - '0.1.2'
|
|
||||||
# - '0.1.3b'
|
|
||||||
# - title: 'Inuktitut'
|
# - title: 'Inuktitut'
|
||||||
# description: ''
|
# description: ''
|
||||||
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/iku.traineddata'
|
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/iku.traineddata'
|
||||||
@ -542,8 +460,6 @@
|
|||||||
# compatible_service_versions:
|
# compatible_service_versions:
|
||||||
# - '0.1.0'
|
# - '0.1.0'
|
||||||
# - '0.1.1'
|
# - '0.1.1'
|
||||||
# - '0.1.2'
|
|
||||||
# - '0.1.3b'
|
|
||||||
# - title: 'Indonesian'
|
# - title: 'Indonesian'
|
||||||
# description: ''
|
# description: ''
|
||||||
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/ind.traineddata'
|
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/ind.traineddata'
|
||||||
@ -555,8 +471,6 @@
|
|||||||
# compatible_service_versions:
|
# compatible_service_versions:
|
||||||
# - '0.1.0'
|
# - '0.1.0'
|
||||||
# - '0.1.1'
|
# - '0.1.1'
|
||||||
# - '0.1.2'
|
|
||||||
# - '0.1.3b'
|
|
||||||
# - title: 'Icelandic'
|
# - title: 'Icelandic'
|
||||||
# description: ''
|
# description: ''
|
||||||
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/isl.traineddata'
|
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/isl.traineddata'
|
||||||
@ -568,8 +482,6 @@
|
|||||||
# compatible_service_versions:
|
# compatible_service_versions:
|
||||||
# - '0.1.0'
|
# - '0.1.0'
|
||||||
# - '0.1.1'
|
# - '0.1.1'
|
||||||
# - '0.1.2'
|
|
||||||
# - '0.1.3b'
|
|
||||||
- title: 'Italian'
|
- title: 'Italian'
|
||||||
description: ''
|
description: ''
|
||||||
url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/ita.traineddata'
|
url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/ita.traineddata'
|
||||||
@ -581,8 +493,6 @@
|
|||||||
compatible_service_versions:
|
compatible_service_versions:
|
||||||
- '0.1.0'
|
- '0.1.0'
|
||||||
- '0.1.1'
|
- '0.1.1'
|
||||||
- '0.1.2'
|
|
||||||
- '0.1.3b'
|
|
||||||
- title: 'Italian - Old'
|
- title: 'Italian - Old'
|
||||||
description: ''
|
description: ''
|
||||||
url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/ita_old.traineddata'
|
url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/ita_old.traineddata'
|
||||||
@ -594,8 +504,6 @@
|
|||||||
compatible_service_versions:
|
compatible_service_versions:
|
||||||
- '0.1.0'
|
- '0.1.0'
|
||||||
- '0.1.1'
|
- '0.1.1'
|
||||||
- '0.1.2'
|
|
||||||
- '0.1.3b'
|
|
||||||
# - title: 'Javanese'
|
# - title: 'Javanese'
|
||||||
# description: ''
|
# description: ''
|
||||||
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/jav.traineddata'
|
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/jav.traineddata'
|
||||||
@ -607,8 +515,6 @@
|
|||||||
# compatible_service_versions:
|
# compatible_service_versions:
|
||||||
# - '0.1.0'
|
# - '0.1.0'
|
||||||
# - '0.1.1'
|
# - '0.1.1'
|
||||||
# - '0.1.2'
|
|
||||||
# - '0.1.3b'
|
|
||||||
# - title: 'Japanese'
|
# - title: 'Japanese'
|
||||||
# description: ''
|
# description: ''
|
||||||
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/jpn.traineddata'
|
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/jpn.traineddata'
|
||||||
@ -620,8 +526,6 @@
|
|||||||
# compatible_service_versions:
|
# compatible_service_versions:
|
||||||
# - '0.1.0'
|
# - '0.1.0'
|
||||||
# - '0.1.1'
|
# - '0.1.1'
|
||||||
# - '0.1.2'
|
|
||||||
# - '0.1.3b'
|
|
||||||
# - title: 'Kannada'
|
# - title: 'Kannada'
|
||||||
# description: ''
|
# description: ''
|
||||||
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/kan.traineddata'
|
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/kan.traineddata'
|
||||||
@ -633,8 +537,6 @@
|
|||||||
# compatible_service_versions:
|
# compatible_service_versions:
|
||||||
# - '0.1.0'
|
# - '0.1.0'
|
||||||
# - '0.1.1'
|
# - '0.1.1'
|
||||||
# - '0.1.2'
|
|
||||||
# - '0.1.3b'
|
|
||||||
# - title: 'Georgian'
|
# - title: 'Georgian'
|
||||||
# description: ''
|
# description: ''
|
||||||
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/kat.traineddata'
|
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/kat.traineddata'
|
||||||
@ -646,8 +548,6 @@
|
|||||||
# compatible_service_versions:
|
# compatible_service_versions:
|
||||||
# - '0.1.0'
|
# - '0.1.0'
|
||||||
# - '0.1.1'
|
# - '0.1.1'
|
||||||
# - '0.1.2'
|
|
||||||
# - '0.1.3b'
|
|
||||||
# - title: 'Georgian - Old'
|
# - title: 'Georgian - Old'
|
||||||
# description: ''
|
# description: ''
|
||||||
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/kat_old.traineddata'
|
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/kat_old.traineddata'
|
||||||
@ -659,8 +559,6 @@
|
|||||||
# compatible_service_versions:
|
# compatible_service_versions:
|
||||||
# - '0.1.0'
|
# - '0.1.0'
|
||||||
# - '0.1.1'
|
# - '0.1.1'
|
||||||
# - '0.1.2'
|
|
||||||
# - '0.1.3b'
|
|
||||||
# - title: 'Kazakh'
|
# - title: 'Kazakh'
|
||||||
# description: ''
|
# description: ''
|
||||||
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/kaz.traineddata'
|
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/kaz.traineddata'
|
||||||
@ -672,8 +570,6 @@
|
|||||||
# compatible_service_versions:
|
# compatible_service_versions:
|
||||||
# - '0.1.0'
|
# - '0.1.0'
|
||||||
# - '0.1.1'
|
# - '0.1.1'
|
||||||
# - '0.1.2'
|
|
||||||
# - '0.1.3b'
|
|
||||||
# - title: 'Central Khmer'
|
# - title: 'Central Khmer'
|
||||||
# description: ''
|
# description: ''
|
||||||
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/khm.traineddata'
|
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/khm.traineddata'
|
||||||
@ -685,8 +581,6 @@
|
|||||||
# compatible_service_versions:
|
# compatible_service_versions:
|
||||||
# - '0.1.0'
|
# - '0.1.0'
|
||||||
# - '0.1.1'
|
# - '0.1.1'
|
||||||
# - '0.1.2'
|
|
||||||
# - '0.1.3b'
|
|
||||||
# - title: 'Kirghiz; Kyrgyz'
|
# - title: 'Kirghiz; Kyrgyz'
|
||||||
# description: ''
|
# description: ''
|
||||||
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/kir.traineddata'
|
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/kir.traineddata'
|
||||||
@ -698,8 +592,6 @@
|
|||||||
# compatible_service_versions:
|
# compatible_service_versions:
|
||||||
# - '0.1.0'
|
# - '0.1.0'
|
||||||
# - '0.1.1'
|
# - '0.1.1'
|
||||||
# - '0.1.2'
|
|
||||||
# - '0.1.3b'
|
|
||||||
# - title: 'Korean'
|
# - title: 'Korean'
|
||||||
# description: ''
|
# description: ''
|
||||||
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/kor.traineddata'
|
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/kor.traineddata'
|
||||||
@ -711,8 +603,6 @@
|
|||||||
# compatible_service_versions:
|
# compatible_service_versions:
|
||||||
# - '0.1.0'
|
# - '0.1.0'
|
||||||
# - '0.1.1'
|
# - '0.1.1'
|
||||||
# - '0.1.2'
|
|
||||||
# - '0.1.3b'
|
|
||||||
# - title: 'Kurdish'
|
# - title: 'Kurdish'
|
||||||
# description: ''
|
# description: ''
|
||||||
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/kur.traineddata'
|
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/kur.traineddata'
|
||||||
@ -724,8 +614,6 @@
|
|||||||
# compatible_service_versions:
|
# compatible_service_versions:
|
||||||
# - '0.1.0'
|
# - '0.1.0'
|
||||||
# - '0.1.1'
|
# - '0.1.1'
|
||||||
# - '0.1.2'
|
|
||||||
# - '0.1.3b'
|
|
||||||
# - title: 'Lao'
|
# - title: 'Lao'
|
||||||
# description: ''
|
# description: ''
|
||||||
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/lao.traineddata'
|
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/lao.traineddata'
|
||||||
@ -737,8 +625,6 @@
|
|||||||
# compatible_service_versions:
|
# compatible_service_versions:
|
||||||
# - '0.1.0'
|
# - '0.1.0'
|
||||||
# - '0.1.1'
|
# - '0.1.1'
|
||||||
# - '0.1.2'
|
|
||||||
# - '0.1.3b'
|
|
||||||
# - title: 'Latin'
|
# - title: 'Latin'
|
||||||
# description: ''
|
# description: ''
|
||||||
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/lat.traineddata'
|
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/lat.traineddata'
|
||||||
@ -750,8 +636,6 @@
|
|||||||
# compatible_service_versions:
|
# compatible_service_versions:
|
||||||
# - '0.1.0'
|
# - '0.1.0'
|
||||||
# - '0.1.1'
|
# - '0.1.1'
|
||||||
# - '0.1.2'
|
|
||||||
# - '0.1.3b'
|
|
||||||
# - title: 'Latvian'
|
# - title: 'Latvian'
|
||||||
# description: ''
|
# description: ''
|
||||||
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/lav.traineddata'
|
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/lav.traineddata'
|
||||||
@ -763,8 +647,6 @@
|
|||||||
# compatible_service_versions:
|
# compatible_service_versions:
|
||||||
# - '0.1.0'
|
# - '0.1.0'
|
||||||
# - '0.1.1'
|
# - '0.1.1'
|
||||||
# - '0.1.2'
|
|
||||||
# - '0.1.3b'
|
|
||||||
# - title: 'Lithuanian'
|
# - title: 'Lithuanian'
|
||||||
# description: ''
|
# description: ''
|
||||||
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/lit.traineddata'
|
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/lit.traineddata'
|
||||||
@ -776,8 +658,6 @@
|
|||||||
# compatible_service_versions:
|
# compatible_service_versions:
|
||||||
# - '0.1.0'
|
# - '0.1.0'
|
||||||
# - '0.1.1'
|
# - '0.1.1'
|
||||||
# - '0.1.2'
|
|
||||||
# - '0.1.3b'
|
|
||||||
# - title: 'Malayalam'
|
# - title: 'Malayalam'
|
||||||
# description: ''
|
# description: ''
|
||||||
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/mal.traineddata'
|
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/mal.traineddata'
|
||||||
@ -789,8 +669,6 @@
|
|||||||
# compatible_service_versions:
|
# compatible_service_versions:
|
||||||
# - '0.1.0'
|
# - '0.1.0'
|
||||||
# - '0.1.1'
|
# - '0.1.1'
|
||||||
# - '0.1.2'
|
|
||||||
# - '0.1.3b'
|
|
||||||
# - title: 'Marathi'
|
# - title: 'Marathi'
|
||||||
# description: ''
|
# description: ''
|
||||||
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/mar.traineddata'
|
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/mar.traineddata'
|
||||||
@ -802,8 +680,6 @@
|
|||||||
# compatible_service_versions:
|
# compatible_service_versions:
|
||||||
# - '0.1.0'
|
# - '0.1.0'
|
||||||
# - '0.1.1'
|
# - '0.1.1'
|
||||||
# - '0.1.2'
|
|
||||||
# - '0.1.3b'
|
|
||||||
# - title: 'Macedonian'
|
# - title: 'Macedonian'
|
||||||
# description: ''
|
# description: ''
|
||||||
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/mkd.traineddata'
|
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/mkd.traineddata'
|
||||||
@ -815,8 +691,6 @@
|
|||||||
# compatible_service_versions:
|
# compatible_service_versions:
|
||||||
# - '0.1.0'
|
# - '0.1.0'
|
||||||
# - '0.1.1'
|
# - '0.1.1'
|
||||||
# - '0.1.2'
|
|
||||||
# - '0.1.3b'
|
|
||||||
# - title: 'Maltese'
|
# - title: 'Maltese'
|
||||||
# description: ''
|
# description: ''
|
||||||
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/mlt.traineddata'
|
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/mlt.traineddata'
|
||||||
@ -828,8 +702,6 @@
|
|||||||
# compatible_service_versions:
|
# compatible_service_versions:
|
||||||
# - '0.1.0'
|
# - '0.1.0'
|
||||||
# - '0.1.1'
|
# - '0.1.1'
|
||||||
# - '0.1.2'
|
|
||||||
# - '0.1.3b'
|
|
||||||
# - title: 'Malay'
|
# - title: 'Malay'
|
||||||
# description: ''
|
# description: ''
|
||||||
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/msa.traineddata'
|
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/msa.traineddata'
|
||||||
@ -841,8 +713,6 @@
|
|||||||
# compatible_service_versions:
|
# compatible_service_versions:
|
||||||
# - '0.1.0'
|
# - '0.1.0'
|
||||||
# - '0.1.1'
|
# - '0.1.1'
|
||||||
# - '0.1.2'
|
|
||||||
# - '0.1.3b'
|
|
||||||
# - title: 'Burmese'
|
# - title: 'Burmese'
|
||||||
# description: ''
|
# description: ''
|
||||||
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/mya.traineddata'
|
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/mya.traineddata'
|
||||||
@ -854,8 +724,6 @@
|
|||||||
# compatible_service_versions:
|
# compatible_service_versions:
|
||||||
# - '0.1.0'
|
# - '0.1.0'
|
||||||
# - '0.1.1'
|
# - '0.1.1'
|
||||||
# - '0.1.2'
|
|
||||||
# - '0.1.3b'
|
|
||||||
# - title: 'Nepali'
|
# - title: 'Nepali'
|
||||||
# description: ''
|
# description: ''
|
||||||
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/nep.traineddata'
|
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/nep.traineddata'
|
||||||
@ -867,8 +735,6 @@
|
|||||||
# compatible_service_versions:
|
# compatible_service_versions:
|
||||||
# - '0.1.0'
|
# - '0.1.0'
|
||||||
# - '0.1.1'
|
# - '0.1.1'
|
||||||
# - '0.1.2'
|
|
||||||
# - '0.1.3b'
|
|
||||||
# - title: 'Dutch; Flemish'
|
# - title: 'Dutch; Flemish'
|
||||||
# description: ''
|
# description: ''
|
||||||
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/nld.traineddata'
|
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/nld.traineddata'
|
||||||
@ -880,8 +746,6 @@
|
|||||||
# compatible_service_versions:
|
# compatible_service_versions:
|
||||||
# - '0.1.0'
|
# - '0.1.0'
|
||||||
# - '0.1.1'
|
# - '0.1.1'
|
||||||
# - '0.1.2'
|
|
||||||
# - '0.1.3b'
|
|
||||||
# - title: 'Norwegian'
|
# - title: 'Norwegian'
|
||||||
# description: ''
|
# description: ''
|
||||||
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/nor.traineddata'
|
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/nor.traineddata'
|
||||||
@ -893,8 +757,6 @@
|
|||||||
# compatible_service_versions:
|
# compatible_service_versions:
|
||||||
# - '0.1.0'
|
# - '0.1.0'
|
||||||
# - '0.1.1'
|
# - '0.1.1'
|
||||||
# - '0.1.2'
|
|
||||||
# - '0.1.3b'
|
|
||||||
# - title: 'Oriya'
|
# - title: 'Oriya'
|
||||||
# description: ''
|
# description: ''
|
||||||
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/ori.traineddata'
|
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/ori.traineddata'
|
||||||
@ -906,8 +768,6 @@
|
|||||||
# compatible_service_versions:
|
# compatible_service_versions:
|
||||||
# - '0.1.0'
|
# - '0.1.0'
|
||||||
# - '0.1.1'
|
# - '0.1.1'
|
||||||
# - '0.1.2'
|
|
||||||
# - '0.1.3b'
|
|
||||||
# - title: 'Panjabi; Punjabi'
|
# - title: 'Panjabi; Punjabi'
|
||||||
# description: ''
|
# description: ''
|
||||||
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/pan.traineddata'
|
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/pan.traineddata'
|
||||||
@ -919,8 +779,6 @@
|
|||||||
# compatible_service_versions:
|
# compatible_service_versions:
|
||||||
# - '0.1.0'
|
# - '0.1.0'
|
||||||
# - '0.1.1'
|
# - '0.1.1'
|
||||||
# - '0.1.2'
|
|
||||||
# - '0.1.3b'
|
|
||||||
# - title: 'Polish'
|
# - title: 'Polish'
|
||||||
# description: ''
|
# description: ''
|
||||||
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/pol.traineddata'
|
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/pol.traineddata'
|
||||||
@ -932,8 +790,6 @@
|
|||||||
# compatible_service_versions:
|
# compatible_service_versions:
|
||||||
# - '0.1.0'
|
# - '0.1.0'
|
||||||
# - '0.1.1'
|
# - '0.1.1'
|
||||||
# - '0.1.2'
|
|
||||||
# - '0.1.3b'
|
|
||||||
- title: 'Portuguese'
|
- title: 'Portuguese'
|
||||||
description: ''
|
description: ''
|
||||||
url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/por.traineddata'
|
url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/por.traineddata'
|
||||||
@ -945,8 +801,6 @@
|
|||||||
compatible_service_versions:
|
compatible_service_versions:
|
||||||
- '0.1.0'
|
- '0.1.0'
|
||||||
- '0.1.1'
|
- '0.1.1'
|
||||||
- '0.1.2'
|
|
||||||
- '0.1.3b'
|
|
||||||
# - title: 'Pushto; Pashto'
|
# - title: 'Pushto; Pashto'
|
||||||
# description: ''
|
# description: ''
|
||||||
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/pus.traineddata'
|
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/pus.traineddata'
|
||||||
@ -958,8 +812,6 @@
|
|||||||
# compatible_service_versions:
|
# compatible_service_versions:
|
||||||
# - '0.1.0'
|
# - '0.1.0'
|
||||||
# - '0.1.1'
|
# - '0.1.1'
|
||||||
# - '0.1.2'
|
|
||||||
# - '0.1.3b'
|
|
||||||
# - title: 'Romanian; Moldavian; Moldovan'
|
# - title: 'Romanian; Moldavian; Moldovan'
|
||||||
# description: ''
|
# description: ''
|
||||||
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/ron.traineddata'
|
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/ron.traineddata'
|
||||||
@ -971,8 +823,6 @@
|
|||||||
# compatible_service_versions:
|
# compatible_service_versions:
|
||||||
# - '0.1.0'
|
# - '0.1.0'
|
||||||
# - '0.1.1'
|
# - '0.1.1'
|
||||||
# - '0.1.2'
|
|
||||||
# - '0.1.3b'
|
|
||||||
- title: 'Russian'
|
- title: 'Russian'
|
||||||
description: ''
|
description: ''
|
||||||
url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/rus.traineddata'
|
url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/rus.traineddata'
|
||||||
@ -984,8 +834,6 @@
|
|||||||
compatible_service_versions:
|
compatible_service_versions:
|
||||||
- '0.1.0'
|
- '0.1.0'
|
||||||
- '0.1.1'
|
- '0.1.1'
|
||||||
- '0.1.2'
|
|
||||||
- '0.1.3b'
|
|
||||||
# - title: 'Sanskrit'
|
# - title: 'Sanskrit'
|
||||||
# description: ''
|
# description: ''
|
||||||
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/san.traineddata'
|
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/san.traineddata'
|
||||||
@ -997,8 +845,6 @@
|
|||||||
# compatible_service_versions:
|
# compatible_service_versions:
|
||||||
# - '0.1.0'
|
# - '0.1.0'
|
||||||
# - '0.1.1'
|
# - '0.1.1'
|
||||||
# - '0.1.2'
|
|
||||||
# - '0.1.3b'
|
|
||||||
# - title: 'Sinhala; Sinhalese'
|
# - title: 'Sinhala; Sinhalese'
|
||||||
# description: ''
|
# description: ''
|
||||||
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/sin.traineddata'
|
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/sin.traineddata'
|
||||||
@ -1010,8 +856,6 @@
|
|||||||
# compatible_service_versions:
|
# compatible_service_versions:
|
||||||
# - '0.1.0'
|
# - '0.1.0'
|
||||||
# - '0.1.1'
|
# - '0.1.1'
|
||||||
# - '0.1.2'
|
|
||||||
# - '0.1.3b'
|
|
||||||
# - title: 'Slovak'
|
# - title: 'Slovak'
|
||||||
# description: ''
|
# description: ''
|
||||||
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/slk.traineddata'
|
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/slk.traineddata'
|
||||||
@ -1023,8 +867,6 @@
|
|||||||
# compatible_service_versions:
|
# compatible_service_versions:
|
||||||
# - '0.1.0'
|
# - '0.1.0'
|
||||||
# - '0.1.1'
|
# - '0.1.1'
|
||||||
# - '0.1.2'
|
|
||||||
# - '0.1.3b'
|
|
||||||
# - title: 'Slovenian'
|
# - title: 'Slovenian'
|
||||||
# description: ''
|
# description: ''
|
||||||
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/slv.traineddata'
|
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/slv.traineddata'
|
||||||
@ -1036,8 +878,6 @@
|
|||||||
# compatible_service_versions:
|
# compatible_service_versions:
|
||||||
# - '0.1.0'
|
# - '0.1.0'
|
||||||
# - '0.1.1'
|
# - '0.1.1'
|
||||||
# - '0.1.2'
|
|
||||||
# - '0.1.3b'
|
|
||||||
- title: 'Spanish; Castilian'
|
- title: 'Spanish; Castilian'
|
||||||
description: ''
|
description: ''
|
||||||
url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/spa.traineddata'
|
url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/spa.traineddata'
|
||||||
@ -1049,8 +889,6 @@
|
|||||||
compatible_service_versions:
|
compatible_service_versions:
|
||||||
- '0.1.0'
|
- '0.1.0'
|
||||||
- '0.1.1'
|
- '0.1.1'
|
||||||
- '0.1.2'
|
|
||||||
- '0.1.3b'
|
|
||||||
- title: 'Spanish; Castilian - Old'
|
- title: 'Spanish; Castilian - Old'
|
||||||
description: ''
|
description: ''
|
||||||
url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/spa_old.traineddata'
|
url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/spa_old.traineddata'
|
||||||
@ -1062,8 +900,6 @@
|
|||||||
compatible_service_versions:
|
compatible_service_versions:
|
||||||
- '0.1.0'
|
- '0.1.0'
|
||||||
- '0.1.1'
|
- '0.1.1'
|
||||||
- '0.1.2'
|
|
||||||
- '0.1.3b'
|
|
||||||
# - title: 'Albanian'
|
# - title: 'Albanian'
|
||||||
# description: ''
|
# description: ''
|
||||||
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/sqi.traineddata'
|
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/sqi.traineddata'
|
||||||
@ -1075,8 +911,6 @@
|
|||||||
# compatible_service_versions:
|
# compatible_service_versions:
|
||||||
# - '0.1.0'
|
# - '0.1.0'
|
||||||
# - '0.1.1'
|
# - '0.1.1'
|
||||||
# - '0.1.2'
|
|
||||||
# - '0.1.3b'
|
|
||||||
# - title: 'Serbian'
|
# - title: 'Serbian'
|
||||||
# description: ''
|
# description: ''
|
||||||
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/srp.traineddata'
|
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/srp.traineddata'
|
||||||
@ -1088,8 +922,6 @@
|
|||||||
# compatible_service_versions:
|
# compatible_service_versions:
|
||||||
# - '0.1.0'
|
# - '0.1.0'
|
||||||
# - '0.1.1'
|
# - '0.1.1'
|
||||||
# - '0.1.2'
|
|
||||||
# - '0.1.3b'
|
|
||||||
# - title: 'Serbian - Latin'
|
# - title: 'Serbian - Latin'
|
||||||
# description: ''
|
# description: ''
|
||||||
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/srp_latn.traineddata'
|
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/srp_latn.traineddata'
|
||||||
@ -1101,8 +933,6 @@
|
|||||||
# compatible_service_versions:
|
# compatible_service_versions:
|
||||||
# - '0.1.0'
|
# - '0.1.0'
|
||||||
# - '0.1.1'
|
# - '0.1.1'
|
||||||
# - '0.1.2'
|
|
||||||
# - '0.1.3b'
|
|
||||||
# - title: 'Swahili'
|
# - title: 'Swahili'
|
||||||
# description: ''
|
# description: ''
|
||||||
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/swa.traineddata'
|
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/swa.traineddata'
|
||||||
@ -1114,8 +944,6 @@
|
|||||||
# compatible_service_versions:
|
# compatible_service_versions:
|
||||||
# - '0.1.0'
|
# - '0.1.0'
|
||||||
# - '0.1.1'
|
# - '0.1.1'
|
||||||
# - '0.1.2'
|
|
||||||
# - '0.1.3b'
|
|
||||||
# - title: 'Swedish'
|
# - title: 'Swedish'
|
||||||
# description: ''
|
# description: ''
|
||||||
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/swe.traineddata'
|
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/swe.traineddata'
|
||||||
@ -1127,8 +955,6 @@
|
|||||||
# compatible_service_versions:
|
# compatible_service_versions:
|
||||||
# - '0.1.0'
|
# - '0.1.0'
|
||||||
# - '0.1.1'
|
# - '0.1.1'
|
||||||
# - '0.1.2'
|
|
||||||
# - '0.1.3b'
|
|
||||||
# - title: 'Syriac'
|
# - title: 'Syriac'
|
||||||
# description: ''
|
# description: ''
|
||||||
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/syr.traineddata'
|
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/syr.traineddata'
|
||||||
@ -1140,8 +966,6 @@
|
|||||||
# compatible_service_versions:
|
# compatible_service_versions:
|
||||||
# - '0.1.0'
|
# - '0.1.0'
|
||||||
# - '0.1.1'
|
# - '0.1.1'
|
||||||
# - '0.1.2'
|
|
||||||
# - '0.1.3b'
|
|
||||||
# - title: 'Tamil'
|
# - title: 'Tamil'
|
||||||
# description: ''
|
# description: ''
|
||||||
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/tam.traineddata'
|
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/tam.traineddata'
|
||||||
@ -1153,8 +977,6 @@
|
|||||||
# compatible_service_versions:
|
# compatible_service_versions:
|
||||||
# - '0.1.0'
|
# - '0.1.0'
|
||||||
# - '0.1.1'
|
# - '0.1.1'
|
||||||
# - '0.1.2'
|
|
||||||
# - '0.1.3b'
|
|
||||||
# - title: 'Telugu'
|
# - title: 'Telugu'
|
||||||
# description: ''
|
# description: ''
|
||||||
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/tel.traineddata'
|
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/tel.traineddata'
|
||||||
@ -1166,8 +988,6 @@
|
|||||||
# compatible_service_versions:
|
# compatible_service_versions:
|
||||||
# - '0.1.0'
|
# - '0.1.0'
|
||||||
# - '0.1.1'
|
# - '0.1.1'
|
||||||
# - '0.1.2'
|
|
||||||
# - '0.1.3b'
|
|
||||||
# - title: 'Tajik'
|
# - title: 'Tajik'
|
||||||
# description: ''
|
# description: ''
|
||||||
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/tgk.traineddata'
|
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/tgk.traineddata'
|
||||||
@ -1179,8 +999,6 @@
|
|||||||
# compatible_service_versions:
|
# compatible_service_versions:
|
||||||
# - '0.1.0'
|
# - '0.1.0'
|
||||||
# - '0.1.1'
|
# - '0.1.1'
|
||||||
# - '0.1.2'
|
|
||||||
# - '0.1.3b'
|
|
||||||
# - title: 'Tagalog'
|
# - title: 'Tagalog'
|
||||||
# description: ''
|
# description: ''
|
||||||
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/tgl.traineddata'
|
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/tgl.traineddata'
|
||||||
@ -1192,8 +1010,6 @@
|
|||||||
# compatible_service_versions:
|
# compatible_service_versions:
|
||||||
# - '0.1.0'
|
# - '0.1.0'
|
||||||
# - '0.1.1'
|
# - '0.1.1'
|
||||||
# - '0.1.2'
|
|
||||||
# - '0.1.3b'
|
|
||||||
# - title: 'Thai'
|
# - title: 'Thai'
|
||||||
# description: ''
|
# description: ''
|
||||||
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/tha.traineddata'
|
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/tha.traineddata'
|
||||||
@ -1205,8 +1021,6 @@
|
|||||||
# compatible_service_versions:
|
# compatible_service_versions:
|
||||||
# - '0.1.0'
|
# - '0.1.0'
|
||||||
# - '0.1.1'
|
# - '0.1.1'
|
||||||
# - '0.1.2'
|
|
||||||
# - '0.1.3b'
|
|
||||||
# - title: 'Tigrinya'
|
# - title: 'Tigrinya'
|
||||||
# description: ''
|
# description: ''
|
||||||
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/tir.traineddata'
|
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/tir.traineddata'
|
||||||
@ -1218,8 +1032,6 @@
|
|||||||
# compatible_service_versions:
|
# compatible_service_versions:
|
||||||
# - '0.1.0'
|
# - '0.1.0'
|
||||||
# - '0.1.1'
|
# - '0.1.1'
|
||||||
# - '0.1.2'
|
|
||||||
# - '0.1.3b'
|
|
||||||
# - title: 'Turkish'
|
# - title: 'Turkish'
|
||||||
# description: ''
|
# description: ''
|
||||||
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/tur.traineddata'
|
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/tur.traineddata'
|
||||||
@ -1231,8 +1043,6 @@
|
|||||||
# compatible_service_versions:
|
# compatible_service_versions:
|
||||||
# - '0.1.0'
|
# - '0.1.0'
|
||||||
# - '0.1.1'
|
# - '0.1.1'
|
||||||
# - '0.1.2'
|
|
||||||
# - '0.1.3b'
|
|
||||||
# - title: 'Uighur; Uyghur'
|
# - title: 'Uighur; Uyghur'
|
||||||
# description: ''
|
# description: ''
|
||||||
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/uig.traineddata'
|
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/uig.traineddata'
|
||||||
@ -1244,8 +1054,6 @@
|
|||||||
# compatible_service_versions:
|
# compatible_service_versions:
|
||||||
# - '0.1.0'
|
# - '0.1.0'
|
||||||
# - '0.1.1'
|
# - '0.1.1'
|
||||||
# - '0.1.2'
|
|
||||||
# - '0.1.3b'
|
|
||||||
# - title: 'Ukrainian'
|
# - title: 'Ukrainian'
|
||||||
# description: ''
|
# description: ''
|
||||||
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/ukr.traineddata'
|
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/ukr.traineddata'
|
||||||
@ -1257,8 +1065,6 @@
|
|||||||
# compatible_service_versions:
|
# compatible_service_versions:
|
||||||
# - '0.1.0'
|
# - '0.1.0'
|
||||||
# - '0.1.1'
|
# - '0.1.1'
|
||||||
# - '0.1.2'
|
|
||||||
# - '0.1.3b'
|
|
||||||
# - title: 'Urdu'
|
# - title: 'Urdu'
|
||||||
# description: ''
|
# description: ''
|
||||||
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/urd.traineddata'
|
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/urd.traineddata'
|
||||||
@ -1270,8 +1076,6 @@
|
|||||||
# compatible_service_versions:
|
# compatible_service_versions:
|
||||||
# - '0.1.0'
|
# - '0.1.0'
|
||||||
# - '0.1.1'
|
# - '0.1.1'
|
||||||
# - '0.1.2'
|
|
||||||
# - '0.1.3b'
|
|
||||||
# - title: 'Uzbek'
|
# - title: 'Uzbek'
|
||||||
# description: ''
|
# description: ''
|
||||||
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/uzb.traineddata'
|
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/uzb.traineddata'
|
||||||
@ -1283,8 +1087,6 @@
|
|||||||
# compatible_service_versions:
|
# compatible_service_versions:
|
||||||
# - '0.1.0'
|
# - '0.1.0'
|
||||||
# - '0.1.1'
|
# - '0.1.1'
|
||||||
# - '0.1.2'
|
|
||||||
# - '0.1.3b'
|
|
||||||
# - title: 'Uzbek - Cyrillic'
|
# - title: 'Uzbek - Cyrillic'
|
||||||
# description: ''
|
# description: ''
|
||||||
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/uzb_cyrl.traineddata'
|
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/uzb_cyrl.traineddata'
|
||||||
@ -1296,8 +1098,6 @@
|
|||||||
# compatible_service_versions:
|
# compatible_service_versions:
|
||||||
# - '0.1.0'
|
# - '0.1.0'
|
||||||
# - '0.1.1'
|
# - '0.1.1'
|
||||||
# - '0.1.2'
|
|
||||||
# - '0.1.3b'
|
|
||||||
# - title: 'Vietnamese'
|
# - title: 'Vietnamese'
|
||||||
# description: ''
|
# description: ''
|
||||||
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/vie.traineddata'
|
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/vie.traineddata'
|
||||||
@ -1309,8 +1109,6 @@
|
|||||||
# compatible_service_versions:
|
# compatible_service_versions:
|
||||||
# - '0.1.0'
|
# - '0.1.0'
|
||||||
# - '0.1.1'
|
# - '0.1.1'
|
||||||
# - '0.1.2'
|
|
||||||
# - '0.1.3b'
|
|
||||||
# - title: 'Yiddish'
|
# - title: 'Yiddish'
|
||||||
# description: ''
|
# description: ''
|
||||||
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/yid.traineddata'
|
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/yid.traineddata'
|
||||||
@ -1322,5 +1120,3 @@
|
|||||||
# compatible_service_versions:
|
# compatible_service_versions:
|
||||||
# - '0.1.0'
|
# - '0.1.0'
|
||||||
# - '0.1.1'
|
# - '0.1.1'
|
||||||
# - '0.1.2'
|
|
||||||
# - '0.1.3b'
|
|
116
app/__init__.py
116
app/__init__.py
@ -2,9 +2,9 @@ from apifairy import APIFairy
|
|||||||
from config import Config
|
from config import Config
|
||||||
from docker import DockerClient
|
from docker import DockerClient
|
||||||
from flask import Flask
|
from flask import Flask
|
||||||
from flask.logging import default_handler
|
|
||||||
from flask_apscheduler import APScheduler
|
from flask_apscheduler import APScheduler
|
||||||
from flask_assets import Environment
|
from flask_assets import Environment
|
||||||
|
from flask_breadcrumbs import Breadcrumbs, default_breadcrumb_root
|
||||||
from flask_login import LoginManager
|
from flask_login import LoginManager
|
||||||
from flask_mail import Mail
|
from flask_mail import Mail
|
||||||
from flask_marshmallow import Marshmallow
|
from flask_marshmallow import Marshmallow
|
||||||
@ -13,142 +13,96 @@ from flask_paranoid import Paranoid
|
|||||||
from flask_socketio import SocketIO
|
from flask_socketio import SocketIO
|
||||||
from flask_sqlalchemy import SQLAlchemy
|
from flask_sqlalchemy import SQLAlchemy
|
||||||
from flask_hashids import Hashids
|
from flask_hashids import Hashids
|
||||||
from logging import Formatter, StreamHandler
|
from werkzeug.exceptions import HTTPException
|
||||||
from werkzeug.middleware.proxy_fix import ProxyFix
|
|
||||||
|
|
||||||
|
|
||||||
docker_client = DockerClient.from_env()
|
|
||||||
|
|
||||||
apifairy = APIFairy()
|
apifairy = APIFairy()
|
||||||
assets = Environment()
|
assets = Environment()
|
||||||
|
breadcrumbs = Breadcrumbs()
|
||||||
db = SQLAlchemy()
|
db = SQLAlchemy()
|
||||||
|
docker_client = DockerClient()
|
||||||
hashids = Hashids()
|
hashids = Hashids()
|
||||||
login = LoginManager()
|
login = LoginManager()
|
||||||
|
login.login_view = 'auth.login'
|
||||||
|
login.login_message = 'Please log in to access this page.'
|
||||||
ma = Marshmallow()
|
ma = Marshmallow()
|
||||||
mail = Mail()
|
mail = Mail()
|
||||||
migrate = Migrate(compare_type=True)
|
migrate = Migrate(compare_type=True)
|
||||||
paranoid = Paranoid()
|
paranoid = Paranoid()
|
||||||
|
paranoid.redirect_view = '/'
|
||||||
scheduler = APScheduler()
|
scheduler = APScheduler()
|
||||||
socketio = SocketIO()
|
socketio = SocketIO()
|
||||||
|
|
||||||
|
|
||||||
def create_app(config: Config = Config) -> Flask:
|
def create_app(config: Config = Config) -> Flask:
|
||||||
''' Creates an initialized Flask object. '''
|
''' Creates an initialized Flask (WSGI Application) object. '''
|
||||||
|
|
||||||
app = Flask(__name__)
|
app = Flask(__name__)
|
||||||
app.config.from_object(config)
|
app.config.from_object(config)
|
||||||
|
config.init_app(app)
|
||||||
# region Logging
|
|
||||||
log_formatter = Formatter(
|
|
||||||
fmt=app.config['NOPAQUE_LOG_FORMAT'],
|
|
||||||
datefmt=app.config['NOPAQUE_LOG_DATE_FORMAT']
|
|
||||||
)
|
|
||||||
|
|
||||||
log_handler = StreamHandler()
|
|
||||||
log_handler.setFormatter(log_formatter)
|
|
||||||
log_handler.setLevel(app.config['NOPAQUE_LOG_LEVEL'])
|
|
||||||
|
|
||||||
app.logger.setLevel('DEBUG')
|
|
||||||
app.logger.removeHandler(default_handler)
|
|
||||||
app.logger.addHandler(log_handler)
|
|
||||||
# endregion Logging
|
|
||||||
|
|
||||||
# region Middlewares
|
|
||||||
if app.config['NOPAQUE_PROXY_FIX_ENABLED']:
|
|
||||||
app.wsgi_app = ProxyFix(
|
|
||||||
app.wsgi_app,
|
|
||||||
x_for=app.config['NOPAQUE_PROXY_FIX_X_FOR'],
|
|
||||||
x_host=app.config['NOPAQUE_PROXY_FIX_X_HOST'],
|
|
||||||
x_port=app.config['NOPAQUE_PROXY_FIX_X_PORT'],
|
|
||||||
x_prefix=app.config['NOPAQUE_PROXY_FIX_X_PREFIX'],
|
|
||||||
x_proto=app.config['NOPAQUE_PROXY_FIX_X_PROTO']
|
|
||||||
)
|
|
||||||
# endregion Middlewares
|
|
||||||
|
|
||||||
# region Extensions
|
|
||||||
docker_client.login(
|
docker_client.login(
|
||||||
app.config['NOPAQUE_DOCKER_REGISTRY_USERNAME'],
|
app.config['NOPAQUE_DOCKER_REGISTRY_USERNAME'],
|
||||||
password=app.config['NOPAQUE_DOCKER_REGISTRY_PASSWORD'],
|
password=app.config['NOPAQUE_DOCKER_REGISTRY_PASSWORD'],
|
||||||
registry=app.config['NOPAQUE_DOCKER_REGISTRY']
|
registry=app.config['NOPAQUE_DOCKER_REGISTRY']
|
||||||
)
|
)
|
||||||
|
|
||||||
from .models import AnonymousUser, User
|
|
||||||
|
|
||||||
apifairy.init_app(app)
|
apifairy.init_app(app)
|
||||||
assets.init_app(app)
|
assets.init_app(app)
|
||||||
|
breadcrumbs.init_app(app)
|
||||||
db.init_app(app)
|
db.init_app(app)
|
||||||
hashids.init_app(app)
|
hashids.init_app(app)
|
||||||
login.init_app(app)
|
login.init_app(app)
|
||||||
login.anonymous_user = AnonymousUser
|
|
||||||
login.login_view = 'auth.login'
|
|
||||||
login.user_loader(lambda user_id: User.query.get(int(user_id)))
|
|
||||||
ma.init_app(app)
|
ma.init_app(app)
|
||||||
mail.init_app(app)
|
mail.init_app(app)
|
||||||
migrate.init_app(app, db)
|
migrate.init_app(app, db)
|
||||||
paranoid.init_app(app)
|
paranoid.init_app(app)
|
||||||
paranoid.redirect_view = '/'
|
|
||||||
scheduler.init_app(app)
|
scheduler.init_app(app)
|
||||||
socketio.init_app(app, message_queue=app.config['NOPAQUE_SOCKETIO_MESSAGE_QUEUE_URI'])
|
socketio.init_app(app, message_queue=app.config['NOPAQUE_SOCKETIO_MESSAGE_QUEUE_URI']) # noqa
|
||||||
# endregion Extensions
|
|
||||||
|
|
||||||
# region Blueprints
|
from .admin import bp as admin_blueprint
|
||||||
from .blueprints.admin import bp as admin_blueprint
|
default_breadcrumb_root(admin_blueprint, '.admin')
|
||||||
app.register_blueprint(admin_blueprint, url_prefix='/admin')
|
app.register_blueprint(admin_blueprint, url_prefix='/admin')
|
||||||
|
|
||||||
from .blueprints.api import bp as api_blueprint
|
from .api import bp as api_blueprint
|
||||||
app.register_blueprint(api_blueprint, url_prefix='/api')
|
app.register_blueprint(api_blueprint, url_prefix='/api')
|
||||||
|
|
||||||
from .blueprints.auth import bp as auth_blueprint
|
from .auth import bp as auth_blueprint
|
||||||
|
default_breadcrumb_root(auth_blueprint, '.')
|
||||||
app.register_blueprint(auth_blueprint)
|
app.register_blueprint(auth_blueprint)
|
||||||
|
|
||||||
from .blueprints.contributions import bp as contributions_blueprint
|
from .contributions import bp as contributions_blueprint
|
||||||
|
default_breadcrumb_root(contributions_blueprint, '.contributions')
|
||||||
app.register_blueprint(contributions_blueprint, url_prefix='/contributions')
|
app.register_blueprint(contributions_blueprint, url_prefix='/contributions')
|
||||||
|
|
||||||
from .blueprints.corpora import bp as corpora_blueprint
|
from .corpora import bp as corpora_blueprint
|
||||||
|
from .corpora.cqi_over_sio import CQiNamespace
|
||||||
|
default_breadcrumb_root(corpora_blueprint, '.corpora')
|
||||||
app.register_blueprint(corpora_blueprint, cli_group='corpus', url_prefix='/corpora')
|
app.register_blueprint(corpora_blueprint, cli_group='corpus', url_prefix='/corpora')
|
||||||
|
socketio.on_namespace(CQiNamespace('/cqi_over_sio'))
|
||||||
|
|
||||||
from .blueprints.errors import bp as errors_bp
|
from .errors import bp as errors_bp
|
||||||
app.register_blueprint(errors_bp)
|
app.register_blueprint(errors_bp)
|
||||||
|
|
||||||
from .blueprints.jobs import bp as jobs_blueprint
|
from .jobs import bp as jobs_blueprint
|
||||||
|
default_breadcrumb_root(jobs_blueprint, '.jobs')
|
||||||
app.register_blueprint(jobs_blueprint, url_prefix='/jobs')
|
app.register_blueprint(jobs_blueprint, url_prefix='/jobs')
|
||||||
|
|
||||||
from .blueprints.main import bp as main_blueprint
|
from .main import bp as main_blueprint
|
||||||
|
default_breadcrumb_root(main_blueprint, '.')
|
||||||
app.register_blueprint(main_blueprint, cli_group=None)
|
app.register_blueprint(main_blueprint, cli_group=None)
|
||||||
|
|
||||||
from .blueprints.services import bp as services_blueprint
|
from .services import bp as services_blueprint
|
||||||
|
default_breadcrumb_root(services_blueprint, '.services')
|
||||||
app.register_blueprint(services_blueprint, url_prefix='/services')
|
app.register_blueprint(services_blueprint, url_prefix='/services')
|
||||||
|
|
||||||
from .blueprints.settings import bp as settings_blueprint
|
from .settings import bp as settings_blueprint
|
||||||
|
default_breadcrumb_root(settings_blueprint, '.settings')
|
||||||
app.register_blueprint(settings_blueprint, url_prefix='/settings')
|
app.register_blueprint(settings_blueprint, url_prefix='/settings')
|
||||||
|
|
||||||
from .blueprints.users import bp as users_blueprint
|
from .users import bp as users_blueprint
|
||||||
app.register_blueprint(users_blueprint, cli_group='user', url_prefix='/users')
|
default_breadcrumb_root(users_blueprint, '.users')
|
||||||
|
app.register_blueprint(users_blueprint, url_prefix='/users')
|
||||||
|
|
||||||
from .blueprints.workshops import bp as workshops_blueprint
|
from .workshops import bp as workshops_blueprint
|
||||||
app.register_blueprint(workshops_blueprint, url_prefix='/workshops')
|
app.register_blueprint(workshops_blueprint, url_prefix='/workshops')
|
||||||
# endregion Blueprints
|
|
||||||
|
|
||||||
# region SocketIO Namespaces
|
|
||||||
from .namespaces.cqi_over_sio import CQiOverSocketIONamespace
|
|
||||||
socketio.on_namespace(CQiOverSocketIONamespace('/cqi_over_sio'))
|
|
||||||
|
|
||||||
from .namespaces.users import UsersNamespace
|
|
||||||
socketio.on_namespace(UsersNamespace('/users'))
|
|
||||||
# endregion SocketIO Namespaces
|
|
||||||
|
|
||||||
# region Database event Listeners
|
|
||||||
from .models.event_listeners import register_event_listeners
|
|
||||||
register_event_listeners()
|
|
||||||
# endregion Database event Listeners
|
|
||||||
|
|
||||||
# region Add scheduler jobs
|
|
||||||
if app.config['NOPAQUE_IS_PRIMARY_INSTANCE']:
|
|
||||||
from .jobs import handle_corpora
|
|
||||||
scheduler.add_job('handle_corpora', handle_corpora, seconds=3, trigger='interval')
|
|
||||||
|
|
||||||
from .jobs import handle_jobs
|
|
||||||
scheduler.add_job('handle_jobs', handle_jobs, seconds=3, trigger='interval')
|
|
||||||
# endregion Add scheduler jobs
|
|
||||||
|
|
||||||
return app
|
return app
|
||||||
|
@ -1,6 +1,6 @@
|
|||||||
from flask import abort, request
|
from flask import abort, request
|
||||||
from app.decorators import content_negotiation
|
|
||||||
from app import db
|
from app import db
|
||||||
|
from app.decorators import content_negotiation
|
||||||
from app.models import User
|
from app.models import User
|
||||||
from . import bp
|
from . import bp
|
||||||
|
|
@ -1,7 +1,8 @@
|
|||||||
from flask import abort, flash, redirect, render_template, url_for
|
from flask import abort, flash, redirect, render_template, url_for
|
||||||
|
from flask_breadcrumbs import register_breadcrumb
|
||||||
from app import db, hashids
|
from app import db, hashids
|
||||||
from app.models import Avatar, Corpus, Role, User
|
from app.models import Avatar, Corpus, Role, User
|
||||||
from app.blueprints.users.settings.forms import (
|
from app.users.settings.forms import (
|
||||||
UpdateAvatarForm,
|
UpdateAvatarForm,
|
||||||
UpdatePasswordForm,
|
UpdatePasswordForm,
|
||||||
UpdateNotificationsForm,
|
UpdateNotificationsForm,
|
||||||
@ -10,9 +11,14 @@ from app.blueprints.users.settings.forms import (
|
|||||||
)
|
)
|
||||||
from . import bp
|
from . import bp
|
||||||
from .forms import UpdateUserForm
|
from .forms import UpdateUserForm
|
||||||
|
from app.users.utils import (
|
||||||
|
user_endpoint_arguments_constructor as user_eac,
|
||||||
|
user_dynamic_list_constructor as user_dlc
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
@bp.route('')
|
@bp.route('')
|
||||||
|
@register_breadcrumb(bp, '.', '<i class="material-icons left">admin_panel_settings</i>Administration')
|
||||||
def admin():
|
def admin():
|
||||||
return render_template(
|
return render_template(
|
||||||
'admin/admin.html.j2',
|
'admin/admin.html.j2',
|
||||||
@ -21,6 +27,7 @@ def admin():
|
|||||||
|
|
||||||
|
|
||||||
@bp.route('/corpora')
|
@bp.route('/corpora')
|
||||||
|
@register_breadcrumb(bp, '.corpora', 'Corpora')
|
||||||
def corpora():
|
def corpora():
|
||||||
corpora = Corpus.query.all()
|
corpora = Corpus.query.all()
|
||||||
return render_template(
|
return render_template(
|
||||||
@ -31,6 +38,7 @@ def corpora():
|
|||||||
|
|
||||||
|
|
||||||
@bp.route('/users')
|
@bp.route('/users')
|
||||||
|
@register_breadcrumb(bp, '.users', '<i class="material-icons left">group</i>Users')
|
||||||
def users():
|
def users():
|
||||||
users = User.query.all()
|
users = User.query.all()
|
||||||
return render_template(
|
return render_template(
|
||||||
@ -41,6 +49,7 @@ def users():
|
|||||||
|
|
||||||
|
|
||||||
@bp.route('/users/<hashid:user_id>')
|
@bp.route('/users/<hashid:user_id>')
|
||||||
|
@register_breadcrumb(bp, '.users.entity', '', dynamic_list_constructor=user_dlc)
|
||||||
def user(user_id):
|
def user(user_id):
|
||||||
user = User.query.get_or_404(user_id)
|
user = User.query.get_or_404(user_id)
|
||||||
corpora = Corpus.query.filter(Corpus.user == user).all()
|
corpora = Corpus.query.filter(Corpus.user == user).all()
|
||||||
@ -53,6 +62,7 @@ def user(user_id):
|
|||||||
|
|
||||||
|
|
||||||
@bp.route('/users/<hashid:user_id>/settings', methods=['GET', 'POST'])
|
@bp.route('/users/<hashid:user_id>/settings', methods=['GET', 'POST'])
|
||||||
|
@register_breadcrumb(bp, '.users.entity.settings', '<i class="material-icons left">settings</i>Settings')
|
||||||
def user_settings(user_id):
|
def user_settings(user_id):
|
||||||
user = User.query.get_or_404(user_id)
|
user = User.query.get_or_404(user_id)
|
||||||
update_account_information_form = UpdateAccountInformationForm(user)
|
update_account_information_form = UpdateAccountInformationForm(user)
|
@ -5,8 +5,8 @@ from flask import abort, Blueprint
|
|||||||
from werkzeug.exceptions import InternalServerError
|
from werkzeug.exceptions import InternalServerError
|
||||||
from app import db, hashids
|
from app import db, hashids
|
||||||
from app.models import Job, JobInput, JobStatus, TesseractOCRPipelineModel
|
from app.models import Job, JobInput, JobStatus, TesseractOCRPipelineModel
|
||||||
from .auth import auth_error_responses, token_auth
|
|
||||||
from .schemas import EmptySchema, JobSchema, SpaCyNLPPipelineJobSchema, TesseractOCRPipelineJobSchema, TesseractOCRPipelineModelSchema
|
from .schemas import EmptySchema, JobSchema, SpaCyNLPPipelineJobSchema, TesseractOCRPipelineJobSchema, TesseractOCRPipelineModelSchema
|
||||||
|
from .auth import auth_error_responses, token_auth
|
||||||
|
|
||||||
|
|
||||||
bp = Blueprint('jobs', __name__)
|
bp = Blueprint('jobs', __name__)
|
||||||
@ -77,7 +77,7 @@ def delete_job(job_id):
|
|||||||
job = Job.query.get(job_id)
|
job = Job.query.get(job_id)
|
||||||
if job is None:
|
if job is None:
|
||||||
abort(404)
|
abort(404)
|
||||||
if not (job.user == current_user or current_user.is_administrator):
|
if not (job.user == current_user or current_user.is_administrator()):
|
||||||
abort(403)
|
abort(403)
|
||||||
try:
|
try:
|
||||||
job.delete()
|
job.delete()
|
||||||
@ -97,6 +97,6 @@ def get_job(job_id):
|
|||||||
job = Job.query.get(job_id)
|
job = Job.query.get(job_id)
|
||||||
if job is None:
|
if job is None:
|
||||||
abort(404)
|
abort(404)
|
||||||
if not (job.user == current_user or current_user.is_administrator):
|
if not (job.user == current_user or current_user.is_administrator()):
|
||||||
abort(403)
|
abort(403)
|
||||||
return job
|
return job
|
@ -10,7 +10,7 @@ from app.models import (
|
|||||||
User,
|
User,
|
||||||
UserSettingJobStatusMailNotificationLevel
|
UserSettingJobStatusMailNotificationLevel
|
||||||
)
|
)
|
||||||
from app.blueprints.services import SERVICES
|
from app.services import SERVICES
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -3,11 +3,11 @@ from apifairy import authenticate, body, response
|
|||||||
from apifairy.decorators import other_responses
|
from apifairy.decorators import other_responses
|
||||||
from flask import abort, Blueprint
|
from flask import abort, Blueprint
|
||||||
from werkzeug.exceptions import InternalServerError
|
from werkzeug.exceptions import InternalServerError
|
||||||
from app.email import create_message, send
|
|
||||||
from app import db
|
from app import db
|
||||||
|
from app.email import create_message, send
|
||||||
from app.models import User
|
from app.models import User
|
||||||
from .auth import auth_error_responses, token_auth
|
|
||||||
from .schemas import EmptySchema, UserSchema
|
from .schemas import EmptySchema, UserSchema
|
||||||
|
from .auth import auth_error_responses, token_auth
|
||||||
|
|
||||||
|
|
||||||
bp = Blueprint('users', __name__)
|
bp = Blueprint('users', __name__)
|
||||||
@ -60,7 +60,7 @@ def delete_user(user_id):
|
|||||||
user = User.query.get(user_id)
|
user = User.query.get(user_id)
|
||||||
if user is None:
|
if user is None:
|
||||||
abort(404)
|
abort(404)
|
||||||
if not (user == current_user or current_user.is_administrator):
|
if not (user == current_user or current_user.is_administrator()):
|
||||||
abort(403)
|
abort(403)
|
||||||
user.delete()
|
user.delete()
|
||||||
db.session.commit()
|
db.session.commit()
|
||||||
@ -78,7 +78,7 @@ def get_user(user_id):
|
|||||||
user = User.query.get(user_id)
|
user = User.query.get(user_id)
|
||||||
if user is None:
|
if user is None:
|
||||||
abort(404)
|
abort(404)
|
||||||
if not (user == current_user or current_user.is_administrator):
|
if not (user == current_user or current_user.is_administrator()):
|
||||||
abort(403)
|
abort(403)
|
||||||
return user
|
return user
|
||||||
|
|
||||||
@ -94,6 +94,6 @@ def get_user_by_username(username):
|
|||||||
user = User.query.filter(User.username == username).first()
|
user = User.query.filter(User.username == username).first()
|
||||||
if user is None:
|
if user is None:
|
||||||
abort(404)
|
abort(404)
|
||||||
if not (user == current_user or current_user.is_administrator):
|
if not (user == current_user or current_user.is_administrator()):
|
||||||
abort(403)
|
abort(403)
|
||||||
return user
|
return user
|
5
app/auth/__init__.py
Normal file
5
app/auth/__init__.py
Normal file
@ -0,0 +1,5 @@
|
|||||||
|
from flask import Blueprint
|
||||||
|
|
||||||
|
|
||||||
|
bp = Blueprint('auth', __name__)
|
||||||
|
from . import routes
|
@ -60,11 +60,7 @@ class RegistrationForm(FlaskForm):
|
|||||||
|
|
||||||
def validate_username(self, field):
|
def validate_username(self, field):
|
||||||
if User.query.filter_by(username=field.data).first():
|
if User.query.filter_by(username=field.data).first():
|
||||||
raise ValidationError('Username already registered')
|
raise ValidationError('Username already in use')
|
||||||
|
|
||||||
def validate_terms_of_use_accepted(self, field):
|
|
||||||
if not field.data:
|
|
||||||
raise ValidationError('Terms of Use not accepted')
|
|
||||||
|
|
||||||
|
|
||||||
class LoginForm(FlaskForm):
|
class LoginForm(FlaskForm):
|
@ -1,4 +1,5 @@
|
|||||||
from flask import abort, flash, redirect, render_template, request, url_for
|
from flask import abort, flash, redirect, render_template, request, url_for
|
||||||
|
from flask_breadcrumbs import register_breadcrumb
|
||||||
from flask_login import current_user, login_user, login_required, logout_user
|
from flask_login import current_user, login_user, login_required, logout_user
|
||||||
from app import db
|
from app import db
|
||||||
from app.email import create_message, send
|
from app.email import create_message, send
|
||||||
@ -12,7 +13,24 @@ from .forms import (
|
|||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
@bp.before_app_request
|
||||||
|
def before_request():
|
||||||
|
"""
|
||||||
|
Checks if a user is unconfirmed when visiting specific sites. Redirects to
|
||||||
|
unconfirmed view if user is unconfirmed.
|
||||||
|
"""
|
||||||
|
if current_user.is_authenticated:
|
||||||
|
current_user.ping()
|
||||||
|
db.session.commit()
|
||||||
|
if (not current_user.confirmed
|
||||||
|
and request.endpoint
|
||||||
|
and request.blueprint != 'auth'
|
||||||
|
and request.endpoint != 'static'):
|
||||||
|
return redirect(url_for('auth.unconfirmed'))
|
||||||
|
|
||||||
|
|
||||||
@bp.route('/register', methods=['GET', 'POST'])
|
@bp.route('/register', methods=['GET', 'POST'])
|
||||||
|
@register_breadcrumb(bp, '.register', 'Register')
|
||||||
def register():
|
def register():
|
||||||
if current_user.is_authenticated:
|
if current_user.is_authenticated:
|
||||||
return redirect(url_for('main.dashboard'))
|
return redirect(url_for('main.dashboard'))
|
||||||
@ -49,6 +67,7 @@ def register():
|
|||||||
|
|
||||||
|
|
||||||
@bp.route('/login', methods=['GET', 'POST'])
|
@bp.route('/login', methods=['GET', 'POST'])
|
||||||
|
@register_breadcrumb(bp, '.login', 'Login')
|
||||||
def login():
|
def login():
|
||||||
if current_user.is_authenticated:
|
if current_user.is_authenticated:
|
||||||
return redirect(url_for('main.dashboard'))
|
return redirect(url_for('main.dashboard'))
|
||||||
@ -79,6 +98,7 @@ def logout():
|
|||||||
|
|
||||||
|
|
||||||
@bp.route('/unconfirmed')
|
@bp.route('/unconfirmed')
|
||||||
|
@register_breadcrumb(bp, '.unconfirmed', 'Unconfirmed')
|
||||||
@login_required
|
@login_required
|
||||||
def unconfirmed():
|
def unconfirmed():
|
||||||
if current_user.confirmed:
|
if current_user.confirmed:
|
||||||
@ -121,6 +141,7 @@ def confirm(token):
|
|||||||
|
|
||||||
|
|
||||||
@bp.route('/reset-password-request', methods=['GET', 'POST'])
|
@bp.route('/reset-password-request', methods=['GET', 'POST'])
|
||||||
|
@register_breadcrumb(bp, '.reset_password_request', 'Password Reset')
|
||||||
def reset_password_request():
|
def reset_password_request():
|
||||||
if current_user.is_authenticated:
|
if current_user.is_authenticated:
|
||||||
return redirect(url_for('main.dashboard'))
|
return redirect(url_for('main.dashboard'))
|
||||||
@ -150,6 +171,7 @@ def reset_password_request():
|
|||||||
|
|
||||||
|
|
||||||
@bp.route('/reset-password/<token>', methods=['GET', 'POST'])
|
@bp.route('/reset-password/<token>', methods=['GET', 'POST'])
|
||||||
|
@register_breadcrumb(bp, '.reset_password', 'Password Reset')
|
||||||
def reset_password(token):
|
def reset_password(token):
|
||||||
if current_user.is_authenticated:
|
if current_user.is_authenticated:
|
||||||
return redirect(url_for('main.dashboard'))
|
return redirect(url_for('main.dashboard'))
|
@ -1,29 +0,0 @@
|
|||||||
from flask import Blueprint, redirect, request, url_for
|
|
||||||
from flask_login import current_user
|
|
||||||
from app import db
|
|
||||||
|
|
||||||
|
|
||||||
bp = Blueprint('auth', __name__)
|
|
||||||
|
|
||||||
|
|
||||||
@bp.before_app_request
|
|
||||||
def before_request():
|
|
||||||
if not current_user.is_authenticated:
|
|
||||||
return
|
|
||||||
|
|
||||||
current_user.ping()
|
|
||||||
db.session.commit()
|
|
||||||
|
|
||||||
if (
|
|
||||||
not current_user.confirmed
|
|
||||||
and request.endpoint
|
|
||||||
and request.blueprint != 'auth'
|
|
||||||
and request.endpoint != 'static'
|
|
||||||
):
|
|
||||||
return redirect(url_for('auth.unconfirmed'))
|
|
||||||
|
|
||||||
if not current_user.terms_of_use_accepted:
|
|
||||||
return redirect(url_for('main.terms_of_use'))
|
|
||||||
|
|
||||||
|
|
||||||
from . import routes
|
|
@ -1,25 +0,0 @@
|
|||||||
from flask import Blueprint
|
|
||||||
from flask_login import login_required
|
|
||||||
|
|
||||||
|
|
||||||
bp = Blueprint('contributions', __name__)
|
|
||||||
|
|
||||||
|
|
||||||
@bp.before_request
|
|
||||||
@login_required
|
|
||||||
def before_request():
|
|
||||||
'''
|
|
||||||
Ensures that the routes in this package can only be visited by users that
|
|
||||||
are logged in.
|
|
||||||
'''
|
|
||||||
pass
|
|
||||||
|
|
||||||
|
|
||||||
from . import routes
|
|
||||||
|
|
||||||
|
|
||||||
from .spacy_nlp_pipeline_models import bp as spacy_nlp_pipeline_models_bp
|
|
||||||
bp.register_blueprint(spacy_nlp_pipeline_models_bp, url_prefix='/spacy-nlp-pipeline-models')
|
|
||||||
|
|
||||||
from .tesseract_ocr_pipeline_models import bp as tesseract_ocr_pipeline_models_bp
|
|
||||||
bp.register_blueprint(tesseract_ocr_pipeline_models_bp, url_prefix='/tesseract-ocr-pipeline-models')
|
|
@ -1,7 +0,0 @@
|
|||||||
from flask import render_template
|
|
||||||
from . import bp
|
|
||||||
|
|
||||||
|
|
||||||
@bp.route('')
|
|
||||||
def index():
|
|
||||||
return render_template('contributions/index.html.j2', title='Contributions')
|
|
@ -1,18 +0,0 @@
|
|||||||
from flask import current_app, Blueprint
|
|
||||||
from flask_login import login_required
|
|
||||||
|
|
||||||
|
|
||||||
bp = Blueprint('spacy_nlp_pipeline_models', __name__)
|
|
||||||
|
|
||||||
|
|
||||||
@bp.before_request
|
|
||||||
@login_required
|
|
||||||
def before_request():
|
|
||||||
'''
|
|
||||||
Ensures that the routes in this package can only be visited by users that
|
|
||||||
are logged in.
|
|
||||||
'''
|
|
||||||
pass
|
|
||||||
|
|
||||||
|
|
||||||
from . import routes, json_routes
|
|
@ -1,18 +0,0 @@
|
|||||||
from flask import Blueprint
|
|
||||||
from flask_login import login_required
|
|
||||||
|
|
||||||
|
|
||||||
bp = Blueprint('tesseract_ocr_pipeline_models', __name__)
|
|
||||||
|
|
||||||
|
|
||||||
@bp.before_request
|
|
||||||
@login_required
|
|
||||||
def before_request():
|
|
||||||
'''
|
|
||||||
Ensures that the routes in this package can only be visited by users that
|
|
||||||
are logged in.
|
|
||||||
'''
|
|
||||||
pass
|
|
||||||
|
|
||||||
|
|
||||||
from . import json_routes, routes
|
|
@ -1,76 +0,0 @@
|
|||||||
from flask import abort, flash, jsonify, make_response, request
|
|
||||||
from flask_login import current_user
|
|
||||||
from app import db
|
|
||||||
from app.decorators import content_negotiation
|
|
||||||
from app.models import (
|
|
||||||
Corpus,
|
|
||||||
CorpusFollowerAssociation,
|
|
||||||
CorpusFollowerRole,
|
|
||||||
User
|
|
||||||
)
|
|
||||||
from ..decorators import corpus_follower_permission_required
|
|
||||||
from . import bp
|
|
||||||
|
|
||||||
|
|
||||||
@bp.route('/<hashid:corpus_id>/followers', methods=['POST'])
|
|
||||||
@corpus_follower_permission_required('MANAGE_FOLLOWERS')
|
|
||||||
@content_negotiation(consumes='application/json', produces='application/json')
|
|
||||||
def create_corpus_followers(corpus_id):
|
|
||||||
usernames = request.json
|
|
||||||
if not (isinstance(usernames, list) or all(isinstance(u, str) for u in usernames)):
|
|
||||||
abort(400)
|
|
||||||
corpus = Corpus.query.get_or_404(corpus_id)
|
|
||||||
for username in usernames:
|
|
||||||
user = User.query.filter_by(username=username, is_public=True).first_or_404()
|
|
||||||
user.follow_corpus(corpus)
|
|
||||||
db.session.commit()
|
|
||||||
response_data = {
|
|
||||||
'message': f'Users are now following "{corpus.title}"',
|
|
||||||
'category': 'corpus'
|
|
||||||
}
|
|
||||||
return response_data, 200
|
|
||||||
|
|
||||||
|
|
||||||
@bp.route('/<hashid:corpus_id>/followers/<hashid:follower_id>/role', methods=['PUT'])
|
|
||||||
@corpus_follower_permission_required('MANAGE_FOLLOWERS')
|
|
||||||
@content_negotiation(consumes='application/json', produces='application/json')
|
|
||||||
def update_corpus_follower_role(corpus_id, follower_id):
|
|
||||||
role_name = request.json
|
|
||||||
if not isinstance(role_name, str):
|
|
||||||
abort(400)
|
|
||||||
cfr = CorpusFollowerRole.query.filter_by(name=role_name).first()
|
|
||||||
if cfr is None:
|
|
||||||
abort(400)
|
|
||||||
cfa = CorpusFollowerAssociation.query.filter_by(corpus_id=corpus_id, follower_id=follower_id).first_or_404()
|
|
||||||
cfa.role = cfr
|
|
||||||
db.session.commit()
|
|
||||||
response_data = {
|
|
||||||
'message': f'User "{cfa.follower.username}" is now {cfa.role.name}',
|
|
||||||
'category': 'corpus'
|
|
||||||
}
|
|
||||||
return response_data, 200
|
|
||||||
|
|
||||||
|
|
||||||
@bp.route('/<hashid:corpus_id>/followers/<hashid:follower_id>', methods=['DELETE'])
|
|
||||||
def delete_corpus_follower(corpus_id, follower_id):
|
|
||||||
cfa = CorpusFollowerAssociation.query.filter_by(corpus_id=corpus_id, follower_id=follower_id).first_or_404()
|
|
||||||
if not (
|
|
||||||
current_user.id == follower_id
|
|
||||||
or current_user == cfa.corpus.user
|
|
||||||
or CorpusFollowerAssociation.query.filter_by(corpus_id=corpus_id, follower_id=current_user.id).first().role.has_permission('MANAGE_FOLLOWERS')
|
|
||||||
or current_user.is_administrator):
|
|
||||||
abort(403)
|
|
||||||
if current_user.id == follower_id:
|
|
||||||
flash(f'You are no longer following "{cfa.corpus.title}"', 'corpus')
|
|
||||||
response = make_response()
|
|
||||||
response.status_code = 204
|
|
||||||
else:
|
|
||||||
response_data = {
|
|
||||||
'message': f'"{cfa.follower.username}" is not following "{cfa.corpus.title}" anymore',
|
|
||||||
'category': 'corpus'
|
|
||||||
}
|
|
||||||
response = jsonify(response_data)
|
|
||||||
response.status_code = 200
|
|
||||||
cfa.follower.unfollow_corpus(cfa.corpus)
|
|
||||||
db.session.commit()
|
|
||||||
return response
|
|
@ -1,12 +0,0 @@
|
|||||||
from app.models import User
|
|
||||||
from app import db
|
|
||||||
from . import bp
|
|
||||||
|
|
||||||
|
|
||||||
@bp.cli.command('reset')
|
|
||||||
def reset():
|
|
||||||
''' Reset terms of use accept '''
|
|
||||||
for user in [x for x in User.query.all() if x.terms_of_use_accepted]:
|
|
||||||
print(f'Resetting user {user.username}')
|
|
||||||
user.terms_of_use_accepted = False
|
|
||||||
db.session.commit()
|
|
@ -1,43 +0,0 @@
|
|||||||
from flask import (
|
|
||||||
abort,
|
|
||||||
redirect,
|
|
||||||
render_template,
|
|
||||||
send_from_directory,
|
|
||||||
url_for
|
|
||||||
)
|
|
||||||
from flask_login import current_user
|
|
||||||
from app.models import User
|
|
||||||
from . import bp
|
|
||||||
|
|
||||||
|
|
||||||
@bp.route('')
|
|
||||||
def users():
|
|
||||||
return redirect(url_for('main.social_area', _anchor='users'))
|
|
||||||
|
|
||||||
|
|
||||||
@bp.route('/<hashid:user_id>')
|
|
||||||
def user(user_id):
|
|
||||||
user = User.query.get_or_404(user_id)
|
|
||||||
if not (user.is_public or user == current_user or current_user.is_administrator):
|
|
||||||
abort(403)
|
|
||||||
return render_template(
|
|
||||||
'users/user.html.j2',
|
|
||||||
title=user.username,
|
|
||||||
user=user
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
@bp.route('/<hashid:user_id>/avatar')
|
|
||||||
def user_avatar(user_id):
|
|
||||||
user = User.query.get_or_404(user_id)
|
|
||||||
if not (user.is_public or user == current_user or current_user.is_administrator):
|
|
||||||
abort(403)
|
|
||||||
if user.avatar is None:
|
|
||||||
return redirect(url_for('static', filename='images/user_avatar.png'))
|
|
||||||
return send_from_directory(
|
|
||||||
user.avatar.path.parent,
|
|
||||||
user.avatar.path.name,
|
|
||||||
as_attachment=True,
|
|
||||||
download_name=user.avatar.filename,
|
|
||||||
mimetype=user.avatar.mimetype
|
|
||||||
)
|
|
@ -2,7 +2,7 @@ from flask import Blueprint
|
|||||||
from flask_login import login_required
|
from flask_login import login_required
|
||||||
|
|
||||||
|
|
||||||
bp = Blueprint('jobs', __name__)
|
bp = Blueprint('contributions', __name__)
|
||||||
|
|
||||||
|
|
||||||
@bp.before_request
|
@bp.before_request
|
||||||
@ -15,4 +15,9 @@ def before_request():
|
|||||||
pass
|
pass
|
||||||
|
|
||||||
|
|
||||||
from . import routes, json_routes
|
from . import (
|
||||||
|
routes,
|
||||||
|
spacy_nlp_pipeline_models,
|
||||||
|
tesseract_ocr_pipeline_models,
|
||||||
|
transkribus_htr_pipeline_models
|
||||||
|
)
|
9
app/contributions/routes.py
Normal file
9
app/contributions/routes.py
Normal file
@ -0,0 +1,9 @@
|
|||||||
|
from flask import redirect, url_for
|
||||||
|
from flask_breadcrumbs import register_breadcrumb
|
||||||
|
from . import bp
|
||||||
|
|
||||||
|
|
||||||
|
@bp.route('')
|
||||||
|
@register_breadcrumb(bp, '.', '<i class="material-icons left">new_label</i>My Contributions')
|
||||||
|
def contributions():
|
||||||
|
return redirect(url_for('main.dashboard', _anchor='contributions'))
|
@ -1,7 +1,7 @@
|
|||||||
from flask_wtf.file import FileField, FileRequired
|
from flask_wtf.file import FileField, FileRequired
|
||||||
from wtforms import StringField, ValidationError
|
from wtforms import StringField, ValidationError
|
||||||
from wtforms.validators import InputRequired, Length
|
from wtforms.validators import InputRequired, Length
|
||||||
from app.blueprints.services import SERVICES
|
from app.services import SERVICES
|
||||||
from ..forms import ContributionBaseForm, UpdateContributionBaseForm
|
from ..forms import ContributionBaseForm, UpdateContributionBaseForm
|
||||||
|
|
||||||
|
|
||||||
@ -16,8 +16,8 @@ class CreateSpaCyNLPPipelineModelForm(ContributionBaseForm):
|
|||||||
)
|
)
|
||||||
|
|
||||||
def validate_spacy_model_file(self, field):
|
def validate_spacy_model_file(self, field):
|
||||||
if not field.data.filename.lower().endswith(('.tar.gz', ('.whl'))):
|
if not field.data.filename.lower().endswith('.tar.gz'):
|
||||||
raise ValidationError('.tar.gz or .whl files only!')
|
raise ValidationError('.tar.gz files only!')
|
||||||
|
|
||||||
def __init__(self, *args, **kwargs):
|
def __init__(self, *args, **kwargs):
|
||||||
if 'prefix' not in kwargs:
|
if 'prefix' not in kwargs:
|
@ -1,14 +1,13 @@
|
|||||||
from flask import abort, current_app, request
|
from flask import abort, current_app, request
|
||||||
from flask_login import current_user, login_required
|
from flask_login import current_user
|
||||||
from threading import Thread
|
from threading import Thread
|
||||||
from app import db
|
from app import db
|
||||||
from app.decorators import content_negotiation, permission_required
|
from app.decorators import content_negotiation, permission_required
|
||||||
from app.models import SpaCyNLPPipelineModel
|
from app.models import SpaCyNLPPipelineModel
|
||||||
from . import bp
|
from .. import bp
|
||||||
|
|
||||||
|
|
||||||
@bp.route('/<hashid:spacy_nlp_pipeline_model_id>', methods=['DELETE'])
|
@bp.route('/spacy-nlp-pipeline-models/<hashid:spacy_nlp_pipeline_model_id>', methods=['DELETE'])
|
||||||
@login_required
|
|
||||||
@content_negotiation(produces='application/json')
|
@content_negotiation(produces='application/json')
|
||||||
def delete_spacy_model(spacy_nlp_pipeline_model_id):
|
def delete_spacy_model(spacy_nlp_pipeline_model_id):
|
||||||
def _delete_spacy_model(app, spacy_nlp_pipeline_model_id):
|
def _delete_spacy_model(app, spacy_nlp_pipeline_model_id):
|
||||||
@ -18,7 +17,7 @@ def delete_spacy_model(spacy_nlp_pipeline_model_id):
|
|||||||
db.session.commit()
|
db.session.commit()
|
||||||
|
|
||||||
snpm = SpaCyNLPPipelineModel.query.get_or_404(spacy_nlp_pipeline_model_id)
|
snpm = SpaCyNLPPipelineModel.query.get_or_404(spacy_nlp_pipeline_model_id)
|
||||||
if not (snpm.user == current_user or current_user.is_administrator):
|
if not (snpm.user == current_user or current_user.is_administrator()):
|
||||||
abort(403)
|
abort(403)
|
||||||
thread = Thread(
|
thread = Thread(
|
||||||
target=_delete_spacy_model,
|
target=_delete_spacy_model,
|
||||||
@ -32,7 +31,7 @@ def delete_spacy_model(spacy_nlp_pipeline_model_id):
|
|||||||
return response_data, 202
|
return response_data, 202
|
||||||
|
|
||||||
|
|
||||||
@bp.route('/<hashid:spacy_nlp_pipeline_model_id>/is_public', methods=['PUT'])
|
@bp.route('/spacy-nlp-pipeline-models/<hashid:spacy_nlp_pipeline_model_id>/is_public', methods=['PUT'])
|
||||||
@permission_required('CONTRIBUTE')
|
@permission_required('CONTRIBUTE')
|
||||||
@content_negotiation(consumes='application/json', produces='application/json')
|
@content_negotiation(consumes='application/json', produces='application/json')
|
||||||
def update_spacy_nlp_pipeline_model_is_public(spacy_nlp_pipeline_model_id):
|
def update_spacy_nlp_pipeline_model_is_public(spacy_nlp_pipeline_model_id):
|
||||||
@ -40,7 +39,7 @@ def update_spacy_nlp_pipeline_model_is_public(spacy_nlp_pipeline_model_id):
|
|||||||
if not isinstance(is_public, bool):
|
if not isinstance(is_public, bool):
|
||||||
abort(400)
|
abort(400)
|
||||||
snpm = SpaCyNLPPipelineModel.query.get_or_404(spacy_nlp_pipeline_model_id)
|
snpm = SpaCyNLPPipelineModel.query.get_or_404(spacy_nlp_pipeline_model_id)
|
||||||
if not (snpm.user == current_user or current_user.is_administrator):
|
if not (snpm.user == current_user or current_user.is_administrator()):
|
||||||
abort(403)
|
abort(403)
|
||||||
snpm.is_public = is_public
|
snpm.is_public = is_public
|
||||||
db.session.commit()
|
db.session.commit()
|
@ -1,5 +1,6 @@
|
|||||||
from flask import abort, flash, redirect, render_template, url_for
|
from flask import abort, flash, redirect, render_template, url_for
|
||||||
from flask_login import current_user, login_required
|
from flask_breadcrumbs import register_breadcrumb
|
||||||
|
from flask_login import current_user
|
||||||
from app import db
|
from app import db
|
||||||
from app.models import SpaCyNLPPipelineModel
|
from app.models import SpaCyNLPPipelineModel
|
||||||
from . import bp
|
from . import bp
|
||||||
@ -7,17 +8,23 @@ from .forms import (
|
|||||||
CreateSpaCyNLPPipelineModelForm,
|
CreateSpaCyNLPPipelineModelForm,
|
||||||
UpdateSpaCyNLPPipelineModelForm
|
UpdateSpaCyNLPPipelineModelForm
|
||||||
)
|
)
|
||||||
|
from .utils import (
|
||||||
|
spacy_nlp_pipeline_model_dlc as spacy_nlp_pipeline_model_dlc
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
@bp.route('/')
|
@bp.route('/spacy-nlp-pipeline-models')
|
||||||
@login_required
|
@register_breadcrumb(bp, '.spacy_nlp_pipeline_models', 'SpaCy NLP Pipeline Models')
|
||||||
def index():
|
def spacy_nlp_pipeline_models():
|
||||||
return redirect(url_for('contributions.index', _anchor='spacy-nlp-pipeline-models'))
|
return render_template(
|
||||||
|
'contributions/spacy_nlp_pipeline_models/spacy_nlp_pipeline_models.html.j2',
|
||||||
|
title='SpaCy NLP Pipeline Models'
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
@bp.route('/create', methods=['GET', 'POST'])
|
@bp.route('/spacy-nlp-pipeline-models/create', methods=['GET', 'POST'])
|
||||||
@login_required
|
@register_breadcrumb(bp, '.spacy_nlp_pipeline_models.create', 'Create')
|
||||||
def create():
|
def create_spacy_nlp_pipeline_model():
|
||||||
form = CreateSpaCyNLPPipelineModelForm()
|
form = CreateSpaCyNLPPipelineModelForm()
|
||||||
if form.is_submitted():
|
if form.is_submitted():
|
||||||
if not form.validate():
|
if not form.validate():
|
||||||
@ -41,7 +48,7 @@ def create():
|
|||||||
abort(500)
|
abort(500)
|
||||||
db.session.commit()
|
db.session.commit()
|
||||||
flash(f'SpaCy NLP Pipeline model "{snpm.title}" created')
|
flash(f'SpaCy NLP Pipeline model "{snpm.title}" created')
|
||||||
return {}, 201, {'Location': url_for('.index')}
|
return {}, 201, {'Location': url_for('.spacy_nlp_pipeline_models')}
|
||||||
return render_template(
|
return render_template(
|
||||||
'contributions/spacy_nlp_pipeline_models/create.html.j2',
|
'contributions/spacy_nlp_pipeline_models/create.html.j2',
|
||||||
title='Create SpaCy NLP Pipeline Model',
|
title='Create SpaCy NLP Pipeline Model',
|
||||||
@ -49,11 +56,11 @@ def create():
|
|||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@bp.route('/<hashid:spacy_nlp_pipeline_model_id>', methods=['GET', 'POST'])
|
@bp.route('/spacy-nlp-pipeline-models/<hashid:spacy_nlp_pipeline_model_id>', methods=['GET', 'POST'])
|
||||||
@login_required
|
@register_breadcrumb(bp, '.spacy_nlp_pipeline_models.entity', '', dynamic_list_constructor=spacy_nlp_pipeline_model_dlc)
|
||||||
def entity(spacy_nlp_pipeline_model_id):
|
def spacy_nlp_pipeline_model(spacy_nlp_pipeline_model_id):
|
||||||
snpm = SpaCyNLPPipelineModel.query.get_or_404(spacy_nlp_pipeline_model_id)
|
snpm = SpaCyNLPPipelineModel.query.get_or_404(spacy_nlp_pipeline_model_id)
|
||||||
if not (snpm.user == current_user or current_user.is_administrator):
|
if not (snpm.user == current_user or current_user.is_administrator()):
|
||||||
abort(403)
|
abort(403)
|
||||||
form = UpdateSpaCyNLPPipelineModelForm(data=snpm.to_json_serializeable())
|
form = UpdateSpaCyNLPPipelineModelForm(data=snpm.to_json_serializeable())
|
||||||
if form.validate_on_submit():
|
if form.validate_on_submit():
|
||||||
@ -61,9 +68,9 @@ def entity(spacy_nlp_pipeline_model_id):
|
|||||||
if db.session.is_modified(snpm):
|
if db.session.is_modified(snpm):
|
||||||
flash(f'SpaCy NLP Pipeline model "{snpm.title}" updated')
|
flash(f'SpaCy NLP Pipeline model "{snpm.title}" updated')
|
||||||
db.session.commit()
|
db.session.commit()
|
||||||
return redirect(url_for('.index'))
|
return redirect(url_for('.spacy_nlp_pipeline_models'))
|
||||||
return render_template(
|
return render_template(
|
||||||
'contributions/spacy_nlp_pipeline_models/entity.html.j2',
|
'contributions/spacy_nlp_pipeline_models/spacy_nlp_pipeline_model.html.j2',
|
||||||
title=f'{snpm.title} {snpm.version}',
|
title=f'{snpm.title} {snpm.version}',
|
||||||
form=form,
|
form=form,
|
||||||
spacy_nlp_pipeline_model=snpm
|
spacy_nlp_pipeline_model=snpm
|
13
app/contributions/spacy_nlp_pipeline_models/utils.py
Normal file
13
app/contributions/spacy_nlp_pipeline_models/utils.py
Normal file
@ -0,0 +1,13 @@
|
|||||||
|
from flask import request, url_for
|
||||||
|
from app.models import SpaCyNLPPipelineModel
|
||||||
|
|
||||||
|
|
||||||
|
def spacy_nlp_pipeline_model_dlc():
|
||||||
|
snpm_id = request.view_args['spacy_nlp_pipeline_model_id']
|
||||||
|
snpm = SpaCyNLPPipelineModel.query.get_or_404(snpm_id)
|
||||||
|
return [
|
||||||
|
{
|
||||||
|
'text': f'{snpm.title} {snpm.version}',
|
||||||
|
'url': url_for('.spacy_nlp_pipeline_model', spacy_nlp_pipeline_model_id=snpm_id)
|
||||||
|
}
|
||||||
|
]
|
@ -1,6 +1,6 @@
|
|||||||
from flask_wtf.file import FileField, FileRequired
|
from flask_wtf.file import FileField, FileRequired
|
||||||
from wtforms import ValidationError
|
from wtforms import ValidationError
|
||||||
from app.blueprints.services import SERVICES
|
from app.services import SERVICES
|
||||||
from ..forms import ContributionBaseForm, UpdateContributionBaseForm
|
from ..forms import ContributionBaseForm, UpdateContributionBaseForm
|
||||||
|
|
||||||
|
|
@ -7,7 +7,7 @@ from app.models import TesseractOCRPipelineModel
|
|||||||
from . import bp
|
from . import bp
|
||||||
|
|
||||||
|
|
||||||
@bp.route('/<hashid:tesseract_ocr_pipeline_model_id>', methods=['DELETE'])
|
@bp.route('/tesseract-ocr-pipeline-models/<hashid:tesseract_ocr_pipeline_model_id>', methods=['DELETE'])
|
||||||
@content_negotiation(produces='application/json')
|
@content_negotiation(produces='application/json')
|
||||||
def delete_tesseract_model(tesseract_ocr_pipeline_model_id):
|
def delete_tesseract_model(tesseract_ocr_pipeline_model_id):
|
||||||
def _delete_tesseract_ocr_pipeline_model(app, tesseract_ocr_pipeline_model_id):
|
def _delete_tesseract_ocr_pipeline_model(app, tesseract_ocr_pipeline_model_id):
|
||||||
@ -17,7 +17,7 @@ def delete_tesseract_model(tesseract_ocr_pipeline_model_id):
|
|||||||
db.session.commit()
|
db.session.commit()
|
||||||
|
|
||||||
topm = TesseractOCRPipelineModel.query.get_or_404(tesseract_ocr_pipeline_model_id)
|
topm = TesseractOCRPipelineModel.query.get_or_404(tesseract_ocr_pipeline_model_id)
|
||||||
if not (topm.user == current_user or current_user.is_administrator):
|
if not (topm.user == current_user or current_user.is_administrator()):
|
||||||
abort(403)
|
abort(403)
|
||||||
thread = Thread(
|
thread = Thread(
|
||||||
target=_delete_tesseract_ocr_pipeline_model,
|
target=_delete_tesseract_ocr_pipeline_model,
|
||||||
@ -31,7 +31,7 @@ def delete_tesseract_model(tesseract_ocr_pipeline_model_id):
|
|||||||
return response_data, 202
|
return response_data, 202
|
||||||
|
|
||||||
|
|
||||||
@bp.route('/<hashid:tesseract_ocr_pipeline_model_id>/is_public', methods=['PUT'])
|
@bp.route('/tesseract-ocr-pipeline-models/<hashid:tesseract_ocr_pipeline_model_id>/is_public', methods=['PUT'])
|
||||||
@permission_required('CONTRIBUTE')
|
@permission_required('CONTRIBUTE')
|
||||||
@content_negotiation(consumes='application/json', produces='application/json')
|
@content_negotiation(consumes='application/json', produces='application/json')
|
||||||
def update_tesseract_ocr_pipeline_model_is_public(tesseract_ocr_pipeline_model_id):
|
def update_tesseract_ocr_pipeline_model_is_public(tesseract_ocr_pipeline_model_id):
|
||||||
@ -39,7 +39,7 @@ def update_tesseract_ocr_pipeline_model_is_public(tesseract_ocr_pipeline_model_i
|
|||||||
if not isinstance(is_public, bool):
|
if not isinstance(is_public, bool):
|
||||||
abort(400)
|
abort(400)
|
||||||
topm = TesseractOCRPipelineModel.query.get_or_404(tesseract_ocr_pipeline_model_id)
|
topm = TesseractOCRPipelineModel.query.get_or_404(tesseract_ocr_pipeline_model_id)
|
||||||
if not (topm.user == current_user or current_user.is_administrator):
|
if not (topm.user == current_user or current_user.is_administrator()):
|
||||||
abort(403)
|
abort(403)
|
||||||
topm.is_public = is_public
|
topm.is_public = is_public
|
||||||
db.session.commit()
|
db.session.commit()
|
@ -1,4 +1,5 @@
|
|||||||
from flask import abort, flash, redirect, render_template, url_for
|
from flask import abort, flash, redirect, render_template, url_for
|
||||||
|
from flask_breadcrumbs import register_breadcrumb
|
||||||
from flask_login import current_user
|
from flask_login import current_user
|
||||||
from app import db
|
from app import db
|
||||||
from app.models import TesseractOCRPipelineModel
|
from app.models import TesseractOCRPipelineModel
|
||||||
@ -7,15 +8,23 @@ from .forms import (
|
|||||||
CreateTesseractOCRPipelineModelForm,
|
CreateTesseractOCRPipelineModelForm,
|
||||||
UpdateTesseractOCRPipelineModelForm
|
UpdateTesseractOCRPipelineModelForm
|
||||||
)
|
)
|
||||||
|
from .utils import (
|
||||||
|
tesseract_ocr_pipeline_model_dlc as tesseract_ocr_pipeline_model_dlc
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
@bp.route('/')
|
@bp.route('/tesseract-ocr-pipeline-models')
|
||||||
def index():
|
@register_breadcrumb(bp, '.tesseract_ocr_pipeline_models', 'Tesseract OCR Pipeline Models')
|
||||||
return redirect(url_for('contributions.index', _anchor='tesseract-ocr-pipeline-models'))
|
def tesseract_ocr_pipeline_models():
|
||||||
|
return render_template(
|
||||||
|
'contributions/tesseract_ocr_pipeline_models/tesseract_ocr_pipeline_models.html.j2',
|
||||||
|
title='Tesseract OCR Pipeline Models'
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
@bp.route('/create', methods=['GET', 'POST'])
|
@bp.route('/tesseract-ocr-pipeline-models/create', methods=['GET', 'POST'])
|
||||||
def create():
|
@register_breadcrumb(bp, '.tesseract_ocr_pipeline_models.create', 'Create')
|
||||||
|
def create_tesseract_ocr_pipeline_model():
|
||||||
form = CreateTesseractOCRPipelineModelForm()
|
form = CreateTesseractOCRPipelineModelForm()
|
||||||
if form.is_submitted():
|
if form.is_submitted():
|
||||||
if not form.validate():
|
if not form.validate():
|
||||||
@ -38,7 +47,7 @@ def create():
|
|||||||
abort(500)
|
abort(500)
|
||||||
db.session.commit()
|
db.session.commit()
|
||||||
flash(f'Tesseract OCR Pipeline model "{topm.title}" created')
|
flash(f'Tesseract OCR Pipeline model "{topm.title}" created')
|
||||||
return {}, 201, {'Location': url_for('.index')}
|
return {}, 201, {'Location': url_for('.tesseract_ocr_pipeline_models')}
|
||||||
return render_template(
|
return render_template(
|
||||||
'contributions/tesseract_ocr_pipeline_models/create.html.j2',
|
'contributions/tesseract_ocr_pipeline_models/create.html.j2',
|
||||||
title='Create Tesseract OCR Pipeline Model',
|
title='Create Tesseract OCR Pipeline Model',
|
||||||
@ -46,10 +55,11 @@ def create():
|
|||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@bp.route('/<hashid:tesseract_ocr_pipeline_model_id>', methods=['GET', 'POST'])
|
@bp.route('/tesseract-ocr-pipeline-models/<hashid:tesseract_ocr_pipeline_model_id>', methods=['GET', 'POST'])
|
||||||
def entity(tesseract_ocr_pipeline_model_id):
|
@register_breadcrumb(bp, '.tesseract_ocr_pipeline_models.entity', '', dynamic_list_constructor=tesseract_ocr_pipeline_model_dlc)
|
||||||
|
def tesseract_ocr_pipeline_model(tesseract_ocr_pipeline_model_id):
|
||||||
topm = TesseractOCRPipelineModel.query.get_or_404(tesseract_ocr_pipeline_model_id)
|
topm = TesseractOCRPipelineModel.query.get_or_404(tesseract_ocr_pipeline_model_id)
|
||||||
if not (topm.user == current_user or current_user.is_administrator):
|
if not (topm.user == current_user or current_user.is_administrator()):
|
||||||
abort(403)
|
abort(403)
|
||||||
form = UpdateTesseractOCRPipelineModelForm(data=topm.to_json_serializeable())
|
form = UpdateTesseractOCRPipelineModelForm(data=topm.to_json_serializeable())
|
||||||
if form.validate_on_submit():
|
if form.validate_on_submit():
|
||||||
@ -57,9 +67,9 @@ def entity(tesseract_ocr_pipeline_model_id):
|
|||||||
if db.session.is_modified(topm):
|
if db.session.is_modified(topm):
|
||||||
flash(f'Tesseract OCR Pipeline model "{topm.title}" updated')
|
flash(f'Tesseract OCR Pipeline model "{topm.title}" updated')
|
||||||
db.session.commit()
|
db.session.commit()
|
||||||
return redirect(url_for('.index'))
|
return redirect(url_for('.tesseract_ocr_pipeline_models'))
|
||||||
return render_template(
|
return render_template(
|
||||||
'contributions/tesseract_ocr_pipeline_models/entity.html.j2',
|
'contributions/tesseract_ocr_pipeline_models/tesseract_ocr_pipeline_model.html.j2',
|
||||||
title=f'{topm.title} {topm.version}',
|
title=f'{topm.title} {topm.version}',
|
||||||
form=form,
|
form=form,
|
||||||
tesseract_ocr_pipeline_model=topm
|
tesseract_ocr_pipeline_model=topm
|
13
app/contributions/tesseract_ocr_pipeline_models/utils.py
Normal file
13
app/contributions/tesseract_ocr_pipeline_models/utils.py
Normal file
@ -0,0 +1,13 @@
|
|||||||
|
from flask import request, url_for
|
||||||
|
from app.models import TesseractOCRPipelineModel
|
||||||
|
|
||||||
|
|
||||||
|
def tesseract_ocr_pipeline_model_dlc():
|
||||||
|
topm_id = request.view_args['tesseract_ocr_pipeline_model_id']
|
||||||
|
topm = TesseractOCRPipelineModel.query.get_or_404(topm_id)
|
||||||
|
return [
|
||||||
|
{
|
||||||
|
'text': f'{topm.title} {topm.version}',
|
||||||
|
'url': url_for('.tesseract_ocr_pipeline_model', tesseract_ocr_pipeline_model_id=topm_id)
|
||||||
|
}
|
||||||
|
]
|
@ -0,0 +1,2 @@
|
|||||||
|
from .. import bp
|
||||||
|
from . import routes
|
@ -0,0 +1,7 @@
|
|||||||
|
from flask import abort
|
||||||
|
from . import bp
|
||||||
|
|
||||||
|
|
||||||
|
@bp.route('/transkribus_htr_pipeline_models')
|
||||||
|
def transkribus_htr_pipeline_models():
|
||||||
|
return abort(503)
|
@ -1,69 +1,81 @@
|
|||||||
from datetime import datetime
|
|
||||||
from flask import current_app
|
from flask import current_app
|
||||||
from pathlib import Path
|
|
||||||
import json
|
|
||||||
import shutil
|
|
||||||
from app import db
|
from app import db
|
||||||
from app.models import User, Corpus, CorpusFile
|
from app.models import User, Corpus, CorpusFile
|
||||||
|
from datetime import datetime
|
||||||
|
import json
|
||||||
|
import os
|
||||||
|
import shutil
|
||||||
|
|
||||||
|
|
||||||
class SandpaperConverter:
|
class SandpaperConverter:
|
||||||
def __init__(self, json_db_file: Path, data_dir: Path):
|
def __init__(self, json_db_file, data_dir):
|
||||||
self.json_db_file = json_db_file
|
self.json_db_file = json_db_file
|
||||||
self.data_dir = data_dir
|
self.data_dir = data_dir
|
||||||
|
|
||||||
def run(self):
|
def run(self):
|
||||||
with self.json_db_file.open('r') as f:
|
with open(self.json_db_file, 'r') as f:
|
||||||
json_db: list[dict] = json.load(f)
|
json_db = json.loads(f.read())
|
||||||
|
|
||||||
for json_user in json_db:
|
for json_user in json_db:
|
||||||
if not json_user['confirmed']:
|
if not json_user['confirmed']:
|
||||||
current_app.logger.info(f'Skip unconfirmed user {json_user["username"]}')
|
current_app.logger.info(f'Skip unconfirmed user {json_user["username"]}')
|
||||||
continue
|
continue
|
||||||
user_dir = self.data_dir / f'{json_user["id"]}'
|
user_dir = os.path.join(self.data_dir, str(json_user['id']))
|
||||||
self.convert_user(json_user, user_dir)
|
self.convert_user(json_user, user_dir)
|
||||||
db.session.commit()
|
db.session.commit()
|
||||||
|
|
||||||
|
|
||||||
def convert_user(self, json_user: dict, user_dir: Path):
|
def convert_user(self, json_user, user_dir):
|
||||||
current_app.logger.info(f'Create User {json_user["username"]}...')
|
current_app.logger.info(f'Create User {json_user["username"]}...')
|
||||||
|
user = User(
|
||||||
|
confirmed=json_user['confirmed'],
|
||||||
|
email=json_user['email'],
|
||||||
|
last_seen=datetime.fromtimestamp(json_user['last_seen']),
|
||||||
|
member_since=datetime.fromtimestamp(json_user['member_since']),
|
||||||
|
password_hash=json_user['password_hash'], # TODO: Needs to be added manually
|
||||||
|
username=json_user['username']
|
||||||
|
)
|
||||||
|
db.session.add(user)
|
||||||
|
db.session.flush(objects=[user])
|
||||||
|
db.session.refresh(user)
|
||||||
try:
|
try:
|
||||||
user = User.create(
|
user.makedirs()
|
||||||
confirmed=json_user['confirmed'],
|
except OSError as e:
|
||||||
email=json_user['email'],
|
current_app.logger.error(e)
|
||||||
last_seen=datetime.fromtimestamp(json_user['last_seen']),
|
db.session.rollback()
|
||||||
member_since=datetime.fromtimestamp(json_user['member_since']),
|
|
||||||
password_hash=json_user['password_hash'], # TODO: Needs to be added manually
|
|
||||||
username=json_user['username']
|
|
||||||
)
|
|
||||||
except OSError:
|
|
||||||
raise Exception('Internal Server Error')
|
raise Exception('Internal Server Error')
|
||||||
for json_corpus in json_user['corpora'].values():
|
for json_corpus in json_user['corpora'].values():
|
||||||
if not json_corpus['files'].values():
|
if not json_corpus['files'].values():
|
||||||
current_app.logger.info(f'Skip empty corpus {json_corpus["title"]}')
|
current_app.logger.info(f'Skip empty corpus {json_corpus["title"]}')
|
||||||
continue
|
continue
|
||||||
corpus_dir = user_dir / 'corpora' / f'{json_corpus["id"]}'
|
corpus_dir = os.path.join(user_dir, 'corpora', str(json_corpus['id']))
|
||||||
self.convert_corpus(json_corpus, user, corpus_dir)
|
self.convert_corpus(json_corpus, user, corpus_dir)
|
||||||
current_app.logger.info('Done')
|
current_app.logger.info('Done')
|
||||||
|
|
||||||
|
|
||||||
def convert_corpus(self, json_corpus: dict, user: User, corpus_dir: Path):
|
def convert_corpus(self, json_corpus, user, corpus_dir):
|
||||||
current_app.logger.info(f'Create Corpus {json_corpus["title"]}...')
|
current_app.logger.info(f'Create Corpus {json_corpus["title"]}...')
|
||||||
|
corpus = Corpus(
|
||||||
|
user=user,
|
||||||
|
creation_date=datetime.fromtimestamp(json_corpus['creation_date']),
|
||||||
|
description=json_corpus['description'],
|
||||||
|
title=json_corpus['title']
|
||||||
|
)
|
||||||
|
db.session.add(corpus)
|
||||||
|
db.session.flush(objects=[corpus])
|
||||||
|
db.session.refresh(corpus)
|
||||||
try:
|
try:
|
||||||
corpus = Corpus.create(
|
corpus.makedirs()
|
||||||
user=user,
|
except OSError as e:
|
||||||
creation_date=datetime.fromtimestamp(json_corpus['creation_date']),
|
current_app.logger.error(e)
|
||||||
description=json_corpus['description'],
|
db.session.rollback()
|
||||||
title=json_corpus['title']
|
|
||||||
)
|
|
||||||
except OSError:
|
|
||||||
raise Exception('Internal Server Error')
|
raise Exception('Internal Server Error')
|
||||||
for json_corpus_file in json_corpus['files'].values():
|
for json_corpus_file in json_corpus['files'].values():
|
||||||
self.convert_corpus_file(json_corpus_file, corpus, corpus_dir)
|
self.convert_corpus_file(json_corpus_file, corpus, corpus_dir)
|
||||||
current_app.logger.info('Done')
|
current_app.logger.info('Done')
|
||||||
|
|
||||||
|
|
||||||
def convert_corpus_file(self, json_corpus_file: dict, corpus: Corpus, corpus_dir: Path):
|
def convert_corpus_file(self, json_corpus_file, corpus, corpus_dir):
|
||||||
current_app.logger.info(f'Create CorpusFile {json_corpus_file["title"]}...')
|
current_app.logger.info(f'Create CorpusFile {json_corpus_file["title"]}...')
|
||||||
corpus_file = CorpusFile(
|
corpus_file = CorpusFile(
|
||||||
corpus=corpus,
|
corpus=corpus,
|
||||||
@ -87,13 +99,13 @@ class SandpaperConverter:
|
|||||||
db.session.refresh(corpus_file)
|
db.session.refresh(corpus_file)
|
||||||
try:
|
try:
|
||||||
shutil.copy2(
|
shutil.copy2(
|
||||||
corpus_dir / json_corpus_file['filename'],
|
os.path.join(corpus_dir, json_corpus_file['filename']),
|
||||||
corpus_file.path
|
corpus_file.path
|
||||||
)
|
)
|
||||||
except:
|
except:
|
||||||
current_app.logger.warning(
|
current_app.logger.warning(
|
||||||
'Can not convert corpus file: '
|
'Can not convert corpus file: '
|
||||||
f'{corpus_dir / json_corpus_file["filename"]}'
|
f'{os.path.join(corpus_dir, json_corpus_file["filename"])}'
|
||||||
' -> '
|
' -> '
|
||||||
f'{corpus_file.path}'
|
f'{corpus_file.path}'
|
||||||
)
|
)
|
||||||
|
@ -1,25 +1,69 @@
|
|||||||
from flask import current_app
|
from flask import current_app
|
||||||
from pathlib import Path
|
|
||||||
|
|
||||||
|
|
||||||
def normalize_vrt_file(input_file: Path, output_file: Path):
|
def normalize_vrt_file(input_file, output_file):
|
||||||
|
def check_pos_attribute_order(vrt_lines):
|
||||||
|
# The following orders are possible:
|
||||||
|
# since 26.02.2019: 'word,lemma,simple_pos,pos,ner'
|
||||||
|
# since 26.03.2021: 'word,pos,lemma,simple_pos,ner'
|
||||||
|
# since 27.01.2022: 'word,pos,lemma,simple_pos'
|
||||||
|
# This Function tries to find out which order we have by looking at the
|
||||||
|
# number of attributes and the position of the simple_pos attribute
|
||||||
|
SIMPLE_POS_LABELS = [
|
||||||
|
'ADJ', 'ADP', 'ADV', 'AUX', 'CONJ',
|
||||||
|
'DET', 'INTJ', 'NOUN', 'NUM', 'PART',
|
||||||
|
'PRON', 'PROPN', 'PUNCT', 'SCONJ', 'SYM',
|
||||||
|
'VERB', 'X'
|
||||||
|
]
|
||||||
|
for line in vrt_lines:
|
||||||
|
if line.startswith('<'):
|
||||||
|
continue
|
||||||
|
pos_attrs = line.rstrip('\n').split('\t')
|
||||||
|
num_pos_attrs = len(pos_attrs)
|
||||||
|
if num_pos_attrs == 4:
|
||||||
|
if pos_attrs[3] in SIMPLE_POS_LABELS:
|
||||||
|
return ['word', 'pos', 'lemma', 'simple_pos']
|
||||||
|
continue
|
||||||
|
elif num_pos_attrs == 5:
|
||||||
|
if pos_attrs[2] in SIMPLE_POS_LABELS:
|
||||||
|
return ['word', 'lemma', 'simple_pos', 'pos', 'ner']
|
||||||
|
elif pos_attrs[3] in SIMPLE_POS_LABELS:
|
||||||
|
return ['word', 'pos', 'lemma', 'simple_pos', 'ner']
|
||||||
|
continue
|
||||||
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
def check_has_ent_as_s_attr(vrt_lines):
|
||||||
|
for line in vrt_lines:
|
||||||
|
if line.startswith('<ent'):
|
||||||
|
return True
|
||||||
|
return False
|
||||||
|
|
||||||
|
|
||||||
|
def pos_attrs_to_string_1(pos_attrs):
|
||||||
|
return f'{pos_attrs[0]}\t{pos_attrs[3]}\t{pos_attrs[1]}\t{pos_attrs[2]}\n'
|
||||||
|
|
||||||
|
|
||||||
|
def pos_attrs_to_string_2(pos_attrs):
|
||||||
|
return f'{pos_attrs[0]}\t{pos_attrs[1]}\t{pos_attrs[2]}\t{pos_attrs[3]}\n'
|
||||||
|
|
||||||
current_app.logger.info(f'Converting {input_file}...')
|
current_app.logger.info(f'Converting {input_file}...')
|
||||||
|
|
||||||
with input_file.open() as f:
|
with open(input_file) as f:
|
||||||
input_vrt_lines = f.readlines()
|
input_vrt_lines = f.readlines()
|
||||||
|
|
||||||
pos_attr_order = _check_pos_attribute_order(input_vrt_lines)
|
pos_attr_order = check_pos_attribute_order(input_vrt_lines)
|
||||||
has_ent_as_s_attr = _check_has_ent_as_s_attr(input_vrt_lines)
|
has_ent_as_s_attr = check_has_ent_as_s_attr(input_vrt_lines)
|
||||||
|
|
||||||
current_app.logger.info(f'Detected pos_attr_order: [{",".join(pos_attr_order)}]')
|
current_app.logger.info(f'Detected pos_attr_order: [{",".join(pos_attr_order)}]')
|
||||||
current_app.logger.info(f'Detected has_ent_as_s_attr: {has_ent_as_s_attr}')
|
current_app.logger.info(f'Detected has_ent_as_s_attr: {has_ent_as_s_attr}')
|
||||||
|
|
||||||
if pos_attr_order == ['word', 'lemma', 'simple_pos', 'pos', 'ner']:
|
if pos_attr_order == ['word', 'lemma', 'simple_pos', 'pos', 'ner']:
|
||||||
pos_attrs_to_string_function = _pos_attrs_to_string_1
|
pos_attrs_to_string_function = pos_attrs_to_string_1
|
||||||
elif pos_attr_order == ['word', 'pos', 'lemma', 'simple_pos', 'ner']:
|
elif pos_attr_order == ['word', 'pos', 'lemma', 'simple_pos', 'ner']:
|
||||||
pos_attrs_to_string_function = _pos_attrs_to_string_2
|
pos_attrs_to_string_function = pos_attrs_to_string_2
|
||||||
elif pos_attr_order == ['word', 'pos', 'lemma', 'simple_pos']:
|
elif pos_attr_order == ['word', 'pos', 'lemma', 'simple_pos']:
|
||||||
pos_attrs_to_string_function = _pos_attrs_to_string_2
|
pos_attrs_to_string_function = pos_attrs_to_string_2
|
||||||
else:
|
else:
|
||||||
raise Exception('Can not handle format')
|
raise Exception('Can not handle format')
|
||||||
|
|
||||||
@ -69,49 +113,5 @@ def normalize_vrt_file(input_file: Path, output_file: Path):
|
|||||||
current_ent = pos_attrs[4]
|
current_ent = pos_attrs[4]
|
||||||
output_vrt += pos_attrs_to_string_function(pos_attrs)
|
output_vrt += pos_attrs_to_string_function(pos_attrs)
|
||||||
|
|
||||||
with output_file.open(mode='w') as f:
|
with open(output_file, 'w') as f:
|
||||||
f.write(output_vrt)
|
f.write(output_vrt)
|
||||||
|
|
||||||
|
|
||||||
def _check_pos_attribute_order(vrt_lines: list[str]) -> list[str]:
|
|
||||||
# The following orders are possible:
|
|
||||||
# since 26.02.2019: 'word,lemma,simple_pos,pos,ner'
|
|
||||||
# since 26.03.2021: 'word,pos,lemma,simple_pos,ner'
|
|
||||||
# since 27.01.2022: 'word,pos,lemma,simple_pos'
|
|
||||||
# This Function tries to find out which order we have by looking at the
|
|
||||||
# number of attributes and the position of the simple_pos attribute
|
|
||||||
SIMPLE_POS_LABELS = [
|
|
||||||
'ADJ', 'ADP', 'ADV', 'AUX', 'CONJ', 'DET', 'INTJ', 'NOUN', 'NUM',
|
|
||||||
'PART', 'PRON', 'PROPN', 'PUNCT', 'SCONJ', 'SYM', 'VERB', 'X'
|
|
||||||
]
|
|
||||||
for line in vrt_lines:
|
|
||||||
if line.startswith('<'):
|
|
||||||
continue
|
|
||||||
pos_attrs = line.rstrip('\n').split('\t')
|
|
||||||
num_pos_attrs = len(pos_attrs)
|
|
||||||
if num_pos_attrs == 4:
|
|
||||||
if pos_attrs[3] in SIMPLE_POS_LABELS:
|
|
||||||
return ['word', 'pos', 'lemma', 'simple_pos']
|
|
||||||
continue
|
|
||||||
elif num_pos_attrs == 5:
|
|
||||||
if pos_attrs[2] in SIMPLE_POS_LABELS:
|
|
||||||
return ['word', 'lemma', 'simple_pos', 'pos', 'ner']
|
|
||||||
elif pos_attrs[3] in SIMPLE_POS_LABELS:
|
|
||||||
return ['word', 'pos', 'lemma', 'simple_pos', 'ner']
|
|
||||||
continue
|
|
||||||
# TODO: raise exception "can't determine attribute order"
|
|
||||||
|
|
||||||
|
|
||||||
def _check_has_ent_as_s_attr(vrt_lines: list[str]) -> bool:
|
|
||||||
for line in vrt_lines:
|
|
||||||
if line.startswith('<ent'):
|
|
||||||
return True
|
|
||||||
return False
|
|
||||||
|
|
||||||
|
|
||||||
def _pos_attrs_to_string_1(pos_attrs: list[str]) -> str:
|
|
||||||
return f'{pos_attrs[0]}\t{pos_attrs[3]}\t{pos_attrs[1]}\t{pos_attrs[2]}\n'
|
|
||||||
|
|
||||||
|
|
||||||
def _pos_attrs_to_string_2(pos_attrs: list[str]) -> str:
|
|
||||||
return f'{pos_attrs[0]}\t{pos_attrs[1]}\t{pos_attrs[2]}\t{pos_attrs[3]}\n'
|
|
||||||
|
@ -1,7 +1,7 @@
|
|||||||
from flask import current_app
|
from app.models import Corpus, CorpusStatus
|
||||||
|
import os
|
||||||
import shutil
|
import shutil
|
||||||
from app import db
|
from app import db
|
||||||
from app.models import Corpus, CorpusStatus
|
|
||||||
from . import bp
|
from . import bp
|
||||||
|
|
||||||
|
|
||||||
@ -18,17 +18,10 @@ def reset():
|
|||||||
]
|
]
|
||||||
for corpus in [x for x in Corpus.query.all() if x.status in status]:
|
for corpus in [x for x in Corpus.query.all() if x.status in status]:
|
||||||
print(f'Resetting corpus {corpus}')
|
print(f'Resetting corpus {corpus}')
|
||||||
corpus_cwb_dir = corpus.path / 'cwb'
|
shutil.rmtree(os.path.join(corpus.path, 'cwb'), ignore_errors=True)
|
||||||
corpus_cwb_data_dir = corpus_cwb_dir / 'data'
|
os.mkdir(os.path.join(corpus.path, 'cwb'))
|
||||||
corpus_cwb_registry_dir = corpus_cwb_dir / 'registry'
|
os.mkdir(os.path.join(corpus.path, 'cwb', 'data'))
|
||||||
try:
|
os.mkdir(os.path.join(corpus.path, 'cwb', 'registry'))
|
||||||
shutil.rmtree(corpus.path / 'cwb', ignore_errors=True)
|
|
||||||
corpus_cwb_dir.mkdir()
|
|
||||||
corpus_cwb_data_dir.mkdir()
|
|
||||||
corpus_cwb_registry_dir.mkdir()
|
|
||||||
except OSError as e:
|
|
||||||
current_app.logger.error(e)
|
|
||||||
raise
|
|
||||||
corpus.status = CorpusStatus.UNPREPARED
|
corpus.status = CorpusStatus.UNPREPARED
|
||||||
corpus.num_analysis_sessions = 0
|
corpus.num_analysis_sessions = 0
|
||||||
db.session.commit()
|
db.session.commit()
|
@ -1,16 +1,16 @@
|
|||||||
from cqi import CQiClient
|
from cqi import CQiClient
|
||||||
from cqi.errors import CQiException
|
from cqi.errors import CQiException
|
||||||
from cqi.status import CQiStatus
|
from cqi.status import CQiStatus
|
||||||
from flask import current_app
|
from flask import session
|
||||||
from flask_login import current_user
|
from flask_login import current_user
|
||||||
from flask_socketio import Namespace
|
from flask_socketio import Namespace
|
||||||
from inspect import signature
|
from inspect import signature
|
||||||
from threading import Lock
|
from threading import Lock
|
||||||
from app import db, docker_client, hashids, socketio
|
from typing import Callable, Dict, List
|
||||||
|
from app import db, hashids, socketio
|
||||||
from app.decorators import socketio_login_required
|
from app.decorators import socketio_login_required
|
||||||
from app.models import Corpus, CorpusStatus
|
from app.models import Corpus, CorpusStatus
|
||||||
from . import cqi_extension_functions
|
from . import extensions
|
||||||
from .utils import SessionManager
|
|
||||||
|
|
||||||
|
|
||||||
'''
|
'''
|
||||||
@ -18,7 +18,7 @@ This package tunnels the Corpus Query interface (CQi) protocol through
|
|||||||
Socket.IO (SIO) by tunneling CQi API calls through an event called "exec".
|
Socket.IO (SIO) by tunneling CQi API calls through an event called "exec".
|
||||||
|
|
||||||
Basic concept:
|
Basic concept:
|
||||||
1. A client connects to the namespace.
|
1. A client connects to the "/cqi_over_sio" namespace.
|
||||||
2. The client emits the "init" event and provides a corpus id for the corpus
|
2. The client emits the "init" event and provides a corpus id for the corpus
|
||||||
that should be analysed in this session.
|
that should be analysed in this session.
|
||||||
1.1 The analysis session counter of the corpus is incremented.
|
1.1 The analysis session counter of the corpus is incremented.
|
||||||
@ -27,17 +27,17 @@ Basic concept:
|
|||||||
1.4 Connect the CQiClient to the server.
|
1.4 Connect the CQiClient to the server.
|
||||||
1.5 Save the CQiClient, the Lock and the corpus id in the session for
|
1.5 Save the CQiClient, the Lock and the corpus id in the session for
|
||||||
subsequential use.
|
subsequential use.
|
||||||
3. The client emits "exec" events, within which it provides the name of a CQi
|
2. The client emits the "exec" event provides the name of a CQi API function
|
||||||
API function and the corresponding arguments.
|
arguments (optional).
|
||||||
3.1 The "exec" event handler will execute the function, make sure that
|
- The event "exec" handler will execute the function, make sure that the
|
||||||
the result is serializable and returns the result back to the client.
|
result is serializable and returns the result back to the client.
|
||||||
4. The client disconnects from the namespace
|
4. Wait for more events
|
||||||
4.1 The analysis session counter of the corpus is decremented.
|
5. The client disconnects from the "/cqi_over_sio" namespace
|
||||||
4.2 The CQiClient and (Mutex) Lock belonging to it are teared down.
|
1.1 The analysis session counter of the corpus is decremented.
|
||||||
|
1.2 The CQiClient and (Mutex) Lock belonging to it are teared down.
|
||||||
'''
|
'''
|
||||||
|
|
||||||
|
CQI_API_FUNCTION_NAMES: List[str] = [
|
||||||
CQI_API_FUNCTION_NAMES = [
|
|
||||||
'ask_feature_cl_2_3',
|
'ask_feature_cl_2_3',
|
||||||
'ask_feature_cqi_1_0',
|
'ask_feature_cqi_1_0',
|
||||||
'ask_feature_cqp_2_3',
|
'ask_feature_cqp_2_3',
|
||||||
@ -85,90 +85,63 @@ CQI_API_FUNCTION_NAMES = [
|
|||||||
]
|
]
|
||||||
|
|
||||||
|
|
||||||
CQI_EXTENSION_FUNCTION_NAMES = [
|
class CQiNamespace(Namespace):
|
||||||
'ext_corpus_update_db',
|
|
||||||
'ext_corpus_static_data',
|
|
||||||
'ext_corpus_paginate_corpus',
|
|
||||||
'ext_cqp_paginate_subcorpus',
|
|
||||||
'ext_cqp_partial_export_subcorpus',
|
|
||||||
'ext_cqp_export_subcorpus',
|
|
||||||
]
|
|
||||||
|
|
||||||
|
|
||||||
class CQiOverSocketIONamespace(Namespace):
|
|
||||||
@socketio_login_required
|
@socketio_login_required
|
||||||
def on_connect(self):
|
def on_connect(self):
|
||||||
pass
|
pass
|
||||||
|
|
||||||
@socketio_login_required
|
@socketio_login_required
|
||||||
def on_init(self, corpus_hashid: str) -> dict:
|
def on_init(self, db_corpus_hashid: str):
|
||||||
corpus_id = hashids.decode(corpus_hashid)
|
db_corpus_id = hashids.decode(db_corpus_hashid)
|
||||||
|
db_corpus = Corpus.query.get(db_corpus_id)
|
||||||
if not isinstance(corpus_id, int):
|
if db_corpus is None:
|
||||||
return {'code': 400, 'msg': 'Bad Request'}
|
|
||||||
|
|
||||||
corpus = Corpus.query.get(corpus_id)
|
|
||||||
|
|
||||||
if corpus is None:
|
|
||||||
return {'code': 404, 'msg': 'Not Found'}
|
return {'code': 404, 'msg': 'Not Found'}
|
||||||
|
if not (db_corpus.user == current_user
|
||||||
if not (
|
or current_user.is_following_corpus(db_corpus)
|
||||||
corpus.user == current_user
|
or current_user.is_administrator()):
|
||||||
or current_user.is_following_corpus(corpus)
|
|
||||||
or current_user.is_administrator
|
|
||||||
):
|
|
||||||
return {'code': 403, 'msg': 'Forbidden'}
|
return {'code': 403, 'msg': 'Forbidden'}
|
||||||
|
if db_corpus.status not in [
|
||||||
if corpus.status not in [
|
|
||||||
CorpusStatus.BUILT,
|
CorpusStatus.BUILT,
|
||||||
CorpusStatus.STARTING_ANALYSIS_SESSION,
|
CorpusStatus.STARTING_ANALYSIS_SESSION,
|
||||||
CorpusStatus.RUNNING_ANALYSIS_SESSION,
|
CorpusStatus.RUNNING_ANALYSIS_SESSION,
|
||||||
CorpusStatus.CANCELING_ANALYSIS_SESSION
|
CorpusStatus.CANCELING_ANALYSIS_SESSION
|
||||||
]:
|
]:
|
||||||
return {'code': 424, 'msg': 'Failed Dependency'}
|
return {'code': 424, 'msg': 'Failed Dependency'}
|
||||||
|
if db_corpus.num_analysis_sessions is None:
|
||||||
corpus.num_analysis_sessions = Corpus.num_analysis_sessions + 1
|
db_corpus.num_analysis_sessions = 0
|
||||||
|
db.session.commit()
|
||||||
|
db_corpus.num_analysis_sessions = Corpus.num_analysis_sessions + 1
|
||||||
db.session.commit()
|
db.session.commit()
|
||||||
retry_counter = 20
|
retry_counter = 20
|
||||||
while corpus.status != CorpusStatus.RUNNING_ANALYSIS_SESSION:
|
while db_corpus.status != CorpusStatus.RUNNING_ANALYSIS_SESSION:
|
||||||
if retry_counter == 0:
|
if retry_counter == 0:
|
||||||
corpus.num_analysis_sessions = Corpus.num_analysis_sessions - 1
|
db_corpus.num_analysis_sessions = Corpus.num_analysis_sessions - 1
|
||||||
db.session.commit()
|
db.session.commit()
|
||||||
return {'code': 408, 'msg': 'Request Timeout'}
|
return {'code': 408, 'msg': 'Request Timeout'}
|
||||||
socketio.sleep(3)
|
socketio.sleep(3)
|
||||||
retry_counter -= 1
|
retry_counter -= 1
|
||||||
db.session.refresh(corpus)
|
db.session.refresh(db_corpus)
|
||||||
|
cqi_client = CQiClient(f'cqpserver_{db_corpus_id}', timeout=float('inf'))
|
||||||
cqpserver_container_name = f'nopaque-cqpserver-{corpus_id}'
|
session['cqi_over_sio'] = {}
|
||||||
cqpserver_container = docker_client.containers.get(cqpserver_container_name)
|
session['cqi_over_sio']['cqi_client'] = cqi_client
|
||||||
cqpserver_ip_address = cqpserver_container.attrs['NetworkSettings']['Networks'][current_app.config['NOPAQUE_DOCKER_NETWORK_NAME']]['IPAddress']
|
session['cqi_over_sio']['cqi_client_lock'] = Lock()
|
||||||
cqi_client = CQiClient(cqpserver_ip_address)
|
session['cqi_over_sio']['db_corpus_id'] = db_corpus_id
|
||||||
cqi_client_lock = Lock()
|
|
||||||
|
|
||||||
SessionManager.setup()
|
|
||||||
SessionManager.set_corpus_id(corpus_id)
|
|
||||||
SessionManager.set_cqi_client(cqi_client)
|
|
||||||
SessionManager.set_cqi_client_lock(cqi_client_lock)
|
|
||||||
|
|
||||||
return {'code': 200, 'msg': 'OK'}
|
return {'code': 200, 'msg': 'OK'}
|
||||||
|
|
||||||
@socketio_login_required
|
@socketio_login_required
|
||||||
def on_exec(self, fn_name: str, fn_args: dict = {}) -> dict:
|
def on_exec(self, fn_name: str, fn_args: Dict = {}):
|
||||||
try:
|
try:
|
||||||
cqi_client = SessionManager.get_cqi_client()
|
cqi_client: CQiClient = session['cqi_over_sio']['cqi_client']
|
||||||
cqi_client_lock = SessionManager.get_cqi_client_lock()
|
cqi_client_lock: Lock = session['cqi_over_sio']['cqi_client_lock']
|
||||||
except KeyError:
|
except KeyError:
|
||||||
return {'code': 424, 'msg': 'Failed Dependency'}
|
return {'code': 424, 'msg': 'Failed Dependency'}
|
||||||
|
|
||||||
if fn_name in CQI_API_FUNCTION_NAMES:
|
if fn_name in CQI_API_FUNCTION_NAMES:
|
||||||
fn = getattr(cqi_client.api, fn_name)
|
fn: Callable = getattr(cqi_client.api, fn_name)
|
||||||
elif fn_name in CQI_EXTENSION_FUNCTION_NAMES:
|
elif fn_name in extensions.CQI_EXTENSION_FUNCTION_NAMES:
|
||||||
fn = getattr(cqi_extension_functions, fn_name)
|
fn: Callable = getattr(extensions, fn_name)
|
||||||
else:
|
else:
|
||||||
return {'code': 400, 'msg': 'Bad Request'}
|
return {'code': 400, 'msg': 'Bad Request'}
|
||||||
|
|
||||||
for param in signature(fn).parameters.values():
|
for param in signature(fn).parameters.values():
|
||||||
# Check if the parameter is optional or required
|
|
||||||
if param.default is param.empty:
|
if param.default is param.empty:
|
||||||
if param.name not in fn_args:
|
if param.name not in fn_args:
|
||||||
return {'code': 400, 'msg': 'Bad Request'}
|
return {'code': 400, 'msg': 'Bad Request'}
|
||||||
@ -177,7 +150,6 @@ class CQiOverSocketIONamespace(Namespace):
|
|||||||
continue
|
continue
|
||||||
if type(fn_args[param.name]) is not param.annotation:
|
if type(fn_args[param.name]) is not param.annotation:
|
||||||
return {'code': 400, 'msg': 'Bad Request'}
|
return {'code': 400, 'msg': 'Bad Request'}
|
||||||
|
|
||||||
cqi_client_lock.acquire()
|
cqi_client_lock.acquire()
|
||||||
try:
|
try:
|
||||||
fn_return_value = fn(**fn_args)
|
fn_return_value = fn(**fn_args)
|
||||||
@ -195,7 +167,6 @@ class CQiOverSocketIONamespace(Namespace):
|
|||||||
}
|
}
|
||||||
finally:
|
finally:
|
||||||
cqi_client_lock.release()
|
cqi_client_lock.release()
|
||||||
|
|
||||||
if isinstance(fn_return_value, CQiStatus):
|
if isinstance(fn_return_value, CQiStatus):
|
||||||
payload = {
|
payload = {
|
||||||
'code': fn_return_value.code,
|
'code': fn_return_value.code,
|
||||||
@ -203,31 +174,26 @@ class CQiOverSocketIONamespace(Namespace):
|
|||||||
}
|
}
|
||||||
else:
|
else:
|
||||||
payload = fn_return_value
|
payload = fn_return_value
|
||||||
|
|
||||||
return {'code': 200, 'msg': 'OK', 'payload': payload}
|
return {'code': 200, 'msg': 'OK', 'payload': payload}
|
||||||
|
|
||||||
def on_disconnect(self):
|
def on_disconnect(self):
|
||||||
try:
|
try:
|
||||||
corpus_id = SessionManager.get_corpus_id()
|
cqi_client: CQiClient = session['cqi_over_sio']['cqi_client']
|
||||||
cqi_client = SessionManager.get_cqi_client()
|
cqi_client_lock: Lock = session['cqi_over_sio']['cqi_client_lock']
|
||||||
cqi_client_lock = SessionManager.get_cqi_client_lock()
|
db_corpus_id: int = session['cqi_over_sio']['db_corpus_id']
|
||||||
SessionManager.teardown()
|
|
||||||
except KeyError:
|
except KeyError:
|
||||||
return
|
return
|
||||||
|
|
||||||
cqi_client_lock.acquire()
|
cqi_client_lock.acquire()
|
||||||
|
try:
|
||||||
|
session.pop('cqi_over_sio')
|
||||||
|
except KeyError:
|
||||||
|
pass
|
||||||
try:
|
try:
|
||||||
cqi_client.api.ctrl_bye()
|
cqi_client.api.ctrl_bye()
|
||||||
except (BrokenPipeError, CQiException):
|
except (BrokenPipeError, CQiException):
|
||||||
pass
|
pass
|
||||||
|
|
||||||
cqi_client_lock.release()
|
cqi_client_lock.release()
|
||||||
|
db_corpus = Corpus.query.get(db_corpus_id)
|
||||||
corpus = Corpus.query.get(corpus_id)
|
if db_corpus is not None:
|
||||||
|
db_corpus.num_analysis_sessions = Corpus.num_analysis_sessions - 1
|
||||||
if corpus is None:
|
db.session.commit()
|
||||||
return
|
|
||||||
|
|
||||||
corpus.num_analysis_sessions = Corpus.num_analysis_sessions - 1
|
|
||||||
db.session.commit()
|
|
316
app/corpora/cqi_over_sio/extensions.py
Normal file
316
app/corpora/cqi_over_sio/extensions.py
Normal file
@ -0,0 +1,316 @@
|
|||||||
|
from collections import Counter
|
||||||
|
from cqi import CQiClient
|
||||||
|
from cqi.models.corpora import Corpus as CQiCorpus
|
||||||
|
from cqi.models.attributes import (
|
||||||
|
PositionalAttribute as CQiPositionalAttribute,
|
||||||
|
StructuralAttribute as CQiStructuralAttribute
|
||||||
|
)
|
||||||
|
from cqi.status import StatusOk as CQiStatusOk
|
||||||
|
from flask import session
|
||||||
|
from typing import Dict, List
|
||||||
|
import gzip
|
||||||
|
import json
|
||||||
|
import math
|
||||||
|
import os
|
||||||
|
from app import db
|
||||||
|
from app.models import Corpus
|
||||||
|
from .utils import lookups_by_cpos, partial_export_subcorpus, export_subcorpus
|
||||||
|
|
||||||
|
|
||||||
|
CQI_EXTENSION_FUNCTION_NAMES: List[str] = [
|
||||||
|
'ext_corpus_update_db',
|
||||||
|
'ext_corpus_static_data',
|
||||||
|
'ext_corpus_paginate_corpus',
|
||||||
|
'ext_cqp_paginate_subcorpus',
|
||||||
|
'ext_cqp_partial_export_subcorpus',
|
||||||
|
'ext_cqp_export_subcorpus',
|
||||||
|
]
|
||||||
|
|
||||||
|
|
||||||
|
def ext_corpus_update_db(corpus: str) -> CQiStatusOk:
|
||||||
|
cqi_client: CQiClient = session['cqi_over_sio']['cqi_client']
|
||||||
|
db_corpus_id: int = session['cqi_over_sio']['db_corpus_id']
|
||||||
|
db_corpus: Corpus = Corpus.query.get(db_corpus_id)
|
||||||
|
cqi_corpus: CQiCorpus = cqi_client.corpora.get(corpus)
|
||||||
|
db_corpus.num_tokens = cqi_corpus.size
|
||||||
|
db.session.commit()
|
||||||
|
return CQiStatusOk()
|
||||||
|
|
||||||
|
|
||||||
|
def ext_corpus_static_data(corpus: str) -> Dict:
|
||||||
|
db_corpus_id: int = session['cqi_over_sio']['db_corpus_id']
|
||||||
|
db_corpus: Corpus = Corpus.query.get(db_corpus_id)
|
||||||
|
cache_file_path: str = os.path.join(db_corpus.path, 'cwb', 'static.json.gz')
|
||||||
|
if os.path.exists(cache_file_path):
|
||||||
|
with open(cache_file_path, 'rb') as f:
|
||||||
|
return f.read()
|
||||||
|
cqi_client: CQiClient = session['cqi_over_sio']['cqi_client']
|
||||||
|
cqi_corpus: CQiCorpus = cqi_client.corpora.get(corpus)
|
||||||
|
cqi_p_attrs: Dict[str, CQiPositionalAttribute] = {
|
||||||
|
p_attr.name: p_attr
|
||||||
|
for p_attr in cqi_corpus.positional_attributes.list()
|
||||||
|
}
|
||||||
|
cqi_s_attrs: Dict[str, CQiStructuralAttribute] = {
|
||||||
|
s_attr.name: s_attr
|
||||||
|
for s_attr in cqi_corpus.structural_attributes.list()
|
||||||
|
}
|
||||||
|
static_corpus_data = {
|
||||||
|
'corpus': {
|
||||||
|
'bounds': [0, cqi_corpus.size - 1],
|
||||||
|
'counts': {
|
||||||
|
'token': cqi_corpus.size
|
||||||
|
},
|
||||||
|
'freqs': {}
|
||||||
|
},
|
||||||
|
'p_attrs': {},
|
||||||
|
's_attrs': {},
|
||||||
|
'values': {'p_attrs': {}, 's_attrs': {}}
|
||||||
|
}
|
||||||
|
for p_attr in cqi_p_attrs.values():
|
||||||
|
static_corpus_data['corpus']['freqs'][p_attr.name] = {}
|
||||||
|
chunk_size = 10000
|
||||||
|
p_attr_id_list = list(range(p_attr.lexicon_size))
|
||||||
|
chunks = [p_attr_id_list[i:i+chunk_size] for i in range(0, len(p_attr_id_list), chunk_size)]
|
||||||
|
del p_attr_id_list
|
||||||
|
for chunk in chunks:
|
||||||
|
# print(f'corpus.freqs.{p_attr.name}: {chunk[0]} - {chunk[-1]}')
|
||||||
|
static_corpus_data['corpus']['freqs'][p_attr.name].update(
|
||||||
|
dict(zip(chunk, p_attr.freqs_by_ids(chunk)))
|
||||||
|
)
|
||||||
|
del chunks
|
||||||
|
static_corpus_data['p_attrs'][p_attr.name] = {}
|
||||||
|
cpos_list = list(range(cqi_corpus.size))
|
||||||
|
chunks = [cpos_list[i:i+chunk_size] for i in range(0, len(cpos_list), chunk_size)]
|
||||||
|
del cpos_list
|
||||||
|
for chunk in chunks:
|
||||||
|
# print(f'p_attrs.{p_attr.name}: {chunk[0]} - {chunk[-1]}')
|
||||||
|
static_corpus_data['p_attrs'][p_attr.name].update(
|
||||||
|
dict(zip(chunk, p_attr.ids_by_cpos(chunk)))
|
||||||
|
)
|
||||||
|
del chunks
|
||||||
|
static_corpus_data['values']['p_attrs'][p_attr.name] = {}
|
||||||
|
p_attr_id_list = list(range(p_attr.lexicon_size))
|
||||||
|
chunks = [p_attr_id_list[i:i+chunk_size] for i in range(0, len(p_attr_id_list), chunk_size)]
|
||||||
|
del p_attr_id_list
|
||||||
|
for chunk in chunks:
|
||||||
|
# print(f'values.p_attrs.{p_attr.name}: {chunk[0]} - {chunk[-1]}')
|
||||||
|
static_corpus_data['values']['p_attrs'][p_attr.name].update(
|
||||||
|
dict(zip(chunk, p_attr.values_by_ids(chunk)))
|
||||||
|
)
|
||||||
|
del chunks
|
||||||
|
for s_attr in cqi_s_attrs.values():
|
||||||
|
if s_attr.has_values:
|
||||||
|
continue
|
||||||
|
static_corpus_data['corpus']['counts'][s_attr.name] = s_attr.size
|
||||||
|
static_corpus_data['s_attrs'][s_attr.name] = {'lexicon': {}, 'values': None}
|
||||||
|
static_corpus_data['values']['s_attrs'][s_attr.name] = {}
|
||||||
|
##########################################################################
|
||||||
|
# A faster way to get cpos boundaries for smaller s_attrs #
|
||||||
|
##########################################################################
|
||||||
|
# if s_attr.name in ['s', 'ent']:
|
||||||
|
# cqi_corpus.query('Last', f'<{s_attr.name}> []* </{s_attr.name}>;')
|
||||||
|
# cqi_subcorpus = cqi_corpus.subcorpora.get('Last')
|
||||||
|
# first_match = 0
|
||||||
|
# last_match = cqi_subcorpus.size - 1
|
||||||
|
# match_boundaries = zip(
|
||||||
|
# range(first_match, last_match + 1),
|
||||||
|
# cqi_subcorpus.dump(cqi_subcorpus.fields['match'], first_match, last_match),
|
||||||
|
# cqi_subcorpus.dump(cqi_subcorpus.fields['matchend'], first_match, last_match)
|
||||||
|
# )
|
||||||
|
# for id, lbound, rbound in match_boundaries:
|
||||||
|
# static_corpus_data['s_attrs'][s_attr.name]['lexicon'][id] = {}
|
||||||
|
# static_corpus_data['s_attrs'][s_attr.name]['lexicon'][id]['bounds'] = [lbound, rbound]
|
||||||
|
# static_corpus_data['s_attrs'][s_attr.name]['lexicon'][id]['counts'] = {}
|
||||||
|
# static_corpus_data['s_attrs'][s_attr.name]['lexicon'][id]['counts']['token'] = rbound - lbound + 1
|
||||||
|
# cqi_subcorpus.drop()
|
||||||
|
for id in range(0, s_attr.size):
|
||||||
|
# print(f's_attrs.{s_attr.name}.lexicon.{id}')
|
||||||
|
static_corpus_data['s_attrs'][s_attr.name]['lexicon'][id] = {
|
||||||
|
'bounds': None,
|
||||||
|
'counts': None,
|
||||||
|
'freqs': None
|
||||||
|
}
|
||||||
|
if s_attr.name != 'text':
|
||||||
|
continue
|
||||||
|
lbound, rbound = s_attr.cpos_by_id(id)
|
||||||
|
# print(f's_attrs.{s_attr.name}.lexicon.{id}.bounds')
|
||||||
|
static_corpus_data['s_attrs'][s_attr.name]['lexicon'][id]['bounds'] = [lbound, rbound]
|
||||||
|
# print(f's_attrs.{s_attr.name}.lexicon.{id}.counts')
|
||||||
|
static_corpus_data['s_attrs'][s_attr.name]['lexicon'][id]['counts'] = {}
|
||||||
|
static_corpus_data['s_attrs'][s_attr.name]['lexicon'][id]['counts']['token'] = rbound - lbound + 1
|
||||||
|
cpos_list = list(range(lbound, rbound + 1))
|
||||||
|
chunks = [cpos_list[i:i+chunk_size] for i in range(0, len(cpos_list), chunk_size)]
|
||||||
|
del cpos_list
|
||||||
|
ent_ids = set()
|
||||||
|
for chunk in chunks:
|
||||||
|
# print(f'Gather ent_ids from cpos: {chunk[0]} - {chunk[-1]}')
|
||||||
|
ent_ids.update({x for x in cqi_s_attrs['ent'].ids_by_cpos(chunk) if x != -1})
|
||||||
|
static_corpus_data['s_attrs'][s_attr.name]['lexicon'][id]['counts']['ent'] = len(ent_ids)
|
||||||
|
del ent_ids
|
||||||
|
s_ids = set()
|
||||||
|
for chunk in chunks:
|
||||||
|
# print(f'Gather s_ids from cpos: {chunk[0]} - {chunk[-1]}')
|
||||||
|
s_ids.update({x for x in cqi_s_attrs['s'].ids_by_cpos(chunk) if x != -1})
|
||||||
|
static_corpus_data['s_attrs'][s_attr.name]['lexicon'][id]['counts']['s'] = len(s_ids)
|
||||||
|
del s_ids
|
||||||
|
# print(f's_attrs.{s_attr.name}.lexicon.{id}.freqs')
|
||||||
|
static_corpus_data['s_attrs'][s_attr.name]['lexicon'][id]['freqs'] = {}
|
||||||
|
for p_attr in cqi_p_attrs.values():
|
||||||
|
p_attr_ids = []
|
||||||
|
for chunk in chunks:
|
||||||
|
# print(f'Gather p_attr_ids from cpos: {chunk[0]} - {chunk[-1]}')
|
||||||
|
p_attr_ids.extend(p_attr.ids_by_cpos(chunk))
|
||||||
|
static_corpus_data['s_attrs'][s_attr.name]['lexicon'][id]['freqs'][p_attr.name] = dict(Counter(p_attr_ids))
|
||||||
|
del p_attr_ids
|
||||||
|
del chunks
|
||||||
|
sub_s_attrs = cqi_corpus.structural_attributes.list(filters={'part_of': s_attr})
|
||||||
|
s_attr_value_names: List[str] = [
|
||||||
|
sub_s_attr.name[(len(s_attr.name) + 1):]
|
||||||
|
for sub_s_attr in sub_s_attrs
|
||||||
|
]
|
||||||
|
s_attr_id_list = list(range(s_attr.size))
|
||||||
|
chunks = [s_attr_id_list[i:i+chunk_size] for i in range(0, len(s_attr_id_list), chunk_size)]
|
||||||
|
del s_attr_id_list
|
||||||
|
sub_s_attr_values = []
|
||||||
|
for sub_s_attr in sub_s_attrs:
|
||||||
|
tmp = []
|
||||||
|
for chunk in chunks:
|
||||||
|
tmp.extend(sub_s_attr.values_by_ids(chunk))
|
||||||
|
sub_s_attr_values.append(tmp)
|
||||||
|
del tmp
|
||||||
|
del chunks
|
||||||
|
# print(f's_attrs.{s_attr.name}.values')
|
||||||
|
static_corpus_data['s_attrs'][s_attr.name]['values'] = s_attr_value_names
|
||||||
|
# print(f'values.s_attrs.{s_attr.name}')
|
||||||
|
static_corpus_data['values']['s_attrs'][s_attr.name] = {
|
||||||
|
s_attr_id: {
|
||||||
|
s_attr_value_name: sub_s_attr_values[s_attr_value_name_idx][s_attr_id_idx]
|
||||||
|
for s_attr_value_name_idx, s_attr_value_name in enumerate(
|
||||||
|
static_corpus_data['s_attrs'][s_attr.name]['values']
|
||||||
|
)
|
||||||
|
} for s_attr_id_idx, s_attr_id in enumerate(range(0, s_attr.size))
|
||||||
|
}
|
||||||
|
del sub_s_attr_values
|
||||||
|
with gzip.open(cache_file_path, 'wt') as f:
|
||||||
|
json.dump(static_corpus_data, f)
|
||||||
|
del static_corpus_data
|
||||||
|
with open(cache_file_path, 'rb') as f:
|
||||||
|
return f.read()
|
||||||
|
|
||||||
|
|
||||||
|
def ext_corpus_paginate_corpus(
|
||||||
|
corpus: str,
|
||||||
|
page: int = 1,
|
||||||
|
per_page: int = 20
|
||||||
|
) -> Dict:
|
||||||
|
cqi_client: CQiClient = session['cqi_over_sio']['cqi_client']
|
||||||
|
cqi_corpus = cqi_client.corpora.get(corpus)
|
||||||
|
# Sanity checks
|
||||||
|
if (
|
||||||
|
per_page < 1
|
||||||
|
or page < 1
|
||||||
|
or (
|
||||||
|
cqi_corpus.size > 0
|
||||||
|
and page > math.ceil(cqi_corpus.size / per_page)
|
||||||
|
)
|
||||||
|
):
|
||||||
|
return {'code': 416, 'msg': 'Range Not Satisfiable'}
|
||||||
|
first_cpos = (page - 1) * per_page
|
||||||
|
last_cpos = min(cqi_corpus.size, first_cpos + per_page)
|
||||||
|
cpos_list = [*range(first_cpos, last_cpos)]
|
||||||
|
lookups = lookups_by_cpos(cqi_corpus, cpos_list)
|
||||||
|
payload = {}
|
||||||
|
# the items for the current page
|
||||||
|
payload['items'] = [cpos_list]
|
||||||
|
# the lookups for the items
|
||||||
|
payload['lookups'] = lookups
|
||||||
|
# the total number of items matching the query
|
||||||
|
payload['total'] = cqi_corpus.size
|
||||||
|
# the number of items to be displayed on a page.
|
||||||
|
payload['per_page'] = per_page
|
||||||
|
# The total number of pages
|
||||||
|
payload['pages'] = math.ceil(payload['total'] / payload['per_page'])
|
||||||
|
# the current page number (1 indexed)
|
||||||
|
payload['page'] = page if payload['pages'] > 0 else None
|
||||||
|
# True if a previous page exists
|
||||||
|
payload['has_prev'] = payload['page'] > 1 if payload['page'] else False
|
||||||
|
# True if a next page exists.
|
||||||
|
payload['has_next'] = payload['page'] < payload['pages'] if payload['page'] else False # noqa
|
||||||
|
# Number of the previous page.
|
||||||
|
payload['prev_num'] = payload['page'] - 1 if payload['has_prev'] else None
|
||||||
|
# Number of the next page
|
||||||
|
payload['next_num'] = payload['page'] + 1 if payload['has_next'] else None
|
||||||
|
return payload
|
||||||
|
|
||||||
|
|
||||||
|
def ext_cqp_paginate_subcorpus(
|
||||||
|
subcorpus: str,
|
||||||
|
context: int = 50,
|
||||||
|
page: int = 1,
|
||||||
|
per_page: int = 20
|
||||||
|
) -> Dict:
|
||||||
|
corpus_name, subcorpus_name = subcorpus.split(':', 1)
|
||||||
|
cqi_client: CQiClient = session['cqi_over_sio']['cqi_client']
|
||||||
|
cqi_corpus = cqi_client.corpora.get(corpus_name)
|
||||||
|
cqi_subcorpus = cqi_corpus.subcorpora.get(subcorpus_name)
|
||||||
|
# Sanity checks
|
||||||
|
if (
|
||||||
|
per_page < 1
|
||||||
|
or page < 1
|
||||||
|
or (
|
||||||
|
cqi_subcorpus.size > 0
|
||||||
|
and page > math.ceil(cqi_subcorpus.size / per_page)
|
||||||
|
)
|
||||||
|
):
|
||||||
|
return {'code': 416, 'msg': 'Range Not Satisfiable'}
|
||||||
|
offset = (page - 1) * per_page
|
||||||
|
cutoff = per_page
|
||||||
|
cqi_results_export = export_subcorpus(
|
||||||
|
cqi_subcorpus, context=context, cutoff=cutoff, offset=offset)
|
||||||
|
payload = {}
|
||||||
|
# the items for the current page
|
||||||
|
payload['items'] = cqi_results_export.pop('matches')
|
||||||
|
# the lookups for the items
|
||||||
|
payload['lookups'] = cqi_results_export
|
||||||
|
# the total number of items matching the query
|
||||||
|
payload['total'] = cqi_subcorpus.size
|
||||||
|
# the number of items to be displayed on a page.
|
||||||
|
payload['per_page'] = per_page
|
||||||
|
# The total number of pages
|
||||||
|
payload['pages'] = math.ceil(payload['total'] / payload['per_page'])
|
||||||
|
# the current page number (1 indexed)
|
||||||
|
payload['page'] = page if payload['pages'] > 0 else None
|
||||||
|
# True if a previous page exists
|
||||||
|
payload['has_prev'] = payload['page'] > 1 if payload['page'] else False
|
||||||
|
# True if a next page exists.
|
||||||
|
payload['has_next'] = payload['page'] < payload['pages'] if payload['page'] else False # noqa
|
||||||
|
# Number of the previous page.
|
||||||
|
payload['prev_num'] = payload['page'] - 1 if payload['has_prev'] else None
|
||||||
|
# Number of the next page
|
||||||
|
payload['next_num'] = payload['page'] + 1 if payload['has_next'] else None
|
||||||
|
return payload
|
||||||
|
|
||||||
|
|
||||||
|
def ext_cqp_partial_export_subcorpus(
|
||||||
|
subcorpus: str,
|
||||||
|
match_id_list: list,
|
||||||
|
context: int = 50
|
||||||
|
) -> Dict:
|
||||||
|
corpus_name, subcorpus_name = subcorpus.split(':', 1)
|
||||||
|
cqi_client: CQiClient = session['cqi_over_sio']['cqi_client']
|
||||||
|
cqi_corpus = cqi_client.corpora.get(corpus_name)
|
||||||
|
cqi_subcorpus = cqi_corpus.subcorpora.get(subcorpus_name)
|
||||||
|
cqi_subcorpus_partial_export = partial_export_subcorpus(cqi_subcorpus, match_id_list, context=context)
|
||||||
|
return cqi_subcorpus_partial_export
|
||||||
|
|
||||||
|
|
||||||
|
def ext_cqp_export_subcorpus(
|
||||||
|
subcorpus: str,
|
||||||
|
context: int = 50
|
||||||
|
) -> Dict:
|
||||||
|
corpus_name, subcorpus_name = subcorpus.split(':', 1)
|
||||||
|
cqi_client: CQiClient = session['cqi_over_sio']['cqi_client']
|
||||||
|
cqi_corpus = cqi_client.corpora.get(corpus_name)
|
||||||
|
cqi_subcorpus = cqi_corpus.subcorpora.get(subcorpus_name)
|
||||||
|
cqi_subcorpus_export = export_subcorpus(cqi_subcorpus, context=context)
|
||||||
|
return cqi_subcorpus_export
|
133
app/corpora/cqi_over_sio/utils.py
Normal file
133
app/corpora/cqi_over_sio/utils.py
Normal file
@ -0,0 +1,133 @@
|
|||||||
|
from cqi.models.corpora import Corpus
|
||||||
|
from cqi.models.subcorpora import Subcorpus
|
||||||
|
from typing import Dict, List
|
||||||
|
from app.models import Corpus
|
||||||
|
|
||||||
|
|
||||||
|
def lookups_by_cpos(corpus: Corpus, cpos_list: List[int]) -> Dict:
|
||||||
|
lookups = {}
|
||||||
|
lookups['cpos_lookup'] = {cpos: {} for cpos in cpos_list}
|
||||||
|
for attr in corpus.positional_attributes.list():
|
||||||
|
cpos_attr_values = attr.values_by_cpos(cpos_list)
|
||||||
|
for i, cpos in enumerate(cpos_list):
|
||||||
|
lookups['cpos_lookup'][cpos][attr.attrs['name']] = \
|
||||||
|
cpos_attr_values[i]
|
||||||
|
for attr in corpus.structural_attributes.list():
|
||||||
|
# We only want to iterate over non subattributes, identifiable by
|
||||||
|
# attr.attrs['has_values'] == False
|
||||||
|
if attr.attrs['has_values']:
|
||||||
|
continue
|
||||||
|
cpos_attr_ids = attr.ids_by_cpos(cpos_list)
|
||||||
|
for i, cpos in enumerate(cpos_list):
|
||||||
|
if cpos_attr_ids[i] == -1:
|
||||||
|
continue
|
||||||
|
lookups['cpos_lookup'][cpos][attr.attrs['name']] = cpos_attr_ids[i]
|
||||||
|
occured_attr_ids = [x for x in set(cpos_attr_ids) if x != -1]
|
||||||
|
if not occured_attr_ids:
|
||||||
|
continue
|
||||||
|
subattrs = corpus.structural_attributes.list(filters={'part_of': attr})
|
||||||
|
if not subattrs:
|
||||||
|
continue
|
||||||
|
lookup_name = f'{attr.attrs["name"]}_lookup'
|
||||||
|
lookups[lookup_name] = {}
|
||||||
|
for attr_id in occured_attr_ids:
|
||||||
|
lookups[lookup_name][attr_id] = {}
|
||||||
|
for subattr in subattrs:
|
||||||
|
subattr_name = subattr.attrs['name'][(len(attr.attrs['name']) + 1):] # noqa
|
||||||
|
for i, subattr_value in enumerate(subattr.values_by_ids(occured_attr_ids)): # noqa
|
||||||
|
lookups[lookup_name][occured_attr_ids[i]][subattr_name] = subattr_value # noqa
|
||||||
|
return lookups
|
||||||
|
|
||||||
|
|
||||||
|
def partial_export_subcorpus(
|
||||||
|
subcorpus: Subcorpus,
|
||||||
|
match_id_list: List[int],
|
||||||
|
context: int = 25
|
||||||
|
) -> Dict:
|
||||||
|
if subcorpus.size == 0:
|
||||||
|
return {"matches": []}
|
||||||
|
match_boundaries = []
|
||||||
|
for match_id in match_id_list:
|
||||||
|
if match_id < 0 or match_id >= subcorpus.size:
|
||||||
|
continue
|
||||||
|
match_boundaries.append(
|
||||||
|
(
|
||||||
|
match_id,
|
||||||
|
subcorpus.dump(subcorpus.fields['match'], match_id, match_id)[0],
|
||||||
|
subcorpus.dump(subcorpus.fields['matchend'], match_id, match_id)[0]
|
||||||
|
)
|
||||||
|
)
|
||||||
|
cpos_set = set()
|
||||||
|
matches = []
|
||||||
|
for match_boundary in match_boundaries:
|
||||||
|
match_num, match_start, match_end = match_boundary
|
||||||
|
c = (match_start, match_end)
|
||||||
|
if match_start == 0 or context == 0:
|
||||||
|
lc = None
|
||||||
|
cpos_list_lbound = match_start
|
||||||
|
else:
|
||||||
|
lc_lbound = max(0, (match_start - context))
|
||||||
|
lc_rbound = match_start - 1
|
||||||
|
lc = (lc_lbound, lc_rbound)
|
||||||
|
cpos_list_lbound = lc_lbound
|
||||||
|
if match_end == (subcorpus.collection.corpus.size - 1) or context == 0:
|
||||||
|
rc = None
|
||||||
|
cpos_list_rbound = match_end
|
||||||
|
else:
|
||||||
|
rc_lbound = match_end + 1
|
||||||
|
rc_rbound = min(
|
||||||
|
(match_end + context),
|
||||||
|
(subcorpus.collection.corpus.size - 1)
|
||||||
|
)
|
||||||
|
rc = (rc_lbound, rc_rbound)
|
||||||
|
cpos_list_rbound = rc_rbound
|
||||||
|
match = {'num': match_num, 'lc': lc, 'c': c, 'rc': rc}
|
||||||
|
matches.append(match)
|
||||||
|
cpos_set.update(range(cpos_list_lbound, cpos_list_rbound + 1))
|
||||||
|
lookups = lookups_by_cpos(subcorpus.collection.corpus, list(cpos_set))
|
||||||
|
return {'matches': matches, **lookups}
|
||||||
|
|
||||||
|
|
||||||
|
def export_subcorpus(
|
||||||
|
subcorpus: Subcorpus,
|
||||||
|
context: int = 25,
|
||||||
|
cutoff: float = float('inf'),
|
||||||
|
offset: int = 0
|
||||||
|
) -> Dict:
|
||||||
|
if subcorpus.size == 0:
|
||||||
|
return {"matches": []}
|
||||||
|
first_match = max(0, offset)
|
||||||
|
last_match = min((offset + cutoff - 1), (subcorpus.size - 1))
|
||||||
|
match_boundaries = zip(
|
||||||
|
range(first_match, last_match + 1),
|
||||||
|
subcorpus.dump(subcorpus.fields['match'], first_match, last_match),
|
||||||
|
subcorpus.dump(subcorpus.fields['matchend'], first_match, last_match)
|
||||||
|
)
|
||||||
|
cpos_set = set()
|
||||||
|
matches = []
|
||||||
|
for match_num, match_start, match_end in match_boundaries:
|
||||||
|
c = (match_start, match_end)
|
||||||
|
if match_start == 0 or context == 0:
|
||||||
|
lc = None
|
||||||
|
cpos_list_lbound = match_start
|
||||||
|
else:
|
||||||
|
lc_lbound = max(0, (match_start - context))
|
||||||
|
lc_rbound = match_start - 1
|
||||||
|
lc = (lc_lbound, lc_rbound)
|
||||||
|
cpos_list_lbound = lc_lbound
|
||||||
|
if match_end == (subcorpus.collection.corpus.size - 1) or context == 0:
|
||||||
|
rc = None
|
||||||
|
cpos_list_rbound = match_end
|
||||||
|
else:
|
||||||
|
rc_lbound = match_end + 1
|
||||||
|
rc_rbound = min(
|
||||||
|
(match_end + context),
|
||||||
|
(subcorpus.collection.corpus.size - 1)
|
||||||
|
)
|
||||||
|
rc = (rc_lbound, rc_rbound)
|
||||||
|
cpos_list_rbound = rc_rbound
|
||||||
|
match = {'num': match_num, 'lc': lc, 'c': c, 'rc': rc}
|
||||||
|
matches.append(match)
|
||||||
|
cpos_set.update(range(cpos_list_lbound, cpos_list_rbound + 1))
|
||||||
|
lookups = lookups_by_cpos(subcorpus.collection.corpus, list(cpos_set))
|
||||||
|
return {'matches': matches, **lookups}
|
@ -10,7 +10,7 @@ def corpus_follower_permission_required(*permissions):
|
|||||||
def decorated_function(*args, **kwargs):
|
def decorated_function(*args, **kwargs):
|
||||||
corpus_id = kwargs.get('corpus_id')
|
corpus_id = kwargs.get('corpus_id')
|
||||||
corpus = Corpus.query.get_or_404(corpus_id)
|
corpus = Corpus.query.get_or_404(corpus_id)
|
||||||
if not (corpus.user == current_user or current_user.is_administrator):
|
if not (corpus.user == current_user or current_user.is_administrator()):
|
||||||
cfa = CorpusFollowerAssociation.query.filter_by(corpus_id=corpus_id, follower_id=current_user.id).first()
|
cfa = CorpusFollowerAssociation.query.filter_by(corpus_id=corpus_id, follower_id=current_user.id).first()
|
||||||
if cfa is None:
|
if cfa is None:
|
||||||
abort(403)
|
abort(403)
|
||||||
@ -26,7 +26,7 @@ def corpus_owner_or_admin_required(f):
|
|||||||
def decorated_function(*args, **kwargs):
|
def decorated_function(*args, **kwargs):
|
||||||
corpus_id = kwargs.get('corpus_id')
|
corpus_id = kwargs.get('corpus_id')
|
||||||
corpus = Corpus.query.get_or_404(corpus_id)
|
corpus = Corpus.query.get_or_404(corpus_id)
|
||||||
if not (corpus.user == current_user or current_user.is_administrator):
|
if not (corpus.user == current_user or current_user.is_administrator()):
|
||||||
abort(403)
|
abort(403)
|
||||||
return f(*args, **kwargs)
|
return f(*args, **kwargs)
|
||||||
return decorated_function
|
return decorated_function
|
@ -15,7 +15,7 @@ def get_corpus(corpus_hashid):
|
|||||||
if not (
|
if not (
|
||||||
corpus.is_public
|
corpus.is_public
|
||||||
or corpus.user == current_user
|
or corpus.user == current_user
|
||||||
or current_user.is_administrator
|
or current_user.is_administrator()
|
||||||
):
|
):
|
||||||
return {'options': {'status': 403, 'statusText': 'Forbidden'}}
|
return {'options': {'status': 403, 'statusText': 'Forbidden'}}
|
||||||
return {
|
return {
|
||||||
@ -38,7 +38,7 @@ def subscribe_corpus(corpus_hashid):
|
|||||||
if not (
|
if not (
|
||||||
corpus.is_public
|
corpus.is_public
|
||||||
or corpus.user == current_user
|
or corpus.user == current_user
|
||||||
or current_user.is_administrator
|
or current_user.is_administrator()
|
||||||
):
|
):
|
||||||
return {'options': {'status': 403, 'statusText': 'Forbidden'}}
|
return {'options': {'status': 403, 'statusText': 'Forbidden'}}
|
||||||
join_room(f'/corpora/{corpus.hashid}')
|
join_room(f'/corpora/{corpus.hashid}')
|
2
app/corpora/files/__init__.py
Normal file
2
app/corpora/files/__init__.py
Normal file
@ -0,0 +1,2 @@
|
|||||||
|
from .. import bp
|
||||||
|
from . import json_routes, routes
|
@ -1,7 +1,7 @@
|
|||||||
from flask import current_app
|
from flask import abort, current_app
|
||||||
from threading import Thread
|
from threading import Thread
|
||||||
from app.decorators import content_negotiation
|
|
||||||
from app import db
|
from app import db
|
||||||
|
from app.decorators import content_negotiation
|
||||||
from app.models import CorpusFile
|
from app.models import CorpusFile
|
||||||
from ..decorators import corpus_follower_permission_required
|
from ..decorators import corpus_follower_permission_required
|
||||||
from . import bp
|
from . import bp
|
@ -6,19 +6,25 @@ from flask import (
|
|||||||
send_from_directory,
|
send_from_directory,
|
||||||
url_for
|
url_for
|
||||||
)
|
)
|
||||||
|
from flask_breadcrumbs import register_breadcrumb
|
||||||
|
import os
|
||||||
from app import db
|
from app import db
|
||||||
from app.models import Corpus, CorpusFile, CorpusStatus
|
from app.models import Corpus, CorpusFile, CorpusStatus
|
||||||
from ..decorators import corpus_follower_permission_required
|
from ..decorators import corpus_follower_permission_required
|
||||||
|
from ..utils import corpus_endpoint_arguments_constructor as corpus_eac
|
||||||
from . import bp
|
from . import bp
|
||||||
from .forms import CreateCorpusFileForm, UpdateCorpusFileForm
|
from .forms import CreateCorpusFileForm, UpdateCorpusFileForm
|
||||||
|
from .utils import corpus_file_dynamic_list_constructor as corpus_file_dlc
|
||||||
|
|
||||||
|
|
||||||
@bp.route('/<hashid:corpus_id>/files')
|
@bp.route('/<hashid:corpus_id>/files')
|
||||||
|
@register_breadcrumb(bp, '.entity.files', 'Files', endpoint_arguments_constructor=corpus_eac)
|
||||||
def corpus_files(corpus_id):
|
def corpus_files(corpus_id):
|
||||||
return redirect(url_for('.corpus', _anchor='files', corpus_id=corpus_id))
|
return redirect(url_for('.corpus', _anchor='files', corpus_id=corpus_id))
|
||||||
|
|
||||||
|
|
||||||
@bp.route('/<hashid:corpus_id>/files/create', methods=['GET', 'POST'])
|
@bp.route('/<hashid:corpus_id>/files/create', methods=['GET', 'POST'])
|
||||||
|
@register_breadcrumb(bp, '.entity.files.create', 'Create', endpoint_arguments_constructor=corpus_eac)
|
||||||
@corpus_follower_permission_required('MANAGE_FILES')
|
@corpus_follower_permission_required('MANAGE_FILES')
|
||||||
def create_corpus_file(corpus_id):
|
def create_corpus_file(corpus_id):
|
||||||
corpus = Corpus.query.get_or_404(corpus_id)
|
corpus = Corpus.query.get_or_404(corpus_id)
|
||||||
@ -60,6 +66,7 @@ def create_corpus_file(corpus_id):
|
|||||||
|
|
||||||
|
|
||||||
@bp.route('/<hashid:corpus_id>/files/<hashid:corpus_file_id>', methods=['GET', 'POST'])
|
@bp.route('/<hashid:corpus_id>/files/<hashid:corpus_file_id>', methods=['GET', 'POST'])
|
||||||
|
@register_breadcrumb(bp, '.entity.files.entity', '', dynamic_list_constructor=corpus_file_dlc)
|
||||||
@corpus_follower_permission_required('MANAGE_FILES')
|
@corpus_follower_permission_required('MANAGE_FILES')
|
||||||
def corpus_file(corpus_id, corpus_file_id):
|
def corpus_file(corpus_id, corpus_file_id):
|
||||||
corpus_file = CorpusFile.query.filter_by(corpus_id=corpus_id, id=corpus_file_id).first_or_404()
|
corpus_file = CorpusFile.query.filter_by(corpus_id=corpus_id, id=corpus_file_id).first_or_404()
|
||||||
@ -85,9 +92,9 @@ def corpus_file(corpus_id, corpus_file_id):
|
|||||||
def download_corpus_file(corpus_id, corpus_file_id):
|
def download_corpus_file(corpus_id, corpus_file_id):
|
||||||
corpus_file = CorpusFile.query.filter_by(corpus_id=corpus_id, id=corpus_file_id).first_or_404()
|
corpus_file = CorpusFile.query.filter_by(corpus_id=corpus_id, id=corpus_file_id).first_or_404()
|
||||||
return send_from_directory(
|
return send_from_directory(
|
||||||
corpus_file.path.parent,
|
os.path.dirname(corpus_file.path),
|
||||||
corpus_file.path.name,
|
os.path.basename(corpus_file.path),
|
||||||
as_attachment=True,
|
as_attachment=True,
|
||||||
download_name=corpus_file.filename,
|
attachment_filename=corpus_file.filename,
|
||||||
mimetype=corpus_file.mimetype
|
mimetype=corpus_file.mimetype
|
||||||
)
|
)
|
15
app/corpora/files/utils.py
Normal file
15
app/corpora/files/utils.py
Normal file
@ -0,0 +1,15 @@
|
|||||||
|
from flask import request, url_for
|
||||||
|
from app.models import CorpusFile
|
||||||
|
from ..utils import corpus_endpoint_arguments_constructor as corpus_eac
|
||||||
|
|
||||||
|
|
||||||
|
def corpus_file_dynamic_list_constructor():
|
||||||
|
corpus_id = request.view_args['corpus_id']
|
||||||
|
corpus_file_id = request.view_args['corpus_file_id']
|
||||||
|
corpus_file = CorpusFile.query.filter_by(corpus_id=corpus_id, id=corpus_file_id).first_or_404()
|
||||||
|
return [
|
||||||
|
{
|
||||||
|
'text': f'{corpus_file.author}: {corpus_file.title} ({corpus_file.publishing_year})',
|
||||||
|
'url': url_for('.corpus_file', corpus_id=corpus_id, corpus_file_id=corpus_file_id)
|
||||||
|
}
|
||||||
|
]
|
76
app/corpora/followers/json_routes.py
Normal file
76
app/corpora/followers/json_routes.py
Normal file
@ -0,0 +1,76 @@
|
|||||||
|
from flask import abort, flash, jsonify, make_response, request
|
||||||
|
from flask_login import current_user
|
||||||
|
from app import db
|
||||||
|
from app.decorators import content_negotiation
|
||||||
|
from app.models import (
|
||||||
|
Corpus,
|
||||||
|
CorpusFollowerAssociation,
|
||||||
|
CorpusFollowerRole,
|
||||||
|
User
|
||||||
|
)
|
||||||
|
from ..decorators import corpus_follower_permission_required
|
||||||
|
from . import bp
|
||||||
|
|
||||||
|
|
||||||
|
# @bp.route('/<hashid:corpus_id>/followers', methods=['POST'])
|
||||||
|
# @corpus_follower_permission_required('MANAGE_FOLLOWERS')
|
||||||
|
# @content_negotiation(consumes='application/json', produces='application/json')
|
||||||
|
# def create_corpus_followers(corpus_id):
|
||||||
|
# usernames = request.json
|
||||||
|
# if not (isinstance(usernames, list) or all(isinstance(u, str) for u in usernames)):
|
||||||
|
# abort(400)
|
||||||
|
# corpus = Corpus.query.get_or_404(corpus_id)
|
||||||
|
# for username in usernames:
|
||||||
|
# user = User.query.filter_by(username=username, is_public=True).first_or_404()
|
||||||
|
# user.follow_corpus(corpus)
|
||||||
|
# db.session.commit()
|
||||||
|
# response_data = {
|
||||||
|
# 'message': f'Users are now following "{corpus.title}"',
|
||||||
|
# 'category': 'corpus'
|
||||||
|
# }
|
||||||
|
# return response_data, 200
|
||||||
|
|
||||||
|
|
||||||
|
# @bp.route('/<hashid:corpus_id>/followers/<hashid:follower_id>/role', methods=['PUT'])
|
||||||
|
# @corpus_follower_permission_required('MANAGE_FOLLOWERS')
|
||||||
|
# @content_negotiation(consumes='application/json', produces='application/json')
|
||||||
|
# def update_corpus_follower_role(corpus_id, follower_id):
|
||||||
|
# role_name = request.json
|
||||||
|
# if not isinstance(role_name, str):
|
||||||
|
# abort(400)
|
||||||
|
# cfr = CorpusFollowerRole.query.filter_by(name=role_name).first()
|
||||||
|
# if cfr is None:
|
||||||
|
# abort(400)
|
||||||
|
# cfa = CorpusFollowerAssociation.query.filter_by(corpus_id=corpus_id, follower_id=follower_id).first_or_404()
|
||||||
|
# cfa.role = cfr
|
||||||
|
# db.session.commit()
|
||||||
|
# response_data = {
|
||||||
|
# 'message': f'User "{cfa.follower.username}" is now {cfa.role.name}',
|
||||||
|
# 'category': 'corpus'
|
||||||
|
# }
|
||||||
|
# return response_data, 200
|
||||||
|
|
||||||
|
|
||||||
|
# @bp.route('/<hashid:corpus_id>/followers/<hashid:follower_id>', methods=['DELETE'])
|
||||||
|
# def delete_corpus_follower(corpus_id, follower_id):
|
||||||
|
# cfa = CorpusFollowerAssociation.query.filter_by(corpus_id=corpus_id, follower_id=follower_id).first_or_404()
|
||||||
|
# if not (
|
||||||
|
# current_user.id == follower_id
|
||||||
|
# or current_user == cfa.corpus.user
|
||||||
|
# or CorpusFollowerAssociation.query.filter_by(corpus_id=corpus_id, follower_id=current_user.id).first().role.has_permission('MANAGE_FOLLOWERS')
|
||||||
|
# or current_user.is_administrator()):
|
||||||
|
# abort(403)
|
||||||
|
# if current_user.id == follower_id:
|
||||||
|
# flash(f'You are no longer following "{cfa.corpus.title}"', 'corpus')
|
||||||
|
# response = make_response()
|
||||||
|
# response.status_code = 204
|
||||||
|
# else:
|
||||||
|
# response_data = {
|
||||||
|
# 'message': f'"{cfa.follower.username}" is not following "{cfa.corpus.title}" anymore',
|
||||||
|
# 'category': 'corpus'
|
||||||
|
# }
|
||||||
|
# response = jsonify(response_data)
|
||||||
|
# response.status_code = 200
|
||||||
|
# cfa.follower.unfollow_corpus(cfa.corpus)
|
||||||
|
# db.session.commit()
|
||||||
|
# return response
|
@ -61,7 +61,7 @@ def build_corpus(corpus_id):
|
|||||||
@bp.route('/stopwords')
|
@bp.route('/stopwords')
|
||||||
@content_negotiation(produces='application/json')
|
@content_negotiation(produces='application/json')
|
||||||
def get_stopwords():
|
def get_stopwords():
|
||||||
nltk.download('stopwords', quiet=True)
|
nltk.download('stopwords')
|
||||||
languages = ["german", "english", "catalan", "greek", "spanish", "french", "italian", "russian", "chinese"]
|
languages = ["german", "english", "catalan", "greek", "spanish", "french", "italian", "russian", "chinese"]
|
||||||
stopwords = {}
|
stopwords = {}
|
||||||
for language in languages:
|
for language in languages:
|
||||||
@ -71,55 +71,55 @@ def get_stopwords():
|
|||||||
response_data = stopwords
|
response_data = stopwords
|
||||||
return response_data, 202
|
return response_data, 202
|
||||||
|
|
||||||
@bp.route('/<hashid:corpus_id>/generate-share-link', methods=['POST'])
|
# @bp.route('/<hashid:corpus_id>/generate-share-link', methods=['POST'])
|
||||||
@corpus_follower_permission_required('MANAGE_FOLLOWERS')
|
# @corpus_follower_permission_required('MANAGE_FOLLOWERS')
|
||||||
@content_negotiation(consumes='application/json', produces='application/json')
|
# @content_negotiation(consumes='application/json', produces='application/json')
|
||||||
def generate_corpus_share_link(corpus_id):
|
# def generate_corpus_share_link(corpus_id):
|
||||||
data = request.json
|
# data = request.json
|
||||||
if not isinstance(data, dict):
|
# if not isinstance(data, dict):
|
||||||
abort(400)
|
# abort(400)
|
||||||
expiration = data.get('expiration')
|
# expiration = data.get('expiration')
|
||||||
if not isinstance(expiration, str):
|
# if not isinstance(expiration, str):
|
||||||
abort(400)
|
# abort(400)
|
||||||
role_name = data.get('role')
|
# role_name = data.get('role')
|
||||||
if not isinstance(role_name, str):
|
# if not isinstance(role_name, str):
|
||||||
abort(400)
|
# abort(400)
|
||||||
expiration_date = datetime.strptime(expiration, '%b %d, %Y')
|
# expiration_date = datetime.strptime(expiration, '%b %d, %Y')
|
||||||
cfr = CorpusFollowerRole.query.filter_by(name=role_name).first()
|
# cfr = CorpusFollowerRole.query.filter_by(name=role_name).first()
|
||||||
if cfr is None:
|
# if cfr is None:
|
||||||
abort(400)
|
# abort(400)
|
||||||
corpus = Corpus.query.get_or_404(corpus_id)
|
# corpus = Corpus.query.get_or_404(corpus_id)
|
||||||
token = current_user.generate_follow_corpus_token(corpus.hashid, role_name, expiration_date)
|
# token = current_user.generate_follow_corpus_token(corpus.hashid, role_name, expiration_date)
|
||||||
corpus_share_link = url_for(
|
# corpus_share_link = url_for(
|
||||||
'corpora.follow_corpus',
|
# 'corpora.follow_corpus',
|
||||||
corpus_id=corpus_id,
|
# corpus_id=corpus_id,
|
||||||
token=token,
|
# token=token,
|
||||||
_external=True
|
# _external=True
|
||||||
)
|
# )
|
||||||
response_data = {
|
# response_data = {
|
||||||
'message': 'Corpus share link generated',
|
# 'message': 'Corpus share link generated',
|
||||||
'category': 'corpus',
|
# 'category': 'corpus',
|
||||||
'corpusShareLink': corpus_share_link
|
# 'corpusShareLink': corpus_share_link
|
||||||
}
|
# }
|
||||||
return response_data, 200
|
# return response_data, 200
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
@bp.route('/<hashid:corpus_id>/is_public', methods=['PUT'])
|
# @bp.route('/<hashid:corpus_id>/is_public', methods=['PUT'])
|
||||||
@corpus_owner_or_admin_required
|
# @corpus_owner_or_admin_required
|
||||||
@content_negotiation(consumes='application/json', produces='application/json')
|
# @content_negotiation(consumes='application/json', produces='application/json')
|
||||||
def update_corpus_is_public(corpus_id):
|
# def update_corpus_is_public(corpus_id):
|
||||||
is_public = request.json
|
# is_public = request.json
|
||||||
if not isinstance(is_public, bool):
|
# if not isinstance(is_public, bool):
|
||||||
abort(400)
|
# abort(400)
|
||||||
corpus = Corpus.query.get_or_404(corpus_id)
|
# corpus = Corpus.query.get_or_404(corpus_id)
|
||||||
corpus.is_public = is_public
|
# corpus.is_public = is_public
|
||||||
db.session.commit()
|
# db.session.commit()
|
||||||
response_data = {
|
# response_data = {
|
||||||
'message': (
|
# 'message': (
|
||||||
f'Corpus "{corpus.title}" is now'
|
# f'Corpus "{corpus.title}" is now'
|
||||||
f' {"public" if is_public else "private"}'
|
# f' {"public" if is_public else "private"}'
|
||||||
),
|
# ),
|
||||||
'category': 'corpus'
|
# 'category': 'corpus'
|
||||||
}
|
# }
|
||||||
return response_data, 200
|
# return response_data, 200
|
@ -1,4 +1,5 @@
|
|||||||
from flask import abort, flash, redirect, render_template, url_for
|
from flask import abort, flash, redirect, render_template, url_for
|
||||||
|
from flask_breadcrumbs import register_breadcrumb
|
||||||
from flask_login import current_user
|
from flask_login import current_user
|
||||||
from app import db
|
from app import db
|
||||||
from app.models import (
|
from app.models import (
|
||||||
@ -10,14 +11,20 @@ from app.models import (
|
|||||||
from . import bp
|
from . import bp
|
||||||
from .decorators import corpus_follower_permission_required
|
from .decorators import corpus_follower_permission_required
|
||||||
from .forms import CreateCorpusForm
|
from .forms import CreateCorpusForm
|
||||||
|
from .utils import (
|
||||||
|
corpus_endpoint_arguments_constructor as corpus_eac,
|
||||||
|
corpus_dynamic_list_constructor as corpus_dlc
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
@bp.route('')
|
@bp.route('')
|
||||||
|
@register_breadcrumb(bp, '.', '<i class="nopaque-icons left">I</i>My Corpora')
|
||||||
def corpora():
|
def corpora():
|
||||||
return redirect(url_for('main.dashboard', _anchor='corpora'))
|
return redirect(url_for('main.dashboard', _anchor='corpora'))
|
||||||
|
|
||||||
|
|
||||||
@bp.route('/create', methods=['GET', 'POST'])
|
@bp.route('/create', methods=['GET', 'POST'])
|
||||||
|
@register_breadcrumb(bp, '.create', 'Create')
|
||||||
def create_corpus():
|
def create_corpus():
|
||||||
form = CreateCorpusForm()
|
form = CreateCorpusForm()
|
||||||
if form.validate_on_submit():
|
if form.validate_on_submit():
|
||||||
@ -40,6 +47,7 @@ def create_corpus():
|
|||||||
|
|
||||||
|
|
||||||
@bp.route('/<hashid:corpus_id>')
|
@bp.route('/<hashid:corpus_id>')
|
||||||
|
@register_breadcrumb(bp, '.entity', '', dynamic_list_constructor=corpus_dlc)
|
||||||
def corpus(corpus_id):
|
def corpus(corpus_id):
|
||||||
corpus = Corpus.query.get_or_404(corpus_id)
|
corpus = Corpus.query.get_or_404(corpus_id)
|
||||||
cfrs = CorpusFollowerRole.query.all()
|
cfrs = CorpusFollowerRole.query.all()
|
||||||
@ -47,38 +55,40 @@ def corpus(corpus_id):
|
|||||||
users = User.query.filter(User.is_public == True, User.id != current_user.id, User.id != corpus.user.id, User.role_id < 4).all()
|
users = User.query.filter(User.is_public == True, User.id != current_user.id, User.id != corpus.user.id, User.role_id < 4).all()
|
||||||
cfa = CorpusFollowerAssociation.query.filter_by(corpus_id=corpus_id, follower_id=current_user.id).first()
|
cfa = CorpusFollowerAssociation.query.filter_by(corpus_id=corpus_id, follower_id=current_user.id).first()
|
||||||
if cfa is None:
|
if cfa is None:
|
||||||
if corpus.user == current_user or current_user.is_administrator:
|
if corpus.user == current_user or current_user.is_administrator():
|
||||||
cfr = CorpusFollowerRole.query.filter_by(name='Administrator').first()
|
cfr = CorpusFollowerRole.query.filter_by(name='Administrator').first()
|
||||||
else:
|
else:
|
||||||
cfr = CorpusFollowerRole.query.filter_by(name='Anonymous').first()
|
cfr = CorpusFollowerRole.query.filter_by(name='Anonymous').first()
|
||||||
else:
|
else:
|
||||||
cfr = cfa.role
|
cfr = cfa.role
|
||||||
if corpus.user == current_user or current_user.is_administrator:
|
if corpus.user == current_user or current_user.is_administrator():
|
||||||
return render_template(
|
return render_template(
|
||||||
'corpora/corpus.html.j2',
|
'corpora/corpus.html.j2',
|
||||||
title=corpus.title,
|
title=corpus.title,
|
||||||
corpus=corpus,
|
corpus=corpus,
|
||||||
cfr=cfr,
|
cfr=cfr,
|
||||||
cfrs=cfrs,
|
cfrs=cfrs,
|
||||||
users=users
|
users = users
|
||||||
)
|
)
|
||||||
if (current_user.is_following_corpus(corpus) or corpus.is_public):
|
if (current_user.is_following_corpus(corpus) or corpus.is_public):
|
||||||
cfas = CorpusFollowerAssociation.query.filter(Corpus.id == corpus_id, CorpusFollowerAssociation.follower_id != corpus.user.id).all()
|
abort(404)
|
||||||
return render_template(
|
# cfas = CorpusFollowerAssociation.query.filter(Corpus.id == corpus_id, CorpusFollowerAssociation.follower_id != corpus.user.id).all()
|
||||||
'corpora/public_corpus.html.j2',
|
# return render_template(
|
||||||
title=corpus.title,
|
# 'corpora/public_corpus.html.j2',
|
||||||
corpus=corpus,
|
# title=corpus.title,
|
||||||
cfrs=cfrs,
|
# corpus=corpus,
|
||||||
cfr=cfr,
|
# cfrs=cfrs,
|
||||||
cfas=cfas,
|
# cfr=cfr,
|
||||||
users=users
|
# cfas=cfas,
|
||||||
)
|
# users = users
|
||||||
|
# )
|
||||||
abort(403)
|
abort(403)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
@bp.route('/<hashid:corpus_id>/analysis')
|
@bp.route('/<hashid:corpus_id>/analysis')
|
||||||
@corpus_follower_permission_required('VIEW')
|
@corpus_follower_permission_required('VIEW')
|
||||||
|
@register_breadcrumb(bp, '.entity.analysis', 'Analysis', endpoint_arguments_constructor=corpus_eac)
|
||||||
def analysis(corpus_id):
|
def analysis(corpus_id):
|
||||||
corpus = Corpus.query.get_or_404(corpus_id)
|
corpus = Corpus.query.get_or_404(corpus_id)
|
||||||
return render_template(
|
return render_template(
|
||||||
@ -88,22 +98,24 @@ def analysis(corpus_id):
|
|||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@bp.route('/<hashid:corpus_id>/follow/<token>')
|
# @bp.route('/<hashid:corpus_id>/follow/<token>')
|
||||||
def follow_corpus(corpus_id, token):
|
# def follow_corpus(corpus_id, token):
|
||||||
corpus = Corpus.query.get_or_404(corpus_id)
|
# corpus = Corpus.query.get_or_404(corpus_id)
|
||||||
if current_user.follow_corpus_by_token(token):
|
# if current_user.follow_corpus_by_token(token):
|
||||||
db.session.commit()
|
# db.session.commit()
|
||||||
flash(f'You are following "{corpus.title}" now', category='corpus')
|
# flash(f'You are following "{corpus.title}" now', category='corpus')
|
||||||
return redirect(url_for('corpora.corpus', corpus_id=corpus_id))
|
# return redirect(url_for('corpora.corpus', corpus_id=corpus_id))
|
||||||
abort(403)
|
# abort(403)
|
||||||
|
|
||||||
|
|
||||||
@bp.route('/import', methods=['GET', 'POST'])
|
@bp.route('/import', methods=['GET', 'POST'])
|
||||||
|
@register_breadcrumb(bp, '.import', 'Import')
|
||||||
def import_corpus():
|
def import_corpus():
|
||||||
abort(503)
|
abort(503)
|
||||||
|
|
||||||
|
|
||||||
@bp.route('/<hashid:corpus_id>/export')
|
@bp.route('/<hashid:corpus_id>/export')
|
||||||
@corpus_follower_permission_required('VIEW')
|
@corpus_follower_permission_required('VIEW')
|
||||||
|
@register_breadcrumb(bp, '.entity.export', 'Export', endpoint_arguments_constructor=corpus_eac)
|
||||||
def export_corpus(corpus_id):
|
def export_corpus(corpus_id):
|
||||||
abort(503)
|
abort(503)
|
17
app/corpora/utils.py
Normal file
17
app/corpora/utils.py
Normal file
@ -0,0 +1,17 @@
|
|||||||
|
from flask import request, url_for
|
||||||
|
from app.models import Corpus
|
||||||
|
|
||||||
|
|
||||||
|
def corpus_endpoint_arguments_constructor():
|
||||||
|
return {'corpus_id': request.view_args['corpus_id']}
|
||||||
|
|
||||||
|
|
||||||
|
def corpus_dynamic_list_constructor():
|
||||||
|
corpus_id = request.view_args['corpus_id']
|
||||||
|
corpus = Corpus.query.get_or_404(corpus_id)
|
||||||
|
return [
|
||||||
|
{
|
||||||
|
'text': f'<i class="material-icons left">book</i>{corpus.title}',
|
||||||
|
'url': url_for('.corpus', corpus_id=corpus_id)
|
||||||
|
}
|
||||||
|
]
|
11
app/daemon/__init__.py
Normal file
11
app/daemon/__init__.py
Normal file
@ -0,0 +1,11 @@
|
|||||||
|
from app import db
|
||||||
|
from flask import Flask
|
||||||
|
from .corpus_utils import check_corpora
|
||||||
|
from .job_utils import check_jobs
|
||||||
|
|
||||||
|
|
||||||
|
def daemon(app: Flask):
|
||||||
|
with app.app_context():
|
||||||
|
check_corpora()
|
||||||
|
check_jobs()
|
||||||
|
db.session.commit()
|
@ -1,16 +1,12 @@
|
|||||||
|
from app import docker_client
|
||||||
|
from app.models import Corpus, CorpusStatus
|
||||||
from flask import current_app
|
from flask import current_app
|
||||||
import docker
|
import docker
|
||||||
import os
|
import os
|
||||||
import shutil
|
import shutil
|
||||||
from app import db, docker_client, scheduler
|
|
||||||
from app.models import Corpus, CorpusStatus
|
|
||||||
|
|
||||||
|
|
||||||
def handle_corpora():
|
def check_corpora():
|
||||||
with scheduler.app.app_context():
|
|
||||||
_handle_corpora()
|
|
||||||
|
|
||||||
def _handle_corpora():
|
|
||||||
corpora = Corpus.query.all()
|
corpora = Corpus.query.all()
|
||||||
for corpus in [x for x in corpora if x.status == CorpusStatus.SUBMITTED]:
|
for corpus in [x for x in corpora if x.status == CorpusStatus.SUBMITTED]:
|
||||||
_create_build_corpus_service(corpus)
|
_create_build_corpus_service(corpus)
|
||||||
@ -21,14 +17,13 @@ def _handle_corpora():
|
|||||||
for corpus in [x for x in corpora if x.status == CorpusStatus.RUNNING_ANALYSIS_SESSION and x.num_analysis_sessions == 0]:
|
for corpus in [x for x in corpora if x.status == CorpusStatus.RUNNING_ANALYSIS_SESSION and x.num_analysis_sessions == 0]:
|
||||||
corpus.status = CorpusStatus.CANCELING_ANALYSIS_SESSION
|
corpus.status = CorpusStatus.CANCELING_ANALYSIS_SESSION
|
||||||
for corpus in [x for x in corpora if x.status == CorpusStatus.RUNNING_ANALYSIS_SESSION]:
|
for corpus in [x for x in corpora if x.status == CorpusStatus.RUNNING_ANALYSIS_SESSION]:
|
||||||
_checkout_cqpserver_container(corpus)
|
_checkout_analysing_corpus_container(corpus)
|
||||||
for corpus in [x for x in corpora if x.status == CorpusStatus.STARTING_ANALYSIS_SESSION]:
|
for corpus in [x for x in corpora if x.status == CorpusStatus.STARTING_ANALYSIS_SESSION]:
|
||||||
_create_cqpserver_container(corpus)
|
_create_cqpserver_container(corpus)
|
||||||
for corpus in [x for x in corpora if x.status == CorpusStatus.CANCELING_ANALYSIS_SESSION]:
|
for corpus in [x for x in corpora if x.status == CorpusStatus.CANCELING_ANALYSIS_SESSION]:
|
||||||
_remove_cqpserver_container(corpus)
|
_remove_cqpserver_container(corpus)
|
||||||
db.session.commit()
|
|
||||||
|
|
||||||
def _create_build_corpus_service(corpus: Corpus):
|
def _create_build_corpus_service(corpus):
|
||||||
''' # Docker service settings # '''
|
''' # Docker service settings # '''
|
||||||
''' ## Command ## '''
|
''' ## Command ## '''
|
||||||
command = ['bash', '-c']
|
command = ['bash', '-c']
|
||||||
@ -50,10 +45,12 @@ def _create_build_corpus_service(corpus: Corpus):
|
|||||||
''' ## Constraints ## '''
|
''' ## Constraints ## '''
|
||||||
constraints = ['node.role==worker']
|
constraints = ['node.role==worker']
|
||||||
''' ## Image ## '''
|
''' ## Image ## '''
|
||||||
image = f'{current_app.config["NOPAQUE_DOCKER_IMAGE_PREFIX"]}cwb:r1887'
|
image = f'{current_app.config["NOPAQUE_DOCKER_IMAGE_PREFIX"]}cwb:r1702'
|
||||||
''' ## Labels ## '''
|
''' ## Labels ## '''
|
||||||
labels = {
|
labels = {
|
||||||
'nopaque.server_name': current_app.config['SERVER_NAME']
|
'origin': current_app.config['SERVER_NAME'],
|
||||||
|
'type': 'corpus.build',
|
||||||
|
'corpus_id': str(corpus.id)
|
||||||
}
|
}
|
||||||
''' ## Mounts ## '''
|
''' ## Mounts ## '''
|
||||||
mounts = []
|
mounts = []
|
||||||
@ -98,7 +95,7 @@ def _create_build_corpus_service(corpus: Corpus):
|
|||||||
return
|
return
|
||||||
corpus.status = CorpusStatus.QUEUED
|
corpus.status = CorpusStatus.QUEUED
|
||||||
|
|
||||||
def _checkout_build_corpus_service(corpus: Corpus):
|
def _checkout_build_corpus_service(corpus):
|
||||||
service_name = f'build-corpus_{corpus.id}'
|
service_name = f'build-corpus_{corpus.id}'
|
||||||
try:
|
try:
|
||||||
service = docker_client.services.get(service_name)
|
service = docker_client.services.get(service_name)
|
||||||
@ -126,7 +123,8 @@ def _checkout_build_corpus_service(corpus: Corpus):
|
|||||||
except docker.errors.DockerException as e:
|
except docker.errors.DockerException as e:
|
||||||
current_app.logger.error(f'Remove service "{service_name}" failed: {e}')
|
current_app.logger.error(f'Remove service "{service_name}" failed: {e}')
|
||||||
|
|
||||||
def _create_cqpserver_container(corpus: Corpus):
|
def _create_cqpserver_container(corpus):
|
||||||
|
''' # Docker container settings # '''
|
||||||
''' ## Command ## '''
|
''' ## Command ## '''
|
||||||
command = []
|
command = []
|
||||||
command.append(
|
command.append(
|
||||||
@ -141,11 +139,11 @@ def _create_cqpserver_container(corpus: Corpus):
|
|||||||
''' ## Entrypoint ## '''
|
''' ## Entrypoint ## '''
|
||||||
entrypoint = ['bash', '-c']
|
entrypoint = ['bash', '-c']
|
||||||
''' ## Image ## '''
|
''' ## Image ## '''
|
||||||
image = f'{current_app.config["NOPAQUE_DOCKER_IMAGE_PREFIX"]}cwb:r1887'
|
image = f'{current_app.config["NOPAQUE_DOCKER_IMAGE_PREFIX"]}cwb:r1702'
|
||||||
''' ## Name ## '''
|
''' ## Name ## '''
|
||||||
name = f'nopaque-cqpserver-{corpus.id}'
|
name = f'cqpserver_{corpus.id}'
|
||||||
''' ## Network ## '''
|
''' ## Network ## '''
|
||||||
network = f'{current_app.config["NOPAQUE_DOCKER_NETWORK_NAME"]}'
|
network = f'{current_app.config["DOCKER_NETWORK_NAME"]}'
|
||||||
''' ## Volumes ## '''
|
''' ## Volumes ## '''
|
||||||
volumes = []
|
volumes = []
|
||||||
''' ### Corpus data volume ### '''
|
''' ### Corpus data volume ### '''
|
||||||
@ -200,8 +198,8 @@ def _create_cqpserver_container(corpus: Corpus):
|
|||||||
return
|
return
|
||||||
corpus.status = CorpusStatus.RUNNING_ANALYSIS_SESSION
|
corpus.status = CorpusStatus.RUNNING_ANALYSIS_SESSION
|
||||||
|
|
||||||
def _checkout_cqpserver_container(corpus: Corpus):
|
def _checkout_analysing_corpus_container(corpus):
|
||||||
container_name = f'nopaque-cqpserver-{corpus.id}'
|
container_name = f'cqpserver_{corpus.id}'
|
||||||
try:
|
try:
|
||||||
docker_client.containers.get(container_name)
|
docker_client.containers.get(container_name)
|
||||||
except docker.errors.NotFound as e:
|
except docker.errors.NotFound as e:
|
||||||
@ -211,8 +209,8 @@ def _checkout_cqpserver_container(corpus: Corpus):
|
|||||||
except docker.errors.DockerException as e:
|
except docker.errors.DockerException as e:
|
||||||
current_app.logger.error(f'Get container "{container_name}" failed: {e}')
|
current_app.logger.error(f'Get container "{container_name}" failed: {e}')
|
||||||
|
|
||||||
def _remove_cqpserver_container(corpus: Corpus):
|
def _remove_cqpserver_container(corpus):
|
||||||
container_name = f'nopaque-cqpserver-{corpus.id}'
|
container_name = f'cqpserver_{corpus.id}'
|
||||||
try:
|
try:
|
||||||
container = docker_client.containers.get(container_name)
|
container = docker_client.containers.get(container_name)
|
||||||
except docker.errors.NotFound:
|
except docker.errors.NotFound:
|
@ -1,11 +1,4 @@
|
|||||||
from datetime import datetime
|
from app import db, docker_client, hashids
|
||||||
from flask import current_app
|
|
||||||
from werkzeug.utils import secure_filename
|
|
||||||
import docker
|
|
||||||
import json
|
|
||||||
import os
|
|
||||||
import shutil
|
|
||||||
from app import db, docker_client, hashids, scheduler
|
|
||||||
from app.models import (
|
from app.models import (
|
||||||
Job,
|
Job,
|
||||||
JobResult,
|
JobResult,
|
||||||
@ -13,13 +6,16 @@ from app.models import (
|
|||||||
TesseractOCRPipelineModel,
|
TesseractOCRPipelineModel,
|
||||||
SpaCyNLPPipelineModel
|
SpaCyNLPPipelineModel
|
||||||
)
|
)
|
||||||
|
from datetime import datetime
|
||||||
|
from flask import current_app
|
||||||
|
from werkzeug.utils import secure_filename
|
||||||
|
import docker
|
||||||
|
import json
|
||||||
|
import os
|
||||||
|
import shutil
|
||||||
|
|
||||||
|
|
||||||
def handle_jobs():
|
def check_jobs():
|
||||||
with scheduler.app.app_context():
|
|
||||||
_handle_jobs()
|
|
||||||
|
|
||||||
def _handle_jobs():
|
|
||||||
jobs = Job.query.all()
|
jobs = Job.query.all()
|
||||||
for job in [x for x in jobs if x.status == JobStatus.SUBMITTED]:
|
for job in [x for x in jobs if x.status == JobStatus.SUBMITTED]:
|
||||||
_create_job_service(job)
|
_create_job_service(job)
|
||||||
@ -27,9 +23,8 @@ def _handle_jobs():
|
|||||||
_checkout_job_service(job)
|
_checkout_job_service(job)
|
||||||
for job in [x for x in jobs if x.status == JobStatus.CANCELING]:
|
for job in [x for x in jobs if x.status == JobStatus.CANCELING]:
|
||||||
_remove_job_service(job)
|
_remove_job_service(job)
|
||||||
db.session.commit()
|
|
||||||
|
|
||||||
def _create_job_service(job: Job):
|
def _create_job_service(job):
|
||||||
''' # Docker service settings # '''
|
''' # Docker service settings # '''
|
||||||
''' ## Service specific settings ## '''
|
''' ## Service specific settings ## '''
|
||||||
if job.service == 'file-setup-pipeline':
|
if job.service == 'file-setup-pipeline':
|
||||||
@ -86,7 +81,9 @@ def _create_job_service(job: Job):
|
|||||||
constraints = ['node.role==worker']
|
constraints = ['node.role==worker']
|
||||||
''' ## Labels ## '''
|
''' ## Labels ## '''
|
||||||
labels = {
|
labels = {
|
||||||
'origin': current_app.config['SERVER_NAME']
|
'origin': current_app.config['SERVER_NAME'],
|
||||||
|
'type': 'job',
|
||||||
|
'job_id': str(job.id)
|
||||||
}
|
}
|
||||||
''' ## Mounts ## '''
|
''' ## Mounts ## '''
|
||||||
mounts = []
|
mounts = []
|
||||||
@ -167,7 +164,7 @@ def _create_job_service(job: Job):
|
|||||||
return
|
return
|
||||||
job.status = JobStatus.QUEUED
|
job.status = JobStatus.QUEUED
|
||||||
|
|
||||||
def _checkout_job_service(job: Job):
|
def _checkout_job_service(job):
|
||||||
service_name = f'job_{job.id}'
|
service_name = f'job_{job.id}'
|
||||||
try:
|
try:
|
||||||
service = docker_client.services.get(service_name)
|
service = docker_client.services.get(service_name)
|
||||||
@ -216,7 +213,7 @@ def _checkout_job_service(job: Job):
|
|||||||
except docker.errors.DockerException as e:
|
except docker.errors.DockerException as e:
|
||||||
current_app.logger.error(f'Remove service "{service_name}" failed: {e}')
|
current_app.logger.error(f'Remove service "{service_name}" failed: {e}')
|
||||||
|
|
||||||
def _remove_job_service(job: Job):
|
def _remove_job_service(job):
|
||||||
service_name = f'job_{job.id}'
|
service_name = f'job_{job.id}'
|
||||||
try:
|
try:
|
||||||
service = docker_client.services.get(service_name)
|
service = docker_client.services.get(service_name)
|
@ -1,7 +1,8 @@
|
|||||||
from flask import abort, request
|
from flask import abort, current_app, request
|
||||||
from flask_login import current_user
|
from flask_login import current_user
|
||||||
from functools import wraps
|
from functools import wraps
|
||||||
from typing import Optional
|
from threading import Thread
|
||||||
|
from typing import List, Union
|
||||||
from werkzeug.exceptions import NotAcceptable
|
from werkzeug.exceptions import NotAcceptable
|
||||||
from app.models import Permission
|
from app.models import Permission
|
||||||
|
|
||||||
@ -23,21 +24,22 @@ def admin_required(f):
|
|||||||
|
|
||||||
def socketio_login_required(f):
|
def socketio_login_required(f):
|
||||||
@wraps(f)
|
@wraps(f)
|
||||||
def wrapper(*args, **kwargs):
|
def decorated_function(*args, **kwargs):
|
||||||
if current_user.is_authenticated:
|
if current_user.is_authenticated:
|
||||||
return f(*args, **kwargs)
|
return f(*args, **kwargs)
|
||||||
return {'status': 401, 'statusText': 'Unauthorized'}
|
else:
|
||||||
return wrapper
|
return {'code': 401, 'msg': 'Unauthorized'}
|
||||||
|
return decorated_function
|
||||||
|
|
||||||
|
|
||||||
def socketio_permission_required(permission):
|
def socketio_permission_required(permission):
|
||||||
def decorator(f):
|
def decorator(f):
|
||||||
@wraps(f)
|
@wraps(f)
|
||||||
def wrapper(*args, **kwargs):
|
def decorated_function(*args, **kwargs):
|
||||||
if not current_user.can(permission):
|
if not current_user.can(permission):
|
||||||
return {'status': 403, 'statusText': 'Forbidden'}
|
return {'code': 403, 'msg': 'Forbidden'}
|
||||||
return f(*args, **kwargs)
|
return f(*args, **kwargs)
|
||||||
return wrapper
|
return decorated_function
|
||||||
return decorator
|
return decorator
|
||||||
|
|
||||||
|
|
||||||
@ -45,9 +47,27 @@ def socketio_admin_required(f):
|
|||||||
return socketio_permission_required(Permission.ADMINISTRATE)(f)
|
return socketio_permission_required(Permission.ADMINISTRATE)(f)
|
||||||
|
|
||||||
|
|
||||||
|
def background(f):
|
||||||
|
'''
|
||||||
|
' This decorator executes a function in a Thread.
|
||||||
|
' Decorated functions need to be executed within a code block where an
|
||||||
|
' app context exists.
|
||||||
|
'
|
||||||
|
' NOTE: An app object is passed as a keyword argument to the decorated
|
||||||
|
' function.
|
||||||
|
'''
|
||||||
|
@wraps(f)
|
||||||
|
def wrapped(*args, **kwargs):
|
||||||
|
kwargs['app'] = current_app._get_current_object()
|
||||||
|
thread = Thread(target=f, args=args, kwargs=kwargs)
|
||||||
|
thread.start()
|
||||||
|
return thread
|
||||||
|
return wrapped
|
||||||
|
|
||||||
|
|
||||||
def content_negotiation(
|
def content_negotiation(
|
||||||
produces: Optional[str | list[str]] = None,
|
produces: Union[str, List[str], None] = None,
|
||||||
consumes: Optional[str | list[str]] = None
|
consumes: Union[str, List[str], None] = None
|
||||||
):
|
):
|
||||||
def decorator(f):
|
def decorator(f):
|
||||||
@wraps(f)
|
@wraps(f)
|
||||||
|
31
app/email.py
31
app/email.py
@ -1,32 +1,25 @@
|
|||||||
from flask import current_app, Flask, render_template
|
from flask import current_app, render_template
|
||||||
from flask_mail import Message
|
from flask_mail import Message
|
||||||
from threading import Thread
|
from threading import Thread
|
||||||
from app import mail
|
from app import mail
|
||||||
|
|
||||||
|
|
||||||
def create_message(
|
def create_message(recipient, subject, template, **kwargs):
|
||||||
recipient: str,
|
subject_prefix: str = current_app.config['NOPAQUE_MAIL_SUBJECT_PREFIX']
|
||||||
subject: str,
|
msg: Message = Message(
|
||||||
template: str,
|
body=render_template(f'{template}.txt.j2', **kwargs),
|
||||||
**context
|
html=render_template(f'{template}.html.j2', **kwargs),
|
||||||
) -> Message:
|
|
||||||
message = Message(
|
|
||||||
body=render_template(f'{template}.txt.j2', **context),
|
|
||||||
html=render_template(f'{template}.html.j2', **context),
|
|
||||||
recipients=[recipient],
|
recipients=[recipient],
|
||||||
subject=f'[nopaque] {subject}'
|
subject=f'{subject_prefix} {subject}'
|
||||||
)
|
)
|
||||||
return message
|
return msg
|
||||||
|
|
||||||
|
|
||||||
def send(message: Message) -> Thread:
|
def send(msg, *args, **kwargs):
|
||||||
def _send(app: Flask, message: Message):
|
def _send(app, msg):
|
||||||
with app.app_context():
|
with app.app_context():
|
||||||
mail.send(message)
|
mail.send(msg)
|
||||||
|
|
||||||
thread = Thread(
|
thread = Thread(target=_send, args=[current_app._get_current_object(), msg])
|
||||||
target=_send,
|
|
||||||
args=[current_app._get_current_object(), message]
|
|
||||||
)
|
|
||||||
thread.start()
|
thread.start()
|
||||||
return thread
|
return thread
|
||||||
|
@ -1,2 +0,0 @@
|
|||||||
from .types import ContainerColumn
|
|
||||||
from .types import IntEnumColumn
|
|
@ -1,42 +0,0 @@
|
|||||||
import json
|
|
||||||
from app import db
|
|
||||||
|
|
||||||
|
|
||||||
class ContainerColumn(db.TypeDecorator):
|
|
||||||
impl = db.String
|
|
||||||
|
|
||||||
def __init__(self, container_type, *args, **kwargs):
|
|
||||||
super().__init__(*args, **kwargs)
|
|
||||||
self.container_type = container_type
|
|
||||||
|
|
||||||
def process_bind_param(self, value, dialect):
|
|
||||||
if isinstance(value, self.container_type):
|
|
||||||
return json.dumps(value)
|
|
||||||
elif isinstance(value, str) and isinstance(json.loads(value), self.container_type):
|
|
||||||
return value
|
|
||||||
else:
|
|
||||||
return TypeError()
|
|
||||||
|
|
||||||
def process_result_value(self, value, dialect):
|
|
||||||
return json.loads(value)
|
|
||||||
|
|
||||||
|
|
||||||
class IntEnumColumn(db.TypeDecorator):
|
|
||||||
impl = db.Integer
|
|
||||||
|
|
||||||
def __init__(self, enum_type, *args, **kwargs):
|
|
||||||
super().__init__(*args, **kwargs)
|
|
||||||
self.enum_type = enum_type
|
|
||||||
|
|
||||||
def process_bind_param(self, value, dialect):
|
|
||||||
if isinstance(value, self.enum_type) and isinstance(value.value, int):
|
|
||||||
return value.value
|
|
||||||
elif isinstance(value, int):
|
|
||||||
return self.enum_type(value).value
|
|
||||||
elif isinstance(value, str):
|
|
||||||
return self.enum_type[value].value
|
|
||||||
else:
|
|
||||||
return TypeError()
|
|
||||||
|
|
||||||
def process_result_value(self, value, dialect):
|
|
||||||
return self.enum_type(value)
|
|
@ -1,2 +1,18 @@
|
|||||||
from .handle_corpora import handle_corpora
|
from flask import Blueprint
|
||||||
from .handle_jobs import handle_jobs
|
from flask_login import login_required
|
||||||
|
|
||||||
|
|
||||||
|
bp = Blueprint('jobs', __name__)
|
||||||
|
|
||||||
|
|
||||||
|
@bp.before_request
|
||||||
|
@login_required
|
||||||
|
def before_request():
|
||||||
|
'''
|
||||||
|
Ensures that the routes in this package can only be visited by users that
|
||||||
|
are logged in.
|
||||||
|
'''
|
||||||
|
pass
|
||||||
|
|
||||||
|
|
||||||
|
from . import routes, json_routes
|
||||||
|
@ -1,6 +1,7 @@
|
|||||||
from flask import abort, current_app
|
from flask import abort, current_app
|
||||||
from flask_login import current_user
|
from flask_login import current_user
|
||||||
from threading import Thread
|
from threading import Thread
|
||||||
|
import os
|
||||||
from app import db
|
from app import db
|
||||||
from app.decorators import admin_required, content_negotiation
|
from app.decorators import admin_required, content_negotiation
|
||||||
from app.models import Job, JobStatus
|
from app.models import Job, JobStatus
|
||||||
@ -17,7 +18,7 @@ def delete_job(job_id):
|
|||||||
db.session.commit()
|
db.session.commit()
|
||||||
|
|
||||||
job = Job.query.get_or_404(job_id)
|
job = Job.query.get_or_404(job_id)
|
||||||
if not (job.user == current_user or current_user.is_administrator):
|
if not (job.user == current_user or current_user.is_administrator()):
|
||||||
abort(403)
|
abort(403)
|
||||||
thread = Thread(
|
thread = Thread(
|
||||||
target=_delete_job,
|
target=_delete_job,
|
||||||
@ -38,7 +39,7 @@ def job_log(job_id):
|
|||||||
if job.status not in [JobStatus.COMPLETED, JobStatus.FAILED]:
|
if job.status not in [JobStatus.COMPLETED, JobStatus.FAILED]:
|
||||||
response = {'errors': {'message': 'Job status is not completed or failed'}}
|
response = {'errors': {'message': 'Job status is not completed or failed'}}
|
||||||
return response, 409
|
return response, 409
|
||||||
with open(job.path / 'pipeline_data' / 'logs' / 'pyflow_log.txt') as log_file:
|
with open(os.path.join(job.path, 'pipeline_data', 'logs', 'pyflow_log.txt')) as log_file:
|
||||||
log = log_file.read()
|
log = log_file.read()
|
||||||
response_data = {
|
response_data = {
|
||||||
'jobLog': log
|
'jobLog': log
|
||||||
@ -56,7 +57,7 @@ def restart_job(job_id):
|
|||||||
db.session.commit()
|
db.session.commit()
|
||||||
|
|
||||||
job = Job.query.get_or_404(job_id)
|
job = Job.query.get_or_404(job_id)
|
||||||
if not (job.user == current_user or current_user.is_administrator):
|
if not (job.user == current_user or current_user.is_administrator()):
|
||||||
abort(403)
|
abort(403)
|
||||||
if job.status == JobStatus.FAILED:
|
if job.status == JobStatus.FAILED:
|
||||||
response = {'errors': {'message': 'Job status is not "failed"'}}
|
response = {'errors': {'message': 'Job status is not "failed"'}}
|
@ -5,20 +5,25 @@ from flask import (
|
|||||||
send_from_directory,
|
send_from_directory,
|
||||||
url_for
|
url_for
|
||||||
)
|
)
|
||||||
|
from flask_breadcrumbs import register_breadcrumb
|
||||||
from flask_login import current_user
|
from flask_login import current_user
|
||||||
|
import os
|
||||||
from app.models import Job, JobInput, JobResult
|
from app.models import Job, JobInput, JobResult
|
||||||
from . import bp
|
from . import bp
|
||||||
|
from .utils import job_dynamic_list_constructor as job_dlc
|
||||||
|
|
||||||
|
|
||||||
@bp.route('')
|
@bp.route('')
|
||||||
def jobs():
|
@register_breadcrumb(bp, '.', '<i class="nopaque-icons left">J</i>My Jobs')
|
||||||
|
def corpora():
|
||||||
return redirect(url_for('main.dashboard', _anchor='jobs'))
|
return redirect(url_for('main.dashboard', _anchor='jobs'))
|
||||||
|
|
||||||
|
|
||||||
@bp.route('/<hashid:job_id>')
|
@bp.route('/<hashid:job_id>')
|
||||||
|
@register_breadcrumb(bp, '.entity', '', dynamic_list_constructor=job_dlc)
|
||||||
def job(job_id):
|
def job(job_id):
|
||||||
job = Job.query.get_or_404(job_id)
|
job = Job.query.get_or_404(job_id)
|
||||||
if not (job.user == current_user or current_user.is_administrator):
|
if not (job.user == current_user or current_user.is_administrator()):
|
||||||
abort(403)
|
abort(403)
|
||||||
return render_template(
|
return render_template(
|
||||||
'jobs/job.html.j2',
|
'jobs/job.html.j2',
|
||||||
@ -30,13 +35,13 @@ def job(job_id):
|
|||||||
@bp.route('/<hashid:job_id>/inputs/<hashid:job_input_id>/download')
|
@bp.route('/<hashid:job_id>/inputs/<hashid:job_input_id>/download')
|
||||||
def download_job_input(job_id, job_input_id):
|
def download_job_input(job_id, job_input_id):
|
||||||
job_input = JobInput.query.filter_by(job_id=job_id, id=job_input_id).first_or_404()
|
job_input = JobInput.query.filter_by(job_id=job_id, id=job_input_id).first_or_404()
|
||||||
if not (job_input.job.user == current_user or current_user.is_administrator):
|
if not (job_input.job.user == current_user or current_user.is_administrator()):
|
||||||
abort(403)
|
abort(403)
|
||||||
return send_from_directory(
|
return send_from_directory(
|
||||||
job_input.path.parent,
|
os.path.dirname(job_input.path),
|
||||||
job_input.path.name,
|
os.path.basename(job_input.path),
|
||||||
as_attachment=True,
|
as_attachment=True,
|
||||||
download_name=job_input.filename,
|
attachment_filename=job_input.filename,
|
||||||
mimetype=job_input.mimetype
|
mimetype=job_input.mimetype
|
||||||
)
|
)
|
||||||
|
|
||||||
@ -44,12 +49,12 @@ def download_job_input(job_id, job_input_id):
|
|||||||
@bp.route('/<hashid:job_id>/results/<hashid:job_result_id>/download')
|
@bp.route('/<hashid:job_id>/results/<hashid:job_result_id>/download')
|
||||||
def download_job_result(job_id, job_result_id):
|
def download_job_result(job_id, job_result_id):
|
||||||
job_result = JobResult.query.filter_by(job_id=job_id, id=job_result_id).first_or_404()
|
job_result = JobResult.query.filter_by(job_id=job_id, id=job_result_id).first_or_404()
|
||||||
if not (job_result.job.user == current_user or current_user.is_administrator):
|
if not (job_result.job.user == current_user or current_user.is_administrator()):
|
||||||
abort(403)
|
abort(403)
|
||||||
return send_from_directory(
|
return send_from_directory(
|
||||||
job_result.path.parent,
|
os.path.dirname(job_result.path),
|
||||||
job_result.path.name,
|
os.path.basename(job_result.path),
|
||||||
as_attachment=True,
|
as_attachment=True,
|
||||||
download_name=job_result.filename,
|
attachment_filename=job_result.filename,
|
||||||
mimetype=job_result.mimetype
|
mimetype=job_result.mimetype
|
||||||
)
|
)
|
13
app/jobs/utils.py
Normal file
13
app/jobs/utils.py
Normal file
@ -0,0 +1,13 @@
|
|||||||
|
from flask import request, url_for
|
||||||
|
from app.models import Job
|
||||||
|
|
||||||
|
|
||||||
|
def job_dynamic_list_constructor():
|
||||||
|
job_id = request.view_args['job_id']
|
||||||
|
job = Job.query.get_or_404(job_id)
|
||||||
|
return [
|
||||||
|
{
|
||||||
|
'text': f'<i class="nopaque-icons left service-icons" data-service="{job.service}"></i>{job.title}',
|
||||||
|
'url': url_for('.job', job_id=job_id)
|
||||||
|
}
|
||||||
|
]
|
@ -1,9 +1,7 @@
|
|||||||
from flask import current_app
|
from flask import current_app
|
||||||
from flask_migrate import upgrade
|
from flask_migrate import upgrade
|
||||||
from pathlib import Path
|
import os
|
||||||
from app import db
|
|
||||||
from app.models import (
|
from app.models import (
|
||||||
Corpus,
|
|
||||||
CorpusFollowerRole,
|
CorpusFollowerRole,
|
||||||
Role,
|
Role,
|
||||||
SpaCyNLPPipelineModel,
|
SpaCyNLPPipelineModel,
|
||||||
@ -16,22 +14,25 @@ from . import bp
|
|||||||
@bp.cli.command('deploy')
|
@bp.cli.command('deploy')
|
||||||
def deploy():
|
def deploy():
|
||||||
''' Run deployment tasks. '''
|
''' Run deployment tasks. '''
|
||||||
|
# Make default directories
|
||||||
print('Make default directories')
|
print('Make default directories')
|
||||||
base_dir = current_app.config['NOPAQUE_DATA_DIR']
|
base_dir = current_app.config['NOPAQUE_DATA_DIR']
|
||||||
default_dirs: list[Path] = [
|
default_dirs = [
|
||||||
base_dir / 'tmp',
|
os.path.join(base_dir, 'tmp'),
|
||||||
base_dir / 'users'
|
os.path.join(base_dir, 'users')
|
||||||
]
|
]
|
||||||
for default_dir in default_dirs:
|
for dir in default_dirs:
|
||||||
if not default_dir.exists():
|
if os.path.exists(dir):
|
||||||
default_dir.mkdir()
|
if not os.path.isdir(dir):
|
||||||
if not default_dir.is_dir():
|
raise NotADirectoryError(f'{dir} is not a directory')
|
||||||
raise NotADirectoryError(f'{default_dir} is not a directory')
|
else:
|
||||||
|
os.mkdir(dir)
|
||||||
|
|
||||||
|
# migrate database to latest revision
|
||||||
print('Migrate database to latest revision')
|
print('Migrate database to latest revision')
|
||||||
upgrade()
|
upgrade()
|
||||||
|
|
||||||
|
# Insert/Update default database values
|
||||||
print('Insert/Update default Roles')
|
print('Insert/Update default Roles')
|
||||||
Role.insert_defaults()
|
Role.insert_defaults()
|
||||||
print('Insert/Update default Users')
|
print('Insert/Update default Users')
|
||||||
@ -42,10 +43,3 @@ def deploy():
|
|||||||
SpaCyNLPPipelineModel.insert_defaults()
|
SpaCyNLPPipelineModel.insert_defaults()
|
||||||
print('Insert/Update default TesseractOCRPipelineModels')
|
print('Insert/Update default TesseractOCRPipelineModels')
|
||||||
TesseractOCRPipelineModel.insert_defaults()
|
TesseractOCRPipelineModel.insert_defaults()
|
||||||
|
|
||||||
print('Stop running analysis sessions')
|
|
||||||
for corpus in Corpus.query.all():
|
|
||||||
corpus.num_analysis_sessions = 0
|
|
||||||
db.session.commit()
|
|
||||||
|
|
||||||
# TODO: Implement checks for if the nopaque network exists
|
|
@ -1,11 +1,14 @@
|
|||||||
from flask import flash, redirect, render_template, url_for
|
from flask import flash, redirect, render_template, url_for
|
||||||
|
from flask_breadcrumbs import register_breadcrumb
|
||||||
from flask_login import current_user, login_required, login_user
|
from flask_login import current_user, login_required, login_user
|
||||||
from app.blueprints.auth.forms import LoginForm
|
from app.auth.forms import LoginForm
|
||||||
from app.models import Corpus, User
|
from app.models import Corpus, User
|
||||||
|
from sqlalchemy import or_
|
||||||
from . import bp
|
from . import bp
|
||||||
|
|
||||||
|
|
||||||
@bp.route('/', methods=['GET', 'POST'])
|
@bp.route('/', methods=['GET', 'POST'])
|
||||||
|
@register_breadcrumb(bp, '.', '<i class="material-icons">home</i>')
|
||||||
def index():
|
def index():
|
||||||
form = LoginForm()
|
form = LoginForm()
|
||||||
if form.validate_on_submit():
|
if form.validate_on_submit():
|
||||||
@ -24,6 +27,7 @@ def index():
|
|||||||
|
|
||||||
|
|
||||||
@bp.route('/faq')
|
@bp.route('/faq')
|
||||||
|
@register_breadcrumb(bp, '.faq', 'Frequently Asked Questions')
|
||||||
def faq():
|
def faq():
|
||||||
return render_template(
|
return render_template(
|
||||||
'main/faq.html.j2',
|
'main/faq.html.j2',
|
||||||
@ -32,6 +36,7 @@ def faq():
|
|||||||
|
|
||||||
|
|
||||||
@bp.route('/dashboard')
|
@bp.route('/dashboard')
|
||||||
|
@register_breadcrumb(bp, '.dashboard', '<i class="material-icons left">dashboard</i>Dashboard')
|
||||||
@login_required
|
@login_required
|
||||||
def dashboard():
|
def dashboard():
|
||||||
return render_template(
|
return render_template(
|
||||||
@ -40,15 +45,14 @@ def dashboard():
|
|||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@bp.route('/manual')
|
# @bp.route('/user_manual')
|
||||||
def manual():
|
# @register_breadcrumb(bp, '.user_manual', '<i class="material-icons left">help</i>User manual')
|
||||||
return render_template(
|
# def user_manual():
|
||||||
'main/manual.html.j2',
|
# return render_template('main/user_manual.html.j2', title='User manual')
|
||||||
title='Manual'
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
@bp.route('/news')
|
@bp.route('/news')
|
||||||
|
@register_breadcrumb(bp, '.news', '<i class="material-icons left">email</i>News')
|
||||||
def news():
|
def news():
|
||||||
return render_template(
|
return render_template(
|
||||||
'main/news.html.j2',
|
'main/news.html.j2',
|
||||||
@ -57,6 +61,7 @@ def news():
|
|||||||
|
|
||||||
|
|
||||||
@bp.route('/privacy_policy')
|
@bp.route('/privacy_policy')
|
||||||
|
@register_breadcrumb(bp, '.privacy_policy', 'Private statement (GDPR)')
|
||||||
def privacy_policy():
|
def privacy_policy():
|
||||||
return render_template(
|
return render_template(
|
||||||
'main/privacy_policy.html.j2',
|
'main/privacy_policy.html.j2',
|
||||||
@ -65,6 +70,7 @@ def privacy_policy():
|
|||||||
|
|
||||||
|
|
||||||
@bp.route('/terms_of_use')
|
@bp.route('/terms_of_use')
|
||||||
|
@register_breadcrumb(bp, '.terms_of_use', 'Terms of Use')
|
||||||
def terms_of_use():
|
def terms_of_use():
|
||||||
return render_template(
|
return render_template(
|
||||||
'main/terms_of_use.html.j2',
|
'main/terms_of_use.html.j2',
|
||||||
@ -72,14 +78,15 @@ def terms_of_use():
|
|||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@bp.route('/social')
|
# @bp.route('/social-area')
|
||||||
@login_required
|
# @register_breadcrumb(bp, '.social_area', '<i class="material-icons left">group</i>Social Area')
|
||||||
def social():
|
# @login_required
|
||||||
corpora = Corpus.query.filter(Corpus.is_public == True, Corpus.user != current_user).all()
|
# def social_area():
|
||||||
users = User.query.filter(User.is_public == True, User.id != current_user.id).all()
|
# corpora = Corpus.query.filter(Corpus.is_public == True, Corpus.user != current_user).all()
|
||||||
return render_template(
|
# users = User.query.filter(User.is_public == True, User.id != current_user.id).all()
|
||||||
'main/social.html.j2',
|
# return render_template(
|
||||||
title='Social',
|
# 'main/social_area.html.j2',
|
||||||
corpora=corpora,
|
# title='Social Area',
|
||||||
users=users
|
# corpora=corpora,
|
||||||
)
|
# users=users
|
||||||
|
# )
|
1815
app/models.py
Normal file
1815
app/models.py
Normal file
File diff suppressed because it is too large
Load Diff
@ -1,14 +0,0 @@
|
|||||||
from .anonymous_user import *
|
|
||||||
from .avatar import *
|
|
||||||
from .corpus_file import *
|
|
||||||
from .corpus_follower_association import *
|
|
||||||
from .corpus_follower_role import *
|
|
||||||
from .corpus import *
|
|
||||||
from .job_input import *
|
|
||||||
from .job_result import *
|
|
||||||
from .job import *
|
|
||||||
from .role import *
|
|
||||||
from .spacy_nlp_pipeline_model import *
|
|
||||||
from .tesseract_ocr_pipeline_model import *
|
|
||||||
from .token import *
|
|
||||||
from .user import *
|
|
@ -1,10 +0,0 @@
|
|||||||
from flask_login import AnonymousUserMixin
|
|
||||||
|
|
||||||
|
|
||||||
class AnonymousUser(AnonymousUserMixin):
|
|
||||||
def can(self, permissions):
|
|
||||||
return False
|
|
||||||
|
|
||||||
@property
|
|
||||||
def is_administrator(self):
|
|
||||||
return False
|
|
@ -1,40 +0,0 @@
|
|||||||
from flask import current_app
|
|
||||||
from flask_hashids import HashidMixin
|
|
||||||
from pathlib import Path
|
|
||||||
from app import db
|
|
||||||
from .file_mixin import FileMixin
|
|
||||||
|
|
||||||
|
|
||||||
class Avatar(HashidMixin, FileMixin, db.Model):
|
|
||||||
__tablename__ = 'avatars'
|
|
||||||
# Primary key
|
|
||||||
id = db.Column(db.Integer, primary_key=True)
|
|
||||||
# Foreign keys
|
|
||||||
user_id = db.Column(db.Integer, db.ForeignKey('users.id'))
|
|
||||||
# Relationships
|
|
||||||
user = db.relationship('User', back_populates='avatar')
|
|
||||||
|
|
||||||
@property
|
|
||||||
def path(self) -> Path:
|
|
||||||
return self.user.path / 'avatar'
|
|
||||||
# return os.path.join(self.user.path, 'avatar')
|
|
||||||
|
|
||||||
def delete(self):
|
|
||||||
try:
|
|
||||||
self.path.unlink(missing_ok=True)
|
|
||||||
except OSError as e:
|
|
||||||
current_app.logger.error(e)
|
|
||||||
raise
|
|
||||||
db.session.delete(self)
|
|
||||||
|
|
||||||
def to_json_serializeable(self, backrefs=False, relationships=False):
|
|
||||||
json_serializeable = {
|
|
||||||
'id': self.hashid,
|
|
||||||
**self.file_mixin_to_json_serializeable()
|
|
||||||
}
|
|
||||||
if backrefs:
|
|
||||||
json_serializeable['user'] = \
|
|
||||||
self.user.to_json_serializeable(backrefs=True)
|
|
||||||
if relationships:
|
|
||||||
pass
|
|
||||||
return json_serializeable
|
|
@ -1,199 +0,0 @@
|
|||||||
from datetime import datetime
|
|
||||||
from enum import IntEnum
|
|
||||||
from flask import current_app, url_for
|
|
||||||
from flask_hashids import HashidMixin
|
|
||||||
from sqlalchemy.ext.associationproxy import association_proxy
|
|
||||||
from pathlib import Path
|
|
||||||
import shutil
|
|
||||||
import xml.etree.ElementTree as ET
|
|
||||||
from app import db
|
|
||||||
from app.converters.vrt import normalize_vrt_file
|
|
||||||
from app.extensions.nopaque_sqlalchemy_extras import IntEnumColumn
|
|
||||||
from .corpus_follower_association import CorpusFollowerAssociation
|
|
||||||
|
|
||||||
|
|
||||||
class CorpusStatus(IntEnum):
|
|
||||||
UNPREPARED = 1
|
|
||||||
SUBMITTED = 2
|
|
||||||
QUEUED = 3
|
|
||||||
BUILDING = 4
|
|
||||||
BUILT = 5
|
|
||||||
FAILED = 6
|
|
||||||
STARTING_ANALYSIS_SESSION = 7
|
|
||||||
RUNNING_ANALYSIS_SESSION = 8
|
|
||||||
CANCELING_ANALYSIS_SESSION = 9
|
|
||||||
|
|
||||||
@staticmethod
|
|
||||||
def get(corpus_status: 'CorpusStatus | int | str') -> 'CorpusStatus':
|
|
||||||
if isinstance(corpus_status, CorpusStatus):
|
|
||||||
return corpus_status
|
|
||||||
if isinstance(corpus_status, int):
|
|
||||||
return CorpusStatus(corpus_status)
|
|
||||||
if isinstance(corpus_status, str):
|
|
||||||
return CorpusStatus[corpus_status]
|
|
||||||
raise TypeError('corpus_status must be CorpusStatus, int, or str')
|
|
||||||
|
|
||||||
|
|
||||||
class Corpus(HashidMixin, db.Model):
|
|
||||||
'''
|
|
||||||
Class to define a corpus.
|
|
||||||
'''
|
|
||||||
__tablename__ = 'corpora'
|
|
||||||
# Primary key
|
|
||||||
id = db.Column(db.Integer, primary_key=True)
|
|
||||||
# Foreign keys
|
|
||||||
user_id = db.Column(db.Integer, db.ForeignKey('users.id'))
|
|
||||||
# Fields
|
|
||||||
creation_date = db.Column(db.DateTime(), default=datetime.utcnow)
|
|
||||||
description = db.Column(db.String(255))
|
|
||||||
status = db.Column(
|
|
||||||
IntEnumColumn(CorpusStatus),
|
|
||||||
default=CorpusStatus.UNPREPARED
|
|
||||||
)
|
|
||||||
title = db.Column(db.String(32))
|
|
||||||
num_analysis_sessions = db.Column(db.Integer, default=0)
|
|
||||||
num_tokens = db.Column(db.Integer, default=0)
|
|
||||||
is_public = db.Column(db.Boolean, default=False)
|
|
||||||
# Relationships
|
|
||||||
files = db.relationship(
|
|
||||||
'CorpusFile',
|
|
||||||
back_populates='corpus',
|
|
||||||
lazy='dynamic',
|
|
||||||
cascade='all, delete-orphan'
|
|
||||||
)
|
|
||||||
corpus_follower_associations = db.relationship(
|
|
||||||
'CorpusFollowerAssociation',
|
|
||||||
back_populates='corpus',
|
|
||||||
cascade='all, delete-orphan'
|
|
||||||
)
|
|
||||||
followers = association_proxy(
|
|
||||||
'corpus_follower_associations',
|
|
||||||
'follower',
|
|
||||||
creator=lambda u: CorpusFollowerAssociation(follower=u)
|
|
||||||
)
|
|
||||||
user = db.relationship('User', back_populates='corpora')
|
|
||||||
# "static" attributes
|
|
||||||
max_num_tokens = 2_147_483_647
|
|
||||||
|
|
||||||
def __repr__(self):
|
|
||||||
return f'<Corpus {self.title}>'
|
|
||||||
|
|
||||||
@property
|
|
||||||
def analysis_url(self):
|
|
||||||
return url_for('corpora.analysis', corpus_id=self.id)
|
|
||||||
|
|
||||||
@property
|
|
||||||
def jsonpatch_path(self):
|
|
||||||
return f'{self.user.jsonpatch_path}/corpora/{self.hashid}'
|
|
||||||
|
|
||||||
@property
|
|
||||||
def path(self) -> Path:
|
|
||||||
return self.user.path / 'corpora' / f'{self.id}'
|
|
||||||
|
|
||||||
@property
|
|
||||||
def url(self):
|
|
||||||
return url_for('corpora.corpus', corpus_id=self.id)
|
|
||||||
|
|
||||||
@property
|
|
||||||
def user_hashid(self):
|
|
||||||
return self.user.hashid
|
|
||||||
|
|
||||||
@staticmethod
|
|
||||||
def create(**kwargs):
|
|
||||||
corpus = Corpus(**kwargs)
|
|
||||||
db.session.add(corpus)
|
|
||||||
db.session.flush(objects=[corpus])
|
|
||||||
db.session.refresh(corpus)
|
|
||||||
corpus_files_dir = corpus.path / 'files'
|
|
||||||
corpus_cwb_dir = corpus.path / 'cwb'
|
|
||||||
corpus_cwb_data_dir = corpus_cwb_dir / 'data'
|
|
||||||
corpus_cwb_registry_dir = corpus_cwb_dir / 'registry'
|
|
||||||
try:
|
|
||||||
corpus.path.mkdir()
|
|
||||||
corpus_files_dir.mkdir()
|
|
||||||
corpus_cwb_dir.mkdir()
|
|
||||||
corpus_cwb_data_dir.mkdir()
|
|
||||||
corpus_cwb_registry_dir.mkdir()
|
|
||||||
except OSError as e:
|
|
||||||
# TODO: Potential leftover cleanup
|
|
||||||
current_app.logger.error(e)
|
|
||||||
db.session.rollback()
|
|
||||||
raise
|
|
||||||
return corpus
|
|
||||||
|
|
||||||
def build(self):
|
|
||||||
corpus_cwb_dir = self.path / 'cwb'
|
|
||||||
corpus_cwb_data_dir = corpus_cwb_dir / 'data'
|
|
||||||
corpus_cwb_registry_dir = corpus_cwb_dir / 'registry'
|
|
||||||
try:
|
|
||||||
shutil.rmtree(corpus_cwb_dir, ignore_errors=True)
|
|
||||||
corpus_cwb_dir.mkdir()
|
|
||||||
corpus_cwb_data_dir.mkdir()
|
|
||||||
corpus_cwb_registry_dir.mkdir()
|
|
||||||
except OSError as e:
|
|
||||||
current_app.logger.error(e)
|
|
||||||
self.status = CorpusStatus.FAILED
|
|
||||||
raise
|
|
||||||
corpus_element = ET.fromstring('<corpus>\n</corpus>')
|
|
||||||
for corpus_file in self.files:
|
|
||||||
normalized_vrt_path = corpus_cwb_dir / f'{corpus_file.id}.norm.vrt'
|
|
||||||
try:
|
|
||||||
normalize_vrt_file(corpus_file.path, normalized_vrt_path)
|
|
||||||
except:
|
|
||||||
self.status = CorpusStatus.FAILED
|
|
||||||
return
|
|
||||||
element_tree = ET.parse(normalized_vrt_path)
|
|
||||||
text_element = element_tree.getroot()
|
|
||||||
text_element.set('author', corpus_file.author)
|
|
||||||
text_element.set('title', corpus_file.title)
|
|
||||||
text_element.set(
|
|
||||||
'publishing_year',
|
|
||||||
f'{corpus_file.publishing_year}'
|
|
||||||
)
|
|
||||||
text_element.set('address', corpus_file.address or 'NULL')
|
|
||||||
text_element.set('booktitle', corpus_file.booktitle or 'NULL')
|
|
||||||
text_element.set('chapter', corpus_file.chapter or 'NULL')
|
|
||||||
text_element.set('editor', corpus_file.editor or 'NULL')
|
|
||||||
text_element.set('institution', corpus_file.institution or 'NULL')
|
|
||||||
text_element.set('journal', corpus_file.journal or 'NULL')
|
|
||||||
text_element.set('pages', f'{corpus_file.pages}' or 'NULL')
|
|
||||||
text_element.set('publisher', corpus_file.publisher or 'NULL')
|
|
||||||
text_element.set('school', corpus_file.school or 'NULL')
|
|
||||||
text_element.tail = '\n'
|
|
||||||
# corpus_element.insert(1, text_element)
|
|
||||||
corpus_element.append(text_element)
|
|
||||||
ET.ElementTree(corpus_element).write(
|
|
||||||
corpus_cwb_dir / 'corpus.vrt',
|
|
||||||
encoding='utf-8'
|
|
||||||
)
|
|
||||||
self.status = CorpusStatus.SUBMITTED
|
|
||||||
|
|
||||||
def delete(self):
|
|
||||||
shutil.rmtree(self.path, ignore_errors=True)
|
|
||||||
db.session.delete(self)
|
|
||||||
|
|
||||||
def to_json_serializeable(self, backrefs=False, relationships=False):
|
|
||||||
json_serializeable = {
|
|
||||||
'id': self.hashid,
|
|
||||||
'creation_date': f'{self.creation_date.isoformat()}Z',
|
|
||||||
'description': self.description,
|
|
||||||
'max_num_tokens': self.max_num_tokens,
|
|
||||||
'num_analysis_sessions': self.num_analysis_sessions,
|
|
||||||
'num_tokens': self.num_tokens,
|
|
||||||
'status': self.status.name,
|
|
||||||
'title': self.title,
|
|
||||||
'is_public': self.is_public
|
|
||||||
}
|
|
||||||
if backrefs:
|
|
||||||
json_serializeable['user'] = \
|
|
||||||
self.user.to_json_serializeable(backrefs=True)
|
|
||||||
if relationships:
|
|
||||||
json_serializeable['corpus_follower_associations'] = {
|
|
||||||
x.hashid: x.to_json_serializeable()
|
|
||||||
for x in self.corpus_follower_associations
|
|
||||||
}
|
|
||||||
json_serializeable['files'] = {
|
|
||||||
x.hashid: x.to_json_serializeable(relationships=True)
|
|
||||||
for x in self.files
|
|
||||||
}
|
|
||||||
return json_serializeable
|
|
@ -1,102 +0,0 @@
|
|||||||
from flask import current_app, url_for
|
|
||||||
from flask_hashids import HashidMixin
|
|
||||||
from pathlib import Path
|
|
||||||
from app import db
|
|
||||||
from .corpus import CorpusStatus
|
|
||||||
from .file_mixin import FileMixin
|
|
||||||
|
|
||||||
|
|
||||||
class CorpusFile(FileMixin, HashidMixin, db.Model):
|
|
||||||
__tablename__ = 'corpus_files'
|
|
||||||
# Primary key
|
|
||||||
id = db.Column(db.Integer, primary_key=True)
|
|
||||||
# Foreign keys
|
|
||||||
corpus_id = db.Column(db.Integer, db.ForeignKey('corpora.id'))
|
|
||||||
# Fields
|
|
||||||
author = db.Column(db.String(255))
|
|
||||||
description = db.Column(db.String(255))
|
|
||||||
publishing_year = db.Column(db.Integer)
|
|
||||||
title = db.Column(db.String(255))
|
|
||||||
address = db.Column(db.String(255))
|
|
||||||
booktitle = db.Column(db.String(255))
|
|
||||||
chapter = db.Column(db.String(255))
|
|
||||||
editor = db.Column(db.String(255))
|
|
||||||
institution = db.Column(db.String(255))
|
|
||||||
journal = db.Column(db.String(255))
|
|
||||||
pages = db.Column(db.String(255))
|
|
||||||
publisher = db.Column(db.String(255))
|
|
||||||
school = db.Column(db.String(255))
|
|
||||||
# Relationships
|
|
||||||
corpus = db.relationship(
|
|
||||||
'Corpus',
|
|
||||||
back_populates='files'
|
|
||||||
)
|
|
||||||
|
|
||||||
@property
|
|
||||||
def download_url(self):
|
|
||||||
return url_for(
|
|
||||||
'corpora.download_corpus_file',
|
|
||||||
corpus_id=self.corpus_id,
|
|
||||||
corpus_file_id=self.id
|
|
||||||
)
|
|
||||||
|
|
||||||
@property
|
|
||||||
def jsonpatch_path(self):
|
|
||||||
return f'{self.corpus.jsonpatch_path}/files/{self.hashid}'
|
|
||||||
|
|
||||||
@property
|
|
||||||
def path(self) -> Path:
|
|
||||||
return self.corpus.path / 'files' / f'{self.id}'
|
|
||||||
|
|
||||||
@property
|
|
||||||
def url(self):
|
|
||||||
return url_for(
|
|
||||||
'corpora.corpus_file',
|
|
||||||
corpus_id=self.corpus_id,
|
|
||||||
corpus_file_id=self.id
|
|
||||||
)
|
|
||||||
|
|
||||||
@property
|
|
||||||
def user_hashid(self):
|
|
||||||
return self.corpus.user.hashid
|
|
||||||
|
|
||||||
@property
|
|
||||||
def user_id(self):
|
|
||||||
return self.corpus.user_id
|
|
||||||
|
|
||||||
def delete(self):
|
|
||||||
try:
|
|
||||||
self.path.unlink(missing_ok=True)
|
|
||||||
except OSError as e:
|
|
||||||
current_app.logger.error(e)
|
|
||||||
raise
|
|
||||||
db.session.delete(self)
|
|
||||||
self.corpus.status = CorpusStatus.UNPREPARED
|
|
||||||
|
|
||||||
def to_json_serializeable(self, backrefs=False, relationships=False):
|
|
||||||
json_serializeable = {
|
|
||||||
'id': self.hashid,
|
|
||||||
'address': self.address,
|
|
||||||
'author': self.author,
|
|
||||||
'description': self.description,
|
|
||||||
'booktitle': self.booktitle,
|
|
||||||
'chapter': self.chapter,
|
|
||||||
'editor': self.editor,
|
|
||||||
'institution': self.institution,
|
|
||||||
'journal': self.journal,
|
|
||||||
'pages': self.pages,
|
|
||||||
'publisher': self.publisher,
|
|
||||||
'publishing_year': self.publishing_year,
|
|
||||||
'school': self.school,
|
|
||||||
'title': self.title,
|
|
||||||
**self.file_mixin_to_json_serializeable(
|
|
||||||
backrefs=backrefs,
|
|
||||||
relationships=relationships
|
|
||||||
)
|
|
||||||
}
|
|
||||||
if backrefs:
|
|
||||||
json_serializeable['corpus'] = \
|
|
||||||
self.corpus.to_json_serializeable(backrefs=True)
|
|
||||||
if relationships:
|
|
||||||
pass
|
|
||||||
return json_serializeable
|
|
@ -1,47 +0,0 @@
|
|||||||
from flask_hashids import HashidMixin
|
|
||||||
from app import db
|
|
||||||
from .corpus_follower_role import CorpusFollowerRole
|
|
||||||
|
|
||||||
|
|
||||||
class CorpusFollowerAssociation(HashidMixin, db.Model):
|
|
||||||
__tablename__ = 'corpus_follower_associations'
|
|
||||||
# Primary key
|
|
||||||
id = db.Column(db.Integer, primary_key=True)
|
|
||||||
# Foreign keys
|
|
||||||
corpus_id = db.Column(db.Integer, db.ForeignKey('corpora.id'))
|
|
||||||
follower_id = db.Column(db.Integer, db.ForeignKey('users.id'))
|
|
||||||
role_id = db.Column(db.Integer, db.ForeignKey('corpus_follower_roles.id'))
|
|
||||||
# Relationships
|
|
||||||
corpus = db.relationship(
|
|
||||||
'Corpus',
|
|
||||||
back_populates='corpus_follower_associations'
|
|
||||||
)
|
|
||||||
follower = db.relationship(
|
|
||||||
'User',
|
|
||||||
back_populates='corpus_follower_associations'
|
|
||||||
)
|
|
||||||
role = db.relationship(
|
|
||||||
'CorpusFollowerRole',
|
|
||||||
back_populates='corpus_follower_associations'
|
|
||||||
)
|
|
||||||
|
|
||||||
def __init__(self, **kwargs):
|
|
||||||
if 'role' not in kwargs:
|
|
||||||
kwargs['role'] = CorpusFollowerRole.query.filter_by(default=True).first()
|
|
||||||
super().__init__(**kwargs)
|
|
||||||
|
|
||||||
def __repr__(self):
|
|
||||||
return f'<CorpusFollowerAssociation {self.follower.__repr__()} ~ {self.role.__repr__()} ~ {self.corpus.__repr__()}>'
|
|
||||||
|
|
||||||
def to_json_serializeable(self, backrefs=False, relationships=False):
|
|
||||||
json_serializeable = {
|
|
||||||
'id': self.hashid,
|
|
||||||
'corpus': self.corpus.to_json_serializeable(backrefs=True),
|
|
||||||
'follower': self.follower.to_json_serializeable(),
|
|
||||||
'role': self.role.to_json_serializeable()
|
|
||||||
}
|
|
||||||
if backrefs:
|
|
||||||
pass
|
|
||||||
if relationships:
|
|
||||||
pass
|
|
||||||
return json_serializeable
|
|
@ -1,106 +0,0 @@
|
|||||||
from flask_hashids import HashidMixin
|
|
||||||
from enum import IntEnum
|
|
||||||
from app import db
|
|
||||||
|
|
||||||
|
|
||||||
class CorpusFollowerPermission(IntEnum):
|
|
||||||
VIEW = 1
|
|
||||||
MANAGE_FILES = 2
|
|
||||||
MANAGE_FOLLOWERS = 4
|
|
||||||
MANAGE_CORPUS = 8
|
|
||||||
|
|
||||||
@staticmethod
|
|
||||||
def get(corpus_follower_permission: 'CorpusFollowerPermission | int | str') -> 'CorpusFollowerPermission':
|
|
||||||
if isinstance(corpus_follower_permission, CorpusFollowerPermission):
|
|
||||||
return corpus_follower_permission
|
|
||||||
if isinstance(corpus_follower_permission, int):
|
|
||||||
return CorpusFollowerPermission(corpus_follower_permission)
|
|
||||||
if isinstance(corpus_follower_permission, str):
|
|
||||||
return CorpusFollowerPermission[corpus_follower_permission]
|
|
||||||
raise TypeError('corpus_follower_permission must be CorpusFollowerPermission, int, or str')
|
|
||||||
|
|
||||||
|
|
||||||
class CorpusFollowerRole(HashidMixin, db.Model):
|
|
||||||
__tablename__ = 'corpus_follower_roles'
|
|
||||||
# Primary key
|
|
||||||
id = db.Column(db.Integer, primary_key=True)
|
|
||||||
# Fields
|
|
||||||
name = db.Column(db.String(64), unique=True)
|
|
||||||
default = db.Column(db.Boolean, default=False, index=True)
|
|
||||||
permissions = db.Column(db.Integer, default=0)
|
|
||||||
# Relationships
|
|
||||||
corpus_follower_associations = db.relationship(
|
|
||||||
'CorpusFollowerAssociation',
|
|
||||||
back_populates='role'
|
|
||||||
)
|
|
||||||
|
|
||||||
def __repr__(self):
|
|
||||||
return f'<CorpusFollowerRole {self.name}>'
|
|
||||||
|
|
||||||
def has_permission(self, permission: CorpusFollowerPermission | int | str):
|
|
||||||
perm = CorpusFollowerPermission.get(permission)
|
|
||||||
return self.permissions & perm.value == perm.value
|
|
||||||
|
|
||||||
def add_permission(self, permission: CorpusFollowerPermission | int | str):
|
|
||||||
perm = CorpusFollowerPermission.get(permission)
|
|
||||||
if not self.has_permission(perm):
|
|
||||||
self.permissions += perm.value
|
|
||||||
|
|
||||||
def remove_permission(self, permission: CorpusFollowerPermission | int | str):
|
|
||||||
perm = CorpusFollowerPermission.get(permission)
|
|
||||||
if self.has_permission(perm):
|
|
||||||
self.permissions -= perm.value
|
|
||||||
|
|
||||||
def reset_permissions(self):
|
|
||||||
self.permissions = 0
|
|
||||||
|
|
||||||
def to_json_serializeable(self, backrefs=False, relationships=False):
|
|
||||||
json_serializeable = {
|
|
||||||
'id': self.hashid,
|
|
||||||
'default': self.default,
|
|
||||||
'name': self.name,
|
|
||||||
'permissions': [
|
|
||||||
x.name
|
|
||||||
for x in CorpusFollowerPermission
|
|
||||||
if self.has_permission(x)
|
|
||||||
]
|
|
||||||
}
|
|
||||||
if backrefs:
|
|
||||||
pass
|
|
||||||
if relationships:
|
|
||||||
json_serializeable['corpus_follower_association'] = {
|
|
||||||
x.hashid: x.to_json_serializeable(relationships=True)
|
|
||||||
for x in self.corpus_follower_association
|
|
||||||
}
|
|
||||||
return json_serializeable
|
|
||||||
|
|
||||||
@staticmethod
|
|
||||||
def insert_defaults():
|
|
||||||
roles = {
|
|
||||||
'Anonymous': [],
|
|
||||||
'Viewer': [
|
|
||||||
CorpusFollowerPermission.VIEW
|
|
||||||
],
|
|
||||||
'Contributor': [
|
|
||||||
CorpusFollowerPermission.VIEW,
|
|
||||||
CorpusFollowerPermission.MANAGE_FILES
|
|
||||||
],
|
|
||||||
'Administrator': [
|
|
||||||
CorpusFollowerPermission.VIEW,
|
|
||||||
CorpusFollowerPermission.MANAGE_FILES,
|
|
||||||
CorpusFollowerPermission.MANAGE_FOLLOWERS,
|
|
||||||
CorpusFollowerPermission.MANAGE_CORPUS
|
|
||||||
|
|
||||||
]
|
|
||||||
}
|
|
||||||
default_role_name = 'Viewer'
|
|
||||||
for role_name, permissions in roles.items():
|
|
||||||
role = CorpusFollowerRole.query.filter_by(name=role_name).first()
|
|
||||||
if role is None:
|
|
||||||
role = CorpusFollowerRole(name=role_name)
|
|
||||||
role.reset_permissions()
|
|
||||||
for permission in permissions:
|
|
||||||
role.add_permission(permission)
|
|
||||||
role.default = role.name == default_role_name
|
|
||||||
db.session.add(role)
|
|
||||||
db.session.commit()
|
|
@ -1,138 +0,0 @@
|
|||||||
from datetime import datetime
|
|
||||||
from enum import Enum
|
|
||||||
from app import db, mail, socketio
|
|
||||||
from app.email import create_message
|
|
||||||
from .corpus_file import CorpusFile
|
|
||||||
from .corpus_follower_association import CorpusFollowerAssociation
|
|
||||||
from .corpus import Corpus
|
|
||||||
from .job_input import JobInput
|
|
||||||
from .job_result import JobResult
|
|
||||||
from .job import Job, JobStatus
|
|
||||||
from .spacy_nlp_pipeline_model import SpaCyNLPPipelineModel
|
|
||||||
from .tesseract_ocr_pipeline_model import TesseractOCRPipelineModel
|
|
||||||
from .user import UserSettingJobStatusMailNotificationLevel
|
|
||||||
|
|
||||||
|
|
||||||
def register_event_listeners():
|
|
||||||
resources = [
|
|
||||||
Corpus,
|
|
||||||
CorpusFile,
|
|
||||||
Job,
|
|
||||||
JobInput,
|
|
||||||
JobResult,
|
|
||||||
SpaCyNLPPipelineModel,
|
|
||||||
TesseractOCRPipelineModel
|
|
||||||
]
|
|
||||||
|
|
||||||
for resource in resources:
|
|
||||||
db.event.listen(resource, 'after_delete', resource_after_delete)
|
|
||||||
db.event.listen(resource, 'after_insert', resource_after_insert)
|
|
||||||
db.event.listen(resource, 'after_update', resource_after_update)
|
|
||||||
|
|
||||||
db.event.listen(CorpusFollowerAssociation, 'after_delete', cfa_after_delete)
|
|
||||||
db.event.listen(CorpusFollowerAssociation, 'after_insert', cfa_after_insert)
|
|
||||||
|
|
||||||
db.event.listen(Job, 'after_update', job_after_update)
|
|
||||||
|
|
||||||
|
|
||||||
def resource_after_delete(mapper, connection, resource):
|
|
||||||
jsonpatch = [
|
|
||||||
{
|
|
||||||
'op': 'remove',
|
|
||||||
'path': resource.jsonpatch_path
|
|
||||||
}
|
|
||||||
]
|
|
||||||
namespace = '/users'
|
|
||||||
room = f'/users/{resource.user_hashid}'
|
|
||||||
socketio.emit('patch', jsonpatch, namespace=namespace, room=room)
|
|
||||||
|
|
||||||
|
|
||||||
def cfa_after_delete(mapper, connection, cfa):
|
|
||||||
jsonpatch_path = f'/users/{cfa.corpus.user.hashid}/corpora/{cfa.corpus.hashid}/corpus_follower_associations/{cfa.hashid}'
|
|
||||||
jsonpatch = [
|
|
||||||
{
|
|
||||||
'op': 'remove',
|
|
||||||
'path': jsonpatch_path
|
|
||||||
}
|
|
||||||
]
|
|
||||||
namespace = '/users'
|
|
||||||
room = f'/users/{cfa.corpus.user.hashid}'
|
|
||||||
socketio.emit('patch', jsonpatch, namespace=namespace, room=room)
|
|
||||||
|
|
||||||
|
|
||||||
def resource_after_insert(mapper, connection, resource):
|
|
||||||
jsonpatch_value = resource.to_json_serializeable()
|
|
||||||
for attr in mapper.relationships:
|
|
||||||
jsonpatch_value[attr.key] = {}
|
|
||||||
jsonpatch = [
|
|
||||||
{
|
|
||||||
'op': 'add',
|
|
||||||
'path': resource.jsonpatch_path,
|
|
||||||
'value': jsonpatch_value
|
|
||||||
}
|
|
||||||
]
|
|
||||||
namespace = '/users'
|
|
||||||
room = f'/users/{resource.user_hashid}'
|
|
||||||
socketio.emit('patch', jsonpatch, namespace=namespace, room=room)
|
|
||||||
|
|
||||||
|
|
||||||
def cfa_after_insert(mapper, connection, cfa):
|
|
||||||
jsonpatch_value = cfa.to_json_serializeable()
|
|
||||||
jsonpatch_path = f'/users/{cfa.corpus.user.hashid}/corpora/{cfa.corpus.hashid}/corpus_follower_associations/{cfa.hashid}'
|
|
||||||
jsonpatch = [
|
|
||||||
{
|
|
||||||
'op': 'add',
|
|
||||||
'path': jsonpatch_path,
|
|
||||||
'value': jsonpatch_value
|
|
||||||
}
|
|
||||||
]
|
|
||||||
namespace = '/users'
|
|
||||||
room = f'/users/{cfa.corpus.user.hashid}'
|
|
||||||
socketio.emit('patch', jsonpatch, namespace=namespace, room=room)
|
|
||||||
|
|
||||||
|
|
||||||
def resource_after_update(mapper, connection, resource):
|
|
||||||
jsonpatch = []
|
|
||||||
for attr in db.inspect(resource).attrs:
|
|
||||||
if attr.key in mapper.relationships:
|
|
||||||
continue
|
|
||||||
if not attr.load_history().has_changes():
|
|
||||||
continue
|
|
||||||
jsonpatch_path = f'{resource.jsonpatch_path}/{attr.key}'
|
|
||||||
if isinstance(attr.value, datetime):
|
|
||||||
jsonpatch_value = f'{attr.value.isoformat()}Z'
|
|
||||||
elif isinstance(attr.value, Enum):
|
|
||||||
jsonpatch_value = attr.value.name
|
|
||||||
else:
|
|
||||||
jsonpatch_value = attr.value
|
|
||||||
jsonpatch.append(
|
|
||||||
{
|
|
||||||
'op': 'replace',
|
|
||||||
'path': jsonpatch_path,
|
|
||||||
'value': jsonpatch_value
|
|
||||||
}
|
|
||||||
)
|
|
||||||
if jsonpatch:
|
|
||||||
namespace = '/users'
|
|
||||||
room = f'/users/{resource.user_hashid}'
|
|
||||||
socketio.emit('patch', jsonpatch, namespace=namespace, room=room)
|
|
||||||
|
|
||||||
|
|
||||||
def job_after_update(mapper, connection, job):
|
|
||||||
for attr in db.inspect(job).attrs:
|
|
||||||
if attr.key != 'status':
|
|
||||||
continue
|
|
||||||
if not attr.load_history().has_changes():
|
|
||||||
return
|
|
||||||
if job.user.setting_job_status_mail_notification_level == UserSettingJobStatusMailNotificationLevel.NONE:
|
|
||||||
return
|
|
||||||
if job.user.setting_job_status_mail_notification_level == UserSettingJobStatusMailNotificationLevel.END:
|
|
||||||
if job.status not in [JobStatus.COMPLETED, JobStatus.FAILED]:
|
|
||||||
return
|
|
||||||
msg = create_message(
|
|
||||||
job.user.email,
|
|
||||||
f'Status update for your Job "{job.title}"',
|
|
||||||
'tasks/email/notification',
|
|
||||||
job=job
|
|
||||||
)
|
|
||||||
mail.send(msg)
|
|
@ -1,40 +0,0 @@
|
|||||||
from datetime import datetime
|
|
||||||
from flask import current_app
|
|
||||||
from werkzeug.utils import secure_filename
|
|
||||||
from app import db
|
|
||||||
|
|
||||||
|
|
||||||
class FileMixin:
|
|
||||||
'''
|
|
||||||
Mixin for db.Model classes. All file related models should use this.
|
|
||||||
'''
|
|
||||||
creation_date = db.Column(db.DateTime, default=datetime.utcnow)
|
|
||||||
filename = db.Column(db.String(255))
|
|
||||||
mimetype = db.Column(db.String(255))
|
|
||||||
|
|
||||||
def file_mixin_to_json_serializeable(self, backrefs=False, relationships=False):
|
|
||||||
return {
|
|
||||||
'creation_date': f'{self.creation_date.isoformat()}Z',
|
|
||||||
'filename': self.filename,
|
|
||||||
'mimetype': self.mimetype
|
|
||||||
}
|
|
||||||
|
|
||||||
@classmethod
|
|
||||||
def create(cls, file_storage, **kwargs):
|
|
||||||
filename = kwargs.pop('filename', file_storage.filename)
|
|
||||||
mimetype = kwargs.pop('mimetype', file_storage.mimetype)
|
|
||||||
obj = cls(
|
|
||||||
filename=secure_filename(filename),
|
|
||||||
mimetype=mimetype,
|
|
||||||
**kwargs
|
|
||||||
)
|
|
||||||
db.session.add(obj)
|
|
||||||
db.session.flush(objects=[obj])
|
|
||||||
db.session.refresh(obj)
|
|
||||||
try:
|
|
||||||
file_storage.save(obj.path)
|
|
||||||
except (AttributeError, OSError) as e:
|
|
||||||
current_app.logger.error(e)
|
|
||||||
db.session.rollback()
|
|
||||||
raise e
|
|
||||||
return obj
|
|
@ -1,171 +0,0 @@
|
|||||||
from datetime import datetime
|
|
||||||
from enum import IntEnum
|
|
||||||
from flask import current_app, url_for
|
|
||||||
from flask_hashids import HashidMixin
|
|
||||||
from time import sleep
|
|
||||||
from pathlib import Path
|
|
||||||
import shutil
|
|
||||||
from app import db
|
|
||||||
from app.extensions.nopaque_sqlalchemy_extras import ContainerColumn, IntEnumColumn
|
|
||||||
|
|
||||||
|
|
||||||
class JobStatus(IntEnum):
|
|
||||||
INITIALIZING = 1
|
|
||||||
SUBMITTED = 2
|
|
||||||
QUEUED = 3
|
|
||||||
RUNNING = 4
|
|
||||||
CANCELING = 5
|
|
||||||
CANCELED = 6
|
|
||||||
COMPLETED = 7
|
|
||||||
FAILED = 8
|
|
||||||
|
|
||||||
@staticmethod
|
|
||||||
def get(job_status: 'JobStatus | int | str') -> 'JobStatus':
|
|
||||||
if isinstance(job_status, JobStatus):
|
|
||||||
return job_status
|
|
||||||
if isinstance(job_status, int):
|
|
||||||
return JobStatus(job_status)
|
|
||||||
if isinstance(job_status, str):
|
|
||||||
return JobStatus[job_status]
|
|
||||||
raise TypeError('job_status must be JobStatus, int, or str')
|
|
||||||
|
|
||||||
|
|
||||||
class Job(HashidMixin, db.Model):
|
|
||||||
'''
|
|
||||||
Class to define Jobs.
|
|
||||||
'''
|
|
||||||
__tablename__ = 'jobs'
|
|
||||||
# Primary key
|
|
||||||
id = db.Column(db.Integer, primary_key=True)
|
|
||||||
# Foreign keys
|
|
||||||
user_id = db.Column(db.Integer, db.ForeignKey('users.id'))
|
|
||||||
# Fields
|
|
||||||
creation_date = \
|
|
||||||
db.Column(db.DateTime(), default=datetime.utcnow)
|
|
||||||
description = db.Column(db.String(255))
|
|
||||||
end_date = db.Column(db.DateTime())
|
|
||||||
service = db.Column(db.String(64))
|
|
||||||
service_args = db.Column(ContainerColumn(dict, 255))
|
|
||||||
service_version = db.Column(db.String(16))
|
|
||||||
status = db.Column(
|
|
||||||
IntEnumColumn(JobStatus),
|
|
||||||
default=JobStatus.INITIALIZING
|
|
||||||
)
|
|
||||||
title = db.Column(db.String(32))
|
|
||||||
# Relationships
|
|
||||||
inputs = db.relationship(
|
|
||||||
'JobInput',
|
|
||||||
back_populates='job',
|
|
||||||
cascade='all, delete-orphan',
|
|
||||||
lazy='dynamic'
|
|
||||||
)
|
|
||||||
results = db.relationship(
|
|
||||||
'JobResult',
|
|
||||||
back_populates='job',
|
|
||||||
cascade='all, delete-orphan',
|
|
||||||
lazy='dynamic'
|
|
||||||
)
|
|
||||||
user = db.relationship(
|
|
||||||
'User',
|
|
||||||
back_populates='jobs'
|
|
||||||
)
|
|
||||||
|
|
||||||
def __repr__(self):
|
|
||||||
return f'<Job {self.title}>'
|
|
||||||
|
|
||||||
@property
|
|
||||||
def jsonpatch_path(self):
|
|
||||||
return f'{self.user.jsonpatch_path}/jobs/{self.hashid}'
|
|
||||||
|
|
||||||
@property
|
|
||||||
def path(self) -> Path:
|
|
||||||
return self.user.path / 'jobs' / f'{self.id}'
|
|
||||||
|
|
||||||
@property
|
|
||||||
def url(self):
|
|
||||||
return url_for('jobs.job', job_id=self.id)
|
|
||||||
|
|
||||||
@property
|
|
||||||
def user_hashid(self):
|
|
||||||
return self.user.hashid
|
|
||||||
|
|
||||||
@staticmethod
|
|
||||||
def create(**kwargs):
|
|
||||||
job = Job(**kwargs)
|
|
||||||
db.session.add(job)
|
|
||||||
db.session.flush(objects=[job])
|
|
||||||
db.session.refresh(job)
|
|
||||||
job_inputs_dir = job.path / 'inputs'
|
|
||||||
job_pipeline_data_dir = job.path / 'pipeline_data'
|
|
||||||
job_results_dir = job.path / 'results'
|
|
||||||
try:
|
|
||||||
job.path.mkdir()
|
|
||||||
job_inputs_dir.mkdir()
|
|
||||||
job_pipeline_data_dir.mkdir()
|
|
||||||
job_results_dir.mkdir()
|
|
||||||
except OSError as e:
|
|
||||||
# TODO: Potential leftover cleanup
|
|
||||||
current_app.logger.error(e)
|
|
||||||
db.session.rollback()
|
|
||||||
raise
|
|
||||||
return job
|
|
||||||
|
|
||||||
def delete(self):
|
|
||||||
''' Delete the job and its inputs and results from the database. '''
|
|
||||||
if self.status not in [JobStatus.COMPLETED, JobStatus.FAILED]: # noqa
|
|
||||||
self.status = JobStatus.CANCELING
|
|
||||||
db.session.commit()
|
|
||||||
while self.status != JobStatus.CANCELED:
|
|
||||||
# In case the daemon handled a job in any way
|
|
||||||
if self.status != JobStatus.CANCELING:
|
|
||||||
self.status = JobStatus.CANCELING
|
|
||||||
db.session.commit()
|
|
||||||
sleep(1)
|
|
||||||
db.session.refresh(self)
|
|
||||||
try:
|
|
||||||
shutil.rmtree(self.path)
|
|
||||||
except OSError as e:
|
|
||||||
current_app.logger.error(e)
|
|
||||||
db.session.rollback()
|
|
||||||
raise e
|
|
||||||
db.session.delete(self)
|
|
||||||
|
|
||||||
def restart(self):
|
|
||||||
''' Restart a job - only if the status is failed '''
|
|
||||||
if self.status != JobStatus.FAILED:
|
|
||||||
raise Exception('Job status is not "failed"')
|
|
||||||
shutil.rmtree(self.path / 'results', ignore_errors=True)
|
|
||||||
shutil.rmtree(self.path / 'pyflow.data', ignore_errors=True)
|
|
||||||
for result in self.results:
|
|
||||||
db.session.delete(result)
|
|
||||||
self.end_date = None
|
|
||||||
self.status = JobStatus.SUBMITTED
|
|
||||||
|
|
||||||
def to_json_serializeable(self, backrefs=False, relationships=False):
|
|
||||||
json_serializeable = {
|
|
||||||
'id': self.hashid,
|
|
||||||
'creation_date': f'{self.creation_date.isoformat()}Z',
|
|
||||||
'description': self.description,
|
|
||||||
'end_date': (
|
|
||||||
None if self.end_date is None
|
|
||||||
else f'{self.end_date.isoformat()}Z'
|
|
||||||
),
|
|
||||||
'service': self.service,
|
|
||||||
'service_args': self.service_args,
|
|
||||||
'service_version': self.service_version,
|
|
||||||
'status': self.status.name,
|
|
||||||
'title': self.title
|
|
||||||
}
|
|
||||||
if backrefs:
|
|
||||||
json_serializeable['user'] = \
|
|
||||||
self.user.to_json_serializeable(backrefs=True)
|
|
||||||
if relationships:
|
|
||||||
json_serializeable['inputs'] = {
|
|
||||||
x.hashid: x.to_json_serializeable(relationships=True)
|
|
||||||
for x in self.inputs
|
|
||||||
}
|
|
||||||
json_serializeable['results'] = {
|
|
||||||
x.hashid: x.to_json_serializeable(relationships=True)
|
|
||||||
for x in self.results
|
|
||||||
}
|
|
||||||
return json_serializeable
|
|
@ -1,65 +0,0 @@
|
|||||||
from flask import url_for
|
|
||||||
from flask_hashids import HashidMixin
|
|
||||||
from pathlib import Path
|
|
||||||
from app import db
|
|
||||||
from .file_mixin import FileMixin
|
|
||||||
|
|
||||||
|
|
||||||
class JobInput(FileMixin, HashidMixin, db.Model):
    '''Database model for a file that serves as input to a Job.'''
    __tablename__ = 'job_inputs'
    # Primary key
    id = db.Column(db.Integer, primary_key=True)
    # Foreign keys
    job_id = db.Column(db.Integer, db.ForeignKey('jobs.id'))
    # Relationships
    job = db.relationship('Job', back_populates='inputs')

    def __repr__(self):
        return f'<JobInput {self.filename}>'

    @property
    def content_url(self):
        # URL from which the raw file contents can be downloaded.
        return url_for(
            'jobs.download_job_input',
            job_id=self.job.id,
            job_input_id=self.id
        )

    @property
    def jsonpatch_path(self):
        # JSON Patch path of this input, nested under the owning job.
        return f'{self.job.jsonpatch_path}/inputs/{self.hashid}'

    @property
    def path(self) -> Path:
        # Input files live below the owning job's directory, keyed by id.
        return self.job.path / 'inputs' / f'{self.id}'

    @property
    def url(self):
        # Link to the job page, anchored at this input's card.
        return url_for(
            'jobs.job',
            job_id=self.job_id,
            _anchor=f'job-{self.job.hashid}-input-{self.hashid}'
        )

    @property
    def user_hashid(self):
        return self.job.user.hashid

    @property
    def user_id(self):
        return self.job.user.id

    def to_json_serializeable(self, backrefs=False, relationships=False):
        '''Return a JSON-serializable dict representation of this input.

        backrefs: additionally embed the owning job (with its backrefs).
        relationships: accepted for interface symmetry; this model has no
        forward relationships to serialize.
        '''
        data = {
            'id': self.hashid,
            **self.file_mixin_to_json_serializeable()
        }
        if backrefs:
            data['job'] = self.job.to_json_serializeable(backrefs=True)
        if relationships:
            pass
        return data
|
|
@ -1,71 +0,0 @@
|
|||||||
from flask import url_for
|
|
||||||
from flask_hashids import HashidMixin
|
|
||||||
from pathlib import Path
|
|
||||||
from app import db
|
|
||||||
from .file_mixin import FileMixin
|
|
||||||
|
|
||||||
|
|
||||||
class JobResult(FileMixin, HashidMixin, db.Model):
    '''Database model for a file produced as output of a Job.'''
    __tablename__ = 'job_results'
    # Primary key
    id = db.Column(db.Integer, primary_key=True)
    # Foreign keys
    job_id = db.Column(db.Integer, db.ForeignKey('jobs.id'))
    # Fields
    description = db.Column(db.String(255))
    # Relationships
    job = db.relationship('Job', back_populates='results')

    def __repr__(self):
        return f'<JobResult {self.filename}>'

    @property
    def download_url(self):
        # URL from which this result file can be downloaded.
        return url_for(
            'jobs.download_job_result',
            job_id=self.job_id,
            job_result_id=self.id
        )

    @property
    def jsonpatch_path(self):
        # JSON Patch path of this result, nested under the owning job.
        return f'{self.job.jsonpatch_path}/results/{self.hashid}'

    @property
    def path(self) -> Path:
        # Result files live below the owning job's directory, keyed by id.
        return self.job.path / 'results' / f'{self.id}'

    @property
    def url(self):
        # Link to the job page, anchored at this result's card.
        return url_for(
            'jobs.job',
            job_id=self.job_id,
            _anchor=f'job-{self.job.hashid}-result-{self.hashid}'
        )

    @property
    def user_hashid(self):
        return self.job.user.hashid

    @property
    def user_id(self):
        return self.job.user.id

    def to_json_serializeable(self, backrefs=False, relationships=False):
        '''Return a JSON-serializable dict representation of this result.

        backrefs: additionally embed the owning job (with its backrefs).
        relationships: forwarded to the file mixin; this model has no
        forward relationships of its own.
        '''
        data = {
            'id': self.hashid,
            'description': self.description,
            **self.file_mixin_to_json_serializeable(
                backrefs=backrefs,
                relationships=relationships
            )
        }
        if backrefs:
            data['job'] = self.job.to_json_serializeable(backrefs=True)
        if relationships:
            pass
        return data
|
|
@ -1,99 +0,0 @@
|
|||||||
from enum import IntEnum
|
|
||||||
from flask_hashids import HashidMixin
|
|
||||||
from app import db
|
|
||||||
|
|
||||||
|
|
||||||
class Permission(IntEnum):
    '''
    User permissions, encoded as powers of two so that any combination of
    permissions fits in a single integer and membership can be tested with
    the bitwise & operator.
    '''
    ADMINISTRATE = 1
    CONTRIBUTE = 2
    USE_API = 4

    @staticmethod
    def get(permission: 'Permission | int | str') -> 'Permission':
        '''Coerce a Permission, its integer value, or its name to Permission.

        Raises TypeError for any other input type.
        '''
        # A Permission is also an int, so it must be handled first.
        if isinstance(permission, Permission):
            return permission
        if isinstance(permission, str):
            return Permission[permission]
        if isinstance(permission, int):
            return Permission(permission)
        raise TypeError('permission must be Permission, int, or str')
|
|
||||||
|
|
||||||
|
|
||||||
class Role(HashidMixin, db.Model):
    '''
    A named set of permissions that can be assigned to users.

    Permissions are stored as a bitmask in the `permissions` column; the
    individual flag values come from the Permission enum.
    '''
    __tablename__ = 'roles'
    # Primary key
    id = db.Column(db.Integer, primary_key=True)
    # Fields
    name = db.Column(db.String(64), unique=True)
    default = db.Column(db.Boolean, default=False, index=True)
    permissions = db.Column(db.Integer, default=0)
    # Relationships
    users = db.relationship('User', back_populates='role', lazy='dynamic')

    def __repr__(self):
        return f'<Role {self.name}>'

    def has_permission(self, permission: Permission | int | str):
        '''Return True if this role includes the given permission.'''
        p = Permission.get(permission)
        return self.permissions & p.value == p.value

    def add_permission(self, permission: Permission | int | str):
        '''Add a permission to this role (no-op if already present).'''
        # Bitwise OR is idempotent, so no has_permission guard is needed
        # (the previous guarded += produced the same result).
        self.permissions |= Permission.get(permission).value

    def remove_permission(self, permission: Permission | int | str):
        '''Remove a permission from this role (no-op if not present).'''
        # Bitwise AND-NOT is idempotent, equivalent to the guarded -=.
        self.permissions &= ~Permission.get(permission).value

    def reset_permissions(self):
        '''Clear all permissions from this role.'''
        self.permissions = 0

    def to_json_serializeable(self, backrefs=False, relationships=False):
        '''Return a JSON-serializable dict representation of this role.

        relationships: additionally embed this role's users, keyed by
        their hashids.
        '''
        json_serializeable = {
            'id': self.hashid,
            'default': self.default,
            'name': self.name,
            'permissions': [
                x.name for x in Permission
                if self.has_permission(x.value)
            ]
        }
        if backrefs:
            pass
        if relationships:
            json_serializeable['users'] = {
                x.hashid: x.to_json_serializeable(relationships=True)
                for x in self.users
            }
        return json_serializeable

    @staticmethod
    def insert_defaults():
        '''Create or update the built-in roles.

        Existing roles are reset and re-granted their permissions so the
        database always reflects this table; "User" is the default role.
        '''
        roles = {
            'User': [],
            'API user': [Permission.USE_API],
            'Contributor': [Permission.CONTRIBUTE],
            'Administrator': [
                Permission.ADMINISTRATE,
                Permission.CONTRIBUTE,
                Permission.USE_API
            ],
            'System user': []
        }
        default_role_name = 'User'
        for role_name, permissions in roles.items():
            role = Role.query.filter_by(name=role_name).first()
            if role is None:
                role = Role(name=role_name)
            role.reset_permissions()
            for permission in permissions:
                role.add_permission(permission)
            role.default = role.name == default_role_name
            db.session.add(role)
        db.session.commit()
|
@ -1,136 +0,0 @@
|
|||||||
from flask import current_app, url_for
|
|
||||||
from flask_hashids import HashidMixin
|
|
||||||
from tqdm import tqdm
|
|
||||||
from pathlib import Path
|
|
||||||
import requests
|
|
||||||
import yaml
|
|
||||||
from app import db
|
|
||||||
from app.extensions.nopaque_sqlalchemy_extras import ContainerColumn
|
|
||||||
from .file_mixin import FileMixin
|
|
||||||
from .user import User
|
|
||||||
|
|
||||||
|
|
||||||
class SpaCyNLPPipelineModel(FileMixin, HashidMixin, db.Model):
    '''Database model for a spaCy NLP pipeline model file owned by a user.'''
    __tablename__ = 'spacy_nlp_pipeline_models'
    # Primary key
    id = db.Column(db.Integer, primary_key=True)
    # Foreign keys
    user_id = db.Column(db.Integer, db.ForeignKey('users.id'))
    # Fields
    title = db.Column(db.String(64))
    description = db.Column(db.String(255))
    version = db.Column(db.String(16))
    compatible_service_versions = db.Column(ContainerColumn(list, 255))
    publisher = db.Column(db.String(128))
    publisher_url = db.Column(db.String(512))
    publishing_url = db.Column(db.String(512))
    publishing_year = db.Column(db.Integer)
    pipeline_name = db.Column(db.String(64))
    is_public = db.Column(db.Boolean, default=False)
    # Relationships
    user = db.relationship('User', back_populates='spacy_nlp_pipeline_models')

    @property
    def path(self) -> Path:
        # Model files live below the owning user's directory, keyed by id.
        return self.user.path / 'spacy_nlp_pipeline_models' / f'{self.id}'

    @property
    def jsonpatch_path(self):
        return f'{self.user.jsonpatch_path}/spacy_nlp_pipeline_models/{self.hashid}'

    @property
    def url(self):
        return url_for(
            'contributions.spacy_nlp_pipeline_models.entity',
            spacy_nlp_pipeline_model_id=self.id
        )

    @property
    def user_hashid(self):
        return self.user.hashid

    @staticmethod
    def _download(url, target_path, description):
        '''Stream `url` to `target_path` with a tqdm progress bar.

        Fixes over the previous inline code: the HTTP connection is always
        closed (context manager), a non-2xx response raises
        requests.HTTPError instead of being written to disk as a "model",
        and a missing Content-Length header no longer raises KeyError
        (tqdm then runs without a total).
        '''
        with requests.get(url, stream=True) as r:
            r.raise_for_status()
            content_length = r.headers.get('Content-Length')
            pbar = tqdm(
                desc=description,
                unit="B",
                unit_scale=True,
                unit_divisor=1024,
                total=int(content_length) if content_length is not None else None
            )
            pbar.clear()
            with target_path.open('wb') as f:
                for chunk in r.iter_content(chunk_size=1024):
                    if chunk:  # filter out keep-alive new chunks
                        pbar.update(len(chunk))
                        f.write(chunk)
            pbar.close()

    @staticmethod
    def insert_defaults(force_download=False):
        '''Create/update the default spaCy models from the bundled YAML
        records and download each model file if it is missing (or always,
        when `force_download` is True).
        '''
        nopaque_user = User.query.filter_by(username='nopaque').first()
        default_records_file = Path(__file__).parent / 'default_records' / 'spacy_nlp_pipeline_model.yml'
        with default_records_file.open('r') as f:
            default_records = yaml.safe_load(f)
        for m in default_records:
            model = SpaCyNLPPipelineModel.query.filter_by(title=m['title'], version=m['version']).first()  # noqa
            if model is not None:
                # Update the existing record in place.
                model.compatible_service_versions = m['compatible_service_versions']
                model.description = m['description']
                model.filename = m['url'].split('/')[-1]
                model.publisher = m['publisher']
                model.publisher_url = m['publisher_url']
                model.publishing_url = m['publishing_url']
                model.publishing_year = m['publishing_year']
                model.is_public = True
                model.title = m['title']
                model.version = m['version']
                model.pipeline_name = m['pipeline_name']
            else:
                model = SpaCyNLPPipelineModel(
                    compatible_service_versions=m['compatible_service_versions'],
                    description=m['description'],
                    filename=m['url'].split('/')[-1],
                    publisher=m['publisher'],
                    publisher_url=m['publisher_url'],
                    publishing_url=m['publishing_url'],
                    publishing_year=m['publishing_year'],
                    is_public=True,
                    title=m['title'],
                    user=nopaque_user,
                    version=m['version'],
                    pipeline_name=m['pipeline_name']
                )
                db.session.add(model)
                # Flush + refresh so model.id (and thus model.path) exists.
                db.session.flush(objects=[model])
                db.session.refresh(model)
            if not model.path.exists() or force_download:
                SpaCyNLPPipelineModel._download(
                    m['url'],
                    model.path,
                    f'{model.title} ({model.filename})'
                )
        db.session.commit()

    def delete(self):
        '''Delete the model file from disk, then the database record.'''
        try:
            self.path.unlink(missing_ok=True)
        except OSError as e:
            current_app.logger.error(e)
            raise
        db.session.delete(self)

    def to_json_serializeable(self, backrefs=False, relationships=False):
        '''Return a JSON-serializable dict representation of this model.'''
        json_serializeable = {
            'id': self.hashid,
            'compatible_service_versions': self.compatible_service_versions,
            'description': self.description,
            'publisher': self.publisher,
            'publisher_url': self.publisher_url,
            'publishing_url': self.publishing_url,
            'publishing_year': self.publishing_year,
            'pipeline_name': self.pipeline_name,
            'is_public': self.is_public,
            'title': self.title,
            'version': self.version,
            **self.file_mixin_to_json_serializeable()
        }
        if backrefs:
            json_serializeable['user'] = \
                self.user.to_json_serializeable(backrefs=True)
        if relationships:
            pass
        return json_serializeable
|
|
@ -1,132 +0,0 @@
|
|||||||
from flask import current_app, url_for
|
|
||||||
from flask_hashids import HashidMixin
|
|
||||||
from tqdm import tqdm
|
|
||||||
from pathlib import Path
|
|
||||||
import requests
|
|
||||||
import yaml
|
|
||||||
from app import db
|
|
||||||
from app.extensions.nopaque_sqlalchemy_extras import ContainerColumn
|
|
||||||
from .file_mixin import FileMixin
|
|
||||||
from .user import User
|
|
||||||
|
|
||||||
|
|
||||||
class TesseractOCRPipelineModel(FileMixin, HashidMixin, db.Model):
    '''Database model for a Tesseract OCR traineddata file owned by a user.'''
    __tablename__ = 'tesseract_ocr_pipeline_models'
    # Primary key
    id = db.Column(db.Integer, primary_key=True)
    # Foreign keys
    user_id = db.Column(db.Integer, db.ForeignKey('users.id'))
    # Fields
    title = db.Column(db.String(64))
    description = db.Column(db.String(255))
    version = db.Column(db.String(16))
    compatible_service_versions = db.Column(ContainerColumn(list, 255))
    publisher = db.Column(db.String(128))
    publisher_url = db.Column(db.String(512))
    publishing_url = db.Column(db.String(512))
    publishing_year = db.Column(db.Integer)
    is_public = db.Column(db.Boolean, default=False)
    # Relationships
    user = db.relationship('User', back_populates='tesseract_ocr_pipeline_models')

    @property
    def path(self) -> Path:
        # Model files live below the owning user's directory, keyed by id.
        return self.user.path / 'tesseract_ocr_pipeline_models' / f'{self.id}'

    @property
    def jsonpatch_path(self):
        return f'{self.user.jsonpatch_path}/tesseract_ocr_pipeline_models/{self.hashid}'

    @property
    def url(self):
        return url_for(
            'contributions.tesseract_ocr_pipeline_models.entity',
            tesseract_ocr_pipeline_model_id=self.id
        )

    @property
    def user_hashid(self):
        return self.user.hashid

    @staticmethod
    def _download(url, target_path, description):
        '''Stream `url` to `target_path` with a tqdm progress bar.

        Fixes over the previous inline code: the HTTP connection is always
        closed (context manager), a non-2xx response raises
        requests.HTTPError instead of being written to disk as a "model",
        and a missing Content-Length header no longer raises KeyError
        (tqdm then runs without a total).
        '''
        with requests.get(url, stream=True) as r:
            r.raise_for_status()
            content_length = r.headers.get('Content-Length')
            pbar = tqdm(
                desc=description,
                unit="B",
                unit_scale=True,
                unit_divisor=1024,
                total=int(content_length) if content_length is not None else None
            )
            pbar.clear()
            with target_path.open('wb') as f:
                for chunk in r.iter_content(chunk_size=1024):
                    if chunk:  # filter out keep-alive new chunks
                        pbar.update(len(chunk))
                        f.write(chunk)
            pbar.close()

    @staticmethod
    def insert_defaults(force_download=False):
        '''Create/update the default Tesseract models from the bundled YAML
        records and download each traineddata file if it is missing (or
        always, when `force_download` is True).
        '''
        nopaque_user = User.query.filter_by(username='nopaque').first()
        default_records_file = Path(__file__).parent / 'default_records' / 'tesseract_ocr_pipeline_model.yml'
        with default_records_file.open('r') as f:
            default_records = yaml.safe_load(f)
        for m in default_records:
            model = TesseractOCRPipelineModel.query.filter_by(title=m['title'], version=m['version']).first()  # noqa
            if model is not None:
                # Update the existing record in place.
                model.compatible_service_versions = m['compatible_service_versions']
                model.description = m['description']
                model.filename = f'{model.id}.traineddata'
                model.publisher = m['publisher']
                model.publisher_url = m['publisher_url']
                model.publishing_url = m['publishing_url']
                model.publishing_year = m['publishing_year']
                model.is_public = True
                model.title = m['title']
                model.version = m['version']
            else:
                model = TesseractOCRPipelineModel(
                    compatible_service_versions=m['compatible_service_versions'],
                    description=m['description'],
                    publisher=m['publisher'],
                    publisher_url=m['publisher_url'],
                    publishing_url=m['publishing_url'],
                    publishing_year=m['publishing_year'],
                    is_public=True,
                    title=m['title'],
                    user=nopaque_user,
                    version=m['version']
                )
                db.session.add(model)
                # Flush + refresh so model.id exists before deriving the
                # filename and path from it.
                db.session.flush(objects=[model])
                db.session.refresh(model)
                model.filename = f'{model.id}.traineddata'
            if not model.path.exists() or force_download:
                TesseractOCRPipelineModel._download(
                    m['url'],
                    model.path,
                    f'{model.title} ({model.filename})'
                )
        db.session.commit()

    def delete(self):
        '''Delete the model file from disk, then the database record.'''
        try:
            self.path.unlink(missing_ok=True)
        except OSError as e:
            current_app.logger.error(e)
            raise
        db.session.delete(self)

    def to_json_serializeable(self, backrefs=False, relationships=False):
        '''Return a JSON-serializable dict representation of this model.'''
        json_serializeable = {
            'id': self.hashid,
            'compatible_service_versions': self.compatible_service_versions,
            'description': self.description,
            'publisher': self.publisher,
            'publisher_url': self.publisher_url,
            'publishing_url': self.publishing_url,
            'publishing_year': self.publishing_year,
            'is_public': self.is_public,
            'title': self.title,
            'version': self.version,
            **self.file_mixin_to_json_serializeable()
        }
        if backrefs:
            json_serializeable['user'] = \
                self.user.to_json_serializeable(backrefs=True)
        if relationships:
            pass
        return json_serializeable
|
|
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user