mirror of
https://gitlab.ub.uni-bielefeld.de/sfb1288inf/nopaque.git
synced 2024-12-26 19:34:19 +00:00
Compare commits
No commits in common. "713a7645dbcadf4c2e7d03b57562ac3037840fec" and "cf8c164d60012333781dfc901d0a287eef32351a" have entirely different histories.
713a7645db
...
cf8c164d60
@ -5,9 +5,9 @@
|
|||||||
!app
|
!app
|
||||||
!migrations
|
!migrations
|
||||||
!tests
|
!tests
|
||||||
|
!.flaskenv
|
||||||
!boot.sh
|
!boot.sh
|
||||||
!config.py
|
!config.py
|
||||||
!docker-nopaque-entrypoint.sh
|
!docker-nopaque-entrypoint.sh
|
||||||
|
!nopaque.py
|
||||||
!requirements.txt
|
!requirements.txt
|
||||||
!requirements.freezed.txt
|
|
||||||
!wsgi.py
|
|
||||||
|
22
.env.tpl
22
.env.tpl
@ -1,20 +1,32 @@
|
|||||||
##############################################################################
|
##############################################################################
|
||||||
# Environment variables used by Docker Compose config files. #
|
# Variables for use in Docker Compose YAML files #
|
||||||
##############################################################################
|
##############################################################################
|
||||||
# HINT: Use this bash command `id -u`
|
# HINT: Use this bash command `id -u`
|
||||||
# NOTE: 0 (= root user) is not allowed
|
# NOTE: 0 (= root user) is not allowed
|
||||||
HOST_UID=
|
HOST_UID=
|
||||||
|
|
||||||
# HINT: Use this bash command `id -g`
|
# HINT: Use this bash command `id -g`
|
||||||
# NOTE: 0 (= root group) is not allowed
|
|
||||||
HOST_GID=
|
HOST_GID=
|
||||||
|
|
||||||
# HINT: Use this bash command `getent group docker | cut -d: -f3`
|
# HINT: Use this bash command `getent group docker | cut -d: -f3`
|
||||||
HOST_DOCKER_GID=
|
HOST_DOCKER_GID=
|
||||||
|
|
||||||
# DEFAULT: nopaque
|
# DEFAULT: nopaque
|
||||||
NOPAQUE_DOCKER_NETWORK_NAME=nopaque
|
# DOCKER_DEFAULT_NETWORK_NAME=
|
||||||
|
|
||||||
|
# DEFAULT: ./volumes/db/data
|
||||||
|
# NOTE: Use `.` as <project-basedir>
|
||||||
|
# DOCKER_DB_SERVICE_DATA_VOLUME_SOURCE_PATH=
|
||||||
|
|
||||||
|
# DEFAULT: ./volumes/mq/data
|
||||||
|
# NOTE: Use `.` as <project-basedir>
|
||||||
|
# DOCKER_MQ_SERVICE_DATA_VOLUME_SOURCE_PATH=
|
||||||
|
|
||||||
# NOTE: This must be a network share and it must be available on all
|
# NOTE: This must be a network share and it must be available on all
|
||||||
# Docker Swarm nodes, mounted to the same path.
|
# Docker Swarm nodes, mounted to the same path with the same
|
||||||
HOST_NOPAQUE_DATA_PATH=/mnt/nopaque
|
# user and group ownership.
|
||||||
|
DOCKER_NOPAQUE_SERVICE_DATA_VOLUME_SOURCE_PATH=
|
||||||
|
|
||||||
|
# DEFAULT: ./volumes/nopaque/logs
|
||||||
|
# NOTE: Use `.` as <project-basedir>
|
||||||
|
# DOCKER_NOPAQUE_SERVICE_LOGS_VOLUME_SOURCE_PATH=.
|
||||||
|
2
.gitignore
vendored
2
.gitignore
vendored
@ -2,6 +2,8 @@
|
|||||||
app/static/gen/
|
app/static/gen/
|
||||||
volumes/
|
volumes/
|
||||||
docker-compose.override.yml
|
docker-compose.override.yml
|
||||||
|
logs/
|
||||||
|
!logs/dummy
|
||||||
*.env
|
*.env
|
||||||
|
|
||||||
*.pjentsch-testing
|
*.pjentsch-testing
|
||||||
|
17
.vscode/settings.json
vendored
17
.vscode/settings.json
vendored
@ -1,17 +1,9 @@
|
|||||||
{
|
{
|
||||||
"editor.rulers": [79],
|
"editor.rulers": [79],
|
||||||
"editor.tabSize": 4,
|
|
||||||
"emmet.includeLanguages": {
|
|
||||||
"jinja-html": "html"
|
|
||||||
},
|
|
||||||
"files.associations": {
|
|
||||||
".flaskenv": "env",
|
|
||||||
"*.env.tpl": "env",
|
|
||||||
"*.txt.j2": "jinja"
|
|
||||||
},
|
|
||||||
"files.insertFinalNewline": true,
|
"files.insertFinalNewline": true,
|
||||||
"files.trimFinalNewlines": true,
|
"[css]": {
|
||||||
"files.trimTrailingWhitespace": true,
|
"editor.tabSize": 2
|
||||||
|
},
|
||||||
"[html]": {
|
"[html]": {
|
||||||
"editor.tabSize": 2
|
"editor.tabSize": 2
|
||||||
},
|
},
|
||||||
@ -20,5 +12,8 @@
|
|||||||
},
|
},
|
||||||
"[jinja-html]": {
|
"[jinja-html]": {
|
||||||
"editor.tabSize": 2
|
"editor.tabSize": 2
|
||||||
|
},
|
||||||
|
"[scss]": {
|
||||||
|
"editor.tabSize": 2
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
24
Dockerfile
24
Dockerfile
@ -4,13 +4,11 @@ FROM python:3.10.13-slim-bookworm
|
|||||||
LABEL authors="Patrick Jentsch <p.jentsch@uni-bielefeld.de>"
|
LABEL authors="Patrick Jentsch <p.jentsch@uni-bielefeld.de>"
|
||||||
|
|
||||||
|
|
||||||
# Set environment variables
|
|
||||||
ENV LANG="C.UTF-8"
|
ENV LANG="C.UTF-8"
|
||||||
ENV PYTHONDONTWRITEBYTECODE="1"
|
ENV PYTHONDONTWRITEBYTECODE="1"
|
||||||
ENV PYTHONUNBUFFERED="1"
|
ENV PYTHONUNBUFFERED="1"
|
||||||
|
|
||||||
|
|
||||||
# Install system dependencies
|
|
||||||
RUN apt-get update \
|
RUN apt-get update \
|
||||||
&& apt-get install --no-install-recommends --yes \
|
&& apt-get install --no-install-recommends --yes \
|
||||||
build-essential \
|
build-essential \
|
||||||
@ -19,39 +17,37 @@ RUN apt-get update \
|
|||||||
&& rm --recursive /var/lib/apt/lists/*
|
&& rm --recursive /var/lib/apt/lists/*
|
||||||
|
|
||||||
|
|
||||||
# Create a non-root user
|
|
||||||
RUN useradd --create-home --no-log-init nopaque \
|
RUN useradd --create-home --no-log-init nopaque \
|
||||||
&& groupadd docker \
|
&& groupadd docker \
|
||||||
&& usermod --append --groups docker nopaque
|
&& usermod --append --groups docker nopaque
|
||||||
|
|
||||||
|
|
||||||
USER nopaque
|
USER nopaque
|
||||||
WORKDIR /home/nopaque
|
WORKDIR /home/nopaque
|
||||||
|
|
||||||
|
|
||||||
# Create a Python virtual environment
|
|
||||||
ENV NOPAQUE_PYTHON3_VENV_PATH="/home/nopaque/.venv"
|
ENV NOPAQUE_PYTHON3_VENV_PATH="/home/nopaque/.venv"
|
||||||
RUN python3 -m venv "${NOPAQUE_PYTHON3_VENV_PATH}"
|
RUN python3 -m venv "${NOPAQUE_PYTHON3_VENV_PATH}"
|
||||||
ENV PATH="${NOPAQUE_PYTHON3_VENV_PATH}/bin:${PATH}"
|
ENV PATH="${NOPAQUE_PYTHON3_VENV_PATH}/bin:${PATH}"
|
||||||
|
|
||||||
|
|
||||||
# Install Python dependencies
|
|
||||||
COPY --chown=nopaque:nopaque requirements.freezed.txt requirements.freezed.txt
|
|
||||||
RUN python3 -m pip install --requirement requirements.freezed.txt \
|
|
||||||
&& rm requirements.freezed.txt
|
|
||||||
|
|
||||||
|
|
||||||
# Install the application
|
|
||||||
COPY docker-nopaque-entrypoint.sh /usr/local/bin/
|
|
||||||
COPY --chown=nopaque:nopaque app app
|
COPY --chown=nopaque:nopaque app app
|
||||||
COPY --chown=nopaque:nopaque migrations migrations
|
COPY --chown=nopaque:nopaque migrations migrations
|
||||||
COPY --chown=nopaque:nopaque tests tests
|
COPY --chown=nopaque:nopaque tests tests
|
||||||
COPY --chown=nopaque:nopaque boot.sh config.py wsgi.py ./
|
COPY --chown=nopaque:nopaque .flaskenv boot.sh config.py nopaque.py requirements.txt ./
|
||||||
|
|
||||||
|
|
||||||
EXPOSE 5000
|
RUN python3 -m pip install --requirement requirements.txt \
|
||||||
|
&& mkdir logs
|
||||||
|
|
||||||
|
|
||||||
USER root
|
USER root
|
||||||
|
|
||||||
|
|
||||||
|
COPY docker-nopaque-entrypoint.sh /usr/local/bin/
|
||||||
|
|
||||||
|
|
||||||
|
EXPOSE 5000
|
||||||
|
|
||||||
|
|
||||||
ENTRYPOINT ["docker-nopaque-entrypoint.sh"]
|
ENTRYPOINT ["docker-nopaque-entrypoint.sh"]
|
||||||
|
@ -35,7 +35,7 @@ username@hostname:~$ sudo mount --types cifs --options gid=${USER},password=nopa
|
|||||||
# Clone the nopaque repository
|
# Clone the nopaque repository
|
||||||
username@hostname:~$ git clone https://gitlab.ub.uni-bielefeld.de/sfb1288inf/nopaque.git
|
username@hostname:~$ git clone https://gitlab.ub.uni-bielefeld.de/sfb1288inf/nopaque.git
|
||||||
# Create data directories
|
# Create data directories
|
||||||
username@hostname:~$ mkdir -p volumes/{db,mq}
|
username@hostname:~$ mkdir data/{db,logs,mq}
|
||||||
username@hostname:~$ cp db.env.tpl db.env
|
username@hostname:~$ cp db.env.tpl db.env
|
||||||
username@hostname:~$ cp .env.tpl .env
|
username@hostname:~$ cp .env.tpl .env
|
||||||
# Fill out the variables within these files.
|
# Fill out the variables within these files.
|
||||||
|
@ -10,7 +10,6 @@
|
|||||||
# - '0.1.0'
|
# - '0.1.0'
|
||||||
# - '0.1.1'
|
# - '0.1.1'
|
||||||
# - '0.1.2'
|
# - '0.1.2'
|
||||||
# - '0.1.3b'
|
|
||||||
# - title: 'Amharic'
|
# - title: 'Amharic'
|
||||||
# description: ''
|
# description: ''
|
||||||
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/amh.traineddata'
|
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/amh.traineddata'
|
||||||
@ -23,7 +22,6 @@
|
|||||||
# - '0.1.0'
|
# - '0.1.0'
|
||||||
# - '0.1.1'
|
# - '0.1.1'
|
||||||
# - '0.1.2'
|
# - '0.1.2'
|
||||||
# - '0.1.3b'
|
|
||||||
- title: 'Arabic'
|
- title: 'Arabic'
|
||||||
description: ''
|
description: ''
|
||||||
url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/ara.traineddata'
|
url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/ara.traineddata'
|
||||||
@ -36,7 +34,6 @@
|
|||||||
- '0.1.0'
|
- '0.1.0'
|
||||||
- '0.1.1'
|
- '0.1.1'
|
||||||
- '0.1.2'
|
- '0.1.2'
|
||||||
- '0.1.3b'
|
|
||||||
# - title: 'Assamese'
|
# - title: 'Assamese'
|
||||||
# description: ''
|
# description: ''
|
||||||
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/asm.traineddata'
|
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/asm.traineddata'
|
||||||
@ -49,7 +46,6 @@
|
|||||||
# - '0.1.0'
|
# - '0.1.0'
|
||||||
# - '0.1.1'
|
# - '0.1.1'
|
||||||
# - '0.1.2'
|
# - '0.1.2'
|
||||||
# - '0.1.3b'
|
|
||||||
# - title: 'Azerbaijani'
|
# - title: 'Azerbaijani'
|
||||||
# description: ''
|
# description: ''
|
||||||
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/aze.traineddata'
|
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/aze.traineddata'
|
||||||
@ -62,7 +58,6 @@
|
|||||||
# - '0.1.0'
|
# - '0.1.0'
|
||||||
# - '0.1.1'
|
# - '0.1.1'
|
||||||
# - '0.1.2'
|
# - '0.1.2'
|
||||||
# - '0.1.3b'
|
|
||||||
# - title: 'Azerbaijani - Cyrillic'
|
# - title: 'Azerbaijani - Cyrillic'
|
||||||
# description: ''
|
# description: ''
|
||||||
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/aze_cyrl.traineddata'
|
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/aze_cyrl.traineddata'
|
||||||
@ -75,7 +70,6 @@
|
|||||||
# - '0.1.0'
|
# - '0.1.0'
|
||||||
# - '0.1.1'
|
# - '0.1.1'
|
||||||
# - '0.1.2'
|
# - '0.1.2'
|
||||||
# - '0.1.3b'
|
|
||||||
# - title: 'Belarusian'
|
# - title: 'Belarusian'
|
||||||
# description: ''
|
# description: ''
|
||||||
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/bel.traineddata'
|
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/bel.traineddata'
|
||||||
@ -88,7 +82,6 @@
|
|||||||
# - '0.1.0'
|
# - '0.1.0'
|
||||||
# - '0.1.1'
|
# - '0.1.1'
|
||||||
# - '0.1.2'
|
# - '0.1.2'
|
||||||
# - '0.1.3b'
|
|
||||||
# - title: 'Bengali'
|
# - title: 'Bengali'
|
||||||
# description: ''
|
# description: ''
|
||||||
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/ben.traineddata'
|
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/ben.traineddata'
|
||||||
@ -101,7 +94,6 @@
|
|||||||
# - '0.1.0'
|
# - '0.1.0'
|
||||||
# - '0.1.1'
|
# - '0.1.1'
|
||||||
# - '0.1.2'
|
# - '0.1.2'
|
||||||
# - '0.1.3b'
|
|
||||||
# - title: 'Tibetan'
|
# - title: 'Tibetan'
|
||||||
# description: ''
|
# description: ''
|
||||||
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/bod.traineddata'
|
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/bod.traineddata'
|
||||||
@ -114,7 +106,6 @@
|
|||||||
# - '0.1.0'
|
# - '0.1.0'
|
||||||
# - '0.1.1'
|
# - '0.1.1'
|
||||||
# - '0.1.2'
|
# - '0.1.2'
|
||||||
# - '0.1.3b'
|
|
||||||
# - title: 'Bosnian'
|
# - title: 'Bosnian'
|
||||||
# description: ''
|
# description: ''
|
||||||
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/bos.traineddata'
|
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/bos.traineddata'
|
||||||
@ -127,7 +118,6 @@
|
|||||||
# - '0.1.0'
|
# - '0.1.0'
|
||||||
# - '0.1.1'
|
# - '0.1.1'
|
||||||
# - '0.1.2'
|
# - '0.1.2'
|
||||||
# - '0.1.3b'
|
|
||||||
# - title: 'Bulgarian'
|
# - title: 'Bulgarian'
|
||||||
# description: ''
|
# description: ''
|
||||||
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/bul.traineddata'
|
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/bul.traineddata'
|
||||||
@ -140,7 +130,6 @@
|
|||||||
# - '0.1.0'
|
# - '0.1.0'
|
||||||
# - '0.1.1'
|
# - '0.1.1'
|
||||||
# - '0.1.2'
|
# - '0.1.2'
|
||||||
# - '0.1.3b'
|
|
||||||
# - title: 'Catalan; Valencian'
|
# - title: 'Catalan; Valencian'
|
||||||
# description: ''
|
# description: ''
|
||||||
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/cat.traineddata'
|
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/cat.traineddata'
|
||||||
@ -153,7 +142,6 @@
|
|||||||
# - '0.1.0'
|
# - '0.1.0'
|
||||||
# - '0.1.1'
|
# - '0.1.1'
|
||||||
# - '0.1.2'
|
# - '0.1.2'
|
||||||
# - '0.1.3b'
|
|
||||||
# - title: 'Cebuano'
|
# - title: 'Cebuano'
|
||||||
# description: ''
|
# description: ''
|
||||||
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/ceb.traineddata'
|
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/ceb.traineddata'
|
||||||
@ -166,7 +154,6 @@
|
|||||||
# - '0.1.0'
|
# - '0.1.0'
|
||||||
# - '0.1.1'
|
# - '0.1.1'
|
||||||
# - '0.1.2'
|
# - '0.1.2'
|
||||||
# - '0.1.3b'
|
|
||||||
# - title: 'Czech'
|
# - title: 'Czech'
|
||||||
# description: ''
|
# description: ''
|
||||||
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/ces.traineddata'
|
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/ces.traineddata'
|
||||||
@ -179,7 +166,6 @@
|
|||||||
# - '0.1.0'
|
# - '0.1.0'
|
||||||
# - '0.1.1'
|
# - '0.1.1'
|
||||||
# - '0.1.2'
|
# - '0.1.2'
|
||||||
# - '0.1.3b'
|
|
||||||
# - title: 'Chinese - Simplified'
|
# - title: 'Chinese - Simplified'
|
||||||
# description: ''
|
# description: ''
|
||||||
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/chi_sim.traineddata'
|
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/chi_sim.traineddata'
|
||||||
@ -192,7 +178,6 @@
|
|||||||
# - '0.1.0'
|
# - '0.1.0'
|
||||||
# - '0.1.1'
|
# - '0.1.1'
|
||||||
# - '0.1.2'
|
# - '0.1.2'
|
||||||
# - '0.1.3b'
|
|
||||||
- title: 'Chinese - Traditional'
|
- title: 'Chinese - Traditional'
|
||||||
description: ''
|
description: ''
|
||||||
url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/chi_tra.traineddata'
|
url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/chi_tra.traineddata'
|
||||||
@ -205,7 +190,6 @@
|
|||||||
- '0.1.0'
|
- '0.1.0'
|
||||||
- '0.1.1'
|
- '0.1.1'
|
||||||
- '0.1.2'
|
- '0.1.2'
|
||||||
- '0.1.3b'
|
|
||||||
# - title: 'Cherokee'
|
# - title: 'Cherokee'
|
||||||
# description: ''
|
# description: ''
|
||||||
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/chr.traineddata'
|
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/chr.traineddata'
|
||||||
@ -218,7 +202,6 @@
|
|||||||
# - '0.1.0'
|
# - '0.1.0'
|
||||||
# - '0.1.1'
|
# - '0.1.1'
|
||||||
# - '0.1.2'
|
# - '0.1.2'
|
||||||
# - '0.1.3b'
|
|
||||||
# - title: 'Welsh'
|
# - title: 'Welsh'
|
||||||
# description: ''
|
# description: ''
|
||||||
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/cym.traineddata'
|
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/cym.traineddata'
|
||||||
@ -231,7 +214,6 @@
|
|||||||
# - '0.1.0'
|
# - '0.1.0'
|
||||||
# - '0.1.1'
|
# - '0.1.1'
|
||||||
# - '0.1.2'
|
# - '0.1.2'
|
||||||
# - '0.1.3b'
|
|
||||||
- title: 'Danish'
|
- title: 'Danish'
|
||||||
description: ''
|
description: ''
|
||||||
url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/dan.traineddata'
|
url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/dan.traineddata'
|
||||||
@ -244,7 +226,6 @@
|
|||||||
- '0.1.0'
|
- '0.1.0'
|
||||||
- '0.1.1'
|
- '0.1.1'
|
||||||
- '0.1.2'
|
- '0.1.2'
|
||||||
- '0.1.3b'
|
|
||||||
- title: 'German'
|
- title: 'German'
|
||||||
description: ''
|
description: ''
|
||||||
url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/deu.traineddata'
|
url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/deu.traineddata'
|
||||||
@ -257,7 +238,6 @@
|
|||||||
- '0.1.0'
|
- '0.1.0'
|
||||||
- '0.1.1'
|
- '0.1.1'
|
||||||
- '0.1.2'
|
- '0.1.2'
|
||||||
- '0.1.3b'
|
|
||||||
# - title: 'Dzongkha'
|
# - title: 'Dzongkha'
|
||||||
# description: ''
|
# description: ''
|
||||||
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/dzo.traineddata'
|
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/dzo.traineddata'
|
||||||
@ -270,7 +250,6 @@
|
|||||||
# - '0.1.0'
|
# - '0.1.0'
|
||||||
# - '0.1.1'
|
# - '0.1.1'
|
||||||
# - '0.1.2'
|
# - '0.1.2'
|
||||||
# - '0.1.3b'
|
|
||||||
- title: 'Greek, Modern (1453-)'
|
- title: 'Greek, Modern (1453-)'
|
||||||
description: ''
|
description: ''
|
||||||
url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/ell.traineddata'
|
url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/ell.traineddata'
|
||||||
@ -283,7 +262,6 @@
|
|||||||
- '0.1.0'
|
- '0.1.0'
|
||||||
- '0.1.1'
|
- '0.1.1'
|
||||||
- '0.1.2'
|
- '0.1.2'
|
||||||
- '0.1.3b'
|
|
||||||
- title: 'English'
|
- title: 'English'
|
||||||
description: ''
|
description: ''
|
||||||
url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/eng.traineddata'
|
url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/eng.traineddata'
|
||||||
@ -296,7 +274,6 @@
|
|||||||
- '0.1.0'
|
- '0.1.0'
|
||||||
- '0.1.1'
|
- '0.1.1'
|
||||||
- '0.1.2'
|
- '0.1.2'
|
||||||
- '0.1.3b'
|
|
||||||
- title: 'English, Middle (1100-1500)'
|
- title: 'English, Middle (1100-1500)'
|
||||||
description: ''
|
description: ''
|
||||||
url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/enm.traineddata'
|
url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/enm.traineddata'
|
||||||
@ -309,7 +286,6 @@
|
|||||||
- '0.1.0'
|
- '0.1.0'
|
||||||
- '0.1.1'
|
- '0.1.1'
|
||||||
- '0.1.2'
|
- '0.1.2'
|
||||||
- '0.1.3b'
|
|
||||||
# - title: 'Esperanto'
|
# - title: 'Esperanto'
|
||||||
# description: ''
|
# description: ''
|
||||||
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/epo.traineddata'
|
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/epo.traineddata'
|
||||||
@ -322,7 +298,6 @@
|
|||||||
# - '0.1.0'
|
# - '0.1.0'
|
||||||
# - '0.1.1'
|
# - '0.1.1'
|
||||||
# - '0.1.2'
|
# - '0.1.2'
|
||||||
# - '0.1.3b'
|
|
||||||
# - title: 'Estonian'
|
# - title: 'Estonian'
|
||||||
# description: ''
|
# description: ''
|
||||||
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/est.traineddata'
|
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/est.traineddata'
|
||||||
@ -335,7 +310,6 @@
|
|||||||
# - '0.1.0'
|
# - '0.1.0'
|
||||||
# - '0.1.1'
|
# - '0.1.1'
|
||||||
# - '0.1.2'
|
# - '0.1.2'
|
||||||
# - '0.1.3b'
|
|
||||||
# - title: 'Basque'
|
# - title: 'Basque'
|
||||||
# description: ''
|
# description: ''
|
||||||
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/eus.traineddata'
|
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/eus.traineddata'
|
||||||
@ -348,7 +322,6 @@
|
|||||||
# - '0.1.0'
|
# - '0.1.0'
|
||||||
# - '0.1.1'
|
# - '0.1.1'
|
||||||
# - '0.1.2'
|
# - '0.1.2'
|
||||||
# - '0.1.3b'
|
|
||||||
# - title: 'Persian'
|
# - title: 'Persian'
|
||||||
# description: ''
|
# description: ''
|
||||||
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/fas.traineddata'
|
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/fas.traineddata'
|
||||||
@ -361,7 +334,6 @@
|
|||||||
# - '0.1.0'
|
# - '0.1.0'
|
||||||
# - '0.1.1'
|
# - '0.1.1'
|
||||||
# - '0.1.2'
|
# - '0.1.2'
|
||||||
# - '0.1.3b'
|
|
||||||
# - title: 'Finnish'
|
# - title: 'Finnish'
|
||||||
# description: ''
|
# description: ''
|
||||||
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/fin.traineddata'
|
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/fin.traineddata'
|
||||||
@ -374,7 +346,6 @@
|
|||||||
# - '0.1.0'
|
# - '0.1.0'
|
||||||
# - '0.1.1'
|
# - '0.1.1'
|
||||||
# - '0.1.2'
|
# - '0.1.2'
|
||||||
# - '0.1.3b'
|
|
||||||
- title: 'French'
|
- title: 'French'
|
||||||
description: ''
|
description: ''
|
||||||
url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/fra.traineddata'
|
url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/fra.traineddata'
|
||||||
@ -387,7 +358,6 @@
|
|||||||
- '0.1.0'
|
- '0.1.0'
|
||||||
- '0.1.1'
|
- '0.1.1'
|
||||||
- '0.1.2'
|
- '0.1.2'
|
||||||
- '0.1.3b'
|
|
||||||
- title: 'German Fraktur'
|
- title: 'German Fraktur'
|
||||||
description: ''
|
description: ''
|
||||||
url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/frk.traineddata'
|
url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/frk.traineddata'
|
||||||
@ -400,7 +370,6 @@
|
|||||||
- '0.1.0'
|
- '0.1.0'
|
||||||
- '0.1.1'
|
- '0.1.1'
|
||||||
- '0.1.2'
|
- '0.1.2'
|
||||||
- '0.1.3b'
|
|
||||||
- title: 'French, Middle (ca. 1400-1600)'
|
- title: 'French, Middle (ca. 1400-1600)'
|
||||||
description: ''
|
description: ''
|
||||||
url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/frm.traineddata'
|
url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/frm.traineddata'
|
||||||
@ -413,7 +382,6 @@
|
|||||||
- '0.1.0'
|
- '0.1.0'
|
||||||
- '0.1.1'
|
- '0.1.1'
|
||||||
- '0.1.2'
|
- '0.1.2'
|
||||||
- '0.1.3b'
|
|
||||||
# - title: 'Irish'
|
# - title: 'Irish'
|
||||||
# description: ''
|
# description: ''
|
||||||
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/gle.traineddata'
|
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/gle.traineddata'
|
||||||
@ -426,7 +394,6 @@
|
|||||||
# - '0.1.0'
|
# - '0.1.0'
|
||||||
# - '0.1.1'
|
# - '0.1.1'
|
||||||
# - '0.1.2'
|
# - '0.1.2'
|
||||||
# - '0.1.3b'
|
|
||||||
# - title: 'Galician'
|
# - title: 'Galician'
|
||||||
# description: ''
|
# description: ''
|
||||||
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/glg.traineddata'
|
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/glg.traineddata'
|
||||||
@ -439,7 +406,6 @@
|
|||||||
# - '0.1.0'
|
# - '0.1.0'
|
||||||
# - '0.1.1'
|
# - '0.1.1'
|
||||||
# - '0.1.2'
|
# - '0.1.2'
|
||||||
# - '0.1.3b'
|
|
||||||
- title: 'Greek, Ancient (-1453)'
|
- title: 'Greek, Ancient (-1453)'
|
||||||
description: ''
|
description: ''
|
||||||
url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/grc.traineddata'
|
url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/grc.traineddata'
|
||||||
@ -452,7 +418,6 @@
|
|||||||
- '0.1.0'
|
- '0.1.0'
|
||||||
- '0.1.1'
|
- '0.1.1'
|
||||||
- '0.1.2'
|
- '0.1.2'
|
||||||
- '0.1.3b'
|
|
||||||
# - title: 'Gujarati'
|
# - title: 'Gujarati'
|
||||||
# description: ''
|
# description: ''
|
||||||
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/guj.traineddata'
|
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/guj.traineddata'
|
||||||
@ -465,7 +430,6 @@
|
|||||||
# - '0.1.0'
|
# - '0.1.0'
|
||||||
# - '0.1.1'
|
# - '0.1.1'
|
||||||
# - '0.1.2'
|
# - '0.1.2'
|
||||||
# - '0.1.3b'
|
|
||||||
# - title: 'Haitian; Haitian Creole'
|
# - title: 'Haitian; Haitian Creole'
|
||||||
# description: ''
|
# description: ''
|
||||||
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/hat.traineddata'
|
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/hat.traineddata'
|
||||||
@ -478,7 +442,6 @@
|
|||||||
# - '0.1.0'
|
# - '0.1.0'
|
||||||
# - '0.1.1'
|
# - '0.1.1'
|
||||||
# - '0.1.2'
|
# - '0.1.2'
|
||||||
# - '0.1.3b'
|
|
||||||
# - title: 'Hebrew'
|
# - title: 'Hebrew'
|
||||||
# description: ''
|
# description: ''
|
||||||
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/heb.traineddata'
|
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/heb.traineddata'
|
||||||
@ -491,7 +454,6 @@
|
|||||||
# - '0.1.0'
|
# - '0.1.0'
|
||||||
# - '0.1.1'
|
# - '0.1.1'
|
||||||
# - '0.1.2'
|
# - '0.1.2'
|
||||||
# - '0.1.3b'
|
|
||||||
# - title: 'Hindi'
|
# - title: 'Hindi'
|
||||||
# description: ''
|
# description: ''
|
||||||
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/hin.traineddata'
|
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/hin.traineddata'
|
||||||
@ -504,7 +466,6 @@
|
|||||||
# - '0.1.0'
|
# - '0.1.0'
|
||||||
# - '0.1.1'
|
# - '0.1.1'
|
||||||
# - '0.1.2'
|
# - '0.1.2'
|
||||||
# - '0.1.3b'
|
|
||||||
# - title: 'Croatian'
|
# - title: 'Croatian'
|
||||||
# description: ''
|
# description: ''
|
||||||
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/hrv.traineddata'
|
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/hrv.traineddata'
|
||||||
@ -517,7 +478,6 @@
|
|||||||
# - '0.1.0'
|
# - '0.1.0'
|
||||||
# - '0.1.1'
|
# - '0.1.1'
|
||||||
# - '0.1.2'
|
# - '0.1.2'
|
||||||
# - '0.1.3b'
|
|
||||||
# - title: 'Hungarian'
|
# - title: 'Hungarian'
|
||||||
# description: ''
|
# description: ''
|
||||||
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/hun.traineddata'
|
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/hun.traineddata'
|
||||||
@ -530,7 +490,6 @@
|
|||||||
# - '0.1.0'
|
# - '0.1.0'
|
||||||
# - '0.1.1'
|
# - '0.1.1'
|
||||||
# - '0.1.2'
|
# - '0.1.2'
|
||||||
# - '0.1.3b'
|
|
||||||
# - title: 'Inuktitut'
|
# - title: 'Inuktitut'
|
||||||
# description: ''
|
# description: ''
|
||||||
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/iku.traineddata'
|
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/iku.traineddata'
|
||||||
@ -543,7 +502,6 @@
|
|||||||
# - '0.1.0'
|
# - '0.1.0'
|
||||||
# - '0.1.1'
|
# - '0.1.1'
|
||||||
# - '0.1.2'
|
# - '0.1.2'
|
||||||
# - '0.1.3b'
|
|
||||||
# - title: 'Indonesian'
|
# - title: 'Indonesian'
|
||||||
# description: ''
|
# description: ''
|
||||||
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/ind.traineddata'
|
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/ind.traineddata'
|
||||||
@ -556,7 +514,6 @@
|
|||||||
# - '0.1.0'
|
# - '0.1.0'
|
||||||
# - '0.1.1'
|
# - '0.1.1'
|
||||||
# - '0.1.2'
|
# - '0.1.2'
|
||||||
# - '0.1.3b'
|
|
||||||
# - title: 'Icelandic'
|
# - title: 'Icelandic'
|
||||||
# description: ''
|
# description: ''
|
||||||
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/isl.traineddata'
|
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/isl.traineddata'
|
||||||
@ -569,7 +526,6 @@
|
|||||||
# - '0.1.0'
|
# - '0.1.0'
|
||||||
# - '0.1.1'
|
# - '0.1.1'
|
||||||
# - '0.1.2'
|
# - '0.1.2'
|
||||||
# - '0.1.3b'
|
|
||||||
- title: 'Italian'
|
- title: 'Italian'
|
||||||
description: ''
|
description: ''
|
||||||
url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/ita.traineddata'
|
url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/ita.traineddata'
|
||||||
@ -582,7 +538,6 @@
|
|||||||
- '0.1.0'
|
- '0.1.0'
|
||||||
- '0.1.1'
|
- '0.1.1'
|
||||||
- '0.1.2'
|
- '0.1.2'
|
||||||
- '0.1.3b'
|
|
||||||
- title: 'Italian - Old'
|
- title: 'Italian - Old'
|
||||||
description: ''
|
description: ''
|
||||||
url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/ita_old.traineddata'
|
url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/ita_old.traineddata'
|
||||||
@ -595,7 +550,6 @@
|
|||||||
- '0.1.0'
|
- '0.1.0'
|
||||||
- '0.1.1'
|
- '0.1.1'
|
||||||
- '0.1.2'
|
- '0.1.2'
|
||||||
- '0.1.3b'
|
|
||||||
# - title: 'Javanese'
|
# - title: 'Javanese'
|
||||||
# description: ''
|
# description: ''
|
||||||
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/jav.traineddata'
|
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/jav.traineddata'
|
||||||
@ -608,7 +562,6 @@
|
|||||||
# - '0.1.0'
|
# - '0.1.0'
|
||||||
# - '0.1.1'
|
# - '0.1.1'
|
||||||
# - '0.1.2'
|
# - '0.1.2'
|
||||||
# - '0.1.3b'
|
|
||||||
# - title: 'Japanese'
|
# - title: 'Japanese'
|
||||||
# description: ''
|
# description: ''
|
||||||
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/jpn.traineddata'
|
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/jpn.traineddata'
|
||||||
@ -621,7 +574,6 @@
|
|||||||
# - '0.1.0'
|
# - '0.1.0'
|
||||||
# - '0.1.1'
|
# - '0.1.1'
|
||||||
# - '0.1.2'
|
# - '0.1.2'
|
||||||
# - '0.1.3b'
|
|
||||||
# - title: 'Kannada'
|
# - title: 'Kannada'
|
||||||
# description: ''
|
# description: ''
|
||||||
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/kan.traineddata'
|
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/kan.traineddata'
|
||||||
@ -634,7 +586,6 @@
|
|||||||
# - '0.1.0'
|
# - '0.1.0'
|
||||||
# - '0.1.1'
|
# - '0.1.1'
|
||||||
# - '0.1.2'
|
# - '0.1.2'
|
||||||
# - '0.1.3b'
|
|
||||||
# - title: 'Georgian'
|
# - title: 'Georgian'
|
||||||
# description: ''
|
# description: ''
|
||||||
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/kat.traineddata'
|
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/kat.traineddata'
|
||||||
@ -647,7 +598,6 @@
|
|||||||
# - '0.1.0'
|
# - '0.1.0'
|
||||||
# - '0.1.1'
|
# - '0.1.1'
|
||||||
# - '0.1.2'
|
# - '0.1.2'
|
||||||
# - '0.1.3b'
|
|
||||||
# - title: 'Georgian - Old'
|
# - title: 'Georgian - Old'
|
||||||
# description: ''
|
# description: ''
|
||||||
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/kat_old.traineddata'
|
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/kat_old.traineddata'
|
||||||
@ -660,7 +610,6 @@
|
|||||||
# - '0.1.0'
|
# - '0.1.0'
|
||||||
# - '0.1.1'
|
# - '0.1.1'
|
||||||
# - '0.1.2'
|
# - '0.1.2'
|
||||||
# - '0.1.3b'
|
|
||||||
# - title: 'Kazakh'
|
# - title: 'Kazakh'
|
||||||
# description: ''
|
# description: ''
|
||||||
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/kaz.traineddata'
|
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/kaz.traineddata'
|
||||||
@ -673,7 +622,6 @@
|
|||||||
# - '0.1.0'
|
# - '0.1.0'
|
||||||
# - '0.1.1'
|
# - '0.1.1'
|
||||||
# - '0.1.2'
|
# - '0.1.2'
|
||||||
# - '0.1.3b'
|
|
||||||
# - title: 'Central Khmer'
|
# - title: 'Central Khmer'
|
||||||
# description: ''
|
# description: ''
|
||||||
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/khm.traineddata'
|
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/khm.traineddata'
|
||||||
@ -686,7 +634,6 @@
|
|||||||
# - '0.1.0'
|
# - '0.1.0'
|
||||||
# - '0.1.1'
|
# - '0.1.1'
|
||||||
# - '0.1.2'
|
# - '0.1.2'
|
||||||
# - '0.1.3b'
|
|
||||||
# - title: 'Kirghiz; Kyrgyz'
|
# - title: 'Kirghiz; Kyrgyz'
|
||||||
# description: ''
|
# description: ''
|
||||||
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/kir.traineddata'
|
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/kir.traineddata'
|
||||||
@ -699,7 +646,6 @@
|
|||||||
# - '0.1.0'
|
# - '0.1.0'
|
||||||
# - '0.1.1'
|
# - '0.1.1'
|
||||||
# - '0.1.2'
|
# - '0.1.2'
|
||||||
# - '0.1.3b'
|
|
||||||
# - title: 'Korean'
|
# - title: 'Korean'
|
||||||
# description: ''
|
# description: ''
|
||||||
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/kor.traineddata'
|
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/kor.traineddata'
|
||||||
@ -712,7 +658,6 @@
|
|||||||
# - '0.1.0'
|
# - '0.1.0'
|
||||||
# - '0.1.1'
|
# - '0.1.1'
|
||||||
# - '0.1.2'
|
# - '0.1.2'
|
||||||
# - '0.1.3b'
|
|
||||||
# - title: 'Kurdish'
|
# - title: 'Kurdish'
|
||||||
# description: ''
|
# description: ''
|
||||||
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/kur.traineddata'
|
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/kur.traineddata'
|
||||||
@ -725,7 +670,6 @@
|
|||||||
# - '0.1.0'
|
# - '0.1.0'
|
||||||
# - '0.1.1'
|
# - '0.1.1'
|
||||||
# - '0.1.2'
|
# - '0.1.2'
|
||||||
# - '0.1.3b'
|
|
||||||
# - title: 'Lao'
|
# - title: 'Lao'
|
||||||
# description: ''
|
# description: ''
|
||||||
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/lao.traineddata'
|
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/lao.traineddata'
|
||||||
@ -738,7 +682,6 @@
|
|||||||
# - '0.1.0'
|
# - '0.1.0'
|
||||||
# - '0.1.1'
|
# - '0.1.1'
|
||||||
# - '0.1.2'
|
# - '0.1.2'
|
||||||
# - '0.1.3b'
|
|
||||||
# - title: 'Latin'
|
# - title: 'Latin'
|
||||||
# description: ''
|
# description: ''
|
||||||
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/lat.traineddata'
|
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/lat.traineddata'
|
||||||
@ -751,7 +694,6 @@
|
|||||||
# - '0.1.0'
|
# - '0.1.0'
|
||||||
# - '0.1.1'
|
# - '0.1.1'
|
||||||
# - '0.1.2'
|
# - '0.1.2'
|
||||||
# - '0.1.3b'
|
|
||||||
# - title: 'Latvian'
|
# - title: 'Latvian'
|
||||||
# description: ''
|
# description: ''
|
||||||
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/lav.traineddata'
|
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/lav.traineddata'
|
||||||
@ -764,7 +706,6 @@
|
|||||||
# - '0.1.0'
|
# - '0.1.0'
|
||||||
# - '0.1.1'
|
# - '0.1.1'
|
||||||
# - '0.1.2'
|
# - '0.1.2'
|
||||||
# - '0.1.3b'
|
|
||||||
# - title: 'Lithuanian'
|
# - title: 'Lithuanian'
|
||||||
# description: ''
|
# description: ''
|
||||||
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/lit.traineddata'
|
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/lit.traineddata'
|
||||||
@ -777,7 +718,6 @@
|
|||||||
# - '0.1.0'
|
# - '0.1.0'
|
||||||
# - '0.1.1'
|
# - '0.1.1'
|
||||||
# - '0.1.2'
|
# - '0.1.2'
|
||||||
# - '0.1.3b'
|
|
||||||
# - title: 'Malayalam'
|
# - title: 'Malayalam'
|
||||||
# description: ''
|
# description: ''
|
||||||
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/mal.traineddata'
|
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/mal.traineddata'
|
||||||
@ -790,7 +730,6 @@
|
|||||||
# - '0.1.0'
|
# - '0.1.0'
|
||||||
# - '0.1.1'
|
# - '0.1.1'
|
||||||
# - '0.1.2'
|
# - '0.1.2'
|
||||||
# - '0.1.3b'
|
|
||||||
# - title: 'Marathi'
|
# - title: 'Marathi'
|
||||||
# description: ''
|
# description: ''
|
||||||
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/mar.traineddata'
|
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/mar.traineddata'
|
||||||
@ -803,7 +742,6 @@
|
|||||||
# - '0.1.0'
|
# - '0.1.0'
|
||||||
# - '0.1.1'
|
# - '0.1.1'
|
||||||
# - '0.1.2'
|
# - '0.1.2'
|
||||||
# - '0.1.3b'
|
|
||||||
# - title: 'Macedonian'
|
# - title: 'Macedonian'
|
||||||
# description: ''
|
# description: ''
|
||||||
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/mkd.traineddata'
|
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/mkd.traineddata'
|
||||||
@ -816,7 +754,6 @@
|
|||||||
# - '0.1.0'
|
# - '0.1.0'
|
||||||
# - '0.1.1'
|
# - '0.1.1'
|
||||||
# - '0.1.2'
|
# - '0.1.2'
|
||||||
# - '0.1.3b'
|
|
||||||
# - title: 'Maltese'
|
# - title: 'Maltese'
|
||||||
# description: ''
|
# description: ''
|
||||||
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/mlt.traineddata'
|
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/mlt.traineddata'
|
||||||
@ -829,7 +766,6 @@
|
|||||||
# - '0.1.0'
|
# - '0.1.0'
|
||||||
# - '0.1.1'
|
# - '0.1.1'
|
||||||
# - '0.1.2'
|
# - '0.1.2'
|
||||||
# - '0.1.3b'
|
|
||||||
# - title: 'Malay'
|
# - title: 'Malay'
|
||||||
# description: ''
|
# description: ''
|
||||||
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/msa.traineddata'
|
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/msa.traineddata'
|
||||||
@ -842,7 +778,6 @@
|
|||||||
# - '0.1.0'
|
# - '0.1.0'
|
||||||
# - '0.1.1'
|
# - '0.1.1'
|
||||||
# - '0.1.2'
|
# - '0.1.2'
|
||||||
# - '0.1.3b'
|
|
||||||
# - title: 'Burmese'
|
# - title: 'Burmese'
|
||||||
# description: ''
|
# description: ''
|
||||||
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/mya.traineddata'
|
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/mya.traineddata'
|
||||||
@ -855,7 +790,6 @@
|
|||||||
# - '0.1.0'
|
# - '0.1.0'
|
||||||
# - '0.1.1'
|
# - '0.1.1'
|
||||||
# - '0.1.2'
|
# - '0.1.2'
|
||||||
# - '0.1.3b'
|
|
||||||
# - title: 'Nepali'
|
# - title: 'Nepali'
|
||||||
# description: ''
|
# description: ''
|
||||||
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/nep.traineddata'
|
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/nep.traineddata'
|
||||||
@ -868,7 +802,6 @@
|
|||||||
# - '0.1.0'
|
# - '0.1.0'
|
||||||
# - '0.1.1'
|
# - '0.1.1'
|
||||||
# - '0.1.2'
|
# - '0.1.2'
|
||||||
# - '0.1.3b'
|
|
||||||
# - title: 'Dutch; Flemish'
|
# - title: 'Dutch; Flemish'
|
||||||
# description: ''
|
# description: ''
|
||||||
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/nld.traineddata'
|
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/nld.traineddata'
|
||||||
@ -881,7 +814,6 @@
|
|||||||
# - '0.1.0'
|
# - '0.1.0'
|
||||||
# - '0.1.1'
|
# - '0.1.1'
|
||||||
# - '0.1.2'
|
# - '0.1.2'
|
||||||
# - '0.1.3b'
|
|
||||||
# - title: 'Norwegian'
|
# - title: 'Norwegian'
|
||||||
# description: ''
|
# description: ''
|
||||||
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/nor.traineddata'
|
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/nor.traineddata'
|
||||||
@ -894,7 +826,6 @@
|
|||||||
# - '0.1.0'
|
# - '0.1.0'
|
||||||
# - '0.1.1'
|
# - '0.1.1'
|
||||||
# - '0.1.2'
|
# - '0.1.2'
|
||||||
# - '0.1.3b'
|
|
||||||
# - title: 'Oriya'
|
# - title: 'Oriya'
|
||||||
# description: ''
|
# description: ''
|
||||||
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/ori.traineddata'
|
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/ori.traineddata'
|
||||||
@ -907,7 +838,6 @@
|
|||||||
# - '0.1.0'
|
# - '0.1.0'
|
||||||
# - '0.1.1'
|
# - '0.1.1'
|
||||||
# - '0.1.2'
|
# - '0.1.2'
|
||||||
# - '0.1.3b'
|
|
||||||
# - title: 'Panjabi; Punjabi'
|
# - title: 'Panjabi; Punjabi'
|
||||||
# description: ''
|
# description: ''
|
||||||
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/pan.traineddata'
|
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/pan.traineddata'
|
||||||
@ -920,7 +850,6 @@
|
|||||||
# - '0.1.0'
|
# - '0.1.0'
|
||||||
# - '0.1.1'
|
# - '0.1.1'
|
||||||
# - '0.1.2'
|
# - '0.1.2'
|
||||||
# - '0.1.3b'
|
|
||||||
# - title: 'Polish'
|
# - title: 'Polish'
|
||||||
# description: ''
|
# description: ''
|
||||||
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/pol.traineddata'
|
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/pol.traineddata'
|
||||||
@ -933,7 +862,6 @@
|
|||||||
# - '0.1.0'
|
# - '0.1.0'
|
||||||
# - '0.1.1'
|
# - '0.1.1'
|
||||||
# - '0.1.2'
|
# - '0.1.2'
|
||||||
# - '0.1.3b'
|
|
||||||
- title: 'Portuguese'
|
- title: 'Portuguese'
|
||||||
description: ''
|
description: ''
|
||||||
url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/por.traineddata'
|
url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/por.traineddata'
|
||||||
@ -946,7 +874,6 @@
|
|||||||
- '0.1.0'
|
- '0.1.0'
|
||||||
- '0.1.1'
|
- '0.1.1'
|
||||||
- '0.1.2'
|
- '0.1.2'
|
||||||
- '0.1.3b'
|
|
||||||
# - title: 'Pushto; Pashto'
|
# - title: 'Pushto; Pashto'
|
||||||
# description: ''
|
# description: ''
|
||||||
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/pus.traineddata'
|
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/pus.traineddata'
|
||||||
@ -959,7 +886,6 @@
|
|||||||
# - '0.1.0'
|
# - '0.1.0'
|
||||||
# - '0.1.1'
|
# - '0.1.1'
|
||||||
# - '0.1.2'
|
# - '0.1.2'
|
||||||
# - '0.1.3b'
|
|
||||||
# - title: 'Romanian; Moldavian; Moldovan'
|
# - title: 'Romanian; Moldavian; Moldovan'
|
||||||
# description: ''
|
# description: ''
|
||||||
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/ron.traineddata'
|
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/ron.traineddata'
|
||||||
@ -972,7 +898,6 @@
|
|||||||
# - '0.1.0'
|
# - '0.1.0'
|
||||||
# - '0.1.1'
|
# - '0.1.1'
|
||||||
# - '0.1.2'
|
# - '0.1.2'
|
||||||
# - '0.1.3b'
|
|
||||||
- title: 'Russian'
|
- title: 'Russian'
|
||||||
description: ''
|
description: ''
|
||||||
url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/rus.traineddata'
|
url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/rus.traineddata'
|
||||||
@ -985,7 +910,6 @@
|
|||||||
- '0.1.0'
|
- '0.1.0'
|
||||||
- '0.1.1'
|
- '0.1.1'
|
||||||
- '0.1.2'
|
- '0.1.2'
|
||||||
- '0.1.3b'
|
|
||||||
# - title: 'Sanskrit'
|
# - title: 'Sanskrit'
|
||||||
# description: ''
|
# description: ''
|
||||||
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/san.traineddata'
|
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/san.traineddata'
|
||||||
@ -998,7 +922,6 @@
|
|||||||
# - '0.1.0'
|
# - '0.1.0'
|
||||||
# - '0.1.1'
|
# - '0.1.1'
|
||||||
# - '0.1.2'
|
# - '0.1.2'
|
||||||
# - '0.1.3b'
|
|
||||||
# - title: 'Sinhala; Sinhalese'
|
# - title: 'Sinhala; Sinhalese'
|
||||||
# description: ''
|
# description: ''
|
||||||
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/sin.traineddata'
|
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/sin.traineddata'
|
||||||
@ -1011,7 +934,6 @@
|
|||||||
# - '0.1.0'
|
# - '0.1.0'
|
||||||
# - '0.1.1'
|
# - '0.1.1'
|
||||||
# - '0.1.2'
|
# - '0.1.2'
|
||||||
# - '0.1.3b'
|
|
||||||
# - title: 'Slovak'
|
# - title: 'Slovak'
|
||||||
# description: ''
|
# description: ''
|
||||||
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/slk.traineddata'
|
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/slk.traineddata'
|
||||||
@ -1024,7 +946,6 @@
|
|||||||
# - '0.1.0'
|
# - '0.1.0'
|
||||||
# - '0.1.1'
|
# - '0.1.1'
|
||||||
# - '0.1.2'
|
# - '0.1.2'
|
||||||
# - '0.1.3b'
|
|
||||||
# - title: 'Slovenian'
|
# - title: 'Slovenian'
|
||||||
# description: ''
|
# description: ''
|
||||||
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/slv.traineddata'
|
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/slv.traineddata'
|
||||||
@ -1037,7 +958,6 @@
|
|||||||
# - '0.1.0'
|
# - '0.1.0'
|
||||||
# - '0.1.1'
|
# - '0.1.1'
|
||||||
# - '0.1.2'
|
# - '0.1.2'
|
||||||
# - '0.1.3b'
|
|
||||||
- title: 'Spanish; Castilian'
|
- title: 'Spanish; Castilian'
|
||||||
description: ''
|
description: ''
|
||||||
url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/spa.traineddata'
|
url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/spa.traineddata'
|
||||||
@ -1050,7 +970,6 @@
|
|||||||
- '0.1.0'
|
- '0.1.0'
|
||||||
- '0.1.1'
|
- '0.1.1'
|
||||||
- '0.1.2'
|
- '0.1.2'
|
||||||
- '0.1.3b'
|
|
||||||
- title: 'Spanish; Castilian - Old'
|
- title: 'Spanish; Castilian - Old'
|
||||||
description: ''
|
description: ''
|
||||||
url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/spa_old.traineddata'
|
url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/spa_old.traineddata'
|
||||||
@ -1063,7 +982,6 @@
|
|||||||
- '0.1.0'
|
- '0.1.0'
|
||||||
- '0.1.1'
|
- '0.1.1'
|
||||||
- '0.1.2'
|
- '0.1.2'
|
||||||
- '0.1.3b'
|
|
||||||
# - title: 'Albanian'
|
# - title: 'Albanian'
|
||||||
# description: ''
|
# description: ''
|
||||||
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/sqi.traineddata'
|
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/sqi.traineddata'
|
||||||
@ -1076,7 +994,6 @@
|
|||||||
# - '0.1.0'
|
# - '0.1.0'
|
||||||
# - '0.1.1'
|
# - '0.1.1'
|
||||||
# - '0.1.2'
|
# - '0.1.2'
|
||||||
# - '0.1.3b'
|
|
||||||
# - title: 'Serbian'
|
# - title: 'Serbian'
|
||||||
# description: ''
|
# description: ''
|
||||||
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/srp.traineddata'
|
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/srp.traineddata'
|
||||||
@ -1089,7 +1006,6 @@
|
|||||||
# - '0.1.0'
|
# - '0.1.0'
|
||||||
# - '0.1.1'
|
# - '0.1.1'
|
||||||
# - '0.1.2'
|
# - '0.1.2'
|
||||||
# - '0.1.3b'
|
|
||||||
# - title: 'Serbian - Latin'
|
# - title: 'Serbian - Latin'
|
||||||
# description: ''
|
# description: ''
|
||||||
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/srp_latn.traineddata'
|
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/srp_latn.traineddata'
|
||||||
@ -1102,7 +1018,6 @@
|
|||||||
# - '0.1.0'
|
# - '0.1.0'
|
||||||
# - '0.1.1'
|
# - '0.1.1'
|
||||||
# - '0.1.2'
|
# - '0.1.2'
|
||||||
# - '0.1.3b'
|
|
||||||
# - title: 'Swahili'
|
# - title: 'Swahili'
|
||||||
# description: ''
|
# description: ''
|
||||||
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/swa.traineddata'
|
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/swa.traineddata'
|
||||||
@ -1115,7 +1030,6 @@
|
|||||||
# - '0.1.0'
|
# - '0.1.0'
|
||||||
# - '0.1.1'
|
# - '0.1.1'
|
||||||
# - '0.1.2'
|
# - '0.1.2'
|
||||||
# - '0.1.3b'
|
|
||||||
# - title: 'Swedish'
|
# - title: 'Swedish'
|
||||||
# description: ''
|
# description: ''
|
||||||
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/swe.traineddata'
|
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/swe.traineddata'
|
||||||
@ -1128,7 +1042,6 @@
|
|||||||
# - '0.1.0'
|
# - '0.1.0'
|
||||||
# - '0.1.1'
|
# - '0.1.1'
|
||||||
# - '0.1.2'
|
# - '0.1.2'
|
||||||
# - '0.1.3b'
|
|
||||||
# - title: 'Syriac'
|
# - title: 'Syriac'
|
||||||
# description: ''
|
# description: ''
|
||||||
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/syr.traineddata'
|
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/syr.traineddata'
|
||||||
@ -1141,7 +1054,6 @@
|
|||||||
# - '0.1.0'
|
# - '0.1.0'
|
||||||
# - '0.1.1'
|
# - '0.1.1'
|
||||||
# - '0.1.2'
|
# - '0.1.2'
|
||||||
# - '0.1.3b'
|
|
||||||
# - title: 'Tamil'
|
# - title: 'Tamil'
|
||||||
# description: ''
|
# description: ''
|
||||||
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/tam.traineddata'
|
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/tam.traineddata'
|
||||||
@ -1154,7 +1066,6 @@
|
|||||||
# - '0.1.0'
|
# - '0.1.0'
|
||||||
# - '0.1.1'
|
# - '0.1.1'
|
||||||
# - '0.1.2'
|
# - '0.1.2'
|
||||||
# - '0.1.3b'
|
|
||||||
# - title: 'Telugu'
|
# - title: 'Telugu'
|
||||||
# description: ''
|
# description: ''
|
||||||
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/tel.traineddata'
|
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/tel.traineddata'
|
||||||
@ -1167,7 +1078,6 @@
|
|||||||
# - '0.1.0'
|
# - '0.1.0'
|
||||||
# - '0.1.1'
|
# - '0.1.1'
|
||||||
# - '0.1.2'
|
# - '0.1.2'
|
||||||
# - '0.1.3b'
|
|
||||||
# - title: 'Tajik'
|
# - title: 'Tajik'
|
||||||
# description: ''
|
# description: ''
|
||||||
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/tgk.traineddata'
|
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/tgk.traineddata'
|
||||||
@ -1180,7 +1090,6 @@
|
|||||||
# - '0.1.0'
|
# - '0.1.0'
|
||||||
# - '0.1.1'
|
# - '0.1.1'
|
||||||
# - '0.1.2'
|
# - '0.1.2'
|
||||||
# - '0.1.3b'
|
|
||||||
# - title: 'Tagalog'
|
# - title: 'Tagalog'
|
||||||
# description: ''
|
# description: ''
|
||||||
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/tgl.traineddata'
|
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/tgl.traineddata'
|
||||||
@ -1193,7 +1102,6 @@
|
|||||||
# - '0.1.0'
|
# - '0.1.0'
|
||||||
# - '0.1.1'
|
# - '0.1.1'
|
||||||
# - '0.1.2'
|
# - '0.1.2'
|
||||||
# - '0.1.3b'
|
|
||||||
# - title: 'Thai'
|
# - title: 'Thai'
|
||||||
# description: ''
|
# description: ''
|
||||||
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/tha.traineddata'
|
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/tha.traineddata'
|
||||||
@ -1206,7 +1114,6 @@
|
|||||||
# - '0.1.0'
|
# - '0.1.0'
|
||||||
# - '0.1.1'
|
# - '0.1.1'
|
||||||
# - '0.1.2'
|
# - '0.1.2'
|
||||||
# - '0.1.3b'
|
|
||||||
# - title: 'Tigrinya'
|
# - title: 'Tigrinya'
|
||||||
# description: ''
|
# description: ''
|
||||||
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/tir.traineddata'
|
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/tir.traineddata'
|
||||||
@ -1219,7 +1126,6 @@
|
|||||||
# - '0.1.0'
|
# - '0.1.0'
|
||||||
# - '0.1.1'
|
# - '0.1.1'
|
||||||
# - '0.1.2'
|
# - '0.1.2'
|
||||||
# - '0.1.3b'
|
|
||||||
# - title: 'Turkish'
|
# - title: 'Turkish'
|
||||||
# description: ''
|
# description: ''
|
||||||
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/tur.traineddata'
|
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/tur.traineddata'
|
||||||
@ -1232,7 +1138,6 @@
|
|||||||
# - '0.1.0'
|
# - '0.1.0'
|
||||||
# - '0.1.1'
|
# - '0.1.1'
|
||||||
# - '0.1.2'
|
# - '0.1.2'
|
||||||
# - '0.1.3b'
|
|
||||||
# - title: 'Uighur; Uyghur'
|
# - title: 'Uighur; Uyghur'
|
||||||
# description: ''
|
# description: ''
|
||||||
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/uig.traineddata'
|
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/uig.traineddata'
|
||||||
@ -1245,7 +1150,6 @@
|
|||||||
# - '0.1.0'
|
# - '0.1.0'
|
||||||
# - '0.1.1'
|
# - '0.1.1'
|
||||||
# - '0.1.2'
|
# - '0.1.2'
|
||||||
# - '0.1.3b'
|
|
||||||
# - title: 'Ukrainian'
|
# - title: 'Ukrainian'
|
||||||
# description: ''
|
# description: ''
|
||||||
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/ukr.traineddata'
|
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/ukr.traineddata'
|
||||||
@ -1258,7 +1162,6 @@
|
|||||||
# - '0.1.0'
|
# - '0.1.0'
|
||||||
# - '0.1.1'
|
# - '0.1.1'
|
||||||
# - '0.1.2'
|
# - '0.1.2'
|
||||||
# - '0.1.3b'
|
|
||||||
# - title: 'Urdu'
|
# - title: 'Urdu'
|
||||||
# description: ''
|
# description: ''
|
||||||
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/urd.traineddata'
|
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/urd.traineddata'
|
||||||
@ -1271,7 +1174,6 @@
|
|||||||
# - '0.1.0'
|
# - '0.1.0'
|
||||||
# - '0.1.1'
|
# - '0.1.1'
|
||||||
# - '0.1.2'
|
# - '0.1.2'
|
||||||
# - '0.1.3b'
|
|
||||||
# - title: 'Uzbek'
|
# - title: 'Uzbek'
|
||||||
# description: ''
|
# description: ''
|
||||||
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/uzb.traineddata'
|
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/uzb.traineddata'
|
||||||
@ -1284,7 +1186,6 @@
|
|||||||
# - '0.1.0'
|
# - '0.1.0'
|
||||||
# - '0.1.1'
|
# - '0.1.1'
|
||||||
# - '0.1.2'
|
# - '0.1.2'
|
||||||
# - '0.1.3b'
|
|
||||||
# - title: 'Uzbek - Cyrillic'
|
# - title: 'Uzbek - Cyrillic'
|
||||||
# description: ''
|
# description: ''
|
||||||
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/uzb_cyrl.traineddata'
|
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/uzb_cyrl.traineddata'
|
||||||
@ -1297,7 +1198,6 @@
|
|||||||
# - '0.1.0'
|
# - '0.1.0'
|
||||||
# - '0.1.1'
|
# - '0.1.1'
|
||||||
# - '0.1.2'
|
# - '0.1.2'
|
||||||
# - '0.1.3b'
|
|
||||||
# - title: 'Vietnamese'
|
# - title: 'Vietnamese'
|
||||||
# description: ''
|
# description: ''
|
||||||
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/vie.traineddata'
|
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/vie.traineddata'
|
||||||
@ -1310,7 +1210,6 @@
|
|||||||
# - '0.1.0'
|
# - '0.1.0'
|
||||||
# - '0.1.1'
|
# - '0.1.1'
|
||||||
# - '0.1.2'
|
# - '0.1.2'
|
||||||
# - '0.1.3b'
|
|
||||||
# - title: 'Yiddish'
|
# - title: 'Yiddish'
|
||||||
# description: ''
|
# description: ''
|
||||||
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/yid.traineddata'
|
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/yid.traineddata'
|
||||||
@ -1323,4 +1222,3 @@
|
|||||||
# - '0.1.0'
|
# - '0.1.0'
|
||||||
# - '0.1.1'
|
# - '0.1.1'
|
||||||
# - '0.1.2'
|
# - '0.1.2'
|
||||||
# - '0.1.3b'
|
|
113
app/__init__.py
113
app/__init__.py
@ -2,9 +2,9 @@ from apifairy import APIFairy
|
|||||||
from config import Config
|
from config import Config
|
||||||
from docker import DockerClient
|
from docker import DockerClient
|
||||||
from flask import Flask
|
from flask import Flask
|
||||||
from flask.logging import default_handler
|
|
||||||
from flask_apscheduler import APScheduler
|
from flask_apscheduler import APScheduler
|
||||||
from flask_assets import Environment
|
from flask_assets import Environment
|
||||||
|
from flask_breadcrumbs import Breadcrumbs, default_breadcrumb_root
|
||||||
from flask_login import LoginManager
|
from flask_login import LoginManager
|
||||||
from flask_mail import Mail
|
from flask_mail import Mail
|
||||||
from flask_marshmallow import Marshmallow
|
from flask_marshmallow import Marshmallow
|
||||||
@ -13,142 +13,95 @@ from flask_paranoid import Paranoid
|
|||||||
from flask_socketio import SocketIO
|
from flask_socketio import SocketIO
|
||||||
from flask_sqlalchemy import SQLAlchemy
|
from flask_sqlalchemy import SQLAlchemy
|
||||||
from flask_hashids import Hashids
|
from flask_hashids import Hashids
|
||||||
from logging import Formatter, StreamHandler
|
|
||||||
from werkzeug.middleware.proxy_fix import ProxyFix
|
|
||||||
|
|
||||||
|
|
||||||
docker_client = DockerClient.from_env()
|
|
||||||
|
|
||||||
apifairy = APIFairy()
|
apifairy = APIFairy()
|
||||||
assets = Environment()
|
assets = Environment()
|
||||||
|
breadcrumbs = Breadcrumbs()
|
||||||
db = SQLAlchemy()
|
db = SQLAlchemy()
|
||||||
|
docker_client = DockerClient()
|
||||||
hashids = Hashids()
|
hashids = Hashids()
|
||||||
login = LoginManager()
|
login = LoginManager()
|
||||||
|
login.login_view = 'auth.login'
|
||||||
|
login.login_message = 'Please log in to access this page.'
|
||||||
ma = Marshmallow()
|
ma = Marshmallow()
|
||||||
mail = Mail()
|
mail = Mail()
|
||||||
migrate = Migrate(compare_type=True)
|
migrate = Migrate(compare_type=True)
|
||||||
paranoid = Paranoid()
|
paranoid = Paranoid()
|
||||||
|
paranoid.redirect_view = '/'
|
||||||
scheduler = APScheduler()
|
scheduler = APScheduler()
|
||||||
socketio = SocketIO()
|
socketio = SocketIO()
|
||||||
|
|
||||||
|
|
||||||
def create_app(config: Config = Config) -> Flask:
|
def create_app(config: Config = Config) -> Flask:
|
||||||
''' Creates an initialized Flask object. '''
|
''' Creates an initialized Flask (WSGI Application) object. '''
|
||||||
|
|
||||||
app = Flask(__name__)
|
app = Flask(__name__)
|
||||||
app.config.from_object(config)
|
app.config.from_object(config)
|
||||||
|
config.init_app(app)
|
||||||
# region Logging
|
|
||||||
log_formatter = Formatter(
|
|
||||||
fmt=app.config['NOPAQUE_LOG_FORMAT'],
|
|
||||||
datefmt=app.config['NOPAQUE_LOG_DATE_FORMAT']
|
|
||||||
)
|
|
||||||
|
|
||||||
log_handler = StreamHandler()
|
|
||||||
log_handler.setFormatter(log_formatter)
|
|
||||||
log_handler.setLevel(app.config['NOPAQUE_LOG_LEVEL'])
|
|
||||||
|
|
||||||
app.logger.setLevel('DEBUG')
|
|
||||||
app.logger.removeHandler(default_handler)
|
|
||||||
app.logger.addHandler(log_handler)
|
|
||||||
# endregion Logging
|
|
||||||
|
|
||||||
# region Middlewares
|
|
||||||
if app.config['NOPAQUE_PROXY_FIX_ENABLED']:
|
|
||||||
app.wsgi_app = ProxyFix(
|
|
||||||
app.wsgi_app,
|
|
||||||
x_for=app.config['NOPAQUE_PROXY_FIX_X_FOR'],
|
|
||||||
x_host=app.config['NOPAQUE_PROXY_FIX_X_HOST'],
|
|
||||||
x_port=app.config['NOPAQUE_PROXY_FIX_X_PORT'],
|
|
||||||
x_prefix=app.config['NOPAQUE_PROXY_FIX_X_PREFIX'],
|
|
||||||
x_proto=app.config['NOPAQUE_PROXY_FIX_X_PROTO']
|
|
||||||
)
|
|
||||||
# endregion Middlewares
|
|
||||||
|
|
||||||
# region Extensions
|
|
||||||
docker_client.login(
|
docker_client.login(
|
||||||
app.config['NOPAQUE_DOCKER_REGISTRY_USERNAME'],
|
app.config['NOPAQUE_DOCKER_REGISTRY_USERNAME'],
|
||||||
password=app.config['NOPAQUE_DOCKER_REGISTRY_PASSWORD'],
|
password=app.config['NOPAQUE_DOCKER_REGISTRY_PASSWORD'],
|
||||||
registry=app.config['NOPAQUE_DOCKER_REGISTRY']
|
registry=app.config['NOPAQUE_DOCKER_REGISTRY']
|
||||||
)
|
)
|
||||||
|
|
||||||
from .models import AnonymousUser, User
|
|
||||||
|
|
||||||
apifairy.init_app(app)
|
apifairy.init_app(app)
|
||||||
assets.init_app(app)
|
assets.init_app(app)
|
||||||
|
breadcrumbs.init_app(app)
|
||||||
db.init_app(app)
|
db.init_app(app)
|
||||||
hashids.init_app(app)
|
hashids.init_app(app)
|
||||||
login.init_app(app)
|
login.init_app(app)
|
||||||
login.anonymous_user = AnonymousUser
|
|
||||||
login.login_view = 'auth.login'
|
|
||||||
login.user_loader(lambda user_id: User.query.get(int(user_id)))
|
|
||||||
ma.init_app(app)
|
ma.init_app(app)
|
||||||
mail.init_app(app)
|
mail.init_app(app)
|
||||||
migrate.init_app(app, db)
|
migrate.init_app(app, db)
|
||||||
paranoid.init_app(app)
|
paranoid.init_app(app)
|
||||||
paranoid.redirect_view = '/'
|
|
||||||
scheduler.init_app(app)
|
scheduler.init_app(app)
|
||||||
socketio.init_app(app, message_queue=app.config['NOPAQUE_SOCKETIO_MESSAGE_QUEUE_URI'])
|
socketio.init_app(app, message_queue=app.config['NOPAQUE_SOCKETIO_MESSAGE_QUEUE_URI']) # noqa
|
||||||
# endregion Extensions
|
|
||||||
|
|
||||||
# region Blueprints
|
from .admin import bp as admin_blueprint
|
||||||
from .blueprints.admin import bp as admin_blueprint
|
default_breadcrumb_root(admin_blueprint, '.admin')
|
||||||
app.register_blueprint(admin_blueprint, url_prefix='/admin')
|
app.register_blueprint(admin_blueprint, url_prefix='/admin')
|
||||||
|
|
||||||
from .blueprints.api import bp as api_blueprint
|
from .api import bp as api_blueprint
|
||||||
app.register_blueprint(api_blueprint, url_prefix='/api')
|
app.register_blueprint(api_blueprint, url_prefix='/api')
|
||||||
|
|
||||||
from .blueprints.auth import bp as auth_blueprint
|
from .auth import bp as auth_blueprint
|
||||||
|
default_breadcrumb_root(auth_blueprint, '.')
|
||||||
app.register_blueprint(auth_blueprint)
|
app.register_blueprint(auth_blueprint)
|
||||||
|
|
||||||
from .blueprints.contributions import bp as contributions_blueprint
|
from .contributions import bp as contributions_blueprint
|
||||||
|
default_breadcrumb_root(contributions_blueprint, '.contributions')
|
||||||
app.register_blueprint(contributions_blueprint, url_prefix='/contributions')
|
app.register_blueprint(contributions_blueprint, url_prefix='/contributions')
|
||||||
|
|
||||||
from .blueprints.corpora import bp as corpora_blueprint
|
from .corpora import bp as corpora_blueprint
|
||||||
|
from .corpora.cqi_over_sio import CQiNamespace
|
||||||
|
default_breadcrumb_root(corpora_blueprint, '.corpora')
|
||||||
app.register_blueprint(corpora_blueprint, cli_group='corpus', url_prefix='/corpora')
|
app.register_blueprint(corpora_blueprint, cli_group='corpus', url_prefix='/corpora')
|
||||||
|
socketio.on_namespace(CQiNamespace('/cqi_over_sio'))
|
||||||
|
|
||||||
from .blueprints.errors import bp as errors_bp
|
from .errors import bp as errors_bp
|
||||||
app.register_blueprint(errors_bp)
|
app.register_blueprint(errors_bp)
|
||||||
|
|
||||||
from .blueprints.jobs import bp as jobs_blueprint
|
from .jobs import bp as jobs_blueprint
|
||||||
|
default_breadcrumb_root(jobs_blueprint, '.jobs')
|
||||||
app.register_blueprint(jobs_blueprint, url_prefix='/jobs')
|
app.register_blueprint(jobs_blueprint, url_prefix='/jobs')
|
||||||
|
|
||||||
from .blueprints.main import bp as main_blueprint
|
from .main import bp as main_blueprint
|
||||||
|
default_breadcrumb_root(main_blueprint, '.')
|
||||||
app.register_blueprint(main_blueprint, cli_group=None)
|
app.register_blueprint(main_blueprint, cli_group=None)
|
||||||
|
|
||||||
from .blueprints.services import bp as services_blueprint
|
from .services import bp as services_blueprint
|
||||||
|
default_breadcrumb_root(services_blueprint, '.services')
|
||||||
app.register_blueprint(services_blueprint, url_prefix='/services')
|
app.register_blueprint(services_blueprint, url_prefix='/services')
|
||||||
|
|
||||||
from .blueprints.settings import bp as settings_blueprint
|
from .settings import bp as settings_blueprint
|
||||||
|
default_breadcrumb_root(settings_blueprint, '.settings')
|
||||||
app.register_blueprint(settings_blueprint, url_prefix='/settings')
|
app.register_blueprint(settings_blueprint, url_prefix='/settings')
|
||||||
|
|
||||||
from .blueprints.users import bp as users_blueprint
|
from .users import bp as users_blueprint
|
||||||
|
default_breadcrumb_root(users_blueprint, '.users')
|
||||||
app.register_blueprint(users_blueprint, cli_group='user', url_prefix='/users')
|
app.register_blueprint(users_blueprint, cli_group='user', url_prefix='/users')
|
||||||
|
|
||||||
from .blueprints.workshops import bp as workshops_blueprint
|
from .workshops import bp as workshops_blueprint
|
||||||
app.register_blueprint(workshops_blueprint, url_prefix='/workshops')
|
app.register_blueprint(workshops_blueprint, url_prefix='/workshops')
|
||||||
# endregion Blueprints
|
|
||||||
|
|
||||||
# region SocketIO Namespaces
|
|
||||||
from .namespaces.cqi_over_sio import CQiOverSocketIONamespace
|
|
||||||
socketio.on_namespace(CQiOverSocketIONamespace('/cqi_over_sio'))
|
|
||||||
|
|
||||||
from .namespaces.users import UsersNamespace
|
|
||||||
socketio.on_namespace(UsersNamespace('/users'))
|
|
||||||
# endregion SocketIO Namespaces
|
|
||||||
|
|
||||||
# region Database event Listeners
|
|
||||||
from .models.event_listeners import register_event_listeners
|
|
||||||
register_event_listeners()
|
|
||||||
# endregion Database event Listeners
|
|
||||||
|
|
||||||
# region Add scheduler jobs
|
|
||||||
if app.config['NOPAQUE_IS_PRIMARY_INSTANCE']:
|
|
||||||
from .jobs import handle_corpora
|
|
||||||
scheduler.add_job('handle_corpora', handle_corpora, seconds=3, trigger='interval')
|
|
||||||
|
|
||||||
from .jobs import handle_jobs
|
|
||||||
scheduler.add_job('handle_jobs', handle_jobs, seconds=3, trigger='interval')
|
|
||||||
# endregion Add scheduler jobs
|
|
||||||
|
|
||||||
return app
|
return app
|
||||||
|
@ -1,6 +1,6 @@
|
|||||||
from flask import abort, request
|
from flask import abort, request
|
||||||
from app.decorators import content_negotiation
|
|
||||||
from app import db
|
from app import db
|
||||||
|
from app.decorators import content_negotiation
|
||||||
from app.models import User
|
from app.models import User
|
||||||
from . import bp
|
from . import bp
|
||||||
|
|
@ -1,7 +1,8 @@
|
|||||||
from flask import abort, flash, redirect, render_template, url_for
|
from flask import abort, flash, redirect, render_template, url_for
|
||||||
|
from flask_breadcrumbs import register_breadcrumb
|
||||||
from app import db, hashids
|
from app import db, hashids
|
||||||
from app.models import Avatar, Corpus, Role, User
|
from app.models import Avatar, Corpus, Role, User
|
||||||
from app.blueprints.users.settings.forms import (
|
from app.users.settings.forms import (
|
||||||
UpdateAvatarForm,
|
UpdateAvatarForm,
|
||||||
UpdatePasswordForm,
|
UpdatePasswordForm,
|
||||||
UpdateNotificationsForm,
|
UpdateNotificationsForm,
|
||||||
@ -10,9 +11,14 @@ from app.blueprints.users.settings.forms import (
|
|||||||
)
|
)
|
||||||
from . import bp
|
from . import bp
|
||||||
from .forms import UpdateUserForm
|
from .forms import UpdateUserForm
|
||||||
|
from app.users.utils import (
|
||||||
|
user_endpoint_arguments_constructor as user_eac,
|
||||||
|
user_dynamic_list_constructor as user_dlc
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
@bp.route('')
|
@bp.route('')
|
||||||
|
@register_breadcrumb(bp, '.', '<i class="material-icons left">admin_panel_settings</i>Administration')
|
||||||
def admin():
|
def admin():
|
||||||
return render_template(
|
return render_template(
|
||||||
'admin/admin.html.j2',
|
'admin/admin.html.j2',
|
||||||
@ -21,6 +27,7 @@ def admin():
|
|||||||
|
|
||||||
|
|
||||||
@bp.route('/corpora')
|
@bp.route('/corpora')
|
||||||
|
@register_breadcrumb(bp, '.corpora', 'Corpora')
|
||||||
def corpora():
|
def corpora():
|
||||||
corpora = Corpus.query.all()
|
corpora = Corpus.query.all()
|
||||||
return render_template(
|
return render_template(
|
||||||
@ -31,6 +38,7 @@ def corpora():
|
|||||||
|
|
||||||
|
|
||||||
@bp.route('/users')
|
@bp.route('/users')
|
||||||
|
@register_breadcrumb(bp, '.users', '<i class="material-icons left">group</i>Users')
|
||||||
def users():
|
def users():
|
||||||
users = User.query.all()
|
users = User.query.all()
|
||||||
return render_template(
|
return render_template(
|
||||||
@ -41,6 +49,7 @@ def users():
|
|||||||
|
|
||||||
|
|
||||||
@bp.route('/users/<hashid:user_id>')
|
@bp.route('/users/<hashid:user_id>')
|
||||||
|
@register_breadcrumb(bp, '.users.entity', '', dynamic_list_constructor=user_dlc)
|
||||||
def user(user_id):
|
def user(user_id):
|
||||||
user = User.query.get_or_404(user_id)
|
user = User.query.get_or_404(user_id)
|
||||||
corpora = Corpus.query.filter(Corpus.user == user).all()
|
corpora = Corpus.query.filter(Corpus.user == user).all()
|
||||||
@ -53,6 +62,7 @@ def user(user_id):
|
|||||||
|
|
||||||
|
|
||||||
@bp.route('/users/<hashid:user_id>/settings', methods=['GET', 'POST'])
|
@bp.route('/users/<hashid:user_id>/settings', methods=['GET', 'POST'])
|
||||||
|
@register_breadcrumb(bp, '.users.entity.settings', '<i class="material-icons left">settings</i>Settings')
|
||||||
def user_settings(user_id):
|
def user_settings(user_id):
|
||||||
user = User.query.get_or_404(user_id)
|
user = User.query.get_or_404(user_id)
|
||||||
update_account_information_form = UpdateAccountInformationForm(user)
|
update_account_information_form = UpdateAccountInformationForm(user)
|
@ -5,8 +5,8 @@ from flask import abort, Blueprint
|
|||||||
from werkzeug.exceptions import InternalServerError
|
from werkzeug.exceptions import InternalServerError
|
||||||
from app import db, hashids
|
from app import db, hashids
|
||||||
from app.models import Job, JobInput, JobStatus, TesseractOCRPipelineModel
|
from app.models import Job, JobInput, JobStatus, TesseractOCRPipelineModel
|
||||||
from .auth import auth_error_responses, token_auth
|
|
||||||
from .schemas import EmptySchema, JobSchema, SpaCyNLPPipelineJobSchema, TesseractOCRPipelineJobSchema, TesseractOCRPipelineModelSchema
|
from .schemas import EmptySchema, JobSchema, SpaCyNLPPipelineJobSchema, TesseractOCRPipelineJobSchema, TesseractOCRPipelineModelSchema
|
||||||
|
from .auth import auth_error_responses, token_auth
|
||||||
|
|
||||||
|
|
||||||
bp = Blueprint('jobs', __name__)
|
bp = Blueprint('jobs', __name__)
|
||||||
@ -77,7 +77,7 @@ def delete_job(job_id):
|
|||||||
job = Job.query.get(job_id)
|
job = Job.query.get(job_id)
|
||||||
if job is None:
|
if job is None:
|
||||||
abort(404)
|
abort(404)
|
||||||
if not (job.user == current_user or current_user.is_administrator):
|
if not (job.user == current_user or current_user.is_administrator()):
|
||||||
abort(403)
|
abort(403)
|
||||||
try:
|
try:
|
||||||
job.delete()
|
job.delete()
|
||||||
@ -97,6 +97,6 @@ def get_job(job_id):
|
|||||||
job = Job.query.get(job_id)
|
job = Job.query.get(job_id)
|
||||||
if job is None:
|
if job is None:
|
||||||
abort(404)
|
abort(404)
|
||||||
if not (job.user == current_user or current_user.is_administrator):
|
if not (job.user == current_user or current_user.is_administrator()):
|
||||||
abort(403)
|
abort(403)
|
||||||
return job
|
return job
|
@ -10,7 +10,7 @@ from app.models import (
|
|||||||
User,
|
User,
|
||||||
UserSettingJobStatusMailNotificationLevel
|
UserSettingJobStatusMailNotificationLevel
|
||||||
)
|
)
|
||||||
from app.blueprints.services import SERVICES
|
from app.services import SERVICES
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -3,11 +3,11 @@ from apifairy import authenticate, body, response
|
|||||||
from apifairy.decorators import other_responses
|
from apifairy.decorators import other_responses
|
||||||
from flask import abort, Blueprint
|
from flask import abort, Blueprint
|
||||||
from werkzeug.exceptions import InternalServerError
|
from werkzeug.exceptions import InternalServerError
|
||||||
from app.email import create_message, send
|
|
||||||
from app import db
|
from app import db
|
||||||
|
from app.email import create_message, send
|
||||||
from app.models import User
|
from app.models import User
|
||||||
from .auth import auth_error_responses, token_auth
|
|
||||||
from .schemas import EmptySchema, UserSchema
|
from .schemas import EmptySchema, UserSchema
|
||||||
|
from .auth import auth_error_responses, token_auth
|
||||||
|
|
||||||
|
|
||||||
bp = Blueprint('users', __name__)
|
bp = Blueprint('users', __name__)
|
||||||
@ -60,7 +60,7 @@ def delete_user(user_id):
|
|||||||
user = User.query.get(user_id)
|
user = User.query.get(user_id)
|
||||||
if user is None:
|
if user is None:
|
||||||
abort(404)
|
abort(404)
|
||||||
if not (user == current_user or current_user.is_administrator):
|
if not (user == current_user or current_user.is_administrator()):
|
||||||
abort(403)
|
abort(403)
|
||||||
user.delete()
|
user.delete()
|
||||||
db.session.commit()
|
db.session.commit()
|
||||||
@ -78,7 +78,7 @@ def get_user(user_id):
|
|||||||
user = User.query.get(user_id)
|
user = User.query.get(user_id)
|
||||||
if user is None:
|
if user is None:
|
||||||
abort(404)
|
abort(404)
|
||||||
if not (user == current_user or current_user.is_administrator):
|
if not (user == current_user or current_user.is_administrator()):
|
||||||
abort(403)
|
abort(403)
|
||||||
return user
|
return user
|
||||||
|
|
||||||
@ -94,6 +94,6 @@ def get_user_by_username(username):
|
|||||||
user = User.query.filter(User.username == username).first()
|
user = User.query.filter(User.username == username).first()
|
||||||
if user is None:
|
if user is None:
|
||||||
abort(404)
|
abort(404)
|
||||||
if not (user == current_user or current_user.is_administrator):
|
if not (user == current_user or current_user.is_administrator()):
|
||||||
abort(403)
|
abort(403)
|
||||||
return user
|
return user
|
5
app/auth/__init__.py
Normal file
5
app/auth/__init__.py
Normal file
@ -0,0 +1,5 @@
|
|||||||
|
from flask import Blueprint
|
||||||
|
|
||||||
|
|
||||||
|
bp = Blueprint('auth', __name__)
|
||||||
|
from . import routes
|
@ -60,11 +60,7 @@ class RegistrationForm(FlaskForm):
|
|||||||
|
|
||||||
def validate_username(self, field):
|
def validate_username(self, field):
|
||||||
if User.query.filter_by(username=field.data).first():
|
if User.query.filter_by(username=field.data).first():
|
||||||
raise ValidationError('Username already registered')
|
raise ValidationError('Username already in use')
|
||||||
|
|
||||||
def validate_terms_of_use_accepted(self, field):
|
|
||||||
if not field.data:
|
|
||||||
raise ValidationError('Terms of Use not accepted')
|
|
||||||
|
|
||||||
|
|
||||||
class LoginForm(FlaskForm):
|
class LoginForm(FlaskForm):
|
@ -1,4 +1,5 @@
|
|||||||
from flask import abort, flash, redirect, render_template, request, url_for
|
from flask import abort, flash, redirect, render_template, request, url_for
|
||||||
|
from flask_breadcrumbs import register_breadcrumb
|
||||||
from flask_login import current_user, login_user, login_required, logout_user
|
from flask_login import current_user, login_user, login_required, logout_user
|
||||||
from app import db
|
from app import db
|
||||||
from app.email import create_message, send
|
from app.email import create_message, send
|
||||||
@ -12,7 +13,24 @@ from .forms import (
|
|||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
@bp.before_app_request
|
||||||
|
def before_request():
|
||||||
|
"""
|
||||||
|
Checks if a user is unconfirmed when visiting specific sites. Redirects to
|
||||||
|
unconfirmed view if user is unconfirmed.
|
||||||
|
"""
|
||||||
|
if current_user.is_authenticated:
|
||||||
|
current_user.ping()
|
||||||
|
db.session.commit()
|
||||||
|
if (not current_user.confirmed
|
||||||
|
and request.endpoint
|
||||||
|
and request.blueprint != 'auth'
|
||||||
|
and request.endpoint != 'static'):
|
||||||
|
return redirect(url_for('auth.unconfirmed'))
|
||||||
|
|
||||||
|
|
||||||
@bp.route('/register', methods=['GET', 'POST'])
|
@bp.route('/register', methods=['GET', 'POST'])
|
||||||
|
@register_breadcrumb(bp, '.register', 'Register')
|
||||||
def register():
|
def register():
|
||||||
if current_user.is_authenticated:
|
if current_user.is_authenticated:
|
||||||
return redirect(url_for('main.dashboard'))
|
return redirect(url_for('main.dashboard'))
|
||||||
@ -49,6 +67,7 @@ def register():
|
|||||||
|
|
||||||
|
|
||||||
@bp.route('/login', methods=['GET', 'POST'])
|
@bp.route('/login', methods=['GET', 'POST'])
|
||||||
|
@register_breadcrumb(bp, '.login', 'Login')
|
||||||
def login():
|
def login():
|
||||||
if current_user.is_authenticated:
|
if current_user.is_authenticated:
|
||||||
return redirect(url_for('main.dashboard'))
|
return redirect(url_for('main.dashboard'))
|
||||||
@ -79,6 +98,7 @@ def logout():
|
|||||||
|
|
||||||
|
|
||||||
@bp.route('/unconfirmed')
|
@bp.route('/unconfirmed')
|
||||||
|
@register_breadcrumb(bp, '.unconfirmed', 'Unconfirmed')
|
||||||
@login_required
|
@login_required
|
||||||
def unconfirmed():
|
def unconfirmed():
|
||||||
if current_user.confirmed:
|
if current_user.confirmed:
|
||||||
@ -121,6 +141,7 @@ def confirm(token):
|
|||||||
|
|
||||||
|
|
||||||
@bp.route('/reset-password-request', methods=['GET', 'POST'])
|
@bp.route('/reset-password-request', methods=['GET', 'POST'])
|
||||||
|
@register_breadcrumb(bp, '.reset_password_request', 'Password Reset')
|
||||||
def reset_password_request():
|
def reset_password_request():
|
||||||
if current_user.is_authenticated:
|
if current_user.is_authenticated:
|
||||||
return redirect(url_for('main.dashboard'))
|
return redirect(url_for('main.dashboard'))
|
||||||
@ -150,6 +171,7 @@ def reset_password_request():
|
|||||||
|
|
||||||
|
|
||||||
@bp.route('/reset-password/<token>', methods=['GET', 'POST'])
|
@bp.route('/reset-password/<token>', methods=['GET', 'POST'])
|
||||||
|
@register_breadcrumb(bp, '.reset_password', 'Password Reset')
|
||||||
def reset_password(token):
|
def reset_password(token):
|
||||||
if current_user.is_authenticated:
|
if current_user.is_authenticated:
|
||||||
return redirect(url_for('main.dashboard'))
|
return redirect(url_for('main.dashboard'))
|
@ -1,29 +0,0 @@
|
|||||||
from flask import Blueprint, redirect, request, url_for
|
|
||||||
from flask_login import current_user
|
|
||||||
from app import db
|
|
||||||
|
|
||||||
|
|
||||||
bp = Blueprint('auth', __name__)
|
|
||||||
|
|
||||||
|
|
||||||
@bp.before_app_request
|
|
||||||
def before_request():
|
|
||||||
if not current_user.is_authenticated:
|
|
||||||
return
|
|
||||||
|
|
||||||
current_user.ping()
|
|
||||||
db.session.commit()
|
|
||||||
|
|
||||||
if (
|
|
||||||
not current_user.confirmed
|
|
||||||
and request.endpoint
|
|
||||||
and request.blueprint != 'auth'
|
|
||||||
and request.endpoint != 'static'
|
|
||||||
):
|
|
||||||
return redirect(url_for('auth.unconfirmed'))
|
|
||||||
|
|
||||||
if not current_user.terms_of_use_accepted:
|
|
||||||
return redirect(url_for('main.terms_of_use'))
|
|
||||||
|
|
||||||
|
|
||||||
from . import routes
|
|
@ -1,25 +0,0 @@
|
|||||||
from flask import Blueprint
|
|
||||||
from flask_login import login_required
|
|
||||||
|
|
||||||
|
|
||||||
bp = Blueprint('contributions', __name__)
|
|
||||||
|
|
||||||
|
|
||||||
@bp.before_request
|
|
||||||
@login_required
|
|
||||||
def before_request():
|
|
||||||
'''
|
|
||||||
Ensures that the routes in this package can only be visited by users that
|
|
||||||
are logged in.
|
|
||||||
'''
|
|
||||||
pass
|
|
||||||
|
|
||||||
|
|
||||||
from . import routes
|
|
||||||
|
|
||||||
|
|
||||||
from .spacy_nlp_pipeline_models import bp as spacy_nlp_pipeline_models_bp
|
|
||||||
bp.register_blueprint(spacy_nlp_pipeline_models_bp, url_prefix='/spacy-nlp-pipeline-models')
|
|
||||||
|
|
||||||
from .tesseract_ocr_pipeline_models import bp as tesseract_ocr_pipeline_models_bp
|
|
||||||
bp.register_blueprint(tesseract_ocr_pipeline_models_bp, url_prefix='/tesseract-ocr-pipeline-models')
|
|
@ -1,7 +0,0 @@
|
|||||||
from flask import render_template
|
|
||||||
from . import bp
|
|
||||||
|
|
||||||
|
|
||||||
@bp.route('')
|
|
||||||
def index():
|
|
||||||
return render_template('contributions/index.html.j2', title='Contributions')
|
|
@ -1,18 +0,0 @@
|
|||||||
from flask import current_app, Blueprint
|
|
||||||
from flask_login import login_required
|
|
||||||
|
|
||||||
|
|
||||||
bp = Blueprint('spacy_nlp_pipeline_models', __name__)
|
|
||||||
|
|
||||||
|
|
||||||
@bp.before_request
|
|
||||||
@login_required
|
|
||||||
def before_request():
|
|
||||||
'''
|
|
||||||
Ensures that the routes in this package can only be visited by users that
|
|
||||||
are logged in.
|
|
||||||
'''
|
|
||||||
pass
|
|
||||||
|
|
||||||
|
|
||||||
from . import routes, json_routes
|
|
@ -1,18 +0,0 @@
|
|||||||
from flask import Blueprint
|
|
||||||
from flask_login import login_required
|
|
||||||
|
|
||||||
|
|
||||||
bp = Blueprint('jobs', __name__)
|
|
||||||
|
|
||||||
|
|
||||||
@bp.before_request
|
|
||||||
@login_required
|
|
||||||
def before_request():
|
|
||||||
'''
|
|
||||||
Ensures that the routes in this package can only be visited by users that
|
|
||||||
are logged in.
|
|
||||||
'''
|
|
||||||
pass
|
|
||||||
|
|
||||||
|
|
||||||
from . import routes, json_routes
|
|
@ -2,7 +2,7 @@ from flask import Blueprint
|
|||||||
from flask_login import login_required
|
from flask_login import login_required
|
||||||
|
|
||||||
|
|
||||||
bp = Blueprint('tesseract_ocr_pipeline_models', __name__)
|
bp = Blueprint('contributions', __name__)
|
||||||
|
|
||||||
|
|
||||||
@bp.before_request
|
@bp.before_request
|
||||||
@ -15,4 +15,9 @@ def before_request():
|
|||||||
pass
|
pass
|
||||||
|
|
||||||
|
|
||||||
from . import json_routes, routes
|
from . import (
|
||||||
|
routes,
|
||||||
|
spacy_nlp_pipeline_models,
|
||||||
|
tesseract_ocr_pipeline_models,
|
||||||
|
transkribus_htr_pipeline_models
|
||||||
|
)
|
9
app/contributions/routes.py
Normal file
9
app/contributions/routes.py
Normal file
@ -0,0 +1,9 @@
|
|||||||
|
from flask import redirect, url_for
|
||||||
|
from flask_breadcrumbs import register_breadcrumb
|
||||||
|
from . import bp
|
||||||
|
|
||||||
|
|
||||||
|
@bp.route('')
|
||||||
|
@register_breadcrumb(bp, '.', '<i class="material-icons left">new_label</i>My Contributions')
|
||||||
|
def contributions():
|
||||||
|
return redirect(url_for('main.dashboard', _anchor='contributions'))
|
@ -1,7 +1,7 @@
|
|||||||
from flask_wtf.file import FileField, FileRequired
|
from flask_wtf.file import FileField, FileRequired
|
||||||
from wtforms import StringField, ValidationError
|
from wtforms import StringField, ValidationError
|
||||||
from wtforms.validators import InputRequired, Length
|
from wtforms.validators import InputRequired, Length
|
||||||
from app.blueprints.services import SERVICES
|
from app.services import SERVICES
|
||||||
from ..forms import ContributionBaseForm, UpdateContributionBaseForm
|
from ..forms import ContributionBaseForm, UpdateContributionBaseForm
|
||||||
|
|
||||||
|
|
@ -1,14 +1,13 @@
|
|||||||
from flask import abort, current_app, request
|
from flask import abort, current_app, request
|
||||||
from flask_login import current_user, login_required
|
from flask_login import current_user
|
||||||
from threading import Thread
|
from threading import Thread
|
||||||
from app import db
|
from app import db
|
||||||
from app.decorators import content_negotiation, permission_required
|
from app.decorators import content_negotiation, permission_required
|
||||||
from app.models import SpaCyNLPPipelineModel
|
from app.models import SpaCyNLPPipelineModel
|
||||||
from . import bp
|
from .. import bp
|
||||||
|
|
||||||
|
|
||||||
@bp.route('/<hashid:spacy_nlp_pipeline_model_id>', methods=['DELETE'])
|
@bp.route('/spacy-nlp-pipeline-models/<hashid:spacy_nlp_pipeline_model_id>', methods=['DELETE'])
|
||||||
@login_required
|
|
||||||
@content_negotiation(produces='application/json')
|
@content_negotiation(produces='application/json')
|
||||||
def delete_spacy_model(spacy_nlp_pipeline_model_id):
|
def delete_spacy_model(spacy_nlp_pipeline_model_id):
|
||||||
def _delete_spacy_model(app, spacy_nlp_pipeline_model_id):
|
def _delete_spacy_model(app, spacy_nlp_pipeline_model_id):
|
||||||
@ -16,9 +15,9 @@ def delete_spacy_model(spacy_nlp_pipeline_model_id):
|
|||||||
snpm = SpaCyNLPPipelineModel.query.get(spacy_nlp_pipeline_model_id)
|
snpm = SpaCyNLPPipelineModel.query.get(spacy_nlp_pipeline_model_id)
|
||||||
snpm.delete()
|
snpm.delete()
|
||||||
db.session.commit()
|
db.session.commit()
|
||||||
|
|
||||||
snpm = SpaCyNLPPipelineModel.query.get_or_404(spacy_nlp_pipeline_model_id)
|
snpm = SpaCyNLPPipelineModel.query.get_or_404(spacy_nlp_pipeline_model_id)
|
||||||
if not (snpm.user == current_user or current_user.is_administrator):
|
if not (snpm.user == current_user or current_user.is_administrator()):
|
||||||
abort(403)
|
abort(403)
|
||||||
thread = Thread(
|
thread = Thread(
|
||||||
target=_delete_spacy_model,
|
target=_delete_spacy_model,
|
||||||
@ -32,7 +31,7 @@ def delete_spacy_model(spacy_nlp_pipeline_model_id):
|
|||||||
return response_data, 202
|
return response_data, 202
|
||||||
|
|
||||||
|
|
||||||
@bp.route('/<hashid:spacy_nlp_pipeline_model_id>/is_public', methods=['PUT'])
|
@bp.route('/spacy-nlp-pipeline-models/<hashid:spacy_nlp_pipeline_model_id>/is_public', methods=['PUT'])
|
||||||
@permission_required('CONTRIBUTE')
|
@permission_required('CONTRIBUTE')
|
||||||
@content_negotiation(consumes='application/json', produces='application/json')
|
@content_negotiation(consumes='application/json', produces='application/json')
|
||||||
def update_spacy_nlp_pipeline_model_is_public(spacy_nlp_pipeline_model_id):
|
def update_spacy_nlp_pipeline_model_is_public(spacy_nlp_pipeline_model_id):
|
||||||
@ -40,7 +39,7 @@ def update_spacy_nlp_pipeline_model_is_public(spacy_nlp_pipeline_model_id):
|
|||||||
if not isinstance(is_public, bool):
|
if not isinstance(is_public, bool):
|
||||||
abort(400)
|
abort(400)
|
||||||
snpm = SpaCyNLPPipelineModel.query.get_or_404(spacy_nlp_pipeline_model_id)
|
snpm = SpaCyNLPPipelineModel.query.get_or_404(spacy_nlp_pipeline_model_id)
|
||||||
if not (snpm.user == current_user or current_user.is_administrator):
|
if not (snpm.user == current_user or current_user.is_administrator()):
|
||||||
abort(403)
|
abort(403)
|
||||||
snpm.is_public = is_public
|
snpm.is_public = is_public
|
||||||
db.session.commit()
|
db.session.commit()
|
@ -1,5 +1,6 @@
|
|||||||
from flask import abort, flash, redirect, render_template, url_for
|
from flask import abort, flash, redirect, render_template, url_for
|
||||||
from flask_login import current_user, login_required
|
from flask_breadcrumbs import register_breadcrumb
|
||||||
|
from flask_login import current_user
|
||||||
from app import db
|
from app import db
|
||||||
from app.models import SpaCyNLPPipelineModel
|
from app.models import SpaCyNLPPipelineModel
|
||||||
from . import bp
|
from . import bp
|
||||||
@ -7,17 +8,23 @@ from .forms import (
|
|||||||
CreateSpaCyNLPPipelineModelForm,
|
CreateSpaCyNLPPipelineModelForm,
|
||||||
UpdateSpaCyNLPPipelineModelForm
|
UpdateSpaCyNLPPipelineModelForm
|
||||||
)
|
)
|
||||||
|
from .utils import (
|
||||||
|
spacy_nlp_pipeline_model_dlc as spacy_nlp_pipeline_model_dlc
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
@bp.route('/')
|
@bp.route('/spacy-nlp-pipeline-models')
|
||||||
@login_required
|
@register_breadcrumb(bp, '.spacy_nlp_pipeline_models', 'SpaCy NLP Pipeline Models')
|
||||||
def index():
|
def spacy_nlp_pipeline_models():
|
||||||
return redirect(url_for('contributions.index', _anchor='spacy-nlp-pipeline-models'))
|
return render_template(
|
||||||
|
'contributions/spacy_nlp_pipeline_models/spacy_nlp_pipeline_models.html.j2',
|
||||||
|
title='SpaCy NLP Pipeline Models'
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
@bp.route('/create', methods=['GET', 'POST'])
|
@bp.route('/spacy-nlp-pipeline-models/create', methods=['GET', 'POST'])
|
||||||
@login_required
|
@register_breadcrumb(bp, '.spacy_nlp_pipeline_models.create', 'Create')
|
||||||
def create():
|
def create_spacy_nlp_pipeline_model():
|
||||||
form = CreateSpaCyNLPPipelineModelForm()
|
form = CreateSpaCyNLPPipelineModelForm()
|
||||||
if form.is_submitted():
|
if form.is_submitted():
|
||||||
if not form.validate():
|
if not form.validate():
|
||||||
@ -41,7 +48,7 @@ def create():
|
|||||||
abort(500)
|
abort(500)
|
||||||
db.session.commit()
|
db.session.commit()
|
||||||
flash(f'SpaCy NLP Pipeline model "{snpm.title}" created')
|
flash(f'SpaCy NLP Pipeline model "{snpm.title}" created')
|
||||||
return {}, 201, {'Location': url_for('.index')}
|
return {}, 201, {'Location': url_for('.spacy_nlp_pipeline_models')}
|
||||||
return render_template(
|
return render_template(
|
||||||
'contributions/spacy_nlp_pipeline_models/create.html.j2',
|
'contributions/spacy_nlp_pipeline_models/create.html.j2',
|
||||||
title='Create SpaCy NLP Pipeline Model',
|
title='Create SpaCy NLP Pipeline Model',
|
||||||
@ -49,11 +56,11 @@ def create():
|
|||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@bp.route('/<hashid:spacy_nlp_pipeline_model_id>', methods=['GET', 'POST'])
|
@bp.route('/spacy-nlp-pipeline-models/<hashid:spacy_nlp_pipeline_model_id>', methods=['GET', 'POST'])
|
||||||
@login_required
|
@register_breadcrumb(bp, '.spacy_nlp_pipeline_models.entity', '', dynamic_list_constructor=spacy_nlp_pipeline_model_dlc)
|
||||||
def entity(spacy_nlp_pipeline_model_id):
|
def spacy_nlp_pipeline_model(spacy_nlp_pipeline_model_id):
|
||||||
snpm = SpaCyNLPPipelineModel.query.get_or_404(spacy_nlp_pipeline_model_id)
|
snpm = SpaCyNLPPipelineModel.query.get_or_404(spacy_nlp_pipeline_model_id)
|
||||||
if not (snpm.user == current_user or current_user.is_administrator):
|
if not (snpm.user == current_user or current_user.is_administrator()):
|
||||||
abort(403)
|
abort(403)
|
||||||
form = UpdateSpaCyNLPPipelineModelForm(data=snpm.to_json_serializeable())
|
form = UpdateSpaCyNLPPipelineModelForm(data=snpm.to_json_serializeable())
|
||||||
if form.validate_on_submit():
|
if form.validate_on_submit():
|
||||||
@ -61,9 +68,9 @@ def entity(spacy_nlp_pipeline_model_id):
|
|||||||
if db.session.is_modified(snpm):
|
if db.session.is_modified(snpm):
|
||||||
flash(f'SpaCy NLP Pipeline model "{snpm.title}" updated')
|
flash(f'SpaCy NLP Pipeline model "{snpm.title}" updated')
|
||||||
db.session.commit()
|
db.session.commit()
|
||||||
return redirect(url_for('.index'))
|
return redirect(url_for('.spacy_nlp_pipeline_models'))
|
||||||
return render_template(
|
return render_template(
|
||||||
'contributions/spacy_nlp_pipeline_models/entity.html.j2',
|
'contributions/spacy_nlp_pipeline_models/spacy_nlp_pipeline_model.html.j2',
|
||||||
title=f'{snpm.title} {snpm.version}',
|
title=f'{snpm.title} {snpm.version}',
|
||||||
form=form,
|
form=form,
|
||||||
spacy_nlp_pipeline_model=snpm
|
spacy_nlp_pipeline_model=snpm
|
13
app/contributions/spacy_nlp_pipeline_models/utils.py
Normal file
13
app/contributions/spacy_nlp_pipeline_models/utils.py
Normal file
@ -0,0 +1,13 @@
|
|||||||
|
from flask import request, url_for
|
||||||
|
from app.models import SpaCyNLPPipelineModel
|
||||||
|
|
||||||
|
|
||||||
|
def spacy_nlp_pipeline_model_dlc():
|
||||||
|
snpm_id = request.view_args['spacy_nlp_pipeline_model_id']
|
||||||
|
snpm = SpaCyNLPPipelineModel.query.get_or_404(snpm_id)
|
||||||
|
return [
|
||||||
|
{
|
||||||
|
'text': f'{snpm.title} {snpm.version}',
|
||||||
|
'url': url_for('.spacy_nlp_pipeline_model', spacy_nlp_pipeline_model_id=snpm_id)
|
||||||
|
}
|
||||||
|
]
|
@ -1,6 +1,6 @@
|
|||||||
from flask_wtf.file import FileField, FileRequired
|
from flask_wtf.file import FileField, FileRequired
|
||||||
from wtforms import ValidationError
|
from wtforms import ValidationError
|
||||||
from app.blueprints.services import SERVICES
|
from app.services import SERVICES
|
||||||
from ..forms import ContributionBaseForm, UpdateContributionBaseForm
|
from ..forms import ContributionBaseForm, UpdateContributionBaseForm
|
||||||
|
|
||||||
|
|
||||||
@ -9,7 +9,7 @@ class CreateTesseractOCRPipelineModelForm(ContributionBaseForm):
|
|||||||
'File',
|
'File',
|
||||||
validators=[FileRequired()]
|
validators=[FileRequired()]
|
||||||
)
|
)
|
||||||
|
|
||||||
def validate_tesseract_model_file(self, field):
|
def validate_tesseract_model_file(self, field):
|
||||||
if not field.data.filename.lower().endswith('.traineddata'):
|
if not field.data.filename.lower().endswith('.traineddata'):
|
||||||
raise ValidationError('traineddata files only!')
|
raise ValidationError('traineddata files only!')
|
@ -7,7 +7,7 @@ from app.models import TesseractOCRPipelineModel
|
|||||||
from . import bp
|
from . import bp
|
||||||
|
|
||||||
|
|
||||||
@bp.route('/<hashid:tesseract_ocr_pipeline_model_id>', methods=['DELETE'])
|
@bp.route('/tesseract-ocr-pipeline-models/<hashid:tesseract_ocr_pipeline_model_id>', methods=['DELETE'])
|
||||||
@content_negotiation(produces='application/json')
|
@content_negotiation(produces='application/json')
|
||||||
def delete_tesseract_model(tesseract_ocr_pipeline_model_id):
|
def delete_tesseract_model(tesseract_ocr_pipeline_model_id):
|
||||||
def _delete_tesseract_ocr_pipeline_model(app, tesseract_ocr_pipeline_model_id):
|
def _delete_tesseract_ocr_pipeline_model(app, tesseract_ocr_pipeline_model_id):
|
||||||
@ -17,7 +17,7 @@ def delete_tesseract_model(tesseract_ocr_pipeline_model_id):
|
|||||||
db.session.commit()
|
db.session.commit()
|
||||||
|
|
||||||
topm = TesseractOCRPipelineModel.query.get_or_404(tesseract_ocr_pipeline_model_id)
|
topm = TesseractOCRPipelineModel.query.get_or_404(tesseract_ocr_pipeline_model_id)
|
||||||
if not (topm.user == current_user or current_user.is_administrator):
|
if not (topm.user == current_user or current_user.is_administrator()):
|
||||||
abort(403)
|
abort(403)
|
||||||
thread = Thread(
|
thread = Thread(
|
||||||
target=_delete_tesseract_ocr_pipeline_model,
|
target=_delete_tesseract_ocr_pipeline_model,
|
||||||
@ -31,7 +31,7 @@ def delete_tesseract_model(tesseract_ocr_pipeline_model_id):
|
|||||||
return response_data, 202
|
return response_data, 202
|
||||||
|
|
||||||
|
|
||||||
@bp.route('/<hashid:tesseract_ocr_pipeline_model_id>/is_public', methods=['PUT'])
|
@bp.route('/tesseract-ocr-pipeline-models/<hashid:tesseract_ocr_pipeline_model_id>/is_public', methods=['PUT'])
|
||||||
@permission_required('CONTRIBUTE')
|
@permission_required('CONTRIBUTE')
|
||||||
@content_negotiation(consumes='application/json', produces='application/json')
|
@content_negotiation(consumes='application/json', produces='application/json')
|
||||||
def update_tesseract_ocr_pipeline_model_is_public(tesseract_ocr_pipeline_model_id):
|
def update_tesseract_ocr_pipeline_model_is_public(tesseract_ocr_pipeline_model_id):
|
||||||
@ -39,7 +39,7 @@ def update_tesseract_ocr_pipeline_model_is_public(tesseract_ocr_pipeline_model_i
|
|||||||
if not isinstance(is_public, bool):
|
if not isinstance(is_public, bool):
|
||||||
abort(400)
|
abort(400)
|
||||||
topm = TesseractOCRPipelineModel.query.get_or_404(tesseract_ocr_pipeline_model_id)
|
topm = TesseractOCRPipelineModel.query.get_or_404(tesseract_ocr_pipeline_model_id)
|
||||||
if not (topm.user == current_user or current_user.is_administrator):
|
if not (topm.user == current_user or current_user.is_administrator()):
|
||||||
abort(403)
|
abort(403)
|
||||||
topm.is_public = is_public
|
topm.is_public = is_public
|
||||||
db.session.commit()
|
db.session.commit()
|
@ -1,4 +1,5 @@
|
|||||||
from flask import abort, flash, redirect, render_template, url_for
|
from flask import abort, flash, redirect, render_template, url_for
|
||||||
|
from flask_breadcrumbs import register_breadcrumb
|
||||||
from flask_login import current_user
|
from flask_login import current_user
|
||||||
from app import db
|
from app import db
|
||||||
from app.models import TesseractOCRPipelineModel
|
from app.models import TesseractOCRPipelineModel
|
||||||
@ -7,15 +8,23 @@ from .forms import (
|
|||||||
CreateTesseractOCRPipelineModelForm,
|
CreateTesseractOCRPipelineModelForm,
|
||||||
UpdateTesseractOCRPipelineModelForm
|
UpdateTesseractOCRPipelineModelForm
|
||||||
)
|
)
|
||||||
|
from .utils import (
|
||||||
|
tesseract_ocr_pipeline_model_dlc as tesseract_ocr_pipeline_model_dlc
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
@bp.route('/')
|
@bp.route('/tesseract-ocr-pipeline-models')
|
||||||
def index():
|
@register_breadcrumb(bp, '.tesseract_ocr_pipeline_models', 'Tesseract OCR Pipeline Models')
|
||||||
return redirect(url_for('contributions.index', _anchor='tesseract-ocr-pipeline-models'))
|
def tesseract_ocr_pipeline_models():
|
||||||
|
return render_template(
|
||||||
|
'contributions/tesseract_ocr_pipeline_models/tesseract_ocr_pipeline_models.html.j2',
|
||||||
|
title='Tesseract OCR Pipeline Models'
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
@bp.route('/create', methods=['GET', 'POST'])
|
@bp.route('/tesseract-ocr-pipeline-models/create', methods=['GET', 'POST'])
|
||||||
def create():
|
@register_breadcrumb(bp, '.tesseract_ocr_pipeline_models.create', 'Create')
|
||||||
|
def create_tesseract_ocr_pipeline_model():
|
||||||
form = CreateTesseractOCRPipelineModelForm()
|
form = CreateTesseractOCRPipelineModelForm()
|
||||||
if form.is_submitted():
|
if form.is_submitted():
|
||||||
if not form.validate():
|
if not form.validate():
|
||||||
@ -38,7 +47,7 @@ def create():
|
|||||||
abort(500)
|
abort(500)
|
||||||
db.session.commit()
|
db.session.commit()
|
||||||
flash(f'Tesseract OCR Pipeline model "{topm.title}" created')
|
flash(f'Tesseract OCR Pipeline model "{topm.title}" created')
|
||||||
return {}, 201, {'Location': url_for('.index')}
|
return {}, 201, {'Location': url_for('.tesseract_ocr_pipeline_models')}
|
||||||
return render_template(
|
return render_template(
|
||||||
'contributions/tesseract_ocr_pipeline_models/create.html.j2',
|
'contributions/tesseract_ocr_pipeline_models/create.html.j2',
|
||||||
title='Create Tesseract OCR Pipeline Model',
|
title='Create Tesseract OCR Pipeline Model',
|
||||||
@ -46,10 +55,11 @@ def create():
|
|||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@bp.route('/<hashid:tesseract_ocr_pipeline_model_id>', methods=['GET', 'POST'])
|
@bp.route('/tesseract-ocr-pipeline-models/<hashid:tesseract_ocr_pipeline_model_id>', methods=['GET', 'POST'])
|
||||||
def entity(tesseract_ocr_pipeline_model_id):
|
@register_breadcrumb(bp, '.tesseract_ocr_pipeline_models.entity', '', dynamic_list_constructor=tesseract_ocr_pipeline_model_dlc)
|
||||||
|
def tesseract_ocr_pipeline_model(tesseract_ocr_pipeline_model_id):
|
||||||
topm = TesseractOCRPipelineModel.query.get_or_404(tesseract_ocr_pipeline_model_id)
|
topm = TesseractOCRPipelineModel.query.get_or_404(tesseract_ocr_pipeline_model_id)
|
||||||
if not (topm.user == current_user or current_user.is_administrator):
|
if not (topm.user == current_user or current_user.is_administrator()):
|
||||||
abort(403)
|
abort(403)
|
||||||
form = UpdateTesseractOCRPipelineModelForm(data=topm.to_json_serializeable())
|
form = UpdateTesseractOCRPipelineModelForm(data=topm.to_json_serializeable())
|
||||||
if form.validate_on_submit():
|
if form.validate_on_submit():
|
||||||
@ -57,9 +67,9 @@ def entity(tesseract_ocr_pipeline_model_id):
|
|||||||
if db.session.is_modified(topm):
|
if db.session.is_modified(topm):
|
||||||
flash(f'Tesseract OCR Pipeline model "{topm.title}" updated')
|
flash(f'Tesseract OCR Pipeline model "{topm.title}" updated')
|
||||||
db.session.commit()
|
db.session.commit()
|
||||||
return redirect(url_for('.index'))
|
return redirect(url_for('.tesseract_ocr_pipeline_models'))
|
||||||
return render_template(
|
return render_template(
|
||||||
'contributions/tesseract_ocr_pipeline_models/entity.html.j2',
|
'contributions/tesseract_ocr_pipeline_models/tesseract_ocr_pipeline_model.html.j2',
|
||||||
title=f'{topm.title} {topm.version}',
|
title=f'{topm.title} {topm.version}',
|
||||||
form=form,
|
form=form,
|
||||||
tesseract_ocr_pipeline_model=topm
|
tesseract_ocr_pipeline_model=topm
|
13
app/contributions/tesseract_ocr_pipeline_models/utils.py
Normal file
13
app/contributions/tesseract_ocr_pipeline_models/utils.py
Normal file
@ -0,0 +1,13 @@
|
|||||||
|
from flask import request, url_for
|
||||||
|
from app.models import TesseractOCRPipelineModel
|
||||||
|
|
||||||
|
|
||||||
|
def tesseract_ocr_pipeline_model_dlc():
|
||||||
|
topm_id = request.view_args['tesseract_ocr_pipeline_model_id']
|
||||||
|
topm = TesseractOCRPipelineModel.query.get_or_404(topm_id)
|
||||||
|
return [
|
||||||
|
{
|
||||||
|
'text': f'{topm.title} {topm.version}',
|
||||||
|
'url': url_for('.tesseract_ocr_pipeline_model', tesseract_ocr_pipeline_model_id=topm_id)
|
||||||
|
}
|
||||||
|
]
|
@ -0,0 +1,2 @@
|
|||||||
|
from .. import bp
|
||||||
|
from . import routes
|
@ -0,0 +1,7 @@
|
|||||||
|
from flask import abort
|
||||||
|
from . import bp
|
||||||
|
|
||||||
|
|
||||||
|
@bp.route('/transkribus_htr_pipeline_models')
|
||||||
|
def transkribus_htr_pipeline_models():
|
||||||
|
return abort(503)
|
@ -1,69 +1,81 @@
|
|||||||
from datetime import datetime
|
|
||||||
from flask import current_app
|
from flask import current_app
|
||||||
from pathlib import Path
|
|
||||||
import json
|
|
||||||
import shutil
|
|
||||||
from app import db
|
from app import db
|
||||||
from app.models import User, Corpus, CorpusFile
|
from app.models import User, Corpus, CorpusFile
|
||||||
|
from datetime import datetime
|
||||||
|
import json
|
||||||
|
import os
|
||||||
|
import shutil
|
||||||
|
|
||||||
|
|
||||||
class SandpaperConverter:
|
class SandpaperConverter:
|
||||||
def __init__(self, json_db_file: Path, data_dir: Path):
|
def __init__(self, json_db_file, data_dir):
|
||||||
self.json_db_file = json_db_file
|
self.json_db_file = json_db_file
|
||||||
self.data_dir = data_dir
|
self.data_dir = data_dir
|
||||||
|
|
||||||
def run(self):
|
def run(self):
|
||||||
with self.json_db_file.open('r') as f:
|
with open(self.json_db_file, 'r') as f:
|
||||||
json_db: list[dict] = json.load(f)
|
json_db = json.loads(f.read())
|
||||||
|
|
||||||
for json_user in json_db:
|
for json_user in json_db:
|
||||||
if not json_user['confirmed']:
|
if not json_user['confirmed']:
|
||||||
current_app.logger.info(f'Skip unconfirmed user {json_user["username"]}')
|
current_app.logger.info(f'Skip unconfirmed user {json_user["username"]}')
|
||||||
continue
|
continue
|
||||||
user_dir = self.data_dir / f'{json_user["id"]}'
|
user_dir = os.path.join(self.data_dir, str(json_user['id']))
|
||||||
self.convert_user(json_user, user_dir)
|
self.convert_user(json_user, user_dir)
|
||||||
db.session.commit()
|
db.session.commit()
|
||||||
|
|
||||||
|
|
||||||
def convert_user(self, json_user: dict, user_dir: Path):
|
def convert_user(self, json_user, user_dir):
|
||||||
current_app.logger.info(f'Create User {json_user["username"]}...')
|
current_app.logger.info(f'Create User {json_user["username"]}...')
|
||||||
|
user = User(
|
||||||
|
confirmed=json_user['confirmed'],
|
||||||
|
email=json_user['email'],
|
||||||
|
last_seen=datetime.fromtimestamp(json_user['last_seen']),
|
||||||
|
member_since=datetime.fromtimestamp(json_user['member_since']),
|
||||||
|
password_hash=json_user['password_hash'], # TODO: Needs to be added manually
|
||||||
|
username=json_user['username']
|
||||||
|
)
|
||||||
|
db.session.add(user)
|
||||||
|
db.session.flush(objects=[user])
|
||||||
|
db.session.refresh(user)
|
||||||
try:
|
try:
|
||||||
user = User.create(
|
user.makedirs()
|
||||||
confirmed=json_user['confirmed'],
|
except OSError as e:
|
||||||
email=json_user['email'],
|
current_app.logger.error(e)
|
||||||
last_seen=datetime.fromtimestamp(json_user['last_seen']),
|
db.session.rollback()
|
||||||
member_since=datetime.fromtimestamp(json_user['member_since']),
|
|
||||||
password_hash=json_user['password_hash'], # TODO: Needs to be added manually
|
|
||||||
username=json_user['username']
|
|
||||||
)
|
|
||||||
except OSError:
|
|
||||||
raise Exception('Internal Server Error')
|
raise Exception('Internal Server Error')
|
||||||
for json_corpus in json_user['corpora'].values():
|
for json_corpus in json_user['corpora'].values():
|
||||||
if not json_corpus['files'].values():
|
if not json_corpus['files'].values():
|
||||||
current_app.logger.info(f'Skip empty corpus {json_corpus["title"]}')
|
current_app.logger.info(f'Skip empty corpus {json_corpus["title"]}')
|
||||||
continue
|
continue
|
||||||
corpus_dir = user_dir / 'corpora' / f'{json_corpus["id"]}'
|
corpus_dir = os.path.join(user_dir, 'corpora', str(json_corpus['id']))
|
||||||
self.convert_corpus(json_corpus, user, corpus_dir)
|
self.convert_corpus(json_corpus, user, corpus_dir)
|
||||||
current_app.logger.info('Done')
|
current_app.logger.info('Done')
|
||||||
|
|
||||||
|
|
||||||
def convert_corpus(self, json_corpus: dict, user: User, corpus_dir: Path):
|
def convert_corpus(self, json_corpus, user, corpus_dir):
|
||||||
current_app.logger.info(f'Create Corpus {json_corpus["title"]}...')
|
current_app.logger.info(f'Create Corpus {json_corpus["title"]}...')
|
||||||
|
corpus = Corpus(
|
||||||
|
user=user,
|
||||||
|
creation_date=datetime.fromtimestamp(json_corpus['creation_date']),
|
||||||
|
description=json_corpus['description'],
|
||||||
|
title=json_corpus['title']
|
||||||
|
)
|
||||||
|
db.session.add(corpus)
|
||||||
|
db.session.flush(objects=[corpus])
|
||||||
|
db.session.refresh(corpus)
|
||||||
try:
|
try:
|
||||||
corpus = Corpus.create(
|
corpus.makedirs()
|
||||||
user=user,
|
except OSError as e:
|
||||||
creation_date=datetime.fromtimestamp(json_corpus['creation_date']),
|
current_app.logger.error(e)
|
||||||
description=json_corpus['description'],
|
db.session.rollback()
|
||||||
title=json_corpus['title']
|
|
||||||
)
|
|
||||||
except OSError:
|
|
||||||
raise Exception('Internal Server Error')
|
raise Exception('Internal Server Error')
|
||||||
for json_corpus_file in json_corpus['files'].values():
|
for json_corpus_file in json_corpus['files'].values():
|
||||||
self.convert_corpus_file(json_corpus_file, corpus, corpus_dir)
|
self.convert_corpus_file(json_corpus_file, corpus, corpus_dir)
|
||||||
current_app.logger.info('Done')
|
current_app.logger.info('Done')
|
||||||
|
|
||||||
|
|
||||||
def convert_corpus_file(self, json_corpus_file: dict, corpus: Corpus, corpus_dir: Path):
|
def convert_corpus_file(self, json_corpus_file, corpus, corpus_dir):
|
||||||
current_app.logger.info(f'Create CorpusFile {json_corpus_file["title"]}...')
|
current_app.logger.info(f'Create CorpusFile {json_corpus_file["title"]}...')
|
||||||
corpus_file = CorpusFile(
|
corpus_file = CorpusFile(
|
||||||
corpus=corpus,
|
corpus=corpus,
|
||||||
@ -87,13 +99,13 @@ class SandpaperConverter:
|
|||||||
db.session.refresh(corpus_file)
|
db.session.refresh(corpus_file)
|
||||||
try:
|
try:
|
||||||
shutil.copy2(
|
shutil.copy2(
|
||||||
corpus_dir / json_corpus_file['filename'],
|
os.path.join(corpus_dir, json_corpus_file['filename']),
|
||||||
corpus_file.path
|
corpus_file.path
|
||||||
)
|
)
|
||||||
except:
|
except:
|
||||||
current_app.logger.warning(
|
current_app.logger.warning(
|
||||||
'Can not convert corpus file: '
|
'Can not convert corpus file: '
|
||||||
f'{corpus_dir / json_corpus_file["filename"]}'
|
f'{os.path.join(corpus_dir, json_corpus_file["filename"])}'
|
||||||
' -> '
|
' -> '
|
||||||
f'{corpus_file.path}'
|
f'{corpus_file.path}'
|
||||||
)
|
)
|
||||||
|
@ -1,25 +1,69 @@
|
|||||||
from flask import current_app
|
from flask import current_app
|
||||||
from pathlib import Path
|
|
||||||
|
|
||||||
|
|
||||||
def normalize_vrt_file(input_file: Path, output_file: Path):
|
def normalize_vrt_file(input_file, output_file):
|
||||||
|
def check_pos_attribute_order(vrt_lines):
|
||||||
|
# The following orders are possible:
|
||||||
|
# since 26.02.2019: 'word,lemma,simple_pos,pos,ner'
|
||||||
|
# since 26.03.2021: 'word,pos,lemma,simple_pos,ner'
|
||||||
|
# since 27.01.2022: 'word,pos,lemma,simple_pos'
|
||||||
|
# This Function tries to find out which order we have by looking at the
|
||||||
|
# number of attributes and the position of the simple_pos attribute
|
||||||
|
SIMPLE_POS_LABELS = [
|
||||||
|
'ADJ', 'ADP', 'ADV', 'AUX', 'CONJ',
|
||||||
|
'DET', 'INTJ', 'NOUN', 'NUM', 'PART',
|
||||||
|
'PRON', 'PROPN', 'PUNCT', 'SCONJ', 'SYM',
|
||||||
|
'VERB', 'X'
|
||||||
|
]
|
||||||
|
for line in vrt_lines:
|
||||||
|
if line.startswith('<'):
|
||||||
|
continue
|
||||||
|
pos_attrs = line.rstrip('\n').split('\t')
|
||||||
|
num_pos_attrs = len(pos_attrs)
|
||||||
|
if num_pos_attrs == 4:
|
||||||
|
if pos_attrs[3] in SIMPLE_POS_LABELS:
|
||||||
|
return ['word', 'pos', 'lemma', 'simple_pos']
|
||||||
|
continue
|
||||||
|
elif num_pos_attrs == 5:
|
||||||
|
if pos_attrs[2] in SIMPLE_POS_LABELS:
|
||||||
|
return ['word', 'lemma', 'simple_pos', 'pos', 'ner']
|
||||||
|
elif pos_attrs[3] in SIMPLE_POS_LABELS:
|
||||||
|
return ['word', 'pos', 'lemma', 'simple_pos', 'ner']
|
||||||
|
continue
|
||||||
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
def check_has_ent_as_s_attr(vrt_lines):
|
||||||
|
for line in vrt_lines:
|
||||||
|
if line.startswith('<ent'):
|
||||||
|
return True
|
||||||
|
return False
|
||||||
|
|
||||||
|
|
||||||
|
def pos_attrs_to_string_1(pos_attrs):
|
||||||
|
return f'{pos_attrs[0]}\t{pos_attrs[3]}\t{pos_attrs[1]}\t{pos_attrs[2]}\n'
|
||||||
|
|
||||||
|
|
||||||
|
def pos_attrs_to_string_2(pos_attrs):
|
||||||
|
return f'{pos_attrs[0]}\t{pos_attrs[1]}\t{pos_attrs[2]}\t{pos_attrs[3]}\n'
|
||||||
|
|
||||||
current_app.logger.info(f'Converting {input_file}...')
|
current_app.logger.info(f'Converting {input_file}...')
|
||||||
|
|
||||||
with input_file.open() as f:
|
with open(input_file) as f:
|
||||||
input_vrt_lines = f.readlines()
|
input_vrt_lines = f.readlines()
|
||||||
|
|
||||||
pos_attr_order = _check_pos_attribute_order(input_vrt_lines)
|
pos_attr_order = check_pos_attribute_order(input_vrt_lines)
|
||||||
has_ent_as_s_attr = _check_has_ent_as_s_attr(input_vrt_lines)
|
has_ent_as_s_attr = check_has_ent_as_s_attr(input_vrt_lines)
|
||||||
|
|
||||||
current_app.logger.info(f'Detected pos_attr_order: [{",".join(pos_attr_order)}]')
|
current_app.logger.info(f'Detected pos_attr_order: [{",".join(pos_attr_order)}]')
|
||||||
current_app.logger.info(f'Detected has_ent_as_s_attr: {has_ent_as_s_attr}')
|
current_app.logger.info(f'Detected has_ent_as_s_attr: {has_ent_as_s_attr}')
|
||||||
|
|
||||||
if pos_attr_order == ['word', 'lemma', 'simple_pos', 'pos', 'ner']:
|
if pos_attr_order == ['word', 'lemma', 'simple_pos', 'pos', 'ner']:
|
||||||
pos_attrs_to_string_function = _pos_attrs_to_string_1
|
pos_attrs_to_string_function = pos_attrs_to_string_1
|
||||||
elif pos_attr_order == ['word', 'pos', 'lemma', 'simple_pos', 'ner']:
|
elif pos_attr_order == ['word', 'pos', 'lemma', 'simple_pos', 'ner']:
|
||||||
pos_attrs_to_string_function = _pos_attrs_to_string_2
|
pos_attrs_to_string_function = pos_attrs_to_string_2
|
||||||
elif pos_attr_order == ['word', 'pos', 'lemma', 'simple_pos']:
|
elif pos_attr_order == ['word', 'pos', 'lemma', 'simple_pos']:
|
||||||
pos_attrs_to_string_function = _pos_attrs_to_string_2
|
pos_attrs_to_string_function = pos_attrs_to_string_2
|
||||||
else:
|
else:
|
||||||
raise Exception('Can not handle format')
|
raise Exception('Can not handle format')
|
||||||
|
|
||||||
@ -69,49 +113,5 @@ def normalize_vrt_file(input_file: Path, output_file: Path):
|
|||||||
current_ent = pos_attrs[4]
|
current_ent = pos_attrs[4]
|
||||||
output_vrt += pos_attrs_to_string_function(pos_attrs)
|
output_vrt += pos_attrs_to_string_function(pos_attrs)
|
||||||
|
|
||||||
with output_file.open(mode='w') as f:
|
with open(output_file, 'w') as f:
|
||||||
f.write(output_vrt)
|
f.write(output_vrt)
|
||||||
|
|
||||||
|
|
||||||
def _check_pos_attribute_order(vrt_lines: list[str]) -> list[str]:
|
|
||||||
# The following orders are possible:
|
|
||||||
# since 26.02.2019: 'word,lemma,simple_pos,pos,ner'
|
|
||||||
# since 26.03.2021: 'word,pos,lemma,simple_pos,ner'
|
|
||||||
# since 27.01.2022: 'word,pos,lemma,simple_pos'
|
|
||||||
# This Function tries to find out which order we have by looking at the
|
|
||||||
# number of attributes and the position of the simple_pos attribute
|
|
||||||
SIMPLE_POS_LABELS = [
|
|
||||||
'ADJ', 'ADP', 'ADV', 'AUX', 'CONJ', 'DET', 'INTJ', 'NOUN', 'NUM',
|
|
||||||
'PART', 'PRON', 'PROPN', 'PUNCT', 'SCONJ', 'SYM', 'VERB', 'X'
|
|
||||||
]
|
|
||||||
for line in vrt_lines:
|
|
||||||
if line.startswith('<'):
|
|
||||||
continue
|
|
||||||
pos_attrs = line.rstrip('\n').split('\t')
|
|
||||||
num_pos_attrs = len(pos_attrs)
|
|
||||||
if num_pos_attrs == 4:
|
|
||||||
if pos_attrs[3] in SIMPLE_POS_LABELS:
|
|
||||||
return ['word', 'pos', 'lemma', 'simple_pos']
|
|
||||||
continue
|
|
||||||
elif num_pos_attrs == 5:
|
|
||||||
if pos_attrs[2] in SIMPLE_POS_LABELS:
|
|
||||||
return ['word', 'lemma', 'simple_pos', 'pos', 'ner']
|
|
||||||
elif pos_attrs[3] in SIMPLE_POS_LABELS:
|
|
||||||
return ['word', 'pos', 'lemma', 'simple_pos', 'ner']
|
|
||||||
continue
|
|
||||||
# TODO: raise exception "can't determine attribute order"
|
|
||||||
|
|
||||||
|
|
||||||
def _check_has_ent_as_s_attr(vrt_lines: list[str]) -> bool:
|
|
||||||
for line in vrt_lines:
|
|
||||||
if line.startswith('<ent'):
|
|
||||||
return True
|
|
||||||
return False
|
|
||||||
|
|
||||||
|
|
||||||
def _pos_attrs_to_string_1(pos_attrs: list[str]) -> str:
|
|
||||||
return f'{pos_attrs[0]}\t{pos_attrs[3]}\t{pos_attrs[1]}\t{pos_attrs[2]}\n'
|
|
||||||
|
|
||||||
|
|
||||||
def _pos_attrs_to_string_2(pos_attrs: list[str]) -> str:
|
|
||||||
return f'{pos_attrs[0]}\t{pos_attrs[1]}\t{pos_attrs[2]}\t{pos_attrs[3]}\n'
|
|
||||||
|
@ -1,7 +1,7 @@
|
|||||||
from flask import current_app
|
from app.models import Corpus, CorpusStatus
|
||||||
|
import os
|
||||||
import shutil
|
import shutil
|
||||||
from app import db
|
from app import db
|
||||||
from app.models import Corpus, CorpusStatus
|
|
||||||
from . import bp
|
from . import bp
|
||||||
|
|
||||||
|
|
||||||
@ -18,17 +18,10 @@ def reset():
|
|||||||
]
|
]
|
||||||
for corpus in [x for x in Corpus.query.all() if x.status in status]:
|
for corpus in [x for x in Corpus.query.all() if x.status in status]:
|
||||||
print(f'Resetting corpus {corpus}')
|
print(f'Resetting corpus {corpus}')
|
||||||
corpus_cwb_dir = corpus.path / 'cwb'
|
shutil.rmtree(os.path.join(corpus.path, 'cwb'), ignore_errors=True)
|
||||||
corpus_cwb_data_dir = corpus_cwb_dir / 'data'
|
os.mkdir(os.path.join(corpus.path, 'cwb'))
|
||||||
corpus_cwb_registry_dir = corpus_cwb_dir / 'registry'
|
os.mkdir(os.path.join(corpus.path, 'cwb', 'data'))
|
||||||
try:
|
os.mkdir(os.path.join(corpus.path, 'cwb', 'registry'))
|
||||||
shutil.rmtree(corpus.path / 'cwb', ignore_errors=True)
|
|
||||||
corpus_cwb_dir.mkdir()
|
|
||||||
corpus_cwb_data_dir.mkdir()
|
|
||||||
corpus_cwb_registry_dir.mkdir()
|
|
||||||
except OSError as e:
|
|
||||||
current_app.logger.error(e)
|
|
||||||
raise
|
|
||||||
corpus.status = CorpusStatus.UNPREPARED
|
corpus.status = CorpusStatus.UNPREPARED
|
||||||
corpus.num_analysis_sessions = 0
|
corpus.num_analysis_sessions = 0
|
||||||
db.session.commit()
|
db.session.commit()
|
@ -1,16 +1,17 @@
|
|||||||
from cqi import CQiClient
|
from cqi import CQiClient
|
||||||
from cqi.errors import CQiException
|
from cqi.errors import CQiException
|
||||||
from cqi.status import CQiStatus
|
from cqi.status import CQiStatus
|
||||||
from flask import current_app
|
from docker.models.containers import Container
|
||||||
|
from flask import current_app, session
|
||||||
from flask_login import current_user
|
from flask_login import current_user
|
||||||
from flask_socketio import Namespace
|
from flask_socketio import Namespace
|
||||||
from inspect import signature
|
from inspect import signature
|
||||||
from threading import Lock
|
from threading import Lock
|
||||||
|
from typing import Callable, Dict, List, Optional
|
||||||
from app import db, docker_client, hashids, socketio
|
from app import db, docker_client, hashids, socketio
|
||||||
from app.decorators import socketio_login_required
|
from app.decorators import socketio_login_required
|
||||||
from app.models import Corpus, CorpusStatus
|
from app.models import Corpus, CorpusStatus
|
||||||
from . import cqi_extension_functions
|
from . import extensions
|
||||||
from .utils import SessionManager
|
|
||||||
|
|
||||||
|
|
||||||
'''
|
'''
|
||||||
@ -18,7 +19,7 @@ This package tunnels the Corpus Query interface (CQi) protocol through
|
|||||||
Socket.IO (SIO) by tunneling CQi API calls through an event called "exec".
|
Socket.IO (SIO) by tunneling CQi API calls through an event called "exec".
|
||||||
|
|
||||||
Basic concept:
|
Basic concept:
|
||||||
1. A client connects to the namespace.
|
1. A client connects to the "/cqi_over_sio" namespace.
|
||||||
2. The client emits the "init" event and provides a corpus id for the corpus
|
2. The client emits the "init" event and provides a corpus id for the corpus
|
||||||
that should be analysed in this session.
|
that should be analysed in this session.
|
||||||
1.1 The analysis session counter of the corpus is incremented.
|
1.1 The analysis session counter of the corpus is incremented.
|
||||||
@ -27,17 +28,17 @@ Basic concept:
|
|||||||
1.4 Connect the CQiClient to the server.
|
1.4 Connect the CQiClient to the server.
|
||||||
1.5 Save the CQiClient, the Lock and the corpus id in the session for
|
1.5 Save the CQiClient, the Lock and the corpus id in the session for
|
||||||
subsequential use.
|
subsequential use.
|
||||||
3. The client emits "exec" events, within which it provides the name of a CQi
|
2. The client emits the "exec" event provides the name of a CQi API function
|
||||||
API function and the corresponding arguments.
|
arguments (optional).
|
||||||
3.1 The "exec" event handler will execute the function, make sure that
|
- The event "exec" handler will execute the function, make sure that the
|
||||||
the result is serializable and returns the result back to the client.
|
result is serializable and returns the result back to the client.
|
||||||
4. The client disconnects from the namespace
|
4. Wait for more events
|
||||||
4.1 The analysis session counter of the corpus is decremented.
|
5. The client disconnects from the "/cqi_over_sio" namespace
|
||||||
4.2 The CQiClient and (Mutex) Lock belonging to it are teared down.
|
1.1 The analysis session counter of the corpus is decremented.
|
||||||
|
1.2 The CQiClient and (Mutex) Lock belonging to it are teared down.
|
||||||
'''
|
'''
|
||||||
|
|
||||||
|
CQI_API_FUNCTION_NAMES: List[str] = [
|
||||||
CQI_API_FUNCTION_NAMES = [
|
|
||||||
'ask_feature_cl_2_3',
|
'ask_feature_cl_2_3',
|
||||||
'ask_feature_cqi_1_0',
|
'ask_feature_cqi_1_0',
|
||||||
'ask_feature_cqp_2_3',
|
'ask_feature_cqp_2_3',
|
||||||
@ -85,90 +86,68 @@ CQI_API_FUNCTION_NAMES = [
|
|||||||
]
|
]
|
||||||
|
|
||||||
|
|
||||||
CQI_EXTENSION_FUNCTION_NAMES = [
|
class CQiNamespace(Namespace):
|
||||||
'ext_corpus_update_db',
|
|
||||||
'ext_corpus_static_data',
|
|
||||||
'ext_corpus_paginate_corpus',
|
|
||||||
'ext_cqp_paginate_subcorpus',
|
|
||||||
'ext_cqp_partial_export_subcorpus',
|
|
||||||
'ext_cqp_export_subcorpus',
|
|
||||||
]
|
|
||||||
|
|
||||||
|
|
||||||
class CQiOverSocketIONamespace(Namespace):
|
|
||||||
@socketio_login_required
|
@socketio_login_required
|
||||||
def on_connect(self):
|
def on_connect(self):
|
||||||
pass
|
pass
|
||||||
|
|
||||||
@socketio_login_required
|
@socketio_login_required
|
||||||
def on_init(self, corpus_hashid: str) -> dict:
|
def on_init(self, db_corpus_hashid: str):
|
||||||
corpus_id = hashids.decode(corpus_hashid)
|
db_corpus_id: int = hashids.decode(db_corpus_hashid)
|
||||||
|
db_corpus: Optional[Corpus] = Corpus.query.get(db_corpus_id)
|
||||||
if not isinstance(corpus_id, int):
|
if db_corpus is None:
|
||||||
return {'code': 400, 'msg': 'Bad Request'}
|
|
||||||
|
|
||||||
corpus = Corpus.query.get(corpus_id)
|
|
||||||
|
|
||||||
if corpus is None:
|
|
||||||
return {'code': 404, 'msg': 'Not Found'}
|
return {'code': 404, 'msg': 'Not Found'}
|
||||||
|
if not (db_corpus.user == current_user
|
||||||
if not (
|
or current_user.is_following_corpus(db_corpus)
|
||||||
corpus.user == current_user
|
or current_user.is_administrator()):
|
||||||
or current_user.is_following_corpus(corpus)
|
|
||||||
or current_user.is_administrator
|
|
||||||
):
|
|
||||||
return {'code': 403, 'msg': 'Forbidden'}
|
return {'code': 403, 'msg': 'Forbidden'}
|
||||||
|
if db_corpus.status not in [
|
||||||
if corpus.status not in [
|
|
||||||
CorpusStatus.BUILT,
|
CorpusStatus.BUILT,
|
||||||
CorpusStatus.STARTING_ANALYSIS_SESSION,
|
CorpusStatus.STARTING_ANALYSIS_SESSION,
|
||||||
CorpusStatus.RUNNING_ANALYSIS_SESSION,
|
CorpusStatus.RUNNING_ANALYSIS_SESSION,
|
||||||
CorpusStatus.CANCELING_ANALYSIS_SESSION
|
CorpusStatus.CANCELING_ANALYSIS_SESSION
|
||||||
]:
|
]:
|
||||||
return {'code': 424, 'msg': 'Failed Dependency'}
|
return {'code': 424, 'msg': 'Failed Dependency'}
|
||||||
|
if db_corpus.num_analysis_sessions is None:
|
||||||
corpus.num_analysis_sessions = Corpus.num_analysis_sessions + 1
|
db_corpus.num_analysis_sessions = 0
|
||||||
|
db.session.commit()
|
||||||
|
db_corpus.num_analysis_sessions = Corpus.num_analysis_sessions + 1
|
||||||
db.session.commit()
|
db.session.commit()
|
||||||
retry_counter = 20
|
retry_counter: int = 20
|
||||||
while corpus.status != CorpusStatus.RUNNING_ANALYSIS_SESSION:
|
while db_corpus.status != CorpusStatus.RUNNING_ANALYSIS_SESSION:
|
||||||
if retry_counter == 0:
|
if retry_counter == 0:
|
||||||
corpus.num_analysis_sessions = Corpus.num_analysis_sessions - 1
|
db_corpus.num_analysis_sessions = Corpus.num_analysis_sessions - 1
|
||||||
db.session.commit()
|
db.session.commit()
|
||||||
return {'code': 408, 'msg': 'Request Timeout'}
|
return {'code': 408, 'msg': 'Request Timeout'}
|
||||||
socketio.sleep(3)
|
socketio.sleep(3)
|
||||||
retry_counter -= 1
|
retry_counter -= 1
|
||||||
db.session.refresh(corpus)
|
db.session.refresh(db_corpus)
|
||||||
|
# cqi_client: CQiClient = CQiClient(f'cqpserver_{db_corpus_id}')
|
||||||
cqpserver_container_name = f'nopaque-cqpserver-{corpus_id}'
|
cqpserver_container_name: str = f'cqpserver_{db_corpus_id}'
|
||||||
cqpserver_container = docker_client.containers.get(cqpserver_container_name)
|
cqpserver_container: Container = docker_client.containers.get(cqpserver_container_name)
|
||||||
cqpserver_ip_address = cqpserver_container.attrs['NetworkSettings']['Networks'][current_app.config['NOPAQUE_DOCKER_NETWORK_NAME']]['IPAddress']
|
cqpserver_host: str = cqpserver_container.attrs['NetworkSettings']['Networks'][current_app.config['NOPAQUE_DOCKER_NETWORK_NAME']]['IPAddress']
|
||||||
cqi_client = CQiClient(cqpserver_ip_address)
|
cqi_client: CQiClient = CQiClient(cqpserver_host)
|
||||||
cqi_client_lock = Lock()
|
session['cqi_over_sio'] = {
|
||||||
|
'cqi_client': cqi_client,
|
||||||
SessionManager.setup()
|
'cqi_client_lock': Lock(),
|
||||||
SessionManager.set_corpus_id(corpus_id)
|
'db_corpus_id': db_corpus_id
|
||||||
SessionManager.set_cqi_client(cqi_client)
|
}
|
||||||
SessionManager.set_cqi_client_lock(cqi_client_lock)
|
|
||||||
|
|
||||||
return {'code': 200, 'msg': 'OK'}
|
return {'code': 200, 'msg': 'OK'}
|
||||||
|
|
||||||
@socketio_login_required
|
@socketio_login_required
|
||||||
def on_exec(self, fn_name: str, fn_args: dict = {}) -> dict:
|
def on_exec(self, fn_name: str, fn_args: Dict = {}):
|
||||||
try:
|
try:
|
||||||
cqi_client = SessionManager.get_cqi_client()
|
cqi_client: CQiClient = session['cqi_over_sio']['cqi_client']
|
||||||
cqi_client_lock = SessionManager.get_cqi_client_lock()
|
cqi_client_lock: Lock = session['cqi_over_sio']['cqi_client_lock']
|
||||||
except KeyError:
|
except KeyError:
|
||||||
return {'code': 424, 'msg': 'Failed Dependency'}
|
return {'code': 424, 'msg': 'Failed Dependency'}
|
||||||
|
|
||||||
if fn_name in CQI_API_FUNCTION_NAMES:
|
if fn_name in CQI_API_FUNCTION_NAMES:
|
||||||
fn = getattr(cqi_client.api, fn_name)
|
fn: Callable = getattr(cqi_client.api, fn_name)
|
||||||
elif fn_name in CQI_EXTENSION_FUNCTION_NAMES:
|
elif fn_name in extensions.CQI_EXTENSION_FUNCTION_NAMES:
|
||||||
fn = getattr(cqi_extension_functions, fn_name)
|
fn: Callable = getattr(extensions, fn_name)
|
||||||
else:
|
else:
|
||||||
return {'code': 400, 'msg': 'Bad Request'}
|
return {'code': 400, 'msg': 'Bad Request'}
|
||||||
|
|
||||||
for param in signature(fn).parameters.values():
|
for param in signature(fn).parameters.values():
|
||||||
# Check if the parameter is optional or required
|
|
||||||
if param.default is param.empty:
|
if param.default is param.empty:
|
||||||
if param.name not in fn_args:
|
if param.name not in fn_args:
|
||||||
return {'code': 400, 'msg': 'Bad Request'}
|
return {'code': 400, 'msg': 'Bad Request'}
|
||||||
@ -177,7 +156,6 @@ class CQiOverSocketIONamespace(Namespace):
|
|||||||
continue
|
continue
|
||||||
if type(fn_args[param.name]) is not param.annotation:
|
if type(fn_args[param.name]) is not param.annotation:
|
||||||
return {'code': 400, 'msg': 'Bad Request'}
|
return {'code': 400, 'msg': 'Bad Request'}
|
||||||
|
|
||||||
cqi_client_lock.acquire()
|
cqi_client_lock.acquire()
|
||||||
try:
|
try:
|
||||||
fn_return_value = fn(**fn_args)
|
fn_return_value = fn(**fn_args)
|
||||||
@ -195,7 +173,6 @@ class CQiOverSocketIONamespace(Namespace):
|
|||||||
}
|
}
|
||||||
finally:
|
finally:
|
||||||
cqi_client_lock.release()
|
cqi_client_lock.release()
|
||||||
|
|
||||||
if isinstance(fn_return_value, CQiStatus):
|
if isinstance(fn_return_value, CQiStatus):
|
||||||
payload = {
|
payload = {
|
||||||
'code': fn_return_value.code,
|
'code': fn_return_value.code,
|
||||||
@ -203,31 +180,27 @@ class CQiOverSocketIONamespace(Namespace):
|
|||||||
}
|
}
|
||||||
else:
|
else:
|
||||||
payload = fn_return_value
|
payload = fn_return_value
|
||||||
|
|
||||||
return {'code': 200, 'msg': 'OK', 'payload': payload}
|
return {'code': 200, 'msg': 'OK', 'payload': payload}
|
||||||
|
|
||||||
def on_disconnect(self):
|
def on_disconnect(self):
|
||||||
try:
|
try:
|
||||||
corpus_id = SessionManager.get_corpus_id()
|
cqi_client: CQiClient = session['cqi_over_sio']['cqi_client']
|
||||||
cqi_client = SessionManager.get_cqi_client()
|
cqi_client_lock: Lock = session['cqi_over_sio']['cqi_client_lock']
|
||||||
cqi_client_lock = SessionManager.get_cqi_client_lock()
|
db_corpus_id: int = session['cqi_over_sio']['db_corpus_id']
|
||||||
SessionManager.teardown()
|
|
||||||
except KeyError:
|
except KeyError:
|
||||||
return
|
return
|
||||||
|
|
||||||
cqi_client_lock.acquire()
|
cqi_client_lock.acquire()
|
||||||
|
try:
|
||||||
|
session.pop('cqi_over_sio')
|
||||||
|
except KeyError:
|
||||||
|
pass
|
||||||
try:
|
try:
|
||||||
cqi_client.api.ctrl_bye()
|
cqi_client.api.ctrl_bye()
|
||||||
except (BrokenPipeError, CQiException):
|
except (BrokenPipeError, CQiException):
|
||||||
pass
|
pass
|
||||||
|
|
||||||
cqi_client_lock.release()
|
cqi_client_lock.release()
|
||||||
|
db_corpus: Optional[Corpus] = Corpus.query.get(db_corpus_id)
|
||||||
corpus = Corpus.query.get(corpus_id)
|
if db_corpus is None:
|
||||||
|
|
||||||
if corpus is None:
|
|
||||||
return
|
return
|
||||||
|
db_corpus.num_analysis_sessions = Corpus.num_analysis_sessions - 1
|
||||||
corpus.num_analysis_sessions = Corpus.num_analysis_sessions - 1
|
|
||||||
db.session.commit()
|
db.session.commit()
|
@ -1,39 +1,56 @@
|
|||||||
from collections import Counter
|
from collections import Counter
|
||||||
|
from cqi import CQiClient
|
||||||
from cqi.models.corpora import Corpus as CQiCorpus
|
from cqi.models.corpora import Corpus as CQiCorpus
|
||||||
from cqi.models.subcorpora import Subcorpus as CQiSubcorpus
|
from cqi.models.subcorpora import Subcorpus as CQiSubcorpus
|
||||||
|
from cqi.models.attributes import (
|
||||||
|
PositionalAttribute as CQiPositionalAttribute,
|
||||||
|
StructuralAttribute as CQiStructuralAttribute
|
||||||
|
)
|
||||||
from cqi.status import StatusOk as CQiStatusOk
|
from cqi.status import StatusOk as CQiStatusOk
|
||||||
from flask import current_app
|
from flask import session
|
||||||
|
from typing import Dict, List
|
||||||
import gzip
|
import gzip
|
||||||
import json
|
import json
|
||||||
import math
|
import math
|
||||||
|
import os
|
||||||
from app import db
|
from app import db
|
||||||
from app.models import Corpus
|
from app.models import Corpus
|
||||||
from .utils import SessionManager
|
from .utils import lookups_by_cpos, partial_export_subcorpus, export_subcorpus
|
||||||
|
|
||||||
|
|
||||||
|
CQI_EXTENSION_FUNCTION_NAMES: List[str] = [
|
||||||
|
'ext_corpus_update_db',
|
||||||
|
'ext_corpus_static_data',
|
||||||
|
'ext_corpus_paginate_corpus',
|
||||||
|
'ext_cqp_paginate_subcorpus',
|
||||||
|
'ext_cqp_partial_export_subcorpus',
|
||||||
|
'ext_cqp_export_subcorpus',
|
||||||
|
]
|
||||||
|
|
||||||
|
|
||||||
def ext_corpus_update_db(corpus: str) -> CQiStatusOk:
|
def ext_corpus_update_db(corpus: str) -> CQiStatusOk:
|
||||||
corpus_id = SessionManager.get_corpus_id()
|
cqi_client: CQiClient = session['cqi_over_sio']['cqi_client']
|
||||||
cqi_client = SessionManager.get_cqi_client()
|
db_corpus_id: int = session['cqi_over_sio']['db_corpus_id']
|
||||||
db_corpus = Corpus.query.get(corpus_id)
|
db_corpus: Corpus = Corpus.query.get(db_corpus_id)
|
||||||
cqi_corpus = cqi_client.corpora.get(corpus)
|
cqi_corpus: CQiCorpus = cqi_client.corpora.get(corpus)
|
||||||
db_corpus.num_tokens = cqi_corpus.size
|
db_corpus.num_tokens = cqi_corpus.size
|
||||||
db.session.commit()
|
db.session.commit()
|
||||||
return CQiStatusOk()
|
return CQiStatusOk()
|
||||||
|
|
||||||
|
|
||||||
def ext_corpus_static_data(corpus: str) -> dict:
|
def ext_corpus_static_data(corpus: str) -> Dict:
|
||||||
corpus_id = SessionManager.get_corpus_id()
|
db_corpus_id: int = session['cqi_over_sio']['db_corpus_id']
|
||||||
db_corpus = Corpus.query.get(corpus_id)
|
db_corpus: Corpus = Corpus.query.get(db_corpus_id)
|
||||||
|
|
||||||
static_data_file_path = db_corpus.path / 'cwb' / 'static.json.gz'
|
static_data_file_path: str = os.path.join(db_corpus.path, 'cwb', 'static.json.gz')
|
||||||
if static_data_file_path.exists():
|
if os.path.exists(static_data_file_path):
|
||||||
with static_data_file_path.open('rb') as f:
|
with open(static_data_file_path, 'rb') as f:
|
||||||
return f.read()
|
return f.read()
|
||||||
|
|
||||||
cqi_client = SessionManager.get_cqi_client()
|
cqi_client: CQiClient = session['cqi_over_sio']['cqi_client']
|
||||||
cqi_corpus = cqi_client.corpora.get(corpus)
|
cqi_corpus: CQiCorpus = cqi_client.corpora.get(corpus)
|
||||||
cqi_p_attrs = cqi_corpus.positional_attributes.list()
|
cqi_p_attrs: List[CQiPositionalAttribute] = cqi_corpus.positional_attributes.list()
|
||||||
cqi_s_attrs = cqi_corpus.structural_attributes.list()
|
cqi_s_attrs: List[CQiStructuralAttribute] = cqi_corpus.structural_attributes.list()
|
||||||
|
|
||||||
static_data = {
|
static_data = {
|
||||||
'corpus': {
|
'corpus': {
|
||||||
@ -46,21 +63,21 @@ def ext_corpus_static_data(corpus: str) -> dict:
|
|||||||
}
|
}
|
||||||
|
|
||||||
for p_attr in cqi_p_attrs:
|
for p_attr in cqi_p_attrs:
|
||||||
current_app.logger.info(f'corpus.freqs.{p_attr.name}')
|
print(f'corpus.freqs.{p_attr.name}')
|
||||||
static_data['corpus']['freqs'][p_attr.name] = []
|
static_data['corpus']['freqs'][p_attr.name] = []
|
||||||
p_attr_id_list = list(range(p_attr.lexicon_size))
|
p_attr_id_list: List[int] = list(range(p_attr.lexicon_size))
|
||||||
static_data['corpus']['freqs'][p_attr.name].extend(p_attr.freqs_by_ids(p_attr_id_list))
|
static_data['corpus']['freqs'][p_attr.name].extend(p_attr.freqs_by_ids(p_attr_id_list))
|
||||||
del p_attr_id_list
|
del p_attr_id_list
|
||||||
|
|
||||||
current_app.logger.info(f'p_attrs.{p_attr.name}')
|
print(f'p_attrs.{p_attr.name}')
|
||||||
static_data['p_attrs'][p_attr.name] = []
|
static_data['p_attrs'][p_attr.name] = []
|
||||||
cpos_list = list(range(cqi_corpus.size))
|
cpos_list: List[int] = list(range(cqi_corpus.size))
|
||||||
static_data['p_attrs'][p_attr.name].extend(p_attr.ids_by_cpos(cpos_list))
|
static_data['p_attrs'][p_attr.name].extend(p_attr.ids_by_cpos(cpos_list))
|
||||||
del cpos_list
|
del cpos_list
|
||||||
|
|
||||||
current_app.logger.info(f'values.p_attrs.{p_attr.name}')
|
print(f'values.p_attrs.{p_attr.name}')
|
||||||
static_data['values']['p_attrs'][p_attr.name] = []
|
static_data['values']['p_attrs'][p_attr.name] = []
|
||||||
p_attr_id_list = list(range(p_attr.lexicon_size))
|
p_attr_id_list: List[int] = list(range(p_attr.lexicon_size))
|
||||||
static_data['values']['p_attrs'][p_attr.name].extend(p_attr.values_by_ids(p_attr_id_list))
|
static_data['values']['p_attrs'][p_attr.name].extend(p_attr.values_by_ids(p_attr_id_list))
|
||||||
del p_attr_id_list
|
del p_attr_id_list
|
||||||
|
|
||||||
@ -76,9 +93,9 @@ def ext_corpus_static_data(corpus: str) -> dict:
|
|||||||
# Note: Needs more testing, don't use it in production #
|
# Note: Needs more testing, don't use it in production #
|
||||||
##############################################################
|
##############################################################
|
||||||
cqi_corpus.query('Last', f'<{s_attr.name}> []* </{s_attr.name}>;')
|
cqi_corpus.query('Last', f'<{s_attr.name}> []* </{s_attr.name}>;')
|
||||||
cqi_subcorpus = cqi_corpus.subcorpora.get('Last')
|
cqi_subcorpus: CQiSubcorpus = cqi_corpus.subcorpora.get('Last')
|
||||||
first_match = 0
|
first_match: int = 0
|
||||||
last_match = cqi_subcorpus.size - 1
|
last_match: int = cqi_subcorpus.size - 1
|
||||||
match_boundaries = zip(
|
match_boundaries = zip(
|
||||||
range(first_match, last_match + 1),
|
range(first_match, last_match + 1),
|
||||||
cqi_subcorpus.dump(
|
cqi_subcorpus.dump(
|
||||||
@ -96,7 +113,7 @@ def ext_corpus_static_data(corpus: str) -> dict:
|
|||||||
del cqi_subcorpus, first_match, last_match
|
del cqi_subcorpus, first_match, last_match
|
||||||
for id, lbound, rbound in match_boundaries:
|
for id, lbound, rbound in match_boundaries:
|
||||||
static_data['s_attrs'][s_attr.name]['lexicon'].append({})
|
static_data['s_attrs'][s_attr.name]['lexicon'].append({})
|
||||||
current_app.logger.info(f's_attrs.{s_attr.name}.lexicon.{id}.bounds')
|
print(f's_attrs.{s_attr.name}.lexicon.{id}.bounds')
|
||||||
static_data['s_attrs'][s_attr.name]['lexicon'][id]['bounds'] = [lbound, rbound]
|
static_data['s_attrs'][s_attr.name]['lexicon'][id]['bounds'] = [lbound, rbound]
|
||||||
del match_boundaries
|
del match_boundaries
|
||||||
|
|
||||||
@ -108,33 +125,33 @@ def ext_corpus_static_data(corpus: str) -> dict:
|
|||||||
# This is a very slow operation, that's why we only use it for
|
# This is a very slow operation, that's why we only use it for
|
||||||
# the text attribute
|
# the text attribute
|
||||||
lbound, rbound = s_attr.cpos_by_id(id)
|
lbound, rbound = s_attr.cpos_by_id(id)
|
||||||
current_app.logger.info(f's_attrs.{s_attr.name}.lexicon.{id}.bounds')
|
print(f's_attrs.{s_attr.name}.lexicon.{id}.bounds')
|
||||||
static_data['s_attrs'][s_attr.name]['lexicon'][id]['bounds'] = [lbound, rbound]
|
static_data['s_attrs'][s_attr.name]['lexicon'][id]['bounds'] = [lbound, rbound]
|
||||||
static_data['s_attrs'][s_attr.name]['lexicon'][id]['freqs'] = {}
|
static_data['s_attrs'][s_attr.name]['lexicon'][id]['freqs'] = {}
|
||||||
cpos_list = list(range(lbound, rbound + 1))
|
cpos_list: List[int] = list(range(lbound, rbound + 1))
|
||||||
for p_attr in cqi_p_attrs:
|
for p_attr in cqi_p_attrs:
|
||||||
p_attr_ids = []
|
p_attr_ids: List[int] = []
|
||||||
p_attr_ids.extend(p_attr.ids_by_cpos(cpos_list))
|
p_attr_ids.extend(p_attr.ids_by_cpos(cpos_list))
|
||||||
current_app.logger.info(f's_attrs.{s_attr.name}.lexicon.{id}.freqs.{p_attr.name}')
|
print(f's_attrs.{s_attr.name}.lexicon.{id}.freqs.{p_attr.name}')
|
||||||
static_data['s_attrs'][s_attr.name]['lexicon'][id]['freqs'][p_attr.name] = dict(Counter(p_attr_ids))
|
static_data['s_attrs'][s_attr.name]['lexicon'][id]['freqs'][p_attr.name] = dict(Counter(p_attr_ids))
|
||||||
del p_attr_ids
|
del p_attr_ids
|
||||||
del cpos_list
|
del cpos_list
|
||||||
|
|
||||||
sub_s_attrs = cqi_corpus.structural_attributes.list(filters={'part_of': s_attr})
|
sub_s_attrs: List[CQiStructuralAttribute] = cqi_corpus.structural_attributes.list(filters={'part_of': s_attr})
|
||||||
current_app.logger.info(f's_attrs.{s_attr.name}.values')
|
print(f's_attrs.{s_attr.name}.values')
|
||||||
static_data['s_attrs'][s_attr.name]['values'] = [
|
static_data['s_attrs'][s_attr.name]['values'] = [
|
||||||
sub_s_attr.name[(len(s_attr.name) + 1):]
|
sub_s_attr.name[(len(s_attr.name) + 1):]
|
||||||
for sub_s_attr in sub_s_attrs
|
for sub_s_attr in sub_s_attrs
|
||||||
]
|
]
|
||||||
s_attr_id_list = list(range(s_attr.size))
|
s_attr_id_list: List[int] = list(range(s_attr.size))
|
||||||
sub_s_attr_values = []
|
sub_s_attr_values: List[str] = []
|
||||||
for sub_s_attr in sub_s_attrs:
|
for sub_s_attr in sub_s_attrs:
|
||||||
tmp = []
|
tmp = []
|
||||||
tmp.extend(sub_s_attr.values_by_ids(s_attr_id_list))
|
tmp.extend(sub_s_attr.values_by_ids(s_attr_id_list))
|
||||||
sub_s_attr_values.append(tmp)
|
sub_s_attr_values.append(tmp)
|
||||||
del tmp
|
del tmp
|
||||||
del s_attr_id_list
|
del s_attr_id_list
|
||||||
current_app.logger.info(f'values.s_attrs.{s_attr.name}')
|
print(f'values.s_attrs.{s_attr.name}')
|
||||||
static_data['values']['s_attrs'][s_attr.name] = [
|
static_data['values']['s_attrs'][s_attr.name] = [
|
||||||
{
|
{
|
||||||
s_attr_value_name: sub_s_attr_values[s_attr_value_name_idx][s_attr_id]
|
s_attr_value_name: sub_s_attr_values[s_attr_value_name_idx][s_attr_id]
|
||||||
@ -144,11 +161,11 @@ def ext_corpus_static_data(corpus: str) -> dict:
|
|||||||
} for s_attr_id in range(0, s_attr.size)
|
} for s_attr_id in range(0, s_attr.size)
|
||||||
]
|
]
|
||||||
del sub_s_attr_values
|
del sub_s_attr_values
|
||||||
current_app.logger.info('Saving static data to file')
|
print('Saving static data to file')
|
||||||
with gzip.open(static_data_file_path, 'wt') as f:
|
with gzip.open(static_data_file_path, 'wt') as f:
|
||||||
json.dump(static_data, f)
|
json.dump(static_data, f)
|
||||||
del static_data
|
del static_data
|
||||||
current_app.logger.info('Sending static data to client')
|
print('Sending static data to client')
|
||||||
with open(static_data_file_path, 'rb') as f:
|
with open(static_data_file_path, 'rb') as f:
|
||||||
return f.read()
|
return f.read()
|
||||||
|
|
||||||
@ -157,8 +174,8 @@ def ext_corpus_paginate_corpus(
|
|||||||
corpus: str,
|
corpus: str,
|
||||||
page: int = 1,
|
page: int = 1,
|
||||||
per_page: int = 20
|
per_page: int = 20
|
||||||
) -> dict:
|
) -> Dict:
|
||||||
cqi_client = SessionManager.get_cqi_client()
|
cqi_client: CQiClient = session['cqi_over_sio']['cqi_client']
|
||||||
cqi_corpus = cqi_client.corpora.get(corpus)
|
cqi_corpus = cqi_client.corpora.get(corpus)
|
||||||
# Sanity checks
|
# Sanity checks
|
||||||
if (
|
if (
|
||||||
@ -173,7 +190,7 @@ def ext_corpus_paginate_corpus(
|
|||||||
first_cpos = (page - 1) * per_page
|
first_cpos = (page - 1) * per_page
|
||||||
last_cpos = min(cqi_corpus.size, first_cpos + per_page)
|
last_cpos = min(cqi_corpus.size, first_cpos + per_page)
|
||||||
cpos_list = [*range(first_cpos, last_cpos)]
|
cpos_list = [*range(first_cpos, last_cpos)]
|
||||||
lookups = _lookups_by_cpos(cqi_corpus, cpos_list)
|
lookups = lookups_by_cpos(cqi_corpus, cpos_list)
|
||||||
payload = {}
|
payload = {}
|
||||||
# the items for the current page
|
# the items for the current page
|
||||||
payload['items'] = [cpos_list]
|
payload['items'] = [cpos_list]
|
||||||
@ -203,9 +220,9 @@ def ext_cqp_paginate_subcorpus(
|
|||||||
context: int = 50,
|
context: int = 50,
|
||||||
page: int = 1,
|
page: int = 1,
|
||||||
per_page: int = 20
|
per_page: int = 20
|
||||||
) -> dict:
|
) -> Dict:
|
||||||
corpus_name, subcorpus_name = subcorpus.split(':', 1)
|
corpus_name, subcorpus_name = subcorpus.split(':', 1)
|
||||||
cqi_client = SessionManager.get_cqi_client()
|
cqi_client: CQiClient = session['cqi_over_sio']['cqi_client']
|
||||||
cqi_corpus = cqi_client.corpora.get(corpus_name)
|
cqi_corpus = cqi_client.corpora.get(corpus_name)
|
||||||
cqi_subcorpus = cqi_corpus.subcorpora.get(subcorpus_name)
|
cqi_subcorpus = cqi_corpus.subcorpora.get(subcorpus_name)
|
||||||
# Sanity checks
|
# Sanity checks
|
||||||
@ -220,7 +237,7 @@ def ext_cqp_paginate_subcorpus(
|
|||||||
return {'code': 416, 'msg': 'Range Not Satisfiable'}
|
return {'code': 416, 'msg': 'Range Not Satisfiable'}
|
||||||
offset = (page - 1) * per_page
|
offset = (page - 1) * per_page
|
||||||
cutoff = per_page
|
cutoff = per_page
|
||||||
cqi_results_export = _export_subcorpus(
|
cqi_results_export = export_subcorpus(
|
||||||
cqi_subcorpus, context=context, cutoff=cutoff, offset=offset)
|
cqi_subcorpus, context=context, cutoff=cutoff, offset=offset)
|
||||||
payload = {}
|
payload = {}
|
||||||
# the items for the current page
|
# the items for the current page
|
||||||
@ -250,147 +267,22 @@ def ext_cqp_partial_export_subcorpus(
|
|||||||
subcorpus: str,
|
subcorpus: str,
|
||||||
match_id_list: list,
|
match_id_list: list,
|
||||||
context: int = 50
|
context: int = 50
|
||||||
) -> dict:
|
) -> Dict:
|
||||||
corpus_name, subcorpus_name = subcorpus.split(':', 1)
|
corpus_name, subcorpus_name = subcorpus.split(':', 1)
|
||||||
cqi_client = SessionManager.get_cqi_client()
|
cqi_client: CQiClient = session['cqi_over_sio']['cqi_client']
|
||||||
cqi_corpus = cqi_client.corpora.get(corpus_name)
|
cqi_corpus = cqi_client.corpora.get(corpus_name)
|
||||||
cqi_subcorpus = cqi_corpus.subcorpora.get(subcorpus_name)
|
cqi_subcorpus = cqi_corpus.subcorpora.get(subcorpus_name)
|
||||||
cqi_subcorpus_partial_export = _partial_export_subcorpus(cqi_subcorpus, match_id_list, context=context)
|
cqi_subcorpus_partial_export = partial_export_subcorpus(cqi_subcorpus, match_id_list, context=context)
|
||||||
return cqi_subcorpus_partial_export
|
return cqi_subcorpus_partial_export
|
||||||
|
|
||||||
|
|
||||||
def ext_cqp_export_subcorpus(subcorpus: str, context: int = 50) -> dict:
|
def ext_cqp_export_subcorpus(
|
||||||
|
subcorpus: str,
|
||||||
|
context: int = 50
|
||||||
|
) -> Dict:
|
||||||
corpus_name, subcorpus_name = subcorpus.split(':', 1)
|
corpus_name, subcorpus_name = subcorpus.split(':', 1)
|
||||||
cqi_client = SessionManager.get_cqi_client()
|
cqi_client: CQiClient = session['cqi_over_sio']['cqi_client']
|
||||||
cqi_corpus = cqi_client.corpora.get(corpus_name)
|
cqi_corpus = cqi_client.corpora.get(corpus_name)
|
||||||
cqi_subcorpus = cqi_corpus.subcorpora.get(subcorpus_name)
|
cqi_subcorpus = cqi_corpus.subcorpora.get(subcorpus_name)
|
||||||
cqi_subcorpus_export = _export_subcorpus(cqi_subcorpus, context=context)
|
cqi_subcorpus_export = export_subcorpus(cqi_subcorpus, context=context)
|
||||||
return cqi_subcorpus_export
|
return cqi_subcorpus_export
|
||||||
|
|
||||||
|
|
||||||
def _lookups_by_cpos(corpus: CQiCorpus, cpos_list: list[int]) -> dict:
    """Collect attribute lookup tables for a list of corpus positions.

    For every position in ``cpos_list`` the value of each positional
    attribute is stored under ``'cpos_lookup'``, together with the id that
    each structural attribute without values reports for that position.
    For every such structural attribute that has sub-attributes, an extra
    ``'<attr.name>_lookup'`` table maps each id that occurred to the values
    of those sub-attributes.

    Args:
        corpus: Corpus whose positional and structural attributes are
            queried.
        cpos_list: Corpus positions to look up.

    Returns:
        A dict with the key ``'cpos_lookup'`` plus one ``'<name>_lookup'``
        key per structural attribute that occurred and has sub-attributes.
    """
    lookups = {'cpos_lookup': {cpos: {} for cpos in cpos_list}}
    if not cpos_list:
        # Nothing to look up, so skip the attribute queries entirely.
        return lookups
    cpos_lookup = lookups['cpos_lookup']
    for attr in corpus.positional_attributes.list():
        cpos_attr_values = attr.values_by_cpos(cpos_list)
        for cpos, value in zip(cpos_list, cpos_attr_values):
            cpos_lookup[cpos][attr.name] = value
    for attr in corpus.structural_attributes.list():
        # We only want to iterate over non subattributes, identifiable by
        # attr.has_values == False
        if attr.has_values:
            continue
        cpos_attr_ids = attr.ids_by_cpos(cpos_list)
        for cpos, attr_id in zip(cpos_list, cpos_attr_ids):
            # Ids of -1 are skipped (presumably positions that are not
            # covered by this attribute -- confirm against the CQi client).
            if attr_id != -1:
                cpos_lookup[cpos][attr.name] = attr_id
        # Sorted so the order of the follow-up query is deterministic.
        occurred_attr_ids = sorted({x for x in cpos_attr_ids if x != -1})
        if not occurred_attr_ids:
            continue
        subattrs = corpus.structural_attributes.list(filters={'part_of': attr})
        if not subattrs:
            continue
        attr_lookup = {attr_id: {} for attr_id in occurred_attr_ids}
        for subattr in subattrs:
            # Drop the leading attr.name plus one separator character.
            subattr_name = subattr.name[(len(attr.name) + 1):]
            subattr_values = subattr.values_by_ids(occurred_attr_ids)
            for attr_id, value in zip(occurred_attr_ids, subattr_values):
                attr_lookup[attr_id][subattr_name] = value
        lookups[f'{attr.name}_lookup'] = attr_lookup
    return lookups
|
|
||||||
|
|
||||||
|
|
||||||
def _build_export_payload(subcorpus: CQiSubcorpus, match_boundaries, context: int) -> dict:
    """Build the export payload for already resolved match boundaries.

    Shared by ``_partial_export_subcorpus`` and ``_export_subcorpus``, which
    only differ in how they select the matches.

    Args:
        subcorpus: Subcorpus the matches belong to.
        match_boundaries: Iterable of ``(match_num, match_start, match_end)``
            tuples; start and end are inclusive corpus positions.
        context: Number of corpus positions to include on each side of a
            match. Values of 0 or less yield no context.

    Returns:
        A dict with the key ``'matches'`` (one dict per match with the keys
        ``'num'``, ``'lc'``, ``'c'`` and ``'rc'``) merged with the lookup
        tables returned by ``_lookups_by_cpos`` for all covered positions.
        ``'lc'`` and ``'rc'`` are inclusive ``(first, last)`` position ranges
        directly before and after the match, or ``None`` if there is none.
    """
    corpus = subcorpus.collection.corpus
    # Loop invariant: the last corpus position that may be part of a context.
    last_cpos = corpus.size - 1
    cpos_set = set()
    matches = []
    for match_num, match_start, match_end in match_boundaries:
        lc = None
        rc = None
        cpos_lbound = match_start
        cpos_rbound = match_end
        if context > 0:
            if match_start > 0:
                lc = (max(0, match_start - context), match_start - 1)
                cpos_lbound = lc[0]
            if match_end < last_cpos:
                rc = (match_end + 1, min(match_end + context, last_cpos))
                cpos_rbound = rc[1]
        matches.append(
            {'num': match_num, 'lc': lc, 'c': (match_start, match_end), 'rc': rc}
        )
        cpos_set.update(range(cpos_lbound, cpos_rbound + 1))
    # Sorted so the lookup tables are built in a deterministic order.
    lookups = _lookups_by_cpos(corpus, sorted(cpos_set))
    return {'matches': matches, **lookups}


def _partial_export_subcorpus(
    subcorpus: CQiSubcorpus,
    match_id_list: list[int],
    context: int = 25
) -> dict:
    """Export selected matches of a subcorpus together with their context.

    Args:
        subcorpus: Subcorpus to export from.
        match_id_list: Ids of the matches to export. Ids below 0 or not
            smaller than ``subcorpus.size`` are ignored.
        context: Number of corpus positions of context on each side.

    Returns:
        ``{'matches': []}`` for an empty subcorpus, otherwise the payload
        built by ``_build_export_payload``.
    """
    size = subcorpus.size
    if size == 0:
        return {'matches': []}
    match_field = subcorpus.fields['match']
    matchend_field = subcorpus.fields['matchend']
    # Each match is dumped on its own because the ids need not be contiguous.
    match_boundaries = [
        (
            match_id,
            subcorpus.dump(match_field, match_id, match_id)[0],
            subcorpus.dump(matchend_field, match_id, match_id)[0]
        )
        for match_id in match_id_list
        if 0 <= match_id < size
    ]
    return _build_export_payload(subcorpus, match_boundaries, context)


def _export_subcorpus(
    subcorpus: CQiSubcorpus,
    context: int = 25,
    cutoff: float = float('inf'),
    offset: int = 0
) -> dict:
    """Export a contiguous window of matches of a subcorpus.

    Args:
        subcorpus: Subcorpus to export from.
        context: Number of corpus positions of context on each side.
        cutoff: Maximum number of matches, counted from ``offset``. The
            default of infinity exports up to the last match.
        offset: Index of the first match of the window.

    Returns:
        ``{'matches': []}`` if the subcorpus or the requested window is
        empty, otherwise the payload built by ``_build_export_payload``.
    """
    size = subcorpus.size
    if size == 0:
        return {'matches': []}
    first_match = max(0, offset)
    last_match = min((offset + cutoff - 1), (size - 1))
    if last_match < first_match:
        # Empty window (offset past the end or non-positive cutoff): do not
        # ask the server to dump an inverted range.
        return {'matches': []}
    # cutoff is a float, so the bound may be one too; range() requires an int.
    last_match = int(last_match)
    match_boundaries = zip(
        range(first_match, last_match + 1),
        subcorpus.dump(subcorpus.fields['match'], first_match, last_match),
        subcorpus.dump(subcorpus.fields['matchend'], first_match, last_match)
    )
    return _build_export_payload(subcorpus, match_boundaries, context)
|
|
131
app/corpora/cqi_over_sio/utils.py
Normal file
131
app/corpora/cqi_over_sio/utils.py
Normal file
@ -0,0 +1,131 @@
|
|||||||
|
from cqi.models.corpora import Corpus as CQiCorpus
|
||||||
|
from cqi.models.subcorpora import Subcorpus as CQiSubcorpus
|
||||||
|
from typing import Dict, List
|
||||||
|
|
||||||
|
|
||||||
|
def lookups_by_cpos(corpus: CQiCorpus, cpos_list: List[int]) -> Dict:
    """Collect attribute lookup tables for a list of corpus positions.

    For every position in ``cpos_list`` the value of each positional
    attribute is stored under ``'cpos_lookup'``, together with the id that
    each structural attribute without values reports for that position.
    For every such structural attribute that has sub-attributes, an extra
    ``'<attr.name>_lookup'`` table maps each id that occurred to the values
    of those sub-attributes.

    Args:
        corpus: Corpus whose positional and structural attributes are
            queried.
        cpos_list: Corpus positions to look up.

    Returns:
        A dict with the key ``'cpos_lookup'`` plus one ``'<name>_lookup'``
        key per structural attribute that occurred and has sub-attributes.
    """
    lookups: Dict = {'cpos_lookup': {cpos: {} for cpos in cpos_list}}
    if not cpos_list:
        # Nothing to look up, so skip the attribute queries entirely.
        return lookups
    cpos_lookup = lookups['cpos_lookup']
    for attr in corpus.positional_attributes.list():
        cpos_attr_values: List[str] = attr.values_by_cpos(cpos_list)
        for cpos, value in zip(cpos_list, cpos_attr_values):
            cpos_lookup[cpos][attr.name] = value
    for attr in corpus.structural_attributes.list():
        # We only want to iterate over non subattributes, identifiable by
        # attr.has_values == False
        if attr.has_values:
            continue
        cpos_attr_ids: List[int] = attr.ids_by_cpos(cpos_list)
        for cpos, attr_id in zip(cpos_list, cpos_attr_ids):
            # Ids of -1 are skipped (presumably positions that are not
            # covered by this attribute -- confirm against the CQi client).
            if attr_id != -1:
                cpos_lookup[cpos][attr.name] = attr_id
        # Sorted so the order of the follow-up query is deterministic.
        occurred_attr_ids: List[int] = sorted(
            {x for x in cpos_attr_ids if x != -1}
        )
        if not occurred_attr_ids:
            continue
        subattrs = corpus.structural_attributes.list(filters={'part_of': attr})
        if not subattrs:
            continue
        attr_lookup: Dict = {attr_id: {} for attr_id in occurred_attr_ids}
        for subattr in subattrs:
            # Drop the leading attr.name plus one separator character.
            subattr_name = subattr.name[(len(attr.name) + 1):]
            subattr_values = subattr.values_by_ids(occurred_attr_ids)
            for attr_id, value in zip(occurred_attr_ids, subattr_values):
                attr_lookup[attr_id][subattr_name] = value
        lookups[f'{attr.name}_lookup'] = attr_lookup
    return lookups
|
||||||
|
|
||||||
|
|
||||||
|
def _collect_matches(subcorpus: CQiSubcorpus, match_boundaries, context: int) -> Dict:
    """Build the export payload for already resolved match boundaries.

    Shared by ``partial_export_subcorpus`` and ``export_subcorpus``, which
    only differ in how they select the matches.

    Args:
        subcorpus: Subcorpus the matches belong to.
        match_boundaries: Iterable of ``(match_num, match_start, match_end)``
            tuples; start and end are inclusive corpus positions.
        context: Number of corpus positions to include on each side of a
            match. Values of 0 or less yield no context.

    Returns:
        A dict with the key ``'matches'`` (one dict per match with the keys
        ``'num'``, ``'lc'``, ``'c'`` and ``'rc'``) merged with the lookup
        tables returned by ``lookups_by_cpos`` for all covered positions.
        ``'lc'`` and ``'rc'`` are inclusive ``(first, last)`` position ranges
        directly before and after the match, or ``None`` if there is none.
    """
    corpus = subcorpus.collection.corpus
    # Loop invariant: the last corpus position that may be part of a context.
    last_cpos = corpus.size - 1
    cpos_set = set()
    matches = []
    for match_num, match_start, match_end in match_boundaries:
        lc = None
        rc = None
        cpos_lbound = match_start
        cpos_rbound = match_end
        if context > 0:
            if match_start > 0:
                lc = (max(0, match_start - context), match_start - 1)
                cpos_lbound = lc[0]
            if match_end < last_cpos:
                rc = (match_end + 1, min(match_end + context, last_cpos))
                cpos_rbound = rc[1]
        matches.append(
            {'num': match_num, 'lc': lc, 'c': (match_start, match_end), 'rc': rc}
        )
        cpos_set.update(range(cpos_lbound, cpos_rbound + 1))
    # Sorted so the lookup tables are built in a deterministic order.
    lookups = lookups_by_cpos(corpus, sorted(cpos_set))
    return {'matches': matches, **lookups}


def partial_export_subcorpus(
    subcorpus: CQiSubcorpus,
    match_id_list: List[int],
    context: int = 25
) -> Dict:
    """Export selected matches of a subcorpus together with their context.

    Args:
        subcorpus: Subcorpus to export from.
        match_id_list: Ids of the matches to export. Ids below 0 or not
            smaller than ``subcorpus.size`` are ignored.
        context: Number of corpus positions of context on each side.

    Returns:
        ``{"matches": []}`` for an empty subcorpus, otherwise the payload
        built by ``_collect_matches``.
    """
    size = subcorpus.size
    if size == 0:
        return {"matches": []}
    match_field = subcorpus.fields['match']
    matchend_field = subcorpus.fields['matchend']
    # Each match is dumped on its own because the ids need not be contiguous.
    match_boundaries = [
        (
            match_id,
            subcorpus.dump(match_field, match_id, match_id)[0],
            subcorpus.dump(matchend_field, match_id, match_id)[0]
        )
        for match_id in match_id_list
        if 0 <= match_id < size
    ]
    return _collect_matches(subcorpus, match_boundaries, context)


def export_subcorpus(
    subcorpus: CQiSubcorpus,
    context: int = 25,
    cutoff: float = float('inf'),
    offset: int = 0
) -> Dict:
    """Export a contiguous window of matches of a subcorpus.

    Args:
        subcorpus: Subcorpus to export from.
        context: Number of corpus positions of context on each side.
        cutoff: Maximum number of matches, counted from ``offset``. The
            default of infinity exports up to the last match.
        offset: Index of the first match of the window.

    Returns:
        ``{"matches": []}`` if the subcorpus or the requested window is
        empty, otherwise the payload built by ``_collect_matches``.
    """
    size = subcorpus.size
    if size == 0:
        return {"matches": []}
    first_match = max(0, offset)
    last_match = min((offset + cutoff - 1), (size - 1))
    if last_match < first_match:
        # Empty window (offset past the end or non-positive cutoff): do not
        # ask the server to dump an inverted range.
        return {"matches": []}
    # cutoff is a float, so the bound may be one too; range() requires an int.
    last_match = int(last_match)
    match_boundaries = zip(
        range(first_match, last_match + 1),
        subcorpus.dump(subcorpus.fields['match'], first_match, last_match),
        subcorpus.dump(subcorpus.fields['matchend'], first_match, last_match)
    )
    return _collect_matches(subcorpus, match_boundaries, context)
|
@ -10,7 +10,7 @@ def corpus_follower_permission_required(*permissions):
|
|||||||
def decorated_function(*args, **kwargs):
|
def decorated_function(*args, **kwargs):
|
||||||
corpus_id = kwargs.get('corpus_id')
|
corpus_id = kwargs.get('corpus_id')
|
||||||
corpus = Corpus.query.get_or_404(corpus_id)
|
corpus = Corpus.query.get_or_404(corpus_id)
|
||||||
if not (corpus.user == current_user or current_user.is_administrator):
|
if not (corpus.user == current_user or current_user.is_administrator()):
|
||||||
cfa = CorpusFollowerAssociation.query.filter_by(corpus_id=corpus_id, follower_id=current_user.id).first()
|
cfa = CorpusFollowerAssociation.query.filter_by(corpus_id=corpus_id, follower_id=current_user.id).first()
|
||||||
if cfa is None:
|
if cfa is None:
|
||||||
abort(403)
|
abort(403)
|
||||||
@ -26,7 +26,7 @@ def corpus_owner_or_admin_required(f):
|
|||||||
def decorated_function(*args, **kwargs):
|
def decorated_function(*args, **kwargs):
|
||||||
corpus_id = kwargs.get('corpus_id')
|
corpus_id = kwargs.get('corpus_id')
|
||||||
corpus = Corpus.query.get_or_404(corpus_id)
|
corpus = Corpus.query.get_or_404(corpus_id)
|
||||||
if not (corpus.user == current_user or current_user.is_administrator):
|
if not (corpus.user == current_user or current_user.is_administrator()):
|
||||||
abort(403)
|
abort(403)
|
||||||
return f(*args, **kwargs)
|
return f(*args, **kwargs)
|
||||||
return decorated_function
|
return decorated_function
|
@ -15,7 +15,7 @@ def get_corpus(corpus_hashid):
|
|||||||
if not (
|
if not (
|
||||||
corpus.is_public
|
corpus.is_public
|
||||||
or corpus.user == current_user
|
or corpus.user == current_user
|
||||||
or current_user.is_administrator
|
or current_user.is_administrator()
|
||||||
):
|
):
|
||||||
return {'options': {'status': 403, 'statusText': 'Forbidden'}}
|
return {'options': {'status': 403, 'statusText': 'Forbidden'}}
|
||||||
return {
|
return {
|
||||||
@ -38,7 +38,7 @@ def subscribe_corpus(corpus_hashid):
|
|||||||
if not (
|
if not (
|
||||||
corpus.is_public
|
corpus.is_public
|
||||||
or corpus.user == current_user
|
or corpus.user == current_user
|
||||||
or current_user.is_administrator
|
or current_user.is_administrator()
|
||||||
):
|
):
|
||||||
return {'options': {'status': 403, 'statusText': 'Forbidden'}}
|
return {'options': {'status': 403, 'statusText': 'Forbidden'}}
|
||||||
join_room(f'/corpora/{corpus.hashid}')
|
join_room(f'/corpora/{corpus.hashid}')
|
2
app/corpora/files/__init__.py
Normal file
2
app/corpora/files/__init__.py
Normal file
@ -0,0 +1,2 @@
|
|||||||
|
from .. import bp
|
||||||
|
from . import json_routes, routes
|
@ -1,7 +1,7 @@
|
|||||||
from flask import current_app
|
from flask import abort, current_app
|
||||||
from threading import Thread
|
from threading import Thread
|
||||||
from app.decorators import content_negotiation
|
|
||||||
from app import db
|
from app import db
|
||||||
|
from app.decorators import content_negotiation
|
||||||
from app.models import CorpusFile
|
from app.models import CorpusFile
|
||||||
from ..decorators import corpus_follower_permission_required
|
from ..decorators import corpus_follower_permission_required
|
||||||
from . import bp
|
from . import bp
|
@ -6,19 +6,25 @@ from flask import (
|
|||||||
send_from_directory,
|
send_from_directory,
|
||||||
url_for
|
url_for
|
||||||
)
|
)
|
||||||
|
from flask_breadcrumbs import register_breadcrumb
|
||||||
|
import os
|
||||||
from app import db
|
from app import db
|
||||||
from app.models import Corpus, CorpusFile, CorpusStatus
|
from app.models import Corpus, CorpusFile, CorpusStatus
|
||||||
from ..decorators import corpus_follower_permission_required
|
from ..decorators import corpus_follower_permission_required
|
||||||
|
from ..utils import corpus_endpoint_arguments_constructor as corpus_eac
|
||||||
from . import bp
|
from . import bp
|
||||||
from .forms import CreateCorpusFileForm, UpdateCorpusFileForm
|
from .forms import CreateCorpusFileForm, UpdateCorpusFileForm
|
||||||
|
from .utils import corpus_file_dynamic_list_constructor as corpus_file_dlc
|
||||||
|
|
||||||
|
|
||||||
@bp.route('/<hashid:corpus_id>/files')
|
@bp.route('/<hashid:corpus_id>/files')
|
||||||
|
@register_breadcrumb(bp, '.entity.files', 'Files', endpoint_arguments_constructor=corpus_eac)
|
||||||
def corpus_files(corpus_id):
|
def corpus_files(corpus_id):
|
||||||
return redirect(url_for('.corpus', _anchor='files', corpus_id=corpus_id))
|
return redirect(url_for('.corpus', _anchor='files', corpus_id=corpus_id))
|
||||||
|
|
||||||
|
|
||||||
@bp.route('/<hashid:corpus_id>/files/create', methods=['GET', 'POST'])
|
@bp.route('/<hashid:corpus_id>/files/create', methods=['GET', 'POST'])
|
||||||
|
@register_breadcrumb(bp, '.entity.files.create', 'Create', endpoint_arguments_constructor=corpus_eac)
|
||||||
@corpus_follower_permission_required('MANAGE_FILES')
|
@corpus_follower_permission_required('MANAGE_FILES')
|
||||||
def create_corpus_file(corpus_id):
|
def create_corpus_file(corpus_id):
|
||||||
corpus = Corpus.query.get_or_404(corpus_id)
|
corpus = Corpus.query.get_or_404(corpus_id)
|
||||||
@ -60,6 +66,7 @@ def create_corpus_file(corpus_id):
|
|||||||
|
|
||||||
|
|
||||||
@bp.route('/<hashid:corpus_id>/files/<hashid:corpus_file_id>', methods=['GET', 'POST'])
|
@bp.route('/<hashid:corpus_id>/files/<hashid:corpus_file_id>', methods=['GET', 'POST'])
|
||||||
|
@register_breadcrumb(bp, '.entity.files.entity', '', dynamic_list_constructor=corpus_file_dlc)
|
||||||
@corpus_follower_permission_required('MANAGE_FILES')
|
@corpus_follower_permission_required('MANAGE_FILES')
|
||||||
def corpus_file(corpus_id, corpus_file_id):
|
def corpus_file(corpus_id, corpus_file_id):
|
||||||
corpus_file = CorpusFile.query.filter_by(corpus_id=corpus_id, id=corpus_file_id).first_or_404()
|
corpus_file = CorpusFile.query.filter_by(corpus_id=corpus_id, id=corpus_file_id).first_or_404()
|
||||||
@ -85,9 +92,9 @@ def corpus_file(corpus_id, corpus_file_id):
|
|||||||
def download_corpus_file(corpus_id, corpus_file_id):
|
def download_corpus_file(corpus_id, corpus_file_id):
|
||||||
corpus_file = CorpusFile.query.filter_by(corpus_id=corpus_id, id=corpus_file_id).first_or_404()
|
corpus_file = CorpusFile.query.filter_by(corpus_id=corpus_id, id=corpus_file_id).first_or_404()
|
||||||
return send_from_directory(
|
return send_from_directory(
|
||||||
corpus_file.path.parent,
|
os.path.dirname(corpus_file.path),
|
||||||
corpus_file.path.name,
|
os.path.basename(corpus_file.path),
|
||||||
as_attachment=True,
|
as_attachment=True,
|
||||||
download_name=corpus_file.filename,
|
attachment_filename=corpus_file.filename,
|
||||||
mimetype=corpus_file.mimetype
|
mimetype=corpus_file.mimetype
|
||||||
)
|
)
|
15
app/corpora/files/utils.py
Normal file
15
app/corpora/files/utils.py
Normal file
@ -0,0 +1,15 @@
|
|||||||
|
from flask import request, url_for
|
||||||
|
from app.models import CorpusFile
|
||||||
|
from ..utils import corpus_endpoint_arguments_constructor as corpus_eac
|
||||||
|
|
||||||
|
|
||||||
|
def corpus_file_dynamic_list_constructor():
    """Build the breadcrumb entry for the corpus file of the current request.

    Reads ``corpus_id`` and ``corpus_file_id`` from the view arguments of the
    active request and loads the matching ``CorpusFile`` with
    ``first_or_404``.

    Returns:
        A one-element list holding a dict with the keys ``'text'`` (author,
        title and publishing year of the file) and ``'url'`` (URL of the
        ``.corpus_file`` endpoint for this file).
    """
    view_args = request.view_args
    corpus_id = view_args['corpus_id']
    corpus_file_id = view_args['corpus_file_id']
    query = CorpusFile.query.filter_by(corpus_id=corpus_id, id=corpus_file_id)
    corpus_file = query.first_or_404()
    text = f'{corpus_file.author}: {corpus_file.title} ({corpus_file.publishing_year})'
    url = url_for('.corpus_file', corpus_id=corpus_id, corpus_file_id=corpus_file_id)
    return [{'text': text, 'url': url}]
|
@ -58,7 +58,7 @@ def delete_corpus_follower(corpus_id, follower_id):
|
|||||||
current_user.id == follower_id
|
current_user.id == follower_id
|
||||||
or current_user == cfa.corpus.user
|
or current_user == cfa.corpus.user
|
||||||
or CorpusFollowerAssociation.query.filter_by(corpus_id=corpus_id, follower_id=current_user.id).first().role.has_permission('MANAGE_FOLLOWERS')
|
or CorpusFollowerAssociation.query.filter_by(corpus_id=corpus_id, follower_id=current_user.id).first().role.has_permission('MANAGE_FOLLOWERS')
|
||||||
or current_user.is_administrator):
|
or current_user.is_administrator()):
|
||||||
abort(403)
|
abort(403)
|
||||||
if current_user.id == follower_id:
|
if current_user.id == follower_id:
|
||||||
flash(f'You are no longer following "{cfa.corpus.title}"', 'corpus')
|
flash(f'You are no longer following "{cfa.corpus.title}"', 'corpus')
|
@ -1,4 +1,5 @@
|
|||||||
from flask import abort, flash, redirect, render_template, url_for
|
from flask import abort, flash, redirect, render_template, url_for
|
||||||
|
from flask_breadcrumbs import register_breadcrumb
|
||||||
from flask_login import current_user
|
from flask_login import current_user
|
||||||
from app import db
|
from app import db
|
||||||
from app.models import (
|
from app.models import (
|
||||||
@ -10,14 +11,20 @@ from app.models import (
|
|||||||
from . import bp
|
from . import bp
|
||||||
from .decorators import corpus_follower_permission_required
|
from .decorators import corpus_follower_permission_required
|
||||||
from .forms import CreateCorpusForm
|
from .forms import CreateCorpusForm
|
||||||
|
from .utils import (
|
||||||
|
corpus_endpoint_arguments_constructor as corpus_eac,
|
||||||
|
corpus_dynamic_list_constructor as corpus_dlc
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
@bp.route('')
|
@bp.route('')
|
||||||
|
@register_breadcrumb(bp, '.', '<i class="nopaque-icons left">I</i>My Corpora')
|
||||||
def corpora():
|
def corpora():
|
||||||
return redirect(url_for('main.dashboard', _anchor='corpora'))
|
return redirect(url_for('main.dashboard', _anchor='corpora'))
|
||||||
|
|
||||||
|
|
||||||
@bp.route('/create', methods=['GET', 'POST'])
|
@bp.route('/create', methods=['GET', 'POST'])
|
||||||
|
@register_breadcrumb(bp, '.create', 'Create')
|
||||||
def create_corpus():
|
def create_corpus():
|
||||||
form = CreateCorpusForm()
|
form = CreateCorpusForm()
|
||||||
if form.validate_on_submit():
|
if form.validate_on_submit():
|
||||||
@ -40,6 +47,7 @@ def create_corpus():
|
|||||||
|
|
||||||
|
|
||||||
@bp.route('/<hashid:corpus_id>')
|
@bp.route('/<hashid:corpus_id>')
|
||||||
|
@register_breadcrumb(bp, '.entity', '', dynamic_list_constructor=corpus_dlc)
|
||||||
def corpus(corpus_id):
|
def corpus(corpus_id):
|
||||||
corpus = Corpus.query.get_or_404(corpus_id)
|
corpus = Corpus.query.get_or_404(corpus_id)
|
||||||
cfrs = CorpusFollowerRole.query.all()
|
cfrs = CorpusFollowerRole.query.all()
|
||||||
@ -47,13 +55,13 @@ def corpus(corpus_id):
|
|||||||
users = User.query.filter(User.is_public == True, User.id != current_user.id, User.id != corpus.user.id, User.role_id < 4).all()
|
users = User.query.filter(User.is_public == True, User.id != current_user.id, User.id != corpus.user.id, User.role_id < 4).all()
|
||||||
cfa = CorpusFollowerAssociation.query.filter_by(corpus_id=corpus_id, follower_id=current_user.id).first()
|
cfa = CorpusFollowerAssociation.query.filter_by(corpus_id=corpus_id, follower_id=current_user.id).first()
|
||||||
if cfa is None:
|
if cfa is None:
|
||||||
if corpus.user == current_user or current_user.is_administrator:
|
if corpus.user == current_user or current_user.is_administrator():
|
||||||
cfr = CorpusFollowerRole.query.filter_by(name='Administrator').first()
|
cfr = CorpusFollowerRole.query.filter_by(name='Administrator').first()
|
||||||
else:
|
else:
|
||||||
cfr = CorpusFollowerRole.query.filter_by(name='Anonymous').first()
|
cfr = CorpusFollowerRole.query.filter_by(name='Anonymous').first()
|
||||||
else:
|
else:
|
||||||
cfr = cfa.role
|
cfr = cfa.role
|
||||||
if corpus.user == current_user or current_user.is_administrator:
|
if corpus.user == current_user or current_user.is_administrator():
|
||||||
return render_template(
|
return render_template(
|
||||||
'corpora/corpus.html.j2',
|
'corpora/corpus.html.j2',
|
||||||
title=corpus.title,
|
title=corpus.title,
|
||||||
@ -79,6 +87,7 @@ def corpus(corpus_id):
|
|||||||
|
|
||||||
@bp.route('/<hashid:corpus_id>/analysis')
|
@bp.route('/<hashid:corpus_id>/analysis')
|
||||||
@corpus_follower_permission_required('VIEW')
|
@corpus_follower_permission_required('VIEW')
|
||||||
|
@register_breadcrumb(bp, '.entity.analysis', 'Analysis', endpoint_arguments_constructor=corpus_eac)
|
||||||
def analysis(corpus_id):
|
def analysis(corpus_id):
|
||||||
corpus = Corpus.query.get_or_404(corpus_id)
|
corpus = Corpus.query.get_or_404(corpus_id)
|
||||||
return render_template(
|
return render_template(
|
||||||
@ -99,11 +108,13 @@ def follow_corpus(corpus_id, token):
|
|||||||
|
|
||||||
|
|
||||||
@bp.route('/import', methods=['GET', 'POST'])
|
@bp.route('/import', methods=['GET', 'POST'])
|
||||||
|
@register_breadcrumb(bp, '.import', 'Import')
|
||||||
def import_corpus():
|
def import_corpus():
|
||||||
abort(503)
|
abort(503)
|
||||||
|
|
||||||
|
|
||||||
@bp.route('/<hashid:corpus_id>/export')
|
@bp.route('/<hashid:corpus_id>/export')
|
||||||
@corpus_follower_permission_required('VIEW')
|
@corpus_follower_permission_required('VIEW')
|
||||||
|
@register_breadcrumb(bp, '.entity.export', 'Export', endpoint_arguments_constructor=corpus_eac)
|
||||||
def export_corpus(corpus_id):
|
def export_corpus(corpus_id):
|
||||||
abort(503)
|
abort(503)
|
17
app/corpora/utils.py
Normal file
17
app/corpora/utils.py
Normal file
@ -0,0 +1,17 @@
|
|||||||
|
from flask import request, url_for
|
||||||
|
from app.models import Corpus
|
||||||
|
|
||||||
|
|
||||||
|
def corpus_endpoint_arguments_constructor():
|
||||||
|
return {'corpus_id': request.view_args['corpus_id']}
|
||||||
|
|
||||||
|
|
||||||
|
def corpus_dynamic_list_constructor():
|
||||||
|
corpus_id = request.view_args['corpus_id']
|
||||||
|
corpus = Corpus.query.get_or_404(corpus_id)
|
||||||
|
return [
|
||||||
|
{
|
||||||
|
'text': f'<i class="material-icons left">book</i>{corpus.title}',
|
||||||
|
'url': url_for('.corpus', corpus_id=corpus_id)
|
||||||
|
}
|
||||||
|
]
|
11
app/daemon/__init__.py
Normal file
11
app/daemon/__init__.py
Normal file
@ -0,0 +1,11 @@
|
|||||||
|
from app import db
|
||||||
|
from flask import Flask
|
||||||
|
from .corpus_utils import check_corpora
|
||||||
|
from .job_utils import check_jobs
|
||||||
|
|
||||||
|
|
||||||
|
def daemon(app: Flask):
|
||||||
|
with app.app_context():
|
||||||
|
check_corpora()
|
||||||
|
check_jobs()
|
||||||
|
db.session.commit()
|
@ -1,16 +1,12 @@
|
|||||||
|
from app import docker_client
|
||||||
|
from app.models import Corpus, CorpusStatus
|
||||||
from flask import current_app
|
from flask import current_app
|
||||||
import docker
|
import docker
|
||||||
import os
|
import os
|
||||||
import shutil
|
import shutil
|
||||||
from app import db, docker_client, scheduler
|
|
||||||
from app.models import Corpus, CorpusStatus
|
|
||||||
|
|
||||||
|
|
||||||
def handle_corpora():
|
def check_corpora():
|
||||||
with scheduler.app.app_context():
|
|
||||||
_handle_corpora()
|
|
||||||
|
|
||||||
def _handle_corpora():
|
|
||||||
corpora = Corpus.query.all()
|
corpora = Corpus.query.all()
|
||||||
for corpus in [x for x in corpora if x.status == CorpusStatus.SUBMITTED]:
|
for corpus in [x for x in corpora if x.status == CorpusStatus.SUBMITTED]:
|
||||||
_create_build_corpus_service(corpus)
|
_create_build_corpus_service(corpus)
|
||||||
@ -21,14 +17,13 @@ def _handle_corpora():
|
|||||||
for corpus in [x for x in corpora if x.status == CorpusStatus.RUNNING_ANALYSIS_SESSION and x.num_analysis_sessions == 0]:
|
for corpus in [x for x in corpora if x.status == CorpusStatus.RUNNING_ANALYSIS_SESSION and x.num_analysis_sessions == 0]:
|
||||||
corpus.status = CorpusStatus.CANCELING_ANALYSIS_SESSION
|
corpus.status = CorpusStatus.CANCELING_ANALYSIS_SESSION
|
||||||
for corpus in [x for x in corpora if x.status == CorpusStatus.RUNNING_ANALYSIS_SESSION]:
|
for corpus in [x for x in corpora if x.status == CorpusStatus.RUNNING_ANALYSIS_SESSION]:
|
||||||
_checkout_cqpserver_container(corpus)
|
_checkout_analysing_corpus_container(corpus)
|
||||||
for corpus in [x for x in corpora if x.status == CorpusStatus.STARTING_ANALYSIS_SESSION]:
|
for corpus in [x for x in corpora if x.status == CorpusStatus.STARTING_ANALYSIS_SESSION]:
|
||||||
_create_cqpserver_container(corpus)
|
_create_cqpserver_container(corpus)
|
||||||
for corpus in [x for x in corpora if x.status == CorpusStatus.CANCELING_ANALYSIS_SESSION]:
|
for corpus in [x for x in corpora if x.status == CorpusStatus.CANCELING_ANALYSIS_SESSION]:
|
||||||
_remove_cqpserver_container(corpus)
|
_remove_cqpserver_container(corpus)
|
||||||
db.session.commit()
|
|
||||||
|
|
||||||
def _create_build_corpus_service(corpus: Corpus):
|
def _create_build_corpus_service(corpus):
|
||||||
''' # Docker service settings # '''
|
''' # Docker service settings # '''
|
||||||
''' ## Command ## '''
|
''' ## Command ## '''
|
||||||
command = ['bash', '-c']
|
command = ['bash', '-c']
|
||||||
@ -50,10 +45,12 @@ def _create_build_corpus_service(corpus: Corpus):
|
|||||||
''' ## Constraints ## '''
|
''' ## Constraints ## '''
|
||||||
constraints = ['node.role==worker']
|
constraints = ['node.role==worker']
|
||||||
''' ## Image ## '''
|
''' ## Image ## '''
|
||||||
image = f'{current_app.config["NOPAQUE_DOCKER_IMAGE_PREFIX"]}cwb:r1887'
|
image = f'{current_app.config["NOPAQUE_DOCKER_IMAGE_PREFIX"]}cwb:r1879'
|
||||||
''' ## Labels ## '''
|
''' ## Labels ## '''
|
||||||
labels = {
|
labels = {
|
||||||
'nopaque.server_name': current_app.config['SERVER_NAME']
|
'origin': current_app.config['SERVER_NAME'],
|
||||||
|
'type': 'corpus.build',
|
||||||
|
'corpus_id': str(corpus.id)
|
||||||
}
|
}
|
||||||
''' ## Mounts ## '''
|
''' ## Mounts ## '''
|
||||||
mounts = []
|
mounts = []
|
||||||
@ -98,7 +95,7 @@ def _create_build_corpus_service(corpus: Corpus):
|
|||||||
return
|
return
|
||||||
corpus.status = CorpusStatus.QUEUED
|
corpus.status = CorpusStatus.QUEUED
|
||||||
|
|
||||||
def _checkout_build_corpus_service(corpus: Corpus):
|
def _checkout_build_corpus_service(corpus):
|
||||||
service_name = f'build-corpus_{corpus.id}'
|
service_name = f'build-corpus_{corpus.id}'
|
||||||
try:
|
try:
|
||||||
service = docker_client.services.get(service_name)
|
service = docker_client.services.get(service_name)
|
||||||
@ -126,7 +123,8 @@ def _checkout_build_corpus_service(corpus: Corpus):
|
|||||||
except docker.errors.DockerException as e:
|
except docker.errors.DockerException as e:
|
||||||
current_app.logger.error(f'Remove service "{service_name}" failed: {e}')
|
current_app.logger.error(f'Remove service "{service_name}" failed: {e}')
|
||||||
|
|
||||||
def _create_cqpserver_container(corpus: Corpus):
|
def _create_cqpserver_container(corpus):
|
||||||
|
''' # Docker container settings # '''
|
||||||
''' ## Command ## '''
|
''' ## Command ## '''
|
||||||
command = []
|
command = []
|
||||||
command.append(
|
command.append(
|
||||||
@ -141,9 +139,9 @@ def _create_cqpserver_container(corpus: Corpus):
|
|||||||
''' ## Entrypoint ## '''
|
''' ## Entrypoint ## '''
|
||||||
entrypoint = ['bash', '-c']
|
entrypoint = ['bash', '-c']
|
||||||
''' ## Image ## '''
|
''' ## Image ## '''
|
||||||
image = f'{current_app.config["NOPAQUE_DOCKER_IMAGE_PREFIX"]}cwb:r1887'
|
image = f'{current_app.config["NOPAQUE_DOCKER_IMAGE_PREFIX"]}cwb:r1879'
|
||||||
''' ## Name ## '''
|
''' ## Name ## '''
|
||||||
name = f'nopaque-cqpserver-{corpus.id}'
|
name = f'cqpserver_{corpus.id}'
|
||||||
''' ## Network ## '''
|
''' ## Network ## '''
|
||||||
network = f'{current_app.config["NOPAQUE_DOCKER_NETWORK_NAME"]}'
|
network = f'{current_app.config["NOPAQUE_DOCKER_NETWORK_NAME"]}'
|
||||||
''' ## Volumes ## '''
|
''' ## Volumes ## '''
|
||||||
@ -200,8 +198,8 @@ def _create_cqpserver_container(corpus: Corpus):
|
|||||||
return
|
return
|
||||||
corpus.status = CorpusStatus.RUNNING_ANALYSIS_SESSION
|
corpus.status = CorpusStatus.RUNNING_ANALYSIS_SESSION
|
||||||
|
|
||||||
def _checkout_cqpserver_container(corpus: Corpus):
|
def _checkout_analysing_corpus_container(corpus):
|
||||||
container_name = f'nopaque-cqpserver-{corpus.id}'
|
container_name = f'cqpserver_{corpus.id}'
|
||||||
try:
|
try:
|
||||||
docker_client.containers.get(container_name)
|
docker_client.containers.get(container_name)
|
||||||
except docker.errors.NotFound as e:
|
except docker.errors.NotFound as e:
|
||||||
@ -211,8 +209,8 @@ def _checkout_cqpserver_container(corpus: Corpus):
|
|||||||
except docker.errors.DockerException as e:
|
except docker.errors.DockerException as e:
|
||||||
current_app.logger.error(f'Get container "{container_name}" failed: {e}')
|
current_app.logger.error(f'Get container "{container_name}" failed: {e}')
|
||||||
|
|
||||||
def _remove_cqpserver_container(corpus: Corpus):
|
def _remove_cqpserver_container(corpus):
|
||||||
container_name = f'nopaque-cqpserver-{corpus.id}'
|
container_name = f'cqpserver_{corpus.id}'
|
||||||
try:
|
try:
|
||||||
container = docker_client.containers.get(container_name)
|
container = docker_client.containers.get(container_name)
|
||||||
except docker.errors.NotFound:
|
except docker.errors.NotFound:
|
@ -1,11 +1,4 @@
|
|||||||
from datetime import datetime
|
from app import db, docker_client, hashids
|
||||||
from flask import current_app
|
|
||||||
from werkzeug.utils import secure_filename
|
|
||||||
import docker
|
|
||||||
import json
|
|
||||||
import os
|
|
||||||
import shutil
|
|
||||||
from app import db, docker_client, hashids, scheduler
|
|
||||||
from app.models import (
|
from app.models import (
|
||||||
Job,
|
Job,
|
||||||
JobResult,
|
JobResult,
|
||||||
@ -13,13 +6,16 @@ from app.models import (
|
|||||||
TesseractOCRPipelineModel,
|
TesseractOCRPipelineModel,
|
||||||
SpaCyNLPPipelineModel
|
SpaCyNLPPipelineModel
|
||||||
)
|
)
|
||||||
|
from datetime import datetime
|
||||||
|
from flask import current_app
|
||||||
|
from werkzeug.utils import secure_filename
|
||||||
|
import docker
|
||||||
|
import json
|
||||||
|
import os
|
||||||
|
import shutil
|
||||||
|
|
||||||
|
|
||||||
def handle_jobs():
|
def check_jobs():
|
||||||
with scheduler.app.app_context():
|
|
||||||
_handle_jobs()
|
|
||||||
|
|
||||||
def _handle_jobs():
|
|
||||||
jobs = Job.query.all()
|
jobs = Job.query.all()
|
||||||
for job in [x for x in jobs if x.status == JobStatus.SUBMITTED]:
|
for job in [x for x in jobs if x.status == JobStatus.SUBMITTED]:
|
||||||
_create_job_service(job)
|
_create_job_service(job)
|
||||||
@ -27,9 +23,8 @@ def _handle_jobs():
|
|||||||
_checkout_job_service(job)
|
_checkout_job_service(job)
|
||||||
for job in [x for x in jobs if x.status == JobStatus.CANCELING]:
|
for job in [x for x in jobs if x.status == JobStatus.CANCELING]:
|
||||||
_remove_job_service(job)
|
_remove_job_service(job)
|
||||||
db.session.commit()
|
|
||||||
|
|
||||||
def _create_job_service(job: Job):
|
def _create_job_service(job):
|
||||||
''' # Docker service settings # '''
|
''' # Docker service settings # '''
|
||||||
''' ## Service specific settings ## '''
|
''' ## Service specific settings ## '''
|
||||||
if job.service == 'file-setup-pipeline':
|
if job.service == 'file-setup-pipeline':
|
||||||
@ -86,7 +81,9 @@ def _create_job_service(job: Job):
|
|||||||
constraints = ['node.role==worker']
|
constraints = ['node.role==worker']
|
||||||
''' ## Labels ## '''
|
''' ## Labels ## '''
|
||||||
labels = {
|
labels = {
|
||||||
'origin': current_app.config['SERVER_NAME']
|
'origin': current_app.config['SERVER_NAME'],
|
||||||
|
'type': 'job',
|
||||||
|
'job_id': str(job.id)
|
||||||
}
|
}
|
||||||
''' ## Mounts ## '''
|
''' ## Mounts ## '''
|
||||||
mounts = []
|
mounts = []
|
||||||
@ -167,7 +164,7 @@ def _create_job_service(job: Job):
|
|||||||
return
|
return
|
||||||
job.status = JobStatus.QUEUED
|
job.status = JobStatus.QUEUED
|
||||||
|
|
||||||
def _checkout_job_service(job: Job):
|
def _checkout_job_service(job):
|
||||||
service_name = f'job_{job.id}'
|
service_name = f'job_{job.id}'
|
||||||
try:
|
try:
|
||||||
service = docker_client.services.get(service_name)
|
service = docker_client.services.get(service_name)
|
||||||
@ -216,7 +213,7 @@ def _checkout_job_service(job: Job):
|
|||||||
except docker.errors.DockerException as e:
|
except docker.errors.DockerException as e:
|
||||||
current_app.logger.error(f'Remove service "{service_name}" failed: {e}')
|
current_app.logger.error(f'Remove service "{service_name}" failed: {e}')
|
||||||
|
|
||||||
def _remove_job_service(job: Job):
|
def _remove_job_service(job):
|
||||||
service_name = f'job_{job.id}'
|
service_name = f'job_{job.id}'
|
||||||
try:
|
try:
|
||||||
service = docker_client.services.get(service_name)
|
service = docker_client.services.get(service_name)
|
@ -1,7 +1,8 @@
|
|||||||
from flask import abort, request
|
from flask import abort, current_app, request
|
||||||
from flask_login import current_user
|
from flask_login import current_user
|
||||||
from functools import wraps
|
from functools import wraps
|
||||||
from typing import Optional
|
from threading import Thread
|
||||||
|
from typing import List, Union
|
||||||
from werkzeug.exceptions import NotAcceptable
|
from werkzeug.exceptions import NotAcceptable
|
||||||
from app.models import Permission
|
from app.models import Permission
|
||||||
|
|
||||||
@ -23,21 +24,22 @@ def admin_required(f):
|
|||||||
|
|
||||||
def socketio_login_required(f):
|
def socketio_login_required(f):
|
||||||
@wraps(f)
|
@wraps(f)
|
||||||
def wrapper(*args, **kwargs):
|
def decorated_function(*args, **kwargs):
|
||||||
if current_user.is_authenticated:
|
if current_user.is_authenticated:
|
||||||
return f(*args, **kwargs)
|
return f(*args, **kwargs)
|
||||||
return {'status': 401, 'statusText': 'Unauthorized'}
|
else:
|
||||||
return wrapper
|
return {'code': 401, 'msg': 'Unauthorized'}
|
||||||
|
return decorated_function
|
||||||
|
|
||||||
|
|
||||||
def socketio_permission_required(permission):
|
def socketio_permission_required(permission):
|
||||||
def decorator(f):
|
def decorator(f):
|
||||||
@wraps(f)
|
@wraps(f)
|
||||||
def wrapper(*args, **kwargs):
|
def decorated_function(*args, **kwargs):
|
||||||
if not current_user.can(permission):
|
if not current_user.can(permission):
|
||||||
return {'status': 403, 'statusText': 'Forbidden'}
|
return {'code': 403, 'msg': 'Forbidden'}
|
||||||
return f(*args, **kwargs)
|
return f(*args, **kwargs)
|
||||||
return wrapper
|
return decorated_function
|
||||||
return decorator
|
return decorator
|
||||||
|
|
||||||
|
|
||||||
@ -45,9 +47,27 @@ def socketio_admin_required(f):
|
|||||||
return socketio_permission_required(Permission.ADMINISTRATE)(f)
|
return socketio_permission_required(Permission.ADMINISTRATE)(f)
|
||||||
|
|
||||||
|
|
||||||
|
def background(f):
|
||||||
|
'''
|
||||||
|
' This decorator executes a function in a Thread.
|
||||||
|
' Decorated functions need to be executed within a code block where an
|
||||||
|
' app context exists.
|
||||||
|
'
|
||||||
|
' NOTE: An app object is passed as a keyword argument to the decorated
|
||||||
|
' function.
|
||||||
|
'''
|
||||||
|
@wraps(f)
|
||||||
|
def wrapped(*args, **kwargs):
|
||||||
|
kwargs['app'] = current_app._get_current_object()
|
||||||
|
thread = Thread(target=f, args=args, kwargs=kwargs)
|
||||||
|
thread.start()
|
||||||
|
return thread
|
||||||
|
return wrapped
|
||||||
|
|
||||||
|
|
||||||
def content_negotiation(
|
def content_negotiation(
|
||||||
produces: Optional[str | list[str]] = None,
|
produces: Union[str, List[str], None] = None,
|
||||||
consumes: Optional[str | list[str]] = None
|
consumes: Union[str, List[str], None] = None
|
||||||
):
|
):
|
||||||
def decorator(f):
|
def decorator(f):
|
||||||
@wraps(f)
|
@wraps(f)
|
||||||
|
31
app/email.py
31
app/email.py
@ -1,32 +1,25 @@
|
|||||||
from flask import current_app, Flask, render_template
|
from flask import current_app, render_template
|
||||||
from flask_mail import Message
|
from flask_mail import Message
|
||||||
from threading import Thread
|
from threading import Thread
|
||||||
from app import mail
|
from app import mail
|
||||||
|
|
||||||
|
|
||||||
def create_message(
|
def create_message(recipient, subject, template, **kwargs):
|
||||||
recipient: str,
|
subject_prefix: str = current_app.config['NOPAQUE_MAIL_SUBJECT_PREFIX']
|
||||||
subject: str,
|
msg: Message = Message(
|
||||||
template: str,
|
body=render_template(f'{template}.txt.j2', **kwargs),
|
||||||
**context
|
html=render_template(f'{template}.html.j2', **kwargs),
|
||||||
) -> Message:
|
|
||||||
message = Message(
|
|
||||||
body=render_template(f'{template}.txt.j2', **context),
|
|
||||||
html=render_template(f'{template}.html.j2', **context),
|
|
||||||
recipients=[recipient],
|
recipients=[recipient],
|
||||||
subject=f'[nopaque] {subject}'
|
subject=f'{subject_prefix} {subject}'
|
||||||
)
|
)
|
||||||
return message
|
return msg
|
||||||
|
|
||||||
|
|
||||||
def send(message: Message) -> Thread:
|
def send(msg, *args, **kwargs):
|
||||||
def _send(app: Flask, message: Message):
|
def _send(app, msg):
|
||||||
with app.app_context():
|
with app.app_context():
|
||||||
mail.send(message)
|
mail.send(msg)
|
||||||
|
|
||||||
thread = Thread(
|
thread = Thread(target=_send, args=[current_app._get_current_object(), msg])
|
||||||
target=_send,
|
|
||||||
args=[current_app._get_current_object(), message]
|
|
||||||
)
|
|
||||||
thread.start()
|
thread.start()
|
||||||
return thread
|
return thread
|
||||||
|
@ -1,2 +0,0 @@
|
|||||||
from .types import ContainerColumn
|
|
||||||
from .types import IntEnumColumn
|
|
@ -1,42 +0,0 @@
|
|||||||
import json
|
|
||||||
from app import db
|
|
||||||
|
|
||||||
|
|
||||||
class ContainerColumn(db.TypeDecorator):
|
|
||||||
impl = db.String
|
|
||||||
|
|
||||||
def __init__(self, container_type, *args, **kwargs):
|
|
||||||
super().__init__(*args, **kwargs)
|
|
||||||
self.container_type = container_type
|
|
||||||
|
|
||||||
def process_bind_param(self, value, dialect):
|
|
||||||
if isinstance(value, self.container_type):
|
|
||||||
return json.dumps(value)
|
|
||||||
elif isinstance(value, str) and isinstance(json.loads(value), self.container_type):
|
|
||||||
return value
|
|
||||||
else:
|
|
||||||
return TypeError()
|
|
||||||
|
|
||||||
def process_result_value(self, value, dialect):
|
|
||||||
return json.loads(value)
|
|
||||||
|
|
||||||
|
|
||||||
class IntEnumColumn(db.TypeDecorator):
|
|
||||||
impl = db.Integer
|
|
||||||
|
|
||||||
def __init__(self, enum_type, *args, **kwargs):
|
|
||||||
super().__init__(*args, **kwargs)
|
|
||||||
self.enum_type = enum_type
|
|
||||||
|
|
||||||
def process_bind_param(self, value, dialect):
|
|
||||||
if isinstance(value, self.enum_type) and isinstance(value.value, int):
|
|
||||||
return value.value
|
|
||||||
elif isinstance(value, int):
|
|
||||||
return self.enum_type(value).value
|
|
||||||
elif isinstance(value, str):
|
|
||||||
return self.enum_type[value].value
|
|
||||||
else:
|
|
||||||
return TypeError()
|
|
||||||
|
|
||||||
def process_result_value(self, value, dialect):
|
|
||||||
return self.enum_type(value)
|
|
@ -1,2 +1,18 @@
|
|||||||
from .handle_corpora import handle_corpora
|
from flask import Blueprint
|
||||||
from .handle_jobs import handle_jobs
|
from flask_login import login_required
|
||||||
|
|
||||||
|
|
||||||
|
bp = Blueprint('jobs', __name__)
|
||||||
|
|
||||||
|
|
||||||
|
@bp.before_request
|
||||||
|
@login_required
|
||||||
|
def before_request():
|
||||||
|
'''
|
||||||
|
Ensures that the routes in this package can only be visited by users that
|
||||||
|
are logged in.
|
||||||
|
'''
|
||||||
|
pass
|
||||||
|
|
||||||
|
|
||||||
|
from . import routes, json_routes
|
||||||
|
@ -1,6 +1,7 @@
|
|||||||
from flask import abort, current_app
|
from flask import abort, current_app
|
||||||
from flask_login import current_user
|
from flask_login import current_user
|
||||||
from threading import Thread
|
from threading import Thread
|
||||||
|
import os
|
||||||
from app import db
|
from app import db
|
||||||
from app.decorators import admin_required, content_negotiation
|
from app.decorators import admin_required, content_negotiation
|
||||||
from app.models import Job, JobStatus
|
from app.models import Job, JobStatus
|
||||||
@ -17,7 +18,7 @@ def delete_job(job_id):
|
|||||||
db.session.commit()
|
db.session.commit()
|
||||||
|
|
||||||
job = Job.query.get_or_404(job_id)
|
job = Job.query.get_or_404(job_id)
|
||||||
if not (job.user == current_user or current_user.is_administrator):
|
if not (job.user == current_user or current_user.is_administrator()):
|
||||||
abort(403)
|
abort(403)
|
||||||
thread = Thread(
|
thread = Thread(
|
||||||
target=_delete_job,
|
target=_delete_job,
|
||||||
@ -38,7 +39,7 @@ def job_log(job_id):
|
|||||||
if job.status not in [JobStatus.COMPLETED, JobStatus.FAILED]:
|
if job.status not in [JobStatus.COMPLETED, JobStatus.FAILED]:
|
||||||
response = {'errors': {'message': 'Job status is not completed or failed'}}
|
response = {'errors': {'message': 'Job status is not completed or failed'}}
|
||||||
return response, 409
|
return response, 409
|
||||||
with open(job.path / 'pipeline_data' / 'logs' / 'pyflow_log.txt') as log_file:
|
with open(os.path.join(job.path, 'pipeline_data', 'logs', 'pyflow_log.txt')) as log_file:
|
||||||
log = log_file.read()
|
log = log_file.read()
|
||||||
response_data = {
|
response_data = {
|
||||||
'jobLog': log
|
'jobLog': log
|
||||||
@ -56,7 +57,7 @@ def restart_job(job_id):
|
|||||||
db.session.commit()
|
db.session.commit()
|
||||||
|
|
||||||
job = Job.query.get_or_404(job_id)
|
job = Job.query.get_or_404(job_id)
|
||||||
if not (job.user == current_user or current_user.is_administrator):
|
if not (job.user == current_user or current_user.is_administrator()):
|
||||||
abort(403)
|
abort(403)
|
||||||
if job.status == JobStatus.FAILED:
|
if job.status == JobStatus.FAILED:
|
||||||
response = {'errors': {'message': 'Job status is not "failed"'}}
|
response = {'errors': {'message': 'Job status is not "failed"'}}
|
@ -5,20 +5,25 @@ from flask import (
|
|||||||
send_from_directory,
|
send_from_directory,
|
||||||
url_for
|
url_for
|
||||||
)
|
)
|
||||||
|
from flask_breadcrumbs import register_breadcrumb
|
||||||
from flask_login import current_user
|
from flask_login import current_user
|
||||||
|
import os
|
||||||
from app.models import Job, JobInput, JobResult
|
from app.models import Job, JobInput, JobResult
|
||||||
from . import bp
|
from . import bp
|
||||||
|
from .utils import job_dynamic_list_constructor as job_dlc
|
||||||
|
|
||||||
|
|
||||||
@bp.route('')
|
@bp.route('')
|
||||||
def jobs():
|
@register_breadcrumb(bp, '.', '<i class="nopaque-icons left">J</i>My Jobs')
|
||||||
|
def corpora():
|
||||||
return redirect(url_for('main.dashboard', _anchor='jobs'))
|
return redirect(url_for('main.dashboard', _anchor='jobs'))
|
||||||
|
|
||||||
|
|
||||||
@bp.route('/<hashid:job_id>')
|
@bp.route('/<hashid:job_id>')
|
||||||
|
@register_breadcrumb(bp, '.entity', '', dynamic_list_constructor=job_dlc)
|
||||||
def job(job_id):
|
def job(job_id):
|
||||||
job = Job.query.get_or_404(job_id)
|
job = Job.query.get_or_404(job_id)
|
||||||
if not (job.user == current_user or current_user.is_administrator):
|
if not (job.user == current_user or current_user.is_administrator()):
|
||||||
abort(403)
|
abort(403)
|
||||||
return render_template(
|
return render_template(
|
||||||
'jobs/job.html.j2',
|
'jobs/job.html.j2',
|
||||||
@ -30,13 +35,13 @@ def job(job_id):
|
|||||||
@bp.route('/<hashid:job_id>/inputs/<hashid:job_input_id>/download')
|
@bp.route('/<hashid:job_id>/inputs/<hashid:job_input_id>/download')
|
||||||
def download_job_input(job_id, job_input_id):
|
def download_job_input(job_id, job_input_id):
|
||||||
job_input = JobInput.query.filter_by(job_id=job_id, id=job_input_id).first_or_404()
|
job_input = JobInput.query.filter_by(job_id=job_id, id=job_input_id).first_or_404()
|
||||||
if not (job_input.job.user == current_user or current_user.is_administrator):
|
if not (job_input.job.user == current_user or current_user.is_administrator()):
|
||||||
abort(403)
|
abort(403)
|
||||||
return send_from_directory(
|
return send_from_directory(
|
||||||
job_input.path.parent,
|
os.path.dirname(job_input.path),
|
||||||
job_input.path.name,
|
os.path.basename(job_input.path),
|
||||||
as_attachment=True,
|
as_attachment=True,
|
||||||
download_name=job_input.filename,
|
attachment_filename=job_input.filename,
|
||||||
mimetype=job_input.mimetype
|
mimetype=job_input.mimetype
|
||||||
)
|
)
|
||||||
|
|
||||||
@ -44,12 +49,12 @@ def download_job_input(job_id, job_input_id):
|
|||||||
@bp.route('/<hashid:job_id>/results/<hashid:job_result_id>/download')
|
@bp.route('/<hashid:job_id>/results/<hashid:job_result_id>/download')
|
||||||
def download_job_result(job_id, job_result_id):
|
def download_job_result(job_id, job_result_id):
|
||||||
job_result = JobResult.query.filter_by(job_id=job_id, id=job_result_id).first_or_404()
|
job_result = JobResult.query.filter_by(job_id=job_id, id=job_result_id).first_or_404()
|
||||||
if not (job_result.job.user == current_user or current_user.is_administrator):
|
if not (job_result.job.user == current_user or current_user.is_administrator()):
|
||||||
abort(403)
|
abort(403)
|
||||||
return send_from_directory(
|
return send_from_directory(
|
||||||
job_result.path.parent,
|
os.path.dirname(job_result.path),
|
||||||
job_result.path.name,
|
os.path.basename(job_result.path),
|
||||||
as_attachment=True,
|
as_attachment=True,
|
||||||
download_name=job_result.filename,
|
attachment_filename=job_result.filename,
|
||||||
mimetype=job_result.mimetype
|
mimetype=job_result.mimetype
|
||||||
)
|
)
|
13
app/jobs/utils.py
Normal file
13
app/jobs/utils.py
Normal file
@ -0,0 +1,13 @@
|
|||||||
|
from flask import request, url_for
|
||||||
|
from app.models import Job
|
||||||
|
|
||||||
|
|
||||||
|
def job_dynamic_list_constructor():
|
||||||
|
job_id = request.view_args['job_id']
|
||||||
|
job = Job.query.get_or_404(job_id)
|
||||||
|
return [
|
||||||
|
{
|
||||||
|
'text': f'<i class="nopaque-icons left service-icons" data-service="{job.service}"></i>{job.title}',
|
||||||
|
'url': url_for('.job', job_id=job_id)
|
||||||
|
}
|
||||||
|
]
|
@ -1,9 +1,7 @@
|
|||||||
from flask import current_app
|
from flask import current_app
|
||||||
from flask_migrate import upgrade
|
from flask_migrate import upgrade
|
||||||
from pathlib import Path
|
import os
|
||||||
from app import db
|
|
||||||
from app.models import (
|
from app.models import (
|
||||||
Corpus,
|
|
||||||
CorpusFollowerRole,
|
CorpusFollowerRole,
|
||||||
Role,
|
Role,
|
||||||
SpaCyNLPPipelineModel,
|
SpaCyNLPPipelineModel,
|
||||||
@ -16,22 +14,25 @@ from . import bp
|
|||||||
@bp.cli.command('deploy')
|
@bp.cli.command('deploy')
|
||||||
def deploy():
|
def deploy():
|
||||||
''' Run deployment tasks. '''
|
''' Run deployment tasks. '''
|
||||||
|
# Make default directories
|
||||||
print('Make default directories')
|
print('Make default directories')
|
||||||
base_dir = current_app.config['NOPAQUE_DATA_DIR']
|
base_dir = current_app.config['NOPAQUE_DATA_DIR']
|
||||||
default_dirs: list[Path] = [
|
default_dirs = [
|
||||||
base_dir / 'tmp',
|
os.path.join(base_dir, 'tmp'),
|
||||||
base_dir / 'users'
|
os.path.join(base_dir, 'users')
|
||||||
]
|
]
|
||||||
for default_dir in default_dirs:
|
for dir in default_dirs:
|
||||||
if not default_dir.exists():
|
if os.path.exists(dir):
|
||||||
default_dir.mkdir()
|
if not os.path.isdir(dir):
|
||||||
if not default_dir.is_dir():
|
raise NotADirectoryError(f'{dir} is not a directory')
|
||||||
raise NotADirectoryError(f'{default_dir} is not a directory')
|
else:
|
||||||
|
os.mkdir(dir)
|
||||||
|
|
||||||
|
# migrate database to latest revision
|
||||||
print('Migrate database to latest revision')
|
print('Migrate database to latest revision')
|
||||||
upgrade()
|
upgrade()
|
||||||
|
|
||||||
|
# Insert/Update default database values
|
||||||
print('Insert/Update default Roles')
|
print('Insert/Update default Roles')
|
||||||
Role.insert_defaults()
|
Role.insert_defaults()
|
||||||
print('Insert/Update default Users')
|
print('Insert/Update default Users')
|
||||||
@ -43,9 +44,4 @@ def deploy():
|
|||||||
print('Insert/Update default TesseractOCRPipelineModels')
|
print('Insert/Update default TesseractOCRPipelineModels')
|
||||||
TesseractOCRPipelineModel.insert_defaults()
|
TesseractOCRPipelineModel.insert_defaults()
|
||||||
|
|
||||||
print('Stop running analysis sessions')
|
|
||||||
for corpus in Corpus.query.all():
|
|
||||||
corpus.num_analysis_sessions = 0
|
|
||||||
db.session.commit()
|
|
||||||
|
|
||||||
# TODO: Implement checks for if the nopaque network exists
|
# TODO: Implement checks for if the nopaque network exists
|
@ -1,11 +1,14 @@
|
|||||||
from flask import flash, redirect, render_template, url_for
|
from flask import flash, redirect, render_template, url_for
|
||||||
|
from flask_breadcrumbs import register_breadcrumb
|
||||||
from flask_login import current_user, login_required, login_user
|
from flask_login import current_user, login_required, login_user
|
||||||
from app.blueprints.auth.forms import LoginForm
|
from app.auth.forms import LoginForm
|
||||||
from app.models import Corpus, User
|
from app.models import Corpus, User
|
||||||
|
from sqlalchemy import or_
|
||||||
from . import bp
|
from . import bp
|
||||||
|
|
||||||
|
|
||||||
@bp.route('/', methods=['GET', 'POST'])
|
@bp.route('/', methods=['GET', 'POST'])
|
||||||
|
@register_breadcrumb(bp, '.', '<i class="material-icons">home</i>')
|
||||||
def index():
|
def index():
|
||||||
form = LoginForm()
|
form = LoginForm()
|
||||||
if form.validate_on_submit():
|
if form.validate_on_submit():
|
||||||
@ -24,6 +27,7 @@ def index():
|
|||||||
|
|
||||||
|
|
||||||
@bp.route('/faq')
|
@bp.route('/faq')
|
||||||
|
@register_breadcrumb(bp, '.faq', 'Frequently Asked Questions')
|
||||||
def faq():
|
def faq():
|
||||||
return render_template(
|
return render_template(
|
||||||
'main/faq.html.j2',
|
'main/faq.html.j2',
|
||||||
@ -32,6 +36,7 @@ def faq():
|
|||||||
|
|
||||||
|
|
||||||
@bp.route('/dashboard')
|
@bp.route('/dashboard')
|
||||||
|
@register_breadcrumb(bp, '.dashboard', '<i class="material-icons left">dashboard</i>Dashboard')
|
||||||
@login_required
|
@login_required
|
||||||
def dashboard():
|
def dashboard():
|
||||||
return render_template(
|
return render_template(
|
||||||
@ -40,15 +45,8 @@ def dashboard():
|
|||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@bp.route('/manual')
def manual():
    '''
    Render the manual page template with the title "Manual".
    '''
    return render_template('main/manual.html.j2', title='Manual')
|
|
||||||
|
|
||||||
|
|
||||||
@bp.route('/news')
|
@bp.route('/news')
|
||||||
|
@register_breadcrumb(bp, '.news', '<i class="material-icons left">email</i>News')
|
||||||
def news():
|
def news():
|
||||||
return render_template(
|
return render_template(
|
||||||
'main/news.html.j2',
|
'main/news.html.j2',
|
||||||
@ -57,6 +55,7 @@ def news():
|
|||||||
|
|
||||||
|
|
||||||
@bp.route('/privacy_policy')
|
@bp.route('/privacy_policy')
|
||||||
|
@register_breadcrumb(bp, '.privacy_policy', 'Private statement (GDPR)')
|
||||||
def privacy_policy():
|
def privacy_policy():
|
||||||
return render_template(
|
return render_template(
|
||||||
'main/privacy_policy.html.j2',
|
'main/privacy_policy.html.j2',
|
||||||
@ -65,6 +64,7 @@ def privacy_policy():
|
|||||||
|
|
||||||
|
|
||||||
@bp.route('/terms_of_use')
|
@bp.route('/terms_of_use')
|
||||||
|
@register_breadcrumb(bp, '.terms_of_use', 'Terms of Use')
|
||||||
def terms_of_use():
|
def terms_of_use():
|
||||||
return render_template(
|
return render_template(
|
||||||
'main/terms_of_use.html.j2',
|
'main/terms_of_use.html.j2',
|
||||||
@ -72,14 +72,17 @@ def terms_of_use():
|
|||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@bp.route('/social')
|
@bp.route('/social-area')
|
||||||
|
@register_breadcrumb(bp, '.social_area', '<i class="material-icons left">group</i>Social Area')
|
||||||
@login_required
|
@login_required
|
||||||
def social():
|
def social_area():
|
||||||
|
print('test')
|
||||||
corpora = Corpus.query.filter(Corpus.is_public == True, Corpus.user != current_user).all()
|
corpora = Corpus.query.filter(Corpus.is_public == True, Corpus.user != current_user).all()
|
||||||
|
print(corpora)
|
||||||
users = User.query.filter(User.is_public == True, User.id != current_user.id).all()
|
users = User.query.filter(User.is_public == True, User.id != current_user.id).all()
|
||||||
return render_template(
|
return render_template(
|
||||||
'main/social.html.j2',
|
'main/social_area.html.j2',
|
||||||
title='Social',
|
title='Social Area',
|
||||||
corpora=corpora,
|
corpora=corpora,
|
||||||
users=users
|
users=users
|
||||||
)
|
)
|
1819
app/models.py
Normal file
1819
app/models.py
Normal file
File diff suppressed because it is too large
Load Diff
@ -1,14 +0,0 @@
|
|||||||
from .anonymous_user import *
|
|
||||||
from .avatar import *
|
|
||||||
from .corpus_file import *
|
|
||||||
from .corpus_follower_association import *
|
|
||||||
from .corpus_follower_role import *
|
|
||||||
from .corpus import *
|
|
||||||
from .job_input import *
|
|
||||||
from .job_result import *
|
|
||||||
from .job import *
|
|
||||||
from .role import *
|
|
||||||
from .spacy_nlp_pipeline_model import *
|
|
||||||
from .tesseract_ocr_pipeline_model import *
|
|
||||||
from .token import *
|
|
||||||
from .user import *
|
|
@ -1,10 +0,0 @@
|
|||||||
from flask_login import AnonymousUserMixin
|
|
||||||
|
|
||||||
|
|
||||||
class AnonymousUser(AnonymousUserMixin):
    '''
    Anonymous user class built on Flask-Login's AnonymousUserMixin.

    It answers every permission check negatively.
    '''

    @property
    def is_administrator(self):
        ''' An anonymous user is never an administrator. '''
        return False

    def can(self, permissions):
        ''' Deny the check regardless of the requested permissions. '''
        return False
|
|
@ -1,40 +0,0 @@
|
|||||||
from flask import current_app
|
|
||||||
from flask_hashids import HashidMixin
|
|
||||||
from pathlib import Path
|
|
||||||
from app import db
|
|
||||||
from .file_mixin import FileMixin
|
|
||||||
|
|
||||||
|
|
||||||
class Avatar(HashidMixin, FileMixin, db.Model):
    '''
    Database model for the avatar file attached to a user.
    '''
    __tablename__ = 'avatars'
    # Primary key
    id = db.Column(db.Integer, primary_key=True)
    # Foreign keys
    user_id = db.Column(db.Integer, db.ForeignKey('users.id'))
    # Relationships
    user = db.relationship('User', back_populates='avatar')

    @property
    def path(self) -> Path:
        ''' Location of the avatar file below the owning user's path. '''
        owner_dir = self.user.path
        return owner_dir / 'avatar'

    def delete(self):
        '''
        Remove the avatar file and mark the row for deletion.

        A missing file is not an error. Any other OSError is logged and
        re-raised, in which case the row is not marked for deletion.
        '''
        try:
            self.path.unlink(missing_ok=True)
        except OSError as e:
            current_app.logger.error(e)
            raise
        db.session.delete(self)

    def to_json_serializeable(self, backrefs=False, relationships=False):
        '''
        Return a JSON serializable dict describing this avatar.

        With ``backrefs`` the owning user is embedded under ``'user'``.
        ``relationships`` adds nothing for this model.
        '''
        serialized = {'id': self.hashid}
        serialized.update(self.file_mixin_to_json_serializeable())
        if backrefs:
            serialized['user'] = self.user.to_json_serializeable(backrefs=True)
        return serialized
|
|
@ -1,199 +0,0 @@
|
|||||||
from datetime import datetime
|
|
||||||
from enum import IntEnum
|
|
||||||
from flask import current_app, url_for
|
|
||||||
from flask_hashids import HashidMixin
|
|
||||||
from sqlalchemy.ext.associationproxy import association_proxy
|
|
||||||
from pathlib import Path
|
|
||||||
import shutil
|
|
||||||
import xml.etree.ElementTree as ET
|
|
||||||
from app import db
|
|
||||||
from app.converters.vrt import normalize_vrt_file
|
|
||||||
from app.extensions.nopaque_sqlalchemy_extras import IntEnumColumn
|
|
||||||
from .corpus_follower_association import CorpusFollowerAssociation
|
|
||||||
|
|
||||||
|
|
||||||
class CorpusStatus(IntEnum):
    '''
    States a corpus can be in.
    '''
    UNPREPARED = 1
    SUBMITTED = 2
    QUEUED = 3
    BUILDING = 4
    BUILT = 5
    FAILED = 6
    STARTING_ANALYSIS_SESSION = 7
    RUNNING_ANALYSIS_SESSION = 8
    CANCELING_ANALYSIS_SESSION = 9

    @staticmethod
    def get(corpus_status: 'CorpusStatus | int | str') -> 'CorpusStatus':
        '''
        Coerce a member, a member value or a member name into a member.

        Raises TypeError for any other argument type. An unknown value
        raises ValueError and an unknown name raises KeyError (standard
        enum lookup behavior).
        '''
        if isinstance(corpus_status, str):
            # Names are looked up by key, e.g. 'BUILT'
            return CorpusStatus[corpus_status]
        if not isinstance(corpus_status, int):
            raise TypeError('corpus_status must be CorpusStatus, int, or str')
        # Members are ints themselves, so a member maps onto itself here
        return CorpusStatus(corpus_status)
|
|
||||||
|
|
||||||
|
|
||||||
class Corpus(HashidMixin, db.Model):
    '''
    Class to define a corpus.

    A corpus belongs to a user, owns a set of corpus files and keeps its
    data in a directory below the user's path (see ``path``).
    '''
    __tablename__ = 'corpora'
    # Primary key
    id = db.Column(db.Integer, primary_key=True)
    # Foreign keys
    user_id = db.Column(db.Integer, db.ForeignKey('users.id'))
    # Fields
    creation_date = db.Column(db.DateTime(), default=datetime.utcnow)
    description = db.Column(db.String(255))
    status = db.Column(
        IntEnumColumn(CorpusStatus),
        default=CorpusStatus.UNPREPARED
    )
    title = db.Column(db.String(32))
    num_analysis_sessions = db.Column(db.Integer, default=0)
    num_tokens = db.Column(db.Integer, default=0)
    is_public = db.Column(db.Boolean, default=False)
    # Relationships
    files = db.relationship(
        'CorpusFile',
        back_populates='corpus',
        lazy='dynamic',
        cascade='all, delete-orphan'
    )
    corpus_follower_associations = db.relationship(
        'CorpusFollowerAssociation',
        back_populates='corpus',
        cascade='all, delete-orphan'
    )
    followers = association_proxy(
        'corpus_follower_associations',
        'follower',
        creator=lambda u: CorpusFollowerAssociation(follower=u)
    )
    user = db.relationship('User', back_populates='corpora')
    # "static" attributes
    max_num_tokens = 2_147_483_647  # 2**31 - 1

    def __repr__(self):
        return f'<Corpus {self.title}>'

    @property
    def analysis_url(self):
        ''' URL of the analysis view for this corpus. '''
        return url_for('corpora.analysis', corpus_id=self.id)

    @property
    def jsonpatch_path(self):
        ''' JSON Patch path of this corpus below its user's path. '''
        return f'{self.user.jsonpatch_path}/corpora/{self.hashid}'

    @property
    def path(self) -> Path:
        ''' Directory of this corpus below the owning user's path. '''
        return self.user.path / 'corpora' / f'{self.id}'

    @property
    def url(self):
        ''' URL of the detail view for this corpus. '''
        return url_for('corpora.corpus', corpus_id=self.id)

    @property
    def user_hashid(self):
        ''' Hashid of the owning user. '''
        return self.user.hashid

    @staticmethod
    def create(**kwargs):
        '''
        Create a corpus row and its directory layout.

        The new object is added to the session and flushed so that its id
        (which the directory path depends on) is available. On an OSError
        while creating the directories the error is logged, the session is
        rolled back and the error is re-raised.
        '''
        corpus = Corpus(**kwargs)
        db.session.add(corpus)
        db.session.flush(objects=[corpus])
        db.session.refresh(corpus)
        corpus_files_dir = corpus.path / 'files'
        corpus_cwb_dir = corpus.path / 'cwb'
        corpus_cwb_data_dir = corpus_cwb_dir / 'data'
        corpus_cwb_registry_dir = corpus_cwb_dir / 'registry'
        try:
            corpus.path.mkdir()
            corpus_files_dir.mkdir()
            corpus_cwb_dir.mkdir()
            corpus_cwb_data_dir.mkdir()
            corpus_cwb_registry_dir.mkdir()
        except OSError as e:
            # TODO: Potential leftover cleanup
            current_app.logger.error(e)
            db.session.rollback()
            raise
        return corpus

    def build(self):
        '''
        Assemble ``cwb/corpus.vrt`` from all files of this corpus.

        The ``cwb`` directory is removed and recreated, every corpus file
        is normalized, the file's metadata is written as attributes onto
        the root element of the normalized file, and all root elements are
        collected below one ``<corpus>`` element. On success the status
        becomes SUBMITTED.

        If recreating the directories fails, the status becomes FAILED and
        the OSError is re-raised. If normalizing a file fails, the error is
        logged, the status becomes FAILED and the method returns without
        raising.
        '''
        corpus_cwb_dir = self.path / 'cwb'
        corpus_cwb_data_dir = corpus_cwb_dir / 'data'
        corpus_cwb_registry_dir = corpus_cwb_dir / 'registry'
        try:
            shutil.rmtree(corpus_cwb_dir, ignore_errors=True)
            corpus_cwb_dir.mkdir()
            corpus_cwb_data_dir.mkdir()
            corpus_cwb_registry_dir.mkdir()
        except OSError as e:
            current_app.logger.error(e)
            self.status = CorpusStatus.FAILED
            raise
        # Optional metadata: a missing value is written as the string 'NULL'
        optional_attributes = (
            'address',
            'booktitle',
            'chapter',
            'editor',
            'institution',
            'journal',
            'pages',
            'publisher',
            'school'
        )
        corpus_element = ET.fromstring('<corpus>\n</corpus>')
        for corpus_file in self.files:
            normalized_vrt_path = corpus_cwb_dir / f'{corpus_file.id}.norm.vrt'
            try:
                normalize_vrt_file(corpus_file.path, normalized_vrt_path)
            except Exception as e:
                # Deliberately best effort: mark the build as failed instead
                # of raising, but no longer swallow the reason silently and
                # no longer intercept KeyboardInterrupt/SystemExit.
                current_app.logger.error(e)
                self.status = CorpusStatus.FAILED
                return
            element_tree = ET.parse(normalized_vrt_path)
            text_element = element_tree.getroot()
            text_element.set('author', corpus_file.author)
            text_element.set('title', corpus_file.title)
            text_element.set(
                'publishing_year',
                f'{corpus_file.publishing_year}'
            )
            for attribute in optional_attributes:
                # Test the raw value: formatting None first would give the
                # truthy string 'None' and the 'NULL' fallback never applied
                value = getattr(corpus_file, attribute)
                text_element.set(attribute, f'{value}' if value else 'NULL')
            text_element.tail = '\n'
            corpus_element.append(text_element)
        ET.ElementTree(corpus_element).write(
            corpus_cwb_dir / 'corpus.vrt',
            encoding='utf-8'
        )
        self.status = CorpusStatus.SUBMITTED

    def delete(self):
        '''
        Remove the corpus directory (errors are ignored) and mark the row
        for deletion.
        '''
        shutil.rmtree(self.path, ignore_errors=True)
        db.session.delete(self)

    def to_json_serializeable(self, backrefs=False, relationships=False):
        '''
        Return a JSON serializable dict describing this corpus.

        With ``backrefs`` the owning user is embedded under ``'user'``.
        With ``relationships`` the follower associations and the files are
        embedded as dicts keyed by hashid.
        '''
        json_serializeable = {
            'id': self.hashid,
            'creation_date': f'{self.creation_date.isoformat()}Z',
            'description': self.description,
            'max_num_tokens': self.max_num_tokens,
            'num_analysis_sessions': self.num_analysis_sessions,
            'num_tokens': self.num_tokens,
            'status': self.status.name,
            'title': self.title,
            'is_public': self.is_public
        }
        if backrefs:
            json_serializeable['user'] = \
                self.user.to_json_serializeable(backrefs=True)
        if relationships:
            json_serializeable['corpus_follower_associations'] = {
                x.hashid: x.to_json_serializeable()
                for x in self.corpus_follower_associations
            }
            json_serializeable['files'] = {
                x.hashid: x.to_json_serializeable(relationships=True)
                for x in self.files
            }
        return json_serializeable
|
|
@ -1,102 +0,0 @@
|
|||||||
from flask import current_app, url_for
|
|
||||||
from flask_hashids import HashidMixin
|
|
||||||
from pathlib import Path
|
|
||||||
from app import db
|
|
||||||
from .corpus import CorpusStatus
|
|
||||||
from .file_mixin import FileMixin
|
|
||||||
|
|
||||||
|
|
||||||
class CorpusFile(FileMixin, HashidMixin, db.Model):
    '''
    Database model for a single file that is part of a corpus, together
    with its bibliographic metadata.
    '''
    __tablename__ = 'corpus_files'
    # Primary key
    id = db.Column(db.Integer, primary_key=True)
    # Foreign keys
    corpus_id = db.Column(db.Integer, db.ForeignKey('corpora.id'))
    # Fields
    author = db.Column(db.String(255))
    description = db.Column(db.String(255))
    publishing_year = db.Column(db.Integer)
    title = db.Column(db.String(255))
    address = db.Column(db.String(255))
    booktitle = db.Column(db.String(255))
    chapter = db.Column(db.String(255))
    editor = db.Column(db.String(255))
    institution = db.Column(db.String(255))
    journal = db.Column(db.String(255))
    pages = db.Column(db.String(255))
    publisher = db.Column(db.String(255))
    school = db.Column(db.String(255))
    # Relationships
    corpus = db.relationship('Corpus', back_populates='files')

    @property
    def download_url(self):
        ''' URL of the download endpoint for this file. '''
        route_args = {
            'corpus_id': self.corpus_id,
            'corpus_file_id': self.id
        }
        return url_for('corpora.download_corpus_file', **route_args)

    @property
    def jsonpatch_path(self):
        ''' JSON Patch path of this file below its corpus' path. '''
        corpus_jsonpatch_path = self.corpus.jsonpatch_path
        return f'{corpus_jsonpatch_path}/files/{self.hashid}'

    @property
    def path(self) -> Path:
        ''' Location of the file inside the corpus' ``files`` directory. '''
        files_dir = self.corpus.path / 'files'
        return files_dir / f'{self.id}'

    @property
    def url(self):
        ''' URL of the detail view for this file. '''
        route_args = {
            'corpus_id': self.corpus_id,
            'corpus_file_id': self.id
        }
        return url_for('corpora.corpus_file', **route_args)

    @property
    def user_hashid(self):
        ''' Hashid of the user owning the corpus of this file. '''
        return self.corpus.user.hashid

    @property
    def user_id(self):
        ''' Id of the user owning the corpus of this file. '''
        return self.corpus.user_id

    def delete(self):
        '''
        Remove the file from disk, mark the row for deletion and set the
        corpus status back to UNPREPARED.

        A missing file is not an error. Any other OSError is logged and
        re-raised before the database is touched.
        '''
        try:
            self.path.unlink(missing_ok=True)
        except OSError as e:
            current_app.logger.error(e)
            raise
        db.session.delete(self)
        self.corpus.status = CorpusStatus.UNPREPARED

    def to_json_serializeable(self, backrefs=False, relationships=False):
        '''
        Return a JSON serializable dict describing this corpus file.

        With ``backrefs`` the corpus is embedded under ``'corpus'``.
        ``relationships`` is only passed on to the file mixin serializer.
        '''
        plain_fields = (
            'address',
            'author',
            'description',
            'booktitle',
            'chapter',
            'editor',
            'institution',
            'journal',
            'pages',
            'publisher',
            'publishing_year',
            'school',
            'title'
        )
        serialized = {'id': self.hashid}
        for field_name in plain_fields:
            serialized[field_name] = getattr(self, field_name)
        serialized.update(
            self.file_mixin_to_json_serializeable(
                backrefs=backrefs,
                relationships=relationships
            )
        )
        if backrefs:
            serialized['corpus'] = \
                self.corpus.to_json_serializeable(backrefs=True)
        return serialized
|
|
@ -1,47 +0,0 @@
|
|||||||
from flask_hashids import HashidMixin
|
|
||||||
from app import db
|
|
||||||
from .corpus_follower_role import CorpusFollowerRole
|
|
||||||
|
|
||||||
|
|
||||||
class CorpusFollowerAssociation(HashidMixin, db.Model):
    '''
    Association object linking a corpus, a following user and the role
    the follower holds on that corpus.
    '''
    __tablename__ = 'corpus_follower_associations'
    # Primary key
    id = db.Column(db.Integer, primary_key=True)
    # Foreign keys
    corpus_id = db.Column(db.Integer, db.ForeignKey('corpora.id'))
    follower_id = db.Column(db.Integer, db.ForeignKey('users.id'))
    role_id = db.Column(db.Integer, db.ForeignKey('corpus_follower_roles.id'))
    # Relationships
    corpus = db.relationship(
        'Corpus',
        back_populates='corpus_follower_associations'
    )
    follower = db.relationship(
        'User',
        back_populates='corpus_follower_associations'
    )
    role = db.relationship(
        'CorpusFollowerRole',
        back_populates='corpus_follower_associations'
    )

    def __init__(self, **kwargs):
        '''
        Create the association; without an explicit ``role`` the role
        flagged as default is looked up and used.
        '''
        if 'role' not in kwargs:
            default_role = \
                CorpusFollowerRole.query.filter_by(default=True).first()
            kwargs['role'] = default_role
        super().__init__(**kwargs)

    def __repr__(self):
        return (
            '<CorpusFollowerAssociation '
            f'{self.follower!r} ~ {self.role!r} ~ {self.corpus!r}>'
        )

    def to_json_serializeable(self, backrefs=False, relationships=False):
        '''
        Return a JSON serializable dict describing this association.

        Corpus, follower and role are always embedded. ``backrefs`` and
        ``relationships`` do not change the result.
        '''
        return {
            'id': self.hashid,
            'corpus': self.corpus.to_json_serializeable(backrefs=True),
            'follower': self.follower.to_json_serializeable(),
            'role': self.role.to_json_serializeable()
        }
|
|
@ -1,106 +0,0 @@
|
|||||||
from flask_hashids import HashidMixin
|
|
||||||
from enum import IntEnum
|
|
||||||
from app import db
|
|
||||||
|
|
||||||
|
|
||||||
class CorpusFollowerPermission(IntEnum):
    '''
    Permissions of a corpus follower; each value is a distinct bit so
    that several permissions can be combined in one integer.
    '''
    VIEW = 1
    MANAGE_FILES = 2
    MANAGE_FOLLOWERS = 4
    MANAGE_CORPUS = 8

    @staticmethod
    def get(corpus_follower_permission: 'CorpusFollowerPermission | int | str') -> 'CorpusFollowerPermission':
        '''
        Coerce a member, a member value or a member name into a member.

        Raises TypeError for any other argument type. An unknown value
        raises ValueError and an unknown name raises KeyError (standard
        enum lookup behavior).
        '''
        if isinstance(corpus_follower_permission, str):
            # Names are looked up by key, e.g. 'VIEW'
            return CorpusFollowerPermission[corpus_follower_permission]
        if not isinstance(corpus_follower_permission, int):
            raise TypeError('corpus_follower_permission must be CorpusFollowerPermission, int, or str')
        # Members are ints themselves, so a member maps onto itself here
        return CorpusFollowerPermission(corpus_follower_permission)
|
|
||||||
|
|
||||||
|
|
||||||
class CorpusFollowerRole(HashidMixin, db.Model):
    '''
    Named role a follower can hold on a corpus.

    ``permissions`` is an integer bit mask made of
    ``CorpusFollowerPermission`` values.
    '''
    __tablename__ = 'corpus_follower_roles'
    # Primary key
    id = db.Column(db.Integer, primary_key=True)
    # Fields
    name = db.Column(db.String(64), unique=True)
    default = db.Column(db.Boolean, default=False, index=True)
    permissions = db.Column(db.Integer, default=0)
    # Relationships
    corpus_follower_associations = db.relationship(
        'CorpusFollowerAssociation',
        back_populates='role'
    )

    def __repr__(self):
        return f'<CorpusFollowerRole {self.name}>'

    def has_permission(self, permission: CorpusFollowerPermission | int | str):
        ''' Return True if the bit of ``permission`` is set on this role. '''
        perm = CorpusFollowerPermission.get(permission)
        return self.permissions & perm.value == perm.value

    def add_permission(self, permission: CorpusFollowerPermission | int | str):
        ''' Set the bit of ``permission``; a no-op if it is already set. '''
        perm = CorpusFollowerPermission.get(permission)
        if not self.has_permission(perm):
            self.permissions |= perm.value

    def remove_permission(self, permission: CorpusFollowerPermission | int | str):
        ''' Clear the bit of ``permission``; a no-op if it is not set. '''
        perm = CorpusFollowerPermission.get(permission)
        if self.has_permission(perm):
            self.permissions &= ~perm.value

    def reset_permissions(self):
        ''' Remove all permissions from this role. '''
        self.permissions = 0

    def to_json_serializeable(self, backrefs=False, relationships=False):
        '''
        Return a JSON serializable dict describing this role.

        ``permissions`` is given as the list of names of all permissions
        the role has. With ``relationships`` the follower associations
        using this role are embedded, keyed by hashid. ``backrefs`` does
        not change the result.
        '''
        json_serializeable = {
            'id': self.hashid,
            'default': self.default,
            'name': self.name,
            'permissions': [
                x.name
                for x in CorpusFollowerPermission
                if self.has_permission(x)
            ]
        }
        if relationships:
            # The relationship attribute is named in the plural; reading
            # the singular name raised AttributeError. The dict key is
            # left unchanged.
            json_serializeable['corpus_follower_association'] = {
                x.hashid: x.to_json_serializeable(relationships=True)
                for x in self.corpus_follower_associations
            }
        return json_serializeable

    @staticmethod
    def insert_defaults():
        '''
        Create or update the built-in roles and commit them.

        Existing roles (matched by name) get their permissions reset to
        the defaults below; 'Viewer' is flagged as the default role.
        '''
        roles = {
            'Anonymous': [],
            'Viewer': [
                CorpusFollowerPermission.VIEW
            ],
            'Contributor': [
                CorpusFollowerPermission.VIEW,
                CorpusFollowerPermission.MANAGE_FILES
            ],
            'Administrator': [
                CorpusFollowerPermission.VIEW,
                CorpusFollowerPermission.MANAGE_FILES,
                CorpusFollowerPermission.MANAGE_FOLLOWERS,
                CorpusFollowerPermission.MANAGE_CORPUS
            ]
        }
        default_role_name = 'Viewer'
        for role_name, permissions in roles.items():
            role = CorpusFollowerRole.query.filter_by(name=role_name).first()
            if role is None:
                role = CorpusFollowerRole(name=role_name)
            role.reset_permissions()
            for permission in permissions:
                role.add_permission(permission)
            role.default = role.name == default_role_name
            db.session.add(role)
        db.session.commit()
|
|
@ -1,138 +0,0 @@
|
|||||||
from datetime import datetime
|
|
||||||
from enum import Enum
|
|
||||||
from app import db, mail, socketio
|
|
||||||
from app.email import create_message
|
|
||||||
from .corpus_file import CorpusFile
|
|
||||||
from .corpus_follower_association import CorpusFollowerAssociation
|
|
||||||
from .corpus import Corpus
|
|
||||||
from .job_input import JobInput
|
|
||||||
from .job_result import JobResult
|
|
||||||
from .job import Job, JobStatus
|
|
||||||
from .spacy_nlp_pipeline_model import SpaCyNLPPipelineModel
|
|
||||||
from .tesseract_ocr_pipeline_model import TesseractOCRPipelineModel
|
|
||||||
from .user import UserSettingJobStatusMailNotificationLevel
|
|
||||||
|
|
||||||
|
|
||||||
def register_event_listeners():
    '''
    Attach the SQLAlchemy mapper event handlers of this module.

    Every resource model gets the generic delete/insert/update handlers,
    follower associations get their own delete/insert handlers, and jobs
    additionally get the job specific update handler.
    '''
    resource_models = (
        Corpus,
        CorpusFile,
        Job,
        JobInput,
        JobResult,
        SpaCyNLPPipelineModel,
        TesseractOCRPipelineModel
    )
    resource_handlers = (
        ('after_delete', resource_after_delete),
        ('after_insert', resource_after_insert),
        ('after_update', resource_after_update)
    )
    for model in resource_models:
        for event_name, handler in resource_handlers:
            db.event.listen(model, event_name, handler)

    cfa_handlers = (
        ('after_delete', cfa_after_delete),
        ('after_insert', cfa_after_insert)
    )
    for event_name, handler in cfa_handlers:
        db.event.listen(CorpusFollowerAssociation, event_name, handler)

    db.event.listen(Job, 'after_update', job_after_update)
|
|
||||||
|
|
||||||
|
|
||||||
def resource_after_delete(mapper, connection, resource):
    '''
    Emit a JSON Patch "remove" operation for a deleted resource to the
    room of the user owning it.
    '''
    remove_operation = {
        'op': 'remove',
        'path': resource.jsonpatch_path
    }
    socketio.emit(
        'patch',
        [remove_operation],
        namespace='/users',
        room=f'/users/{resource.user_hashid}'
    )
|
|
||||||
|
|
||||||
|
|
||||||
def cfa_after_delete(mapper, connection, cfa):
    '''
    Emit a JSON Patch "remove" operation for a deleted corpus follower
    association to the room of the user owning the corpus.
    '''
    corpus = cfa.corpus
    owner_hashid = corpus.user.hashid
    remove_operation = {
        'op': 'remove',
        'path': f'/users/{owner_hashid}/corpora/{corpus.hashid}/corpus_follower_associations/{cfa.hashid}'
    }
    socketio.emit(
        'patch',
        [remove_operation],
        namespace='/users',
        room=f'/users/{owner_hashid}'
    )
|
|
||||||
|
|
||||||
|
|
||||||
def resource_after_insert(mapper, connection, resource):
    '''
    Emit a JSON Patch "add" operation for a newly inserted resource to
    the room of the user owning it.

    Every relationship of the mapper is sent as an empty dict.
    '''
    value = resource.to_json_serializeable()
    value.update({relationship.key: {} for relationship in mapper.relationships})
    add_operation = {
        'op': 'add',
        'path': resource.jsonpatch_path,
        'value': value
    }
    socketio.emit(
        'patch',
        [add_operation],
        namespace='/users',
        room=f'/users/{resource.user_hashid}'
    )
|
|
||||||
|
|
||||||
|
|
||||||
def cfa_after_insert(mapper, connection, cfa):
    '''
    Emit a JSON Patch "add" operation for a newly inserted corpus
    follower association to the room of the user owning the corpus.
    '''
    value = cfa.to_json_serializeable()
    corpus = cfa.corpus
    owner_hashid = corpus.user.hashid
    add_operation = {
        'op': 'add',
        'path': f'/users/{owner_hashid}/corpora/{corpus.hashid}/corpus_follower_associations/{cfa.hashid}',
        'value': value
    }
    socketio.emit(
        'patch',
        [add_operation],
        namespace='/users',
        room=f'/users/{owner_hashid}'
    )
|
|
||||||
|
|
||||||
|
|
||||||
def resource_after_update(mapper, connection, resource):
    '''
    Emit JSON Patch "replace" operations for all changed non-relationship
    attributes of an updated resource to the room of the user owning it.

    Datetimes are sent as ISO strings with a trailing "Z", enum members
    by name. Nothing is emitted if no attribute changed.
    '''
    operations = []
    for attr in db.inspect(resource).attrs:
        is_relationship = attr.key in mapper.relationships
        if is_relationship or not attr.load_history().has_changes():
            continue
        value = attr.value
        if isinstance(value, datetime):
            value = f'{value.isoformat()}Z'
        elif isinstance(value, Enum):
            value = value.name
        operations.append(
            {
                'op': 'replace',
                'path': f'{resource.jsonpatch_path}/{attr.key}',
                'value': value
            }
        )
    if not operations:
        return
    socketio.emit(
        'patch',
        operations,
        namespace='/users',
        room=f'/users/{resource.user_hashid}'
    )
|
|
||||||
|
|
||||||
|
|
||||||
def job_after_update(mapper, connection, job):
    '''
    Send a status mail to the job's user when the job status changed.

    No mail is sent if the status is unchanged, if the user's
    notification level is NONE, or if the level is END and the job is
    neither COMPLETED nor FAILED.
    '''
    for attr in db.inspect(job).attrs:
        if attr.key == 'status':
            if not attr.load_history().has_changes():
                return
            level = job.user.setting_job_status_mail_notification_level
            if level == UserSettingJobStatusMailNotificationLevel.NONE:
                return
            job_has_ended = job.status in [JobStatus.COMPLETED, JobStatus.FAILED]
            if level == UserSettingJobStatusMailNotificationLevel.END and not job_has_ended:
                return
            mail.send(
                create_message(
                    job.user.email,
                    f'Status update for your Job "{job.title}"',
                    'tasks/email/notification',
                    job=job
                )
            )
|
|
@ -1,40 +0,0 @@
|
|||||||
from datetime import datetime
|
|
||||||
from flask import current_app
|
|
||||||
from werkzeug.utils import secure_filename
|
|
||||||
from app import db
|
|
||||||
|
|
||||||
|
|
||||||
class FileMixin:
    '''
    Mixin for db.Model classes. All file related models should use this.

    It contributes the common file columns, a serializer for them and a
    ``create`` constructor that also stores the uploaded file. Classes
    using it have to provide a ``path`` attribute.
    '''
    creation_date = db.Column(db.DateTime, default=datetime.utcnow)
    filename = db.Column(db.String(255))
    mimetype = db.Column(db.String(255))

    def file_mixin_to_json_serializeable(self, backrefs=False, relationships=False):
        '''
        Return the file columns as a JSON serializable dict.

        ``backrefs`` and ``relationships`` do not change the result.
        '''
        creation_date = f'{self.creation_date.isoformat()}Z'
        return {
            'creation_date': creation_date,
            'filename': self.filename,
            'mimetype': self.mimetype
        }

    @classmethod
    def create(cls, file_storage, **kwargs):
        '''
        Create a row for ``file_storage`` and save the file to its path.

        ``filename`` and ``mimetype`` may be overridden via ``kwargs``,
        otherwise they are taken from ``file_storage``; the filename is
        passed through ``secure_filename``. The object is flushed before
        saving so that its ``path`` can be computed. If saving raises
        AttributeError or OSError, the error is logged, the session is
        rolled back and the error is re-raised.
        '''
        raw_filename = kwargs.pop('filename', file_storage.filename)
        mimetype = kwargs.pop('mimetype', file_storage.mimetype)
        instance = cls(
            filename=secure_filename(raw_filename),
            mimetype=mimetype,
            **kwargs
        )
        db.session.add(instance)
        db.session.flush(objects=[instance])
        db.session.refresh(instance)
        try:
            file_storage.save(instance.path)
        except (AttributeError, OSError) as e:
            current_app.logger.error(e)
            db.session.rollback()
            raise e
        return instance
|
|
@ -1,171 +0,0 @@
|
|||||||
from datetime import datetime
|
|
||||||
from enum import IntEnum
|
|
||||||
from flask import current_app, url_for
|
|
||||||
from flask_hashids import HashidMixin
|
|
||||||
from time import sleep
|
|
||||||
from pathlib import Path
|
|
||||||
import shutil
|
|
||||||
from app import db
|
|
||||||
from app.extensions.nopaque_sqlalchemy_extras import ContainerColumn, IntEnumColumn
|
|
||||||
|
|
||||||
|
|
||||||
class JobStatus(IntEnum):
    '''
    States a job can be in.
    '''
    INITIALIZING = 1
    SUBMITTED = 2
    QUEUED = 3
    RUNNING = 4
    CANCELING = 5
    CANCELED = 6
    COMPLETED = 7
    FAILED = 8

    @staticmethod
    def get(job_status: 'JobStatus | int | str') -> 'JobStatus':
        '''
        Coerce a member, a member value or a member name into a member.

        Raises TypeError for any other argument type. An unknown value
        raises ValueError and an unknown name raises KeyError (standard
        enum lookup behavior).
        '''
        if isinstance(job_status, str):
            # Names are looked up by key, e.g. 'RUNNING'
            return JobStatus[job_status]
        if not isinstance(job_status, int):
            raise TypeError('job_status must be JobStatus, int, or str')
        # Members are ints themselves, so a member maps onto itself here
        return JobStatus(job_status)
|
|
||||||
|
|
||||||
|
|
||||||
class Job(HashidMixin, db.Model):
    '''
    Class to define Jobs.

    A job belongs to one user and owns its JobInput and JobResult records
    (both relationships cascade on delete). The files of a job are stored
    below the directory returned by the ``path`` property.
    '''
    __tablename__ = 'jobs'
    # Primary key
    id = db.Column(db.Integer, primary_key=True)
    # Foreign keys
    user_id = db.Column(db.Integer, db.ForeignKey('users.id'))
    # Fields
    creation_date = \
        db.Column(db.DateTime(), default=datetime.utcnow)
    description = db.Column(db.String(255))
    end_date = db.Column(db.DateTime())
    service = db.Column(db.String(64))
    service_args = db.Column(ContainerColumn(dict, 255))
    service_version = db.Column(db.String(16))
    status = db.Column(
        IntEnumColumn(JobStatus),
        default=JobStatus.INITIALIZING
    )
    title = db.Column(db.String(32))
    # Relationships
    inputs = db.relationship(
        'JobInput',
        back_populates='job',
        cascade='all, delete-orphan',
        lazy='dynamic'
    )
    results = db.relationship(
        'JobResult',
        back_populates='job',
        cascade='all, delete-orphan',
        lazy='dynamic'
    )
    user = db.relationship(
        'User',
        back_populates='jobs'
    )

    def __repr__(self):
        return f'<Job {self.title}>'

    @property
    def jsonpatch_path(self):
        ''' JSON Patch path of this job, nested below its user's path. '''
        return f'{self.user.jsonpatch_path}/jobs/{self.hashid}'

    @property
    def path(self) -> Path:
        ''' Directory of this job: <user.path>/jobs/<id>. '''
        return self.user.path / 'jobs' / f'{self.id}'

    @property
    def url(self):
        ''' URL of the 'jobs.job' endpoint for this job. '''
        return url_for('jobs.job', job_id=self.id)

    @property
    def user_hashid(self):
        ''' Hashid of the owning user. '''
        return self.user.hashid

    @staticmethod
    def create(**kwargs):
        '''
        Create a job and its directory tree.

        The job is added to the session and flushed (not committed) so that
        its id is available, then the job directory and its 'inputs',
        'pipeline_data' and 'results' subdirectories are created.

        On OSError the directories created by this call are removed again,
        the session is rolled back and the error is re-raised.
        '''
        job = Job(**kwargs)
        db.session.add(job)
        db.session.flush(objects=[job])
        db.session.refresh(job)
        # Resolve all paths up front; after a rollback the job can no longer
        # be relied upon to provide them.
        job_dir = job.path
        directories = [
            job_dir,
            job_dir / 'inputs',
            job_dir / 'pipeline_data',
            job_dir / 'results'
        ]
        created_directories = []
        try:
            for directory in directories:
                directory.mkdir()
                created_directories.append(directory)
        except OSError as e:
            current_app.logger.error(e)
            # Only remove what this call created (children first). A
            # directory that already existed before is left untouched.
            for directory in reversed(created_directories):
                try:
                    directory.rmdir()
                except OSError as cleanup_error:
                    current_app.logger.error(cleanup_error)
            db.session.rollback()
            raise
        return job

    def delete(self):
        '''
        Delete the job and its inputs and results from the database.

        If the job is neither COMPLETED nor FAILED, its status is set to
        CANCELING (and committed) and this method blocks, polling the
        database once per second, until the status read back is CANCELED.
        Afterwards the job directory is removed and the job is marked for
        deletion in the session; the deletion itself is not committed here.

        On OSError while removing the directory the session is rolled back
        and the error is re-raised.
        '''
        if self.status not in [JobStatus.COMPLETED, JobStatus.FAILED]:  # noqa
            self.status = JobStatus.CANCELING
            db.session.commit()
            while self.status != JobStatus.CANCELED:
                # In case the daemon handled a job in any way
                if self.status != JobStatus.CANCELING:
                    self.status = JobStatus.CANCELING
                    db.session.commit()
                sleep(1)
                db.session.refresh(self)
        try:
            shutil.rmtree(self.path)
        except OSError as e:
            current_app.logger.error(e)
            db.session.rollback()
            # Bare raise keeps the original traceback intact.
            raise
        db.session.delete(self)

    def restart(self):
        '''
        Restart a job - only if the status is failed

        Removes the 'results' and 'pyflow.data' directories of the job
        (errors are ignored), marks all result records for deletion, clears
        the end date and sets the status to SUBMITTED. Nothing is committed.

        Raises Exception if the status is not FAILED.
        '''
        if self.status != JobStatus.FAILED:
            raise Exception('Job status is not "failed"')
        shutil.rmtree(self.path / 'results', ignore_errors=True)
        shutil.rmtree(self.path / 'pyflow.data', ignore_errors=True)
        for result in self.results:
            db.session.delete(result)
        self.end_date = None
        self.status = JobStatus.SUBMITTED

    def to_json_serializeable(self, backrefs=False, relationships=False):
        '''
        Return a JSON serializable dict describing this job.

        Dates are ISO formatted with a trailing 'Z'; 'id' is the hashid and
        'status' is the name of the JobStatus member.

        backrefs: also include the serialized owning user under 'user'.
        relationships: also include 'inputs' and 'results', each a dict
            mapping hashid to the serialized record.
        '''
        json_serializeable = {
            'id': self.hashid,
            'creation_date': f'{self.creation_date.isoformat()}Z',
            'description': self.description,
            'end_date': (
                None if self.end_date is None
                else f'{self.end_date.isoformat()}Z'
            ),
            'service': self.service,
            'service_args': self.service_args,
            'service_version': self.service_version,
            'status': self.status.name,
            'title': self.title
        }
        if backrefs:
            json_serializeable['user'] = \
                self.user.to_json_serializeable(backrefs=True)
        if relationships:
            json_serializeable['inputs'] = {
                x.hashid: x.to_json_serializeable(relationships=True)
                for x in self.inputs
            }
            json_serializeable['results'] = {
                x.hashid: x.to_json_serializeable(relationships=True)
                for x in self.results
            }
        return json_serializeable
|
|
@ -1,65 +0,0 @@
|
|||||||
from flask import url_for
|
|
||||||
from flask_hashids import HashidMixin
|
|
||||||
from pathlib import Path
|
|
||||||
from app import db
|
|
||||||
from .file_mixin import FileMixin
|
|
||||||
|
|
||||||
|
|
||||||
class JobInput(FileMixin, HashidMixin, db.Model):
    '''
    Input file record of a Job.

    The file is stored at <job.path>/inputs/<id>. File related attributes
    such as ``filename`` are not declared here (presumably they come from
    FileMixin - verify there).
    '''
    __tablename__ = 'job_inputs'
    # Primary key
    id = db.Column(db.Integer, primary_key=True)
    # Foreign keys
    job_id = db.Column(db.Integer, db.ForeignKey('jobs.id'))
    # Relationships
    job = db.relationship('Job', back_populates='inputs')

    def __repr__(self):
        return f'<JobInput {self.filename}>'

    @property
    def content_url(self):
        ''' Download URL ('jobs.download_job_input') of this input. '''
        url_params = {'job_id': self.job.id, 'job_input_id': self.id}
        return url_for('jobs.download_job_input', **url_params)

    @property
    def jsonpatch_path(self):
        ''' JSON Patch path of this input, nested below its job's path. '''
        parent_path = self.job.jsonpatch_path
        return f'{parent_path}/inputs/{self.hashid}'

    @property
    def path(self) -> Path:
        ''' File location: <job.path>/inputs/<id>. '''
        inputs_dir = self.job.path / 'inputs'
        return inputs_dir / f'{self.id}'

    @property
    def url(self):
        ''' URL of the job page, anchored at this input. '''
        anchor = f'job-{self.job.hashid}-input-{self.hashid}'
        return url_for('jobs.job', job_id=self.job_id, _anchor=anchor)

    @property
    def user_hashid(self):
        ''' Hashid of the user owning the parent job. '''
        owner = self.job.user
        return owner.hashid

    @property
    def user_id(self):
        ''' Id of the user owning the parent job. '''
        owner = self.job.user
        return owner.id

    def to_json_serializeable(self, backrefs=False, relationships=False):
        '''
        Return a JSON serializable dict describing this input.

        backrefs: also include the serialized parent job under 'job'.
        relationships: accepted for a uniform signature; this model has
            nothing to add for it.
        '''
        serialized = {'id': self.hashid}
        serialized.update(self.file_mixin_to_json_serializeable())
        if backrefs:
            serialized['job'] = self.job.to_json_serializeable(backrefs=True)
        return serialized
|
|
@ -1,71 +0,0 @@
|
|||||||
from flask import url_for
|
|
||||||
from flask_hashids import HashidMixin
|
|
||||||
from pathlib import Path
|
|
||||||
from app import db
|
|
||||||
from .file_mixin import FileMixin
|
|
||||||
|
|
||||||
|
|
||||||
class JobResult(FileMixin, HashidMixin, db.Model):
    '''
    Result file record of a Job.

    The file is stored at <job.path>/results/<id>. File related attributes
    such as ``filename`` are not declared here (presumably they come from
    FileMixin - verify there).
    '''
    __tablename__ = 'job_results'
    # Primary key
    id = db.Column(db.Integer, primary_key=True)
    # Foreign keys
    job_id = db.Column(db.Integer, db.ForeignKey('jobs.id'))
    # Fields
    description = db.Column(db.String(255))
    # Relationships
    job = db.relationship('Job', back_populates='results')

    def __repr__(self):
        return f'<JobResult {self.filename}>'

    @property
    def download_url(self):
        ''' Download URL ('jobs.download_job_result') of this result. '''
        url_params = {'job_id': self.job_id, 'job_result_id': self.id}
        return url_for('jobs.download_job_result', **url_params)

    @property
    def jsonpatch_path(self):
        ''' JSON Patch path of this result, nested below its job's path. '''
        parent_path = self.job.jsonpatch_path
        return f'{parent_path}/results/{self.hashid}'

    @property
    def path(self) -> Path:
        ''' File location: <job.path>/results/<id>. '''
        results_dir = self.job.path / 'results'
        return results_dir / f'{self.id}'

    @property
    def url(self):
        ''' URL of the job page, anchored at this result. '''
        anchor = f'job-{self.job.hashid}-result-{self.hashid}'
        return url_for('jobs.job', job_id=self.job_id, _anchor=anchor)

    @property
    def user_hashid(self):
        ''' Hashid of the user owning the parent job. '''
        owner = self.job.user
        return owner.hashid

    @property
    def user_id(self):
        ''' Id of the user owning the parent job. '''
        owner = self.job.user
        return owner.id

    def to_json_serializeable(self, backrefs=False, relationships=False):
        '''
        Return a JSON serializable dict describing this result.

        Both flags are forwarded to file_mixin_to_json_serializeable.

        backrefs: also include the serialized parent job under 'job'.
        relationships: this model itself has nothing to add for it.
        '''
        serialized = {'id': self.hashid, 'description': self.description}
        serialized.update(
            self.file_mixin_to_json_serializeable(
                backrefs=backrefs,
                relationships=relationships
            )
        )
        if backrefs:
            serialized['job'] = self.job.to_json_serializeable(backrefs=True)
        return serialized
|
|
@ -1,99 +0,0 @@
|
|||||||
from enum import IntEnum
|
|
||||||
from flask_hashids import HashidMixin
|
|
||||||
from app import db
|
|
||||||
|
|
||||||
|
|
||||||
class Permission(IntEnum):
    '''
    User permissions as integer bit flags.

    Every member is a distinct power of 2, so several permissions can be
    combined in one integer and tested with the bitwise operator &.
    '''
    ADMINISTRATE = 1
    CONTRIBUTE = 2
    USE_API = 4

    @staticmethod
    def get(permission: 'Permission | int | str') -> 'Permission':
        '''
        Resolve a Permission member from a member, its value or its name.

        Raises TypeError for any other argument type, ValueError for an
        int that is not a member value and KeyError for an unknown name.
        '''
        if isinstance(permission, Permission):
            resolved = permission
        elif isinstance(permission, int):
            # Lookup by value
            resolved = Permission(permission)
        elif isinstance(permission, str):
            # Lookup by member name
            resolved = Permission[permission]
        else:
            raise TypeError('permission must be Permission, int, or str')
        return resolved
|
|
||||||
|
|
||||||
|
|
||||||
class Role(HashidMixin, db.Model):
    '''
    A named set of permissions that can be assigned to users.

    ``permissions`` stores the granted Permission members as one integer
    bit mask.
    '''
    __tablename__ = 'roles'
    # Primary key
    id = db.Column(db.Integer, primary_key=True)
    # Fields
    name = db.Column(db.String(64), unique=True)
    default = db.Column(db.Boolean, default=False, index=True)
    permissions = db.Column(db.Integer, default=0)
    # Relationships
    users = db.relationship('User', back_populates='role', lazy='dynamic')

    def __repr__(self):
        return f'<Role {self.name}>'

    def has_permission(self, permission: Permission | int | str):
        '''
        Return True if this role grants the given permission.

        A role whose permissions are still None (column default not applied
        yet) is treated as having no permissions.
        '''
        p = Permission.get(permission)
        return (self.permissions or 0) & p.value == p.value

    def add_permission(self, permission: Permission | int | str):
        ''' Grant the given permission; a no-op if already granted. '''
        p = Permission.get(permission)
        # Setting a bit is idempotent, so no prior membership check needed.
        self.permissions = (self.permissions or 0) | p.value

    def remove_permission(self, permission: Permission | int | str):
        ''' Revoke the given permission; a no-op if not granted. '''
        p = Permission.get(permission)
        self.permissions = (self.permissions or 0) & ~p.value

    def reset_permissions(self):
        ''' Revoke all permissions. '''
        self.permissions = 0

    def to_json_serializeable(self, backrefs=False, relationships=False):
        '''
        Return a JSON serializable dict describing this role.

        'permissions' is the list of names of all granted Permission
        members.

        backrefs: accepted for a uniform signature; nothing is added.
        relationships: also include 'users', a dict mapping hashid to the
            serialized user.
        '''
        json_serializeable = {
            'id': self.hashid,
            'default': self.default,
            'name': self.name,
            'permissions': [
                x.name for x in Permission
                if self.has_permission(x.value)
            ]
        }
        if relationships:
            json_serializeable['users'] = {
                x.hashid: x.to_json_serializeable(relationships=True)
                for x in self.users
            }
        return json_serializeable

    @staticmethod
    def insert_defaults():
        '''
        Create or update the built-in roles and commit.

        Existing roles (looked up by name) have their permissions reset to
        the defaults listed here. 'User' is flagged as the default role.
        '''
        roles = {
            'User': [],
            'API user': [Permission.USE_API],
            'Contributor': [Permission.CONTRIBUTE],
            'Administrator': [
                Permission.ADMINISTRATE,
                Permission.CONTRIBUTE,
                Permission.USE_API
            ],
            'System user': []
        }
        default_role_name = 'User'
        for role_name, permissions in roles.items():
            role = Role.query.filter_by(name=role_name).first()
            if role is None:
                role = Role(name=role_name)
            role.reset_permissions()
            for permission in permissions:
                role.add_permission(permission)
            role.default = role.name == default_role_name
            db.session.add(role)
        db.session.commit()
|
|
@ -1,136 +0,0 @@
|
|||||||
from flask import current_app, url_for
|
|
||||||
from flask_hashids import HashidMixin
|
|
||||||
from tqdm import tqdm
|
|
||||||
from pathlib import Path
|
|
||||||
import requests
|
|
||||||
import yaml
|
|
||||||
from app import db
|
|
||||||
from app.extensions.nopaque_sqlalchemy_extras import ContainerColumn
|
|
||||||
from .file_mixin import FileMixin
|
|
||||||
from .user import User
|
|
||||||
|
|
||||||
|
|
||||||
class SpaCyNLPPipelineModel(FileMixin, HashidMixin, db.Model):
    '''
    A spaCy NLP pipeline model file owned by a user.

    The model file is stored at
    <user.path>/spacy_nlp_pipeline_models/<id>.
    '''
    __tablename__ = 'spacy_nlp_pipeline_models'
    # Primary key
    id = db.Column(db.Integer, primary_key=True)
    # Foreign keys
    user_id = db.Column(db.Integer, db.ForeignKey('users.id'))
    # Fields
    title = db.Column(db.String(64))
    description = db.Column(db.String(255))
    version = db.Column(db.String(16))
    compatible_service_versions = db.Column(ContainerColumn(list, 255))
    publisher = db.Column(db.String(128))
    publisher_url = db.Column(db.String(512))
    publishing_url = db.Column(db.String(512))
    publishing_year = db.Column(db.Integer)
    pipeline_name = db.Column(db.String(64))
    is_public = db.Column(db.Boolean, default=False)
    # Relationships
    user = db.relationship('User', back_populates='spacy_nlp_pipeline_models')

    @property
    def path(self) -> Path:
        ''' File location: <user.path>/spacy_nlp_pipeline_models/<id>. '''
        return self.user.path / 'spacy_nlp_pipeline_models' / f'{self.id}'

    @property
    def jsonpatch_path(self):
        ''' JSON Patch path of this model, nested below its user's path. '''
        return f'{self.user.jsonpatch_path}/spacy_nlp_pipeline_models/{self.hashid}'

    @property
    def url(self):
        ''' URL of this model's contribution page. '''
        return url_for(
            'contributions.spacy_nlp_pipeline_models.entity',
            spacy_nlp_pipeline_model_id=self.id
        )

    @property
    def user_hashid(self):
        ''' Hashid of the owning user. '''
        return self.user.hashid

    @staticmethod
    def _download_file(url, destination, description):
        '''
        Stream url into destination while showing a progress bar.

        The data is written to '<destination name>.part' next to the
        destination and only moved into place after a complete download, so
        an aborted download never leaves a truncated file at destination.

        Raises requests.HTTPError on an error status and OSError on file
        system errors.
        '''
        partial_path = destination.with_name(f'{destination.name}.part')
        try:
            with requests.get(url, stream=True) as r:
                # Do not store an HTTP error page as model file
                r.raise_for_status()
                # Not every server announces the size
                content_length = r.headers.get('Content-Length')
                with tqdm(
                    desc=description,
                    unit="B",
                    unit_scale=True,
                    unit_divisor=1024,
                    total=(
                        None if content_length is None
                        else int(content_length)
                    )
                ) as pbar, partial_path.open('wb') as f:
                    pbar.clear()
                    for chunk in r.iter_content(chunk_size=1024):
                        if chunk:  # filter out keep-alive new chunks
                            pbar.update(len(chunk))
                            f.write(chunk)
            partial_path.replace(destination)
        finally:
            # No-op after a successful replace, cleanup otherwise
            partial_path.unlink(missing_ok=True)

    @staticmethod
    def insert_defaults(force_download=False):
        '''
        Create or update the default models and download their files.

        The records are read from
        'default_records/spacy_nlp_pipeline_model.yml' next to this module.
        A record is matched to an existing model by title and version. New
        models are owned by the user named 'nopaque'. A model file is
        downloaded if it does not exist yet or if force_download is set.
        All changes are committed once at the end.
        '''
        nopaque_user = User.query.filter_by(username='nopaque').first()
        default_records_file = Path(__file__).parent / 'default_records' / 'spacy_nlp_pipeline_model.yml'
        with default_records_file.open('r') as f:
            default_records = yaml.safe_load(f)
        # yaml.safe_load returns None for an empty file
        for m in default_records or []:
            fields = {
                'compatible_service_versions': m['compatible_service_versions'],
                'description': m['description'],
                'filename': m['url'].split('/')[-1],
                'publisher': m['publisher'],
                'publisher_url': m['publisher_url'],
                'publishing_url': m['publishing_url'],
                'publishing_year': m['publishing_year'],
                'is_public': True,
                'title': m['title'],
                'version': m['version'],
                'pipeline_name': m['pipeline_name']
            }
            model = SpaCyNLPPipelineModel.query.filter_by(title=m['title'], version=m['version']).first()  # noqa
            if model is None:
                model = SpaCyNLPPipelineModel(user=nopaque_user, **fields)
                db.session.add(model)
                # The id is needed to build model.path
                db.session.flush(objects=[model])
                db.session.refresh(model)
            else:
                for name, value in fields.items():
                    setattr(model, name, value)
            if not model.path.exists() or force_download:
                SpaCyNLPPipelineModel._download_file(
                    m['url'],
                    model.path,
                    f'{model.title} ({model.filename})'
                )
        db.session.commit()

    def delete(self):
        '''
        Remove the model file and mark the model for deletion.

        A missing file is not an error. Other OSErrors are logged and
        re-raised. The deletion is not committed here.
        '''
        try:
            self.path.unlink(missing_ok=True)
        except OSError as e:
            current_app.logger.error(e)
            raise
        db.session.delete(self)

    def to_json_serializeable(self, backrefs=False, relationships=False):
        '''
        Return a JSON serializable dict describing this model.

        backrefs: also include the serialized owning user under 'user'.
        relationships: accepted for a uniform signature; nothing is added.
        '''
        json_serializeable = {
            'id': self.hashid,
            'compatible_service_versions': self.compatible_service_versions,
            'description': self.description,
            'publisher': self.publisher,
            'publisher_url': self.publisher_url,
            'publishing_url': self.publishing_url,
            'publishing_year': self.publishing_year,
            'pipeline_name': self.pipeline_name,
            'is_public': self.is_public,
            'title': self.title,
            'version': self.version,
            **self.file_mixin_to_json_serializeable()
        }
        if backrefs:
            json_serializeable['user'] = \
                self.user.to_json_serializeable(backrefs=True)
        return json_serializeable
|
|
@ -1,132 +0,0 @@
|
|||||||
from flask import current_app, url_for
|
|
||||||
from flask_hashids import HashidMixin
|
|
||||||
from tqdm import tqdm
|
|
||||||
from pathlib import Path
|
|
||||||
import requests
|
|
||||||
import yaml
|
|
||||||
from app import db
|
|
||||||
from app.extensions.nopaque_sqlalchemy_extras import ContainerColumn
|
|
||||||
from .file_mixin import FileMixin
|
|
||||||
from .user import User
|
|
||||||
|
|
||||||
|
|
||||||
class TesseractOCRPipelineModel(FileMixin, HashidMixin, db.Model):
    '''
    A Tesseract OCR pipeline model file owned by a user.

    The model file is stored at
    <user.path>/tesseract_ocr_pipeline_models/<id>.
    '''
    __tablename__ = 'tesseract_ocr_pipeline_models'
    # Primary key
    id = db.Column(db.Integer, primary_key=True)
    # Foreign keys
    user_id = db.Column(db.Integer, db.ForeignKey('users.id'))
    # Fields
    title = db.Column(db.String(64))
    description = db.Column(db.String(255))
    version = db.Column(db.String(16))
    compatible_service_versions = db.Column(ContainerColumn(list, 255))
    publisher = db.Column(db.String(128))
    publisher_url = db.Column(db.String(512))
    publishing_url = db.Column(db.String(512))
    publishing_year = db.Column(db.Integer)
    is_public = db.Column(db.Boolean, default=False)
    # Relationships
    user = db.relationship('User', back_populates='tesseract_ocr_pipeline_models')

    @property
    def path(self) -> Path:
        ''' File location: <user.path>/tesseract_ocr_pipeline_models/<id>. '''
        return self.user.path / 'tesseract_ocr_pipeline_models' / f'{self.id}'

    @property
    def jsonpatch_path(self):
        ''' JSON Patch path of this model, nested below its user's path. '''
        return f'{self.user.jsonpatch_path}/tesseract_ocr_pipeline_models/{self.hashid}'

    @property
    def url(self):
        ''' URL of this model's contribution page. '''
        return url_for(
            'contributions.tesseract_ocr_pipeline_models.entity',
            tesseract_ocr_pipeline_model_id=self.id
        )

    @property
    def user_hashid(self):
        ''' Hashid of the owning user. '''
        return self.user.hashid

    @staticmethod
    def _download_file(url, destination, description):
        '''
        Stream url into destination while showing a progress bar.

        The data is written to '<destination name>.part' next to the
        destination and only moved into place after a complete download, so
        an aborted download never leaves a truncated file at destination.

        Raises requests.HTTPError on an error status and OSError on file
        system errors.
        '''
        partial_path = destination.with_name(f'{destination.name}.part')
        try:
            with requests.get(url, stream=True) as r:
                # Do not store an HTTP error page as model file
                r.raise_for_status()
                # Not every server announces the size
                content_length = r.headers.get('Content-Length')
                with tqdm(
                    desc=description,
                    unit="B",
                    unit_scale=True,
                    unit_divisor=1024,
                    total=(
                        None if content_length is None
                        else int(content_length)
                    )
                ) as pbar, partial_path.open('wb') as f:
                    pbar.clear()
                    for chunk in r.iter_content(chunk_size=1024):
                        if chunk:  # filter out keep-alive new chunks
                            pbar.update(len(chunk))
                            f.write(chunk)
            partial_path.replace(destination)
        finally:
            # No-op after a successful replace, cleanup otherwise
            partial_path.unlink(missing_ok=True)

    @staticmethod
    def insert_defaults(force_download=False):
        '''
        Create or update the default models and download their files.

        The records are read from
        'default_records/tesseract_ocr_pipeline_model.yml' next to this
        module. A record is matched to an existing model by title and
        version. New models are owned by the user named 'nopaque'. The
        filename is always '<id>.traineddata'. A model file is downloaded if
        it does not exist yet or if force_download is set. All changes are
        committed once at the end.
        '''
        nopaque_user = User.query.filter_by(username='nopaque').first()
        default_records_file = Path(__file__).parent / 'default_records' / 'tesseract_ocr_pipeline_model.yml'
        with default_records_file.open('r') as f:
            default_records = yaml.safe_load(f)
        # yaml.safe_load returns None for an empty file
        for m in default_records or []:
            fields = {
                'compatible_service_versions': m['compatible_service_versions'],
                'description': m['description'],
                'publisher': m['publisher'],
                'publisher_url': m['publisher_url'],
                'publishing_url': m['publishing_url'],
                'publishing_year': m['publishing_year'],
                'is_public': True,
                'title': m['title'],
                'version': m['version']
            }
            model = TesseractOCRPipelineModel.query.filter_by(title=m['title'], version=m['version']).first()  # noqa
            if model is None:
                model = TesseractOCRPipelineModel(user=nopaque_user, **fields)
                db.session.add(model)
                # The id is needed for the filename and model.path
                db.session.flush(objects=[model])
                db.session.refresh(model)
            else:
                for name, value in fields.items():
                    setattr(model, name, value)
            model.filename = f'{model.id}.traineddata'
            if not model.path.exists() or force_download:
                TesseractOCRPipelineModel._download_file(
                    m['url'],
                    model.path,
                    f'{model.title} ({model.filename})'
                )
        db.session.commit()

    def delete(self):
        '''
        Remove the model file and mark the model for deletion.

        A missing file is not an error. Other OSErrors are logged and
        re-raised. The deletion is not committed here.
        '''
        try:
            self.path.unlink(missing_ok=True)
        except OSError as e:
            current_app.logger.error(e)
            raise
        db.session.delete(self)

    def to_json_serializeable(self, backrefs=False, relationships=False):
        '''
        Return a JSON serializable dict describing this model.

        backrefs: also include the serialized owning user under 'user'.
        relationships: accepted for a uniform signature; nothing is added.
        '''
        json_serializeable = {
            'id': self.hashid,
            'compatible_service_versions': self.compatible_service_versions,
            'description': self.description,
            'publisher': self.publisher,
            'publisher_url': self.publisher_url,
            'publishing_url': self.publishing_url,
            'publishing_year': self.publishing_year,
            'is_public': self.is_public,
            'title': self.title,
            'version': self.version,
            **self.file_mixin_to_json_serializeable()
        }
        if backrefs:
            json_serializeable['user'] = \
                self.user.to_json_serializeable(backrefs=True)
        return json_serializeable
|
|
@ -1,48 +0,0 @@
|
|||||||
from datetime import datetime, timedelta
|
|
||||||
from app import db
|
|
||||||
|
|
||||||
|
|
||||||
class Token(db.Model):
    '''
    An access/refresh token pair of a user, each with its own expiration
    date.
    '''
    __tablename__ = 'tokens'
    # Primary key
    id = db.Column(db.Integer, primary_key=True)
    # Foreign keys
    user_id = db.Column(db.Integer, db.ForeignKey('users.id'))
    # Fields
    access_token = db.Column(db.String(64), index=True)
    access_expiration = db.Column(db.DateTime)
    refresh_token = db.Column(db.String(64), index=True)
    refresh_expiration = db.Column(db.DateTime)
    # Relationships
    user = db.relationship('User', back_populates='tokens')

    def expire(self):
        ''' Expire both the access and the refresh token as of now. '''
        # One timestamp for both fields so they expire at the same instant
        now = datetime.utcnow()
        self.access_expiration = now
        self.refresh_expiration = now

    def to_json_serializeable(self, backrefs=False, relationships=False):
        '''
        Return a JSON serializable dict describing this token.

        Expiration dates are ISO formatted with a trailing 'Z', or None if
        not set.

        backrefs: also include the serialized owning user under 'user'.
        relationships: accepted for a uniform signature; nothing is added.
        '''
        json_serializeable = {
            # NOTE: Token does not derive from HashidMixin and therefore has
            # no hashid attribute; the plain primary key is used instead.
            'id': self.id,
            'access_token': self.access_token,
            'access_expiration': (
                None if self.access_expiration is None
                else f'{self.access_expiration.isoformat()}Z'
            ),
            'refresh_token': self.refresh_token,
            'refresh_expiration': (
                None if self.refresh_expiration is None
                else f'{self.refresh_expiration.isoformat()}Z'
            )
        }
        if backrefs:
            json_serializeable['user'] = \
                self.user.to_json_serializeable(backrefs=True)
        return json_serializeable

    @staticmethod
    def clean():
        """Remove any tokens that have been expired for more than a day."""
        # The bulk delete is not committed here.
        yesterday = datetime.utcnow() - timedelta(days=1)
        Token.query.filter(Token.refresh_expiration < yesterday).delete()
|
|
@ -1,453 +0,0 @@
|
|||||||
from datetime import datetime, timedelta
|
|
||||||
from enum import IntEnum
|
|
||||||
from flask import current_app, url_for
|
|
||||||
from flask_hashids import HashidMixin
|
|
||||||
from flask_login import UserMixin
|
|
||||||
from sqlalchemy.ext.associationproxy import association_proxy
|
|
||||||
from pathlib import Path
|
|
||||||
from werkzeug.security import generate_password_hash, check_password_hash
|
|
||||||
import jwt
|
|
||||||
import re
|
|
||||||
import secrets
|
|
||||||
import shutil
|
|
||||||
from app import db, hashids
|
|
||||||
from app.extensions.nopaque_sqlalchemy_extras import IntEnumColumn
|
|
||||||
from .corpus import Corpus
|
|
||||||
from .corpus_follower_association import CorpusFollowerAssociation
|
|
||||||
from .corpus_follower_role import CorpusFollowerRole
|
|
||||||
from .role import Permission, Role
|
|
||||||
from .token import Token
|
|
||||||
|
|
||||||
|
|
||||||
class ProfilePrivacySettings(IntEnum):
    '''
    Profile privacy switches as integer bit flags.

    Every member is a distinct power of 2.
    '''
    SHOW_EMAIL = 1
    SHOW_LAST_SEEN = 2
    SHOW_MEMBER_SINCE = 4

    @staticmethod
    def get(profile_privacy_setting: 'ProfilePrivacySettings | int | str') -> 'ProfilePrivacySettings':
        '''
        Resolve a member from a member, its value or its name.

        Raises TypeError for any other argument type, ValueError for an
        int that is not a member value and KeyError for an unknown name.
        '''
        if isinstance(profile_privacy_setting, ProfilePrivacySettings):
            resolved = profile_privacy_setting
        elif isinstance(profile_privacy_setting, int):
            # Lookup by value
            resolved = ProfilePrivacySettings(profile_privacy_setting)
        elif isinstance(profile_privacy_setting, str):
            # Lookup by member name
            resolved = ProfilePrivacySettings[profile_privacy_setting]
        else:
            raise TypeError('profile_privacy_setting must be ProfilePrivacySettings, int, or str')
        return resolved
|
|
||||||
|
|
||||||
|
|
||||||
class UserSettingJobStatusMailNotificationLevel(IntEnum):
    '''
    Levels of a user's job status mail notification setting.

    NOTE(review): presumably NONE sends no mails, END only mails when a job
    ends and ALL mails on every status change - confirm against the code
    that sends the notifications.
    '''
    NONE = 1
    END = 2
    ALL = 3
|
|
||||||
|
|
||||||
|
|
||||||
class User(HashidMixin, UserMixin, db.Model):
|
|
||||||
__tablename__ = 'users'
|
|
||||||
# Primary key
|
|
||||||
id = db.Column(db.Integer, primary_key=True)
|
|
||||||
# Foreign keys
|
|
||||||
role_id = db.Column(db.Integer, db.ForeignKey('roles.id'))
|
|
||||||
# Fields
|
|
||||||
email = db.Column(db.String(254), index=True, unique=True)
|
|
||||||
username = db.Column(db.String(64), index=True, unique=True)
|
|
||||||
username_pattern = re.compile(r'^[A-Za-zÄÖÜäöüß0-9_.]*$')
|
|
||||||
password_hash = db.Column(db.String(128))
|
|
||||||
confirmed = db.Column(db.Boolean, default=False)
|
|
||||||
terms_of_use_accepted = db.Column(db.Boolean, default=False)
|
|
||||||
member_since = db.Column(db.DateTime(), default=datetime.utcnow)
|
|
||||||
setting_job_status_mail_notification_level = db.Column(
|
|
||||||
IntEnumColumn(UserSettingJobStatusMailNotificationLevel),
|
|
||||||
default=UserSettingJobStatusMailNotificationLevel.END
|
|
||||||
)
|
|
||||||
last_seen = db.Column(db.DateTime())
|
|
||||||
full_name = db.Column(db.String(64))
|
|
||||||
about_me = db.Column(db.String(256))
|
|
||||||
location = db.Column(db.String(64))
|
|
||||||
website = db.Column(db.String(128))
|
|
||||||
organization = db.Column(db.String(128))
|
|
||||||
is_public = db.Column(db.Boolean, default=False)
|
|
||||||
profile_privacy_settings = db.Column(db.Integer(), default=0)
|
|
||||||
# Relationships
|
|
||||||
avatar = db.relationship(
|
|
||||||
'Avatar',
|
|
||||||
back_populates='user',
|
|
||||||
cascade='all, delete-orphan',
|
|
||||||
uselist=False
|
|
||||||
)
|
|
||||||
corpora = db.relationship(
|
|
||||||
'Corpus',
|
|
||||||
back_populates='user',
|
|
||||||
cascade='all, delete-orphan',
|
|
||||||
lazy='dynamic'
|
|
||||||
)
|
|
||||||
corpus_follower_associations = db.relationship(
|
|
||||||
'CorpusFollowerAssociation',
|
|
||||||
back_populates='follower',
|
|
||||||
cascade='all, delete-orphan'
|
|
||||||
)
|
|
||||||
followed_corpora = association_proxy(
|
|
||||||
'corpus_follower_associations',
|
|
||||||
'corpus',
|
|
||||||
creator=lambda c: CorpusFollowerAssociation(corpus=c)
|
|
||||||
)
|
|
||||||
jobs = db.relationship(
|
|
||||||
'Job',
|
|
||||||
back_populates='user',
|
|
||||||
cascade='all, delete-orphan',
|
|
||||||
lazy='dynamic'
|
|
||||||
)
|
|
||||||
role = db.relationship(
|
|
||||||
'Role',
|
|
||||||
back_populates='users'
|
|
||||||
)
|
|
||||||
spacy_nlp_pipeline_models = db.relationship(
|
|
||||||
'SpaCyNLPPipelineModel',
|
|
||||||
back_populates='user',
|
|
||||||
cascade='all, delete-orphan',
|
|
||||||
lazy='dynamic'
|
|
||||||
)
|
|
||||||
tesseract_ocr_pipeline_models = db.relationship(
|
|
||||||
'TesseractOCRPipelineModel',
|
|
||||||
back_populates='user',
|
|
||||||
cascade='all, delete-orphan',
|
|
||||||
lazy='dynamic'
|
|
||||||
)
|
|
||||||
tokens = db.relationship(
|
|
||||||
'Token',
|
|
||||||
back_populates='user',
|
|
||||||
cascade='all, delete-orphan',
|
|
||||||
lazy='dynamic'
|
|
||||||
)
|
|
||||||
|
|
||||||
def __init__(self, **kwargs):
|
|
||||||
if 'role' not in kwargs:
|
|
||||||
kwargs['role'] = (
|
|
||||||
Role.query.filter_by(name='Administrator').first()
|
|
||||||
if kwargs['email'] == current_app.config['NOPAQUE_ADMIN']
|
|
||||||
else Role.query.filter_by(default=True).first()
|
|
||||||
)
|
|
||||||
super().__init__(**kwargs)
|
|
||||||
|
|
||||||
def __repr__(self):
|
|
||||||
return f'<User {self.username}>'
|
|
||||||
|
|
||||||
@property
|
|
||||||
def is_administrator(self):
|
|
||||||
return self.can(Permission.ADMINISTRATE)
|
|
||||||
|
|
||||||
@property
|
|
||||||
def jsonpatch_path(self):
|
|
||||||
return f'/users/{self.hashid}'
|
|
||||||
|
|
||||||
@property
|
|
||||||
def password(self):
|
|
||||||
raise AttributeError('password is not a readable attribute')
|
|
||||||
|
|
||||||
@password.setter
|
|
||||||
def password(self, password):
|
|
||||||
#pbkdf2:sha256
|
|
||||||
self.password_hash = generate_password_hash(password, method='pbkdf2')
|
|
||||||
|
|
||||||
@property
|
|
||||||
def path(self) -> Path:
|
|
||||||
return current_app.config.get('NOPAQUE_DATA_DIR') / 'users' / f'{self.id}'
|
|
||||||
|
|
||||||
@staticmethod
|
|
||||||
def create(**kwargs):
|
|
||||||
user = User(**kwargs)
|
|
||||||
db.session.add(user)
|
|
||||||
db.session.flush(objects=[user])
|
|
||||||
db.session.refresh(user)
|
|
||||||
user_spacy_nlp_pipeline_models_dir = user.path / 'spacy_nlp_pipeline_models'
|
|
||||||
user_tesseract_ocr_pipeline_models_dir = user.path / 'tesseract_ocr_pipeline_models'
|
|
||||||
user_corpora_dir = user.path / 'corpora'
|
|
||||||
user_jobs_dir = user.path / 'jobs'
|
|
||||||
try:
|
|
||||||
user.path.mkdir()
|
|
||||||
user_spacy_nlp_pipeline_models_dir.mkdir()
|
|
||||||
user_tesseract_ocr_pipeline_models_dir.mkdir()
|
|
||||||
user_corpora_dir.mkdir()
|
|
||||||
user_jobs_dir.mkdir()
|
|
||||||
except OSError as e:
|
|
||||||
# TODO: Potential leftover cleanup
|
|
||||||
current_app.logger.error(e)
|
|
||||||
db.session.rollback()
|
|
||||||
raise
|
|
||||||
return user
|
|
||||||
|
|
||||||
@staticmethod
|
|
||||||
def insert_defaults():
|
|
||||||
nopaque_user = User.query.filter_by(username='nopaque').first()
|
|
||||||
system_user_role = Role.query.filter_by(name='System user').first()
|
|
||||||
if nopaque_user is None:
|
|
||||||
nopaque_user = User.create(
|
|
||||||
username='nopaque',
|
|
||||||
role=system_user_role
|
|
||||||
)
|
|
||||||
db.session.add(nopaque_user)
|
|
||||||
elif nopaque_user.role != system_user_role:
|
|
||||||
nopaque_user.role = system_user_role
|
|
||||||
db.session.commit()
|
|
||||||
|
|
||||||
@staticmethod
def reset_password(token, new_password):
    """Set a new password for the user identified by a signed reset token.

    Args:
        token: JWT expected to carry the purpose ``User.reset_password``
            and the user's hashid as ``sub``.
        new_password: Plain-text password to assign.

    Returns:
        ``True`` if the password was assigned, ``False`` if the token is
        invalid, has the wrong purpose or names an unknown user. The
        session is not committed here.
    """
    required_claims = ['exp', 'iat', 'iss', 'purpose', 'sub']
    try:
        claims = jwt.decode(
            token,
            current_app.config['SECRET_KEY'],
            algorithms=['HS256'],
            issuer=current_app.config['SERVER_NAME'],
            options={'require': required_claims}
        )
    except jwt.PyJWTError:
        return False
    if claims.get('purpose') != 'User.reset_password':
        return False
    account = User.query.get(hashids.decode(claims.get('sub')))
    if account is None:
        return False
    account.password = new_password
    db.session.add(account)
    return True
|
|
||||||
|
|
||||||
@staticmethod
def verify_access_token(access_token, refresh_token=None):
    """Return the user owning a valid, unexpired access token.

    Args:
        access_token: Access token string to look up.
        refresh_token: Accepted but not used by this method.

    Returns:
        The token's user, or ``None`` when the token is unknown, expired or
        belongs to a user whose role is ``System user``. For an unexpired
        token the user is pinged and the session committed before the role
        is checked.
    """
    record = Token.query.filter(Token.access_token == access_token).first()
    if record is None:
        return None
    if not record.access_expiration > datetime.utcnow():
        return None
    record.user.ping()
    db.session.commit()
    if record.user.role.name == 'System user':
        return None
    return record.user
|
|
||||||
|
|
||||||
@staticmethod
def verify_refresh_token(refresh_token, access_token):
    """Return the token row matching both tokens if it can still refresh.

    Returns:
        The matching ``Token`` while its refresh expiration lies in the
        future, otherwise ``None``.
    """
    match = Token.query.filter(
        (Token.refresh_token == refresh_token)
        & (Token.access_token == access_token)
    ).first()
    if match is None:
        return None
    if match.refresh_expiration > datetime.utcnow():
        return match
    # Someone tried to refresh with an expired token: revoke all tokens of
    # this user as a precaution.
    match.user.revoke_auth_tokens()
    db.session.commit()
    return None
|
|
||||||
|
|
||||||
def can(self, permission):
    """Return whether this user's role grants ``permission``.

    A user without a role is never granted anything.
    """
    if self.role is None:
        return False
    return self.role.has_permission(permission)
|
|
||||||
|
|
||||||
def confirm(self, confirmation_token):
    """Mark this user as confirmed if the signed token is meant for it.

    Args:
        confirmation_token: JWT expected to carry the purpose
            ``user.confirm`` and this user's hashid as ``sub``.

    Returns:
        ``True`` if the user was marked confirmed, ``False`` if the token
        is invalid, has another purpose or names another user. The session
        is not committed here.
    """
    required_claims = ['exp', 'iat', 'iss', 'purpose', 'sub']
    try:
        claims = jwt.decode(
            confirmation_token,
            current_app.config['SECRET_KEY'],
            algorithms=['HS256'],
            issuer=current_app.config['SERVER_NAME'],
            options={'require': required_claims}
        )
    except jwt.PyJWTError:
        return False
    if claims.get('purpose') != 'user.confirm':
        return False
    if claims.get('sub') != self.hashid:
        return False
    self.confirmed = True
    db.session.add(self)
    return True
|
|
||||||
|
|
||||||
def delete(self):
    """Remove the user's directory tree and mark the user for deletion.

    Errors while removing files are ignored; the session is not committed
    here.
    """
    user_dir = self.path
    shutil.rmtree(user_dir, ignore_errors=True)
    db.session.delete(self)
|
|
||||||
|
|
||||||
def generate_auth_token(self):
    """Build a new ``Token`` for this user.

    The access token expires after 15 minutes and the refresh token after
    7 days, both counted from the current UTC time. The token is returned
    without being added to the session here.
    """
    access_token = secrets.token_urlsafe()
    access_expiration = datetime.utcnow() + timedelta(minutes=15)
    refresh_token = secrets.token_urlsafe()
    refresh_expiration = datetime.utcnow() + timedelta(days=7)
    return Token(
        access_token=access_token,
        access_expiration=access_expiration,
        refresh_token=refresh_token,
        refresh_expiration=refresh_expiration,
        user=self
    )
|
|
||||||
|
|
||||||
def generate_confirm_token(self, expiration=3600):
    """Return a signed JWT that confirms this user's account.

    Args:
        expiration: Token lifetime in seconds, counted from now.

    Returns:
        The HS256-signed token with purpose ``user.confirm`` and this
        user's hashid as ``sub``.
    """
    issued_at = datetime.utcnow()
    expires_at = issued_at + timedelta(seconds=expiration)
    claims = {
        'exp': expires_at,
        'iat': issued_at,
        'iss': current_app.config['SERVER_NAME'],
        'purpose': 'user.confirm',
        'sub': self.hashid
    }
    signing_key = current_app.config['SECRET_KEY']
    return jwt.encode(claims, signing_key, algorithm='HS256')
|
|
||||||
|
|
||||||
def generate_reset_password_token(self, expiration=3600):
    """Return a signed JWT that authorises a password reset for this user.

    Args:
        expiration: Token lifetime in seconds, counted from now.

    Returns:
        The HS256-signed token with purpose ``User.reset_password`` and
        this user's hashid as ``sub``.
    """
    issued_at = datetime.utcnow()
    expires_at = issued_at + timedelta(seconds=expiration)
    claims = {
        'exp': expires_at,
        'iat': issued_at,
        'iss': current_app.config['SERVER_NAME'],
        'purpose': 'User.reset_password',
        'sub': self.hashid
    }
    signing_key = current_app.config['SECRET_KEY']
    return jwt.encode(claims, signing_key, algorithm='HS256')
|
|
||||||
|
|
||||||
def ping(self):
    """Record the current UTC time as this user's ``last_seen`` value."""
    now = datetime.utcnow()
    self.last_seen = now
|
|
||||||
|
|
||||||
def revoke_auth_tokens(self):
    """Mark every token in ``self.tokens`` for deletion (no commit)."""
    for auth_token in self.tokens:
        db.session.delete(auth_token)
|
|
||||||
|
|
||||||
def verify_password(self, password):
    """Check ``password`` against the stored hash.

    Users whose role is ``System user`` never pass, whatever the password.
    """
    is_system_user = self.role.name == 'System user'
    if is_system_user:
        return False
    return check_password_hash(self.password_hash, password)
|
|
||||||
|
|
||||||
#region Profile Privacy settings
|
|
||||||
def has_profile_privacy_setting(self, setting):
    """Return whether all bits of ``setting`` are set for this user.

    ``setting`` is resolved through ``ProfilePrivacySettings.get``; which
    input forms that accepts is not visible here.
    """
    flag = ProfilePrivacySettings.get(setting).value
    return (self.profile_privacy_settings & flag) == flag
|
|
||||||
|
|
||||||
def add_profile_privacy_setting(self, setting):
    """Enable ``setting`` for this user unless it is already enabled."""
    resolved = ProfilePrivacySettings.get(setting)
    if self.has_profile_privacy_setting(resolved):
        return
    self.profile_privacy_settings += resolved.value
|
|
||||||
|
|
||||||
def remove_profile_privacy_setting(self, setting):
    """Disable ``setting`` for this user if it is currently enabled."""
    resolved = ProfilePrivacySettings.get(setting)
    if not self.has_profile_privacy_setting(resolved):
        return
    self.profile_privacy_settings -= resolved.value
|
|
||||||
|
|
||||||
def reset_profile_privacy_settings(self):
    """Clear every profile privacy setting of this user."""
    no_settings = 0
    self.profile_privacy_settings = no_settings
|
|
||||||
#endregion Profile Privacy settings
|
|
||||||
|
|
||||||
def follow_corpus(self, corpus, role=None):
    """Make this user follow ``corpus`` with the given follower role.

    Args:
        corpus: Corpus to follow.
        role: Follower role to assign. When ``None``, the role flagged as
            default is looked up and used.

    If the user already follows the corpus, only the role of the existing
    association is updated (when it differs). Otherwise a new association
    is added to the session. Nothing is committed here.
    """
    if role is not None:
        target_role = role
    else:
        target_role = CorpusFollowerRole.query.filter_by(default=True).first()
    if not self.is_following_corpus(corpus):
        association = CorpusFollowerAssociation(
            corpus=corpus,
            role=target_role,
            follower=self
        )
        db.session.add(association)
        return
    association = CorpusFollowerAssociation.query.filter_by(
        corpus=corpus,
        follower=self
    ).first()
    if association.role != target_role:
        association.role = target_role
|
|
||||||
|
|
||||||
def unfollow_corpus(self, corpus):
    """Remove ``corpus`` from the followed corpora; no-op if not followed."""
    if self.is_following_corpus(corpus):
        self.followed_corpora.remove(corpus)
|
|
||||||
|
|
||||||
def is_following_corpus(self, corpus):
    """Return whether ``corpus`` is among this user's followed corpora."""
    followed = self.followed_corpora
    return corpus in followed
|
|
||||||
|
|
||||||
def generate_follow_corpus_token(self, corpus_hashid, role_name, expiration=7):
    """Return a signed JWT that lets its holder follow a corpus.

    Args:
        corpus_hashid: Hashid of the corpus; stored as the ``sub`` claim.
        role_name: Name of the follower role; stored as ``role_name``.
        expiration: Either an absolute expiry ``datetime`` (used as-is) or
            a number of days from now. Defaults to 7 days.

    Returns:
        The HS256-signed token with purpose ``User.follow_corpus``.
    """
    now = datetime.utcnow()
    if isinstance(expiration, (int, float)):
        # A bare number used directly as ``exp`` is read as seconds since
        # the epoch, so the default of 7 produced a token that was already
        # expired. Interpret numbers as a lifetime in days instead.
        expires_at = now + timedelta(days=expiration)
    else:
        expires_at = expiration
    payload = {
        'exp': expires_at,
        'iat': now,
        'iss': current_app.config['SERVER_NAME'],
        'purpose': 'User.follow_corpus',
        'role_name': role_name,
        'sub': corpus_hashid
    }
    return jwt.encode(
        payload,
        current_app.config['SECRET_KEY'],
        algorithm='HS256'
    )
|
|
||||||
|
|
||||||
def follow_corpus_by_token(self, token):
    """Follow the corpus named in a signed follow token.

    Args:
        token: JWT expected to carry the purpose ``User.follow_corpus``,
            the corpus hashid as ``sub`` and a follower role name as
            ``role_name``.

    Returns:
        ``True`` if the corpus is now followed with the requested role,
        ``False`` if the token is invalid, has another purpose, or names an
        unknown corpus or role. Nothing is committed here.
    """
    try:
        payload = jwt.decode(
            token,
            current_app.config['SECRET_KEY'],
            algorithms=['HS256'],
            issuer=current_app.config['SERVER_NAME'],
            options={'require': ['exp', 'iat', 'iss', 'purpose', 'role_name', 'sub']}
        )
    except jwt.PyJWTError:
        return False
    if payload.get('purpose') != 'User.follow_corpus':
        return False
    corpus_id = hashids.decode(payload.get('sub'))
    # Plain lookup on purpose: get_or_404() aborts the request instead of
    # returning None, which made the check below unreachable.
    corpus = Corpus.query.get(corpus_id)
    if corpus is None:
        return False
    role_name = payload.get('role_name')
    role = CorpusFollowerRole.query.filter_by(name=role_name).first()
    if role is None:
        return False
    self.follow_corpus(corpus, role)
    return True
|
|
||||||
|
|
||||||
def to_json_serializeable(self, backrefs=False, relationships=False, filter_by_privacy_settings=False):
    """Return a JSON-serializable dict describing this user.

    Args:
        backrefs: Also include the serialized ``role``.
        relationships: Also include the user's corpus follower
            associations, corpora, jobs and pipeline models, each keyed by
            hashid.
        filter_by_privacy_settings: Drop ``email``, ``last_seen`` and
            ``member_since`` unless the matching privacy setting is
            enabled.

    Returns:
        The assembled dict; timestamps are ISO formatted with a trailing
        ``Z``.
    """
    if self.last_seen is None:
        last_seen = None
    else:
        last_seen = f'{self.last_seen.isoformat()}Z'
    privacy = {
        'is_public': self.is_public,
        'show_email': self.has_profile_privacy_setting(ProfilePrivacySettings.SHOW_EMAIL),
        'show_last_seen': self.has_profile_privacy_setting(ProfilePrivacySettings.SHOW_LAST_SEEN),
        'show_member_since': self.has_profile_privacy_setting(ProfilePrivacySettings.SHOW_MEMBER_SINCE)
    }
    data = {
        'id': self.hashid,
        'confirmed': self.confirmed,
        'avatar': url_for('users.user_avatar', user_id=self.id),
        'email': self.email,
        'last_seen': last_seen,
        'member_since': f'{self.member_since.isoformat()}Z',
        'username': self.username,
        'full_name': self.full_name,
        'about_me': self.about_me,
        'website': self.website,
        'location': self.location,
        'organization': self.organization,
        'job_status_mail_notification_level': self.setting_job_status_mail_notification_level.name,
        'profile_privacy_settings': privacy
    }
    if backrefs:
        data['role'] = self.role.to_json_serializeable(backrefs=True)
    if relationships:
        # Associations are serialized without their own relationships; the
        # remaining collections are serialized with them.
        data['corpus_follower_associations'] = {
            cfa.hashid: cfa.to_json_serializeable()
            for cfa in self.corpus_follower_associations
        }
        nested_relations = (
            'corpora',
            'jobs',
            'tesseract_ocr_pipeline_models',
            'spacy_nlp_pipeline_models',
        )
        for relation in nested_relations:
            data[relation] = {
                item.hashid: item.to_json_serializeable(relationships=True)
                for item in getattr(self, relation)
            }
    if filter_by_privacy_settings:
        guarded_fields = (
            ('email', ProfilePrivacySettings.SHOW_EMAIL),
            ('last_seen', ProfilePrivacySettings.SHOW_LAST_SEEN),
            ('member_since', ProfilePrivacySettings.SHOW_MEMBER_SINCE),
        )
        for field, required_setting in guarded_fields:
            if not self.has_profile_privacy_setting(required_setting):
                data.pop(field)
    return data
|
|
@ -1,37 +0,0 @@
|
|||||||
from cqi import CQiClient
|
|
||||||
from threading import Lock
|
|
||||||
from flask import session
|
|
||||||
|
|
||||||
|
|
||||||
class SessionManager:
    """Static accessors for the ``cqi_over_sio`` entry of the Flask session."""

    @staticmethod
    def _state() -> dict:
        """Return the dict stored under ``cqi_over_sio`` in the session."""
        return session['cqi_over_sio']

    @staticmethod
    def setup():
        """Create an empty ``cqi_over_sio`` entry in the session."""
        session['cqi_over_sio'] = {}

    @staticmethod
    def teardown():
        """Remove the ``cqi_over_sio`` entry (``KeyError`` if it is absent)."""
        session.pop('cqi_over_sio')

    @staticmethod
    def set_corpus_id(corpus_id: int):
        """Store the corpus id."""
        SessionManager._state()['corpus_id'] = corpus_id

    @staticmethod
    def get_corpus_id() -> int:
        """Return the stored corpus id."""
        return SessionManager._state()['corpus_id']

    @staticmethod
    def set_cqi_client(cqi_client: CQiClient):
        """Store the CQi client."""
        SessionManager._state()['cqi_client'] = cqi_client

    @staticmethod
    def get_cqi_client() -> CQiClient:
        """Return the stored CQi client."""
        return SessionManager._state()['cqi_client']

    @staticmethod
    def set_cqi_client_lock(cqi_client_lock: Lock):
        """Store the lock that belongs to the CQi client."""
        SessionManager._state()['cqi_client_lock'] = cqi_client_lock

    @staticmethod
    def get_cqi_client_lock() -> Lock:
        """Return the stored lock that belongs to the CQi client."""
        return SessionManager._state()['cqi_client_lock']
|
|
@ -1,109 +0,0 @@
|
|||||||
from flask import current_app, Flask
|
|
||||||
from flask_login import current_user
|
|
||||||
from flask_socketio import Namespace
|
|
||||||
from app import db, hashids, socketio
|
|
||||||
from app.decorators import socketio_admin_required, socketio_login_required
|
|
||||||
from app.models import Job, JobStatus
|
|
||||||
|
|
||||||
|
|
||||||
def _delete_job(app: Flask, job_id: int):
    """Delete the job with ``job_id`` inside ``app``'s application context.

    Does nothing if the job no longer exists, e.g. because it was removed
    between scheduling and running this task.
    """
    with app.app_context():
        job = Job.query.get(job_id)
        if job is None:
            return
        job.delete()
        db.session.commit()
|
|
||||||
|
|
||||||
|
|
||||||
def _restart_job(app, job_id):
    """Restart the job with ``job_id`` inside ``app``'s application context.

    Does nothing if the job no longer exists, e.g. because it was removed
    between scheduling and running this task.
    """
    with app.app_context():
        job = Job.query.get(job_id)
        if job is None:
            return
        job.restart()
        db.session.commit()
|
|
||||||
|
|
||||||
|
|
||||||
class UsersNamespace(Namespace):
    """Socket.IO event handlers for deleting, inspecting and restarting jobs.

    Every handler answers with a dict holding ``status`` and ``statusText``
    and, where applicable, a ``body``.

    NOTE(review): the class is called ``UsersNamespace`` although all of
    its handlers operate on ``Job`` objects; the name is left unchanged so
    existing references keep working.
    """

    @socketio_login_required
    def on_delete(self, job_hashid: str) -> dict:
        """Schedule deletion of a job owned by the caller (or any, for admins)."""
        job_id = hashids.decode(job_hashid)

        if not isinstance(job_id, int):
            return {'status': 400, 'statusText': 'Bad Request'}

        job = Job.query.get(job_id)

        if job is None:
            return {'status': 404, 'statusText': 'Not found'}

        if not (
            job.user == current_user
            or current_user.is_administrator
        ):
            return {'status': 403, 'statusText': 'Forbidden'}

        # The deletion itself runs in a background task; the handler only
        # acknowledges the request.
        socketio.start_background_task(
            _delete_job,
            current_app._get_current_object(),
            job_id
        )

        return {
            'body': f'Job "{job.title}" marked for deletion',
            'status': 202,
            'statusText': 'Accepted'
        }

    @socketio_admin_required
    def on_log(self, job_hashid: str) -> dict:
        """Return the pyflow log of a completed or failed job (admins only)."""
        job_id = hashids.decode(job_hashid)

        if not isinstance(job_id, int):
            return {'status': 400, 'statusText': 'Bad Request'}

        job = Job.query.get(job_id)

        if job is None:
            return {'status': 404, 'statusText': 'Not found'}

        if job.status not in [JobStatus.COMPLETED, JobStatus.FAILED]:
            return {'status': 409, 'statusText': 'Conflict'}

        with open(job.path / 'pipeline_data' / 'logs' / 'pyflow_log.txt') as log_file:
            log = log_file.read()

        # A successful response carries the matching status text; it used
        # to say 'Forbidden' next to status 200.
        return {
            'body': log,
            'status': 200,
            'statusText': 'OK'
        }

    # The decorator was previously written without '@', so it was a no-op
    # expression and the login check never ran for this handler.
    @socketio_login_required
    def on_restart(self, job_hashid: str) -> dict:
        """Schedule a restart of a job owned by the caller (or any, for admins)."""
        job_id = hashids.decode(job_hashid)

        if not isinstance(job_id, int):
            return {'status': 400, 'statusText': 'Bad Request'}

        job = Job.query.get(job_id)

        if job is None:
            return {'status': 404, 'statusText': 'Not found'}

        if not (
            job.user == current_user
            or current_user.is_administrator
        ):
            return {'status': 403, 'statusText': 'Forbidden'}

        # NOTE(review): this rejects jobs that ARE failed. If restarting is
        # meant to be allowed only for failed jobs, the comparison is
        # inverted -- confirm against Job.restart().
        if job.status == JobStatus.FAILED:
            return {'status': 409, 'statusText': 'Conflict'}

        socketio.start_background_task(
            _restart_job,
            current_app._get_current_object(),
            job_id
        )

        return {
            'body': f'Job "{job.title}" marked for restarting',
            'status': 202,
            'statusText': 'Accepted'
        }
|
|
@ -1,116 +0,0 @@
|
|||||||
from flask import current_app, Flask
|
|
||||||
from flask_login import current_user
|
|
||||||
from flask_socketio import join_room, leave_room, Namespace
|
|
||||||
from app import db, hashids, socketio
|
|
||||||
from app.decorators import socketio_login_required
|
|
||||||
from app.models import User
|
|
||||||
|
|
||||||
|
|
||||||
def _delete_user(app: Flask, user_id: int):
    """Delete the user with ``user_id`` inside ``app``'s application context.

    Does nothing if the user no longer exists, e.g. because it was removed
    between scheduling and running this task.
    """
    with app.app_context():
        user = User.query.get(user_id)
        if user is None:
            return
        user.delete()
        db.session.commit()
|
|
||||||
|
|
||||||
|
|
||||||
class UsersNamespace(Namespace):
    """Socket.IO event handlers for reading, watching and deleting users.

    Every handler answers with a dict holding ``status`` and ``statusText``
    and, where applicable, a ``body``. A user may only be accessed by
    itself or by an administrator.
    """

    @staticmethod
    def _resolve_user(user_hashid: str):
        """Decode, load and authorise the requested user.

        Returns:
            ``(user, None)`` on success, otherwise ``(None, response)``
            where ``response`` is the 400/404/403 answer to send back.
        """
        user_id = hashids.decode(user_hashid)
        if not isinstance(user_id, int):
            return None, {'status': 400, 'statusText': 'Bad Request'}
        user = User.query.get(user_id)
        if user is None:
            return None, {'status': 404, 'statusText': 'Not found'}
        is_self = user == current_user
        if not (is_self or current_user.is_administrator):
            return None, {'status': 403, 'statusText': 'Forbidden'}
        return user, None

    @socketio_login_required
    def on_get(self, user_hashid: str) -> dict:
        """Return the serialized user including backrefs and relationships."""
        user, error = self._resolve_user(user_hashid)
        if error is not None:
            return error
        serialized = user.to_json_serializeable(
            backrefs=True,
            relationships=True
        )
        return {'body': serialized, 'status': 200, 'statusText': 'OK'}

    @socketio_login_required
    def on_subscribe(self, user_hashid: str) -> dict:
        """Join the room ``/users/<hashid>`` of the requested user."""
        user, error = self._resolve_user(user_hashid)
        if error is not None:
            return error
        join_room(f'/users/{user.hashid}')
        return {'status': 200, 'statusText': 'OK'}

    @socketio_login_required
    def on_unsubscribe(self, user_hashid: str) -> dict:
        """Leave the room ``/users/<hashid>`` of the requested user."""
        user, error = self._resolve_user(user_hashid)
        if error is not None:
            return error
        leave_room(f'/users/{user.hashid}')
        return {'status': 200, 'statusText': 'OK'}

    @socketio_login_required
    def on_delete(self, user_hashid: str) -> dict:
        """Schedule deletion of the requested user in a background task."""
        user, error = self._resolve_user(user_hashid)
        if error is not None:
            return error
        socketio.start_background_task(
            _delete_user,
            current_app._get_current_object(),
            user.id
        )
        return {
            'body': f'User "{user.username}" marked for deletion',
            'status': 202,
            'statusText': 'Accepted'
        }
|
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue
Block a user