Compare commits

..

No commits in common. "master" and "1.0.1" have entirely different histories.

492 changed files with 6989 additions and 1575968 deletions

View File

@ -5,9 +5,9 @@
!app
!migrations
!tests
!.flaskenv
!boot.sh
!config.py
!docker-nopaque-entrypoint.sh
!nopaque.py
!requirements.txt
!requirements.freezed.txt
!wsgi.py

View File

@ -1,20 +1,32 @@
##############################################################################
# Environment variables used by Docker Compose config files. #
# Variables for use in Docker Compose YAML files #
##############################################################################
# HINT: Use this bash command `id -u`
# NOTE: 0 (= root user) is not allowed
HOST_UID=
# HINT: Use this bash command `id -g`
# NOTE: 0 (= root group) is not allowed
HOST_GID=
# HINT: Use this bash command `getent group docker | cut -d: -f3`
HOST_DOCKER_GID=
# DEFAULT: nopaque
NOPAQUE_DOCKER_NETWORK_NAME=nopaque
# DOCKER_DEFAULT_NETWORK_NAME=
# DEFAULT: ./volumes/db/data
# NOTE: Use `.` as <project-basedir>
# DOCKER_DB_SERVICE_DATA_VOLUME_SOURCE_PATH=
# DEFAULT: ./volumes/mq/data
# NOTE: Use `.` as <project-basedir>
# DOCKER_MQ_SERVICE_DATA_VOLUME_SOURCE_PATH=
# NOTE: This must be a network share and it must be available on all
# Docker Swarm nodes, mounted to the same path.
HOST_NOPAQUE_DATA_PATH=/mnt/nopaque
# Docker Swarm nodes, mounted to the same path with the same
# user and group ownership.
DOCKER_NOPAQUE_SERVICE_DATA_VOLUME_SOURCE_PATH=
# DEFAULT: ./volumes/nopaque/logs
# NOTE: Use `.` as <project-basedir>
# DOCKER_NOPAQUE_SERVICE_LOGS_VOLUME_SOURCE_PATH=.

1
.flaskenv Normal file
View File

@ -0,0 +1 @@
FLASK_APP=nopaque.py

2
.gitignore vendored
View File

@ -2,6 +2,8 @@
app/static/gen/
volumes/
docker-compose.override.yml
logs/
!logs/dummy
*.env
*.pjentsch-testing

17
.vscode/settings.json vendored
View File

@ -1,17 +1,9 @@
{
"editor.rulers": [79],
"editor.tabSize": 4,
"emmet.includeLanguages": {
"jinja-html": "html"
},
"files.associations": {
".flaskenv": "env",
"*.env.tpl": "env",
"*.txt.j2": "jinja"
},
"files.insertFinalNewline": true,
"files.trimFinalNewlines": true,
"files.trimTrailingWhitespace": true,
"[css]": {
"editor.tabSize": 2
},
"[html]": {
"editor.tabSize": 2
},
@ -20,5 +12,8 @@
},
"[jinja-html]": {
"editor.tabSize": 2
},
"[scss]": {
"editor.tabSize": 2
}
}

View File

@ -4,13 +4,11 @@ FROM python:3.10.13-slim-bookworm
LABEL authors="Patrick Jentsch <p.jentsch@uni-bielefeld.de>"
# Set environment variables
ENV LANG="C.UTF-8"
ENV PYTHONDONTWRITEBYTECODE="1"
ENV PYTHONUNBUFFERED="1"
# Install system dependencies
RUN apt-get update \
&& apt-get install --no-install-recommends --yes \
build-essential \
@ -19,39 +17,37 @@ RUN apt-get update \
&& rm --recursive /var/lib/apt/lists/*
# Create a non-root user
RUN useradd --create-home --no-log-init nopaque \
&& groupadd docker \
&& usermod --append --groups docker nopaque
USER nopaque
WORKDIR /home/nopaque
# Create a Python virtual environment
ENV NOPAQUE_PYTHON3_VENV_PATH="/home/nopaque/.venv"
RUN python3 -m venv "${NOPAQUE_PYTHON3_VENV_PATH}"
ENV PATH="${NOPAQUE_PYTHON3_VENV_PATH}/bin:${PATH}"
# Install Python dependencies
COPY --chown=nopaque:nopaque requirements.freezed.txt requirements.freezed.txt
RUN python3 -m pip install --requirement requirements.freezed.txt \
&& rm requirements.freezed.txt
# Install the application
COPY docker-nopaque-entrypoint.sh /usr/local/bin/
COPY --chown=nopaque:nopaque app app
COPY --chown=nopaque:nopaque migrations migrations
COPY --chown=nopaque:nopaque tests tests
COPY --chown=nopaque:nopaque boot.sh config.py wsgi.py ./
COPY --chown=nopaque:nopaque .flaskenv boot.sh config.py nopaque.py requirements.txt ./
EXPOSE 5000
RUN python3 -m pip install --requirement requirements.txt \
&& mkdir logs
USER root
COPY docker-nopaque-entrypoint.sh /usr/local/bin/
EXPOSE 5000
ENTRYPOINT ["docker-nopaque-entrypoint.sh"]

View File

@ -35,7 +35,7 @@ username@hostname:~$ sudo mount --types cifs --options gid=${USER},password=nopa
# Clone the nopaque repository
username@hostname:~$ git clone https://gitlab.ub.uni-bielefeld.de/sfb1288inf/nopaque.git
# Create data directories
username@hostname:~$ mkdir -p volumes/{db,mq}
username@hostname:~$ mkdir data/{db,logs,mq}
username@hostname:~$ cp db.env.tpl db.env
username@hostname:~$ cp .env.tpl .env
# Fill out the variables within these files.

View File

@ -120,7 +120,6 @@
version: '3.4.0'
compatible_service_versions:
- '0.1.1'
- '0.1.2'
- title: 'German'
description: 'German pipeline optimized for CPU. Components: tok2vec, tagger, morphologizer, parser, lemmatizer (trainable_lemmatizer), senter, ner.'
url: 'https://github.com/explosion/spacy-models/releases/download/de_core_news_md-3.4.0/de_core_news_md-3.4.0.tar.gz'
@ -132,7 +131,6 @@
version: '3.4.0'
compatible_service_versions:
- '0.1.1'
- '0.1.2'
- title: 'Greek'
description: 'Greek pipeline optimized for CPU. Components: tok2vec, morphologizer, parser, lemmatizer (trainable_lemmatizer), senter, ner, attribute_ruler.'
url: 'https://github.com/explosion/spacy-models/releases/download/el_core_news_md-3.4.0/el_core_news_md-3.4.0.tar.gz'
@ -144,7 +142,6 @@
version: '3.4.0'
compatible_service_versions:
- '0.1.1'
- '0.1.2'
- title: 'English'
description: 'English pipeline optimized for CPU. Components: tok2vec, tagger, parser, senter, ner, attribute_ruler, lemmatizer.'
url: 'https://github.com/explosion/spacy-models/releases/download/en_core_web_md-3.4.1/en_core_web_md-3.4.1.tar.gz'
@ -156,7 +153,6 @@
version: '3.4.1'
compatible_service_versions:
- '0.1.1'
- '0.1.2'
- title: 'Spanish'
description: 'Spanish pipeline optimized for CPU. Components: tok2vec, morphologizer, parser, senter, ner, attribute_ruler, lemmatizer.'
url: 'https://github.com/explosion/spacy-models/releases/download/es_core_news_md-3.4.0/es_core_news_md-3.4.0.tar.gz'
@ -168,7 +164,6 @@
version: '3.4.0'
compatible_service_versions:
- '0.1.1'
- '0.1.2'
- title: 'French'
description: 'French pipeline optimized for CPU. Components: tok2vec, morphologizer, parser, senter, ner, attribute_ruler, lemmatizer.'
url: 'https://github.com/explosion/spacy-models/releases/download/fr_core_news_md-3.4.0/fr_core_news_md-3.4.0.tar.gz'
@ -180,7 +175,6 @@
version: '3.4.0'
compatible_service_versions:
- '0.1.1'
- '0.1.2'
- title: 'Italian'
description: 'Italian pipeline optimized for CPU. Components: tok2vec, morphologizer, tagger, parser, lemmatizer (trainable_lemmatizer), senter, ner.'
url: 'https://github.com/explosion/spacy-models/releases/download/it_core_news_md-3.4.0/it_core_news_md-3.4.0.tar.gz'
@ -192,7 +186,6 @@
version: '3.4.0'
compatible_service_versions:
- '0.1.1'
- '0.1.2'
- title: 'Polish'
description: 'Polish pipeline optimized for CPU. Components: tok2vec, morphologizer, parser, lemmatizer (trainable_lemmatizer), tagger, senter, ner.'
url: 'https://github.com/explosion/spacy-models/releases/download/pl_core_news_md-3.4.0/pl_core_news_md-3.4.0.tar.gz'
@ -204,7 +197,6 @@
version: '3.4.0'
compatible_service_versions:
- '0.1.1'
- '0.1.2'
- title: 'Russian'
description: 'Russian pipeline optimized for CPU. Components: tok2vec, morphologizer, parser, senter, ner, attribute_ruler, lemmatizer.'
url: 'https://github.com/explosion/spacy-models/releases/download/ru_core_news_md-3.4.0/ru_core_news_md-3.4.0.tar.gz'
@ -216,7 +208,6 @@
version: '3.4.0'
compatible_service_versions:
- '0.1.1'
- '0.1.2'
- title: 'Chinese'
description: 'Chinese pipeline optimized for CPU. Components: tok2vec, tagger, parser, senter, ner, attribute_ruler.'
url: 'https://github.com/explosion/spacy-models/releases/download/zh_core_web_md-3.4.0/zh_core_web_md-3.4.0.tar.gz'
@ -228,4 +219,3 @@
version: '3.4.0'
compatible_service_versions:
- '0.1.1'
- '0.1.2'

View File

@ -10,7 +10,6 @@
# - '0.1.0'
# - '0.1.1'
# - '0.1.2'
# - '0.1.3b'
# - title: 'Amharic'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/amh.traineddata'
@ -23,7 +22,6 @@
# - '0.1.0'
# - '0.1.1'
# - '0.1.2'
# - '0.1.3b'
- title: 'Arabic'
description: ''
url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/ara.traineddata'
@ -36,7 +34,6 @@
- '0.1.0'
- '0.1.1'
- '0.1.2'
- '0.1.3b'
# - title: 'Assamese'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/asm.traineddata'
@ -49,7 +46,6 @@
# - '0.1.0'
# - '0.1.1'
# - '0.1.2'
# - '0.1.3b'
# - title: 'Azerbaijani'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/aze.traineddata'
@ -62,7 +58,6 @@
# - '0.1.0'
# - '0.1.1'
# - '0.1.2'
# - '0.1.3b'
# - title: 'Azerbaijani - Cyrillic'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/aze_cyrl.traineddata'
@ -75,7 +70,6 @@
# - '0.1.0'
# - '0.1.1'
# - '0.1.2'
# - '0.1.3b'
# - title: 'Belarusian'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/bel.traineddata'
@ -88,7 +82,6 @@
# - '0.1.0'
# - '0.1.1'
# - '0.1.2'
# - '0.1.3b'
# - title: 'Bengali'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/ben.traineddata'
@ -101,7 +94,6 @@
# - '0.1.0'
# - '0.1.1'
# - '0.1.2'
# - '0.1.3b'
# - title: 'Tibetan'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/bod.traineddata'
@ -114,7 +106,6 @@
# - '0.1.0'
# - '0.1.1'
# - '0.1.2'
# - '0.1.3b'
# - title: 'Bosnian'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/bos.traineddata'
@ -127,7 +118,6 @@
# - '0.1.0'
# - '0.1.1'
# - '0.1.2'
# - '0.1.3b'
# - title: 'Bulgarian'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/bul.traineddata'
@ -140,7 +130,6 @@
# - '0.1.0'
# - '0.1.1'
# - '0.1.2'
# - '0.1.3b'
# - title: 'Catalan; Valencian'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/cat.traineddata'
@ -153,7 +142,6 @@
# - '0.1.0'
# - '0.1.1'
# - '0.1.2'
# - '0.1.3b'
# - title: 'Cebuano'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/ceb.traineddata'
@ -166,7 +154,6 @@
# - '0.1.0'
# - '0.1.1'
# - '0.1.2'
# - '0.1.3b'
# - title: 'Czech'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/ces.traineddata'
@ -179,7 +166,6 @@
# - '0.1.0'
# - '0.1.1'
# - '0.1.2'
# - '0.1.3b'
# - title: 'Chinese - Simplified'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/chi_sim.traineddata'
@ -192,7 +178,6 @@
# - '0.1.0'
# - '0.1.1'
# - '0.1.2'
# - '0.1.3b'
- title: 'Chinese - Traditional'
description: ''
url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/chi_tra.traineddata'
@ -205,7 +190,6 @@
- '0.1.0'
- '0.1.1'
- '0.1.2'
- '0.1.3b'
# - title: 'Cherokee'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/chr.traineddata'
@ -218,7 +202,6 @@
# - '0.1.0'
# - '0.1.1'
# - '0.1.2'
# - '0.1.3b'
# - title: 'Welsh'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/cym.traineddata'
@ -231,7 +214,6 @@
# - '0.1.0'
# - '0.1.1'
# - '0.1.2'
# - '0.1.3b'
- title: 'Danish'
description: ''
url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/dan.traineddata'
@ -244,7 +226,6 @@
- '0.1.0'
- '0.1.1'
- '0.1.2'
- '0.1.3b'
- title: 'German'
description: ''
url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/deu.traineddata'
@ -257,7 +238,6 @@
- '0.1.0'
- '0.1.1'
- '0.1.2'
- '0.1.3b'
# - title: 'Dzongkha'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/dzo.traineddata'
@ -270,7 +250,6 @@
# - '0.1.0'
# - '0.1.1'
# - '0.1.2'
# - '0.1.3b'
- title: 'Greek, Modern (1453-)'
description: ''
url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/ell.traineddata'
@ -283,7 +262,6 @@
- '0.1.0'
- '0.1.1'
- '0.1.2'
- '0.1.3b'
- title: 'English'
description: ''
url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/eng.traineddata'
@ -296,7 +274,6 @@
- '0.1.0'
- '0.1.1'
- '0.1.2'
- '0.1.3b'
- title: 'English, Middle (1100-1500)'
description: ''
url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/enm.traineddata'
@ -309,7 +286,6 @@
- '0.1.0'
- '0.1.1'
- '0.1.2'
- '0.1.3b'
# - title: 'Esperanto'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/epo.traineddata'
@ -322,7 +298,6 @@
# - '0.1.0'
# - '0.1.1'
# - '0.1.2'
# - '0.1.3b'
# - title: 'Estonian'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/est.traineddata'
@ -335,7 +310,6 @@
# - '0.1.0'
# - '0.1.1'
# - '0.1.2'
# - '0.1.3b'
# - title: 'Basque'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/eus.traineddata'
@ -348,7 +322,6 @@
# - '0.1.0'
# - '0.1.1'
# - '0.1.2'
# - '0.1.3b'
# - title: 'Persian'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/fas.traineddata'
@ -361,7 +334,6 @@
# - '0.1.0'
# - '0.1.1'
# - '0.1.2'
# - '0.1.3b'
# - title: 'Finnish'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/fin.traineddata'
@ -374,7 +346,6 @@
# - '0.1.0'
# - '0.1.1'
# - '0.1.2'
# - '0.1.3b'
- title: 'French'
description: ''
url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/fra.traineddata'
@ -387,7 +358,6 @@
- '0.1.0'
- '0.1.1'
- '0.1.2'
- '0.1.3b'
- title: 'German Fraktur'
description: ''
url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/frk.traineddata'
@ -400,7 +370,6 @@
- '0.1.0'
- '0.1.1'
- '0.1.2'
- '0.1.3b'
- title: 'French, Middle (ca. 1400-1600)'
description: ''
url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/frm.traineddata'
@ -413,7 +382,6 @@
- '0.1.0'
- '0.1.1'
- '0.1.2'
- '0.1.3b'
# - title: 'Irish'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/gle.traineddata'
@ -426,7 +394,6 @@
# - '0.1.0'
# - '0.1.1'
# - '0.1.2'
# - '0.1.3b'
# - title: 'Galician'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/glg.traineddata'
@ -439,7 +406,6 @@
# - '0.1.0'
# - '0.1.1'
# - '0.1.2'
# - '0.1.3b'
- title: 'Greek, Ancient (-1453)'
description: ''
url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/grc.traineddata'
@ -452,7 +418,6 @@
- '0.1.0'
- '0.1.1'
- '0.1.2'
- '0.1.3b'
# - title: 'Gujarati'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/guj.traineddata'
@ -465,7 +430,6 @@
# - '0.1.0'
# - '0.1.1'
# - '0.1.2'
# - '0.1.3b'
# - title: 'Haitian; Haitian Creole'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/hat.traineddata'
@ -478,7 +442,6 @@
# - '0.1.0'
# - '0.1.1'
# - '0.1.2'
# - '0.1.3b'
# - title: 'Hebrew'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/heb.traineddata'
@ -491,7 +454,6 @@
# - '0.1.0'
# - '0.1.1'
# - '0.1.2'
# - '0.1.3b'
# - title: 'Hindi'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/hin.traineddata'
@ -504,7 +466,6 @@
# - '0.1.0'
# - '0.1.1'
# - '0.1.2'
# - '0.1.3b'
# - title: 'Croatian'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/hrv.traineddata'
@ -517,7 +478,6 @@
# - '0.1.0'
# - '0.1.1'
# - '0.1.2'
# - '0.1.3b'
# - title: 'Hungarian'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/hun.traineddata'
@ -530,7 +490,6 @@
# - '0.1.0'
# - '0.1.1'
# - '0.1.2'
# - '0.1.3b'
# - title: 'Inuktitut'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/iku.traineddata'
@ -543,7 +502,6 @@
# - '0.1.0'
# - '0.1.1'
# - '0.1.2'
# - '0.1.3b'
# - title: 'Indonesian'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/ind.traineddata'
@ -556,7 +514,6 @@
# - '0.1.0'
# - '0.1.1'
# - '0.1.2'
# - '0.1.3b'
# - title: 'Icelandic'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/isl.traineddata'
@ -569,7 +526,6 @@
# - '0.1.0'
# - '0.1.1'
# - '0.1.2'
# - '0.1.3b'
- title: 'Italian'
description: ''
url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/ita.traineddata'
@ -582,7 +538,6 @@
- '0.1.0'
- '0.1.1'
- '0.1.2'
- '0.1.3b'
- title: 'Italian - Old'
description: ''
url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/ita_old.traineddata'
@ -595,7 +550,6 @@
- '0.1.0'
- '0.1.1'
- '0.1.2'
- '0.1.3b'
# - title: 'Javanese'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/jav.traineddata'
@ -608,7 +562,6 @@
# - '0.1.0'
# - '0.1.1'
# - '0.1.2'
# - '0.1.3b'
# - title: 'Japanese'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/jpn.traineddata'
@ -621,7 +574,6 @@
# - '0.1.0'
# - '0.1.1'
# - '0.1.2'
# - '0.1.3b'
# - title: 'Kannada'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/kan.traineddata'
@ -634,7 +586,6 @@
# - '0.1.0'
# - '0.1.1'
# - '0.1.2'
# - '0.1.3b'
# - title: 'Georgian'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/kat.traineddata'
@ -647,7 +598,6 @@
# - '0.1.0'
# - '0.1.1'
# - '0.1.2'
# - '0.1.3b'
# - title: 'Georgian - Old'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/kat_old.traineddata'
@ -660,7 +610,6 @@
# - '0.1.0'
# - '0.1.1'
# - '0.1.2'
# - '0.1.3b'
# - title: 'Kazakh'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/kaz.traineddata'
@ -673,7 +622,6 @@
# - '0.1.0'
# - '0.1.1'
# - '0.1.2'
# - '0.1.3b'
# - title: 'Central Khmer'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/khm.traineddata'
@ -686,7 +634,6 @@
# - '0.1.0'
# - '0.1.1'
# - '0.1.2'
# - '0.1.3b'
# - title: 'Kirghiz; Kyrgyz'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/kir.traineddata'
@ -699,7 +646,6 @@
# - '0.1.0'
# - '0.1.1'
# - '0.1.2'
# - '0.1.3b'
# - title: 'Korean'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/kor.traineddata'
@ -712,7 +658,6 @@
# - '0.1.0'
# - '0.1.1'
# - '0.1.2'
# - '0.1.3b'
# - title: 'Kurdish'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/kur.traineddata'
@ -725,7 +670,6 @@
# - '0.1.0'
# - '0.1.1'
# - '0.1.2'
# - '0.1.3b'
# - title: 'Lao'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/lao.traineddata'
@ -738,7 +682,6 @@
# - '0.1.0'
# - '0.1.1'
# - '0.1.2'
# - '0.1.3b'
# - title: 'Latin'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/lat.traineddata'
@ -751,7 +694,6 @@
# - '0.1.0'
# - '0.1.1'
# - '0.1.2'
# - '0.1.3b'
# - title: 'Latvian'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/lav.traineddata'
@ -764,7 +706,6 @@
# - '0.1.0'
# - '0.1.1'
# - '0.1.2'
# - '0.1.3b'
# - title: 'Lithuanian'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/lit.traineddata'
@ -777,7 +718,6 @@
# - '0.1.0'
# - '0.1.1'
# - '0.1.2'
# - '0.1.3b'
# - title: 'Malayalam'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/mal.traineddata'
@ -790,7 +730,6 @@
# - '0.1.0'
# - '0.1.1'
# - '0.1.2'
# - '0.1.3b'
# - title: 'Marathi'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/mar.traineddata'
@ -803,7 +742,6 @@
# - '0.1.0'
# - '0.1.1'
# - '0.1.2'
# - '0.1.3b'
# - title: 'Macedonian'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/mkd.traineddata'
@ -816,7 +754,6 @@
# - '0.1.0'
# - '0.1.1'
# - '0.1.2'
# - '0.1.3b'
# - title: 'Maltese'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/mlt.traineddata'
@ -829,7 +766,6 @@
# - '0.1.0'
# - '0.1.1'
# - '0.1.2'
# - '0.1.3b'
# - title: 'Malay'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/msa.traineddata'
@ -842,7 +778,6 @@
# - '0.1.0'
# - '0.1.1'
# - '0.1.2'
# - '0.1.3b'
# - title: 'Burmese'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/mya.traineddata'
@ -855,7 +790,6 @@
# - '0.1.0'
# - '0.1.1'
# - '0.1.2'
# - '0.1.3b'
# - title: 'Nepali'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/nep.traineddata'
@ -868,7 +802,6 @@
# - '0.1.0'
# - '0.1.1'
# - '0.1.2'
# - '0.1.3b'
# - title: 'Dutch; Flemish'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/nld.traineddata'
@ -881,7 +814,6 @@
# - '0.1.0'
# - '0.1.1'
# - '0.1.2'
# - '0.1.3b'
# - title: 'Norwegian'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/nor.traineddata'
@ -894,7 +826,6 @@
# - '0.1.0'
# - '0.1.1'
# - '0.1.2'
# - '0.1.3b'
# - title: 'Oriya'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/ori.traineddata'
@ -907,7 +838,6 @@
# - '0.1.0'
# - '0.1.1'
# - '0.1.2'
# - '0.1.3b'
# - title: 'Panjabi; Punjabi'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/pan.traineddata'
@ -920,7 +850,6 @@
# - '0.1.0'
# - '0.1.1'
# - '0.1.2'
# - '0.1.3b'
# - title: 'Polish'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/pol.traineddata'
@ -933,7 +862,6 @@
# - '0.1.0'
# - '0.1.1'
# - '0.1.2'
# - '0.1.3b'
- title: 'Portuguese'
description: ''
url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/por.traineddata'
@ -946,7 +874,6 @@
- '0.1.0'
- '0.1.1'
- '0.1.2'
- '0.1.3b'
# - title: 'Pushto; Pashto'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/pus.traineddata'
@ -959,7 +886,6 @@
# - '0.1.0'
# - '0.1.1'
# - '0.1.2'
# - '0.1.3b'
# - title: 'Romanian; Moldavian; Moldovan'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/ron.traineddata'
@ -972,7 +898,6 @@
# - '0.1.0'
# - '0.1.1'
# - '0.1.2'
# - '0.1.3b'
- title: 'Russian'
description: ''
url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/rus.traineddata'
@ -985,7 +910,6 @@
- '0.1.0'
- '0.1.1'
- '0.1.2'
- '0.1.3b'
# - title: 'Sanskrit'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/san.traineddata'
@ -998,7 +922,6 @@
# - '0.1.0'
# - '0.1.1'
# - '0.1.2'
# - '0.1.3b'
# - title: 'Sinhala; Sinhalese'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/sin.traineddata'
@ -1011,7 +934,6 @@
# - '0.1.0'
# - '0.1.1'
# - '0.1.2'
# - '0.1.3b'
# - title: 'Slovak'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/slk.traineddata'
@ -1024,7 +946,6 @@
# - '0.1.0'
# - '0.1.1'
# - '0.1.2'
# - '0.1.3b'
# - title: 'Slovenian'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/slv.traineddata'
@ -1037,7 +958,6 @@
# - '0.1.0'
# - '0.1.1'
# - '0.1.2'
# - '0.1.3b'
- title: 'Spanish; Castilian'
description: ''
url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/spa.traineddata'
@ -1050,7 +970,6 @@
- '0.1.0'
- '0.1.1'
- '0.1.2'
- '0.1.3b'
- title: 'Spanish; Castilian - Old'
description: ''
url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/spa_old.traineddata'
@ -1063,7 +982,6 @@
- '0.1.0'
- '0.1.1'
- '0.1.2'
- '0.1.3b'
# - title: 'Albanian'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/sqi.traineddata'
@ -1076,7 +994,6 @@
# - '0.1.0'
# - '0.1.1'
# - '0.1.2'
# - '0.1.3b'
# - title: 'Serbian'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/srp.traineddata'
@ -1089,7 +1006,6 @@
# - '0.1.0'
# - '0.1.1'
# - '0.1.2'
# - '0.1.3b'
# - title: 'Serbian - Latin'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/srp_latn.traineddata'
@ -1102,7 +1018,6 @@
# - '0.1.0'
# - '0.1.1'
# - '0.1.2'
# - '0.1.3b'
# - title: 'Swahili'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/swa.traineddata'
@ -1115,7 +1030,6 @@
# - '0.1.0'
# - '0.1.1'
# - '0.1.2'
# - '0.1.3b'
# - title: 'Swedish'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/swe.traineddata'
@ -1128,7 +1042,6 @@
# - '0.1.0'
# - '0.1.1'
# - '0.1.2'
# - '0.1.3b'
# - title: 'Syriac'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/syr.traineddata'
@ -1141,7 +1054,6 @@
# - '0.1.0'
# - '0.1.1'
# - '0.1.2'
# - '0.1.3b'
# - title: 'Tamil'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/tam.traineddata'
@ -1154,7 +1066,6 @@
# - '0.1.0'
# - '0.1.1'
# - '0.1.2'
# - '0.1.3b'
# - title: 'Telugu'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/tel.traineddata'
@ -1167,7 +1078,6 @@
# - '0.1.0'
# - '0.1.1'
# - '0.1.2'
# - '0.1.3b'
# - title: 'Tajik'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/tgk.traineddata'
@ -1180,7 +1090,6 @@
# - '0.1.0'
# - '0.1.1'
# - '0.1.2'
# - '0.1.3b'
# - title: 'Tagalog'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/tgl.traineddata'
@ -1193,7 +1102,6 @@
# - '0.1.0'
# - '0.1.1'
# - '0.1.2'
# - '0.1.3b'
# - title: 'Thai'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/tha.traineddata'
@ -1206,7 +1114,6 @@
# - '0.1.0'
# - '0.1.1'
# - '0.1.2'
# - '0.1.3b'
# - title: 'Tigrinya'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/tir.traineddata'
@ -1219,7 +1126,6 @@
# - '0.1.0'
# - '0.1.1'
# - '0.1.2'
# - '0.1.3b'
# - title: 'Turkish'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/tur.traineddata'
@ -1232,7 +1138,6 @@
# - '0.1.0'
# - '0.1.1'
# - '0.1.2'
# - '0.1.3b'
# - title: 'Uighur; Uyghur'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/uig.traineddata'
@ -1245,7 +1150,6 @@
# - '0.1.0'
# - '0.1.1'
# - '0.1.2'
# - '0.1.3b'
# - title: 'Ukrainian'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/ukr.traineddata'
@ -1258,7 +1162,6 @@
# - '0.1.0'
# - '0.1.1'
# - '0.1.2'
# - '0.1.3b'
# - title: 'Urdu'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/urd.traineddata'
@ -1271,7 +1174,6 @@
# - '0.1.0'
# - '0.1.1'
# - '0.1.2'
# - '0.1.3b'
# - title: 'Uzbek'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/uzb.traineddata'
@ -1284,7 +1186,6 @@
# - '0.1.0'
# - '0.1.1'
# - '0.1.2'
# - '0.1.3b'
# - title: 'Uzbek - Cyrillic'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/uzb_cyrl.traineddata'
@ -1297,7 +1198,6 @@
# - '0.1.0'
# - '0.1.1'
# - '0.1.2'
# - '0.1.3b'
# - title: 'Vietnamese'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/vie.traineddata'
@ -1310,7 +1210,6 @@
# - '0.1.0'
# - '0.1.1'
# - '0.1.2'
# - '0.1.3b'
# - title: 'Yiddish'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/yid.traineddata'
@ -1323,4 +1222,3 @@
# - '0.1.0'
# - '0.1.1'
# - '0.1.2'
# - '0.1.3b'

View File

@ -2,10 +2,9 @@ from apifairy import APIFairy
from config import Config
from docker import DockerClient
from flask import Flask
from flask.logging import default_handler
from flask_admin import Admin
from flask_apscheduler import APScheduler
from flask_assets import Environment
from flask_breadcrumbs import Breadcrumbs, default_breadcrumb_root
from flask_login import LoginManager
from flask_mail import Mail
from flask_marshmallow import Marshmallow
@ -14,143 +13,95 @@ from flask_paranoid import Paranoid
from flask_socketio import SocketIO
from flask_sqlalchemy import SQLAlchemy
from flask_hashids import Hashids
from logging import Formatter, StreamHandler
from werkzeug.middleware.proxy_fix import ProxyFix
from .extensions.nopaque_flask_admin_views import AdminIndexView, ModelView
docker_client = DockerClient.from_env()
admin = Admin()
apifairy = APIFairy()
assets = Environment()
breadcrumbs = Breadcrumbs()
db = SQLAlchemy()
docker_client = DockerClient()
hashids = Hashids()
login = LoginManager()
login.login_view = 'auth.login'
login.login_message = 'Please log in to access this page.'
ma = Marshmallow()
mail = Mail()
migrate = Migrate(compare_type=True)
paranoid = Paranoid()
paranoid.redirect_view = '/'
scheduler = APScheduler()
socketio = SocketIO()
def create_app(config: Config = Config) -> Flask:
''' Creates an initialized Flask object. '''
''' Creates an initialized Flask (WSGI Application) object. '''
app = Flask(__name__)
app.config.from_object(config)
# region Logging
log_formatter = Formatter(
fmt=app.config['NOPAQUE_LOG_FORMAT'],
datefmt=app.config['NOPAQUE_LOG_DATE_FORMAT']
)
log_handler = StreamHandler()
log_handler.setFormatter(log_formatter)
log_handler.setLevel(app.config['NOPAQUE_LOG_LEVEL'])
app.logger.setLevel('DEBUG')
app.logger.removeHandler(default_handler)
app.logger.addHandler(log_handler)
# endregion Logging
# region Middlewares
if app.config['NOPAQUE_PROXY_FIX_ENABLED']:
app.wsgi_app = ProxyFix(
app.wsgi_app,
x_for=app.config['NOPAQUE_PROXY_FIX_X_FOR'],
x_host=app.config['NOPAQUE_PROXY_FIX_X_HOST'],
x_port=app.config['NOPAQUE_PROXY_FIX_X_PORT'],
x_prefix=app.config['NOPAQUE_PROXY_FIX_X_PREFIX'],
x_proto=app.config['NOPAQUE_PROXY_FIX_X_PROTO']
)
# endregion Middlewares
# region Extensions
config.init_app(app)
docker_client.login(
app.config['NOPAQUE_DOCKER_REGISTRY_USERNAME'],
password=app.config['NOPAQUE_DOCKER_REGISTRY_PASSWORD'],
registry=app.config['NOPAQUE_DOCKER_REGISTRY']
)
from .models import AnonymousUser, User
admin.init_app(app, index_view=AdminIndexView())
apifairy.init_app(app)
assets.init_app(app)
breadcrumbs.init_app(app)
db.init_app(app)
hashids.init_app(app)
login.init_app(app)
login.anonymous_user = AnonymousUser
login.login_view = 'auth.login'
login.user_loader(lambda user_id: User.query.get(int(user_id)))
ma.init_app(app)
mail.init_app(app)
migrate.init_app(app, db)
paranoid.init_app(app)
paranoid.redirect_view = '/'
scheduler.init_app(app)
socketio.init_app(app, message_queue=app.config['NOPAQUE_SOCKETIO_MESSAGE_QUEUE_URI'])
# endregion Extensions
socketio.init_app(app, message_queue=app.config['NOPAQUE_SOCKETIO_MESSAGE_QUEUE_URI']) # noqa
# region Blueprints
from .blueprints.api import bp as api_blueprint
from .admin import bp as admin_blueprint
default_breadcrumb_root(admin_blueprint, '.admin')
app.register_blueprint(admin_blueprint, url_prefix='/admin')
from .api import bp as api_blueprint
app.register_blueprint(api_blueprint, url_prefix='/api')
from .blueprints.auth import bp as auth_blueprint
from .auth import bp as auth_blueprint
default_breadcrumb_root(auth_blueprint, '.')
app.register_blueprint(auth_blueprint)
from .blueprints.contributions import bp as contributions_blueprint
from .contributions import bp as contributions_blueprint
default_breadcrumb_root(contributions_blueprint, '.contributions')
app.register_blueprint(contributions_blueprint, url_prefix='/contributions')
from .blueprints.corpora import bp as corpora_blueprint
from .corpora import bp as corpora_blueprint
from .corpora.cqi_over_sio import CQiNamespace
default_breadcrumb_root(corpora_blueprint, '.corpora')
app.register_blueprint(corpora_blueprint, cli_group='corpus', url_prefix='/corpora')
socketio.on_namespace(CQiNamespace('/cqi_over_sio'))
from .blueprints.errors import bp as errors_bp
from .errors import bp as errors_bp
app.register_blueprint(errors_bp)
from .blueprints.jobs import bp as jobs_blueprint
from .jobs import bp as jobs_blueprint
default_breadcrumb_root(jobs_blueprint, '.jobs')
app.register_blueprint(jobs_blueprint, url_prefix='/jobs')
from .blueprints.main import bp as main_blueprint
from .main import bp as main_blueprint
default_breadcrumb_root(main_blueprint, '.')
app.register_blueprint(main_blueprint, cli_group=None)
from .blueprints.services import bp as services_blueprint
from .services import bp as services_blueprint
default_breadcrumb_root(services_blueprint, '.services')
app.register_blueprint(services_blueprint, url_prefix='/services')
from .blueprints.settings import bp as settings_blueprint
from .settings import bp as settings_blueprint
default_breadcrumb_root(settings_blueprint, '.settings')
app.register_blueprint(settings_blueprint, url_prefix='/settings')
from .blueprints.users import bp as users_blueprint
from .users import bp as users_blueprint
default_breadcrumb_root(users_blueprint, '.users')
app.register_blueprint(users_blueprint, cli_group='user', url_prefix='/users')
from .blueprints.workshops import bp as workshops_blueprint
from .workshops import bp as workshops_blueprint
app.register_blueprint(workshops_blueprint, url_prefix='/workshops')
from .models import _models
for model in _models:
admin.add_view(ModelView(model, db.session, category='Database'))
# endregion Blueprints
# region SocketIO Namespaces
from .namespaces.cqi_over_sio import CQiOverSocketIONamespace
socketio.on_namespace(CQiOverSocketIONamespace('/cqi_over_sio'))
# endregion SocketIO Namespaces
# region Database event Listeners
from .models.event_listeners import register_event_listeners
register_event_listeners()
# endregion Database event Listeners
# region Add scheduler jobs
if app.config['NOPAQUE_IS_PRIMARY_INSTANCE']:
from .jobs import handle_corpora
scheduler.add_job('handle_corpora', handle_corpora, seconds=3, trigger='interval')
from .jobs import handle_jobs
scheduler.add_job('handle_jobs', handle_jobs, seconds=3, trigger='interval')
# endregion Add scheduler jobs
return app

20
app/admin/__init__.py Normal file
View File

@ -0,0 +1,20 @@
from flask import Blueprint
from flask_login import login_required
from app.decorators import admin_required
bp = Blueprint('admin', __name__)
@bp.before_request
@login_required
@admin_required
def before_request():
    '''
    Gate every route of this blueprint behind the stacked ``login_required``
    and ``admin_required`` decorators.

    The hook body itself has nothing to do: all access control is performed
    by the decorators wrapped around it.
    '''
    return None
from . import json_routes, routes

16
app/admin/forms.py Normal file
View File

@ -0,0 +1,16 @@
from flask_wtf import FlaskForm
from wtforms import SelectField, SubmitField
from app.models import Role
class UpdateUserForm(FlaskForm):
    """Form with a single role select box for the given user."""

    role = SelectField('Role')
    submit = SubmitField()

    def __init__(self, user, *args, **kwargs):
        """
        Build the form for ``user``.

        Unless the caller passes its own ``data``/``prefix`` keyword
        arguments, the role field is pre-filled with the hashid of the user's
        current role and the form is prefixed with ``update-user-form``.
        """
        # Only touch user.role when no explicit data was supplied.
        if 'data' not in kwargs:
            kwargs['data'] = {'role': user.role.hashid}
        kwargs.setdefault('prefix', 'update-user-form')
        super().__init__(*args, **kwargs)
        # Choices are (hashid, name) pairs of all roles known to the database.
        self.role.choices = [
            (role.hashid, role.name) for role in Role.query.all()
        ]

23
app/admin/json_routes.py Normal file
View File

@ -0,0 +1,23 @@
from flask import abort, request
from app import db
from app.decorators import content_negotiation
from app.models import User
from . import bp
@bp.route('/users/<hashid:user_id>/confirmed', methods=['PUT'])
@content_negotiation(consumes='application/json', produces='application/json')
def update_user_role(user_id):
    """
    Set the ``confirmed`` flag of a user from a JSON boolean body.

    NOTE: despite its name, this view updates ``user.confirmed``, not the
    user's role.

    Aborts with 400 when the JSON body is not a boolean; ``get_or_404`` is
    used to look up the user.
    """
    confirmed = request.json
    if not isinstance(confirmed, bool):
        abort(400)
    user = User.query.get_or_404(user_id)
    user.confirmed = confirmed
    db.session.commit()
    state = 'confirmed' if confirmed else 'unconfirmed'
    return {'message': f'User "{user.username}" is now {state}'}, 200

146
app/admin/routes.py Normal file
View File

@ -0,0 +1,146 @@
from flask import abort, flash, redirect, render_template, url_for
from flask_breadcrumbs import register_breadcrumb
from app import db, hashids
from app.models import Avatar, Corpus, Role, User
from app.users.settings.forms import (
UpdateAvatarForm,
UpdatePasswordForm,
UpdateNotificationsForm,
UpdateAccountInformationForm,
UpdateProfileInformationForm
)
from . import bp
from .forms import UpdateUserForm
from app.users.utils import (
user_endpoint_arguments_constructor as user_eac,
user_dynamic_list_constructor as user_dlc
)
@bp.route('')
@register_breadcrumb(bp, '.', '<i class="material-icons left">admin_panel_settings</i>Administration')
def admin():
    """Render the administration landing page."""
    page_title = 'Administration'
    return render_template('admin/admin.html.j2', title=page_title)
@bp.route('/corpora')
@register_breadcrumb(bp, '.corpora', 'Corpora')
def corpora():
    """Render a listing of every corpus returned by ``Corpus.query.all()``."""
    return render_template(
        'admin/corpora.html.j2',
        corpora=Corpus.query.all(),
        title='Corpora'
    )
@bp.route('/users')
@register_breadcrumb(bp, '.users', '<i class="material-icons left">group</i>Users')
def users():
    """Render a listing of every user returned by ``User.query.all()``."""
    return render_template(
        'admin/users.html.j2',
        users=User.query.all(),
        title='Users'
    )
@bp.route('/users/<hashid:user_id>')
@register_breadcrumb(bp, '.users.entity', '', dynamic_list_constructor=user_dlc)
def user(user_id):
    """Render the admin detail page of one user together with their corpora."""
    requested_user = User.query.get_or_404(user_id)
    owned_corpora = Corpus.query.filter(Corpus.user == requested_user).all()
    return render_template(
        'admin/user.html.j2',
        corpora=owned_corpora,
        title=requested_user.username,
        user=requested_user
    )
@bp.route('/users/<hashid:user_id>/settings', methods=['GET', 'POST'])
@register_breadcrumb(bp, '.users.entity.settings', '<i class="material-icons left">settings</i>Settings')
def user_settings(user_id):
    """
    Show and process the settings forms of an arbitrary user.

    Six forms share this view. The first form whose submit button was sent
    and which validates is applied, committed, and answered with a redirect
    back to this page. If no form was submitted (or validation failed) the
    settings page is rendered with all forms.
    """
    user = User.query.get_or_404(user_id)
    account_form = UpdateAccountInformationForm(user)
    profile_form = UpdateProfileInformationForm(user)
    avatar_form = UpdateAvatarForm()
    password_form = UpdatePasswordForm(user)
    notifications_form = UpdateNotificationsForm(user)
    role_form = UpdateUserForm(user)

    def save_and_reload(message='Your changes have been saved'):
        # Shared tail of every successful form submission.
        db.session.commit()
        flash(message)
        return redirect(url_for('.user_settings', user_id=user.id))

    # Profile information
    if profile_form.submit.data and profile_form.validate():
        for field_name in (
            'about_me',
            'location',
            'organization',
            'website',
            'full_name'
        ):
            setattr(user, field_name, getattr(profile_form, field_name).data)
        return save_and_reload()

    # Avatar
    if avatar_form.submit.data and avatar_form.validate():
        try:
            Avatar.create(avatar_form.avatar.data, user=user)
        except (AttributeError, OSError):
            abort(500)
        return save_and_reload()

    # Account information
    if account_form.submit.data and account_form.validate():
        user.email = account_form.email.data
        user.username = account_form.username.data
        return save_and_reload('Profile settings updated')

    # Password
    if password_form.submit.data and password_form.validate():
        user.password = password_form.new_password.data
        return save_and_reload()

    # Notifications
    if notifications_form.submit.data and notifications_form.validate():
        user.setting_job_status_mail_notification_level = \
            notifications_form.job_status_mail_notification_level.data
        return save_and_reload()

    # Role (admin only form)
    if role_form.submit.data and role_form.validate():
        role_id = hashids.decode(role_form.role.data)
        user.role = Role.query.get(role_id)
        return save_and_reload()

    return render_template(
        'admin/user_settings.html.j2',
        title='Settings',
        update_account_information_form=account_form,
        update_avatar_form=avatar_form,
        update_notifications_form=notifications_form,
        update_password_form=password_form,
        update_profile_information_form=profile_form,
        update_user_form=role_form,
        user=user
    )

View File

@ -5,8 +5,8 @@ from flask import abort, Blueprint
from werkzeug.exceptions import InternalServerError
from app import db, hashids
from app.models import Job, JobInput, JobStatus, TesseractOCRPipelineModel
from .auth import auth_error_responses, token_auth
from .schemas import EmptySchema, JobSchema, SpaCyNLPPipelineJobSchema, TesseractOCRPipelineJobSchema, TesseractOCRPipelineModelSchema
from .auth import auth_error_responses, token_auth
bp = Blueprint('jobs', __name__)
@ -77,7 +77,7 @@ def delete_job(job_id):
job = Job.query.get(job_id)
if job is None:
abort(404)
if not (job.user == current_user or current_user.is_administrator):
if not (job.user == current_user or current_user.is_administrator()):
abort(403)
try:
job.delete()
@ -97,6 +97,6 @@ def get_job(job_id):
job = Job.query.get(job_id)
if job is None:
abort(404)
if not (job.user == current_user or current_user.is_administrator):
if not (job.user == current_user or current_user.is_administrator()):
abort(403)
return job

View File

@ -10,7 +10,7 @@ from app.models import (
User,
UserSettingJobStatusMailNotificationLevel
)
from app.blueprints.services import SERVICES
from app.services import SERVICES

View File

@ -3,11 +3,11 @@ from apifairy import authenticate, body, response
from apifairy.decorators import other_responses
from flask import abort, Blueprint
from werkzeug.exceptions import InternalServerError
from app.email import create_message, send
from app import db
from app.email import create_message, send
from app.models import User
from .auth import auth_error_responses, token_auth
from .schemas import EmptySchema, UserSchema
from .auth import auth_error_responses, token_auth
bp = Blueprint('users', __name__)
@ -60,7 +60,7 @@ def delete_user(user_id):
user = User.query.get(user_id)
if user is None:
abort(404)
if not (user == current_user or current_user.is_administrator):
if not (user == current_user or current_user.is_administrator()):
abort(403)
user.delete()
db.session.commit()
@ -78,7 +78,7 @@ def get_user(user_id):
user = User.query.get(user_id)
if user is None:
abort(404)
if not (user == current_user or current_user.is_administrator):
if not (user == current_user or current_user.is_administrator()):
abort(403)
return user
@ -94,6 +94,6 @@ def get_user_by_username(username):
user = User.query.filter(User.username == username).first()
if user is None:
abort(404)
if not (user == current_user or current_user.is_administrator):
if not (user == current_user or current_user.is_administrator()):
abort(403)
return user

View File

@ -1,7 +1,5 @@
from flask import Blueprint
bp = Blueprint('inputs', __name__)
bp = Blueprint('auth', __name__)
from . import routes

View File

@ -60,11 +60,7 @@ class RegistrationForm(FlaskForm):
def validate_username(self, field):
if User.query.filter_by(username=field.data).first():
raise ValidationError('Username already registered')
def validate_terms_of_use_accepted(self, field):
if not field.data:
raise ValidationError('Terms of Use not accepted')
raise ValidationError('Username already in use')
class LoginForm(FlaskForm):

View File

@ -1,4 +1,5 @@
from flask import abort, flash, redirect, render_template, request, url_for
from flask_breadcrumbs import register_breadcrumb
from flask_login import current_user, login_user, login_required, logout_user
from app import db
from app.email import create_message, send
@ -12,7 +13,24 @@ from .forms import (
)
@bp.before_app_request
def before_request():
    """
    Application-wide request hook for logged-in users.

    Pings the current user and commits, then redirects unconfirmed users to
    the ``auth.unconfirmed`` view unless the request has no endpoint, targets
    the ``auth`` blueprint, or targets the ``static`` endpoint.
    """
    if not current_user.is_authenticated:
        return None
    current_user.ping()
    db.session.commit()
    if current_user.confirmed:
        return None
    if not request.endpoint:
        return None
    if request.blueprint == 'auth' or request.endpoint == 'static':
        return None
    return redirect(url_for('auth.unconfirmed'))
@bp.route('/register', methods=['GET', 'POST'])
@register_breadcrumb(bp, '.register', 'Register')
def register():
if current_user.is_authenticated:
return redirect(url_for('main.dashboard'))
@ -49,6 +67,7 @@ def register():
@bp.route('/login', methods=['GET', 'POST'])
@register_breadcrumb(bp, '.login', 'Login')
def login():
if current_user.is_authenticated:
return redirect(url_for('main.dashboard'))
@ -79,6 +98,7 @@ def logout():
@bp.route('/unconfirmed')
@register_breadcrumb(bp, '.unconfirmed', 'Unconfirmed')
@login_required
def unconfirmed():
if current_user.confirmed:
@ -121,6 +141,7 @@ def confirm(token):
@bp.route('/reset-password-request', methods=['GET', 'POST'])
@register_breadcrumb(bp, '.reset_password_request', 'Password Reset')
def reset_password_request():
if current_user.is_authenticated:
return redirect(url_for('main.dashboard'))
@ -150,6 +171,7 @@ def reset_password_request():
@bp.route('/reset-password/<token>', methods=['GET', 'POST'])
@register_breadcrumb(bp, '.reset_password', 'Password Reset')
def reset_password(token):
if current_user.is_authenticated:
return redirect(url_for('main.dashboard'))

View File

@ -1,29 +0,0 @@
from flask import Blueprint, redirect, request, url_for
from flask_login import current_user
from app import db
bp = Blueprint('auth', __name__)
@bp.before_app_request
def before_request():
    """
    Application-wide request hook for logged-in users.

    Pings the current user and commits, then enforces two gates:

    1. Unconfirmed users are redirected to ``auth.unconfirmed``.
    2. Users that have not accepted the terms of use are redirected to
       ``main.terms_of_use``.

    Requests without an endpoint, requests to the ``auth`` blueprint and
    requests for static files are never redirected. The terms-of-use gate
    additionally lets through the terms page itself and the endpoint that
    records the acceptance; otherwise the redirect target would redirect to
    itself forever and the user could never accept the terms.
    """
    if not current_user.is_authenticated:
        return
    current_user.ping()
    db.session.commit()
    if (
        not request.endpoint
        or request.blueprint == 'auth'
        or request.endpoint == 'static'
    ):
        return
    if not current_user.confirmed:
        return redirect(url_for('auth.unconfirmed'))
    # Endpoints that must stay reachable while the terms are still pending.
    terms_endpoints = ('main.terms_of_use', 'users.accept_terms_of_use')
    if (
        not current_user.terms_of_use_accepted
        and request.endpoint not in terms_endpoints
    ):
        return redirect(url_for('main.terms_of_use'))
from . import routes

View File

@ -1,25 +0,0 @@
from flask import Blueprint
from flask_login import login_required
bp = Blueprint('contributions', __name__)
@bp.before_request
@login_required
def before_request():
    '''
    Gate every route of this blueprint behind ``login_required``.

    The hook body itself has nothing to do; the decorator performs the check.
    '''
    return None
from . import routes
from .spacy_nlp_pipeline_models import bp as spacy_nlp_pipeline_models_bp
bp.register_blueprint(spacy_nlp_pipeline_models_bp, url_prefix='/spacy-nlp-pipeline-models')
from .tesseract_ocr_pipeline_models import bp as tesseract_ocr_pipeline_models_bp
bp.register_blueprint(tesseract_ocr_pipeline_models_bp, url_prefix='/tesseract-ocr-pipeline-models')

View File

@ -1,7 +0,0 @@
from flask import render_template
from . import bp
@bp.route('')
def index():
    """Render the contributions overview page."""
    page_title = 'Contributions'
    return render_template('contributions/index.html.j2', title=page_title)

View File

@ -1,299 +0,0 @@
from datetime import datetime
from flask import (
abort,
current_app,
flash,
Flask,
jsonify,
redirect,
request,
render_template,
url_for
)
from flask_login import current_user
from string import punctuation
from threading import Thread
import nltk
from app import db
from app.models import (
Corpus,
CorpusFollowerAssociation,
CorpusFollowerRole,
User
)
from . import bp
from .decorators import corpus_follower_permission_required
from .forms import CreateCorpusForm
def _delete_corpus(app: Flask, corpus_id: int):
    """Delete the corpus with ``corpus_id`` inside ``app``'s context and commit."""
    context = app.app_context()
    with context:
        Corpus.query.get(corpus_id).delete()
        db.session.commit()
def _build_corpus(app: Flask, corpus_id: int):
    """Build the corpus with ``corpus_id`` inside ``app``'s context and commit."""
    context = app.app_context()
    with context:
        Corpus.query.get(corpus_id).build()
        db.session.commit()
@bp.route('')
def corpora():
    """Redirect to the ``corpora`` anchor of the main dashboard."""
    dashboard_url = url_for('main.dashboard', _anchor='corpora')
    return redirect(dashboard_url)
@bp.route('/create', methods=['GET', 'POST'])
def create_corpus():
    """
    Show the corpus creation form and create a corpus on valid submission.

    On success the new corpus is committed, a flash message is queued and the
    client is redirected to the corpus URL. An ``OSError`` raised by
    ``Corpus.create`` aborts with 500.
    """
    form = CreateCorpusForm()
    if not form.validate_on_submit():
        return render_template(
            'corpora/create.html.j2',
            form=form,
            title='Create corpus'
        )
    try:
        new_corpus = Corpus.create(
            title=form.title.data,
            description=form.description.data,
            user=current_user
        )
    except OSError:
        abort(500)
    db.session.commit()
    flash(f'Corpus "{new_corpus.title}" created', 'corpus')
    return redirect(new_corpus.url)
@bp.route('/<hashid:corpus_id>')
def corpus(corpus_id: int):
    """
    Render the page of a single corpus.

    The owner and administrators get the full corpus page. Followers, or
    anyone if the corpus is public, get the public corpus page including the
    follower associations of this corpus. Everybody else is rejected with
    403.
    """
    corpus = Corpus.query.get_or_404(corpus_id)
    is_owner_or_admin = (
        corpus.user == current_user
        or current_user.is_administrator
    )
    cfa = CorpusFollowerAssociation.query.filter_by(
        corpus_id=corpus_id,
        follower_id=current_user.id
    ).first()
    # Effective follower role of the visitor: the explicit association wins,
    # otherwise owner/admin act as 'Administrator', everybody else as
    # 'Anonymous'.
    if cfa is not None:
        cfr = cfa.role
    elif is_owner_or_admin:
        cfr = CorpusFollowerRole.query.filter_by(name='Administrator').first()
    else:
        cfr = CorpusFollowerRole.query.filter_by(name='Anonymous').first()
    cfrs = CorpusFollowerRole.query.all()
    # TODO: Better solution for filtering admin
    users = User.query.filter(
        User.is_public == True,
        User.id != current_user.id,
        User.id != corpus.user.id,
        User.role_id < 4
    ).all()
    if is_owner_or_admin:
        return render_template(
            'corpora/corpus.html.j2',
            title=corpus.title,
            corpus=corpus,
            cfr=cfr,
            cfrs=cfrs,
            users=users
        )
    if current_user.is_following_corpus(corpus) or corpus.is_public:
        # Filter on the association's own corpus_id column. Filtering on
        # Corpus.id without a join yields a cartesian product and would
        # return follower associations of every corpus.
        cfas = CorpusFollowerAssociation.query.filter(
            CorpusFollowerAssociation.corpus_id == corpus_id,
            CorpusFollowerAssociation.follower_id != corpus.user.id
        ).all()
        return render_template(
            'corpora/public_corpus.html.j2',
            title=corpus.title,
            corpus=corpus,
            cfrs=cfrs,
            cfr=cfr,
            cfas=cfas,
            users=users
        )
    abort(403)
@bp.route('/<hashid:corpus_id>', methods=['DELETE'])
def delete_corpus(corpus_id: int):
    """
    Start deleting a corpus in a background thread.

    Only the owner or an administrator may do this (403 otherwise). Responds
    with 202 once the thread has been started.
    """
    corpus = Corpus.query.get_or_404(corpus_id)
    may_delete = corpus.user == current_user or current_user.is_administrator
    if not may_delete:
        abort(403)
    # Hand the real app object (not the proxy) to the worker thread.
    app = current_app._get_current_object()
    Thread(target=_delete_corpus, args=(app, corpus.id)).start()
    return jsonify(f'Corpus "{corpus.title}" marked for deletion.'), 202
@bp.route('/<hashid:corpus_id>/build', methods=['POST'])
def build_corpus(corpus_id: int):
    """
    Start building a corpus in a background thread.

    Allowed for followers whose role has the ``MANAGE_FILES`` permission, the
    owner, and administrators (403 otherwise). Aborts with 409 when the
    corpus has no files. Responds with 202 once the thread has been started.
    """
    corpus = Corpus.query.get_or_404(corpus_id)
    cfa = CorpusFollowerAssociation.query.filter_by(
        corpus_id=corpus_id,
        follower_id=current_user.id
    ).first()
    may_build = (
        (cfa is not None and cfa.role.has_permission('MANAGE_FILES'))
        or corpus.user == current_user
        or current_user.is_administrator
    )
    if not may_build:
        abort(403)
    if not corpus.files.all():
        abort(409)
    # Hand the real app object (not the proxy) to the worker thread.
    app = current_app._get_current_object()
    Thread(target=_build_corpus, args=(app, corpus.id)).start()
    return jsonify(f'Corpus "{corpus.title}" marked for building.'), 202
@bp.route('/<hashid:corpus_id>/create-share-link', methods=['POST'])
def create_share_link(corpus_id: int):
    """
    Create an external link that lets its recipient follow the corpus.

    Expects a JSON object with the string members ``expiration_date``
    (format ``'%b %d, %Y'``) and ``role_name`` (name of an existing corpus
    follower role).

    Aborts with 400 for a malformed payload, an unparsable date or an
    unknown role, and with 403 unless the current user is the owner, an
    administrator, or a follower whose role has the ``MANAGE_FOLLOWERS``
    permission.
    """
    data = request.json
    # A non-object body or missing members is a client error, not a crash.
    if not isinstance(data, dict):
        abort(400)
    expiration_date = data.get('expiration_date')
    if not isinstance(expiration_date, str):
        abort(400)
    role_name = data.get('role_name')
    if not isinstance(role_name, str):
        abort(400)
    corpus = Corpus.query.get_or_404(corpus_id)
    cfa = CorpusFollowerAssociation.query.filter_by(
        corpus_id=corpus_id,
        follower_id=current_user.id
    ).first()
    if not (
        cfa is not None and cfa.role.has_permission('MANAGE_FOLLOWERS')
        or corpus.user == current_user
        or current_user.is_administrator
    ):
        abort(403)
    try:
        _expiration_date = datetime.strptime(expiration_date, '%b %d, %Y')
    except ValueError:
        # The date string does not match the expected format.
        abort(400)
    cfr = CorpusFollowerRole.query.filter_by(name=role_name).first()
    if cfr is None:
        abort(400)
    token = current_user.generate_follow_corpus_token(
        corpus.hashid,
        role_name,
        _expiration_date
    )
    corpus_share_link = url_for(
        'corpora.follow_corpus',
        corpus_id=corpus_id,
        token=token,
        _external=True
    )
    return jsonify(corpus_share_link)
@bp.route('/<hashid:corpus_id>/analysis')
@corpus_follower_permission_required('VIEW')
def analysis(corpus_id: int):
    """Render the analysis page of a corpus."""
    corpus = Corpus.query.get_or_404(corpus_id)
    page_title = f'Analyse Corpus {corpus.title}'
    return render_template(
        'corpora/analysis.html.j2',
        title=page_title,
        corpus=corpus
    )
@bp.route('/<hashid:corpus_id>/analysis/stopwords')
def get_stopwords(corpus_id: int):
    """
    Return stop word lists as JSON.

    The response maps each supported language to its NLTK stop word list and
    additionally contains a ``punctuation`` list and an empty
    ``user_stopwords`` list. ``corpus_id`` is part of the URL but is not used
    by the view body.
    """
    nltk.download('stopwords', quiet=True)
    stopwords = {}
    for language in (
        'german',
        'english',
        'catalan',
        'greek',
        'spanish',
        'french',
        'italian',
        'russian',
        'chinese'
    ):
        stopwords[language] = nltk.corpus.stopwords.words(language)
    # NOTE(review): the empty strings below look like characters that were
    # lost somewhere along the way - confirm against the repository.
    stopwords['punctuation'] = [*punctuation, '', '|', '', '', '', '--']
    stopwords['user_stopwords'] = []
    return jsonify(stopwords)
@bp.route('/<hashid:corpus_id>/follow/<token>')
def follow_corpus(corpus_id: int, token: str):
    """
    Let the current user follow a corpus by means of a share token.

    Aborts with 403 when ``follow_corpus_by_token`` rejects the token;
    otherwise commits, flashes a confirmation and redirects to the corpus.
    """
    corpus = Corpus.query.get_or_404(corpus_id)
    token_accepted = current_user.follow_corpus_by_token(token)
    if not token_accepted:
        abort(403)
    db.session.commit()
    flash(f'You are following "{corpus.title}" now', category='corpus')
    return redirect(corpus.url)
@bp.route('/<hashid:corpus_id>/is-public', methods=['PUT'])
def update_is_public(corpus_id):
    """
    Set the ``is_public`` flag of a corpus from a JSON boolean body.

    Aborts with 400 when the body is not a boolean and with 403 unless the
    current user is the owner or an administrator.
    """
    new_value = request.json
    if not isinstance(new_value, bool):
        abort(400)
    corpus = Corpus.query.get_or_404(corpus_id)
    may_update = corpus.user == current_user or current_user.is_administrator
    if not may_update:
        abort(403)
    corpus.is_public = new_value
    db.session.commit()
    visibility = 'public' if new_value else 'private'
    return jsonify(f'Corpus "{corpus.title}" is now {visibility}'), 200

View File

@ -1,13 +0,0 @@
from flask import Blueprint
bp = Blueprint('jobs', __name__)
from . import routes
from .inputs import bp as inputs_bp
bp.register_blueprint(inputs_bp, url_prefix='/<hashid:job_id>/inputs')
from .results import bp as results_bp
bp.register_blueprint(results_bp, url_prefix='/<hashid:job_id>/results')

View File

@ -1,27 +0,0 @@
from flask import abort, send_from_directory
from flask_login import current_user, login_required
from app.models import JobInput
from . import bp
@bp.route('/<hashid:job_input_id>/download')
@login_required
def download_job_input(job_id: int, job_input_id: int):
    """
    Send a job input file as an attachment.

    The input must belong to the job given in the URL (404 otherwise) and the
    current user must own that job or be an administrator (403 otherwise).
    """
    job_input = JobInput.query.filter_by(
        job_id=job_id,
        id=job_input_id
    ).first_or_404()
    may_download = (
        job_input.job.user == current_user
        or current_user.is_administrator
    )
    if not may_download:
        abort(403)
    file_path = job_input.path
    return send_from_directory(
        file_path.parent,
        file_path.name,
        as_attachment=True,
        download_name=job_input.filename,
        mimetype=job_input.mimetype
    )

View File

@ -1,7 +0,0 @@
from flask import Blueprint
bp = Blueprint('results', __name__)
from . import routes

View File

@ -1,27 +0,0 @@
from flask import abort, send_from_directory
from flask_login import current_user, login_required
from app.models import JobResult
from . import bp
@bp.route('/<hashid:job_result_id>/download')
@login_required
def download_job_result(job_id: int, job_result_id: int):
    """
    Send a job result file as an attachment.

    The result must belong to the job given in the URL (404 otherwise) and
    the current user must own that job or be an administrator (403
    otherwise).
    """
    job_result = JobResult.query.filter_by(
        job_id=job_id,
        id=job_result_id
    ).first_or_404()
    may_download = (
        job_result.job.user == current_user
        or current_user.is_administrator
    )
    if not may_download:
        abort(403)
    file_path = job_result.path
    return send_from_directory(
        file_path.parent,
        file_path.name,
        as_attachment=True,
        download_name=job_result.filename,
        mimetype=job_result.mimetype
    )

View File

@ -1,111 +0,0 @@
from flask import (
abort,
current_app,
Flask,
jsonify,
redirect,
render_template,
url_for
)
from flask_login import current_user, login_required
from threading import Thread
from app import db
from app.decorators import admin_required
from app.models import Job, JobStatus
from . import bp
@bp.route('')
@login_required
def index():
    """Redirect to the ``jobs`` anchor of the main dashboard."""
    dashboard_url = url_for('main.dashboard', _anchor='jobs')
    return redirect(dashboard_url)
@bp.route('/<hashid:job_id>')
@login_required
def job(job_id: int):
    """Render the page of a job; owner or administrator only (403 otherwise)."""
    job = Job.query.get_or_404(job_id)
    may_view = job.user == current_user or current_user.is_administrator
    if not may_view:
        abort(403)
    return render_template('jobs/job.html.j2', job=job, title='Job')
def _delete_job(app: Flask, job_id: int):
    """Delete the job with ``job_id`` inside ``app``'s context and commit."""
    context = app.app_context()
    with context:
        Job.query.get(job_id).delete()
        db.session.commit()
@bp.route('/<hashid:job_id>', methods=['DELETE'])
@login_required
def delete_job(job_id: int):
    """
    Start deleting a job in a background thread.

    Only the owner or an administrator may do this (403 otherwise). Responds
    with 202 once the thread has been started.
    """
    job = Job.query.get_or_404(job_id)
    may_delete = job.user == current_user or current_user.is_administrator
    if not may_delete:
        abort(403)
    # Hand the real app object (not the proxy) to the worker thread.
    app = current_app._get_current_object()
    Thread(target=_delete_job, args=(app, job.id)).start()
    return jsonify(f'Job "{job.title}" marked for deletion.'), 202
@bp.route('/<hashid:job_id>/log')
@admin_required
def job_log(job_id: int):
    """
    Return the pyflow log of a finished job as a JSON string.

    Aborts with 409 unless the job status is ``COMPLETED`` or ``FAILED``.
    """
    job = Job.query.get_or_404(job_id)
    finished_states = (JobStatus.COMPLETED, JobStatus.FAILED)
    if job.status not in finished_states:
        abort(409)
    log_file_path = job.path / 'pipeline_data' / 'logs' / 'pyflow_log.txt'
    return jsonify(log_file_path.read_text())
def _restart_job(app: Flask, job_id: int):
    """Restart the job with ``job_id`` inside ``app``'s context and commit."""
    context = app.app_context()
    with context:
        Job.query.get(job_id).restart()
        db.session.commit()
@bp.route('/<hashid:job_id>/restart', methods=['POST'])
@login_required
def restart_job(job_id: int):
    """
    Start restarting a failed job in a background thread.

    Only the owner or an administrator may do this (403 otherwise); jobs
    whose status is not ``FAILED`` are rejected with 409. Responds with 202
    once the thread has been started.
    """
    job = Job.query.get_or_404(job_id)
    may_restart = job.user == current_user or current_user.is_administrator
    if not may_restart:
        abort(403)
    if job.status != JobStatus.FAILED:
        abort(409)
    # Hand the real app object (not the proxy) to the worker thread.
    app = current_app._get_current_object()
    Thread(target=_restart_job, args=(app, job.id)).start()
    return jsonify(f'Job "{job.title}" marked for restarting.'), 202

View File

@ -1,7 +0,0 @@
from flask import Blueprint
bp = Blueprint('settings', __name__)
from . import routes

View File

@ -1,158 +0,0 @@
from flask import (
abort,
flash,
jsonify,
redirect,
render_template,
request,
url_for
)
from flask_login import current_user, login_required
from app import db
from app.models import Avatar
from . import bp
from .forms import (
UpdateAvatarForm,
UpdatePasswordForm,
UpdateNotificationsForm,
UpdateAccountInformationForm,
UpdateProfileInformationForm
)
@bp.route('', methods=['GET', 'POST'])
@login_required
def index():
    """
    Show and process the settings forms of the current user.

    Five forms share this view. The first form whose submit button was sent
    and which validates is applied, committed, and answered with a redirect
    back to this page. If no form was submitted (or validation failed) the
    settings page is rendered with all forms.
    """
    account_form = UpdateAccountInformationForm(current_user)
    profile_form = UpdateProfileInformationForm(current_user)
    avatar_form = UpdateAvatarForm()
    password_form = UpdatePasswordForm(current_user)
    notifications_form = UpdateNotificationsForm(current_user)

    def save_and_reload(message='Your changes have been saved'):
        # Shared tail of every successful form submission.
        db.session.commit()
        flash(message)
        return redirect(url_for('.index'))

    # Profile information
    if profile_form.submit.data and profile_form.validate():
        for field_name in (
            'about_me',
            'location',
            'organization',
            'website',
            'full_name'
        ):
            setattr(
                current_user,
                field_name,
                getattr(profile_form, field_name).data
            )
        return save_and_reload()

    # Avatar
    if avatar_form.submit.data and avatar_form.validate():
        try:
            Avatar.create(avatar_form.avatar.data, user=current_user)
        except (AttributeError, OSError):
            abort(500)
        return save_and_reload()

    # Account information
    if account_form.submit.data and account_form.validate():
        current_user.email = account_form.email.data
        current_user.username = account_form.username.data
        return save_and_reload('Profile settings updated')

    # Password
    if password_form.submit.data and password_form.validate():
        current_user.password = password_form.new_password.data
        return save_and_reload()

    # Notifications
    if notifications_form.submit.data and notifications_form.validate():
        current_user.setting_job_status_mail_notification_level = \
            notifications_form.job_status_mail_notification_level.data
        return save_and_reload()

    return render_template(
        'settings/index.html.j2',
        title='Settings',
        update_account_information_form=account_form,
        update_avatar_form=avatar_form,
        update_notifications_form=notifications_form,
        update_password_form=password_form,
        update_profile_information_form=profile_form,
        user=current_user
    )
@bp.route('/profile-is-public', methods=['PUT'])
@login_required
def update_profile_is_public():
    """Set the current user's ``is_public`` flag from a JSON boolean body (400 otherwise)."""
    requested_state = request.json
    if not isinstance(requested_state, bool):
        abort(400)
    current_user.is_public = requested_state
    db.session.commit()
    return jsonify('Your changes have been saved'), 200
@bp.route('/profile-show-email', methods=['PUT'])
@login_required
def update_profile_show_email():
    """Add or remove the ``SHOW_EMAIL`` privacy setting from a JSON boolean body (400 otherwise)."""
    requested_state = request.json
    if not isinstance(requested_state, bool):
        abort(400)
    # True adds the setting, False removes it.
    apply_setting = (
        current_user.add_profile_privacy_setting
        if requested_state
        else current_user.remove_profile_privacy_setting
    )
    apply_setting('SHOW_EMAIL')
    db.session.commit()
    return jsonify('Your changes have been saved'), 200
@bp.route('/profile-show-last-seen', methods=['PUT'])
@login_required
def update_profile_show_last_seen():
    """Add or remove the ``SHOW_LAST_SEEN`` privacy setting from a JSON boolean body (400 otherwise)."""
    requested_state = request.json
    if not isinstance(requested_state, bool):
        abort(400)
    # True adds the setting, False removes it.
    apply_setting = (
        current_user.add_profile_privacy_setting
        if requested_state
        else current_user.remove_profile_privacy_setting
    )
    apply_setting('SHOW_LAST_SEEN')
    db.session.commit()
    return jsonify('Your changes have been saved'), 200
@bp.route('/profile-show-member-since', methods=['PUT'])
@login_required
def update_profile_show_member_since():
    """Add or remove the ``SHOW_MEMBER_SINCE`` privacy setting from a JSON boolean body (400 otherwise)."""
    requested_state = request.json
    if not isinstance(requested_state, bool):
        abort(400)
    # True adds the setting, False removes it.
    apply_setting = (
        current_user.add_profile_privacy_setting
        if requested_state
        else current_user.remove_profile_privacy_setting
    )
    apply_setting('SHOW_MEMBER_SINCE')
    db.session.commit()
    return jsonify('Your changes have been saved'), 200

View File

@ -1,7 +0,0 @@
from flask import Blueprint
bp = Blueprint('users', __name__)
from . import cli, events, routes

View File

@ -1,58 +0,0 @@
from flask_login import current_user
from flask_socketio import join_room, leave_room
from app import hashids, socketio
from app.decorators import socketio_login_required
from app.models import User
@socketio.on('SUBSCRIBE User')
@socketio_login_required
def subscribe(user_hashid: str) -> dict:
    """
    Join the Socket.IO room ``/users/<hashid>`` of a user.

    Returns an HTTP-like status dict: 400 for a malformed hashid, 404 for an
    unknown user, 403 unless the current user is that user or an
    administrator, and 200 on success.
    """
    bad_request = {'status': 400, 'statusText': 'Bad Request'}
    if not isinstance(user_hashid, str):
        return bad_request
    user_id = hashids.decode(user_hashid)
    if not isinstance(user_id, int):
        return bad_request
    user = User.query.get(user_id)
    if user is None:
        return {'status': 404, 'statusText': 'Not Found'}
    is_allowed = user == current_user or current_user.is_administrator
    if not is_allowed:
        return {'status': 403, 'statusText': 'Forbidden'}
    join_room(f'/users/{user.hashid}')
    return {'status': 200, 'statusText': 'OK'}
@socketio.on('UNSUBSCRIBE User')
@socketio_login_required
def unsubscribe(user_hashid: str) -> dict:
    """
    Leave the Socket.IO room ``/users/<hashid>`` of a user.

    Returns an HTTP-like status dict: 400 for a malformed hashid, 404 for an
    unknown user, 403 unless the current user is that user or an
    administrator, and 200 on success.
    """
    bad_request = {'status': 400, 'statusText': 'Bad Request'}
    if not isinstance(user_hashid, str):
        return bad_request
    user_id = hashids.decode(user_hashid)
    if not isinstance(user_id, int):
        return bad_request
    user = User.query.get(user_id)
    if user is None:
        return {'status': 404, 'statusText': 'Not Found'}
    is_allowed = user == current_user or current_user.is_administrator
    if not is_allowed:
        return {'status': 403, 'statusText': 'Forbidden'}
    leave_room(f'/users/{user.hashid}')
    return {'status': 200, 'statusText': 'OK'}

View File

@ -1,150 +0,0 @@
from flask import (
abort,
current_app,
Flask,
jsonify,
redirect,
render_template,
request,
send_from_directory,
url_for
)
from flask_login import current_user, login_required, logout_user
from threading import Thread
from app import db
from app.models import Avatar, User
from . import bp
@bp.route('')
@login_required
def index():
    """Redirect to the ``users`` anchor of the social area."""
    social_area_url = url_for('main.social_area', _anchor='users')
    return redirect(social_area_url)
@bp.route('/<hashid:user_id>')
@login_required
def user(user_id: int):
    """
    Show a user profile.

    Accessible when the profile is public, or for the user themself, or for
    administrators (403 otherwise). Clients that accept JSON but not HTML get
    the serialized user; everybody else gets the rendered profile page.
    """
    user = User.query.get_or_404(user_id)
    may_view = (
        user.is_public
        or user == current_user
        or current_user.is_administrator
    )
    if not may_view:
        abort(403)
    accepted = request.accept_mimetypes
    if accepted.accept_json and not accepted.accept_html:
        return user.to_json_serializeable(
            backrefs=True,
            relationships=True
        )
    return render_template(
        'users/user.html.j2',
        user=user,
        title=user.username
    )
def _delete_user(app: Flask, user_id: int):
    """Delete the user with ``user_id`` inside ``app``'s context and commit."""
    context = app.app_context()
    with context:
        User.query.get(user_id).delete()
        db.session.commit()
@bp.route('/<hashid:user_id>', methods=['DELETE'])
@login_required
def delete_user(user_id: int):
    """
    Start deleting a user in a background thread.

    Only the user themself or an administrator may do this (403 otherwise).
    A user deleting their own account is logged out first. Responds with 202
    once the thread has been started.
    """
    user = User.query.get_or_404(user_id)
    is_self = user == current_user
    if not (is_self or current_user.is_administrator):
        abort(403)
    # Evaluate the comparison again, as the original does, right before
    # logging out.
    if user == current_user:
        logout_user()
    # Hand the real app object (not the proxy) to the worker thread.
    app = current_app._get_current_object()
    Thread(target=_delete_user, args=(app, user.id)).start()
    return jsonify(f'User "{user.username}" marked for deletion'), 202
@bp.route('/<hashid:user_id>/avatar')
@login_required
def user_avatar(user_id: int):
    """
    Send the avatar of a user.

    Accessible when the profile is public, or for the user themself, or for
    administrators (403 otherwise). Users without an avatar get a redirect to
    the static default image.
    """
    user = User.query.get_or_404(user_id)
    may_view = (
        user.is_public
        or user == current_user
        or current_user.is_administrator
    )
    if not may_view:
        abort(403)
    avatar = user.avatar
    if avatar is None:
        return redirect(url_for('static', filename='images/user_avatar.png'))
    return send_from_directory(
        avatar.path.parent,
        avatar.path.name,
        as_attachment=True,
        download_name=avatar.filename,
        mimetype=avatar.mimetype
    )
def _delete_avatar(app: Flask, avatar_id: int):
    '''
    Delete the avatar with the given id inside its own application context.

    Intended to be run in a background thread, hence the explicit `app`
    argument instead of relying on `current_app`.
    '''
    with app.app_context():
        avatar = Avatar.query.get(avatar_id)
        if avatar is None:
            # The avatar is already gone (e.g. a second DELETE request raced
            # with this one), so there is nothing left to delete.
            return
        avatar.delete()
        db.session.commit()
@bp.route('/<hashid:user_id>/avatar', methods=['DELETE'])
@login_required
def delete_user_avatar(user_id: int):
    '''
    Start the deletion of a user's avatar and answer with 202 right away.

    Only the user themself or an administrator may do this (403 otherwise).
    Responds with 409 if the user has no avatar. The deletion itself runs
    in a background thread.
    '''
    user = User.query.get_or_404(user_id)
    # Check permissions before looking at the avatar, so that unauthorised
    # requesters can not tell from the status code (409 vs. 403) whether
    # another user has an avatar.
    if not (
        user == current_user
        or current_user.is_administrator
    ):
        abort(403)
    if user.avatar is None:
        abort(409)
    thread = Thread(
        target=_delete_avatar,
        args=(current_app._get_current_object(), user.avatar.id)
    )
    thread.start()
    return jsonify('Avatar marked for deletion'), 202
# TODO: Move this to main blueprint(?)
@bp.route('/accept-terms-of-use', methods=['POST'])
@login_required
def accept_terms_of_use():
    '''
    Record that the current user accepted the terms of use.

    Responds with 403 unless the current user is both authenticated and
    confirmed.
    '''
    # NOTE(review): the previous condition combined the two checks with
    # `or`, which can never trigger behind `@login_required`. Requiring both
    # is presumably what was intended - confirm that unconfirmed users are
    # not expected to call this endpoint.
    if not (
        current_user.is_authenticated
        and current_user.confirmed
    ):
        abort(403)
    current_user.terms_of_use_accepted = True
    db.session.commit()
    return jsonify('You accepted the terms of use'), 202

View File

@ -0,0 +1,23 @@
from flask import Blueprint
from flask_login import login_required
bp = Blueprint('contributions', __name__)
@bp.before_request
@login_required
def before_request():
    '''
    Restrict every route of this package to logged in users.

    The function body is intentionally empty, the `login_required`
    decorator does all the work.
    '''
from . import (
routes,
spacy_nlp_pipeline_models,
tesseract_ocr_pipeline_models,
transkribus_htr_pipeline_models
)

View File

@ -0,0 +1,9 @@
from flask import redirect, url_for
from flask_breadcrumbs import register_breadcrumb
from . import bp
@bp.route('')
@register_breadcrumb(bp, '.', '<i class="material-icons left">new_label</i>My Contributions')
def contributions():
    '''
    Redirect the blueprint root to the `main.dashboard` endpoint, jumping
    to its `contributions` anchor.
    '''
    target_url = url_for('main.dashboard', _anchor='contributions')
    return redirect(target_url)

View File

@ -1,7 +1,7 @@
from flask_wtf.file import FileField, FileRequired
from wtforms import StringField, ValidationError
from wtforms.validators import InputRequired, Length
from app.blueprints.services import SERVICES
from app.services import SERVICES
from ..forms import ContributionBaseForm, UpdateContributionBaseForm
@ -16,8 +16,8 @@ class CreateSpaCyNLPPipelineModelForm(ContributionBaseForm):
)
def validate_spacy_model_file(self, field):
if not field.data.filename.lower().endswith(('.tar.gz', ('.whl'))):
raise ValidationError('.tar.gz or .whl files only!')
if not field.data.filename.lower().endswith('.tar.gz'):
raise ValidationError('.tar.gz files only!')
def __init__(self, *args, **kwargs):
if 'prefix' not in kwargs:

View File

@ -1,14 +1,13 @@
from flask import abort, current_app, request
from flask_login import current_user, login_required
from flask_login import current_user
from threading import Thread
from app import db
from app.decorators import content_negotiation, permission_required
from app.models import SpaCyNLPPipelineModel
from . import bp
from .. import bp
@bp.route('/<hashid:spacy_nlp_pipeline_model_id>', methods=['DELETE'])
@login_required
@bp.route('/spacy-nlp-pipeline-models/<hashid:spacy_nlp_pipeline_model_id>', methods=['DELETE'])
@content_negotiation(produces='application/json')
def delete_spacy_model(spacy_nlp_pipeline_model_id):
def _delete_spacy_model(app, spacy_nlp_pipeline_model_id):
@ -18,7 +17,7 @@ def delete_spacy_model(spacy_nlp_pipeline_model_id):
db.session.commit()
snpm = SpaCyNLPPipelineModel.query.get_or_404(spacy_nlp_pipeline_model_id)
if not (snpm.user == current_user or current_user.is_administrator):
if not (snpm.user == current_user or current_user.is_administrator()):
abort(403)
thread = Thread(
target=_delete_spacy_model,
@ -32,7 +31,7 @@ def delete_spacy_model(spacy_nlp_pipeline_model_id):
return response_data, 202
@bp.route('/<hashid:spacy_nlp_pipeline_model_id>/is_public', methods=['PUT'])
@bp.route('/spacy-nlp-pipeline-models/<hashid:spacy_nlp_pipeline_model_id>/is_public', methods=['PUT'])
@permission_required('CONTRIBUTE')
@content_negotiation(consumes='application/json', produces='application/json')
def update_spacy_nlp_pipeline_model_is_public(spacy_nlp_pipeline_model_id):
@ -40,7 +39,7 @@ def update_spacy_nlp_pipeline_model_is_public(spacy_nlp_pipeline_model_id):
if not isinstance(is_public, bool):
abort(400)
snpm = SpaCyNLPPipelineModel.query.get_or_404(spacy_nlp_pipeline_model_id)
if not (snpm.user == current_user or current_user.is_administrator):
if not (snpm.user == current_user or current_user.is_administrator()):
abort(403)
snpm.is_public = is_public
db.session.commit()

View File

@ -1,5 +1,6 @@
from flask import abort, flash, redirect, render_template, url_for
from flask_login import current_user, login_required
from flask_breadcrumbs import register_breadcrumb
from flask_login import current_user
from app import db
from app.models import SpaCyNLPPipelineModel
from . import bp
@ -7,17 +8,23 @@ from .forms import (
CreateSpaCyNLPPipelineModelForm,
UpdateSpaCyNLPPipelineModelForm
)
from .utils import (
spacy_nlp_pipeline_model_dlc as spacy_nlp_pipeline_model_dlc
)
@bp.route('/')
@login_required
def index():
return redirect(url_for('contributions.index', _anchor='spacy-nlp-pipeline-models'))
@bp.route('/spacy-nlp-pipeline-models')
@register_breadcrumb(bp, '.spacy_nlp_pipeline_models', 'SpaCy NLP Pipeline Models')
def spacy_nlp_pipeline_models():
return render_template(
'contributions/spacy_nlp_pipeline_models/spacy_nlp_pipeline_models.html.j2',
title='SpaCy NLP Pipeline Models'
)
@bp.route('/create', methods=['GET', 'POST'])
@login_required
def create():
@bp.route('/spacy-nlp-pipeline-models/create', methods=['GET', 'POST'])
@register_breadcrumb(bp, '.spacy_nlp_pipeline_models.create', 'Create')
def create_spacy_nlp_pipeline_model():
form = CreateSpaCyNLPPipelineModelForm()
if form.is_submitted():
if not form.validate():
@ -41,7 +48,7 @@ def create():
abort(500)
db.session.commit()
flash(f'SpaCy NLP Pipeline model "{snpm.title}" created')
return {}, 201, {'Location': url_for('.index')}
return {}, 201, {'Location': url_for('.spacy_nlp_pipeline_models')}
return render_template(
'contributions/spacy_nlp_pipeline_models/create.html.j2',
title='Create SpaCy NLP Pipeline Model',
@ -49,11 +56,11 @@ def create():
)
@bp.route('/<hashid:spacy_nlp_pipeline_model_id>', methods=['GET', 'POST'])
@login_required
def entity(spacy_nlp_pipeline_model_id):
@bp.route('/spacy-nlp-pipeline-models/<hashid:spacy_nlp_pipeline_model_id>', methods=['GET', 'POST'])
@register_breadcrumb(bp, '.spacy_nlp_pipeline_models.entity', '', dynamic_list_constructor=spacy_nlp_pipeline_model_dlc)
def spacy_nlp_pipeline_model(spacy_nlp_pipeline_model_id):
snpm = SpaCyNLPPipelineModel.query.get_or_404(spacy_nlp_pipeline_model_id)
if not (snpm.user == current_user or current_user.is_administrator):
if not (snpm.user == current_user or current_user.is_administrator()):
abort(403)
form = UpdateSpaCyNLPPipelineModelForm(data=snpm.to_json_serializeable())
if form.validate_on_submit():
@ -61,9 +68,9 @@ def entity(spacy_nlp_pipeline_model_id):
if db.session.is_modified(snpm):
flash(f'SpaCy NLP Pipeline model "{snpm.title}" updated')
db.session.commit()
return redirect(url_for('.index'))
return redirect(url_for('.spacy_nlp_pipeline_models'))
return render_template(
'contributions/spacy_nlp_pipeline_models/entity.html.j2',
'contributions/spacy_nlp_pipeline_models/spacy_nlp_pipeline_model.html.j2',
title=f'{snpm.title} {snpm.version}',
form=form,
spacy_nlp_pipeline_model=snpm

View File

@ -0,0 +1,13 @@
from flask import request, url_for
from app.models import SpaCyNLPPipelineModel
def spacy_nlp_pipeline_model_dlc():
    '''
    Build the breadcrumb entry for the SpaCy NLP pipeline model addressed by
    the current request.

    Returns a list with a single dict holding the breadcrumb `text` (title
    and version of the model) and the `url` of the model page. Aborts with
    404 if the model does not exist.
    '''
    model_id = request.view_args['spacy_nlp_pipeline_model_id']
    model = SpaCyNLPPipelineModel.query.get_or_404(model_id)
    breadcrumb = {
        'text': f'{model.title} {model.version}',
        'url': url_for('.spacy_nlp_pipeline_model', spacy_nlp_pipeline_model_id=model_id)
    }
    return [breadcrumb]

View File

@ -0,0 +1,2 @@
from .. import bp
from . import json_routes, routes

View File

@ -1,6 +1,6 @@
from flask_wtf.file import FileField, FileRequired
from wtforms import ValidationError
from app.blueprints.services import SERVICES
from app.services import SERVICES
from ..forms import ContributionBaseForm, UpdateContributionBaseForm

View File

@ -7,7 +7,7 @@ from app.models import TesseractOCRPipelineModel
from . import bp
@bp.route('/<hashid:tesseract_ocr_pipeline_model_id>', methods=['DELETE'])
@bp.route('/tesseract-ocr-pipeline-models/<hashid:tesseract_ocr_pipeline_model_id>', methods=['DELETE'])
@content_negotiation(produces='application/json')
def delete_tesseract_model(tesseract_ocr_pipeline_model_id):
def _delete_tesseract_ocr_pipeline_model(app, tesseract_ocr_pipeline_model_id):
@ -17,7 +17,7 @@ def delete_tesseract_model(tesseract_ocr_pipeline_model_id):
db.session.commit()
topm = TesseractOCRPipelineModel.query.get_or_404(tesseract_ocr_pipeline_model_id)
if not (topm.user == current_user or current_user.is_administrator):
if not (topm.user == current_user or current_user.is_administrator()):
abort(403)
thread = Thread(
target=_delete_tesseract_ocr_pipeline_model,
@ -31,7 +31,7 @@ def delete_tesseract_model(tesseract_ocr_pipeline_model_id):
return response_data, 202
@bp.route('/<hashid:tesseract_ocr_pipeline_model_id>/is_public', methods=['PUT'])
@bp.route('/tesseract-ocr-pipeline-models/<hashid:tesseract_ocr_pipeline_model_id>/is_public', methods=['PUT'])
@permission_required('CONTRIBUTE')
@content_negotiation(consumes='application/json', produces='application/json')
def update_tesseract_ocr_pipeline_model_is_public(tesseract_ocr_pipeline_model_id):
@ -39,7 +39,7 @@ def update_tesseract_ocr_pipeline_model_is_public(tesseract_ocr_pipeline_model_i
if not isinstance(is_public, bool):
abort(400)
topm = TesseractOCRPipelineModel.query.get_or_404(tesseract_ocr_pipeline_model_id)
if not (topm.user == current_user or current_user.is_administrator):
if not (topm.user == current_user or current_user.is_administrator()):
abort(403)
topm.is_public = is_public
db.session.commit()

View File

@ -1,4 +1,5 @@
from flask import abort, flash, redirect, render_template, url_for
from flask_breadcrumbs import register_breadcrumb
from flask_login import current_user
from app import db
from app.models import TesseractOCRPipelineModel
@ -7,15 +8,23 @@ from .forms import (
CreateTesseractOCRPipelineModelForm,
UpdateTesseractOCRPipelineModelForm
)
from .utils import (
tesseract_ocr_pipeline_model_dlc as tesseract_ocr_pipeline_model_dlc
)
@bp.route('/')
def index():
return redirect(url_for('contributions.index', _anchor='tesseract-ocr-pipeline-models'))
@bp.route('/tesseract-ocr-pipeline-models')
@register_breadcrumb(bp, '.tesseract_ocr_pipeline_models', 'Tesseract OCR Pipeline Models')
def tesseract_ocr_pipeline_models():
return render_template(
'contributions/tesseract_ocr_pipeline_models/tesseract_ocr_pipeline_models.html.j2',
title='Tesseract OCR Pipeline Models'
)
@bp.route('/create', methods=['GET', 'POST'])
def create():
@bp.route('/tesseract-ocr-pipeline-models/create', methods=['GET', 'POST'])
@register_breadcrumb(bp, '.tesseract_ocr_pipeline_models.create', 'Create')
def create_tesseract_ocr_pipeline_model():
form = CreateTesseractOCRPipelineModelForm()
if form.is_submitted():
if not form.validate():
@ -38,7 +47,7 @@ def create():
abort(500)
db.session.commit()
flash(f'Tesseract OCR Pipeline model "{topm.title}" created')
return {}, 201, {'Location': url_for('.index')}
return {}, 201, {'Location': url_for('.tesseract_ocr_pipeline_models')}
return render_template(
'contributions/tesseract_ocr_pipeline_models/create.html.j2',
title='Create Tesseract OCR Pipeline Model',
@ -46,10 +55,11 @@ def create():
)
@bp.route('/<hashid:tesseract_ocr_pipeline_model_id>', methods=['GET', 'POST'])
def entity(tesseract_ocr_pipeline_model_id):
@bp.route('/tesseract-ocr-pipeline-models/<hashid:tesseract_ocr_pipeline_model_id>', methods=['GET', 'POST'])
@register_breadcrumb(bp, '.tesseract_ocr_pipeline_models.entity', '', dynamic_list_constructor=tesseract_ocr_pipeline_model_dlc)
def tesseract_ocr_pipeline_model(tesseract_ocr_pipeline_model_id):
topm = TesseractOCRPipelineModel.query.get_or_404(tesseract_ocr_pipeline_model_id)
if not (topm.user == current_user or current_user.is_administrator):
if not (topm.user == current_user or current_user.is_administrator()):
abort(403)
form = UpdateTesseractOCRPipelineModelForm(data=topm.to_json_serializeable())
if form.validate_on_submit():
@ -57,9 +67,9 @@ def entity(tesseract_ocr_pipeline_model_id):
if db.session.is_modified(topm):
flash(f'Tesseract OCR Pipeline model "{topm.title}" updated')
db.session.commit()
return redirect(url_for('.index'))
return redirect(url_for('.tesseract_ocr_pipeline_models'))
return render_template(
'contributions/tesseract_ocr_pipeline_models/entity.html.j2',
'contributions/tesseract_ocr_pipeline_models/tesseract_ocr_pipeline_model.html.j2',
title=f'{topm.title} {topm.version}',
form=form,
tesseract_ocr_pipeline_model=topm

View File

@ -0,0 +1,13 @@
from flask import request, url_for
from app.models import TesseractOCRPipelineModel
def tesseract_ocr_pipeline_model_dlc():
    '''
    Build the breadcrumb entry for the Tesseract OCR pipeline model
    addressed by the current request.

    Returns a list with a single dict holding the breadcrumb `text` (title
    and version of the model) and the `url` of the model page. Aborts with
    404 if the model does not exist.
    '''
    model_id = request.view_args['tesseract_ocr_pipeline_model_id']
    model = TesseractOCRPipelineModel.query.get_or_404(model_id)
    breadcrumb = {
        'text': f'{model.title} {model.version}',
        'url': url_for('.tesseract_ocr_pipeline_model', tesseract_ocr_pipeline_model_id=model_id)
    }
    return [breadcrumb]

View File

@ -0,0 +1,2 @@
from .. import bp
from . import routes

View File

@ -0,0 +1,7 @@
from flask import abort
from . import bp
@bp.route('/transkribus_htr_pipeline_models')
def transkribus_htr_pipeline_models():
    '''
    Placeholder route that always answers with 503 (Service Unavailable).
    '''
    # `abort` raises, so no explicit return is needed.
    abort(503)

View File

@ -1,34 +1,33 @@
from datetime import datetime
from flask import current_app
from pathlib import Path
import json
import shutil
from app import db
from app.models import User, Corpus, CorpusFile
from datetime import datetime
import json
import os
import shutil
class SandpaperConverter:
def __init__(self, json_db_file: Path, data_dir: Path):
def __init__(self, json_db_file, data_dir):
self.json_db_file = json_db_file
self.data_dir = data_dir
def run(self):
with self.json_db_file.open('r') as f:
json_db: list[dict] = json.load(f)
with open(self.json_db_file, 'r') as f:
json_db = json.loads(f.read())
for json_user in json_db:
if not json_user['confirmed']:
current_app.logger.info(f'Skip unconfirmed user {json_user["username"]}')
continue
user_dir = self.data_dir / f'{json_user["id"]}'
user_dir = os.path.join(self.data_dir, str(json_user['id']))
self.convert_user(json_user, user_dir)
db.session.commit()
def convert_user(self, json_user: dict, user_dir: Path):
def convert_user(self, json_user, user_dir):
current_app.logger.info(f'Create User {json_user["username"]}...')
try:
user = User.create(
user = User(
confirmed=json_user['confirmed'],
email=json_user['email'],
last_seen=datetime.fromtimestamp(json_user['last_seen']),
@ -36,34 +35,47 @@ class SandpaperConverter:
password_hash=json_user['password_hash'], # TODO: Needs to be added manually
username=json_user['username']
)
except OSError:
db.session.add(user)
db.session.flush(objects=[user])
db.session.refresh(user)
try:
user.makedirs()
except OSError as e:
current_app.logger.error(e)
db.session.rollback()
raise Exception('Internal Server Error')
for json_corpus in json_user['corpora'].values():
if not json_corpus['files'].values():
current_app.logger.info(f'Skip empty corpus {json_corpus["title"]}')
continue
corpus_dir = user_dir / 'corpora' / f'{json_corpus["id"]}'
corpus_dir = os.path.join(user_dir, 'corpora', str(json_corpus['id']))
self.convert_corpus(json_corpus, user, corpus_dir)
current_app.logger.info('Done')
def convert_corpus(self, json_corpus: dict, user: User, corpus_dir: Path):
def convert_corpus(self, json_corpus, user, corpus_dir):
current_app.logger.info(f'Create Corpus {json_corpus["title"]}...')
try:
corpus = Corpus.create(
corpus = Corpus(
user=user,
creation_date=datetime.fromtimestamp(json_corpus['creation_date']),
description=json_corpus['description'],
title=json_corpus['title']
)
except OSError:
db.session.add(corpus)
db.session.flush(objects=[corpus])
db.session.refresh(corpus)
try:
corpus.makedirs()
except OSError as e:
current_app.logger.error(e)
db.session.rollback()
raise Exception('Internal Server Error')
for json_corpus_file in json_corpus['files'].values():
self.convert_corpus_file(json_corpus_file, corpus, corpus_dir)
current_app.logger.info('Done')
def convert_corpus_file(self, json_corpus_file: dict, corpus: Corpus, corpus_dir: Path):
def convert_corpus_file(self, json_corpus_file, corpus, corpus_dir):
current_app.logger.info(f'Create CorpusFile {json_corpus_file["title"]}...')
corpus_file = CorpusFile(
corpus=corpus,
@ -87,13 +99,13 @@ class SandpaperConverter:
db.session.refresh(corpus_file)
try:
shutil.copy2(
corpus_dir / json_corpus_file['filename'],
os.path.join(corpus_dir, json_corpus_file['filename']),
corpus_file.path
)
except:
current_app.logger.warning(
'Can not convert corpus file: '
f'{corpus_dir / json_corpus_file["filename"]}'
f'{os.path.join(corpus_dir, json_corpus_file["filename"])}'
' -> '
f'{corpus_file.path}'
)

View File

@ -1,25 +1,69 @@
from flask import current_app
from pathlib import Path
def normalize_vrt_file(input_file: Path, output_file: Path):
def normalize_vrt_file(input_file, output_file):
def check_pos_attribute_order(vrt_lines):
# The following orders are possible:
# since 26.02.2019: 'word,lemma,simple_pos,pos,ner'
# since 26.03.2021: 'word,pos,lemma,simple_pos,ner'
# since 27.01.2022: 'word,pos,lemma,simple_pos'
# This Function tries to find out which order we have by looking at the
# number of attributes and the position of the simple_pos attribute
SIMPLE_POS_LABELS = [
'ADJ', 'ADP', 'ADV', 'AUX', 'CONJ',
'DET', 'INTJ', 'NOUN', 'NUM', 'PART',
'PRON', 'PROPN', 'PUNCT', 'SCONJ', 'SYM',
'VERB', 'X'
]
for line in vrt_lines:
if line.startswith('<'):
continue
pos_attrs = line.rstrip('\n').split('\t')
num_pos_attrs = len(pos_attrs)
if num_pos_attrs == 4:
if pos_attrs[3] in SIMPLE_POS_LABELS:
return ['word', 'pos', 'lemma', 'simple_pos']
continue
elif num_pos_attrs == 5:
if pos_attrs[2] in SIMPLE_POS_LABELS:
return ['word', 'lemma', 'simple_pos', 'pos', 'ner']
elif pos_attrs[3] in SIMPLE_POS_LABELS:
return ['word', 'pos', 'lemma', 'simple_pos', 'ner']
continue
return None
def check_has_ent_as_s_attr(vrt_lines):
for line in vrt_lines:
if line.startswith('<ent'):
return True
return False
def pos_attrs_to_string_1(pos_attrs):
return f'{pos_attrs[0]}\t{pos_attrs[3]}\t{pos_attrs[1]}\t{pos_attrs[2]}\n'
def pos_attrs_to_string_2(pos_attrs):
return f'{pos_attrs[0]}\t{pos_attrs[1]}\t{pos_attrs[2]}\t{pos_attrs[3]}\n'
current_app.logger.info(f'Converting {input_file}...')
with input_file.open() as f:
with open(input_file) as f:
input_vrt_lines = f.readlines()
pos_attr_order = _check_pos_attribute_order(input_vrt_lines)
has_ent_as_s_attr = _check_has_ent_as_s_attr(input_vrt_lines)
pos_attr_order = check_pos_attribute_order(input_vrt_lines)
has_ent_as_s_attr = check_has_ent_as_s_attr(input_vrt_lines)
current_app.logger.info(f'Detected pos_attr_order: [{",".join(pos_attr_order)}]')
current_app.logger.info(f'Detected has_ent_as_s_attr: {has_ent_as_s_attr}')
if pos_attr_order == ['word', 'lemma', 'simple_pos', 'pos', 'ner']:
pos_attrs_to_string_function = _pos_attrs_to_string_1
pos_attrs_to_string_function = pos_attrs_to_string_1
elif pos_attr_order == ['word', 'pos', 'lemma', 'simple_pos', 'ner']:
pos_attrs_to_string_function = _pos_attrs_to_string_2
pos_attrs_to_string_function = pos_attrs_to_string_2
elif pos_attr_order == ['word', 'pos', 'lemma', 'simple_pos']:
pos_attrs_to_string_function = _pos_attrs_to_string_2
pos_attrs_to_string_function = pos_attrs_to_string_2
else:
raise Exception('Can not handle format')
@ -69,49 +113,5 @@ def normalize_vrt_file(input_file: Path, output_file: Path):
current_ent = pos_attrs[4]
output_vrt += pos_attrs_to_string_function(pos_attrs)
with output_file.open(mode='w') as f:
with open(output_file, 'w') as f:
f.write(output_vrt)
def _check_pos_attribute_order(vrt_lines: list[str]) -> list[str]:
# The following orders are possible:
# since 26.02.2019: 'word,lemma,simple_pos,pos,ner'
# since 26.03.2021: 'word,pos,lemma,simple_pos,ner'
# since 27.01.2022: 'word,pos,lemma,simple_pos'
# This Function tries to find out which order we have by looking at the
# number of attributes and the position of the simple_pos attribute
SIMPLE_POS_LABELS = [
'ADJ', 'ADP', 'ADV', 'AUX', 'CONJ', 'DET', 'INTJ', 'NOUN', 'NUM',
'PART', 'PRON', 'PROPN', 'PUNCT', 'SCONJ', 'SYM', 'VERB', 'X'
]
for line in vrt_lines:
if line.startswith('<'):
continue
pos_attrs = line.rstrip('\n').split('\t')
num_pos_attrs = len(pos_attrs)
if num_pos_attrs == 4:
if pos_attrs[3] in SIMPLE_POS_LABELS:
return ['word', 'pos', 'lemma', 'simple_pos']
continue
elif num_pos_attrs == 5:
if pos_attrs[2] in SIMPLE_POS_LABELS:
return ['word', 'lemma', 'simple_pos', 'pos', 'ner']
elif pos_attrs[3] in SIMPLE_POS_LABELS:
return ['word', 'pos', 'lemma', 'simple_pos', 'ner']
continue
# TODO: raise exception "can't determine attribute order"
def _check_has_ent_as_s_attr(vrt_lines: list[str]) -> bool:
for line in vrt_lines:
if line.startswith('<ent'):
return True
return False
def _pos_attrs_to_string_1(pos_attrs: list[str]) -> str:
return f'{pos_attrs[0]}\t{pos_attrs[3]}\t{pos_attrs[1]}\t{pos_attrs[2]}\n'
def _pos_attrs_to_string_2(pos_attrs: list[str]) -> str:
return f'{pos_attrs[0]}\t{pos_attrs[1]}\t{pos_attrs[2]}\t{pos_attrs[3]}\n'

View File

@ -16,4 +16,4 @@ def before_request():
pass
from . import cli, files, followers, routes
from . import cli, files, followers, routes, json_routes

View File

@ -1,7 +1,7 @@
from flask import current_app
from app.models import Corpus, CorpusStatus
import os
import shutil
from app import db
from app.models import Corpus, CorpusStatus
from . import bp
@ -18,17 +18,10 @@ def reset():
]
for corpus in [x for x in Corpus.query.all() if x.status in status]:
print(f'Resetting corpus {corpus}')
corpus_cwb_dir = corpus.path / 'cwb'
corpus_cwb_data_dir = corpus_cwb_dir / 'data'
corpus_cwb_registry_dir = corpus_cwb_dir / 'registry'
try:
shutil.rmtree(corpus.path / 'cwb', ignore_errors=True)
corpus_cwb_dir.mkdir()
corpus_cwb_data_dir.mkdir()
corpus_cwb_registry_dir.mkdir()
except OSError as e:
current_app.logger.error(e)
raise
shutil.rmtree(os.path.join(corpus.path, 'cwb'), ignore_errors=True)
os.mkdir(os.path.join(corpus.path, 'cwb'))
os.mkdir(os.path.join(corpus.path, 'cwb', 'data'))
os.mkdir(os.path.join(corpus.path, 'cwb', 'registry'))
corpus.status = CorpusStatus.UNPREPARED
corpus.num_analysis_sessions = 0
db.session.commit()

View File

@ -1,16 +1,17 @@
from cqi import CQiClient
from cqi.errors import CQiException
from cqi.status import CQiStatus
from flask import current_app
from docker.models.containers import Container
from flask import current_app, session
from flask_login import current_user
from flask_socketio import Namespace
from inspect import signature
from threading import Lock
from typing import Callable, Dict, List, Optional
from app import db, docker_client, hashids, socketio
from app.decorators import socketio_login_required
from app.models import Corpus, CorpusStatus
from . import cqi_extension_functions
from .utils import SessionManager
from . import extensions
'''
@ -18,7 +19,7 @@ This package tunnels the Corpus Query interface (CQi) protocol through
Socket.IO (SIO) by tunneling CQi API calls through an event called "exec".
Basic concept:
1. A client connects to the namespace.
1. A client connects to the "/cqi_over_sio" namespace.
2. The client emits the "init" event and provides a corpus id for the corpus
that should be analysed in this session.
1.1 The analysis session counter of the corpus is incremented.
@ -27,17 +28,17 @@ Basic concept:
1.4 Connect the CQiClient to the server.
1.5 Save the CQiClient, the Lock and the corpus id in the session for
subsequent use.
3. The client emits "exec" events, within which it provides the name of a CQi
API function and the corresponding arguments.
3.1 The "exec" event handler will execute the function, make sure that
the result is serializable and returns the result back to the client.
4. The client disconnects from the namespace
4.1 The analysis session counter of the corpus is decremented.
4.2 The CQiClient and (Mutex) Lock belonging to it are torn down.
2. The client emits the "exec" event and provides the name of a CQi API
function and its arguments (optional).
- The event "exec" handler will execute the function, make sure that the
result is serializable and returns the result back to the client.
4. Wait for more events
5. The client disconnects from the "/cqi_over_sio" namespace
1.1 The analysis session counter of the corpus is decremented.
1.2 The CQiClient and (Mutex) Lock belonging to it are torn down.
'''
CQI_API_FUNCTION_NAMES = [
CQI_API_FUNCTION_NAMES: List[str] = [
'ask_feature_cl_2_3',
'ask_feature_cqi_1_0',
'ask_feature_cqp_2_3',
@ -85,91 +86,68 @@ CQI_API_FUNCTION_NAMES = [
]
CQI_EXTENSION_FUNCTION_NAMES = [
'ext_corpus_update_db',
'ext_corpus_static_data',
'ext_corpus_paginate_corpus',
'ext_cqp_paginate_subcorpus',
'ext_cqp_partial_export_subcorpus',
'ext_cqp_export_subcorpus',
]
class CQiOverSocketIONamespace(Namespace):
class CQiNamespace(Namespace):
@socketio_login_required
def on_connect(self):
pass
@socketio_login_required
def on_init(self, corpus_hashid: str) -> dict:
corpus_id = hashids.decode(corpus_hashid)
if not isinstance(corpus_id, int):
return {'code': 400, 'msg': 'Bad Request'}
corpus = Corpus.query.get(corpus_id)
if corpus is None:
def on_init(self, db_corpus_hashid: str):
db_corpus_id: int = hashids.decode(db_corpus_hashid)
db_corpus: Optional[Corpus] = Corpus.query.get(db_corpus_id)
if db_corpus is None:
return {'code': 404, 'msg': 'Not Found'}
if not (
corpus.user == current_user
or current_user.is_following_corpus(corpus)
or current_user.is_administrator
):
if not (db_corpus.user == current_user
or current_user.is_following_corpus(db_corpus)
or current_user.is_administrator()):
return {'code': 403, 'msg': 'Forbidden'}
if corpus.status not in [
if db_corpus.status not in [
CorpusStatus.BUILT,
CorpusStatus.STARTING_ANALYSIS_SESSION,
CorpusStatus.RUNNING_ANALYSIS_SESSION,
CorpusStatus.CANCELING_ANALYSIS_SESSION
]:
return {'code': 424, 'msg': 'Failed Dependency'}
corpus.num_analysis_sessions = Corpus.num_analysis_sessions + 1
if db_corpus.num_analysis_sessions is None:
db_corpus.num_analysis_sessions = 0
db.session.commit()
retry_counter = 20
while corpus.status != CorpusStatus.RUNNING_ANALYSIS_SESSION:
db_corpus.num_analysis_sessions = Corpus.num_analysis_sessions + 1
db.session.commit()
retry_counter: int = 20
while db_corpus.status != CorpusStatus.RUNNING_ANALYSIS_SESSION:
if retry_counter == 0:
corpus.num_analysis_sessions = Corpus.num_analysis_sessions - 1
db_corpus.num_analysis_sessions = Corpus.num_analysis_sessions - 1
db.session.commit()
return {'code': 408, 'msg': 'Request Timeout'}
socketio.sleep(3)
retry_counter -= 1
db.session.refresh(corpus)
cqpserver_container_name = f'nopaque-cqpserver-{corpus_id}'
cqpserver_container = docker_client.containers.get(cqpserver_container_name)
cqpserver_ip_address = cqpserver_container.attrs['NetworkSettings']['Networks'][current_app.config['NOPAQUE_DOCKER_NETWORK_NAME']]['IPAddress']
cqi_client = CQiClient(cqpserver_ip_address)
cqi_client_lock = Lock()
SessionManager.setup()
SessionManager.set_corpus_id(corpus_id)
SessionManager.set_cqi_client(cqi_client)
SessionManager.set_cqi_client_lock(cqi_client_lock)
db.session.refresh(db_corpus)
# cqi_client: CQiClient = CQiClient(f'cqpserver_{db_corpus_id}')
cqpserver_container_name: str = f'cqpserver_{db_corpus_id}'
cqpserver_container: Container = docker_client.containers.get(cqpserver_container_name)
cqpserver_host: str = cqpserver_container.attrs['NetworkSettings']['Networks'][current_app.config['NOPAQUE_DOCKER_NETWORK_NAME']]['IPAddress']
cqi_client: CQiClient = CQiClient(cqpserver_host)
session['cqi_over_sio'] = {
'cqi_client': cqi_client,
'cqi_client_lock': Lock(),
'db_corpus_id': db_corpus_id
}
return {'code': 200, 'msg': 'OK'}
@socketio_login_required
def on_exec(self, fn_name: str, fn_args: dict = {}) -> dict:
def on_exec(self, fn_name: str, fn_args: Dict = {}):
try:
cqi_client = SessionManager.get_cqi_client()
cqi_client_lock = SessionManager.get_cqi_client_lock()
cqi_client: CQiClient = session['cqi_over_sio']['cqi_client']
cqi_client_lock: Lock = session['cqi_over_sio']['cqi_client_lock']
except KeyError:
return {'code': 424, 'msg': 'Failed Dependency'}
if fn_name in CQI_API_FUNCTION_NAMES:
fn = getattr(cqi_client.api, fn_name)
elif fn_name in CQI_EXTENSION_FUNCTION_NAMES:
fn = getattr(cqi_extension_functions, fn_name)
fn: Callable = getattr(cqi_client.api, fn_name)
elif fn_name in extensions.CQI_EXTENSION_FUNCTION_NAMES:
fn: Callable = getattr(extensions, fn_name)
else:
return {'code': 400, 'msg': 'Bad Request'}
for param in signature(fn).parameters.values():
# Check if the parameter is optional or required
# The following is true for required parameters
if param.default is param.empty:
if param.name not in fn_args:
return {'code': 400, 'msg': 'Bad Request'}
@ -178,7 +156,6 @@ class CQiOverSocketIONamespace(Namespace):
continue
if type(fn_args[param.name]) is not param.annotation:
return {'code': 400, 'msg': 'Bad Request'}
cqi_client_lock.acquire()
try:
fn_return_value = fn(**fn_args)
@ -196,7 +173,6 @@ class CQiOverSocketIONamespace(Namespace):
}
finally:
cqi_client_lock.release()
if isinstance(fn_return_value, CQiStatus):
payload = {
'code': fn_return_value.code,
@ -204,31 +180,27 @@ class CQiOverSocketIONamespace(Namespace):
}
else:
payload = fn_return_value
return {'code': 200, 'msg': 'OK', 'payload': payload}
def on_disconnect(self):
try:
corpus_id = SessionManager.get_corpus_id()
cqi_client = SessionManager.get_cqi_client()
cqi_client_lock = SessionManager.get_cqi_client_lock()
SessionManager.teardown()
cqi_client: CQiClient = session['cqi_over_sio']['cqi_client']
cqi_client_lock: Lock = session['cqi_over_sio']['cqi_client_lock']
db_corpus_id: int = session['cqi_over_sio']['db_corpus_id']
except KeyError:
return
cqi_client_lock.acquire()
try:
session.pop('cqi_over_sio')
except KeyError:
pass
try:
cqi_client.api.ctrl_bye()
except (BrokenPipeError, CQiException):
pass
cqi_client_lock.release()
corpus = Corpus.query.get(corpus_id)
if corpus is None:
db_corpus: Optional[Corpus] = Corpus.query.get(db_corpus_id)
if db_corpus is None:
return
corpus.num_analysis_sessions = Corpus.num_analysis_sessions - 1
db_corpus.num_analysis_sessions = Corpus.num_analysis_sessions - 1
db.session.commit()

View File

@ -1,39 +1,56 @@
from collections import Counter
from cqi import CQiClient
from cqi.models.corpora import Corpus as CQiCorpus
from cqi.models.subcorpora import Subcorpus as CQiSubcorpus
from cqi.models.attributes import (
PositionalAttribute as CQiPositionalAttribute,
StructuralAttribute as CQiStructuralAttribute
)
from cqi.status import StatusOk as CQiStatusOk
from flask import current_app
from flask import session
from typing import Dict, List
import gzip
import json
import math
import os
from app import db
from app.models import Corpus
from .utils import SessionManager
from .utils import lookups_by_cpos, partial_export_subcorpus, export_subcorpus
CQI_EXTENSION_FUNCTION_NAMES: List[str] = [
'ext_corpus_update_db',
'ext_corpus_static_data',
'ext_corpus_paginate_corpus',
'ext_cqp_paginate_subcorpus',
'ext_cqp_partial_export_subcorpus',
'ext_cqp_export_subcorpus',
]
def ext_corpus_update_db(corpus: str) -> CQiStatusOk:
corpus_id = SessionManager.get_corpus_id()
cqi_client = SessionManager.get_cqi_client()
db_corpus = Corpus.query.get(corpus_id)
cqi_corpus = cqi_client.corpora.get(corpus)
cqi_client: CQiClient = session['cqi_over_sio']['cqi_client']
db_corpus_id: int = session['cqi_over_sio']['db_corpus_id']
db_corpus: Corpus = Corpus.query.get(db_corpus_id)
cqi_corpus: CQiCorpus = cqi_client.corpora.get(corpus)
db_corpus.num_tokens = cqi_corpus.size
db.session.commit()
return CQiStatusOk()
def ext_corpus_static_data(corpus: str) -> dict:
corpus_id = SessionManager.get_corpus_id()
db_corpus = Corpus.query.get(corpus_id)
def ext_corpus_static_data(corpus: str) -> Dict:
db_corpus_id: int = session['cqi_over_sio']['db_corpus_id']
db_corpus: Corpus = Corpus.query.get(db_corpus_id)
static_data_file_path = db_corpus.path / 'cwb' / 'static.json.gz'
if static_data_file_path.exists():
with static_data_file_path.open('rb') as f:
static_data_file_path: str = os.path.join(db_corpus.path, 'cwb', 'static.json.gz')
if os.path.exists(static_data_file_path):
with open(static_data_file_path, 'rb') as f:
return f.read()
cqi_client = SessionManager.get_cqi_client()
cqi_corpus = cqi_client.corpora.get(corpus)
cqi_p_attrs = cqi_corpus.positional_attributes.list()
cqi_s_attrs = cqi_corpus.structural_attributes.list()
cqi_client: CQiClient = session['cqi_over_sio']['cqi_client']
cqi_corpus: CQiCorpus = cqi_client.corpora.get(corpus)
cqi_p_attrs: List[CQiPositionalAttribute] = cqi_corpus.positional_attributes.list()
cqi_s_attrs: List[CQiStructuralAttribute] = cqi_corpus.structural_attributes.list()
static_data = {
'corpus': {
@ -46,21 +63,21 @@ def ext_corpus_static_data(corpus: str) -> dict:
}
for p_attr in cqi_p_attrs:
current_app.logger.info(f'corpus.freqs.{p_attr.name}')
print(f'corpus.freqs.{p_attr.name}')
static_data['corpus']['freqs'][p_attr.name] = []
p_attr_id_list = list(range(p_attr.lexicon_size))
p_attr_id_list: List[int] = list(range(p_attr.lexicon_size))
static_data['corpus']['freqs'][p_attr.name].extend(p_attr.freqs_by_ids(p_attr_id_list))
del p_attr_id_list
current_app.logger.info(f'p_attrs.{p_attr.name}')
print(f'p_attrs.{p_attr.name}')
static_data['p_attrs'][p_attr.name] = []
cpos_list = list(range(cqi_corpus.size))
cpos_list: List[int] = list(range(cqi_corpus.size))
static_data['p_attrs'][p_attr.name].extend(p_attr.ids_by_cpos(cpos_list))
del cpos_list
current_app.logger.info(f'values.p_attrs.{p_attr.name}')
print(f'values.p_attrs.{p_attr.name}')
static_data['values']['p_attrs'][p_attr.name] = []
p_attr_id_list = list(range(p_attr.lexicon_size))
p_attr_id_list: List[int] = list(range(p_attr.lexicon_size))
static_data['values']['p_attrs'][p_attr.name].extend(p_attr.values_by_ids(p_attr_id_list))
del p_attr_id_list
@ -76,9 +93,9 @@ def ext_corpus_static_data(corpus: str) -> dict:
# Note: Needs more testing, don't use it in production #
##############################################################
cqi_corpus.query('Last', f'<{s_attr.name}> []* </{s_attr.name}>;')
cqi_subcorpus = cqi_corpus.subcorpora.get('Last')
first_match = 0
last_match = cqi_subcorpus.size - 1
cqi_subcorpus: CQiSubcorpus = cqi_corpus.subcorpora.get('Last')
first_match: int = 0
last_match: int = cqi_subcorpus.size - 1
match_boundaries = zip(
range(first_match, last_match + 1),
cqi_subcorpus.dump(
@ -96,7 +113,7 @@ def ext_corpus_static_data(corpus: str) -> dict:
del cqi_subcorpus, first_match, last_match
for id, lbound, rbound in match_boundaries:
static_data['s_attrs'][s_attr.name]['lexicon'].append({})
current_app.logger.info(f's_attrs.{s_attr.name}.lexicon.{id}.bounds')
print(f's_attrs.{s_attr.name}.lexicon.{id}.bounds')
static_data['s_attrs'][s_attr.name]['lexicon'][id]['bounds'] = [lbound, rbound]
del match_boundaries
@ -108,33 +125,33 @@ def ext_corpus_static_data(corpus: str) -> dict:
# This is a very slow operation, thats why we only use it for
# the text attribute
lbound, rbound = s_attr.cpos_by_id(id)
current_app.logger.info(f's_attrs.{s_attr.name}.lexicon.{id}.bounds')
print(f's_attrs.{s_attr.name}.lexicon.{id}.bounds')
static_data['s_attrs'][s_attr.name]['lexicon'][id]['bounds'] = [lbound, rbound]
static_data['s_attrs'][s_attr.name]['lexicon'][id]['freqs'] = {}
cpos_list = list(range(lbound, rbound + 1))
cpos_list: List[int] = list(range(lbound, rbound + 1))
for p_attr in cqi_p_attrs:
p_attr_ids = []
p_attr_ids: List[int] = []
p_attr_ids.extend(p_attr.ids_by_cpos(cpos_list))
current_app.logger.info(f's_attrs.{s_attr.name}.lexicon.{id}.freqs.{p_attr.name}')
print(f's_attrs.{s_attr.name}.lexicon.{id}.freqs.{p_attr.name}')
static_data['s_attrs'][s_attr.name]['lexicon'][id]['freqs'][p_attr.name] = dict(Counter(p_attr_ids))
del p_attr_ids
del cpos_list
sub_s_attrs = cqi_corpus.structural_attributes.list(filters={'part_of': s_attr})
current_app.logger.info(f's_attrs.{s_attr.name}.values')
sub_s_attrs: List[CQiStructuralAttribute] = cqi_corpus.structural_attributes.list(filters={'part_of': s_attr})
print(f's_attrs.{s_attr.name}.values')
static_data['s_attrs'][s_attr.name]['values'] = [
sub_s_attr.name[(len(s_attr.name) + 1):]
for sub_s_attr in sub_s_attrs
]
s_attr_id_list = list(range(s_attr.size))
sub_s_attr_values = []
s_attr_id_list: List[int] = list(range(s_attr.size))
sub_s_attr_values: List[str] = []
for sub_s_attr in sub_s_attrs:
tmp = []
tmp.extend(sub_s_attr.values_by_ids(s_attr_id_list))
sub_s_attr_values.append(tmp)
del tmp
del s_attr_id_list
current_app.logger.info(f'values.s_attrs.{s_attr.name}')
print(f'values.s_attrs.{s_attr.name}')
static_data['values']['s_attrs'][s_attr.name] = [
{
s_attr_value_name: sub_s_attr_values[s_attr_value_name_idx][s_attr_id]
@ -144,11 +161,11 @@ def ext_corpus_static_data(corpus: str) -> dict:
} for s_attr_id in range(0, s_attr.size)
]
del sub_s_attr_values
current_app.logger.info('Saving static data to file')
print('Saving static data to file')
with gzip.open(static_data_file_path, 'wt') as f:
json.dump(static_data, f)
del static_data
current_app.logger.info('Sending static data to client')
print('Sending static data to client')
with open(static_data_file_path, 'rb') as f:
return f.read()
@ -157,8 +174,8 @@ def ext_corpus_paginate_corpus(
corpus: str,
page: int = 1,
per_page: int = 20
) -> dict:
cqi_client = SessionManager.get_cqi_client()
) -> Dict:
cqi_client: CQiClient = session['cqi_over_sio']['cqi_client']
cqi_corpus = cqi_client.corpora.get(corpus)
# Sanity checks
if (
@ -173,7 +190,7 @@ def ext_corpus_paginate_corpus(
first_cpos = (page - 1) * per_page
last_cpos = min(cqi_corpus.size, first_cpos + per_page)
cpos_list = [*range(first_cpos, last_cpos)]
lookups = _lookups_by_cpos(cqi_corpus, cpos_list)
lookups = lookups_by_cpos(cqi_corpus, cpos_list)
payload = {}
# the items for the current page
payload['items'] = [cpos_list]
@ -203,9 +220,9 @@ def ext_cqp_paginate_subcorpus(
context: int = 50,
page: int = 1,
per_page: int = 20
) -> dict:
) -> Dict:
corpus_name, subcorpus_name = subcorpus.split(':', 1)
cqi_client = SessionManager.get_cqi_client()
cqi_client: CQiClient = session['cqi_over_sio']['cqi_client']
cqi_corpus = cqi_client.corpora.get(corpus_name)
cqi_subcorpus = cqi_corpus.subcorpora.get(subcorpus_name)
# Sanity checks
@ -220,7 +237,7 @@ def ext_cqp_paginate_subcorpus(
return {'code': 416, 'msg': 'Range Not Satisfiable'}
offset = (page - 1) * per_page
cutoff = per_page
cqi_results_export = _export_subcorpus(
cqi_results_export = export_subcorpus(
cqi_subcorpus, context=context, cutoff=cutoff, offset=offset)
payload = {}
# the items for the current page
@ -250,147 +267,22 @@ def ext_cqp_partial_export_subcorpus(
subcorpus: str,
match_id_list: list,
context: int = 50
) -> dict:
) -> Dict:
corpus_name, subcorpus_name = subcorpus.split(':', 1)
cqi_client = SessionManager.get_cqi_client()
cqi_client: CQiClient = session['cqi_over_sio']['cqi_client']
cqi_corpus = cqi_client.corpora.get(corpus_name)
cqi_subcorpus = cqi_corpus.subcorpora.get(subcorpus_name)
cqi_subcorpus_partial_export = _partial_export_subcorpus(cqi_subcorpus, match_id_list, context=context)
cqi_subcorpus_partial_export = partial_export_subcorpus(cqi_subcorpus, match_id_list, context=context)
return cqi_subcorpus_partial_export
def ext_cqp_export_subcorpus(subcorpus: str, context: int = 50) -> dict:
def ext_cqp_export_subcorpus(
subcorpus: str,
context: int = 50
) -> Dict:
corpus_name, subcorpus_name = subcorpus.split(':', 1)
cqi_client = SessionManager.get_cqi_client()
cqi_client: CQiClient = session['cqi_over_sio']['cqi_client']
cqi_corpus = cqi_client.corpora.get(corpus_name)
cqi_subcorpus = cqi_corpus.subcorpora.get(subcorpus_name)
cqi_subcorpus_export = _export_subcorpus(cqi_subcorpus, context=context)
cqi_subcorpus_export = export_subcorpus(cqi_subcorpus, context=context)
return cqi_subcorpus_export
def _lookups_by_cpos(corpus: CQiCorpus, cpos_list: list[int]) -> dict:
lookups = {}
lookups['cpos_lookup'] = {cpos: {} for cpos in cpos_list}
for attr in corpus.positional_attributes.list():
cpos_attr_values = attr.values_by_cpos(cpos_list)
for i, cpos in enumerate(cpos_list):
lookups['cpos_lookup'][cpos][attr.name] = cpos_attr_values[i]
for attr in corpus.structural_attributes.list():
# We only want to iterate over non subattributes, identifiable by
# attr.has_values == False
if attr.has_values:
continue
cpos_attr_ids = attr.ids_by_cpos(cpos_list)
for i, cpos in enumerate(cpos_list):
if cpos_attr_ids[i] == -1:
continue
lookups['cpos_lookup'][cpos][attr.name] = cpos_attr_ids[i]
occured_attr_ids = [x for x in set(cpos_attr_ids) if x != -1]
if len(occured_attr_ids) == 0:
continue
subattrs = corpus.structural_attributes.list(filters={'part_of': attr})
if len(subattrs) == 0:
continue
lookup_name = f'{attr.name}_lookup'
lookups[lookup_name] = {}
for attr_id in occured_attr_ids:
lookups[lookup_name][attr_id] = {}
for subattr in subattrs:
subattr_name = subattr.name[(len(attr.name) + 1):] # noqa
for i, subattr_value in enumerate(subattr.values_by_ids(occured_attr_ids)): # noqa
lookups[lookup_name][occured_attr_ids[i]][subattr_name] = subattr_value # noqa
return lookups
def _partial_export_subcorpus(
subcorpus: CQiSubcorpus,
match_id_list: list[int],
context: int = 25
) -> dict:
if subcorpus.size == 0:
return {'matches': []}
match_boundaries = []
for match_id in match_id_list:
if match_id < 0 or match_id >= subcorpus.size:
continue
match_boundaries.append(
(
match_id,
subcorpus.dump(subcorpus.fields['match'], match_id, match_id)[0],
subcorpus.dump(subcorpus.fields['matchend'], match_id, match_id)[0]
)
)
cpos_set = set()
matches = []
for match_boundary in match_boundaries:
match_num, match_start, match_end = match_boundary
c = (match_start, match_end)
if match_start == 0 or context == 0:
lc = None
cpos_list_lbound = match_start
else:
lc_lbound = max(0, (match_start - context))
lc_rbound = match_start - 1
lc = (lc_lbound, lc_rbound)
cpos_list_lbound = lc_lbound
if match_end == (subcorpus.collection.corpus.size - 1) or context == 0:
rc = None
cpos_list_rbound = match_end
else:
rc_lbound = match_end + 1
rc_rbound = min(
(match_end + context),
(subcorpus.collection.corpus.size - 1)
)
rc = (rc_lbound, rc_rbound)
cpos_list_rbound = rc_rbound
match = {'num': match_num, 'lc': lc, 'c': c, 'rc': rc}
matches.append(match)
cpos_set.update(range(cpos_list_lbound, cpos_list_rbound + 1))
lookups = _lookups_by_cpos(subcorpus.collection.corpus, list(cpos_set))
return {'matches': matches, **lookups}
def _export_subcorpus(
subcorpus: CQiSubcorpus,
context: int = 25,
cutoff: float = float('inf'),
offset: int = 0
) -> dict:
if subcorpus.size == 0:
return {'matches': []}
first_match = max(0, offset)
last_match = min((offset + cutoff - 1), (subcorpus.size - 1))
match_boundaries = zip(
range(first_match, last_match + 1),
subcorpus.dump(subcorpus.fields['match'], first_match, last_match),
subcorpus.dump(subcorpus.fields['matchend'], first_match, last_match)
)
cpos_set = set()
matches = []
for match_num, match_start, match_end in match_boundaries:
c = (match_start, match_end)
if match_start == 0 or context == 0:
lc = None
cpos_list_lbound = match_start
else:
lc_lbound = max(0, (match_start - context))
lc_rbound = match_start - 1
lc = (lc_lbound, lc_rbound)
cpos_list_lbound = lc_lbound
if match_end == (subcorpus.collection.corpus.size - 1) or context == 0:
rc = None
cpos_list_rbound = match_end
else:
rc_lbound = match_end + 1
rc_rbound = min(
(match_end + context),
(subcorpus.collection.corpus.size - 1)
)
rc = (rc_lbound, rc_rbound)
cpos_list_rbound = rc_rbound
match = {'num': match_num, 'lc': lc, 'c': c, 'rc': rc}
matches.append(match)
cpos_set.update(range(cpos_list_lbound, cpos_list_rbound + 1))
lookups = _lookups_by_cpos(subcorpus.collection.corpus, list(cpos_set))
return {'matches': matches, **lookups}

View File

@ -0,0 +1,131 @@
from cqi.models.corpora import Corpus as CQiCorpus
from cqi.models.subcorpora import Subcorpus as CQiSubcorpus
from typing import Dict, List
def lookups_by_cpos(corpus: CQiCorpus, cpos_list: List[int]) -> Dict:
    """Build lookup tables describing the given corpus positions.

    The result always contains a ``'cpos_lookup'`` entry mapping every
    corpus position in ``cpos_list`` to a dict with its positional
    attribute values and the ids of the structural attributes found at
    that position. For every structural attribute that occurs at one of
    the positions and has sub-attributes, an additional
    ``'<attribute name>_lookup'`` entry maps each occurring id to its
    sub-attribute values.
    """
    cpos_lookup = {cpos: {} for cpos in cpos_list}
    lookups = {'cpos_lookup': cpos_lookup}
    for p_attr in corpus.positional_attributes.list():
        p_attr_values: List[str] = p_attr.values_by_cpos(cpos_list)
        for idx, cpos in enumerate(cpos_list):
            cpos_lookup[cpos][p_attr.name] = p_attr_values[idx]
    for s_attr in corpus.structural_attributes.list():
        # Sub-attributes (has_values == True) are not iterated directly;
        # they are resolved through their parent attribute further down.
        if s_attr.has_values:
            continue
        s_attr_ids: List[int] = s_attr.ids_by_cpos(cpos_list)
        for idx, cpos in enumerate(cpos_list):
            s_attr_id = s_attr_ids[idx]
            # Ids of -1 are skipped (presumably: the position is not
            # covered by this attribute -- TODO confirm against CQi docs).
            if s_attr_id != -1:
                cpos_lookup[cpos][s_attr.name] = s_attr_id
        seen_ids = [x for x in set(s_attr_ids) if x != -1]
        if len(seen_ids) == 0:
            continue
        sub_attrs = corpus.structural_attributes.list(filters={'part_of': s_attr})
        if len(sub_attrs) == 0:
            continue
        id_lookup = {seen_id: {} for seen_id in seen_ids}
        lookups[f'{s_attr.name}_lookup'] = id_lookup
        # Sub-attribute names are "<parent>_<name>"; strip the parent prefix.
        prefix_length = len(s_attr.name) + 1
        for sub_attr in sub_attrs:
            sub_attr_name = sub_attr.name[prefix_length:]
            sub_attr_values = sub_attr.values_by_ids(seen_ids)
            for idx, sub_attr_value in enumerate(sub_attr_values):
                id_lookup[seen_ids[idx]][sub_attr_name] = sub_attr_value
    return lookups
def partial_export_subcorpus(
    subcorpus: CQiSubcorpus,
    match_id_list: List[int],
    context: int = 25
) -> Dict:
    """Export selected matches of a subcorpus together with their context.

    Match ids outside ``0 <= id < subcorpus.size`` are ignored. Every
    exported match is a dict with its number (``'num'``), the match span
    (``'c'``) and the left/right context spans (``'lc'``/``'rc'``), which
    are ``None`` when ``context`` is 0 or the match touches the respective
    corpus boundary. The lookups for all covered corpus positions (see
    ``lookups_by_cpos``) are merged into the returned dict.
    """
    if subcorpus.size == 0:
        return {"matches": []}
    corpus = subcorpus.collection.corpus
    matches = []
    cpos_set = set()
    for match_id in match_id_list:
        if not (0 <= match_id < subcorpus.size):
            continue
        # Dump a one-element range to get the boundaries of this match.
        match_start = subcorpus.dump(subcorpus.fields['match'], match_id, match_id)[0]
        match_end = subcorpus.dump(subcorpus.fields['matchend'], match_id, match_id)[0]
        last_cpos = corpus.size - 1
        lc = rc = None
        window_start, window_end = match_start, match_end
        if context != 0 and match_start != 0:
            window_start = max(0, (match_start - context))
            lc = (window_start, match_start - 1)
        if context != 0 and match_end != last_cpos:
            window_end = min((match_end + context), last_cpos)
            rc = (match_end + 1, window_end)
        matches.append(
            {'num': match_id, 'lc': lc, 'c': (match_start, match_end), 'rc': rc}
        )
        cpos_set.update(range(window_start, window_end + 1))
    lookups = lookups_by_cpos(corpus, list(cpos_set))
    return {'matches': matches, **lookups}
def export_subcorpus(
    subcorpus: CQiSubcorpus,
    context: int = 25,
    cutoff: float = float('inf'),
    offset: int = 0
) -> Dict:
    """Export a contiguous range of matches of a subcorpus with context.

    Starting at match number ``offset``, at most ``cutoff`` matches are
    exported. Every exported match is a dict with its number (``'num'``),
    the match span (``'c'``) and the left/right context spans
    (``'lc'``/``'rc'``), which are ``None`` when ``context`` is 0 or the
    match touches the respective corpus boundary. The lookups for all
    covered corpus positions (see ``lookups_by_cpos``) are merged into the
    returned dict.
    """
    if subcorpus.size == 0:
        return {"matches": []}
    first_match = max(0, offset)
    last_match = min((offset + cutoff - 1), (subcorpus.size - 1))
    match_nums = range(first_match, last_match + 1)
    match_starts = subcorpus.dump(subcorpus.fields['match'], first_match, last_match)
    match_ends = subcorpus.dump(subcorpus.fields['matchend'], first_match, last_match)
    corpus = subcorpus.collection.corpus
    matches = []
    cpos_set = set()
    for match_num, match_start, match_end in zip(match_nums, match_starts, match_ends):
        last_cpos = corpus.size - 1
        lc = rc = None
        window_start, window_end = match_start, match_end
        if context != 0 and match_start != 0:
            window_start = max(0, (match_start - context))
            lc = (window_start, match_start - 1)
        if context != 0 and match_end != last_cpos:
            window_end = min((match_end + context), last_cpos)
            rc = (match_end + 1, window_end)
        matches.append(
            {'num': match_num, 'lc': lc, 'c': (match_start, match_end), 'rc': rc}
        )
        cpos_set.update(range(window_start, window_end + 1))
    lookups = lookups_by_cpos(corpus, list(cpos_set))
    return {'matches': matches, **lookups}

View File

@ -10,7 +10,7 @@ def corpus_follower_permission_required(*permissions):
def decorated_function(*args, **kwargs):
corpus_id = kwargs.get('corpus_id')
corpus = Corpus.query.get_or_404(corpus_id)
if not (corpus.user == current_user or current_user.is_administrator):
if not (corpus.user == current_user or current_user.is_administrator()):
cfa = CorpusFollowerAssociation.query.filter_by(corpus_id=corpus_id, follower_id=current_user.id).first()
if cfa is None:
abort(403)
@ -26,7 +26,7 @@ def corpus_owner_or_admin_required(f):
def decorated_function(*args, **kwargs):
corpus_id = kwargs.get('corpus_id')
corpus = Corpus.query.get_or_404(corpus_id)
if not (corpus.user == current_user or current_user.is_administrator):
if not (corpus.user == current_user or current_user.is_administrator()):
abort(403)
return f(*args, **kwargs)
return decorated_function

45
app/corpora/events.py Normal file
View File

@ -0,0 +1,45 @@
from flask_login import current_user
from flask_socketio import join_room
from app import hashids, socketio
from app.decorators import socketio_login_required
from app.models import Corpus
@socketio.on('GET /corpora/<corpus_id>')
@socketio_login_required
def get_corpus(corpus_hashid):
    """Socket.IO handler returning a corpus in an HTTP-like response dict.

    Args:
        corpus_hashid: Hashid of the requested corpus.

    Returns:
        A dict with an ``'options'`` entry carrying ``status`` and
        ``statusText`` (404 if the corpus does not exist, 403 if the
        current user may not see it, 200 otherwise). On success the dict
        additionally has a ``'body'`` entry with the serialized corpus and
        a ``'headers'`` mapping inside ``'options'``.
    """
    # NOTE(review): if `hashids.decode` returns a tuple (as the `hashids`
    # package does), an undecodable hashid yields an empty tuple here --
    # confirm that `Corpus.query.get` handles that as intended.
    corpus_id = hashids.decode(corpus_hashid)
    corpus = Corpus.query.get(corpus_id)
    if corpus is None:
        return {'options': {'status': 404, 'statusText': 'Not found'}}
    if not (
        corpus.is_public
        or corpus.user == current_user
        or current_user.is_administrator()
    ):
        return {'options': {'status': 403, 'statusText': 'Forbidden'}}
    return {
        'body': corpus.to_json_serializable(),
        'options': {
            'status': 200,
            'statusText': 'OK',
            # Must be a header-name -> value mapping. The previous set
            # literal {'Content-Type: application/json'} is neither a
            # header dict nor JSON-serializable.
            'headers': {'Content-Type': 'application/json'}
        }
    }
@socketio.on('SUBSCRIBE /corpora/<corpus_id>')
@socketio_login_required
def subscribe_corpus(corpus_hashid):
    """Socket.IO handler that joins the caller to a corpus' room.

    Responds with an HTTP-like ``'options'`` dict: 404 if the corpus does
    not exist, 403 if the current user may not see it, and 200 after the
    room ``/corpora/<hashid>`` has been joined.
    """
    corpus = Corpus.query.get(hashids.decode(corpus_hashid))
    if corpus is None:
        return {'options': {'status': 404, 'statusText': 'Not found'}}
    # Public corpora are open to everyone; otherwise only owner and admins
    may_subscribe = (
        corpus.is_public
        or corpus.user == current_user
        or current_user.is_administrator()
    )
    if not may_subscribe:
        return {'options': {'status': 403, 'statusText': 'Forbidden'}}
    room = f'/corpora/{corpus.hashid}'
    join_room(room)
    return {'options': {'status': 200, 'statusText': 'OK'}}

View File

@ -0,0 +1,2 @@
from .. import bp
from . import json_routes, routes

View File

@ -1,7 +1,7 @@
from flask import current_app
from flask import abort, current_app
from threading import Thread
from app.decorators import content_negotiation
from app import db
from app.decorators import content_negotiation
from app.models import CorpusFile
from ..decorators import corpus_follower_permission_required
from . import bp

View File

@ -6,19 +6,25 @@ from flask import (
send_from_directory,
url_for
)
from flask_breadcrumbs import register_breadcrumb
import os
from app import db
from app.models import Corpus, CorpusFile, CorpusStatus
from ..decorators import corpus_follower_permission_required
from ..utils import corpus_endpoint_arguments_constructor as corpus_eac
from . import bp
from .forms import CreateCorpusFileForm, UpdateCorpusFileForm
from .utils import corpus_file_dynamic_list_constructor as corpus_file_dlc
@bp.route('/<hashid:corpus_id>/files')
@register_breadcrumb(bp, '.entity.files', 'Files', endpoint_arguments_constructor=corpus_eac)
def corpus_files(corpus_id):
return redirect(url_for('.corpus', _anchor='files', corpus_id=corpus_id))
@bp.route('/<hashid:corpus_id>/files/create', methods=['GET', 'POST'])
@register_breadcrumb(bp, '.entity.files.create', 'Create', endpoint_arguments_constructor=corpus_eac)
@corpus_follower_permission_required('MANAGE_FILES')
def create_corpus_file(corpus_id):
corpus = Corpus.query.get_or_404(corpus_id)
@ -60,6 +66,7 @@ def create_corpus_file(corpus_id):
@bp.route('/<hashid:corpus_id>/files/<hashid:corpus_file_id>', methods=['GET', 'POST'])
@register_breadcrumb(bp, '.entity.files.entity', '', dynamic_list_constructor=corpus_file_dlc)
@corpus_follower_permission_required('MANAGE_FILES')
def corpus_file(corpus_id, corpus_file_id):
corpus_file = CorpusFile.query.filter_by(corpus_id=corpus_id, id=corpus_file_id).first_or_404()
@ -85,9 +92,9 @@ def corpus_file(corpus_id, corpus_file_id):
def download_corpus_file(corpus_id, corpus_file_id):
corpus_file = CorpusFile.query.filter_by(corpus_id=corpus_id, id=corpus_file_id).first_or_404()
return send_from_directory(
corpus_file.path.parent,
corpus_file.path.name,
os.path.dirname(corpus_file.path),
os.path.basename(corpus_file.path),
as_attachment=True,
download_name=corpus_file.filename,
attachment_filename=corpus_file.filename,
mimetype=corpus_file.mimetype
)

View File

@ -0,0 +1,15 @@
from flask import request, url_for
from app.models import CorpusFile
from ..utils import corpus_endpoint_arguments_constructor as corpus_eac
def corpus_file_dynamic_list_constructor():
    """Build the breadcrumb entry for the corpus file of the current request.

    Reads ``corpus_id`` and ``corpus_file_id`` from the request's view
    arguments, aborts with 404 if no matching corpus file exists and
    returns a single-item list with the breadcrumb's text and URL.
    """
    view_args = request.view_args
    corpus_id = view_args['corpus_id']
    corpus_file_id = view_args['corpus_file_id']
    query = CorpusFile.query.filter_by(corpus_id=corpus_id, id=corpus_file_id)
    corpus_file = query.first_or_404()
    text = f'{corpus_file.author}: {corpus_file.title} ({corpus_file.publishing_year})'
    url = url_for('.corpus_file', corpus_id=corpus_id, corpus_file_id=corpus_file_id)
    return [{'text': text, 'url': url}]

View File

@ -58,7 +58,7 @@ def delete_corpus_follower(corpus_id, follower_id):
current_user.id == follower_id
or current_user == cfa.corpus.user
or CorpusFollowerAssociation.query.filter_by(corpus_id=corpus_id, follower_id=current_user.id).first().role.has_permission('MANAGE_FOLLOWERS')
or current_user.is_administrator):
or current_user.is_administrator()):
abort(403)
if current_user.id == follower_id:
flash(f'You are no longer following "{cfa.corpus.title}"', 'corpus')

125
app/corpora/json_routes.py Normal file
View File

@ -0,0 +1,125 @@
from datetime import datetime
from flask import abort, current_app, request, url_for
from flask_login import current_user
from threading import Thread
from app import db
from app.decorators import content_negotiation
from app.models import Corpus, CorpusFollowerRole
from . import bp
from .decorators import corpus_follower_permission_required, corpus_owner_or_admin_required
import nltk
from string import punctuation
@bp.route('/<hashid:corpus_id>', methods=['DELETE'])
@corpus_owner_or_admin_required
@content_negotiation(produces='application/json')
def delete_corpus(corpus_id):
    """Delete a corpus in a background thread.

    Responds with 404 if the corpus does not exist. Otherwise the deletion
    is handed to a new thread and a JSON message is returned right away
    with status 200.
    """
    def _delete_corpus(app, corpus_id):
        # The thread runs outside the request, so it needs its own app context
        with app.app_context():
            Corpus.query.get(corpus_id).delete()
            db.session.commit()

    corpus = Corpus.query.get_or_404(corpus_id)
    # Pass the real app object; the `current_app` proxy is context-bound
    app = current_app._get_current_object()
    Thread(target=_delete_corpus, args=(app, corpus.id)).start()
    return {
        'message': f'Corpus "{corpus.title}" marked for deletion',
        'category': 'corpus'
    }, 200
@bp.route('/<hashid:corpus_id>/build', methods=['POST'])
@corpus_follower_permission_required('MANAGE_FILES')
@content_negotiation(produces='application/json')
def build_corpus(corpus_id):
    """Start building a corpus in a background thread.

    Responds with 404 if the corpus does not exist and with 409 if it has
    no files. Otherwise the build is handed to a new thread and a JSON
    message is returned right away with status 202.
    """
    def _build_corpus(app, corpus_id):
        # The thread runs outside the request, so it needs its own app context
        with app.app_context():
            corpus = Corpus.query.get(corpus_id)
            corpus.build()
            db.session.commit()

    corpus = Corpus.query.get_or_404(corpus_id)
    # Let the database count instead of loading every file row just to
    # find out whether there is at least one.
    if corpus.files.count() == 0:
        abort(409)
    thread = Thread(
        target=_build_corpus,
        args=(current_app._get_current_object(), corpus_id)
    )
    thread.start()
    response_data = {
        'message': f'Corpus "{corpus.title}" marked for building',
        'category': 'corpus'
    }
    return response_data, 202
@bp.route('/stopwords')
@content_negotiation(produces='application/json')
def get_stopwords():
    """Return the NLTK stopword lists plus punctuation as JSON.

    The response maps each supported language to its NLTK stopword list
    and additionally contains a ``'punctuation'`` list and an empty
    ``'user_stopwords'`` list.
    """
    nltk.download('stopwords', quiet=True)
    languages = ["german", "english", "catalan", "greek", "spanish", "french", "italian", "russian", "chinese"]
    stopwords = {
        language: nltk.corpus.stopwords.words(language)
        for language in languages
    }
    # NOTE(review): the empty strings below look like characters that got
    # lost somewhere -- verify the intended extra punctuation marks.
    stopwords['punctuation'] = list(punctuation) + ['', '|', '', '', '', '--']
    stopwords['user_stopwords'] = []
    # NOTE(review): 202 (Accepted) is unusual for a synchronous GET;
    # kept as is because clients may depend on it.
    return stopwords, 202
@bp.route('/<hashid:corpus_id>/generate-share-link', methods=['POST'])
@corpus_follower_permission_required('MANAGE_FOLLOWERS')
@content_negotiation(consumes='application/json', produces='application/json')
def generate_corpus_share_link(corpus_id):
    """Generate a link that lets its recipient follow the corpus.

    Expects a JSON object with the string fields ``expiration`` (a date
    formatted like ``Jan 31, 2024``) and ``role`` (name of a corpus
    follower role). Responds with 400 if the payload is malformed, the
    date cannot be parsed or the role is unknown, and with 404 if the
    corpus does not exist. On success the share link is returned as
    ``corpusShareLink`` with status 200.
    """
    data = request.json
    if not isinstance(data, dict):
        abort(400)
    expiration = data.get('expiration')
    if not isinstance(expiration, str):
        abort(400)
    role_name = data.get('role')
    if not isinstance(role_name, str):
        abort(400)
    try:
        expiration_date = datetime.strptime(expiration, '%b %d, %Y')
    except ValueError:
        # A client-supplied date in the wrong format is a bad request,
        # not a server error.
        abort(400)
    cfr = CorpusFollowerRole.query.filter_by(name=role_name).first()
    if cfr is None:
        abort(400)
    corpus = Corpus.query.get_or_404(corpus_id)
    token = current_user.generate_follow_corpus_token(corpus.hashid, role_name, expiration_date)
    corpus_share_link = url_for(
        'corpora.follow_corpus',
        corpus_id=corpus_id,
        token=token,
        _external=True
    )
    response_data = {
        'message': 'Corpus share link generated',
        'category': 'corpus',
        'corpusShareLink': corpus_share_link
    }
    return response_data, 200
@bp.route('/<hashid:corpus_id>/is_public', methods=['PUT'])
@corpus_owner_or_admin_required
@content_negotiation(consumes='application/json', produces='application/json')
def update_corpus_is_public(corpus_id):
    """Set the ``is_public`` flag of a corpus.

    The request body must be a bare JSON boolean, otherwise 400 is
    returned. Responds with 404 if the corpus does not exist and with a
    JSON message and status 200 after the change has been committed.
    """
    is_public = request.json
    if not isinstance(is_public, bool):
        abort(400)
    corpus = Corpus.query.get_or_404(corpus_id)
    corpus.is_public = is_public
    db.session.commit()
    visibility = 'public' if is_public else 'private'
    return {
        'message': f'Corpus "{corpus.title}" is now {visibility}',
        'category': 'corpus'
    }, 200

120
app/corpora/routes.py Normal file
View File

@ -0,0 +1,120 @@
from flask import abort, flash, redirect, render_template, url_for
from flask_breadcrumbs import register_breadcrumb
from flask_login import current_user
from app import db
from app.models import (
Corpus,
CorpusFollowerAssociation,
CorpusFollowerRole,
User
)
from . import bp
from .decorators import corpus_follower_permission_required
from .forms import CreateCorpusForm
from .utils import (
corpus_endpoint_arguments_constructor as corpus_eac,
corpus_dynamic_list_constructor as corpus_dlc
)
@bp.route('')
@register_breadcrumb(bp, '.', '<i class="nopaque-icons left">I</i>My Corpora')
def corpora():
    """Redirect to the ``corpora`` anchor of the main dashboard."""
    return redirect(url_for('main.dashboard', _anchor='corpora'))
@bp.route('/create', methods=['GET', 'POST'])
@register_breadcrumb(bp, '.create', 'Create')
def create_corpus():
    """Show the corpus creation form and create a corpus on valid submit.

    On success the new corpus is committed, a message is flashed and the
    client is redirected to the corpus' URL. An ``OSError`` raised while
    creating the corpus results in a 500 response.
    """
    form = CreateCorpusForm()
    if not form.validate_on_submit():
        # Initial GET or invalid submission: (re-)render the form
        return render_template(
            'corpora/create.html.j2',
            title='Create corpus',
            form=form
        )
    try:
        corpus = Corpus.create(
            title=form.title.data,
            description=form.description.data,
            user=current_user
        )
    except OSError:
        abort(500)
    db.session.commit()
    flash(f'Corpus "{corpus.title}" created', 'corpus')
    return redirect(corpus.url)
@bp.route('/<hashid:corpus_id>')
@register_breadcrumb(bp, '.entity', '', dynamic_list_constructor=corpus_dlc)
def corpus(corpus_id):
    """Render the detail page of a corpus.

    The owner and administrators get the full corpus page. Followers, and
    everyone if the corpus is public, get the public corpus page, which
    additionally lists the corpus' follower associations. All other users
    receive a 403; a missing corpus results in a 404.
    """
    corpus = Corpus.query.get_or_404(corpus_id)
    cfrs = CorpusFollowerRole.query.all()
    # TODO: Better solution for filtering admin
    users = User.query.filter(User.is_public == True, User.id != current_user.id, User.id != corpus.user.id, User.role_id < 4).all()
    cfa = CorpusFollowerAssociation.query.filter_by(corpus_id=corpus_id, follower_id=current_user.id).first()
    if cfa is None:
        # Users without a follower association get a role derived from
        # their relation to the corpus.
        if corpus.user == current_user or current_user.is_administrator():
            cfr = CorpusFollowerRole.query.filter_by(name='Administrator').first()
        else:
            cfr = CorpusFollowerRole.query.filter_by(name='Anonymous').first()
    else:
        cfr = cfa.role
    if corpus.user == current_user or current_user.is_administrator():
        return render_template(
            'corpora/corpus.html.j2',
            title=corpus.title,
            corpus=corpus,
            cfr=cfr,
            cfrs=cfrs,
            users=users
        )
    if (current_user.is_following_corpus(corpus) or corpus.is_public):
        # Filter on the association's own corpus_id column. Filtering on
        # Corpus.id without a join produces an implicit cross join and
        # returns the follower associations of every corpus.
        cfas = CorpusFollowerAssociation.query.filter(
            CorpusFollowerAssociation.corpus_id == corpus_id,
            CorpusFollowerAssociation.follower_id != corpus.user.id
        ).all()
        return render_template(
            'corpora/public_corpus.html.j2',
            title=corpus.title,
            corpus=corpus,
            cfrs=cfrs,
            cfr=cfr,
            cfas=cfas,
            users=users
        )
    abort(403)
@bp.route('/<hashid:corpus_id>/analysis')
@corpus_follower_permission_required('VIEW')
@register_breadcrumb(bp, '.entity.analysis', 'Analysis', endpoint_arguments_constructor=corpus_eac)
def analysis(corpus_id):
    """Render the analysis page of a corpus (404 if it does not exist)."""
    corpus = Corpus.query.get_or_404(corpus_id)
    template_name = 'corpora/analysis.html.j2'
    page_title = f'Analyse Corpus {corpus.title}'
    return render_template(template_name, corpus=corpus, title=page_title)
@bp.route('/<hashid:corpus_id>/follow/<token>')
def follow_corpus(corpus_id, token):
    """Make the current user follow a corpus by means of a share token.

    Responds with 404 if the corpus does not exist and with 403 if the
    token is not accepted. Otherwise the change is committed, a message
    is flashed and the client is redirected to the corpus page.
    """
    corpus = Corpus.query.get_or_404(corpus_id)
    if not current_user.follow_corpus_by_token(token):
        abort(403)
    db.session.commit()
    flash(f'You are following "{corpus.title}" now', category='corpus')
    target_url = url_for('corpora.corpus', corpus_id=corpus_id)
    return redirect(target_url)
@bp.route('/import', methods=['GET', 'POST'])
@register_breadcrumb(bp, '.import', 'Import')
def import_corpus():
    """Corpus import endpoint; currently always responds with 503."""
    abort(503)
@bp.route('/<hashid:corpus_id>/export')
@corpus_follower_permission_required('VIEW')
@register_breadcrumb(bp, '.entity.export', 'Export', endpoint_arguments_constructor=corpus_eac)
def export_corpus(corpus_id):
    """Corpus export endpoint; currently always responds with 503."""
    abort(503)

17
app/corpora/utils.py Normal file
View File

@ -0,0 +1,17 @@
from flask import request, url_for
from app.models import Corpus
def corpus_endpoint_arguments_constructor():
    """Return the URL arguments (``corpus_id``) taken from the current request."""
    corpus_id = request.view_args['corpus_id']
    return {'corpus_id': corpus_id}
def corpus_dynamic_list_constructor():
    """Build the breadcrumb entry for the corpus of the current request.

    Reads ``corpus_id`` from the request's view arguments, aborts with 404
    if the corpus does not exist and returns a single-item list with the
    breadcrumb's text and URL.
    """
    corpus_id = request.view_args['corpus_id']
    corpus = Corpus.query.get_or_404(corpus_id)
    text = f'<i class="material-icons left">book</i>{corpus.title}'
    url = url_for('.corpus', corpus_id=corpus_id)
    return [{'text': text, 'url': url}]

11
app/daemon/__init__.py Normal file
View File

@ -0,0 +1,11 @@
from app import db
from flask import Flask
from .corpus_utils import check_corpora
from .job_utils import check_jobs
def daemon(app: Flask):
    """Run one pass of the corpus and job checks.

    Inside the application context of ``app``, calls ``check_corpora()``
    and ``check_jobs()`` and then commits the database session once.
    """
    with app.app_context():
        check_corpora()
        check_jobs()
        db.session.commit()

View File

@ -1,16 +1,12 @@
from app import docker_client
from app.models import Corpus, CorpusStatus
from flask import current_app
import docker
import os
import shutil
from app import db, docker_client, scheduler
from app.models import Corpus, CorpusStatus
def handle_corpora():
with scheduler.app.app_context():
_handle_corpora()
def _handle_corpora():
def check_corpora():
corpora = Corpus.query.all()
for corpus in [x for x in corpora if x.status == CorpusStatus.SUBMITTED]:
_create_build_corpus_service(corpus)
@ -21,14 +17,13 @@ def _handle_corpora():
for corpus in [x for x in corpora if x.status == CorpusStatus.RUNNING_ANALYSIS_SESSION and x.num_analysis_sessions == 0]:
corpus.status = CorpusStatus.CANCELING_ANALYSIS_SESSION
for corpus in [x for x in corpora if x.status == CorpusStatus.RUNNING_ANALYSIS_SESSION]:
_checkout_cqpserver_container(corpus)
_checkout_analysing_corpus_container(corpus)
for corpus in [x for x in corpora if x.status == CorpusStatus.STARTING_ANALYSIS_SESSION]:
_create_cqpserver_container(corpus)
for corpus in [x for x in corpora if x.status == CorpusStatus.CANCELING_ANALYSIS_SESSION]:
_remove_cqpserver_container(corpus)
db.session.commit()
def _create_build_corpus_service(corpus: Corpus):
def _create_build_corpus_service(corpus):
''' # Docker service settings # '''
''' ## Command ## '''
command = ['bash', '-c']
@ -50,10 +45,12 @@ def _create_build_corpus_service(corpus: Corpus):
''' ## Constraints ## '''
constraints = ['node.role==worker']
''' ## Image ## '''
image = f'{current_app.config["NOPAQUE_DOCKER_IMAGE_PREFIX"]}cwb:r1887'
image = f'{current_app.config["NOPAQUE_DOCKER_IMAGE_PREFIX"]}cwb:r1879'
''' ## Labels ## '''
labels = {
'nopaque.server_name': current_app.config['SERVER_NAME']
'origin': current_app.config['SERVER_NAME'],
'type': 'corpus.build',
'corpus_id': str(corpus.id)
}
''' ## Mounts ## '''
mounts = []
@ -98,7 +95,7 @@ def _create_build_corpus_service(corpus: Corpus):
return
corpus.status = CorpusStatus.QUEUED
def _checkout_build_corpus_service(corpus: Corpus):
def _checkout_build_corpus_service(corpus):
service_name = f'build-corpus_{corpus.id}'
try:
service = docker_client.services.get(service_name)
@ -126,7 +123,8 @@ def _checkout_build_corpus_service(corpus: Corpus):
except docker.errors.DockerException as e:
current_app.logger.error(f'Remove service "{service_name}" failed: {e}')
def _create_cqpserver_container(corpus: Corpus):
def _create_cqpserver_container(corpus):
''' # Docker container settings # '''
''' ## Command ## '''
command = []
command.append(
@ -141,9 +139,9 @@ def _create_cqpserver_container(corpus: Corpus):
''' ## Entrypoint ## '''
entrypoint = ['bash', '-c']
''' ## Image ## '''
image = f'{current_app.config["NOPAQUE_DOCKER_IMAGE_PREFIX"]}cwb:r1887'
image = f'{current_app.config["NOPAQUE_DOCKER_IMAGE_PREFIX"]}cwb:r1879'
''' ## Name ## '''
name = f'nopaque-cqpserver-{corpus.id}'
name = f'cqpserver_{corpus.id}'
''' ## Network ## '''
network = f'{current_app.config["NOPAQUE_DOCKER_NETWORK_NAME"]}'
''' ## Volumes ## '''
@ -200,8 +198,8 @@ def _create_cqpserver_container(corpus: Corpus):
return
corpus.status = CorpusStatus.RUNNING_ANALYSIS_SESSION
def _checkout_cqpserver_container(corpus: Corpus):
container_name = f'nopaque-cqpserver-{corpus.id}'
def _checkout_analysing_corpus_container(corpus):
container_name = f'cqpserver_{corpus.id}'
try:
docker_client.containers.get(container_name)
except docker.errors.NotFound as e:
@ -211,8 +209,8 @@ def _checkout_cqpserver_container(corpus: Corpus):
except docker.errors.DockerException as e:
current_app.logger.error(f'Get container "{container_name}" failed: {e}')
def _remove_cqpserver_container(corpus: Corpus):
container_name = f'nopaque-cqpserver-{corpus.id}'
def _remove_cqpserver_container(corpus):
container_name = f'cqpserver_{corpus.id}'
try:
container = docker_client.containers.get(container_name)
except docker.errors.NotFound:

View File

@ -1,11 +1,4 @@
from datetime import datetime
from flask import current_app
from werkzeug.utils import secure_filename
import docker
import json
import os
import shutil
from app import db, docker_client, hashids, scheduler
from app import db, docker_client, hashids
from app.models import (
Job,
JobResult,
@ -13,13 +6,16 @@ from app.models import (
TesseractOCRPipelineModel,
SpaCyNLPPipelineModel
)
from datetime import datetime
from flask import current_app
from werkzeug.utils import secure_filename
import docker
import json
import os
import shutil
def handle_jobs():
with scheduler.app.app_context():
_handle_jobs()
def _handle_jobs():
def check_jobs():
jobs = Job.query.all()
for job in [x for x in jobs if x.status == JobStatus.SUBMITTED]:
_create_job_service(job)
@ -27,9 +23,8 @@ def _handle_jobs():
_checkout_job_service(job)
for job in [x for x in jobs if x.status == JobStatus.CANCELING]:
_remove_job_service(job)
db.session.commit()
def _create_job_service(job: Job):
def _create_job_service(job):
''' # Docker service settings # '''
''' ## Service specific settings ## '''
if job.service == 'file-setup-pipeline':
@ -86,7 +81,9 @@ def _create_job_service(job: Job):
constraints = ['node.role==worker']
''' ## Labels ## '''
labels = {
'origin': current_app.config['SERVER_NAME']
'origin': current_app.config['SERVER_NAME'],
'type': 'job',
'job_id': str(job.id)
}
''' ## Mounts ## '''
mounts = []
@ -167,7 +164,7 @@ def _create_job_service(job: Job):
return
job.status = JobStatus.QUEUED
def _checkout_job_service(job: Job):
def _checkout_job_service(job):
service_name = f'job_{job.id}'
try:
service = docker_client.services.get(service_name)
@ -216,7 +213,7 @@ def _checkout_job_service(job: Job):
except docker.errors.DockerException as e:
current_app.logger.error(f'Remove service "{service_name}" failed: {e}')
def _remove_job_service(job: Job):
def _remove_job_service(job):
service_name = f'job_{job.id}'
try:
service = docker_client.services.get(service_name)

View File

@ -1,7 +1,8 @@
from flask import abort, request
from flask import abort, current_app, request
from flask_login import current_user
from functools import wraps
from typing import Optional
from threading import Thread
from typing import List, Union
from werkzeug.exceptions import NotAcceptable
from app.models import Permission
@ -23,21 +24,22 @@ def admin_required(f):
def socketio_login_required(f):
@wraps(f)
def wrapper(*args, **kwargs):
def decorated_function(*args, **kwargs):
if current_user.is_authenticated:
return f(*args, **kwargs)
return {'status': 401, 'statusText': 'Unauthorized'}
return wrapper
else:
return {'code': 401, 'msg': 'Unauthorized'}
return decorated_function
def socketio_permission_required(permission):
def decorator(f):
@wraps(f)
def wrapper(*args, **kwargs):
def decorated_function(*args, **kwargs):
if not current_user.can(permission):
return {'status': 403, 'statusText': 'Forbidden'}
return {'code': 403, 'msg': 'Forbidden'}
return f(*args, **kwargs)
return wrapper
return decorated_function
return decorator
@ -45,9 +47,27 @@ def socketio_admin_required(f):
return socketio_permission_required(Permission.ADMINISTRATE)(f)
def background(f):
    '''
    Decorator that runs the decorated function in a Thread.

    The wrapper has to be called while an app context exists, because it
    resolves the current app object at call time.

    NOTE: The app object is handed to the decorated function as the
          keyword argument `app`. The started Thread is returned.
    '''
    @wraps(f)
    def wrapped(*args, **kwargs):
        # Resolve the proxy now; the thread itself has no app context.
        kwargs['app'] = current_app._get_current_object()
        worker = Thread(target=f, args=args, kwargs=kwargs)
        worker.start()
        return worker
    return wrapped
def content_negotiation(
produces: Optional[str | list[str]] = None,
consumes: Optional[str | list[str]] = None
produces: Union[str, List[str], None] = None,
consumes: Union[str, List[str], None] = None
):
def decorator(f):
@wraps(f)

View File

@ -1,32 +1,25 @@
from flask import current_app, Flask, render_template
from flask import current_app, render_template
from flask_mail import Message
from threading import Thread
from app import mail
def create_message(
recipient: str,
subject: str,
template: str,
**context
) -> Message:
message = Message(
body=render_template(f'{template}.txt.j2', **context),
html=render_template(f'{template}.html.j2', **context),
def create_message(recipient, subject, template, **kwargs):
subject_prefix: str = current_app.config['NOPAQUE_MAIL_SUBJECT_PREFIX']
msg: Message = Message(
body=render_template(f'{template}.txt.j2', **kwargs),
html=render_template(f'{template}.html.j2', **kwargs),
recipients=[recipient],
subject=f'[nopaque] {subject}'
subject=f'{subject_prefix} {subject}'
)
return message
return msg
def send(message: Message) -> Thread:
def _send(app: Flask, message: Message):
def send(msg, *args, **kwargs):
def _send(app, msg):
with app.app_context():
mail.send(message)
mail.send(msg)
thread = Thread(
target=_send,
args=[current_app._get_current_object(), message]
)
thread = Thread(target=_send, args=[current_app._get_current_object(), msg])
thread.start()
return thread

View File

@ -4,17 +4,11 @@ from . import bp
@bp.app_errorhandler(HTTPException)
def handle_http_exception(e: HTTPException):
def handle_http_exception(error):
''' Generic HTTP exception handler '''
accept_json = request.accept_mimetypes.accept_json
accept_html = request.accept_mimetypes.accept_html
if accept_json and not accept_html:
error = {
'code': e.code,
'name': e.name,
'description': e.description
}
return jsonify(error), e.code
return render_template('errors/error.html.j2', error=e), e.code
response = jsonify(str(error))
return response, error.code
return render_template('errors/error.html.j2', error=error), error.code

View File

@ -1,20 +0,0 @@
from flask import abort
from flask_admin import (
AdminIndexView as _AdminIndexView,
expose
)
from flask_admin.contrib.sqla import ModelView as _ModelView
from flask_login import current_user
class AdminIndexView(_AdminIndexView):
    ''' Admin index view that is only served to administrators. '''

    @expose('/')
    def index(self):
        ''' Render the index page, or abort with 403 for non-administrators. '''
        if current_user.is_administrator:
            return super().index()
        abort(403)
class ModelView(_ModelView):
    ''' Model view whose accessibility is tied to the administrator flag. '''

    def is_accessible(self):
        # Access is granted exactly when the current user is an administrator.
        return current_user.is_administrator

View File

@ -1,42 +0,0 @@
import json
from app import db
class ContainerColumn(db.TypeDecorator):
    '''
    String column that stores a container (e.g. list or dict) as JSON.

    Values are serialized with json.dumps on their way into the database and
    deserialized with json.loads on their way out. NULL values pass through
    unchanged in both directions.
    '''
    impl = db.String

    def __init__(self, container_type, *args, **kwargs):
        '''
        container_type: The type (or tuple of types) accepted by this column.
        All other arguments are forwarded to the underlying String type.
        '''
        super().__init__(*args, **kwargs)
        self.container_type = container_type

    def process_bind_param(self, value, dialect):
        '''
        Convert a Python value into the JSON string that gets stored.

        Raises TypeError if the value is neither an instance of the container
        type nor a JSON string that decodes to one.
        '''
        if value is None:
            return None
        if isinstance(value, self.container_type):
            return json.dumps(value)
        if (
            isinstance(value, str)
            and isinstance(json.loads(value), self.container_type)
        ):
            # Already a JSON string of the expected container type
            return value
        # The exception must be raised, not returned: a returned exception
        # instance would be handed to the database driver as the bound value.
        raise TypeError(
            f'Expected {self.container_type} or a JSON string of it, '
            f'got {type(value)}'
        )

    def process_result_value(self, value, dialect):
        ''' Convert the stored JSON string back into a Python container. '''
        if value is None:
            return None
        return json.loads(value)
class IntEnumColumn(db.TypeDecorator):
    '''
    Integer column that maps to the members of an integer valued enum.

    Enum members, their integer values and their names are accepted as
    input. Results are returned as enum members. NULL values pass through
    unchanged in both directions.
    '''
    impl = db.Integer

    def __init__(self, enum_type, *args, **kwargs):
        '''
        enum_type: The enum class whose members are stored in this column.
        All other arguments are forwarded to the underlying Integer type.
        '''
        super().__init__(*args, **kwargs)
        self.enum_type = enum_type

    def process_bind_param(self, value, dialect):
        '''
        Convert an enum member, integer value or member name into the integer
        that gets stored.

        Raises TypeError for unsupported input types. Unknown integer values
        raise ValueError and unknown names raise KeyError (from the enum
        lookup itself).
        '''
        if value is None:
            return None
        if isinstance(value, self.enum_type) and isinstance(value.value, int):
            return value.value
        if isinstance(value, int):
            # Lookup by value validates that the integer is a known member
            return self.enum_type(value).value
        if isinstance(value, str):
            # Lookup by member name
            return self.enum_type[value].value
        # The exception must be raised, not returned: a returned exception
        # instance would be handed to the database driver as the bound value.
        raise TypeError(
            f'Expected {self.enum_type}, int or str, got {type(value)}'
        )

    def process_result_value(self, value, dialect):
        ''' Convert the stored integer back into an enum member. '''
        if value is None:
            return None
        return self.enum_type(value)

View File

@ -1,2 +1,18 @@
from .handle_corpora import handle_corpora
from .handle_jobs import handle_jobs
from flask import Blueprint
from flask_login import login_required
bp = Blueprint('jobs', __name__)
@bp.before_request
@login_required
def before_request():
    '''
    Request hook without a body of its own: the login_required decorator
    does the work, so every route of this blueprint is only reachable for
    users that are logged in.
    '''
from . import routes, json_routes

73
app/jobs/json_routes.py Normal file
View File

@ -0,0 +1,73 @@
from flask import abort, current_app
from flask_login import current_user
from threading import Thread
import os
from app import db
from app.decorators import admin_required, content_negotiation
from app.models import Job, JobStatus
from . import bp
@bp.route('/<hashid:job_id>', methods=['DELETE'])
@content_negotiation(produces='application/json')
def delete_job(job_id):
    '''
    Delete a job in a background thread.

    Only the owner of the job or an administrator may do this (403
    otherwise). Responds with 202 as soon as the thread is started.
    '''
    def _delete_job(app, job_id):
        # The thread needs its own app context for database access.
        with app.app_context():
            doomed_job = Job.query.get(job_id)
            doomed_job.delete()
            db.session.commit()

    job = Job.query.get_or_404(job_id)
    is_owner = job.user == current_user
    if not (is_owner or current_user.is_administrator()):
        abort(403)
    app = current_app._get_current_object()
    Thread(target=_delete_job, args=(app, job_id)).start()
    return {'message': f'Job "{job.title}" marked for deletion'}, 202
@bp.route('/<hashid:job_id>/log')
@admin_required
@content_negotiation(produces='application/json')
def job_log(job_id):
    '''
    Return the pyflow log of a job (administrators only).

    Responds with 409 unless the job status is COMPLETED or FAILED.
    '''
    job = Job.query.get_or_404(job_id)
    if job.status not in (JobStatus.COMPLETED, JobStatus.FAILED):
        error_data = {
            'errors': {'message': 'Job status is not completed or failed'}
        }
        return error_data, 409
    log_path = os.path.join(
        job.path,
        'pipeline_data',
        'logs',
        'pyflow_log.txt'
    )
    with open(log_path) as log_file:
        return {'jobLog': log_file.read()}, 200
@bp.route('/<hashid:job_id>/restart', methods=['POST'])
@content_negotiation(produces='application/json')
def restart_job(job_id):
    '''
    Restart a failed job in a background thread.

    Only the owner of the job or an administrator may do this (403
    otherwise). Jobs that are not in the FAILED state are rejected with 409.
    Responds with 202 as soon as the thread is started.
    '''
    def _restart_job(app, job_id):
        # The thread needs its own app context for database access.
        with app.app_context():
            job = Job.query.get(job_id)
            job.restart()
            db.session.commit()

    job = Job.query.get_or_404(job_id)
    if not (job.user == current_user or current_user.is_administrator()):
        abort(403)
    # Only failed jobs can be restarted. The original check was inverted
    # (`==`), which rejected exactly the jobs this route is meant for.
    if job.status != JobStatus.FAILED:
        response = {'errors': {'message': 'Job status is not "failed"'}}
        return response, 409
    thread = Thread(
        target=_restart_job,
        args=(current_app._get_current_object(), job_id)
    )
    thread.start()
    response_data = {
        'message': f'Job "{job.title}" marked for restarting'
    }
    return response_data, 202

60
app/jobs/routes.py Normal file
View File

@ -0,0 +1,60 @@
from flask import (
abort,
redirect,
render_template,
send_from_directory,
url_for
)
from flask_breadcrumbs import register_breadcrumb
from flask_login import current_user
import os
from app.models import Job, JobInput, JobResult
from . import bp
from .utils import job_dynamic_list_constructor as job_dlc
@bp.route('')
@register_breadcrumb(bp, '.', '<i class="nopaque-icons left">J</i>My Jobs')
def corpora():
    # Redirects to the "jobs" anchor of the main dashboard.
    # NOTE(review): this view is named `corpora` although its breadcrumb reads
    # "My Jobs" - presumably a copy-paste leftover. Renaming it would change
    # the endpoint name, so check for url_for() callers first.
    return redirect(url_for('main.dashboard', _anchor='jobs'))
@bp.route('/<hashid:job_id>')
@register_breadcrumb(bp, '.entity', '', dynamic_list_constructor=job_dlc)
def job(job_id):
    '''
    Render the page of a single job.

    Only the owner of the job or an administrator may view it (403
    otherwise).
    '''
    job = Job.query.get_or_404(job_id)
    is_owner = job.user == current_user
    if not (is_owner or current_user.is_administrator()):
        abort(403)
    return render_template('jobs/job.html.j2', title='Job', job=job)
@bp.route('/<hashid:job_id>/inputs/<hashid:job_input_id>/download')
def download_job_input(job_id, job_input_id):
    '''
    Send a job input file as an attachment.

    Only the owner of the job or an administrator may download it (403
    otherwise).
    '''
    job_input = JobInput.query.filter_by(
        job_id=job_id,
        id=job_input_id
    ).first_or_404()
    is_owner = job_input.job.user == current_user
    if not (is_owner or current_user.is_administrator()):
        abort(403)
    # os.path.split yields exactly the (dirname, basename) pair
    directory, stored_name = os.path.split(job_input.path)
    return send_from_directory(
        directory,
        stored_name,
        as_attachment=True,
        attachment_filename=job_input.filename,
        mimetype=job_input.mimetype
    )
@bp.route('/<hashid:job_id>/results/<hashid:job_result_id>/download')
def download_job_result(job_id, job_result_id):
    '''
    Send a job result file as an attachment.

    Only the owner of the job or an administrator may download it (403
    otherwise).
    '''
    job_result = JobResult.query.filter_by(
        job_id=job_id,
        id=job_result_id
    ).first_or_404()
    is_owner = job_result.job.user == current_user
    if not (is_owner or current_user.is_administrator()):
        abort(403)
    # os.path.split yields exactly the (dirname, basename) pair
    directory, stored_name = os.path.split(job_result.path)
    return send_from_directory(
        directory,
        stored_name,
        as_attachment=True,
        attachment_filename=job_result.filename,
        mimetype=job_result.mimetype
    )

13
app/jobs/utils.py Normal file
View File

@ -0,0 +1,13 @@
from flask import request, url_for
from app.models import Job
def job_dynamic_list_constructor():
    '''
    Build the breadcrumb entry for the job addressed by the current request.

    Returns a list with one dict holding the breadcrumb `text` (an HTML
    fragment) and `url`. Responds with 404 if the job does not exist.
    '''
    # Imported locally so this block does not depend on the module imports.
    from html import escape

    job_id = request.view_args['job_id']
    job = Job.query.get_or_404(job_id)
    # The text is an HTML fragment, so the values interpolated into it must be
    # escaped; the job title in particular is user supplied.
    service = escape(f'{job.service}', quote=True)
    title = escape(f'{job.title}')
    return [
        {
            'text': f'<i class="nopaque-icons left service-icons" data-service="{service}"></i>{title}',
            'url': url_for('.job', job_id=job_id)
        }
    ]

View File

@ -1,9 +1,7 @@
from flask import current_app
from flask_migrate import upgrade
from pathlib import Path
from app import db
import os
from app.models import (
Corpus,
CorpusFollowerRole,
Role,
SpaCyNLPPipelineModel,
@ -16,22 +14,25 @@ from . import bp
@bp.cli.command('deploy')
def deploy():
''' Run deployment tasks. '''
# Make default directories
print('Make default directories')
base_dir = current_app.config['NOPAQUE_DATA_DIR']
default_dirs: list[Path] = [
base_dir / 'tmp',
base_dir / 'users'
default_dirs = [
os.path.join(base_dir, 'tmp'),
os.path.join(base_dir, 'users')
]
for default_dir in default_dirs:
if not default_dir.exists():
default_dir.mkdir()
if not default_dir.is_dir():
raise NotADirectoryError(f'{default_dir} is not a directory')
for dir in default_dirs:
if os.path.exists(dir):
if not os.path.isdir(dir):
raise NotADirectoryError(f'{dir} is not a directory')
else:
os.mkdir(dir)
# migrate database to latest revision
print('Migrate database to latest revision')
upgrade()
# Insert/Update default database values
print('Insert/Update default Roles')
Role.insert_defaults()
print('Insert/Update default Users')
@ -43,9 +44,4 @@ def deploy():
print('Insert/Update default TesseractOCRPipelineModels')
TesseractOCRPipelineModel.insert_defaults()
print('Stop running analysis sessions')
for corpus in Corpus.query.all():
corpus.num_analysis_sessions = 0
db.session.commit()
# TODO: Implement checks for if the nopaque network exists

View File

@ -1,11 +1,14 @@
from flask import flash, redirect, render_template, url_for
from flask_breadcrumbs import register_breadcrumb
from flask_login import current_user, login_required, login_user
from app.blueprints.auth.forms import LoginForm
from app.auth.forms import LoginForm
from app.models import Corpus, User
from sqlalchemy import or_
from . import bp
@bp.route('/', methods=['GET', 'POST'])
@register_breadcrumb(bp, '.', '<i class="material-icons">home</i>')
def index():
form = LoginForm()
if form.validate_on_submit():
@ -24,6 +27,7 @@ def index():
@bp.route('/faq')
@register_breadcrumb(bp, '.faq', 'Frequently Asked Questions')
def faq():
return render_template(
'main/faq.html.j2',
@ -32,6 +36,7 @@ def faq():
@bp.route('/dashboard')
@register_breadcrumb(bp, '.dashboard', '<i class="material-icons left">dashboard</i>Dashboard')
@login_required
def dashboard():
return render_template(
@ -40,15 +45,8 @@ def dashboard():
)
@bp.route('/manual')
def manual():
return render_template(
'main/manual.html.j2',
title='Manual'
)
@bp.route('/news')
@register_breadcrumb(bp, '.news', '<i class="material-icons left">email</i>News')
def news():
return render_template(
'main/news.html.j2',
@ -57,6 +55,7 @@ def news():
@bp.route('/privacy_policy')
@register_breadcrumb(bp, '.privacy_policy', 'Private statement (GDPR)')
def privacy_policy():
return render_template(
'main/privacy_policy.html.j2',
@ -65,6 +64,7 @@ def privacy_policy():
@bp.route('/terms_of_use')
@register_breadcrumb(bp, '.terms_of_use', 'Terms of Use')
def terms_of_use():
return render_template(
'main/terms_of_use.html.j2',
@ -72,14 +72,17 @@ def terms_of_use():
)
@bp.route('/social')
@bp.route('/social-area')
@register_breadcrumb(bp, '.social_area', '<i class="material-icons left">group</i>Social Area')
@login_required
def social():
def social_area():
print('test')
corpora = Corpus.query.filter(Corpus.is_public == True, Corpus.user != current_user).all()
print(corpora)
users = User.query.filter(User.is_public == True, User.id != current_user.id).all()
return render_template(
'main/social.html.j2',
title='Social',
'main/social_area.html.j2',
title='Social Area',
corpora=corpora,
users=users
)

1819
app/models.py Normal file

File diff suppressed because it is too large Load Diff

View File

@ -1,45 +0,0 @@
from .anonymous_user import AnonymousUser
from .avatar import Avatar
from .corpus_file import CorpusFile
from .corpus_follower_association import CorpusFollowerAssociation
from .corpus_follower_role import CorpusFollowerPermission, CorpusFollowerRole
from .corpus import CorpusStatus, Corpus
from .job_input import JobInput
from .job_result import JobResult
from .job import JobStatus, Job
from .role import Permission, Role
from .spacy_nlp_pipeline_model import SpaCyNLPPipelineModel
from .tesseract_ocr_pipeline_model import TesseractOCRPipelineModel
from .token import Token
from .user import (
ProfilePrivacySettings,
UserSettingJobStatusMailNotificationLevel,
User
)
# The model classes imported above.
# NOTE(review): AnonymousUser is imported but not listed here; how this list
# is consumed is not visible in this module - confirm before changing it.
_models = [
    Avatar,
    CorpusFile,
    CorpusFollowerAssociation,
    CorpusFollowerRole,
    Corpus,
    JobInput,
    JobResult,
    Job,
    Role,
    SpaCyNLPPipelineModel,
    TesseractOCRPipelineModel,
    Token,
    User
]
# The enum classes imported above.
# NOTE(review): consumer not visible in this module - confirm before changing.
_enums = [
    CorpusFollowerPermission,
    CorpusStatus,
    JobStatus,
    Permission,
    ProfilePrivacySettings,
    UserSettingJobStatusMailNotificationLevel
]

View File

@ -1,10 +0,0 @@
from flask_login import AnonymousUserMixin
class AnonymousUser(AnonymousUserMixin):
    ''' Anonymous user that has no permissions at all. '''

    def can(self, permissions):
        # Every permission check is denied, regardless of the argument.
        return False

    @property
    def is_administrator(self):
        # An anonymous user is never an administrator.
        return False

View File

@ -1,40 +0,0 @@
from flask import current_app
from flask_hashids import HashidMixin
from pathlib import Path
from app import db
from .file_mixin import FileMixin
class Avatar(HashidMixin, FileMixin, db.Model):
    '''
    Class to define the avatar file of a user.
    '''
    __tablename__ = 'avatars'
    # Primary key
    id = db.Column(db.Integer, primary_key=True)
    # Foreign keys
    user_id = db.Column(db.Integer, db.ForeignKey('users.id'))
    # Relationships
    user = db.relationship('User', back_populates='avatar')

    @property
    def path(self) -> Path:
        ''' Location of the avatar file inside the directory of its user. '''
        return self.user.path / 'avatar'

    def delete(self):
        '''
        Remove the avatar file and mark the database row for deletion.

        An OSError raised while removing the file is logged and re-raised;
        the row is left untouched in that case.
        '''
        try:
            self.path.unlink(missing_ok=True)
        except OSError as e:
            current_app.logger.error(e)
            raise
        else:
            db.session.delete(self)

    def to_json_serializeable(self, backrefs=False, relationships=False):
        '''
        Return a JSON serializeable dict of this avatar.

        backrefs: Also include the serialized user.
        relationships: Accepted for a uniform signature, adds nothing here.
        '''
        json_serializeable = {'id': self.hashid}
        json_serializeable.update(self.file_mixin_to_json_serializeable())
        if backrefs:
            json_serializeable['user'] = self.user.to_json_serializeable(
                backrefs=True
            )
        return json_serializeable

View File

@ -1,199 +0,0 @@
from datetime import datetime
from enum import IntEnum
from flask import current_app, url_for
from flask_hashids import HashidMixin
from sqlalchemy.ext.associationproxy import association_proxy
from pathlib import Path
import shutil
import xml.etree.ElementTree as ET
from app import db
from app.converters.vrt import normalize_vrt_file
from app.extensions.nopaque_sqlalchemy_type_decorators import IntEnumColumn
from .corpus_follower_association import CorpusFollowerAssociation
class CorpusStatus(IntEnum):
    ''' The states of a corpus. '''
    UNPREPARED = 1
    SUBMITTED = 2
    QUEUED = 3
    BUILDING = 4
    BUILT = 5
    FAILED = 6
    STARTING_ANALYSIS_SESSION = 7
    RUNNING_ANALYSIS_SESSION = 8
    CANCELING_ANALYSIS_SESSION = 9

    @staticmethod
    def get(corpus_status: 'CorpusStatus | int | str') -> 'CorpusStatus':
        '''
        Resolve a member, an integer value or a member name to the member.

        Raises TypeError for any other input type. Unknown values raise
        ValueError and unknown names raise KeyError.
        '''
        if isinstance(corpus_status, str):
            # Lookup by member name
            return CorpusStatus[corpus_status]
        if isinstance(corpus_status, int):
            # Lookup by value; members are ints and resolve to themselves
            return CorpusStatus(corpus_status)
        raise TypeError('corpus_status must be CorpusStatus, int, or str')
class Corpus(HashidMixin, db.Model):
    '''
    Class to define a corpus.

    A corpus belongs to a user, owns a set of corpus files and keeps its data
    in a directory below the directory of its user (see `path`).
    '''
    __tablename__ = 'corpora'
    # Primary key
    id = db.Column(db.Integer, primary_key=True)
    # Foreign keys
    user_id = db.Column(db.Integer, db.ForeignKey('users.id'))
    # Fields
    creation_date = db.Column(db.DateTime(), default=datetime.utcnow)
    description = db.Column(db.String(255))
    status = db.Column(
        IntEnumColumn(CorpusStatus),
        default=CorpusStatus.UNPREPARED
    )
    title = db.Column(db.String(32))
    num_analysis_sessions = db.Column(db.Integer, default=0)
    num_tokens = db.Column(db.Integer, default=0)
    is_public = db.Column(db.Boolean, default=False)
    # Relationships
    files = db.relationship(
        'CorpusFile',
        back_populates='corpus',
        lazy='dynamic',
        cascade='all, delete-orphan'
    )
    corpus_follower_associations = db.relationship(
        'CorpusFollowerAssociation',
        back_populates='corpus',
        cascade='all, delete-orphan'
    )
    followers = association_proxy(
        'corpus_follower_associations',
        'follower',
        creator=lambda u: CorpusFollowerAssociation(follower=u)
    )
    user = db.relationship('User', back_populates='corpora')
    # "static" attributes
    max_num_tokens = 2_147_483_647

    def __repr__(self):
        return f'<Corpus {self.title}>'

    @property
    def analysis_url(self):
        return url_for('corpora.analysis', corpus_id=self.id)

    @property
    def jsonpatch_path(self):
        return f'{self.user.jsonpatch_path}/corpora/{self.hashid}'

    @property
    def path(self) -> Path:
        return self.user.path / 'corpora' / f'{self.id}'

    @property
    def url(self):
        return url_for('corpora.corpus', corpus_id=self.id)

    @property
    def user_hashid(self):
        return self.user.hashid

    @staticmethod
    def create(**kwargs):
        '''
        Create a corpus, add it to the session and create its directories.

        The keyword arguments are forwarded to the Corpus constructor. If a
        directory can not be created, the error is logged, the session is
        rolled back and the OSError is re-raised.
        '''
        corpus = Corpus(**kwargs)
        db.session.add(corpus)
        # Flush and refresh so that the id, which is part of the path, is set
        db.session.flush(objects=[corpus])
        db.session.refresh(corpus)
        corpus_files_dir = corpus.path / 'files'
        corpus_cwb_dir = corpus.path / 'cwb'
        corpus_cwb_data_dir = corpus_cwb_dir / 'data'
        corpus_cwb_registry_dir = corpus_cwb_dir / 'registry'
        try:
            corpus.path.mkdir()
            corpus_files_dir.mkdir()
            corpus_cwb_dir.mkdir()
            corpus_cwb_data_dir.mkdir()
            corpus_cwb_registry_dir.mkdir()
        except OSError as e:
            # TODO: Potential leftover cleanup
            current_app.logger.error(e)
            db.session.rollback()
            raise
        return corpus

    def build(self):
        '''
        Merge all corpus files into one corpus.vrt and submit the corpus.

        The cwb directory is recreated from scratch. Every corpus file is
        normalized and its root element, annotated with the metadata of the
        file, is appended to the corpus element. On success the status is set
        to SUBMITTED.

        If recreating the directories fails, the status is set to FAILED and
        the OSError is re-raised. If normalizing a file fails, the error is
        logged, the status is set to FAILED and the method returns.
        '''
        corpus_cwb_dir = self.path / 'cwb'
        corpus_cwb_data_dir = corpus_cwb_dir / 'data'
        corpus_cwb_registry_dir = corpus_cwb_dir / 'registry'
        try:
            shutil.rmtree(corpus_cwb_dir, ignore_errors=True)
            corpus_cwb_dir.mkdir()
            corpus_cwb_data_dir.mkdir()
            corpus_cwb_registry_dir.mkdir()
        except OSError as e:
            current_app.logger.error(e)
            self.status = CorpusStatus.FAILED
            raise
        corpus_element = ET.fromstring('<corpus>\n</corpus>')
        for corpus_file in self.files:
            normalized_vrt_path = corpus_cwb_dir / f'{corpus_file.id}.norm.vrt'
            try:
                normalize_vrt_file(corpus_file.path, normalized_vrt_path)
            except Exception as e:
                # Not a bare except: KeyboardInterrupt and SystemExit must
                # not be swallowed. The failure is logged instead of dropped.
                current_app.logger.error(e)
                self.status = CorpusStatus.FAILED
                return
            element_tree = ET.parse(normalized_vrt_path)
            text_element = element_tree.getroot()
            text_element.set('author', corpus_file.author)
            text_element.set('title', corpus_file.title)
            text_element.set(
                'publishing_year',
                f'{corpus_file.publishing_year}'
            )
            text_element.set('address', corpus_file.address or 'NULL')
            text_element.set('booktitle', corpus_file.booktitle or 'NULL')
            text_element.set('chapter', corpus_file.chapter or 'NULL')
            text_element.set('editor', corpus_file.editor or 'NULL')
            text_element.set('institution', corpus_file.institution or 'NULL')
            text_element.set('journal', corpus_file.journal or 'NULL')
            # pages is a string column like its neighbours. Formatting it
            # first turned None into the truthy 'None', so the 'NULL'
            # fallback could never apply.
            text_element.set('pages', corpus_file.pages or 'NULL')
            text_element.set('publisher', corpus_file.publisher or 'NULL')
            text_element.set('school', corpus_file.school or 'NULL')
            text_element.tail = '\n'
            corpus_element.append(text_element)
        ET.ElementTree(corpus_element).write(
            corpus_cwb_dir / 'corpus.vrt',
            encoding='utf-8'
        )
        self.status = CorpusStatus.SUBMITTED

    def delete(self):
        ''' Remove the corpus directory and mark the row for deletion. '''
        shutil.rmtree(self.path, ignore_errors=True)
        db.session.delete(self)

    def to_json_serializeable(self, backrefs=False, relationships=False):
        '''
        Return a JSON serializeable dict of this corpus.

        backrefs: Also include the serialized user.
        relationships: Also include the follower associations and the files,
                       each keyed by hashid.
        '''
        json_serializeable = {
            'id': self.hashid,
            'creation_date': f'{self.creation_date.isoformat()}Z',
            'description': self.description,
            'max_num_tokens': self.max_num_tokens,
            'num_analysis_sessions': self.num_analysis_sessions,
            'num_tokens': self.num_tokens,
            'status': self.status.name,
            'title': self.title,
            'is_public': self.is_public
        }
        if backrefs:
            json_serializeable['user'] = \
                self.user.to_json_serializeable(backrefs=True)
        if relationships:
            json_serializeable['corpus_follower_associations'] = {
                x.hashid: x.to_json_serializeable()
                for x in self.corpus_follower_associations
            }
            json_serializeable['files'] = {
                x.hashid: x.to_json_serializeable(relationships=True)
                for x in self.files
            }
        return json_serializeable

View File

@ -1,102 +0,0 @@
from flask import current_app, url_for
from flask_hashids import HashidMixin
from pathlib import Path
from app import db
from .corpus import CorpusStatus
from .file_mixin import FileMixin
class CorpusFile(FileMixin, HashidMixin, db.Model):
    '''
    Class to define a file of a corpus together with its metadata.
    '''
    __tablename__ = 'corpus_files'
    # Primary key
    id = db.Column(db.Integer, primary_key=True)
    # Foreign keys
    corpus_id = db.Column(db.Integer, db.ForeignKey('corpora.id'))
    # Fields
    author = db.Column(db.String(255))
    description = db.Column(db.String(255))
    publishing_year = db.Column(db.Integer)
    title = db.Column(db.String(255))
    address = db.Column(db.String(255))
    booktitle = db.Column(db.String(255))
    chapter = db.Column(db.String(255))
    editor = db.Column(db.String(255))
    institution = db.Column(db.String(255))
    journal = db.Column(db.String(255))
    pages = db.Column(db.String(255))
    publisher = db.Column(db.String(255))
    school = db.Column(db.String(255))
    # Relationships
    corpus = db.relationship('Corpus', back_populates='files')

    @property
    def download_url(self):
        url_values = {
            'corpus_id': self.corpus_id,
            'corpus_file_id': self.id
        }
        return url_for('corpora.download_corpus_file', **url_values)

    @property
    def jsonpatch_path(self):
        return f'{self.corpus.jsonpatch_path}/files/{self.hashid}'

    @property
    def path(self) -> Path:
        return self.corpus.path / 'files' / f'{self.id}'

    @property
    def url(self):
        url_values = {
            'corpus_id': self.corpus_id,
            'corpus_file_id': self.id
        }
        return url_for('corpora.corpus_file', **url_values)

    @property
    def user_hashid(self):
        return self.corpus.user.hashid

    @property
    def user_id(self):
        return self.corpus.user_id

    def delete(self):
        '''
        Remove the file, mark the row for deletion and reset the corpus
        status to UNPREPARED.

        An OSError raised while removing the file is logged and re-raised;
        neither the row nor the corpus is touched in that case.
        '''
        try:
            self.path.unlink(missing_ok=True)
        except OSError as e:
            current_app.logger.error(e)
            raise
        else:
            db.session.delete(self)
            self.corpus.status = CorpusStatus.UNPREPARED

    def to_json_serializeable(self, backrefs=False, relationships=False):
        '''
        Return a JSON serializeable dict of this corpus file.

        backrefs: Also include the serialized corpus.
        relationships: Only forwarded to the file mixin serialization.
        '''
        # Key order is kept stable: id, metadata fields, file mixin fields
        field_names = (
            'address',
            'author',
            'description',
            'booktitle',
            'chapter',
            'editor',
            'institution',
            'journal',
            'pages',
            'publisher',
            'publishing_year',
            'school',
            'title'
        )
        json_serializeable = {'id': self.hashid}
        for field_name in field_names:
            json_serializeable[field_name] = getattr(self, field_name)
        json_serializeable.update(
            self.file_mixin_to_json_serializeable(
                backrefs=backrefs,
                relationships=relationships
            )
        )
        if backrefs:
            json_serializeable['corpus'] = self.corpus.to_json_serializeable(
                backrefs=True
            )
        return json_serializeable

View File

@ -1,47 +0,0 @@
from flask_hashids import HashidMixin
from app import db
from .corpus_follower_role import CorpusFollowerRole
class CorpusFollowerAssociation(HashidMixin, db.Model):
    '''
    Class to define the association of a corpus, a follower (user) and the
    role the follower has for that corpus.
    '''
    __tablename__ = 'corpus_follower_associations'
    # Primary key
    id = db.Column(db.Integer, primary_key=True)
    # Foreign keys
    corpus_id = db.Column(db.Integer, db.ForeignKey('corpora.id'))
    follower_id = db.Column(db.Integer, db.ForeignKey('users.id'))
    role_id = db.Column(db.Integer, db.ForeignKey('corpus_follower_roles.id'))
    # Relationships
    corpus = db.relationship(
        'Corpus',
        back_populates='corpus_follower_associations'
    )
    follower = db.relationship(
        'User',
        back_populates='corpus_follower_associations'
    )
    role = db.relationship(
        'CorpusFollowerRole',
        back_populates='corpus_follower_associations'
    )

    def __init__(self, **kwargs):
        # Fall back to the default role; only queried if no role was passed
        if 'role' not in kwargs:
            default_role = CorpusFollowerRole.query.filter_by(
                default=True
            ).first()
            kwargs['role'] = default_role
        super().__init__(**kwargs)

    def __repr__(self):
        parts = [
            self.follower.__repr__(),
            self.role.__repr__(),
            self.corpus.__repr__()
        ]
        return f'<CorpusFollowerAssociation {" ~ ".join(parts)}>'

    def to_json_serializeable(self, backrefs=False, relationships=False):
        '''
        Return a JSON serializeable dict of this association.

        Both flags are accepted for a uniform signature; corpus, follower
        and role are always included.
        '''
        return {
            'id': self.hashid,
            'corpus': self.corpus.to_json_serializeable(backrefs=True),
            'follower': self.follower.to_json_serializeable(),
            'role': self.role.to_json_serializeable()
        }

View File

@ -1,106 +0,0 @@
from flask_hashids import HashidMixin
from enum import IntEnum
from app import db
class CorpusFollowerPermission(IntEnum):
    ''' Permissions of a corpus follower, each value being a single bit. '''
    VIEW = 1
    MANAGE_FILES = 2
    MANAGE_FOLLOWERS = 4
    MANAGE_CORPUS = 8

    @staticmethod
    def get(corpus_follower_permission: 'CorpusFollowerPermission | int | str') -> 'CorpusFollowerPermission':
        '''
        Resolve a member, an integer value or a member name to the member.

        Raises TypeError for any other input type. Unknown values raise
        ValueError and unknown names raise KeyError.
        '''
        if isinstance(corpus_follower_permission, str):
            # Lookup by member name
            return CorpusFollowerPermission[corpus_follower_permission]
        if isinstance(corpus_follower_permission, int):
            # Lookup by value; members are ints and resolve to themselves
            return CorpusFollowerPermission(corpus_follower_permission)
        raise TypeError('corpus_follower_permission must be CorpusFollowerPermission, int, or str')
class CorpusFollowerRole(HashidMixin, db.Model):
    '''
    Class to define a role a follower can have for a corpus.

    The permissions of a role are stored as an integer bit field built from
    the values of CorpusFollowerPermission.
    '''
    __tablename__ = 'corpus_follower_roles'
    # Primary key
    id = db.Column(db.Integer, primary_key=True)
    # Fields
    name = db.Column(db.String(64), unique=True)
    default = db.Column(db.Boolean, default=False, index=True)
    permissions = db.Column(db.Integer, default=0)
    # Relationships
    corpus_follower_associations = db.relationship(
        'CorpusFollowerAssociation',
        back_populates='role'
    )

    def __repr__(self):
        return f'<CorpusFollowerRole {self.name}>'

    def has_permission(self, permission: CorpusFollowerPermission | int | str):
        ''' Return whether all bits of the permission are set. '''
        perm = CorpusFollowerPermission.get(permission)
        return self.permissions & perm.value == perm.value

    def add_permission(self, permission: CorpusFollowerPermission | int | str):
        ''' Set the permission, if it is not set already. '''
        perm = CorpusFollowerPermission.get(permission)
        if not self.has_permission(perm):
            self.permissions += perm.value

    def remove_permission(self, permission: CorpusFollowerPermission | int | str):
        ''' Unset the permission, if it is set. '''
        perm = CorpusFollowerPermission.get(permission)
        if self.has_permission(perm):
            self.permissions -= perm.value

    def reset_permissions(self):
        ''' Unset all permissions. '''
        self.permissions = 0

    def to_json_serializeable(self, backrefs=False, relationships=False):
        '''
        Return a JSON serializeable dict of this role.

        backrefs: Accepted for a uniform signature, adds nothing here.
        relationships: Also include the follower associations of this role,
                       keyed by hashid.
        '''
        json_serializeable = {
            'id': self.hashid,
            'default': self.default,
            'name': self.name,
            'permissions': [
                x.name
                for x in CorpusFollowerPermission
                if self.has_permission(x)
            ]
        }
        if relationships:
            # The relationship attribute is `corpus_follower_associations`;
            # the singular name used before does not exist and raised an
            # AttributeError. The JSON key itself is left unchanged.
            json_serializeable['corpus_follower_association'] = {
                x.hashid: x.to_json_serializeable(relationships=True)
                for x in self.corpus_follower_associations
            }
        return json_serializeable

    @staticmethod
    def insert_defaults():
        '''
        Insert the default roles or reset existing ones to their default
        permissions, then commit. "Viewer" is flagged as the default role.
        '''
        roles = {
            'Anonymous': [],
            'Viewer': [
                CorpusFollowerPermission.VIEW
            ],
            'Contributor': [
                CorpusFollowerPermission.VIEW,
                CorpusFollowerPermission.MANAGE_FILES
            ],
            'Administrator': [
                CorpusFollowerPermission.VIEW,
                CorpusFollowerPermission.MANAGE_FILES,
                CorpusFollowerPermission.MANAGE_FOLLOWERS,
                CorpusFollowerPermission.MANAGE_CORPUS
            ]
        }
        default_role_name = 'Viewer'
        for role_name, permissions in roles.items():
            role = CorpusFollowerRole.query.filter_by(name=role_name).first()
            if role is None:
                role = CorpusFollowerRole(name=role_name)
            role.reset_permissions()
            for permission in permissions:
                role.add_permission(permission)
            role.default = role.name == default_role_name
            db.session.add(role)
        db.session.commit()

View File

@ -1,133 +0,0 @@
from datetime import datetime
from enum import Enum
from app import db, mail, socketio
from app.email import create_message
from .corpus_file import CorpusFile
from .corpus_follower_association import CorpusFollowerAssociation
from .corpus import Corpus
from .job_input import JobInput
from .job_result import JobResult
from .job import Job, JobStatus
from .spacy_nlp_pipeline_model import SpaCyNLPPipelineModel
from .tesseract_ocr_pipeline_model import TesseractOCRPipelineModel
from .user import UserSettingJobStatusMailNotificationLevel
def register_event_listeners():
    '''
    Register this module's handlers through ``db.event.listen``.

    The generic resource handlers are attached to every resource model,
    the corpus follower association handlers to
    ``CorpusFollowerAssociation`` and the mail notification handler to
    ``Job``.
    '''
    resource_models = (
        Corpus,
        CorpusFile,
        Job,
        JobInput,
        JobResult,
        SpaCyNLPPipelineModel,
        TesseractOCRPipelineModel
    )
    resource_handlers = (
        ('after_delete', resource_after_delete),
        ('after_insert', resource_after_insert),
        ('after_update', resource_after_update)
    )
    cfa_handlers = (
        ('after_delete', cfa_after_delete),
        ('after_insert', cfa_after_insert)
    )
    for model in resource_models:
        for event_name, handler in resource_handlers:
            db.event.listen(model, event_name, handler)
    for event_name, handler in cfa_handlers:
        db.event.listen(CorpusFollowerAssociation, event_name, handler)
    # Registered once, after the generic Job 'after_update' handler.
    db.event.listen(Job, 'after_update', job_after_update)
def resource_after_delete(mapper, connection, resource):
    '''
    Emit a 'PATCH' Socket.IO event announcing that a resource was deleted.

    The payload is a list with a single 'remove' operation for the
    resource's ``jsonpatch_path``; it is sent to the room of the user
    identified by ``resource.user_hashid``.
    '''
    remove_operation = {
        'op': 'remove',
        'path': resource.jsonpatch_path
    }
    socketio.emit(
        'PATCH',
        [remove_operation],
        room=f'/users/{resource.user_hashid}'
    )
def cfa_after_delete(mapper, connection, cfa):
    '''
    Emit a 'PATCH' Socket.IO event announcing a deleted corpus follower
    association.

    The 'remove' operation targets the association below its corpus and is
    sent to the room of the user that ``cfa.corpus.user`` refers to.
    '''
    remove_operation = {
        'op': 'remove',
        'path': f'/users/{cfa.corpus.user.hashid}/corpora/{cfa.corpus.hashid}/corpus_follower_associations/{cfa.hashid}'
    }
    socketio.emit(
        'PATCH',
        [remove_operation],
        room=f'/users/{cfa.corpus.user.hashid}'
    )
def resource_after_insert(mapper, connection, resource):
    '''
    Emit a 'PATCH' Socket.IO event announcing a newly inserted resource.

    The 'add' operation carries the resource's JSON representation, in
    which every relationship of the mapper is set to an empty dict.
    '''
    payload = resource.to_json_serializeable()
    payload.update({
        relationship.key: {}
        for relationship in mapper.relationships
    })
    add_operation = {
        'op': 'add',
        'path': resource.jsonpatch_path,
        'value': payload
    }
    socketio.emit(
        'PATCH',
        [add_operation],
        room=f'/users/{resource.user_hashid}'
    )
def cfa_after_insert(mapper, connection, cfa):
    '''
    Emit a 'PATCH' Socket.IO event announcing a new corpus follower
    association.

    The 'add' operation carries the association's JSON representation and
    is sent to the room of the user that ``cfa.corpus.user`` refers to.
    '''
    payload = cfa.to_json_serializeable()
    add_operation = {
        'op': 'add',
        'path': f'/users/{cfa.corpus.user.hashid}/corpora/{cfa.corpus.hashid}/corpus_follower_associations/{cfa.hashid}',
        'value': payload
    }
    socketio.emit(
        'PATCH',
        [add_operation],
        room=f'/users/{cfa.corpus.user.hashid}'
    )
def resource_after_update(mapper, connection, resource):
    '''
    Emit a 'PATCH' Socket.IO event with one 'replace' operation per
    changed non-relationship attribute of the updated resource.

    datetime values are sent as ISO 8601 strings with a trailing 'Z', Enum
    values by their member name, everything else unchanged. Nothing is
    emitted when no attribute qualifies.
    '''
    operations = []
    for attr in db.inspect(resource).attrs:
        # Relationships are skipped before the history is looked at.
        if attr.key in mapper.relationships or not attr.load_history().has_changes():
            continue
        operation = {
            'op': 'replace',
            'path': f'{resource.jsonpatch_path}/{attr.key}'
        }
        if isinstance(attr.value, datetime):
            operation['value'] = f'{attr.value.isoformat()}Z'
        elif isinstance(attr.value, Enum):
            operation['value'] = attr.value.name
        else:
            operation['value'] = attr.value
        operations.append(operation)
    if not operations:
        return
    socketio.emit(
        'PATCH',
        operations,
        room=f'/users/{resource.user_hashid}'
    )
def job_after_update(mapper, connection, job):
    '''
    Send a status notification mail to the job's user after a status
    change.

    No mail is sent if the 'status' attribute has no changes, if the
    user's notification level is NONE, or if the level is END and the new
    status is neither COMPLETED nor FAILED.
    '''
    levels = UserSettingJobStatusMailNotificationLevel
    for attr in db.inspect(job).attrs:
        if attr.key != 'status':
            continue
        if not attr.load_history().has_changes():
            return
        level = job.user.setting_job_status_mail_notification_level
        if level == levels.NONE:
            return
        if level == levels.END and job.status not in [JobStatus.COMPLETED, JobStatus.FAILED]:
            return
        mail.send(
            create_message(
                job.user.email,
                f'Status update for your Job "{job.title}"',
                'tasks/email/notification',
                job=job
            )
        )

View File

@ -1,40 +0,0 @@
from datetime import datetime
from flask import current_app
from werkzeug.utils import secure_filename
from app import db
class FileMixin:
    '''
    Mixin for db.Model classes that represent a stored file.

    It contributes the ``creation_date``, ``filename`` and ``mimetype``
    columns, a JSON helper for them and a ``create`` constructor that also
    writes the uploaded file to ``path``. ``path`` is not defined here;
    presumably every model using this mixin provides it.
    '''
    creation_date = db.Column(db.DateTime, default=datetime.utcnow)
    filename = db.Column(db.String(255))
    mimetype = db.Column(db.String(255))

    def file_mixin_to_json_serializeable(self, backrefs=False, relationships=False):
        '''
        Return the mixin's columns as a JSON serializable dict.

        ``creation_date`` is rendered as an ISO 8601 string with a trailing
        'Z'. ``backrefs`` and ``relationships`` are accepted but unused.
        '''
        serialized = {}
        serialized['creation_date'] = f'{self.creation_date.isoformat()}Z'
        serialized['filename'] = self.filename
        serialized['mimetype'] = self.mimetype
        return serialized

    @classmethod
    def create(cls, file_storage, **kwargs):
        '''
        Create a model instance for ``file_storage`` and save the file.

        ``filename`` and ``mimetype`` may be overridden through ``kwargs``
        and default to the values of ``file_storage``; the filename is
        passed through ``secure_filename``. All remaining ``kwargs`` go to
        the model constructor. The instance is added to the session,
        flushed and refreshed before the file is saved to its ``path``.

        On AttributeError or OSError while saving, the error is logged, the
        session is rolled back and the exception is raised again.
        '''
        filename = kwargs.pop('filename', file_storage.filename)
        mimetype = kwargs.pop('mimetype', file_storage.mimetype)
        instance = cls(
            filename=secure_filename(filename),
            mimetype=mimetype,
            **kwargs
        )
        db.session.add(instance)
        # Flush and refresh before ``path`` is read for saving.
        db.session.flush(objects=[instance])
        db.session.refresh(instance)
        try:
            file_storage.save(instance.path)
        except (AttributeError, OSError) as e:
            current_app.logger.error(e)
            db.session.rollback()
            raise e
        return instance

View File

@ -1,171 +0,0 @@
from datetime import datetime
from enum import IntEnum
from flask import current_app, url_for
from flask_hashids import HashidMixin
from time import sleep
from pathlib import Path
import shutil
from app import db
from app.extensions.nopaque_sqlalchemy_type_decorators import ContainerColumn, IntEnumColumn
class JobStatus(IntEnum):
    '''States a job can be in, numbered from 1 to 8.'''
    INITIALIZING = 1
    SUBMITTED = 2
    QUEUED = 3
    RUNNING = 4
    CANCELING = 5
    CANCELED = 6
    COMPLETED = 7
    FAILED = 8

    @staticmethod
    def get(job_status: 'JobStatus | int | str') -> 'JobStatus':
        '''
        Resolve a member, a member value or a member name to a JobStatus.

        Raises ValueError for an unknown int, KeyError for an unknown name
        and TypeError for any other argument type.
        '''
        if isinstance(job_status, str):
            # Lookup by member name.
            return JobStatus[job_status]
        if isinstance(job_status, JobStatus):
            return job_status
        if isinstance(job_status, int):
            # Lookup by member value.
            return JobStatus(job_status)
        raise TypeError('job_status must be JobStatus, int, or str')
class Job(HashidMixin, db.Model):
    '''
    Class to define Jobs.

    A job belongs to a user, stores the requested service with its
    arguments and version, and owns its input and result records. Its
    files live in the directory returned by ``path``.
    '''
    __tablename__ = 'jobs'
    # Primary key
    id = db.Column(db.Integer, primary_key=True)
    # Foreign keys
    user_id = db.Column(db.Integer, db.ForeignKey('users.id'))
    # Fields
    creation_date = \
        db.Column(db.DateTime(), default=datetime.utcnow)
    description = db.Column(db.String(255))
    end_date = db.Column(db.DateTime())
    service = db.Column(db.String(64))
    service_args = db.Column(ContainerColumn(dict, 255))
    service_version = db.Column(db.String(16))
    status = db.Column(
        IntEnumColumn(JobStatus),
        default=JobStatus.INITIALIZING
    )
    title = db.Column(db.String(32))
    # Relationships
    inputs = db.relationship(
        'JobInput',
        back_populates='job',
        cascade='all, delete-orphan',
        lazy='dynamic'
    )
    results = db.relationship(
        'JobResult',
        back_populates='job',
        cascade='all, delete-orphan',
        lazy='dynamic'
    )
    user = db.relationship(
        'User',
        back_populates='jobs'
    )

    def __repr__(self):
        '''Return a short debug representation that shows the title.'''
        return f'<Job {self.title}>'

    @property
    def jsonpatch_path(self):
        '''JSON Patch path of this job below its user's path.'''
        return f'{self.user.jsonpatch_path}/jobs/{self.hashid}'

    @property
    def path(self) -> Path:
        '''Directory of this job: ``<user path>/jobs/<id>``.'''
        return self.user.path / 'jobs' / f'{self.id}'

    @property
    def url(self):
        '''URL of the 'jobs.job' endpoint for this job.'''
        return url_for('jobs.job', job_id=self.id)

    @property
    def user_hashid(self):
        '''Hashid of the user this job belongs to.'''
        return self.user.hashid

    @staticmethod
    def create(**kwargs):
        '''
        Create a job and its directory structure.

        The job is built from ``kwargs``, added to the session, flushed and
        refreshed. Then the job directory and its 'inputs', 'pipeline_data'
        and 'results' subdirectories are created. The session is not
        committed here.

        :raises OSError: if a directory cannot be created. The error is
            logged, directories already created by this call are removed
            again and the session is rolled back before re-raising.
        '''
        job = Job(**kwargs)
        db.session.add(job)
        db.session.flush(objects=[job])
        db.session.refresh(job)
        job_directories = [
            job.path,
            job.path / 'inputs',
            job.path / 'pipeline_data',
            job.path / 'results'
        ]
        created_directories = []
        try:
            for directory in job_directories:
                directory.mkdir()
                created_directories.append(directory)
        except OSError as e:
            current_app.logger.error(e)
            # Undo only what this call created, deepest directory first,
            # so a failure half way through leaves nothing behind and a
            # directory that existed beforehand is never touched.
            for directory in reversed(created_directories):
                try:
                    directory.rmdir()
                except OSError as cleanup_error:
                    current_app.logger.error(cleanup_error)
            db.session.rollback()
            raise
        return job

    def delete(self):
        '''
        Delete the job and its inputs and results from the database.

        A job that is neither COMPLETED nor FAILED is first set to
        CANCELING; the method then polls once per second until the status
        read back from the database is CANCELED. Afterwards the job
        directory is removed and the job is marked for deletion in the
        session; the session is not committed here.

        :raises OSError: if the job directory cannot be removed. The error
            is logged and the session rolled back before re-raising.
        '''
        if self.status not in [JobStatus.COMPLETED, JobStatus.FAILED]:  # noqa
            self.status = JobStatus.CANCELING
            db.session.commit()
            while self.status != JobStatus.CANCELED:
                # In case the daemon handled a job in any way
                if self.status != JobStatus.CANCELING:
                    self.status = JobStatus.CANCELING
                    db.session.commit()
                sleep(1)
                db.session.refresh(self)
        try:
            shutil.rmtree(self.path)
        except OSError as e:
            current_app.logger.error(e)
            db.session.rollback()
            raise e
        db.session.delete(self)

    def restart(self):
        '''
        Restart a job - only if the status is failed.

        Removes the 'results' and 'pyflow.data' directories (errors are
        ignored), marks all result records for deletion, clears
        ``end_date`` and sets the status to SUBMITTED. The session is not
        committed here.

        :raises Exception: if the job's status is not FAILED.
        '''
        if self.status != JobStatus.FAILED:
            raise Exception('Job status is not "failed"')
        shutil.rmtree(self.path / 'results', ignore_errors=True)
        shutil.rmtree(self.path / 'pyflow.data', ignore_errors=True)
        for result in self.results:
            db.session.delete(result)
        self.end_date = None
        self.status = JobStatus.SUBMITTED

    def to_json_serializeable(self, backrefs=False, relationships=False):
        '''
        Build a JSON serializable dict describing this job.

        Dates are rendered as ISO 8601 strings with a trailing 'Z'
        (``end_date`` may be None), the status by its member name.

        :param backrefs: if true, the owning user is included as 'user'.
        :param relationships: if true, 'inputs' and 'results' are included,
            each keyed by hashid.
        :return: the dict described above.
        '''
        json_serializeable = {
            'id': self.hashid,
            'creation_date': f'{self.creation_date.isoformat()}Z',
            'description': self.description,
            'end_date': (
                None if self.end_date is None
                else f'{self.end_date.isoformat()}Z'
            ),
            'service': self.service,
            'service_args': self.service_args,
            'service_version': self.service_version,
            'status': self.status.name,
            'title': self.title
        }
        if backrefs:
            json_serializeable['user'] = \
                self.user.to_json_serializeable(backrefs=True)
        if relationships:
            json_serializeable['inputs'] = {
                x.hashid: x.to_json_serializeable(relationships=True)
                for x in self.inputs
            }
            json_serializeable['results'] = {
                x.hashid: x.to_json_serializeable(relationships=True)
                for x in self.results
            }
        return json_serializeable

View File

@ -1,57 +0,0 @@
from flask import url_for
from flask_hashids import HashidMixin
from pathlib import Path
from app import db
from .file_mixin import FileMixin
class JobInput(FileMixin, HashidMixin, db.Model):
    '''
    Input file of a job.

    The file columns come from ``FileMixin``; the file itself is located
    in the 'inputs' directory of the owning job.
    '''
    __tablename__ = 'job_inputs'
    # Primary key
    id = db.Column(db.Integer, primary_key=True)
    # Foreign keys
    job_id = db.Column(db.Integer, db.ForeignKey('jobs.id'))
    # Relationships
    job = db.relationship(
        'Job',
        back_populates='inputs'
    )

    def __repr__(self):
        '''Return a short debug representation that shows the filename.'''
        return f'<JobInput {self.filename}>'

    @property
    def jsonpatch_path(self):
        '''JSON Patch path of this input below its job's path.'''
        return f'{self.job.jsonpatch_path}/inputs/{self.hashid}'

    @property
    def path(self) -> Path:
        '''File location: ``<job path>/inputs/<id>``.'''
        return self.job.path / 'inputs' / f'{self.id}'

    @property
    def url(self):
        '''URL of the owning job's page, anchored at this input.'''
        # The 'jobs.job' endpoint is built with ``job_id`` elsewhere in the
        # code base, so the owning job's id is passed here; the former
        # ``job_input_id`` argument left ``job_id`` unset.
        # NOTE(review): confirm against the route definition of 'jobs.job'.
        return url_for(
            'jobs.job',
            job_id=self.job_id,
            _anchor=f'job-{self.job.hashid}-input-{self.hashid}'
        )

    @property
    def user_hashid(self):
        '''Hashid of the user owning the job of this input.'''
        return self.job.user.hashid

    @property
    def user_id(self):
        '''Id of the user owning the job of this input.'''
        return self.job.user.id

    def to_json_serializeable(self, backrefs=False, relationships=False):
        '''
        Build a JSON serializable dict describing this input.

        Contains the hashid (as ``id``) and the file columns from
        ``file_mixin_to_json_serializeable``.

        :param backrefs: if true, the owning job is included as 'job'.
        :param relationships: accepted for a uniform signature; nothing is
            added for it here.
        :return: the dict described above.
        '''
        json_serializeable = {
            'id': self.hashid,
            **self.file_mixin_to_json_serializeable()
        }
        if backrefs:
            json_serializeable['job'] = \
                self.job.to_json_serializeable(backrefs=True)
        if relationships:
            pass
        return json_serializeable

Some files were not shown because too many files have changed in this diff Show More