2 Commits

Author SHA1 Message Date
efaa2fb38b Make scripts compatible with Python3.5 2019-06-28 17:45:33 +02:00
86557443a2 Add prototype 2019-06-03 14:57:09 +02:00
343 changed files with 24637 additions and 25354 deletions

View File

@ -1,12 +0,0 @@
# Exclude everything
*
# Include what we need
!app
!migrations
!tests
!.flaskenv
!boot.sh
!config.py
!nopaque.py
!requirements.txt

204
.env.tpl
View File

@ -1,204 +0,0 @@
################################################################################
# Docker #
################################################################################
# DEFAULT: ./data
# NOTE: Use `.` as <project-basedir>
# HOST_DATA_DIR=
# Example: 1000
# HINT: Use this bash command `id -u`
HOST_UID=
# Example: 1000
# HINT: Use this bash command `id -g`
HOST_GID=
# Example: 999
# HINT: Use this bash command `getent group docker | cut -d: -f3`
HOST_DOCKER_GID=
# DEFAULT: ./logs
# NOTES: Use `.` as <project-basedir>
# HOST_LOG_DIR=
# DEFAULT: nopaque_default
# DOCKER_NETWORK_NAME=
################################################################################
# Flask #
# https://flask.palletsprojects.com/en/1.1.x/config/ #
################################################################################
# CHOOSE ONE: http, https
# DEFAULT: http
# PREFERRED_URL_SCHEME=
# DEFAULT: hard to guess string
# HINT: Use this bash command `python -c "import uuid; print(uuid.uuid4().hex)"`
# SECRET_KEY=
# DEFAULT: localhost:5000
# Example: nopaque.example.com/nopaque.example.com:5000
# HINT: If your instance is publicly available on a different Port then 80/443,
# you will have to add this to the server name
# SERVER_NAME=
# CHOOSE ONE: False, True
# DEFAULT: False
# HINT: Set to true if you redirect http to https
# SESSION_COOKIE_SECURE=
################################################################################
# Flask-Assets #
# https://webassets.readthedocs.io/en/latest/ #
################################################################################
# CHOOSE ONE: False, True
# DEFAULT: False
# ASSETS_DEBUG=
################################################################################
# Flask-Hashids #
# https://github.com/Pevtrick/Flask-Hashids #
################################################################################
# DEFAULT: 16
# HASHIDS_MIN_LENGTH=
# NOTE: Use this bash command `python -c "import uuid; print(uuid.uuid4().hex)"`
# It is strongly recommended that this is NEVER the same as the SECRET_KEY
HASHIDS_SALT=
################################################################################
# Flask-Login #
# https://flask-login.readthedocs.io/en/latest/ #
################################################################################
# CHOOSE ONE: False, True
# DEFAULT: False
# HINT: Set to true if you redirect http to https
# REMEMBER_COOKIE_SECURE=
################################################################################
# Flask-Mail #
# https://pythonhosted.org/Flask-Mail/ #
################################################################################
# EXAMPLE: nopaque Admin <nopaque@example.com>
MAIL_DEFAULT_SENDER=
MAIL_PASSWORD=
# EXAMPLE: smtp.example.com
MAIL_SERVER=
# EXAMPLE: 587
MAIL_PORT=
# CHOOSE ONE: False, True
# DEFAULT: False
# MAIL_USE_SSL=
# CHOOSE ONE: False, True
# DEFAULT: False
# MAIL_USE_TLS=
# EXAMPLE: nopaque@example.com
MAIL_USERNAME=
################################################################################
# Flask-SQLAlchemy #
# https://flask-sqlalchemy.palletsprojects.com/en/2.x/config/ #
################################################################################
# DEFAULT: 'sqlite:///<nopaque-basedir>/data.sqlite'
# NOTE: Use `.` as <nopaque-basedir>,
# Don't use a SQLite database when using Docker
# SQLALCHEMY_DATABASE_URI=
################################################################################
# nopaque #
################################################################################
# An account is registered with this email adress gets automatically assigned
# the administrator role.
# EXAMPLE: admin.nopaque@example.com
NOPAQUE_ADMIN=
# DEFAULT: /mnt/nopaque
# NOTE: This must be a network share and it must be available on all Docker
# Swarm nodes
# NOPAQUE_DATA_DIR=
# CHOOSE ONE: False, True
# DEFAULT: True
# NOPAQUE_IS_PRIMARY_INSTANCE=
# transport://[userid:password]@hostname[:port]/[virtual_host]
NOPAQUE_SOCKETIO_MESSAGE_QUEUE_URI=
# NOTE: Get these from the nopaque development team
NOPAQUE_DOCKER_REGISTRY_USERNAME=
NOPAQUE_DOCKER_REGISTRY_PASSWORD=
# DEFAULT: %Y-%m-%d %H:%M:%S
# NOPAQUE_LOG_DATE_FORMAT=
# DEFAULT: [%(asctime)s] %(levelname)s in %(pathname)s (function: %(funcName)s, line: %(lineno)d): %(message)s
# NOPAQUE_LOG_FORMAT=
# DEFAULT: INFO
# CHOOSE ONE: CRITICAL, ERROR, WARNING, INFO, DEBUG
# NOPAQUE_LOG_LEVEL=
# CHOOSE ONE: False, True
# DEFAULT: True
# NOPAQUE_LOG_FILE_ENABLED=
# DEFAULT: <nopaque-basedir>/logs
# NOTE: Use `.` as <nopaque-basedir>
# NOPAQUE_LOG_FILE_DIR=
# DEFAULT: NOPAQUE_LOG_LEVEL
# CHOOSE ONE: CRITICAL, ERROR, WARNING, INFO, DEBUG
# NOPAQUE_LOG_FILE_LEVEL=
# CHOOSE ONE: False, True
# DEFAULT: False
# NOPAQUE_LOG_STDERR_ENABLED=
# CHOOSE ONE: CRITICAL, ERROR, WARNING, INFO, DEBUG
# DEFAULT: NOPAQUE_LOG_LEVEL
# NOPAQUE_LOG_STDERR_LEVEL=
# CHOOSE ONE: False, True
# DEFAULT: False
# HINT: Set this to True only if you are using a proxy in front of nopaque
# NOPAQUE_PROXY_FIX_ENABLED=
# DEFAULT: 0
# Number of values to trust for X-Forwarded-For
# NOPAQUE_PROXY_FIX_X_FOR=
# DEFAULT: 0
# Number of values to trust for X-Forwarded-Host
# NOPAQUE_PROXY_FIX_X_HOST=
# DEFAULT: 0
# Number of values to trust for X-Forwarded-Port
# NOPAQUE_PROXY_FIX_X_PORT=
# DEFAULT: 0
# Number of values to trust for X-Forwarded-Prefix
# NOPAQUE_PROXY_FIX_X_PREFIX=
# DEFAULT: 0
# Number of values to trust for X-Forwarded-Proto
# NOPAQUE_PROXY_FIX_X_PROTO=
# CHOOSE ONE: False, True
# DEFAULT: False
# NOPAQUE_TRANSKRIBUS_ENABLED=
# READ-COOP account data: https://readcoop.eu/
# NOPAQUE_READCOOP_USERNAME=
# NOPAQUE_READCOOP_PASSWORD=

View File

@ -1 +0,0 @@
FLASK_APP=nopaque.py

170
.gitignore vendored
View File

@ -1,170 +0,0 @@
# nopaque specifics
app/static/gen/
data/
docker-compose.override.yml
logs/
!logs/dummy
*.env
*.pjentsch-testing
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class
# C extensions
*.so
# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST
# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec
# Installer logs
pip-log.txt
pip-delete-this-directory.txt
# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/
cover/
# Translations
*.mo
*.pot
# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal
# Flask stuff:
instance/
.webassets-cache
# Scrapy stuff:
.scrapy
# Sphinx documentation
docs/_build/
# PyBuilder
.pybuilder/
target/
# Jupyter Notebook
.ipynb_checkpoints
# IPython
profile_default/
ipython_config.py
# pyenv
# For a library or package, you might want to ignore these files since the code is
# intended to run in multiple environments; otherwise, check them in:
# .python-version
# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock
# poetry
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
# This is especially recommended for binary packages to ensure reproducibility, and is more
# commonly ignored for libraries.
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
#poetry.lock
# pdm
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
#pdm.lock
# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
# in version control.
# https://pdm.fming.dev/#use-with-ide
.pdm.toml
# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
__pypackages__/
# Celery stuff
celerybeat-schedule
celerybeat.pid
# SageMath parsed files
*.sage.py
# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/
# Spyder project settings
.spyderproject
.spyproject
# Rope project settings
.ropeproject
# mkdocs documentation
/site
# mypy
.mypy_cache/
.dmypy.json
dmypy.json
# Pyre type checker
.pyre/
# pytype static type analyzer
.pytype/
# Cython debug symbols
cython_debug/
# PyCharm
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
# and can be added to the global gitignore or merged into this file. For a more nuclear
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
#.idea/

View File

@ -1,7 +0,0 @@
{
"recommendations": [
"samuelcolvin.jinjahtml",
"ms-azuretools.vscode-docker",
"ms-python.python"
]
}

23
.vscode/settings.json vendored
View File

@ -1,23 +0,0 @@
{
"editor.rulers": [79],
"files.insertFinalNewline": true,
"python.terminal.activateEnvironment": false,
"[css]": {
"editor.tabSize": 2
},
"[scss]": {
"editor.tabSize": 2
},
"[html]": {
"editor.tabSize": 2
},
"[javascript]": {
"editor.tabSize": 2
},
"[jinja-html]": {
"editor.tabSize": 2
},
"[jinja-js]": {
"editor.tabSize": 2
}
}

View File

@ -1,50 +0,0 @@
FROM python:3.8.10-slim-buster
LABEL authors="Patrick Jentsch <p.jentsch@uni-bielefeld.de>"
ARG DOCKER_GID
ARG UID
ARG GID
ENV LANG="C.UTF-8"
ENV PYTHONDONTWRITEBYTECODE="1"
ENV PYTHONUNBUFFERED="1"
RUN apt-get update \
&& apt-get install --no-install-recommends --yes \
build-essential \
libpq-dev \
&& rm --recursive /var/lib/apt/lists/*
RUN groupadd --gid "${DOCKER_GID}" docker \
&& groupadd --gid "${GID}" nopaque \
&& useradd --create-home --gid nopaque --groups "${DOCKER_GID}" --no-log-init --uid "${UID}" nopaque
USER nopaque
WORKDIR /home/nopaque
ENV PYTHON3_VENV_PATH="/home/nopaque/venv"
RUN python3 -m venv "${PYTHON3_VENV_PATH}"
ENV PATH="${PYTHON3_VENV_PATH}/bin:${PATH}"
COPY --chown=nopaque:nopaque requirements.txt .
RUN python3 -m pip install --requirement requirements.txt \
&& rm requirements.txt
COPY --chown=nopaque:nopaque app app
COPY --chown=nopaque:nopaque migrations migrations
COPY --chown=nopaque:nopaque tests tests
COPY --chown=nopaque:nopaque .flaskenv boot.sh config.py nopaque.py ./
EXPOSE 5000
ENTRYPOINT ["./boot.sh"]

21
LICENSE
View File

@ -1,21 +0,0 @@
MIT License
Copyright (c) 2021 Bielefeld University - CRC 1288 - INF
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

View File

@ -1,62 +0,0 @@
# nopaque
nopaque bundles various tools and services that provide humanities scholars with DH methods and thus can support their various individual research processes. Using nopaque, researchers can subject digitized sources to Optical Character Recognition (OCR). The resulting text files can then be used as a data basis for Natural Language Processing (NLP). The texts are automatically subjected to various linguistic annotations. The data processed via NLP can then be summarized in the web application as corpora and analyzed by means of an information retrieval system through complex search queries. The range of functions of the web application will be successively extended according to the needs of the researchers.
## Prerequisites and requirements
1. Install docker for your system. Following the official instructions.
2. Install docker-compose. Following the official instructions.
## Configuration and startup
### **Create Docker swarm**
The generated computational workload is handled by a [Docker](https://docs.docker.com/) swarm. A swarm is a group of machines that are running Docker and joined into a cluster. It consists out of two different kinds of members, manager and worker nodes. The swarm setup process is described best in the [Docker documentation](https://docs.docker.com/engine/swarm/swarm-tutorial/).
### **Create network storage**
A shared network space is necessary so that all swarm members have access to all the data. To achieve this a [samba](https://www.samba.org/) share can be used.
You can create a samba share by using [this](https://hub.docker.com/r/dperson/samba/) Docker image.
``` bash
# Mount the Samba share on all swarm nodes (managers and workers)
username@hostname:~$ sudo mkdir /mnt/nopaque
username@hostname:~$ sudo mount --types cifs --options gid=${USER},password=nopaque,uid=${USER},user=nopaque,vers=3.0 //<SAMBA-SERVER-IP>/<SAMBA-SHARE-NAME> /mnt/nopaque
```
### **Download, configure and build nopaque**
``` bash
# Clone the nopaque repository
username@hostname:~$ git clone https://gitlab.ub.uni-bielefeld.de/sfb1288inf/nopaque.git
# Create data directories
username@hostname:~$ mkdir data/{db,logs,mq}
username@hostname:~$ cp db.env.tpl db.env
username@hostname:~$ cp .env.tpl .env
# Fill out the variables within these files.
username@hostname:~$ <YOUR EDITOR> db.env
username@hostname:~$ <YOUR EDITOR> .env
# Create docker-compose.override.yml file
username@hostname:~$ touch docker-compose.override.yml
# Tweak the docker-compose.override.yml to satisfy your needs. (You can find examples inside the docker-compose directory)
username@hostname:~$ <YOUR EDITOR> docker-compose.override.yml
# Build docker images
username@hostname:~$ docker-compose build
```
### Start your instance
``` bash
# For background execution add the -d flag
docker-compose up
# To scale your app use the following command after starting it normally
docker-compose \
-f docker-compose.yml \
-f docker-compose.override.yml \
-f docker-compose.scale.yml \
up \
-d \
--no-recreate \
--scale nopaque=<NUM_INSTANCES>
```

View File

@ -1,231 +0,0 @@
- title: 'Catalan'
description: 'Catalan pipeline optimized for CPU. Components: tok2vec, morphologizer, parser, senter, ner, attribute_ruler, lemmatizer.'
url: 'https://github.com/explosion/spacy-models/releases/download/ca_core_news_md-3.2.0/ca_core_news_md-3.2.0.tar.gz'
publisher: 'Explosion'
publisher_url: 'https://github.com/explosion'
publishing_url: 'https://github.com/explosion/spacy-models/releases/tag/ca_core_news_md-3.2.0'
publishing_year: 2021
pipeline_name: 'ca_core_news_md'
version: '3.2.0'
compatible_service_versions:
- '0.1.0'
- title: 'German'
description: 'German pipeline optimized for CPU. Components: tok2vec, tagger, morphologizer, parser, senter, ner, attribute_ruler, lemmatizer.'
url: 'https://github.com/explosion/spacy-models/releases/download/de_core_news_md-3.2.0/de_core_news_md-3.2.0.tar.gz'
publisher: 'Explosion'
publisher_url: 'https://github.com/explosion'
publishing_url: 'https://github.com/explosion/spacy-models/releases/tag/de_core_news_md-3.2.0'
publishing_year: 2021
pipeline_name: 'de_core_news_md'
version: '3.2.0'
compatible_service_versions:
- '0.1.0'
- title: 'Greek'
description: 'Greek pipeline optimized for CPU. Components: tok2vec, morphologizer, parser, senter, ner, attribute_ruler, lemmatizer.'
url: 'https://github.com/explosion/spacy-models/releases/download/el_core_news_md-3.2.0/el_core_news_md-3.2.0.tar.gz'
publisher: 'Explosion'
publisher_url: 'https://github.com/explosion'
publishing_url: 'https://github.com/explosion/spacy-models/releases/tag/el_core_news_md-3.2.0'
publishing_year: 2021
pipeline_name: 'el_core_news_md'
version: '3.2.0'
compatible_service_versions:
- '0.1.0'
- title: 'English'
description: 'English pipeline optimized for CPU. Components: tok2vec, tagger, parser, senter, ner, attribute_ruler, lemmatizer.'
url: 'https://github.com/explosion/spacy-models/releases/download/en_core_web_md-3.2.0/en_core_web_md-3.2.0.tar.gz'
publisher: 'Explosion'
publisher_url: 'https://github.com/explosion'
publishing_url: 'https://github.com/explosion/spacy-models/releases/tag/en_core_web_md-3.2.0'
publishing_year: 2021
pipeline_name: 'en_core_web_md'
version: '3.2.0'
compatible_service_versions:
- '0.1.0'
- title: 'Spanish'
description: 'Spanish pipeline optimized for CPU. Components: tok2vec, morphologizer, parser, senter, ner, attribute_ruler, lemmatizer.'
url: 'https://github.com/explosion/spacy-models/releases/download/es_core_news_md-3.2.0/es_core_news_md-3.2.0.tar.gz'
publisher: 'Explosion'
publisher_url: 'https://github.com/explosion'
publishing_url: 'https://github.com/explosion/spacy-models/releases/tag/es_core_news_md-3.2.0'
publishing_year: 2021
pipeline_name: 'es_core_news_md'
version: '3.2.0'
compatible_service_versions:
- '0.1.0'
- title: 'French'
description: 'French pipeline optimized for CPU. Components: tok2vec, morphologizer, parser, senter, ner, attribute_ruler, lemmatizer.'
url: 'https://github.com/explosion/spacy-models/releases/download/fr_core_news_md-3.2.0/fr_core_news_md-3.2.0.tar.gz'
publisher: 'Explosion'
publisher_url: 'https://github.com/explosion'
publishing_url: 'https://github.com/explosion/spacy-models/releases/tag/fr_core_news_md-3.2.0'
publishing_year: 2021
pipeline_name: 'fr_core_news_md'
version: '3.2.0'
compatible_service_versions:
- '0.1.0'
- title: 'Italian'
description: 'Italian pipeline optimized for CPU. Components: tok2vec, morphologizer, parser, senter, ner, attribute_ruler, lemmatizer.'
url: 'https://github.com/explosion/spacy-models/releases/download/it_core_news_md-3.2.0/it_core_news_md-3.2.0.tar.gz'
publisher: 'Explosion'
publisher_url: 'https://github.com/explosion'
publishing_url: 'https://github.com/explosion/spacy-models/releases/tag/it_core_news_md-3.2.0'
publishing_year: 2021
pipeline_name: 'it_core_news_md'
version: '3.2.0'
compatible_service_versions:
- '0.1.0'
- title: 'Polish'
description: 'Polish pipeline optimized for CPU. Components: tok2vec, morphologizer, parser, senter, ner, attribute_ruler, lemmatizer.'
url: 'https://github.com/explosion/spacy-models/releases/download/pl_core_news_md-3.2.0/pl_core_news_md-3.2.0.tar.gz'
publisher: 'Explosion'
publisher_url: 'https://github.com/explosion'
publishing_url: 'https://github.com/explosion/spacy-models/releases/tag/pl_core_news_md-3.2.0'
publishing_year: 2021
pipeline_name: 'pl_core_news_md'
version: '3.2.0'
compatible_service_versions:
- '0.1.0'
- title: 'Russian'
description: 'Russian pipeline optimized for CPU. Components: tok2vec, morphologizer, parser, senter, ner, attribute_ruler, lemmatizer.'
url: 'https://github.com/explosion/spacy-models/releases/download/ru_core_news_md-3.2.0/ru_core_news_md-3.2.0.tar.gz'
publisher: 'Explosion'
publisher_url: 'https://github.com/explosion'
publishing_url: 'https://github.com/explosion/spacy-models/releases/tag/ru_core_news_md-3.2.0'
publishing_year: 2021
pipeline_name: 'ru_core_news_md'
version: '3.2.0'
compatible_service_versions:
- '0.1.0'
- title: 'Chinese'
description: 'Chinese pipeline optimized for CPU. Components: tok2vec, tagger, parser, senter, ner, attribute_ruler.'
url: 'https://github.com/explosion/spacy-models/releases/download/zh_core_web_md-3.2.0/zh_core_web_md-3.2.0.tar.gz'
publisher: 'Explosion'
publisher_url: 'https://github.com/explosion'
publishing_url: 'https://github.com/explosion/spacy-models/releases/tag/zh_core_web_md-3.2.0'
publishing_year: 2021
pipeline_name: 'zh_core_web_md'
version: '3.2.0'
compatible_service_versions:
- '0.1.0'
- title: 'Catalan'
description: 'Catalan pipeline optimized for CPU. Components: tok2vec, morphologizer, parser, senter, ner, attribute_ruler, lemmatizer.'
url: 'https://github.com/explosion/spacy-models/releases/download/ca_core_news_md-3.4.0/ca_core_news_md-3.4.0.tar.gz'
publisher: 'Explosion'
publisher_url: 'https://github.com/explosion'
publishing_url: 'https://github.com/explosion/spacy-models/releases/tag/ca_core_news_md-3.4.0'
publishing_year: 2022
pipeline_name: 'ca_core_news_md'
version: '3.4.0'
compatible_service_versions:
- '0.1.1'
- '0.1.2'
- title: 'German'
description: 'German pipeline optimized for CPU. Components: tok2vec, tagger, morphologizer, parser, lemmatizer (trainable_lemmatizer), senter, ner.'
url: 'https://github.com/explosion/spacy-models/releases/download/de_core_news_md-3.4.0/de_core_news_md-3.4.0.tar.gz'
publisher: 'Explosion'
publisher_url: 'https://github.com/explosion'
publishing_url: 'https://github.com/explosion/spacy-models/releases/tag/de_core_news_md-3.4.0'
publishing_year: 2022
pipeline_name: 'de_core_news_md'
version: '3.4.0'
compatible_service_versions:
- '0.1.1'
- '0.1.2'
- title: 'Greek'
description: 'Greek pipeline optimized for CPU. Components: tok2vec, morphologizer, parser, lemmatizer (trainable_lemmatizer), senter, ner, attribute_ruler.'
url: 'https://github.com/explosion/spacy-models/releases/download/el_core_news_md-3.4.0/el_core_news_md-3.4.0.tar.gz'
publisher: 'Explosion'
publisher_url: 'https://github.com/explosion'
publishing_url: 'https://github.com/explosion/spacy-models/releases/tag/el_core_news_md-3.4.0'
publishing_year: 2022
pipeline_name: 'el_core_news_md'
version: '3.4.0'
compatible_service_versions:
- '0.1.1'
- '0.1.2'
- title: 'English'
description: 'English pipeline optimized for CPU. Components: tok2vec, tagger, parser, senter, ner, attribute_ruler, lemmatizer.'
url: 'https://github.com/explosion/spacy-models/releases/download/en_core_web_md-3.4.1/en_core_web_md-3.4.1.tar.gz'
publisher: 'Explosion'
publisher_url: 'https://github.com/explosion'
publishing_url: 'https://github.com/explosion/spacy-models/releases/tag/en_core_web_md-3.4.1'
publishing_year: 2022
pipeline_name: 'en_core_web_md'
version: '3.4.1'
compatible_service_versions:
- '0.1.1'
- '0.1.2'
- title: 'Spanish'
description: 'Spanish pipeline optimized for CPU. Components: tok2vec, morphologizer, parser, senter, ner, attribute_ruler, lemmatizer.'
url: 'https://github.com/explosion/spacy-models/releases/download/es_core_news_md-3.4.0/es_core_news_md-3.4.0.tar.gz'
publisher: 'Explosion'
publisher_url: 'https://github.com/explosion'
publishing_url: 'https://github.com/explosion/spacy-models/releases/tag/es_core_news_md-3.4.0'
publishing_year: 2022
pipeline_name: 'es_core_news_md'
version: '3.4.0'
compatible_service_versions:
- '0.1.1'
- '0.1.2'
- title: 'French'
description: 'French pipeline optimized for CPU. Components: tok2vec, morphologizer, parser, senter, ner, attribute_ruler, lemmatizer.'
url: 'https://github.com/explosion/spacy-models/releases/download/fr_core_news_md-3.4.0/fr_core_news_md-3.4.0.tar.gz'
publisher: 'Explosion'
publisher_url: 'https://github.com/explosion'
publishing_url: 'https://github.com/explosion/spacy-models/releases/tag/fr_core_news_md-3.4.0'
publishing_year: 2022
pipeline_name: 'fr_core_news_md'
version: '3.4.0'
compatible_service_versions:
- '0.1.1'
- '0.1.2'
- title: 'Italian'
description: 'Italian pipeline optimized for CPU. Components: tok2vec, morphologizer, tagger, parser, lemmatizer (trainable_lemmatizer), senter, ner'
url: 'https://github.com/explosion/spacy-models/releases/download/it_core_news_md-3.4.0/it_core_news_md-3.4.0.tar.gz'
publisher: 'Explosion'
publisher_url: 'https://github.com/explosion'
publishing_url: 'https://github.com/explosion/spacy-models/releases/tag/it_core_news_md-3.4.0'
publishing_year: 2022
pipeline_name: 'it_core_news_md'
version: '3.4.0'
compatible_service_versions:
- '0.1.1'
- '0.1.2'
- title: 'Polish'
description: 'Polish pipeline optimized for CPU. Components: tok2vec, morphologizer, parser, lemmatizer (trainable_lemmatizer), tagger, senter, ner.'
url: 'https://github.com/explosion/spacy-models/releases/download/pl_core_news_md-3.4.0/pl_core_news_md-3.4.0.tar.gz'
publisher: 'Explosion'
publisher_url: 'https://github.com/explosion'
publishing_url: 'https://github.com/explosion/spacy-models/releases/tag/pl_core_news_md-3.4.0'
publishing_year: 2022
pipeline_name: 'pl_core_news_md'
version: '3.4.0'
compatible_service_versions:
- '0.1.1'
- '0.1.2'
- title: 'Russian'
description: 'Russian pipeline optimized for CPU. Components: tok2vec, morphologizer, parser, senter, ner, attribute_ruler, lemmatizer.'
url: 'https://github.com/explosion/spacy-models/releases/download/ru_core_news_md-3.4.0/ru_core_news_md-3.4.0.tar.gz'
publisher: 'Explosion'
publisher_url: 'https://github.com/explosion'
publishing_url: 'https://github.com/explosion/spacy-models/releases/tag/ru_core_news_md-3.4.0'
publishing_year: 2022
pipeline_name: 'ru_core_news_md'
version: '3.4.0'
compatible_service_versions:
- '0.1.1'
- '0.1.2'
- title: 'Chinese'
description: 'Chinese pipeline optimized for CPU. Components: tok2vec, tagger, parser, senter, ner, attribute_ruler.'
url: 'https://github.com/explosion/spacy-models/releases/download/zh_core_web_md-3.4.0/zh_core_web_md-3.4.0.tar.gz'
publisher: 'Explosion'
publisher_url: 'https://github.com/explosion'
publishing_url: 'https://github.com/explosion/spacy-models/releases/tag/zh_core_web_md-3.4.0'
publishing_year: 2022
pipeline_name: 'zh_core_web_md'
version: '3.4.0'
compatible_service_versions:
- '0.1.1'
- '0.1.2'

File diff suppressed because it is too large Load Diff

View File

@ -1,103 +0,0 @@
from apifairy import APIFairy
from config import Config
from docker import DockerClient
from flask import Flask
from flask_apscheduler import APScheduler
from flask_assets import Environment
from flask_breadcrumbs import Breadcrumbs, default_breadcrumb_root
from flask_login import LoginManager
from flask_mail import Mail
from flask_marshmallow import Marshmallow
from flask_migrate import Migrate
from flask_paranoid import Paranoid
from flask_socketio import SocketIO
from flask_sqlalchemy import SQLAlchemy
from flask_hashids import Hashids
from werkzeug.exceptions import HTTPException
apifairy = APIFairy()
assets = Environment()
breadcrumbs = Breadcrumbs()
db = SQLAlchemy()
docker_client = DockerClient()
hashids = Hashids()
login = LoginManager()
login.login_view = 'auth.login'
login.login_message = 'Please log in to access this page.'
ma = Marshmallow()
mail = Mail()
migrate = Migrate(compare_type=True)
paranoid = Paranoid()
paranoid.redirect_view = '/'
scheduler = APScheduler()
socketio = SocketIO()
def create_app(config: Config = Config) -> Flask:
''' Creates an initialized Flask (WSGI Application) object. '''
app = Flask(__name__)
app.config.from_object(config)
config.init_app(app)
docker_client.login(
app.config['NOPAQUE_DOCKER_REGISTRY_USERNAME'],
password=app.config['NOPAQUE_DOCKER_REGISTRY_PASSWORD'],
registry=app.config['NOPAQUE_DOCKER_REGISTRY']
)
apifairy.init_app(app)
assets.init_app(app)
breadcrumbs.init_app(app)
db.init_app(app)
hashids.init_app(app)
login.init_app(app)
ma.init_app(app)
mail.init_app(app)
migrate.init_app(app, db)
paranoid.init_app(app)
scheduler.init_app(app)
socketio.init_app(app, message_queue=app.config['NOPAQUE_SOCKETIO_MESSAGE_QUEUE_URI']) # noqa
from .admin import bp as admin_blueprint
default_breadcrumb_root(admin_blueprint, '.admin')
app.register_blueprint(admin_blueprint, url_prefix='/admin')
from .api import bp as api_blueprint
app.register_blueprint(api_blueprint, url_prefix='/api')
from .auth import bp as auth_blueprint
default_breadcrumb_root(auth_blueprint, '.')
app.register_blueprint(auth_blueprint)
from .contributions import bp as contributions_blueprint
default_breadcrumb_root(contributions_blueprint, '.contributions')
app.register_blueprint(contributions_blueprint, url_prefix='/contributions')
from .corpora import bp as corpora_blueprint
default_breadcrumb_root(corpora_blueprint, '.corpora')
app.register_blueprint(corpora_blueprint, cli_group='corpus', url_prefix='/corpora')
from .errors import bp as errors_bp
app.register_blueprint(errors_bp)
from .jobs import bp as jobs_blueprint
default_breadcrumb_root(jobs_blueprint, '.jobs')
app.register_blueprint(jobs_blueprint, url_prefix='/jobs')
from .main import bp as main_blueprint
default_breadcrumb_root(main_blueprint, '.')
app.register_blueprint(main_blueprint, cli_group=None)
from .services import bp as services_blueprint
default_breadcrumb_root(services_blueprint, '.services')
app.register_blueprint(services_blueprint, url_prefix='/services')
from .settings import bp as settings_blueprint
default_breadcrumb_root(settings_blueprint, '.settings')
app.register_blueprint(settings_blueprint, url_prefix='/settings')
from .users import bp as users_blueprint
default_breadcrumb_root(users_blueprint, '.users')
app.register_blueprint(users_blueprint, url_prefix='/users')
return app

View File

@ -1,20 +0,0 @@
from flask import Blueprint
from flask_login import login_required
from app.decorators import admin_required
bp = Blueprint('admin', __name__)
@bp.before_request
@login_required
@admin_required
def before_request():
'''
Ensures that the routes in this package can be visited only by users with
administrator privileges (login_required and admin_required).
'''
pass
from . import json_routes, routes

View File

@ -1,16 +0,0 @@
from flask_wtf import FlaskForm
from wtforms import SelectField, SubmitField
from app.models import Role
class UpdateUserForm(FlaskForm):
role = SelectField('Role')
submit = SubmitField()
def __init__(self, user, *args, **kwargs):
if 'data' not in kwargs:
kwargs['data'] = {'role': user.role.hashid}
if 'prefix' not in kwargs:
kwargs['prefix'] = 'update-user-form'
super().__init__(*args, **kwargs)
self.role.choices = [(x.hashid, x.name) for x in Role.query.all()]

View File

@ -1,23 +0,0 @@
from flask import abort, request
from app import db
from app.decorators import content_negotiation
from app.models import User
from . import bp
@bp.route('/users/<hashid:user_id>/confirmed', methods=['PUT'])
@content_negotiation(consumes='application/json', produces='application/json')
def update_user_role(user_id):
confirmed = request.json
if not isinstance(confirmed, bool):
abort(400)
user = User.query.get_or_404(user_id)
user.confirmed = confirmed
db.session.commit()
response_data = {
'message': (
f'User "{user.username}" is now '
f'{"confirmed" if confirmed else "unconfirmed"}'
)
}
return response_data, 200

View File

@ -1,146 +0,0 @@
from flask import abort, flash, redirect, render_template, url_for
from flask_breadcrumbs import register_breadcrumb
from app import db, hashids
from app.models import Avatar, Corpus, Role, User
from app.users.settings.forms import (
UpdateAvatarForm,
UpdatePasswordForm,
UpdateNotificationsForm,
UpdateAccountInformationForm,
UpdateProfileInformationForm
)
from . import bp
from .forms import UpdateUserForm
from app.users.utils import (
user_endpoint_arguments_constructor as user_eac,
user_dynamic_list_constructor as user_dlc
)
@bp.route('')
@register_breadcrumb(bp, '.', '<i class="material-icons left">admin_panel_settings</i>Administration')
def admin():
return render_template(
'admin/admin.html.j2',
title='Administration'
)
@bp.route('/corpora')
@register_breadcrumb(bp, '.corpora', 'Corpora')
def corpora():
corpora = Corpus.query.all()
return render_template(
'admin/corpora.html.j2',
title='Corpora',
corpora=corpora
)
@bp.route('/users')
@register_breadcrumb(bp, '.users', '<i class="material-icons left">group</i>Users')
def users():
users = User.query.all()
return render_template(
'admin/users.html.j2',
title='Users',
users=users
)
@bp.route('/users/<hashid:user_id>')
@register_breadcrumb(bp, '.users.entity', '', dynamic_list_constructor=user_dlc)
def user(user_id):
user = User.query.get_or_404(user_id)
corpora = Corpus.query.filter(Corpus.user == user).all()
return render_template(
'admin/user.html.j2',
title=user.username,
user=user,
corpora=corpora
)
@bp.route('/users/<hashid:user_id>/settings', methods=['GET', 'POST'])
@register_breadcrumb(bp, '.users.entity.settings', '<i class="material-icons left">settings</i>Settings')
def user_settings(user_id):
user = User.query.get_or_404(user_id)
update_account_information_form = UpdateAccountInformationForm(user)
update_profile_information_form = UpdateProfileInformationForm(user)
update_avatar_form = UpdateAvatarForm()
update_password_form = UpdatePasswordForm(user)
update_notifications_form = UpdateNotificationsForm(user)
update_user_form = UpdateUserForm(user)
# region handle update profile information form
if update_profile_information_form.submit.data and update_profile_information_form.validate():
user.about_me = update_profile_information_form.about_me.data
user.location = update_profile_information_form.location.data
user.organization = update_profile_information_form.organization.data
user.website = update_profile_information_form.website.data
user.full_name = update_profile_information_form.full_name.data
db.session.commit()
flash('Your changes have been saved')
return redirect(url_for('.user_settings', user_id=user.id))
# endregion handle update profile information form
# region handle update avatar form
if update_avatar_form.submit.data and update_avatar_form.validate():
try:
Avatar.create(
update_avatar_form.avatar.data,
user=user
)
except (AttributeError, OSError):
abort(500)
db.session.commit()
flash('Your changes have been saved')
return redirect(url_for('.user_settings', user_id=user.id))
# endregion handle update avatar form
# region handle update account information form
if update_account_information_form.submit.data and update_account_information_form.validate():
user.email = update_account_information_form.email.data
user.username = update_account_information_form.username.data
db.session.commit()
flash('Profile settings updated')
return redirect(url_for('.user_settings', user_id=user.id))
# endregion handle update account information form
# region handle update password form
if update_password_form.submit.data and update_password_form.validate():
user.password = update_password_form.new_password.data
db.session.commit()
flash('Your changes have been saved')
return redirect(url_for('.user_settings', user_id=user.id))
# endregion handle update password form
# region handle update notifications form
if update_notifications_form.submit.data and update_notifications_form.validate():
user.setting_job_status_mail_notification_level = \
update_notifications_form.job_status_mail_notification_level.data
db.session.commit()
flash('Your changes have been saved')
return redirect(url_for('.user_settings', user_id=user.id))
# endregion handle update notifications form
# region handle update user form
if update_user_form.submit.data and update_user_form.validate():
role_id = hashids.decode(update_user_form.role.data)
user.role = Role.query.get(role_id)
db.session.commit()
flash('Your changes have been saved')
return redirect(url_for('.user_settings', user_id=user.id))
# endregion handle update user form
return render_template(
'admin/user_settings.html.j2',
title='Settings',
update_account_information_form=update_account_information_form,
update_avatar_form=update_avatar_form,
update_notifications_form=update_notifications_form,
update_password_form=update_password_form,
update_profile_information_form=update_profile_information_form,
update_user_form=update_user_form,
user=user
)

View File

@ -1,14 +0,0 @@
from flask import Blueprint
bp = Blueprint('api', __name__)
from .tokens import bp as tokens_blueprint
bp.register_blueprint(tokens_blueprint, url_prefix='/tokens')
from .users import bp as users_blueprint
bp.register_blueprint(users_blueprint, url_prefix='/users')
from .jobs import bp as jobs_blueprint
bp.register_blueprint(jobs_blueprint, url_prefix='/jobs')

View File

@ -1,48 +0,0 @@
from flask_httpauth import HTTPBasicAuth, HTTPTokenAuth
from werkzeug.exceptions import Forbidden, Unauthorized
from app.models import User
basic_auth = HTTPBasicAuth()
token_auth = HTTPTokenAuth()
auth_error_responses = {
Unauthorized.code: Unauthorized.description,
Forbidden.code: Forbidden.description
}
@basic_auth.verify_password
def verify_password(email_or_username, password):
user = User.query.filter((User.email == email_or_username.lower()) | (User.username == email_or_username)).first()
if user is not None and user.verify_password(password):
return user
@basic_auth.error_handler
def basic_auth_error(status):
error = (Forbidden if status == 403 else Unauthorized)()
return {
'code': error.code,
'message': error.name,
'description': error.description,
}, error.code, {'WWW-Authenticate': 'Form'}
@token_auth.verify_token
def verify_token(token):
return User.verify_access_token(token) if token else None
@token_auth.error_handler
def token_auth_error(status):
error = (Forbidden if status == 403 else Unauthorized)()
return {
'code': error.code,
'message': error.name,
'description': error.description,
}, error.code
@basic_auth.get_user_roles
@token_auth.get_user_roles
def get_user_roles(user):
return [user.role.name]

View File

@ -1,102 +0,0 @@
from apifairy import authenticate, response
from apifairy.decorators import body, other_responses
from flask import abort, Blueprint
from werkzeug.exceptions import InternalServerError
from app import db, hashids
from app.models import Job, JobInput, JobStatus, TesseractOCRPipelineModel
from .schemas import EmptySchema, JobSchema, SpaCyNLPPipelineJobSchema, TesseractOCRPipelineJobSchema, TesseractOCRPipelineModelSchema
from .auth import auth_error_responses, token_auth
bp = Blueprint('jobs', __name__)
job_schema = JobSchema()
jobs_schema = JobSchema(many=True)
spacy_nlp_pipeline_job_schema = SpaCyNLPPipelineJobSchema()
tesseract_ocr_pipeline_job_schema = TesseractOCRPipelineJobSchema()
tesseract_ocr_pipeline_model_schema = TesseractOCRPipelineModelSchema()
tesseract_ocr_pipeline_models_schema = TesseractOCRPipelineModelSchema(many=True)
@bp.route('', methods=['GET'])
@authenticate(token_auth, role='Administrator')
@response(jobs_schema)
@other_responses(auth_error_responses)
def get_jobs():
"""Get all jobs"""
return Job.query.all()
@bp.route('/tesseract-ocr-pipeline', methods=['POST'])
@authenticate(token_auth)
@body(tesseract_ocr_pipeline_job_schema, location='form')
@response(job_schema)
@other_responses({**auth_error_responses, InternalServerError.code: InternalServerError.description})
def create_tesseract_ocr_pipeline_job(args):
"""Create a new Tesseract OCR Pipeline job"""
current_user = token_auth.current_user()
try:
job = Job.create(
title=args['title'],
description=args['description'],
service='tesseract-ocr-pipeline',
service_args={
'model': hashids.decode(args['model_id']),
'binarization': args['binarization']
},
service_version=args['service_version'],
user=current_user
)
except OSError:
abort(500)
try:
JobInput.create(args['pdf'], job=job)
except OSError:
abort(500)
job.status = JobStatus.SUBMITTED
db.session.commit()
return job, 201
@bp.route('/tesseract-ocr-pipeline/models', methods=['GET'])
@authenticate(token_auth)
@response(tesseract_ocr_pipeline_models_schema)
@other_responses(auth_error_responses)
def get_tesseract_ocr_models():
"""Get all Tesseract OCR Models"""
return TesseractOCRPipelineModel.query.all()
@bp.route('/<hashid:job_id>', methods=['DELETE'])
@authenticate(token_auth)
@response(EmptySchema, status_code=204)
@other_responses(auth_error_responses)
def delete_job(job_id):
"""Delete a job by id"""
current_user = token_auth.current_user()
job = Job.query.get(job_id)
if job is None:
abort(404)
if not (job.user == current_user or current_user.is_administrator()):
abort(403)
try:
job.delete()
except OSError as e:
abort(500)
db.session.commit()
return {}, 204
@bp.route('/<hashid:job_id>', methods=['GET'])
@authenticate(token_auth)
@response(job_schema)
@other_responses(auth_error_responses)
def get_job(job_id):
"""Get a job by id"""
current_user = token_auth.current_user()
job = Job.query.get(job_id)
if job is None:
abort(404)
if not (job.user == current_user or current_user.is_administrator()):
abort(403)
return job

View File

@ -1,173 +0,0 @@
from apifairy.fields import FileField
from marshmallow import validate, validates, ValidationError
from marshmallow.decorators import post_dump
from app import ma
from app.models import (
Job,
JobStatus,
TesseractOCRPipelineModel,
Token,
User,
UserSettingJobStatusMailNotificationLevel
)
from app.services import SERVICES
class EmptySchema(ma.Schema):
pass
class TokenSchema(ma.SQLAlchemySchema):
class Meta:
model = Token
ordered = True
access_token = ma.String(required=True)
refresh_token = ma.String()
class TesseractOCRPipelineModelSchema(ma.SQLAlchemySchema):
class Meta:
model = TesseractOCRPipelineModel
ordered = True
hashid = ma.String(data_key='id', dump_only=True)
user_hashid = ma.String(data_key='user_id', dump_only=True)
title = ma.auto_field(
required=True,
validate=validate.Length(min=1, max=64)
)
description = ma.auto_field(
required=True,
validate=validate.Length(min=1, max=255)
)
version = ma.String(
required=True,
validate=validate.Length(min=1, max=16)
)
compatible_service_versions = ma.List(
ma.String(required=True, validate=validate.Length(min=1, max=16)),
required=True,
validate=validate.Length(min=1, max=255)
)
publisher = ma.String(
required=True,
validate=validate.Length(min=1, max=128)
)
publisher_url = ma.String(
validate=[validate.URL(), validate.Length(min=1, max=512)]
)
publishing_url = ma.String(
required=True,
validate=[validate.URL(), validate.Length(min=1, max=512)]
)
publishing_year = ma.Int(
required=True
)
is_public = ma.Boolean(required=True)
class JobSchema(ma.SQLAlchemySchema):
class Meta:
model = Job
ordered = True
hashid = ma.String(data_key='id', dump_only=True)
user_hashid = ma.String(data_key='user_id', dump_only=True)
title = ma.auto_field(
required=True,
validate=validate.Length(min=1, max=32)
)
description = ma.auto_field(
required=True,
validate=validate.Length(min=1, max=255)
)
creation_date = ma.auto_field(dump_only=True)
end_date = ma.auto_field(dump_only=True)
service = ma.String(
dump_only=True,
validate=validate.OneOf(SERVICES.keys())
)
service_args = ma.Dict(dump_only=True)
service_version = ma.String(dump_only=True)
status = ma.String(
dump_only=True,
validate=validate.OneOf(list(JobStatus.__members__.keys()))
)
@post_dump(pass_original=True)
def post_dump(self, serialized_job, job, **kwargs):
serialized_job['status'] = job.status.name
return serialized_job
class TesseractOCRPipelineJobSchema(JobSchema):
binarization = ma.Boolean(load_only=True, missing=False)
model_id = ma.String(required=True, load_only=True)
service_version = ma.auto_field(
required=True,
validate=[validate.Length(min=1, max=16), validate.OneOf(list(SERVICES['tesseract-ocr-pipeline']['versions'].keys()))]
)
pdf = FileField()
@validates('pdf')
def validate_pdf(self, value):
if value.mimetype != 'application/pdf':
raise ValidationError('PDF files only!')
class SpaCyNLPPipelineJobSchema(JobSchema):
binarization = ma.Boolean(load_only=True, missing=False)
model_id = ma.String(required=True, load_only=True)
service_version = ma.auto_field(
required=True,
validate=[validate.Length(min=1, max=16), validate.OneOf(list(SERVICES['tesseract-ocr-pipeline']['versions'].keys()))]
)
txt = FileField(required=True)
@validates('txt')
def validate_txt(self, value):
if value.mimetype != 'text/plain':
raise ValidationError('Plain text files only!')
class UserSchema(ma.SQLAlchemySchema):
class Meta:
model = User
ordered = True
hashid = ma.String(data_key='id', dump_only=True)
username = ma.auto_field(
validate=[
validate.Length(min=1, max=64),
validate.Regexp(
User.username_pattern,
error='Usernames must have only letters, numbers, dots or underscores'
)
]
)
email = ma.auto_field(validate=validate.Email())
member_since = ma.auto_field(dump_only=True)
last_seen = ma.auto_field(dump_only=True)
password = ma.String(load_only=True)
last_seen = ma.auto_field(dump_only=True)
setting_job_status_mail_notification_level = ma.String(
validate=validate.OneOf(list(UserSettingJobStatusMailNotificationLevel.__members__.keys()))
)
@validates('email')
def validate_email(self, email):
if User.query.filter(User.email == email).first():
raise ValidationError('Email already registered')
@validates('username')
def validate_username(self, username):
if User.query.filter(User.username == username).first():
raise ValidationError('Username already in use')
@post_dump(pass_original=True)
def post_dump(self, serialized_user, user, **kwargs):
serialized_user['setting_job_status_mail_notification_level'] = \
user.setting_job_status_mail_notification_level.name
return serialized_user

View File

@ -1,58 +0,0 @@
from apifairy import authenticate, body, response, other_responses
from flask import Blueprint, request, abort
from app import db
from app.models import Token, User
from .auth import basic_auth
from .schemas import EmptySchema, TokenSchema
bp = Blueprint('tokens', __name__)
token_schema = TokenSchema()
@bp.route('', methods=['DELETE'])
@response(EmptySchema, status_code=204, description='Token revoked')
@other_responses({401: 'Invalid access token'})
def delete_token():
"""Revoke an access token"""
access_token = request.headers['Authorization'].split()[1]
token = Token.query.filter(Token.access_token == access_token).first()
if token is None: # pragma: no cover
abort(401)
token.expire()
db.session.commit()
return {}
@bp.route('', methods=['POST'])
@authenticate(basic_auth)
@response(token_schema)
@other_responses({401: 'Invalid username or password'})
def create_token():
"""Create new access and refresh tokens"""
user = basic_auth.current_user()
token = user.generate_auth_token()
db.session.add(token)
Token.clean() # keep token table clean of old tokens
db.session.commit()
return token, 200
@bp.route('', methods=['PUT'])
@body(token_schema)
@response(token_schema, description='Newly issued access and refresh tokens')
@other_responses({401: 'Invalid access or refresh token'})
def refresh_token(args):
"""Refresh an access token"""
access_token = args.get('access_token')
refresh_token = args.get('refresh_token')
if access_token is None or refresh_token is None:
abort(401)
token = User.verify_refresh_token(refresh_token, access_token)
if token is None:
abort(401)
token.expire()
new_token = token.user.generate_auth_token()
db.session.add_all([token, new_token])
db.session.commit()
return new_token, 200

View File

@ -1,99 +0,0 @@
from apifairy import authenticate, body, response
from apifairy.decorators import other_responses
from flask import abort, Blueprint
from werkzeug.exceptions import InternalServerError
from app import db
from app.email import create_message, send
from app.models import User
from .schemas import EmptySchema, UserSchema
from .auth import auth_error_responses, token_auth
bp = Blueprint('users', __name__)
user_schema = UserSchema()
users_schema = UserSchema(many=True)
@bp.route('', methods=['GET'])
@authenticate(token_auth, role='Administrator')
@response(users_schema)
@other_responses(auth_error_responses)
def get_users():
"""Get all users"""
return User.query.all()
@bp.route('', methods=['POST'])
@body(user_schema)
@response(user_schema, 201)
@other_responses({InternalServerError.code: InternalServerError.description})
def create_user(args):
"""Create a new user"""
try:
user = User.create(
email=args['email'].lower(),
password=args['password'],
username=args['username']
)
except OSError:
abort(500)
msg = create_message(
user.email,
'Confirm Your Account',
'auth/email/confirm',
token=user.generate_confirm_token(),
user=user
)
send(msg)
db.session.commit()
return user, 201
@bp.route('/<hashid:user_id>', methods=['DELETE'])
@authenticate(token_auth)
@response(EmptySchema, status_code=204)
@other_responses(auth_error_responses)
def delete_user(user_id):
"""Delete a user by id"""
current_user = token_auth.current_user()
user = User.query.get(user_id)
if user is None:
abort(404)
if not (user == current_user or current_user.is_administrator()):
abort(403)
user.delete()
db.session.commit()
return {}, 204
@bp.route('/<hashid:user_id>', methods=['GET'])
@authenticate(token_auth)
@response(user_schema)
@other_responses(auth_error_responses)
@other_responses({404: 'User not found'})
def get_user(user_id):
"""Retrieve a user by id"""
current_user = token_auth.current_user()
user = User.query.get(user_id)
if user is None:
abort(404)
if not (user == current_user or current_user.is_administrator()):
abort(403)
return user
@bp.route('/<username>', methods=['GET'])
@authenticate(token_auth)
@response(user_schema)
@other_responses(auth_error_responses)
@other_responses({404: 'User not found'})
def get_user_by_username(username):
"""Retrieve a user by username"""
current_user = token_auth.current_user()
user = User.query.filter(User.username == username).first()
if user is None:
abort(404)
if not (user == current_user or current_user.is_administrator()):
abort(403)
return user

View File

@ -1,5 +0,0 @@
from flask import Blueprint
bp = Blueprint('auth', __name__)
from . import routes

View File

@ -1,108 +0,0 @@
from flask_wtf import FlaskForm
from wtforms import (
BooleanField,
PasswordField,
StringField,
SubmitField,
ValidationError
)
from wtforms.validators import InputRequired, Email, EqualTo, Length, Regexp
from app.models import User
class RegistrationForm(FlaskForm):
email = StringField(
'Email',
validators=[InputRequired(), Email(), Length(max=254)]
)
username = StringField(
'Username',
validators=[
InputRequired(),
Length(max=64),
Regexp(
User.username_pattern,
message=(
'Usernames must have only letters, numbers, dots or '
'underscores'
)
)
]
)
password = PasswordField(
'Password',
validators=[
InputRequired(),
EqualTo('password_2', message='Passwords must match')
]
)
password_2 = PasswordField(
'Password confirmation',
validators=[
InputRequired(),
EqualTo('password', message='Passwords must match')
]
)
terms_of_use_accepted = BooleanField(
'I have read and accept the terms of use',
validators=[InputRequired()]
)
submit = SubmitField()
def __init__(self, *args, **kwargs):
if 'prefix' not in kwargs:
kwargs['prefix'] = 'registration-form'
super().__init__(*args, **kwargs)
def validate_email(self, field):
if User.query.filter_by(email=field.data.lower()).first():
raise ValidationError('Email already registered')
def validate_username(self, field):
if User.query.filter_by(username=field.data).first():
raise ValidationError('Username already in use')
class LoginForm(FlaskForm):
user = StringField('Email or username', validators=[InputRequired()])
password = PasswordField('Password', validators=[InputRequired()])
remember_me = BooleanField('Keep me logged in')
submit = SubmitField()
def __init__(self, *args, **kwargs):
if 'prefix' not in kwargs:
kwargs['prefix'] = 'login-form'
super().__init__(*args, **kwargs)
class ResetPasswordRequestForm(FlaskForm):
email = StringField('Email', validators=[InputRequired(), Email()])
submit = SubmitField()
def __init__(self, *args, **kwargs):
if 'prefix' not in kwargs:
kwargs['prefix'] = 'reset-password-request-form'
super().__init__(*args, **kwargs)
class ResetPasswordForm(FlaskForm):
password = PasswordField(
'New password',
validators=[
InputRequired(),
EqualTo('password_2', message='Passwords must match')
]
)
password_2 = PasswordField(
'New password confirmation',
validators=[
InputRequired(),
EqualTo('password', message='Passwords must match')
]
)
submit = SubmitField()
def __init__(self, *args, **kwargs):
if 'prefix' not in kwargs:
kwargs['prefix'] = 'reset-password-form'
super().__init__(*args, **kwargs)

View File

@ -1,190 +0,0 @@
from flask import abort, flash, redirect, render_template, request, url_for
from flask_breadcrumbs import register_breadcrumb
from flask_login import current_user, login_user, login_required, logout_user
from app import db
from app.email import create_message, send
from app.models import User
from . import bp
from .forms import (
LoginForm,
ResetPasswordForm,
ResetPasswordRequestForm,
RegistrationForm
)
@bp.before_app_request
def before_request():
"""
Checks if a user is unconfirmed when visiting specific sites. Redirects to
unconfirmed view if user is unconfirmed.
"""
if current_user.is_authenticated:
current_user.ping()
db.session.commit()
if (not current_user.confirmed
and request.endpoint
and request.blueprint != 'auth'
and request.endpoint != 'static'):
return redirect(url_for('auth.unconfirmed'))
@bp.route('/register', methods=['GET', 'POST'])
@register_breadcrumb(bp, '.register', 'Register')
def register():
if current_user.is_authenticated:
return redirect(url_for('main.dashboard'))
form = RegistrationForm()
if form.validate_on_submit():
try:
user = User.create(
email=form.email.data.lower(),
password=form.password.data,
username=form.username.data,
terms_of_use_accepted=form.terms_of_use_accepted.data
)
except OSError:
flash('Internal Server Error', category='error')
abort(500)
flash(f'User "{user.username}" created')
token = user.generate_confirm_token()
msg = create_message(
user.email,
'Confirm Your Account',
'auth/email/confirm',
token=token,
user=user
)
send(msg)
flash('A confirmation email has been sent to you by email')
db.session.commit()
return redirect(url_for('.login'))
return render_template(
'auth/register.html.j2',
title='Register',
form=form
)
@bp.route('/login', methods=['GET', 'POST'])
@register_breadcrumb(bp, '.login', 'Login')
def login():
if current_user.is_authenticated:
return redirect(url_for('main.dashboard'))
form = LoginForm()
if form.validate_on_submit():
user = User.query.filter((User.email == form.user.data.lower()) | (User.username == form.user.data)).first()
if user and user.verify_password(form.password.data):
login_user(user, form.remember_me.data)
next = request.args.get('next')
if next is None or not next.startswith('/'):
next = url_for('main.dashboard')
flash('You have been logged in')
return redirect(next)
flash('Invalid email/username or password', category='error')
return render_template(
'auth/login.html.j2',
title='Log in',
form=form
)
@bp.route('/logout')
@login_required
def logout():
logout_user()
flash('You have been logged out')
return redirect(url_for('main.index'))
@bp.route('/unconfirmed')
@register_breadcrumb(bp, '.unconfirmed', 'Unconfirmed')
@login_required
def unconfirmed():
if current_user.confirmed:
return redirect(url_for('main.dashboard'))
return render_template(
'auth/unconfirmed.html.j2',
title='Unconfirmed'
)
@bp.route('/confirm-request')
@login_required
def confirm_request():
if current_user.confirmed:
return redirect(url_for('main.dashboard'))
token = current_user.generate_confirm_token()
msg = create_message(
current_user.email,
'Confirm Your Account',
'auth/email/confirm',
token=token,
user=current_user
)
send(msg)
flash('A new confirmation email has been sent to you by email')
return redirect(url_for('.unconfirmed'))
@bp.route('/confirm/<token>')
@login_required
def confirm(token):
if current_user.confirmed:
return redirect(url_for('main.dashboard'))
if current_user.confirm(token):
db.session.commit()
flash('You have confirmed your account')
return redirect(url_for('main.dashboard'))
flash('The confirmation link is invalid or has expired', category='error')
return redirect(url_for('.unconfirmed'))
@bp.route('/reset-password-request', methods=['GET', 'POST'])
@register_breadcrumb(bp, '.reset_password_request', 'Password Reset')
def reset_password_request():
if current_user.is_authenticated:
return redirect(url_for('main.dashboard'))
form = ResetPasswordRequestForm()
if form.validate_on_submit():
user = User.query.filter_by(email=form.email.data.lower()).first()
if user is not None:
token = user.generate_reset_password_token()
msg = create_message(
user.email,
'Reset Your Password',
'auth/email/reset_password',
token=token,
user=user
)
send(msg)
flash(
'An email with instructions to reset your password has been sent '
'to you'
)
return redirect(url_for('.login'))
return render_template(
'auth/reset_password_request.html.j2',
title='Password Reset',
form=form
)
@bp.route('/reset-password/<token>', methods=['GET', 'POST'])
@register_breadcrumb(bp, '.reset_password', 'Password Reset')
def reset_password(token):
if current_user.is_authenticated:
return redirect(url_for('main.dashboard'))
form = ResetPasswordForm()
if form.validate_on_submit():
if User.reset_password(token, form.password.data):
db.session.commit()
flash('Your password has been updated')
return redirect(url_for('.login'))
return redirect(url_for('main.index'))
return render_template(
'auth/reset_password.html.j2',
title='Password Reset',
form=form,
token=token
)

View File

@ -1,23 +0,0 @@
from flask import Blueprint
from flask_login import login_required
bp = Blueprint('contributions', __name__)
@bp.before_request
@login_required
def before_request():
'''
Ensures that the routes in this package can only be visited by users that
are logged in.
'''
pass
from . import (
routes,
spacy_nlp_pipeline_models,
tesseract_ocr_pipeline_models,
transkribus_htr_pipeline_models
)

View File

@ -1,47 +0,0 @@
from flask_wtf import FlaskForm
from wtforms import (
StringField,
SubmitField,
SelectMultipleField,
IntegerField
)
from wtforms.validators import InputRequired, Length
class ContributionBaseForm(FlaskForm):
title = StringField(
'Title',
validators=[InputRequired(), Length(max=64)]
)
description = StringField(
'Description',
validators=[InputRequired(), Length(max=255)]
)
version = StringField(
'Version',
validators=[InputRequired(), Length(max=16)]
)
publisher = StringField(
'Publisher',
validators=[InputRequired(), Length(max=128)]
)
publisher_url = StringField(
'Publisher URL',
validators=[InputRequired(), Length(max=512)]
)
publishing_url = StringField(
'Publishing URL',
validators=[InputRequired(), Length(max=512)]
)
publishing_year = IntegerField(
'Publishing year',
validators=[InputRequired()]
)
compatible_service_versions = SelectMultipleField(
'Compatible service versions'
)
submit = SubmitField()
class UpdateContributionBaseForm(ContributionBaseForm):
pass

View File

@ -1,9 +0,0 @@
from flask import redirect, url_for
from flask_breadcrumbs import register_breadcrumb
from . import bp
@bp.route('')
@register_breadcrumb(bp, '.', '<i class="material-icons left">new_label</i>My Contributions')
def contributions():
return redirect(url_for('main.dashboard', _anchor='contributions'))

View File

@ -1,2 +0,0 @@
from .. import bp
from . import json_routes, routes

View File

@ -1,48 +0,0 @@
from flask_wtf.file import FileField, FileRequired
from wtforms import StringField, ValidationError
from wtforms.validators import InputRequired, Length
from app.services import SERVICES
from ..forms import ContributionBaseForm, UpdateContributionBaseForm
class CreateSpaCyNLPPipelineModelForm(ContributionBaseForm):
spacy_model_file = FileField(
'File',
validators=[FileRequired()]
)
pipeline_name = StringField(
'Pipeline name',
validators=[InputRequired(), Length(max=64)]
)
def validate_spacy_model_file(self, field):
if not field.data.filename.lower().endswith('.tar.gz'):
raise ValidationError('.tar.gz files only!')
def __init__(self, *args, **kwargs):
if 'prefix' not in kwargs:
kwargs['prefix'] = 'create-spacy-nlp-pipeline-model-form'
super().__init__(*args, **kwargs)
service_manifest = SERVICES['spacy-nlp-pipeline']
self.compatible_service_versions.choices = [('', 'Choose your option')]
self.compatible_service_versions.choices += [
(x, x) for x in service_manifest['versions'].keys()
]
self.compatible_service_versions.default = ''
class UpdateSpaCyNLPPipelineModelForm(UpdateContributionBaseForm):
pipeline_name = StringField(
'Pipeline name',
validators=[InputRequired(), Length(max=64)]
)
def __init__(self, *args, **kwargs):
if 'prefix' not in kwargs:
kwargs['prefix'] = 'edit-spacy-nlp-pipeline-model-form'
super().__init__(*args, **kwargs)
service_manifest = SERVICES['spacy-nlp-pipeline']
self.compatible_service_versions.choices = [('', 'Choose your option')]
self.compatible_service_versions.choices += [
(x, x) for x in service_manifest['versions'].keys()
]
self.compatible_service_versions.default = ''

View File

@ -1,52 +0,0 @@
from flask import abort, current_app, request
from flask_login import current_user
from threading import Thread
from app import db
from app.decorators import content_negotiation, permission_required
from app.models import SpaCyNLPPipelineModel
from .. import bp
@bp.route('/spacy-nlp-pipeline-models/<hashid:spacy_nlp_pipeline_model_id>', methods=['DELETE'])
@content_negotiation(produces='application/json')
def delete_spacy_model(spacy_nlp_pipeline_model_id):
def _delete_spacy_model(app, spacy_nlp_pipeline_model_id):
with app.app_context():
snpm = SpaCyNLPPipelineModel.query.get(spacy_nlp_pipeline_model_id)
snpm.delete()
db.session.commit()
snpm = SpaCyNLPPipelineModel.query.get_or_404(spacy_nlp_pipeline_model_id)
if not (snpm.user == current_user or current_user.is_administrator()):
abort(403)
thread = Thread(
target=_delete_spacy_model,
args=(current_app._get_current_object(), snpm.id)
)
thread.start()
response_data = {
'message': \
f'SpaCy NLP Pipeline Model "{snpm.title}" marked for deletion'
}
return response_data, 202
@bp.route('/spacy-nlp-pipeline-models/<hashid:spacy_nlp_pipeline_model_id>/is_public', methods=['PUT'])
@permission_required('CONTRIBUTE')
@content_negotiation(consumes='application/json', produces='application/json')
def update_spacy_nlp_pipeline_model_is_public(spacy_nlp_pipeline_model_id):
is_public = request.json
if not isinstance(is_public, bool):
abort(400)
snpm = SpaCyNLPPipelineModel.query.get_or_404(spacy_nlp_pipeline_model_id)
if not (snpm.user == current_user or current_user.is_administrator()):
abort(403)
snpm.is_public = is_public
db.session.commit()
response_data = {
'message': (
f'SpaCy NLP Pipeline Model "{snpm.title}"'
f' is now {"public" if is_public else "private"}'
)
}
return response_data, 200

View File

@ -1,77 +0,0 @@
from flask import abort, flash, redirect, render_template, url_for
from flask_breadcrumbs import register_breadcrumb
from flask_login import current_user
from app import db
from app.models import SpaCyNLPPipelineModel
from . import bp
from .forms import (
CreateSpaCyNLPPipelineModelForm,
UpdateSpaCyNLPPipelineModelForm
)
from .utils import (
spacy_nlp_pipeline_model_dlc as spacy_nlp_pipeline_model_dlc
)
@bp.route('/spacy-nlp-pipeline-models')
@register_breadcrumb(bp, '.spacy_nlp_pipeline_models', 'SpaCy NLP Pipeline Models')
def spacy_nlp_pipeline_models():
return render_template(
'contributions/spacy_nlp_pipeline_models/spacy_nlp_pipeline_models.html.j2',
title='SpaCy NLP Pipeline Models'
)
@bp.route('/spacy-nlp-pipeline-models/create', methods=['GET', 'POST'])
@register_breadcrumb(bp, '.spacy_nlp_pipeline_models.create', 'Create')
def create_spacy_nlp_pipeline_model():
form = CreateSpaCyNLPPipelineModelForm()
if form.is_submitted():
if not form.validate():
return {'errors': form.errors}, 400
try:
snpm = SpaCyNLPPipelineModel.create(
form.spacy_model_file.data,
compatible_service_versions=form.compatible_service_versions.data,
description=form.description.data,
pipeline_name=form.pipeline_name.data,
publisher=form.publisher.data,
publisher_url=form.publisher_url.data,
publishing_url=form.publishing_url.data,
publishing_year=form.publishing_year.data,
is_public=False,
title=form.title.data,
version=form.version.data,
user=current_user
)
except OSError:
abort(500)
db.session.commit()
flash(f'SpaCy NLP Pipeline model "{snpm.title}" created')
return {}, 201, {'Location': url_for('.spacy_nlp_pipeline_models')}
return render_template(
'contributions/spacy_nlp_pipeline_models/create.html.j2',
title='Create SpaCy NLP Pipeline Model',
form=form
)
@bp.route('/spacy-nlp-pipeline-models/<hashid:spacy_nlp_pipeline_model_id>', methods=['GET', 'POST'])
@register_breadcrumb(bp, '.spacy_nlp_pipeline_models.entity', '', dynamic_list_constructor=spacy_nlp_pipeline_model_dlc)
def spacy_nlp_pipeline_model(spacy_nlp_pipeline_model_id):
snpm = SpaCyNLPPipelineModel.query.get_or_404(spacy_nlp_pipeline_model_id)
if not (snpm.user == current_user or current_user.is_administrator()):
abort(403)
form = UpdateSpaCyNLPPipelineModelForm(data=snpm.to_json_serializeable())
if form.validate_on_submit():
form.populate_obj(snpm)
if db.session.is_modified(snpm):
flash(f'SpaCy NLP Pipeline model "{snpm.title}" updated')
db.session.commit()
return redirect(url_for('.spacy_nlp_pipeline_models'))
return render_template(
'contributions/spacy_nlp_pipeline_models/spacy_nlp_pipeline_model.html.j2',
title=f'{snpm.title} {snpm.version}',
form=form,
spacy_nlp_pipeline_model=snpm
)

View File

@ -1,13 +0,0 @@
from flask import request, url_for
from app.models import SpaCyNLPPipelineModel
def spacy_nlp_pipeline_model_dlc():
snpm_id = request.view_args['spacy_nlp_pipeline_model_id']
snpm = SpaCyNLPPipelineModel.query.get_or_404(snpm_id)
return [
{
'text': f'{snpm.title} {snpm.version}',
'url': url_for('.spacy_nlp_pipeline_model', spacy_nlp_pipeline_model_id=snpm_id)
}
]

View File

@ -1,2 +0,0 @@
from .. import bp
from . import json_routes, routes

View File

@ -1,39 +0,0 @@
from flask_wtf.file import FileField, FileRequired
from wtforms import ValidationError
from app.services import SERVICES
from ..forms import ContributionBaseForm, UpdateContributionBaseForm
class CreateTesseractOCRPipelineModelForm(ContributionBaseForm):
tesseract_model_file = FileField(
'File',
validators=[FileRequired()]
)
def validate_tesseract_model_file(self, field):
if not field.data.filename.lower().endswith('.traineddata'):
raise ValidationError('traineddata files only!')
def __init__(self, *args, **kwargs):
if 'prefix' not in kwargs:
kwargs['prefix'] = 'create-tesseract-ocr-pipeline-model-form'
service_manifest = SERVICES['tesseract-ocr-pipeline']
super().__init__(*args, **kwargs)
self.compatible_service_versions.choices = [('', 'Choose your option')]
self.compatible_service_versions.choices += [
(x, x) for x in service_manifest['versions'].keys()
]
self.compatible_service_versions.default = ''
class UpdateTesseractOCRPipelineModelForm(UpdateContributionBaseForm):
def __init__(self, *args, **kwargs):
if 'prefix' not in kwargs:
kwargs['prefix'] = 'edit-tesseract-ocr-pipeline-model-form'
service_manifest = SERVICES['tesseract-ocr-pipeline']
super().__init__(*args, **kwargs)
self.compatible_service_versions.choices = [('', 'Choose your option')]
self.compatible_service_versions.choices += [
(x, x) for x in service_manifest['versions'].keys()
]
self.compatible_service_versions.default = ''

View File

@ -1,52 +0,0 @@
from flask import abort, current_app, request
from flask_login import current_user
from threading import Thread
from app import db
from app.decorators import content_negotiation, permission_required
from app.models import TesseractOCRPipelineModel
from . import bp
@bp.route('/tesseract-ocr-pipeline-models/<hashid:tesseract_ocr_pipeline_model_id>', methods=['DELETE'])
@content_negotiation(produces='application/json')
def delete_tesseract_model(tesseract_ocr_pipeline_model_id):
def _delete_tesseract_ocr_pipeline_model(app, tesseract_ocr_pipeline_model_id):
with app.app_context():
topm = TesseractOCRPipelineModel.query.get(tesseract_ocr_pipeline_model_id)
topm.delete()
db.session.commit()
topm = TesseractOCRPipelineModel.query.get_or_404(tesseract_ocr_pipeline_model_id)
if not (topm.user == current_user or current_user.is_administrator()):
abort(403)
thread = Thread(
target=_delete_tesseract_ocr_pipeline_model,
args=(current_app._get_current_object(), topm.id)
)
thread.start()
response_data = {
'message': \
f'Tesseract OCR Pipeline Model "{topm.title}" marked for deletion'
}
return response_data, 202
@bp.route('/tesseract-ocr-pipeline-models/<hashid:tesseract_ocr_pipeline_model_id>/is_public', methods=['PUT'])
@permission_required('CONTRIBUTE')
@content_negotiation(consumes='application/json', produces='application/json')
def update_tesseract_ocr_pipeline_model_is_public(tesseract_ocr_pipeline_model_id):
is_public = request.json
if not isinstance(is_public, bool):
abort(400)
topm = TesseractOCRPipelineModel.query.get_or_404(tesseract_ocr_pipeline_model_id)
if not (topm.user == current_user or current_user.is_administrator()):
abort(403)
topm.is_public = is_public
db.session.commit()
response_data = {
'message': (
f'Tesseract OCR Pipeline Model "{topm.title}"'
f' is now {"public" if is_public else "private"}'
)
}
return response_data, 200

View File

@ -1,76 +0,0 @@
from flask import abort, flash, redirect, render_template, url_for
from flask_breadcrumbs import register_breadcrumb
from flask_login import current_user
from app import db
from app.models import TesseractOCRPipelineModel
from . import bp
from .forms import (
CreateTesseractOCRPipelineModelForm,
UpdateTesseractOCRPipelineModelForm
)
from .utils import (
tesseract_ocr_pipeline_model_dlc as tesseract_ocr_pipeline_model_dlc
)
@bp.route('/tesseract-ocr-pipeline-models')
@register_breadcrumb(bp, '.tesseract_ocr_pipeline_models', 'Tesseract OCR Pipeline Models')
def tesseract_ocr_pipeline_models():
return render_template(
'contributions/tesseract_ocr_pipeline_models/tesseract_ocr_pipeline_models.html.j2',
title='Tesseract OCR Pipeline Models'
)
@bp.route('/tesseract-ocr-pipeline-models/create', methods=['GET', 'POST'])
@register_breadcrumb(bp, '.tesseract_ocr_pipeline_models.create', 'Create')
def create_tesseract_ocr_pipeline_model():
form = CreateTesseractOCRPipelineModelForm()
if form.is_submitted():
if not form.validate():
return {'errors': form.errors}, 400
try:
topm = TesseractOCRPipelineModel.create(
form.tesseract_model_file.data,
compatible_service_versions=form.compatible_service_versions.data,
description=form.description.data,
publisher=form.publisher.data,
publisher_url=form.publisher_url.data,
publishing_url=form.publishing_url.data,
publishing_year=form.publishing_year.data,
is_public=False,
title=form.title.data,
version=form.version.data,
user=current_user
)
except OSError:
abort(500)
db.session.commit()
flash(f'Tesseract OCR Pipeline model "{topm.title}" created')
return {}, 201, {'Location': url_for('.tesseract_ocr_pipeline_models')}
return render_template(
'contributions/tesseract_ocr_pipeline_models/create.html.j2',
title='Create Tesseract OCR Pipeline Model',
form=form
)
@bp.route('/tesseract-ocr-pipeline-models/<hashid:tesseract_ocr_pipeline_model_id>', methods=['GET', 'POST'])
@register_breadcrumb(bp, '.tesseract_ocr_pipeline_models.entity', '', dynamic_list_constructor=tesseract_ocr_pipeline_model_dlc)
def tesseract_ocr_pipeline_model(tesseract_ocr_pipeline_model_id):
topm = TesseractOCRPipelineModel.query.get_or_404(tesseract_ocr_pipeline_model_id)
if not (topm.user == current_user or current_user.is_administrator()):
abort(403)
form = UpdateTesseractOCRPipelineModelForm(data=topm.to_json_serializeable())
if form.validate_on_submit():
form.populate_obj(topm)
if db.session.is_modified(topm):
flash(f'Tesseract OCR Pipeline model "{topm.title}" updated')
db.session.commit()
return redirect(url_for('.tesseract_ocr_pipeline_models'))
return render_template(
'contributions/tesseract_ocr_pipeline_models/tesseract_ocr_pipeline_model.html.j2',
title=f'{topm.title} {topm.version}',
form=form,
tesseract_ocr_pipeline_model=topm
)

View File

@ -1,13 +0,0 @@
from flask import request, url_for
from app.models import TesseractOCRPipelineModel
def tesseract_ocr_pipeline_model_dlc():
topm_id = request.view_args['tesseract_ocr_pipeline_model_id']
topm = TesseractOCRPipelineModel.query.get_or_404(topm_id)
return [
{
'text': f'{topm.title} {topm.version}',
'url': url_for('.tesseract_ocr_pipeline_model', tesseract_ocr_pipeline_model_id=topm_id)
}
]

View File

@ -1,2 +0,0 @@
from .. import bp
from . import routes

View File

@ -1,7 +0,0 @@
from flask import abort
from . import bp
@bp.route('/transkribus_htr_pipeline_models')
def transkribus_htr_pipeline_models():
return abort(503)

View File

@ -1,22 +0,0 @@
import click
from . import bp
from .sandpaper import SandpaperConverter
@bp.cli.group('converter')
def converter():
''' Converter commands. '''
pass
@converter.group('sandpaper')
def sandpaper_converter():
''' Sandpaper converter commands. '''
pass
@sandpaper_converter.command('run')
@click.argument('json_db_file')
@click.argument('data_dir')
def run_sandpaper_converter(json_db_file, data_dir):
''' Run the sandpaper converter. '''
sandpaper_converter = SandpaperConverter(json_db_file, data_dir)
sandpaper_converter.run()

View File

@ -1,112 +0,0 @@
from flask import current_app
from app import db
from app.models import User, Corpus, CorpusFile
from datetime import datetime
import json
import os
import shutil
class SandpaperConverter:
def __init__(self, json_db_file, data_dir):
self.json_db_file = json_db_file
self.data_dir = data_dir
def run(self):
with open(self.json_db_file, 'r') as f:
json_db = json.loads(f.read())
for json_user in json_db:
if not json_user['confirmed']:
current_app.logger.info(f'Skip unconfirmed user {json_user["username"]}')
continue
user_dir = os.path.join(self.data_dir, str(json_user['id']))
self.convert_user(json_user, user_dir)
db.session.commit()
def convert_user(self, json_user, user_dir):
current_app.logger.info(f'Create User {json_user["username"]}...')
user = User(
confirmed=json_user['confirmed'],
email=json_user['email'],
last_seen=datetime.fromtimestamp(json_user['last_seen']),
member_since=datetime.fromtimestamp(json_user['member_since']),
password_hash=json_user['password_hash'], # TODO: Needs to be added manually
username=json_user['username']
)
db.session.add(user)
db.session.flush(objects=[user])
db.session.refresh(user)
try:
user.makedirs()
except OSError as e:
current_app.logger.error(e)
db.session.rollback()
raise Exception('Internal Server Error')
for json_corpus in json_user['corpora'].values():
if not json_corpus['files'].values():
current_app.logger.info(f'Skip empty corpus {json_corpus["title"]}')
continue
corpus_dir = os.path.join(user_dir, 'corpora', str(json_corpus['id']))
self.convert_corpus(json_corpus, user, corpus_dir)
current_app.logger.info('Done')
def convert_corpus(self, json_corpus, user, corpus_dir):
current_app.logger.info(f'Create Corpus {json_corpus["title"]}...')
corpus = Corpus(
user=user,
creation_date=datetime.fromtimestamp(json_corpus['creation_date']),
description=json_corpus['description'],
title=json_corpus['title']
)
db.session.add(corpus)
db.session.flush(objects=[corpus])
db.session.refresh(corpus)
try:
corpus.makedirs()
except OSError as e:
current_app.logger.error(e)
db.session.rollback()
raise Exception('Internal Server Error')
for json_corpus_file in json_corpus['files'].values():
self.convert_corpus_file(json_corpus_file, corpus, corpus_dir)
current_app.logger.info('Done')
def convert_corpus_file(self, json_corpus_file, corpus, corpus_dir):
current_app.logger.info(f'Create CorpusFile {json_corpus_file["title"]}...')
corpus_file = CorpusFile(
corpus=corpus,
address=json_corpus_file['address'],
author=json_corpus_file['author'],
booktitle=json_corpus_file['booktitle'],
chapter=json_corpus_file['chapter'],
editor=json_corpus_file['editor'],
filename=json_corpus_file['filename'],
institution=json_corpus_file['institution'],
journal=json_corpus_file['journal'],
mimetype='application/vrt+xml',
pages=json_corpus_file['pages'],
publisher=json_corpus_file['publisher'],
publishing_year=json_corpus_file['publishing_year'],
school=json_corpus_file['school'],
title=json_corpus_file['title']
)
db.session.add(corpus_file)
db.session.flush(objects=[corpus_file])
db.session.refresh(corpus_file)
try:
shutil.copy2(
os.path.join(corpus_dir, json_corpus_file['filename']),
corpus_file.path
)
except:
current_app.logger.warning(
'Can not convert corpus file: '
f'{os.path.join(corpus_dir, json_corpus_file["filename"])}'
' -> '
f'{corpus_file.path}'
)
current_app.logger.info('Done')

View File

@ -1,117 +0,0 @@
from flask import current_app
def normalize_vrt_file(input_file, output_file):
def check_pos_attribute_order(vrt_lines):
# The following orders are possible:
# since 26.02.2019: 'word,lemma,simple_pos,pos,ner'
# since 26.03.2021: 'word,pos,lemma,simple_pos,ner'
# since 27.01.2022: 'word,pos,lemma,simple_pos'
# This Function tries to find out which order we have by looking at the
# number of attributes and the position of the simple_pos attribute
SIMPLE_POS_LABELS = [
'ADJ', 'ADP', 'ADV', 'AUX', 'CONJ',
'DET', 'INTJ', 'NOUN', 'NUM', 'PART',
'PRON', 'PROPN', 'PUNCT', 'SCONJ', 'SYM',
'VERB', 'X'
]
for line in vrt_lines:
if line.startswith('<'):
continue
pos_attrs = line.rstrip('\n').split('\t')
num_pos_attrs = len(pos_attrs)
if num_pos_attrs == 4:
if pos_attrs[3] in SIMPLE_POS_LABELS:
return ['word', 'pos', 'lemma', 'simple_pos']
continue
elif num_pos_attrs == 5:
if pos_attrs[2] in SIMPLE_POS_LABELS:
return ['word', 'lemma', 'simple_pos', 'pos', 'ner']
elif pos_attrs[3] in SIMPLE_POS_LABELS:
return ['word', 'pos', 'lemma', 'simple_pos', 'ner']
continue
return None
def check_has_ent_as_s_attr(vrt_lines):
for line in vrt_lines:
if line.startswith('<ent'):
return True
return False
def pos_attrs_to_string_1(pos_attrs):
return f'{pos_attrs[0]}\t{pos_attrs[3]}\t{pos_attrs[1]}\t{pos_attrs[2]}\n'
def pos_attrs_to_string_2(pos_attrs):
return f'{pos_attrs[0]}\t{pos_attrs[1]}\t{pos_attrs[2]}\t{pos_attrs[3]}\n'
current_app.logger.info(f'Converting {input_file}...')
with open(input_file) as f:
input_vrt_lines = f.readlines()
pos_attr_order = check_pos_attribute_order(input_vrt_lines)
has_ent_as_s_attr = check_has_ent_as_s_attr(input_vrt_lines)
current_app.logger.info(f'Detected pos_attr_order: [{",".join(pos_attr_order)}]')
current_app.logger.info(f'Detected has_ent_as_s_attr: {has_ent_as_s_attr}')
if pos_attr_order == ['word', 'lemma', 'simple_pos', 'pos', 'ner']:
pos_attrs_to_string_function = pos_attrs_to_string_1
elif pos_attr_order == ['word', 'pos', 'lemma', 'simple_pos', 'ner']:
pos_attrs_to_string_function = pos_attrs_to_string_2
elif pos_attr_order == ['word', 'pos', 'lemma', 'simple_pos']:
pos_attrs_to_string_function = pos_attrs_to_string_2
else:
raise Exception('Can not handle format')
current_ent = None
multi_line_tag_definition = False
output_vrt = ''
for line in input_vrt_lines:
if line.strip() == '':
continue
if line.startswith('<'):
if not has_ent_as_s_attr:
if current_ent is not None:
output_vrt += '</ent>\n'
current_ent = None
if not line.rstrip().endswith('>'):
multi_line_tag_definition = True
if line.startswith('<text'):
output_vrt += '<text>\n'
if line.startswith('</text>'):
output_vrt += '</text>\n'
elif line.startswith('<s'):
output_vrt += '<s>\n'
elif line.startswith('</s>'):
output_vrt += '</s>\n'
elif line.startswith('<ent'):
output_vrt += line
elif line.startswith('</ent>'):
output_vrt += line
continue
if multi_line_tag_definition and line.rstrip().endswith('>'):
multi_line_tag_definition = False
continue
pos_attrs = line.rstrip('\n').split('\t')
if not has_ent_as_s_attr and len(pos_attrs) > 4:
if pos_attrs[4].lower() in ['null', 'none']:
if current_ent:
output_vrt += '</ent>\n'
current_ent = None
else:
if current_ent is None:
output_vrt += f'<ent type="{pos_attrs[4]}">\n'
current_ent = pos_attrs[4]
elif current_ent != pos_attrs[4]:
output_vrt += '</ent>\n'
current_ent = None
output_vrt += f'<ent type="{pos_attrs[4]}">\n'
current_ent = pos_attrs[4]
output_vrt += pos_attrs_to_string_function(pos_attrs)
with open(output_file, 'w') as f:
f.write(output_vrt)

View File

@ -1,19 +0,0 @@
from flask import Blueprint
from flask_login import login_required
bp = Blueprint('corpora', __name__)
bp.cli.short_help = 'Corpus commands.'
@bp.before_request
@login_required
def before_request():
'''
Ensures that the routes in this package can only be visited by users that
are logged in.
'''
pass
from . import cli, cqi_over_sio, files, followers, routes, json_routes

View File

@ -1,24 +0,0 @@
from app.models import Corpus, CorpusStatus
import os
import shutil
from app import db
from . import bp
@bp.cli.command('reset')
def reset():
''' Reset built corpora. '''
status = [
CorpusStatus.QUEUED,
CorpusStatus.BUILDING,
CorpusStatus.BUILT,
CorpusStatus.STARTING_ANALYSIS_SESSION,
CorpusStatus.RUNNING_ANALYSIS_SESSION,
CorpusStatus.CANCELING_ANALYSIS_SESSION
]
for corpus in [x for x in Corpus.query.all() if x.status in status]:
print(f'Resetting corpus {corpus}')
shutil.rmtree(os.path.join(corpus.path, 'cwb'), ignore_errors=True)
corpus.status = CorpusStatus.UNPREPARED
corpus.num_analysis_sessions = 0
db.session.commit()

View File

@ -1,112 +0,0 @@
from cqi import CQiClient
from cqi.errors import CQiException
from flask import session
from flask_login import current_user
from flask_socketio import ConnectionRefusedError
from threading import Lock
from app import db, hashids, socketio
from app.decorators import socketio_login_required
from app.models import Corpus, CorpusStatus
'''
This package tunnels the Corpus Query interface (CQi) protocol through
Socket.IO (SIO) by wrapping each CQi function in a seperate SIO event.
This module only handles the SIO connect/disconnect, which handles the setup
and teardown of necessary ressources for later use. Each CQi function has a
corresponding SIO event. The event handlers are spread across the different
modules within this package.
Basic concept:
1. A client connects to the SIO namespace and provides the id of a corpus to be
analysed.
1.1 The analysis session counter of the corpus is incremented.
1.2 A CQiClient and a (Mutex) Lock belonging to it is created.
1.3 Wait until the CQP server is running.
1.4 Connect the CQiClient to the server.
1.5 Save the CQiClient and the Lock in the session for subsequential use.
2. A client emits an event and may provide a single json object with necessary
arguments for the targeted CQi function.
3. A SIO event handler (decorated with cqi_over_socketio) gets executed.
- The event handler function defines all arguments. Hence the client
is sent as a single json object, the decorator decomposes it to fit
the functions signature. This also includes type checking and proper
use of the lock (acquire/release) mechanism.
4. Wait for more events
5. The client disconnects from the SIO namespace
1.1 The analysis session counter of the corpus is decremented.
1.2 The CQiClient and (Mutex) Lock belonging to it are teared down.
'''
NAMESPACE = '/cqi_over_sio'
from .cqi import * # noqa
@socketio.on('connect', namespace=NAMESPACE)
@socketio_login_required
def connect(auth):
# the auth variable is used in a hacky way. It contains the corpus id for
# which a corpus analysis session should be started.
corpus_id = hashids.decode(auth['corpus_id'])
corpus = Corpus.query.get(corpus_id)
if corpus is None:
# return {'code': 404, 'msg': 'Not Found'}
raise ConnectionRefusedError('Not Found')
if not (corpus.user == current_user
or current_user.is_following_corpus(corpus)
or current_user.is_administrator()):
# return {'code': 403, 'msg': 'Forbidden'}
raise ConnectionRefusedError('Forbidden')
if corpus.status not in [
CorpusStatus.BUILT,
CorpusStatus.STARTING_ANALYSIS_SESSION,
CorpusStatus.RUNNING_ANALYSIS_SESSION,
CorpusStatus.CANCELING_ANALYSIS_SESSION
]:
# return {'code': 424, 'msg': 'Failed Dependency'}
raise ConnectionRefusedError('Failed Dependency')
if corpus.num_analysis_sessions is None:
corpus.num_analysis_sessions = 0
db.session.commit()
corpus.num_analysis_sessions = Corpus.num_analysis_sessions + 1
db.session.commit()
retry_counter = 20
while corpus.status != CorpusStatus.RUNNING_ANALYSIS_SESSION:
if retry_counter == 0:
corpus.num_analysis_sessions = Corpus.num_analysis_sessions - 1
db.session.commit()
return {'code': 408, 'msg': 'Request Timeout'}
socketio.sleep(3)
retry_counter -= 1
db.session.refresh(corpus)
cqi_client = CQiClient(f'cqpserver_{corpus_id}')
session['cqi_over_sio'] = {
'corpus_id': corpus_id,
'cqi_client': cqi_client,
'cqi_client_lock': Lock(),
}
# return {'code': 200, 'msg': 'OK'}
@socketio.on('disconnect', namespace=NAMESPACE)
def disconnect():
try:
cqi_client: CQiClient = session['cqi_over_sio']['cqi_client']
cqi_client_lock: Lock = session['cqi_over_sio']['cqi_client_lock']
except KeyError:
return
cqi_client_lock.acquire()
try:
cqi_client.api.ctrl_bye()
except (BrokenPipeError, CQiException):
pass
cqi_client_lock.release()
corpus = Corpus.query.get(session['cqi_over_sio']['corpus_id'])
corpus.num_analysis_sessions = Corpus.num_analysis_sessions - 1
db.session.commit()
session.pop('cqi_over_sio')
# return {'code': 200, 'msg': 'OK'}

View File

@ -1,113 +0,0 @@
from cqi import CQiClient
from cqi.errors import CQiException
from cqi.status import CQiStatus
from flask import session
from inspect import signature
from threading import Lock
from typing import Callable, Dict, List
from app import socketio
from app.decorators import socketio_login_required
from . import NAMESPACE as ns
from .extensions import CQI_EXTENSION_FUNCTION_NAMES
from . import extensions as extensions_module
CQI_FUNCTION_NAMES: List[str] = [
'ask_feature_cl_2_3',
'ask_feature_cqi_1_0',
'ask_feature_cqp_2_3',
'cl_alg2cpos',
'cl_attribute_size',
'cl_cpos2alg',
'cl_cpos2id',
'cl_cpos2lbound',
'cl_cpos2rbound',
'cl_cpos2str',
'cl_cpos2struc',
'cl_drop_attribute',
'cl_id2cpos',
'cl_id2freq',
'cl_id2str',
'cl_idlist2cpos',
'cl_lexicon_size',
'cl_regex2id',
'cl_str2id',
'cl_struc2cpos',
'cl_struc2str',
'corpus_alignment_attributes',
'corpus_charset',
'corpus_drop_corpus',
'corpus_full_name',
'corpus_info',
'corpus_list_corpora',
'corpus_positional_attributes',
'corpus_properties',
'corpus_structural_attribute_has_values',
'corpus_structural_attributes',
'cqp_drop_subcorpus',
'cqp_dump_subcorpus',
'cqp_fdist_1',
'cqp_fdist_2',
'cqp_list_subcorpora',
'cqp_query',
'cqp_subcorpus_has_field',
'cqp_subcorpus_size',
'ctrl_bye',
'ctrl_connect',
'ctrl_last_general_error',
'ctrl_ping',
'ctrl_user_abort'
]
@socketio.on('cqi', namespace=ns)
@socketio_login_required
def cqi_over_sio(fn_name: str, fn_args: Dict = {}):
try:
cqi_client: CQiClient = session['cqi_over_sio']['cqi_client']
cqi_client_lock: Lock = session['cqi_over_sio']['cqi_client_lock']
except KeyError:
return {'code': 424, 'msg': 'Failed Dependency'}
if fn_name in CQI_FUNCTION_NAMES:
fn: Callable = getattr(cqi_client.api, fn_name)
elif fn_name in CQI_EXTENSION_FUNCTION_NAMES:
fn: Callable = getattr(extensions_module, fn_name)
else:
return {'code': 400, 'msg': 'Bad Request'}
for param in signature(fn).parameters.values():
if param.default is param.empty:
if param.name not in fn_args:
return {'code': 400, 'msg': 'Bad Request'}
else:
if param.name not in fn_args:
continue
if type(fn_args[param.name]) is not param.annotation:
return {'code': 400, 'msg': 'Bad Request'}
cqi_client_lock.acquire()
try:
fn_return_value = fn(**fn_args)
except BrokenPipeError:
fn_return_value = {
'code': 500,
'msg': 'Internal Server Error'
}
except CQiException as e:
return {
'code': 502,
'msg': 'Bad Gateway',
'payload': {
'code': e.code,
'desc': e.description,
'msg': e.__class__.__name__
}
}
finally:
cqi_client_lock.release()
if isinstance(fn_return_value, CQiStatus):
payload = {
'code': fn_return_value.code,
'msg': fn_return_value.__class__.__name__
}
else:
payload = fn_return_value
return {'code': 200, 'msg': 'OK', 'payload': payload}

View File

@ -1,273 +0,0 @@
from collections import Counter
from cqi import CQiClient
from cqi.models.corpora import Corpus
from cqi.status import StatusOk
from flask import session
from typing import Dict, List
import json
import math
import os
from app import db
from app.models import Corpus
from .utils import lookups_by_cpos, partial_export_subcorpus, export_subcorpus
CQI_EXTENSION_FUNCTION_NAMES: List[str] = [
'ext_corpus_update_db',
'ext_corpus_static_data',
'ext_corpus_paginate_corpus',
'ext_cqp_paginate_subcorpus',
'ext_cqp_partial_export_subcorpus',
'ext_cqp_export_subcorpus',
]
def ext_corpus_update_db(corpus: str):
db_corpus = Corpus.query.get(session['cqi_over_sio']['corpus_id'])
cqi_client: CQiClient = session['cqi_over_sio']['cqi_client']
cqi_corpus = cqi_client.corpora.get(corpus)
db_corpus.num_tokens = cqi_corpus.size
db.session.commit()
return StatusOk()
def ext_corpus_static_data(corpus: str) -> Dict:
db_corpus = Corpus.query.get(session['cqi_over_sio']['corpus_id'])
static_corpus_data_file = os.path.join(db_corpus.path, 'cwb', 'static.json')
if os.path.exists(static_corpus_data_file):
with open(static_corpus_data_file, 'r') as f:
return json.load(f)
cqi_client: CQiClient = session['cqi_over_sio']['cqi_client']
cqi_corpus = cqi_client.corpora.get(corpus)
##########################################################################
# A faster way to get cpos boundaries for smaller s_attrs #
##########################################################################
# cqi_corpus.query('Last', '<s> []* </s>;')
# cqi_subcorpus = cqi_corpus.subcorpora.get('Last')
# print(cqi_subcorpus.size)
# first_match = 0
# last_match = cqi_subcorpus.attrs['size'] - 1
# match_boundaries = zip(
# list(range(first_match, last_match + 1)),
# cqi_subcorpus.dump(cqi_subcorpus.attrs['fields']['match'], first_match, last_match),
# cqi_subcorpus.dump(cqi_subcorpus.attrs['fields']['matchend'], first_match, last_match)
# )
# for x in match_boundaries:
# print(x)
cqi_p_attrs = {
p_attr.name: p_attr
for p_attr in cqi_corpus.positional_attributes.list()
}
cqi_s_attrs = {
s_attr.name: s_attr
for s_attr in cqi_corpus.structural_attributes.list()
}
static_corpus_data = {
'corpus': {
'bounds': [0, cqi_corpus.size - 1],
'counts': {
'token': cqi_corpus.size
},
'freqs': {}
},
'p_attrs': {},
's_attrs': {},
'values': {'p_attrs': {}, 's_attrs': {}}
}
for p_attr in cqi_p_attrs.values():
static_corpus_data['corpus']['freqs'][p_attr.name] = dict(
zip(
range(0, p_attr.lexicon_size),
p_attr.freqs_by_ids(list(range(0, p_attr.lexicon_size)))
)
)
static_corpus_data['p_attrs'][p_attr.name] = dict(
zip(
range(0, cqi_corpus.size),
p_attr.ids_by_cpos(list(range(0, cqi_corpus.size)))
)
)
static_corpus_data['values']['p_attrs'][p_attr.name] = dict(
zip(
range(0, p_attr.lexicon_size),
p_attr.values_by_ids(list(range(0, p_attr.lexicon_size)))
)
)
for s_attr in cqi_s_attrs.values():
if s_attr.has_values:
continue
static_corpus_data['corpus']['counts'][s_attr.name] = s_attr.size
static_corpus_data['s_attrs'][s_attr.name] = {'lexicon': {}, 'values': None}
static_corpus_data['values']['s_attrs'][s_attr.name] = {}
if s_attr.name in ['s', 'ent']:
cqi_corpus.query('Last', f'<{s_attr.name}> []* </{s_attr.name}>;')
cqi_subcorpus = cqi_corpus.subcorpora.get('Last')
first_match = 0
last_match = cqi_subcorpus.size - 1
match_boundaries = zip(
range(first_match, last_match + 1),
cqi_subcorpus.dump(cqi_subcorpus.fields['match'], first_match, last_match),
cqi_subcorpus.dump(cqi_subcorpus.fields['matchend'], first_match, last_match)
)
for id, lbound, rbound in match_boundaries:
static_corpus_data['s_attrs'][s_attr.name]['lexicon'][id] = {}
static_corpus_data['s_attrs'][s_attr.name]['lexicon'][id]['bounds'] = [lbound, rbound]
static_corpus_data['s_attrs'][s_attr.name]['lexicon'][id]['counts'] = {}
static_corpus_data['s_attrs'][s_attr.name]['lexicon'][id]['counts']['token'] = rbound - lbound + 1
cqi_subcorpus.drop()
for id in range(0, s_attr.size):
if s_attr.name not in ['s', 'ent']:
static_corpus_data['s_attrs'][s_attr.name]['lexicon'][id] = {}
lbound, rbound = s_attr.cpos_by_id(id)
static_corpus_data['s_attrs'][s_attr.name]['lexicon'][id]['bounds'] = [lbound, rbound]
static_corpus_data['s_attrs'][s_attr.name]['lexicon'][id]['counts'] = {}
static_corpus_data['s_attrs'][s_attr.name]['lexicon'][id]['counts']['token'] = rbound - lbound + 1
if s_attr.name not in ['text', 's']:
continue
cpos_range = range(lbound, rbound + 1)
static_corpus_data['s_attrs'][s_attr.name]['lexicon'][id]['counts']['ent'] = len({x for x in cqi_s_attrs['ent'].ids_by_cpos(list(cpos_range)) if x != -1})
if s_attr.name != 'text':
continue
static_corpus_data['s_attrs'][s_attr.name]['lexicon'][id]['counts']['s'] = len({x for x in cqi_s_attrs['s'].ids_by_cpos(list(cpos_range)) if x != -1})
static_corpus_data['s_attrs'][s_attr.name]['lexicon'][id]['freqs'] = {}
for p_attr in cqi_p_attrs.values():
static_corpus_data['s_attrs'][s_attr.name]['lexicon'][id]['freqs'][p_attr.name] = dict(Counter(p_attr.ids_by_cpos(list(cpos_range))))
sub_s_attrs = cqi_corpus.structural_attributes.list(filters={'part_of': s_attr})
s_attr_value_names = [
sub_s_attr.name[(len(s_attr.name) + 1):]
for sub_s_attr in sub_s_attrs
]
sub_s_attr_values = [
sub_s_attr.values_by_ids(list(range(0, s_attr.size)))
for sub_s_attr in sub_s_attrs
]
static_corpus_data['s_attrs'][s_attr.name]['values'] = s_attr_value_names
static_corpus_data['values']['s_attrs'][s_attr.name] = {
s_attr_id: {
s_attr_value_name: sub_s_attr_values[s_attr_value_name_idx][s_attr_id_idx]
for s_attr_value_name_idx, s_attr_value_name in enumerate(
static_corpus_data['s_attrs'][s_attr.name]['values']
)
} for s_attr_id_idx, s_attr_id in enumerate(range(0, s_attr.size))
}
with open(static_corpus_data_file, 'w') as f:
json.dump(static_corpus_data, f)
return static_corpus_data
def ext_corpus_paginate_corpus(
corpus: str,
page: int = 1,
per_page: int = 20
) -> Dict:
cqi_client: CQiClient = session['cqi_over_sio']['cqi_client']
cqi_corpus = cqi_client.corpora.get(corpus)
# Sanity checks
if (
per_page < 1
or page < 1
or (
cqi_corpus.size > 0
and page > math.ceil(cqi_corpus.size / per_page)
)
):
return {'code': 416, 'msg': 'Range Not Satisfiable'}
first_cpos = (page - 1) * per_page
last_cpos = min(cqi_corpus.size, first_cpos + per_page)
cpos_list = [*range(first_cpos, last_cpos)]
lookups = lookups_by_cpos(cqi_corpus, cpos_list)
payload = {}
# the items for the current page
payload['items'] = [cpos_list]
# the lookups for the items
payload['lookups'] = lookups
# the total number of items matching the query
payload['total'] = cqi_corpus.size
# the number of items to be displayed on a page.
payload['per_page'] = per_page
# The total number of pages
payload['pages'] = math.ceil(payload['total'] / payload['per_page'])
# the current page number (1 indexed)
payload['page'] = page if payload['pages'] > 0 else None
# True if a previous page exists
payload['has_prev'] = payload['page'] > 1 if payload['page'] else False
# True if a next page exists.
payload['has_next'] = payload['page'] < payload['pages'] if payload['page'] else False # noqa
# Number of the previous page.
payload['prev_num'] = payload['page'] - 1 if payload['has_prev'] else None
# Number of the next page
payload['next_num'] = payload['page'] + 1 if payload['has_next'] else None
return payload
def ext_cqp_paginate_subcorpus(
subcorpus: str,
context: int = 50,
page: int = 1,
per_page: int = 20
) -> Dict:
corpus_name, subcorpus_name = subcorpus.split(':', 1)
cqi_client: CQiClient = session['cqi_over_sio']['cqi_client']
cqi_corpus = cqi_client.corpora.get(corpus_name)
cqi_subcorpus = cqi_corpus.subcorpora.get(subcorpus_name)
# Sanity checks
if (
per_page < 1
or page < 1
or (
cqi_subcorpus.size > 0
and page > math.ceil(cqi_subcorpus.size / per_page)
)
):
return {'code': 416, 'msg': 'Range Not Satisfiable'}
offset = (page - 1) * per_page
cutoff = per_page
cqi_results_export = export_subcorpus(
cqi_subcorpus, context=context, cutoff=cutoff, offset=offset)
payload = {}
# the items for the current page
payload['items'] = cqi_results_export.pop('matches')
# the lookups for the items
payload['lookups'] = cqi_results_export
# the total number of items matching the query
payload['total'] = cqi_subcorpus.size
# the number of items to be displayed on a page.
payload['per_page'] = per_page
# The total number of pages
payload['pages'] = math.ceil(payload['total'] / payload['per_page'])
# the current page number (1 indexed)
payload['page'] = page if payload['pages'] > 0 else None
# True if a previous page exists
payload['has_prev'] = payload['page'] > 1 if payload['page'] else False
# True if a next page exists.
payload['has_next'] = payload['page'] < payload['pages'] if payload['page'] else False # noqa
# Number of the previous page.
payload['prev_num'] = payload['page'] - 1 if payload['has_prev'] else None
# Number of the next page
payload['next_num'] = payload['page'] + 1 if payload['has_next'] else None
return payload
def ext_cqp_partial_export_subcorpus(
subcorpus: str,
match_id_list: list,
context: int = 50
) -> Dict:
corpus_name, subcorpus_name = subcorpus.split(':', 1)
cqi_client: CQiClient = session['cqi_over_sio']['cqi_client']
cqi_corpus = cqi_client.corpora.get(corpus_name)
cqi_subcorpus = cqi_corpus.subcorpora.get(subcorpus_name)
cqi_subcorpus_partial_export = partial_export_subcorpus(cqi_subcorpus, match_id_list, context=context)
return cqi_subcorpus_partial_export
def ext_cqp_export_subcorpus(
subcorpus: str,
context: int = 50
) -> Dict:
corpus_name, subcorpus_name = subcorpus.split(':', 1)
cqi_client: CQiClient = session['cqi_over_sio']['cqi_client']
cqi_corpus = cqi_client.corpora.get(corpus_name)
cqi_subcorpus = cqi_corpus.subcorpora.get(subcorpus_name)
cqi_subcorpus_export = export_subcorpus(cqi_subcorpus, context=context)
return cqi_subcorpus_export

View File

@ -1,133 +0,0 @@
from cqi.models.corpora import Corpus
from cqi.models.subcorpora import Subcorpus
from typing import Dict, List
from app.models import Corpus
def lookups_by_cpos(corpus: Corpus, cpos_list: List[int]) -> Dict:
lookups = {}
lookups['cpos_lookup'] = {cpos: {} for cpos in cpos_list}
for attr in corpus.positional_attributes.list():
cpos_attr_values = attr.values_by_cpos(cpos_list)
for i, cpos in enumerate(cpos_list):
lookups['cpos_lookup'][cpos][attr.attrs['name']] = \
cpos_attr_values[i]
for attr in corpus.structural_attributes.list():
# We only want to iterate over non subattributes, identifiable by
# attr.attrs['has_values'] == False
if attr.attrs['has_values']:
continue
cpos_attr_ids = attr.ids_by_cpos(cpos_list)
for i, cpos in enumerate(cpos_list):
if cpos_attr_ids[i] == -1:
continue
lookups['cpos_lookup'][cpos][attr.attrs['name']] = cpos_attr_ids[i]
occured_attr_ids = [x for x in set(cpos_attr_ids) if x != -1]
if not occured_attr_ids:
continue
subattrs = corpus.structural_attributes.list(filters={'part_of': attr})
if not subattrs:
continue
lookup_name = f'{attr.attrs["name"]}_lookup'
lookups[lookup_name] = {}
for attr_id in occured_attr_ids:
lookups[lookup_name][attr_id] = {}
for subattr in subattrs:
subattr_name = subattr.attrs['name'][(len(attr.attrs['name']) + 1):] # noqa
for i, subattr_value in enumerate(subattr.values_by_ids(occured_attr_ids)): # noqa
lookups[lookup_name][occured_attr_ids[i]][subattr_name] = subattr_value # noqa
return lookups
def partial_export_subcorpus(
subcorpus: Subcorpus,
match_id_list: List[int],
context: int = 25
) -> Dict:
if subcorpus.size == 0:
return {"matches": []}
match_boundaries = []
for match_id in match_id_list:
if match_id < 0 or match_id >= subcorpus.size:
continue
match_boundaries.append(
(
match_id,
subcorpus.dump(subcorpus.fields['match'], match_id, match_id)[0],
subcorpus.dump(subcorpus.fields['matchend'], match_id, match_id)[0]
)
)
cpos_set = set()
matches = []
for match_boundary in match_boundaries:
match_num, match_start, match_end = match_boundary
c = (match_start, match_end)
if match_start == 0 or context == 0:
lc = None
cpos_list_lbound = match_start
else:
lc_lbound = max(0, (match_start - context))
lc_rbound = match_start - 1
lc = (lc_lbound, lc_rbound)
cpos_list_lbound = lc_lbound
if match_end == (subcorpus.collection.corpus.size - 1) or context == 0:
rc = None
cpos_list_rbound = match_end
else:
rc_lbound = match_end + 1
rc_rbound = min(
(match_end + context),
(subcorpus.collection.corpus.size - 1)
)
rc = (rc_lbound, rc_rbound)
cpos_list_rbound = rc_rbound
match = {'num': match_num, 'lc': lc, 'c': c, 'rc': rc}
matches.append(match)
cpos_set.update(range(cpos_list_lbound, cpos_list_rbound + 1))
lookups = lookups_by_cpos(subcorpus.collection.corpus, list(cpos_set))
return {'matches': matches, **lookups}
def export_subcorpus(
subcorpus: Subcorpus,
context: int = 25,
cutoff: float = float('inf'),
offset: int = 0
) -> Dict:
if subcorpus.size == 0:
return {"matches": []}
first_match = max(0, offset)
last_match = min((offset + cutoff - 1), (subcorpus.size - 1))
match_boundaries = zip(
range(first_match, last_match + 1),
subcorpus.dump(subcorpus.fields['match'], first_match, last_match),
subcorpus.dump(subcorpus.fields['matchend'], first_match, last_match)
)
cpos_set = set()
matches = []
for match_num, match_start, match_end in match_boundaries:
c = (match_start, match_end)
if match_start == 0 or context == 0:
lc = None
cpos_list_lbound = match_start
else:
lc_lbound = max(0, (match_start - context))
lc_rbound = match_start - 1
lc = (lc_lbound, lc_rbound)
cpos_list_lbound = lc_lbound
if match_end == (subcorpus.collection.corpus.size - 1) or context == 0:
rc = None
cpos_list_rbound = match_end
else:
rc_lbound = match_end + 1
rc_rbound = min(
(match_end + context),
(subcorpus.collection.corpus.size - 1)
)
rc = (rc_lbound, rc_rbound)
cpos_list_rbound = rc_rbound
match = {'num': match_num, 'lc': lc, 'c': c, 'rc': rc}
matches.append(match)
cpos_set.update(range(cpos_list_lbound, cpos_list_rbound + 1))
lookups = lookups_by_cpos(subcorpus.collection.corpus, list(cpos_set))
return {'matches': matches, **lookups}

View File

@ -1,33 +0,0 @@
from flask import abort
from flask_login import current_user
from functools import wraps
from app.models import Corpus, CorpusFollowerAssociation
def corpus_follower_permission_required(*permissions):
def decorator(f):
@wraps(f)
def decorated_function(*args, **kwargs):
corpus_id = kwargs.get('corpus_id')
corpus = Corpus.query.get_or_404(corpus_id)
if not (corpus.user == current_user or current_user.is_administrator()):
cfa = CorpusFollowerAssociation.query.filter_by(corpus_id=corpus_id, follower_id=current_user.id).first()
if cfa is None:
abort(403)
if not all([cfa.role.has_permission(p) for p in permissions]):
abort(403)
return f(*args, **kwargs)
return decorated_function
return decorator
def corpus_owner_or_admin_required(f):
@wraps(f)
def decorated_function(*args, **kwargs):
corpus_id = kwargs.get('corpus_id')
corpus = Corpus.query.get_or_404(corpus_id)
if not (corpus.user == current_user or current_user.is_administrator()):
abort(403)
return f(*args, **kwargs)
return decorated_function

View File

@ -1,45 +0,0 @@
from flask_login import current_user
from flask_socketio import join_room
from app import hashids, socketio
from app.decorators import socketio_login_required
from app.models import Corpus
@socketio.on('GET /corpora/<corpus_id>')
@socketio_login_required
def get_corpus(corpus_hashid):
corpus_id = hashids.decode(corpus_hashid)
corpus = Corpus.query.get(corpus_id)
if corpus is None:
return {'options': {'status': 404, 'statusText': 'Not found'}}
if not (
corpus.is_public
or corpus.user == current_user
or current_user.is_administrator()
):
return {'options': {'status': 403, 'statusText': 'Forbidden'}}
return {
'body': corpus.to_json_serializable(),
'options': {
'status': 200,
'statusText': 'OK',
'headers': {'Content-Type: application/json'}
}
}
@socketio.on('SUBSCRIBE /corpora/<corpus_id>')
@socketio_login_required
def subscribe_corpus(corpus_hashid):
corpus_id = hashids.decode(corpus_hashid)
corpus = Corpus.query.get(corpus_id)
if corpus is None:
return {'options': {'status': 404, 'statusText': 'Not found'}}
if not (
corpus.is_public
or corpus.user == current_user
or current_user.is_administrator()
):
return {'options': {'status': 403, 'statusText': 'Forbidden'}}
join_room(f'/corpora/{corpus.hashid}')
return {'options': {'status': 200, 'statusText': 'OK'}}

View File

@ -1,2 +0,0 @@
from .. import bp
from . import json_routes, routes

View File

@ -1,54 +0,0 @@
from flask_wtf import FlaskForm
from flask_wtf.file import FileField, FileRequired
from wtforms import (
StringField,
SubmitField,
ValidationError,
IntegerField
)
from wtforms.validators import InputRequired, Length
class CorpusFileBaseForm(FlaskForm):
author = StringField(
'Author',
validators=[InputRequired(), Length(max=255)]
)
publishing_year = IntegerField(
'Publishing year',
validators=[InputRequired()]
)
title = StringField(
'Title',
validators=[InputRequired(), Length(max=255)]
)
address = StringField('Adress', validators=[Length(max=255)])
booktitle = StringField('Booktitle', validators=[Length(max=255)])
chapter = StringField('Chapter', validators=[Length(max=255)])
editor = StringField('Editor', validators=[Length(max=255)])
institution = StringField('Institution', validators=[Length(max=255)])
journal = StringField('Journal', validators=[Length(max=255)])
pages = StringField('Pages', validators=[Length(max=255)])
publisher = StringField('Publisher', validators=[Length(max=255)])
school = StringField('School', validators=[Length(max=255)])
submit = SubmitField()
class CreateCorpusFileForm(CorpusFileBaseForm):
vrt = FileField('File', validators=[FileRequired()])
def validate_vrt(self, field):
if not field.data.filename.lower().endswith('.vrt'):
raise ValidationError('VRT files only!')
def __init__(self, *args, **kwargs):
if 'prefix' not in kwargs:
kwargs['prefix'] = 'create-corpus-file-form'
super().__init__(*args, **kwargs)
class UpdateCorpusFileForm(CorpusFileBaseForm):
def __init__(self, *args, **kwargs):
if 'prefix' not in kwargs:
kwargs['prefix'] = 'update-corpus-file-form'
super().__init__(*args, **kwargs)

View File

@ -1,30 +0,0 @@
from flask import abort, current_app
from threading import Thread
from app import db
from app.decorators import content_negotiation
from app.models import CorpusFile
from ..decorators import corpus_follower_permission_required
from . import bp
@bp.route('/<hashid:corpus_id>/files/<hashid:corpus_file_id>', methods=['DELETE'])
@corpus_follower_permission_required('MANAGE_FILES')
@content_negotiation(produces='application/json')
def delete_corpus_file(corpus_id, corpus_file_id):
def _delete_corpus_file(app, corpus_file_id):
with app.app_context():
corpus_file = CorpusFile.query.get(corpus_file_id)
corpus_file.delete()
db.session.commit()
corpus_file = CorpusFile.query.filter_by(corpus_id=corpus_id, id=corpus_file_id).first_or_404()
thread = Thread(
target=_delete_corpus_file,
args=(current_app._get_current_object(), corpus_file.id)
)
thread.start()
response_data = {
'message': f'Corpus File "{corpus_file.title}" marked for deletion',
'category': 'corpus'
}
return response_data, 202

View File

@ -1,100 +0,0 @@
from flask import (
abort,
flash,
redirect,
render_template,
send_from_directory,
url_for
)
from flask_breadcrumbs import register_breadcrumb
import os
from app import db
from app.models import Corpus, CorpusFile, CorpusStatus
from ..decorators import corpus_follower_permission_required
from ..utils import corpus_endpoint_arguments_constructor as corpus_eac
from . import bp
from .forms import CreateCorpusFileForm, UpdateCorpusFileForm
from .utils import corpus_file_dynamic_list_constructor as corpus_file_dlc
@bp.route('/<hashid:corpus_id>/files')
@register_breadcrumb(bp, '.entity.files', 'Files', endpoint_arguments_constructor=corpus_eac)
def corpus_files(corpus_id):
return redirect(url_for('.corpus', _anchor='files', corpus_id=corpus_id))
@bp.route('/<hashid:corpus_id>/files/create', methods=['GET', 'POST'])
@register_breadcrumb(bp, '.entity.files.create', 'Create', endpoint_arguments_constructor=corpus_eac)
@corpus_follower_permission_required('MANAGE_FILES')
def create_corpus_file(corpus_id):
corpus = Corpus.query.get_or_404(corpus_id)
form = CreateCorpusFileForm()
if form.is_submitted():
if not form.validate():
response = {'errors': form.errors}
return response, 400
try:
corpus_file = CorpusFile.create(
form.vrt.data,
address=form.address.data,
author=form.author.data,
booktitle=form.booktitle.data,
chapter=form.chapter.data,
editor=form.editor.data,
institution=form.institution.data,
journal=form.journal.data,
pages=form.pages.data,
publisher=form.publisher.data,
publishing_year=form.publishing_year.data,
school=form.school.data,
title=form.title.data,
mimetype='application/vrt+xml',
corpus=corpus
)
except (AttributeError, OSError):
abort(500)
corpus.status = CorpusStatus.UNPREPARED
db.session.commit()
flash(f'Corpus File "{corpus_file.filename}" added', category='corpus')
return '', 201, {'Location': corpus.url}
return render_template(
'corpora/files/create.html.j2',
title='Add corpus file',
form=form,
corpus=corpus
)
@bp.route('/<hashid:corpus_id>/files/<hashid:corpus_file_id>', methods=['GET', 'POST'])
@register_breadcrumb(bp, '.entity.files.entity', '', dynamic_list_constructor=corpus_file_dlc)
@corpus_follower_permission_required('MANAGE_FILES')
def corpus_file(corpus_id, corpus_file_id):
corpus_file = CorpusFile.query.filter_by(corpus_id=corpus_id, id=corpus_file_id).first_or_404()
form = UpdateCorpusFileForm(data=corpus_file.to_json_serializeable())
if form.validate_on_submit():
form.populate_obj(corpus_file)
if db.session.is_modified(corpus_file):
corpus_file.corpus.status = CorpusStatus.UNPREPARED
db.session.commit()
flash(f'Corpus file "{corpus_file.filename}" updated', category='corpus')
return redirect(corpus_file.corpus.url)
return render_template(
'corpora/files/corpus_file.html.j2',
title='Edit corpus file',
form=form,
corpus=corpus_file.corpus,
corpus_file=corpus_file
)
@bp.route('/<hashid:corpus_id>/files/<hashid:corpus_file_id>/download')
@corpus_follower_permission_required('VIEW')
def download_corpus_file(corpus_id, corpus_file_id):
corpus_file = CorpusFile.query.filter_by(corpus_id=corpus_id, id=corpus_file_id).first_or_404()
return send_from_directory(
os.path.dirname(corpus_file.path),
os.path.basename(corpus_file.path),
as_attachment=True,
attachment_filename=corpus_file.filename,
mimetype=corpus_file.mimetype
)

View File

@ -1,15 +0,0 @@
from flask import request, url_for
from app.models import CorpusFile
from ..utils import corpus_endpoint_arguments_constructor as corpus_eac
def corpus_file_dynamic_list_constructor():
corpus_id = request.view_args['corpus_id']
corpus_file_id = request.view_args['corpus_file_id']
corpus_file = CorpusFile.query.filter_by(corpus_id=corpus_id, id=corpus_file_id).first_or_404()
return [
{
'text': f'{corpus_file.author}: {corpus_file.title} ({corpus_file.publishing_year})',
'url': url_for('.corpus_file', corpus_id=corpus_id, corpus_file_id=corpus_file_id)
}
]

View File

@ -1,2 +0,0 @@
from .. import bp
from . import json_routes

View File

@ -1,76 +0,0 @@
from flask import abort, flash, jsonify, make_response, request
from flask_login import current_user
from app import db
from app.decorators import content_negotiation
from app.models import (
Corpus,
CorpusFollowerAssociation,
CorpusFollowerRole,
User
)
from ..decorators import corpus_follower_permission_required
from . import bp
# @bp.route('/<hashid:corpus_id>/followers', methods=['POST'])
# @corpus_follower_permission_required('MANAGE_FOLLOWERS')
# @content_negotiation(consumes='application/json', produces='application/json')
# def create_corpus_followers(corpus_id):
# usernames = request.json
# if not (isinstance(usernames, list) or all(isinstance(u, str) for u in usernames)):
# abort(400)
# corpus = Corpus.query.get_or_404(corpus_id)
# for username in usernames:
# user = User.query.filter_by(username=username, is_public=True).first_or_404()
# user.follow_corpus(corpus)
# db.session.commit()
# response_data = {
# 'message': f'Users are now following "{corpus.title}"',
# 'category': 'corpus'
# }
# return response_data, 200
# @bp.route('/<hashid:corpus_id>/followers/<hashid:follower_id>/role', methods=['PUT'])
# @corpus_follower_permission_required('MANAGE_FOLLOWERS')
# @content_negotiation(consumes='application/json', produces='application/json')
# def update_corpus_follower_role(corpus_id, follower_id):
# role_name = request.json
# if not isinstance(role_name, str):
# abort(400)
# cfr = CorpusFollowerRole.query.filter_by(name=role_name).first()
# if cfr is None:
# abort(400)
# cfa = CorpusFollowerAssociation.query.filter_by(corpus_id=corpus_id, follower_id=follower_id).first_or_404()
# cfa.role = cfr
# db.session.commit()
# response_data = {
# 'message': f'User "{cfa.follower.username}" is now {cfa.role.name}',
# 'category': 'corpus'
# }
# return response_data, 200
# @bp.route('/<hashid:corpus_id>/followers/<hashid:follower_id>', methods=['DELETE'])
# def delete_corpus_follower(corpus_id, follower_id):
# cfa = CorpusFollowerAssociation.query.filter_by(corpus_id=corpus_id, follower_id=follower_id).first_or_404()
# if not (
# current_user.id == follower_id
# or current_user == cfa.corpus.user
# or CorpusFollowerAssociation.query.filter_by(corpus_id=corpus_id, follower_id=current_user.id).first().role.has_permission('MANAGE_FOLLOWERS')
# or current_user.is_administrator()):
# abort(403)
# if current_user.id == follower_id:
# flash(f'You are no longer following "{cfa.corpus.title}"', 'corpus')
# response = make_response()
# response.status_code = 204
# else:
# response_data = {
# 'message': f'"{cfa.follower.username}" is not following "{cfa.corpus.title}" anymore',
# 'category': 'corpus'
# }
# response = jsonify(response_data)
# response.status_code = 200
# cfa.follower.unfollow_corpus(cfa.corpus)
# db.session.commit()
# return response

View File

@ -1,33 +0,0 @@
from flask_wtf import FlaskForm
from wtforms import StringField, SubmitField, TextAreaField
from wtforms.validators import InputRequired, Length
class CorpusBaseForm(FlaskForm):
description = TextAreaField(
'Description',
validators=[InputRequired(), Length(max=255)]
)
title = StringField('Title', validators=[InputRequired(), Length(max=32)])
submit = SubmitField()
class CreateCorpusForm(CorpusBaseForm):
def __init__(self, *args, **kwargs):
if 'prefix' not in kwargs:
kwargs['prefix'] = 'create-corpus-form'
super().__init__(*args, **kwargs)
class UpdateCorpusForm(CorpusBaseForm):
def __init__(self, *args, **kwargs):
if 'prefix' not in kwargs:
kwargs['prefix'] = 'update-corpus-form'
super().__init__(*args, **kwargs)
class ImportCorpusForm(FlaskForm):
def __init__(self, *args, **kwargs):
if 'prefix' not in kwargs:
kwargs['prefix'] = 'import-corpus-form'
super().__init__(*args, **kwargs)

View File

@ -1,134 +0,0 @@
from datetime import datetime
from flask import abort, current_app, request, url_for
from flask_login import current_user
from threading import Thread
from app import db
from app.decorators import content_negotiation
from app.models import Corpus, CorpusFollowerRole
from . import bp
from .decorators import corpus_follower_permission_required, corpus_owner_or_admin_required
import nltk
from string import punctuation
@bp.route('/<hashid:corpus_id>', methods=['DELETE'])
@corpus_owner_or_admin_required
@content_negotiation(produces='application/json')
def delete_corpus(corpus_id):
def _delete_corpus(app, corpus_id):
with app.app_context():
corpus = Corpus.query.get(corpus_id)
corpus.delete()
db.session.commit()
corpus = Corpus.query.get_or_404(corpus_id)
thread = Thread(
target=_delete_corpus,
args=(current_app._get_current_object(), corpus.id)
)
thread.start()
response_data = {
'message': f'Corpus "{corpus.title}" marked for deletion',
'category': 'corpus'
}
return response_data, 200
@bp.route('/<hashid:corpus_id>/build', methods=['POST'])
@corpus_follower_permission_required('MANAGE_FILES')
@content_negotiation(produces='application/json')
def build_corpus(corpus_id):
def _build_corpus(app, corpus_id):
with app.app_context():
corpus = Corpus.query.get(corpus_id)
corpus.build()
db.session.commit()
corpus = Corpus.query.get_or_404(corpus_id)
if len(corpus.files.all()) == 0:
abort(409)
thread = Thread(
target=_build_corpus,
args=(current_app._get_current_object(), corpus_id)
)
thread.start()
response_data = {
'message': f'Corpus "{corpus.title}" marked for building',
'category': 'corpus'
}
return response_data, 202
@bp.route('/stopwords')
@content_negotiation(produces='application/json')
def get_stopwords():
# data = request.json
# if not isinstance(data, dict):
# abort(400)
# language = data.get('language')
# if not isinstance(language, str):
# abort(400)
nltk.download('stopwords')
languages = ["german", "english", "catalan", "greek", "spanish", "french", "italian", "russian", "chinese"]
stopwords = {}
for language in languages:
stopwords[language] = nltk.corpus.stopwords.words(language)
stopwords['punctuation'] = list(punctuation) + ['', '|']
stopwords['user_stopwords'] = []
print(stopwords)
response_data = {
'stopwords': stopwords
}
return response_data, 202
# @bp.route('/<hashid:corpus_id>/generate-share-link', methods=['POST'])
# @corpus_follower_permission_required('MANAGE_FOLLOWERS')
# @content_negotiation(consumes='application/json', produces='application/json')
# def generate_corpus_share_link(corpus_id):
# data = request.json
# if not isinstance(data, dict):
# abort(400)
# expiration = data.get('expiration')
# if not isinstance(expiration, str):
# abort(400)
# role_name = data.get('role')
# if not isinstance(role_name, str):
# abort(400)
# expiration_date = datetime.strptime(expiration, '%b %d, %Y')
# cfr = CorpusFollowerRole.query.filter_by(name=role_name).first()
# if cfr is None:
# abort(400)
# corpus = Corpus.query.get_or_404(corpus_id)
# token = current_user.generate_follow_corpus_token(corpus.hashid, role_name, expiration_date)
# corpus_share_link = url_for(
# 'corpora.follow_corpus',
# corpus_id=corpus_id,
# token=token,
# _external=True
# )
# response_data = {
# 'message': 'Corpus share link generated',
# 'category': 'corpus',
# 'corpusShareLink': corpus_share_link
# }
# return response_data, 200
# @bp.route('/<hashid:corpus_id>/is_public', methods=['PUT'])
# @corpus_owner_or_admin_required
# @content_negotiation(consumes='application/json', produces='application/json')
# def update_corpus_is_public(corpus_id):
# is_public = request.json
# if not isinstance(is_public, bool):
# abort(400)
# corpus = Corpus.query.get_or_404(corpus_id)
# corpus.is_public = is_public
# db.session.commit()
# response_data = {
# 'message': (
# f'Corpus "{corpus.title}" is now'
# f' {"public" if is_public else "private"}'
# ),
# 'category': 'corpus'
# }
# return response_data, 200

View File

@ -1,121 +0,0 @@
from flask import abort, flash, redirect, render_template, url_for
from flask_breadcrumbs import register_breadcrumb
from flask_login import current_user
from app import db
from app.models import (
Corpus,
CorpusFollowerAssociation,
CorpusFollowerRole,
User
)
from . import bp
from .decorators import corpus_follower_permission_required
from .forms import CreateCorpusForm
from .utils import (
corpus_endpoint_arguments_constructor as corpus_eac,
corpus_dynamic_list_constructor as corpus_dlc
)
@bp.route('')
@register_breadcrumb(bp, '.', '<i class="nopaque-icons left">I</i>My Corpora')
def corpora():
return redirect(url_for('main.dashboard', _anchor='corpora'))
@bp.route('/create', methods=['GET', 'POST'])
@register_breadcrumb(bp, '.create', 'Create')
def create_corpus():
form = CreateCorpusForm()
if form.validate_on_submit():
try:
corpus = Corpus.create(
title=form.title.data,
description=form.description.data,
user=current_user
)
except OSError:
abort(500)
db.session.commit()
flash(f'Corpus "{corpus.title}" created', 'corpus')
return redirect(corpus.url)
return render_template(
'corpora/create.html.j2',
title='Create corpus',
form=form
)
@bp.route('/<hashid:corpus_id>')
@register_breadcrumb(bp, '.entity', '', dynamic_list_constructor=corpus_dlc)
def corpus(corpus_id):
corpus = Corpus.query.get_or_404(corpus_id)
cfrs = CorpusFollowerRole.query.all()
# TODO: Better solution for filtering admin
users = User.query.filter(User.is_public == True, User.id != current_user.id, User.id != corpus.user.id, User.role_id < 4).all()
cfa = CorpusFollowerAssociation.query.filter_by(corpus_id=corpus_id, follower_id=current_user.id).first()
if cfa is None:
if corpus.user == current_user or current_user.is_administrator():
cfr = CorpusFollowerRole.query.filter_by(name='Administrator').first()
else:
cfr = CorpusFollowerRole.query.filter_by(name='Anonymous').first()
else:
cfr = cfa.role
if corpus.user == current_user or current_user.is_administrator():
return render_template(
'corpora/corpus.html.j2',
title=corpus.title,
corpus=corpus,
cfr=cfr,
cfrs=cfrs,
users = users
)
if (current_user.is_following_corpus(corpus) or corpus.is_public):
abort(404)
# cfas = CorpusFollowerAssociation.query.filter(Corpus.id == corpus_id, CorpusFollowerAssociation.follower_id != corpus.user.id).all()
# return render_template(
# 'corpora/public_corpus.html.j2',
# title=corpus.title,
# corpus=corpus,
# cfrs=cfrs,
# cfr=cfr,
# cfas=cfas,
# users = users
# )
abort(403)
@bp.route('/<hashid:corpus_id>/analysis')
@corpus_follower_permission_required('VIEW')
@register_breadcrumb(bp, '.entity.analysis', 'Analysis', endpoint_arguments_constructor=corpus_eac)
def analysis(corpus_id):
corpus = Corpus.query.get_or_404(corpus_id)
return render_template(
'corpora/analysis.html.j2',
corpus=corpus,
title=f'Analyse Corpus {corpus.title}'
)
# @bp.route('/<hashid:corpus_id>/follow/<token>')
# def follow_corpus(corpus_id, token):
# corpus = Corpus.query.get_or_404(corpus_id)
# if current_user.follow_corpus_by_token(token):
# db.session.commit()
# flash(f'You are following "{corpus.title}" now', category='corpus')
# return redirect(url_for('corpora.corpus', corpus_id=corpus_id))
# abort(403)
@bp.route('/import', methods=['GET', 'POST'])
@register_breadcrumb(bp, '.import', 'Import')
def import_corpus():
abort(503)
@bp.route('/<hashid:corpus_id>/export')
@corpus_follower_permission_required('VIEW')
@register_breadcrumb(bp, '.entity.export', 'Export', endpoint_arguments_constructor=corpus_eac)
def export_corpus(corpus_id):
abort(503)

View File

@ -1,17 +0,0 @@
from flask import request, url_for
from app.models import Corpus
def corpus_endpoint_arguments_constructor():
return {'corpus_id': request.view_args['corpus_id']}
def corpus_dynamic_list_constructor():
corpus_id = request.view_args['corpus_id']
corpus = Corpus.query.get_or_404(corpus_id)
return [
{
'text': f'<i class="material-icons left">book</i>{corpus.title}',
'url': url_for('.corpus', corpus_id=corpus_id)
}
]

View File

@ -1,11 +0,0 @@
from app import db
from flask import Flask
from .corpus_utils import check_corpora
from .job_utils import check_jobs
def daemon(app: Flask):
with app.app_context():
check_corpora()
check_jobs()
db.session.commit()

View File

@ -1,221 +0,0 @@
from app import docker_client
from app.models import Corpus, CorpusStatus
from flask import current_app
import docker
import os
import shutil
def check_corpora():
corpora = Corpus.query.all()
for corpus in [x for x in corpora if x.status == CorpusStatus.SUBMITTED]:
_create_build_corpus_service(corpus)
for corpus in [x for x in corpora if x.status in [CorpusStatus.QUEUED, CorpusStatus.BUILDING]]:
_checkout_build_corpus_service(corpus)
for corpus in [x for x in corpora if x.status == CorpusStatus.BUILT and x.num_analysis_sessions > 0]:
corpus.status = CorpusStatus.STARTING_ANALYSIS_SESSION
for corpus in [x for x in corpora if x.status == CorpusStatus.RUNNING_ANALYSIS_SESSION and x.num_analysis_sessions == 0]:
corpus.status = CorpusStatus.CANCELING_ANALYSIS_SESSION
for corpus in [x for x in corpora if x.status == CorpusStatus.RUNNING_ANALYSIS_SESSION]:
_checkout_analysing_corpus_container(corpus)
for corpus in [x for x in corpora if x.status == CorpusStatus.STARTING_ANALYSIS_SESSION]:
_create_cqpserver_container(corpus)
for corpus in [x for x in corpora if x.status == CorpusStatus.CANCELING_ANALYSIS_SESSION]:
_remove_cqpserver_container(corpus)
def _create_build_corpus_service(corpus):
''' # Docker service settings # '''
''' ## Command ## '''
command = ['bash', '-c']
command.append(
f'mkdir /corpora/data/nopaque_{corpus.id}'
' && '
'cwb-encode'
' -c utf8'
f' -d /corpora/data/nopaque_{corpus.id}'
' -f /root/files/corpus.vrt'
f' -R /usr/local/share/cwb/registry/nopaque_{corpus.id}'
' -P pos -P lemma -P simple_pos'
' -S ent:0+type -S s:0'
' -S text:0+address+author+booktitle+chapter+editor+institution+journal+pages+publisher+publishing_year+school+title'
' -xsB -9'
' && '
f'cwb-make -V NOPAQUE_{corpus.id}'
)
''' ## Constraints ## '''
constraints = ['node.role==worker']
''' ## Image ## '''
image = f'{current_app.config["NOPAQUE_DOCKER_IMAGE_PREFIX"]}cwb:r1702'
''' ## Labels ## '''
labels = {
'origin': current_app.config['SERVER_NAME'],
'type': 'corpus.build',
'corpus_id': str(corpus.id)
}
''' ## Mounts ## '''
mounts = []
''' ### Data mount ### '''
data_mount_source = os.path.join(corpus.path, 'cwb', 'data')
data_mount_target = '/corpora/data'
data_mount = f'{data_mount_source}:{data_mount_target}:rw'
# Make sure that their is no data in the data directory
shutil.rmtree(data_mount_source, ignore_errors=True)
os.makedirs(data_mount_source)
mounts.append(data_mount)
''' ### File mount ### '''
file_mount_source = os.path.join(corpus.path, 'cwb', 'corpus.vrt')
file_mount_target = '/root/files/corpus.vrt'
file_mount = f'{file_mount_source}:{file_mount_target}:ro'
mounts.append(file_mount)
''' ### Registry mount ### '''
registry_mount_source = os.path.join(corpus.path, 'cwb', 'registry')
registry_mount_target = '/usr/local/share/cwb/registry'
registry_mount = f'{registry_mount_source}:{registry_mount_target}:rw'
# Make sure that their is no data in the registry directory
shutil.rmtree(registry_mount_source, ignore_errors=True)
os.makedirs(registry_mount_source)
mounts.append(registry_mount)
''' ## Name ## '''
name = f'build-corpus_{corpus.id}'
''' ## Restart policy ## '''
restart_policy = docker.types.RestartPolicy()
try:
docker_client.services.create(
image,
command=command,
constraints=constraints,
labels=labels,
mounts=mounts,
name=name,
restart_policy=restart_policy,
user='0:0'
)
except docker.errors.DockerException as e:
current_app.logger.error(f'Create service "{name}" failed: {e}')
return
corpus.status = CorpusStatus.QUEUED
def _checkout_build_corpus_service(corpus):
service_name = f'build-corpus_{corpus.id}'
try:
service = docker_client.services.get(service_name)
except docker.errors.NotFound as e:
current_app.logger.error(f'Get service "{service_name}" failed: {e}')
corpus.status = CorpusStatus.FAILED
return
except docker.errors.DockerException as e:
current_app.logger.error(f'Get service "{service_name}" failed: {e}')
service_tasks = service.tasks()
if not service_tasks:
return
task_state = service_tasks[0].get('Status').get('State')
if corpus.status == CorpusStatus.QUEUED and task_state != 'pending':
corpus.status = CorpusStatus.BUILDING
return
elif corpus.status == CorpusStatus.BUILDING and task_state == 'complete':
corpus.status = CorpusStatus.BUILT
elif corpus.status == CorpusStatus.BUILDING and task_state == 'failed':
corpus.status = CorpusStatus.FAILED
else:
return
try:
service.remove()
except docker.errors.DockerException as e:
current_app.logger.error(f'Remove service "{service_name}" failed: {e}')
def _create_cqpserver_container(corpus):
''' # Docker container settings # '''
''' ## Command ## '''
command = []
command.append(
'echo "host *;" > cqpserver.init'
' && '
'echo "user anonymous \\"\\";" >> cqpserver.init'
' && '
'cqpserver -I cqpserver.init'
)
''' ## Detach ## '''
detach = True
''' ## Entrypoint ## '''
entrypoint = ['bash', '-c']
''' ## Image ## '''
image = f'{current_app.config["NOPAQUE_DOCKER_IMAGE_PREFIX"]}cwb:r1702'
''' ## Name ## '''
name = f'cqpserver_{corpus.id}'
''' ## Network ## '''
network = f'{current_app.config["DOCKER_NETWORK_NAME"]}'
''' ## Volumes ## '''
volumes = []
''' ### Corpus data volume ### '''
data_volume_source = os.path.join(corpus.path, 'cwb', 'data')
data_volume_target = '/corpora/data'
data_volume = f'{data_volume_source}:{data_volume_target}:rw'
volumes.append(data_volume)
''' ### Corpus registry volume ### '''
registry_volume_source = os.path.join(corpus.path, 'cwb', 'registry')
registry_volume_target = '/usr/local/share/cwb/registry'
registry_volume = f'{registry_volume_source}:{registry_volume_target}:rw'
volumes.append(registry_volume)
# Check if a cqpserver container already exists. If this is the case,
# remove it and create a new one
try:
container = docker_client.containers.get(name)
except docker.errors.NotFound:
pass
except docker.errors.DockerException as e:
current_app.logger.error(f'Get container "{name}" failed: {e}')
return
else:
try:
container.remove(force=True)
except docker.errors.DockerException as e:
current_app.logger.error(f'Remove container "{name}" failed: {e}')
return
try:
docker_client.containers.run(
image,
command=command,
detach=detach,
entrypoint=entrypoint,
name=name,
network=network,
user='0:0',
volumes=volumes
)
except docker.errors.ImageNotFound as e:
current_app.logger.error(
f'Run container "{name}" failed '
f'due to "docker.errors.ImageNotFound" error: {e}'
)
corpus.status = CorpusStatus.FAILED
return
except docker.errors.DockerException as e:
current_app.logger.error(f'Run container "{name}" failed: {e}')
return
corpus.status = CorpusStatus.RUNNING_ANALYSIS_SESSION
def _checkout_analysing_corpus_container(corpus):
container_name = f'cqpserver_{corpus.id}'
try:
docker_client.containers.get(container_name)
except docker.errors.NotFound as e:
current_app.logger.error(f'Get container "{container_name}" failed: {e}')
corpus.num_analysis_sessions = 0
corpus.status = CorpusStatus.BUILT
except docker.errors.DockerException as e:
current_app.logger.error(f'Get container "{container_name}" failed: {e}')
def _remove_cqpserver_container(corpus):
container_name = f'cqpserver_{corpus.id}'
try:
container = docker_client.containers.get(container_name)
except docker.errors.NotFound:
corpus.status = CorpusStatus.BUILT
return
except docker.errors.DockerException as e:
current_app.logger.error(f'Get container "{container_name}" failed: {e}')
return
try:
container.remove(force=True)
except docker.errors.DockerException as e:
current_app.logger.error(f'Remove container "{container_name}" failed: {e}')

View File

@ -1,234 +0,0 @@
from app import db, docker_client, hashids
from app.models import (
Job,
JobResult,
JobStatus,
TesseractOCRPipelineModel,
SpaCyNLPPipelineModel
)
from datetime import datetime
from flask import current_app
from werkzeug.utils import secure_filename
import docker
import json
import os
import shutil
def check_jobs():
jobs = Job.query.all()
for job in [x for x in jobs if x.status == JobStatus.SUBMITTED]:
_create_job_service(job)
for job in [x for x in jobs if x.status in [JobStatus.QUEUED, JobStatus.RUNNING]]:
_checkout_job_service(job)
for job in [x for x in jobs if x.status == JobStatus.CANCELING]:
_remove_job_service(job)
def _create_job_service(job):
''' # Docker service settings # '''
''' ## Service specific settings ## '''
if job.service == 'file-setup-pipeline':
mem_mb = 512
n_cores = 2
executable = 'file-setup-pipeline'
image = f'{current_app.config["NOPAQUE_DOCKER_IMAGE_PREFIX"]}file-setup-pipeline:v{job.service_version}'
elif job.service == 'tesseract-ocr-pipeline':
mem_mb = 1024
n_cores = 4
executable = 'tesseract-ocr-pipeline'
image = f'{current_app.config["NOPAQUE_DOCKER_IMAGE_PREFIX"]}tesseract-ocr-pipeline:v{job.service_version}'
elif job.service == 'transkribus-htr-pipeline':
mem_mb = 1024
n_cores = 4
executable = 'transkribus-htr-pipeline'
image = f'{current_app.config["NOPAQUE_DOCKER_IMAGE_PREFIX"]}transkribus-htr-pipeline:v{job.service_version}'
elif job.service == 'spacy-nlp-pipeline':
mem_mb = 1024
n_cores = 1
executable = 'spacy-nlp-pipeline'
image = f'{current_app.config["NOPAQUE_DOCKER_IMAGE_PREFIX"]}spacy-nlp-pipeline:v{job.service_version}'
''' ## Command ## '''
command = f'{executable} -i /input -o /output'
command += ' --log-dir /logs'
command += f' --mem-mb {mem_mb}'
command += f' --n-cores {n_cores}'
if job.service == 'spacy-nlp-pipeline':
model_id = hashids.decode(job.service_args['model'])
model = SpaCyNLPPipelineModel.query.get(model_id)
if model is None:
job.status = JobStatus.FAILED
return
command += f' -m {model.pipeline_name}'
if 'encoding_detection' in job.service_args and job.service_args['encoding_detection']:
command += ' --check-encoding'
elif job.service == 'tesseract-ocr-pipeline':
command += f' -m {job.service_args["model"]}'
if 'binarization' in job.service_args and job.service_args['binarization']:
command += ' --binarize'
if 'ocropus_nlbin_threshold' in job.service_args and job.service_args['ocropus_nlbin_threshold']:
value = job.service_args['ocropus_nlbin_threshold']
command += f' --ocropus-nlbin-threshold {value}'
elif job.service == 'transkribus-htr-pipeline':
transkribus_htr_pipeline_model_id = job.service_args['model']
command += f' -m {transkribus_htr_pipeline_model_id}'
readcoop_username = current_app.config.get('NOPAQUE_READCOOP_USERNAME')
command += f' --readcoop-username "{readcoop_username}"'
readcoop_password = current_app.config.get('NOPAQUE_READCOOP_PASSWORD')
command += f' --readcoop-password "{readcoop_password}"'
if 'binarization' in job.service_args and job.service_args['binarization']:
command += ' --binarize'
''' ## Constraints ## '''
constraints = ['node.role==worker']
''' ## Labels ## '''
labels = {
'origin': current_app.config['SERVER_NAME'],
'type': 'job',
'job_id': str(job.id)
}
''' ## Mounts ## '''
mounts = []
''' ### Input mount(s) ### '''
input_mount_target_base = '/input'
if job.service == 'file-setup-pipeline':
input_mount_target_base += f'/{secure_filename(job.title)}'
for job_input in job.inputs:
input_mount_source = job_input.path
input_mount_target = f'{input_mount_target_base}/{job_input.filename}'
input_mount = f'{input_mount_source}:{input_mount_target}:ro'
mounts.append(input_mount)
if job.service == 'tesseract-ocr-pipeline':
if isinstance(job.service_args['model'], str):
model_id = hashids.decode(job.service_args['model'])
elif isinstance(job.service_args['model'], int):
model_id = job.service_args['model']
else:
job.status = JobStatus.FAILED
return
model = TesseractOCRPipelineModel.query.get(model_id)
if model is None:
job.status = JobStatus.FAILED
return
models_mount_source = model.path
models_mount_target = f'/usr/local/share/tessdata/{model.id}.traineddata'
models_mount = f'{models_mount_source}:{models_mount_target}:ro'
mounts.append(models_mount)
elif job.service == 'spacy-nlp-pipeline':
model_id = hashids.decode(job.service_args['model'])
model = SpaCyNLPPipelineModel.query.get(model_id)
if model is None:
job.status = JobStatus.FAILED
return
models_mount_source = model.path
models_mount_target = f'/usr/local/share/spacy/models/{model.filename}'
models_mount = f'{models_mount_source}:{models_mount_target}:ro'
mounts.append(models_mount)
''' ### Output mount ### '''
output_mount_source = os.path.join(job.path, 'results')
output_mount_target = '/output'
output_mount = f'{output_mount_source}:{output_mount_target}:rw'
# Make sure that their is no data in the output directory
shutil.rmtree(output_mount_source, ignore_errors=True)
os.makedirs(output_mount_source)
mounts.append(output_mount)
''' ### Pipeline data mount ### '''
pyflow_data_mount_source = os.path.join(job.path, 'pipeline_data')
pyflow_data_mount_target = '/logs/pyflow.data'
pyflow_data_mount = f'{pyflow_data_mount_source}:{pyflow_data_mount_target}:rw'
# Make sure that their is no data in the output directory
shutil.rmtree(pyflow_data_mount_source, ignore_errors=True)
os.makedirs(pyflow_data_mount_source)
mounts.append(pyflow_data_mount)
''' ## Name ## '''
name = f'job_{job.id}'
''' ## Resources ## '''
resources = docker.types.Resources(
cpu_reservation=n_cores * (10 ** 9),
mem_reservation=mem_mb * (10 ** 6)
)
''' ## Restart policy ## '''
restart_policy = docker.types.RestartPolicy()
try:
docker_client.services.create(
image,
command=command,
constraints=constraints,
labels=labels,
mounts=mounts,
name=name,
resources=resources,
restart_policy=restart_policy,
user='0:0'
)
except docker.errors.DockerException as e:
current_app.logger.error(f'Create service "{name}" failed: {e}')
return
job.status = JobStatus.QUEUED
def _checkout_job_service(job):
service_name = f'job_{job.id}'
try:
service = docker_client.services.get(service_name)
except docker.errors.NotFound as e:
current_app.logger.error(f'Get service "{service_name}" failed: {e}')
job.status = JobStatus.FAILED
return
except docker.errors.DockerException as e:
current_app.logger.error(f'Get service "{service_name}" failed: {e}')
return
service_tasks = service.tasks()
if not service_tasks:
return
task_state = service_tasks[0].get('Status').get('State')
if job.status == JobStatus.QUEUED and task_state != 'pending':
job.status = JobStatus.RUNNING
return
elif job.status == JobStatus.RUNNING and task_state == 'complete':
job.status = JobStatus.COMPLETED
results_dir = os.path.join(job.path, 'results')
with open(os.path.join(results_dir, 'outputs.json')) as f:
outputs = json.load(f)
for output in outputs:
filename = os.path.basename(output['file'])
job_result = JobResult(
filename=filename,
job=job,
mimetype=output['mimetype']
)
if 'description' in output:
job_result.description = output['description']
db.session.add(job_result)
db.session.flush(objects=[job_result])
db.session.refresh(job_result)
os.rename(
os.path.join(results_dir, output['file']),
job_result.path
)
elif job.status == JobStatus.RUNNING and task_state == 'failed':
job.status = JobStatus.FAILED
else:
return
job.end_date = datetime.utcnow()
try:
service.remove()
except docker.errors.DockerException as e:
current_app.logger.error(f'Remove service "{service_name}" failed: {e}')
def _remove_job_service(job):
service_name = f'job_{job.id}'
try:
service = docker_client.services.get(service_name)
except docker.errors.NotFound:
job.status = JobStatus.CANCELED
return
except docker.errors.DockerException as e:
current_app.logger.error(f'Get service "{service_name}" failed: {e}')
return
try:
service.update(mounts=None)
except docker.errors.DockerException as e:
current_app.logger.error(f'Update service "{service_name}" failed: {e}')
return
try:
service.remove()
except docker.errors.DockerException as e:
current_app.logger.error(f'Remove "{service_name}" service failed: {e}')

View File

@ -1,99 +0,0 @@
from flask import abort, current_app, request
from flask_login import current_user
from functools import wraps
from threading import Thread
from typing import List, Union
from werkzeug.exceptions import NotAcceptable
from app.models import Permission
def permission_required(permission):
def decorator(f):
@wraps(f)
def decorated_function(*args, **kwargs):
if not current_user.can(permission):
abort(403)
return f(*args, **kwargs)
return decorated_function
return decorator
def admin_required(f):
return permission_required(Permission.ADMINISTRATE)(f)
def socketio_login_required(f):
@wraps(f)
def decorated_function(*args, **kwargs):
if current_user.is_authenticated:
return f(*args, **kwargs)
else:
return {'code': 401, 'msg': 'Unauthorized'}
return decorated_function
def socketio_permission_required(permission):
def decorator(f):
@wraps(f)
def decorated_function(*args, **kwargs):
if not current_user.can(permission):
return {'code': 403, 'msg': 'Forbidden'}
return f(*args, **kwargs)
return decorated_function
return decorator
def socketio_admin_required(f):
return socketio_permission_required(Permission.ADMINISTRATE)(f)
def background(f):
'''
' This decorator executes a function in a Thread.
' Decorated functions need to be executed within a code block where an
' app context exists.
'
' NOTE: An app object is passed as a keyword argument to the decorated
' function.
'''
@wraps(f)
def wrapped(*args, **kwargs):
kwargs['app'] = current_app._get_current_object()
thread = Thread(target=f, args=args, kwargs=kwargs)
thread.start()
return thread
return wrapped
def content_negotiation(
produces: Union[str, List[str], None] = None,
consumes: Union[str, List[str], None] = None
):
def decorator(f):
@wraps(f)
def decorated_function(*args, **kwargs):
provided = request.mimetype
if consumes is None:
consumeables = None
elif isinstance(consumes, str):
consumeables = {consumes}
elif isinstance(consumes, list) and all(isinstance(x, str) for x in consumes):
consumeables = {*consumes}
else:
raise TypeError()
accepted = {*request.accept_mimetypes.values()}
if produces is None:
produceables = None
elif isinstance(produces, str):
produceables = {produces}
elif isinstance(produces, list) and all(isinstance(x, str) for x in produces):
produceables = {*produces}
else:
raise TypeError()
if produceables is not None and len(produceables & accepted) == 0:
raise NotAcceptable()
if consumeables is not None and provided not in consumeables:
raise NotAcceptable()
return f(*args, **kwargs)
return decorated_function
return decorator

View File

@ -1,25 +0,0 @@
from flask import current_app, render_template
from flask_mail import Message
from threading import Thread
from app import mail
def create_message(recipient, subject, template, **kwargs):
subject_prefix: str = current_app.config['NOPAQUE_MAIL_SUBJECT_PREFIX']
msg: Message = Message(
body=render_template(f'{template}.txt.j2', **kwargs),
html=render_template(f'{template}.html.j2', **kwargs),
recipients=[recipient],
subject=f'{subject_prefix} {subject}'
)
return msg
def send(msg, *args, **kwargs):
def _send(app, msg):
with app.app_context():
mail.send(msg)
thread = Thread(target=_send, args=[current_app._get_current_object(), msg])
thread.start()
return thread

View File

@ -1,5 +0,0 @@
from flask import Blueprint
bp = Blueprint('errors', __name__)
from . import handlers

View File

@ -1,14 +0,0 @@
from flask import jsonify, render_template, request
from werkzeug.exceptions import HTTPException
from . import bp
@bp.app_errorhandler(HTTPException)
def handle_http_exception(error):
''' Generic HTTP exception handler '''
accept_json = request.accept_mimetypes.accept_json
accept_html = request.accept_mimetypes.accept_html
if accept_json and not accept_html:
response = jsonify(str(error))
return response, error.code
return render_template('errors/error.html.j2', error=error), error.code

View File

@ -1,18 +0,0 @@
from flask import Blueprint
from flask_login import login_required
bp = Blueprint('jobs', __name__)
@bp.before_request
@login_required
def before_request():
'''
Ensures that the routes in this package can only be visited by users that
are logged in.
'''
pass
from . import routes, json_routes

View File

@ -1,73 +0,0 @@
from flask import abort, current_app
from flask_login import current_user
from threading import Thread
import os
from app import db
from app.decorators import admin_required, content_negotiation
from app.models import Job, JobStatus
from . import bp
@bp.route('/<hashid:job_id>', methods=['DELETE'])
@content_negotiation(produces='application/json')
def delete_job(job_id):
def _delete_job(app, job_id):
with app.app_context():
job = Job.query.get(job_id)
job.delete()
db.session.commit()
job = Job.query.get_or_404(job_id)
if not (job.user == current_user or current_user.is_administrator()):
abort(403)
thread = Thread(
target=_delete_job,
args=(current_app._get_current_object(), job_id)
)
thread.start()
response_data = {
'message': f'Job "{job.title}" marked for deletion'
}
return response_data, 202
@bp.route('/<hashid:job_id>/log')
@admin_required
@content_negotiation(produces='application/json')
def job_log(job_id):
job = Job.query.get_or_404(job_id)
if job.status not in [JobStatus.COMPLETED, JobStatus.FAILED]:
response = {'errors': {'message': 'Job status is not completed or failed'}}
return response, 409
with open(os.path.join(job.path, 'pipeline_data', 'logs', 'pyflow_log.txt')) as log_file:
log = log_file.read()
response_data = {
'jobLog': log
}
return response_data, 200
@bp.route('/<hashid:job_id>/restart', methods=['POST'])
@content_negotiation(produces='application/json')
def restart_job(job_id):
def _restart_job(app, job_id):
with app.app_context():
job = Job.query.get(job_id)
job.restart()
db.session.commit()
job = Job.query.get_or_404(job_id)
if not (job.user == current_user or current_user.is_administrator()):
abort(403)
if job.status == JobStatus.FAILED:
response = {'errors': {'message': 'Job status is not "failed"'}}
return response, 409
thread = Thread(
target=_restart_job,
args=(current_app._get_current_object(), job_id)
)
thread.start()
response_data = {
'message': f'Job "{job.title}" marked for restarting'
}
return response_data, 202

View File

@ -1,60 +0,0 @@
from flask import (
abort,
redirect,
render_template,
send_from_directory,
url_for
)
from flask_breadcrumbs import register_breadcrumb
from flask_login import current_user
import os
from app.models import Job, JobInput, JobResult
from . import bp
from .utils import job_dynamic_list_constructor as job_dlc
@bp.route('')
@register_breadcrumb(bp, '.', '<i class="nopaque-icons left">J</i>My Jobs')
def corpora():
return redirect(url_for('main.dashboard', _anchor='jobs'))
@bp.route('/<hashid:job_id>')
@register_breadcrumb(bp, '.entity', '', dynamic_list_constructor=job_dlc)
def job(job_id):
job = Job.query.get_or_404(job_id)
if not (job.user == current_user or current_user.is_administrator()):
abort(403)
return render_template(
'jobs/job.html.j2',
title='Job',
job=job
)
@bp.route('/<hashid:job_id>/inputs/<hashid:job_input_id>/download')
def download_job_input(job_id, job_input_id):
job_input = JobInput.query.filter_by(job_id=job_id, id=job_input_id).first_or_404()
if not (job_input.job.user == current_user or current_user.is_administrator()):
abort(403)
return send_from_directory(
os.path.dirname(job_input.path),
os.path.basename(job_input.path),
as_attachment=True,
attachment_filename=job_input.filename,
mimetype=job_input.mimetype
)
@bp.route('/<hashid:job_id>/results/<hashid:job_result_id>/download')
def download_job_result(job_id, job_result_id):
job_result = JobResult.query.filter_by(job_id=job_id, id=job_result_id).first_or_404()
if not (job_result.job.user == current_user or current_user.is_administrator()):
abort(403)
return send_from_directory(
os.path.dirname(job_result.path),
os.path.basename(job_result.path),
as_attachment=True,
attachment_filename=job_result.filename,
mimetype=job_result.mimetype
)

View File

@ -1,13 +0,0 @@
from flask import request, url_for
from app.models import Job
def job_dynamic_list_constructor():
job_id = request.view_args['job_id']
job = Job.query.get_or_404(job_id)
return [
{
'text': f'<i class="nopaque-icons left service-icons" data-service="{job.service}"></i>{job.title}',
'url': url_for('.job', job_id=job_id)
}
]

View File

@ -1,5 +0,0 @@
from flask import Blueprint
bp = Blueprint('main', __name__, cli_group=None)
from . import cli, routes

View File

@ -1,45 +0,0 @@
from flask import current_app
from flask_migrate import upgrade
import os
from app.models import (
CorpusFollowerRole,
Role,
SpaCyNLPPipelineModel,
TesseractOCRPipelineModel,
User
)
from . import bp
@bp.cli.command('deploy')
def deploy():
''' Run deployment tasks. '''
# Make default directories
print('Make default directories')
base_dir = current_app.config['NOPAQUE_DATA_DIR']
default_dirs = [
os.path.join(base_dir, 'tmp'),
os.path.join(base_dir, 'users')
]
for dir in default_dirs:
if os.path.exists(dir):
if not os.path.isdir(dir):
raise NotADirectoryError(f'{dir} is not a directory')
else:
os.mkdir(dir)
# migrate database to latest revision
print('Migrate database to latest revision')
upgrade()
# Insert/Update default database values
print('Insert/Update default Roles')
Role.insert_defaults()
print('Insert/Update default Users')
User.insert_defaults()
print('Insert/Update default CorpusFollowerRoles')
CorpusFollowerRole.insert_defaults()
print('Insert/Update default SpaCyNLPPipelineModels')
SpaCyNLPPipelineModel.insert_defaults()
print('Insert/Update default TesseractOCRPipelineModels')
TesseractOCRPipelineModel.insert_defaults()

View File

@ -1,92 +0,0 @@
from flask import flash, redirect, render_template, url_for
from flask_breadcrumbs import register_breadcrumb
from flask_login import current_user, login_required, login_user
from app.auth.forms import LoginForm
from app.models import Corpus, User
from sqlalchemy import or_
from . import bp
@bp.route('/', methods=['GET', 'POST'])
@register_breadcrumb(bp, '.', '<i class="material-icons">home</i>')
def index():
form = LoginForm()
if form.validate_on_submit():
user = User.query.filter((User.email == form.user.data.lower()) | (User.username == form.user.data)).first()
if user and user.verify_password(form.password.data):
login_user(user, form.remember_me.data)
flash('You have been logged in')
return redirect(url_for('.dashboard'))
flash('Invalid email/username or password', category='error')
redirect(url_for('.index'))
return render_template(
'main/index.html.j2',
title='nopaque',
form=form
)
@bp.route('/faq')
@register_breadcrumb(bp, '.faq', 'Frequently Asked Questions')
def faq():
return render_template(
'main/faq.html.j2',
title='Frequently Asked Questions'
)
@bp.route('/dashboard')
@register_breadcrumb(bp, '.dashboard', '<i class="material-icons left">dashboard</i>Dashboard')
@login_required
def dashboard():
return render_template(
'main/dashboard.html.j2',
title='Dashboard'
)
# @bp.route('/user_manual')
# @register_breadcrumb(bp, '.user_manual', '<i class="material-icons left">help</i>User manual')
# def user_manual():
# return render_template('main/user_manual.html.j2', title='User manual')
@bp.route('/news')
@register_breadcrumb(bp, '.news', '<i class="material-icons left">email</i>News')
def news():
return render_template(
'main/news.html.j2',
title='News'
)
@bp.route('/privacy_policy')
@register_breadcrumb(bp, '.privacy_policy', 'Private statement (GDPR)')
def privacy_policy():
return render_template(
'main/privacy_policy.html.j2',
title='Privacy statement (GDPR)'
)
@bp.route('/terms_of_use')
@register_breadcrumb(bp, '.terms_of_use', 'Terms of Use')
def terms_of_use():
return render_template(
'main/terms_of_use.html.j2',
title='Terms of Use'
)
# @bp.route('/social-area')
# @register_breadcrumb(bp, '.social_area', '<i class="material-icons left">group</i>Social Area')
# @login_required
# def social_area():
# corpora = Corpus.query.filter(Corpus.is_public == True, Corpus.user != current_user).all()
# users = User.query.filter(User.is_public == True, User.id != current_user.id).all()
# return render_template(
# 'main/social_area.html.j2',
# title='Social Area',
# corpora=corpora,
# users=users
# )

File diff suppressed because it is too large Load Diff

View File

@ -1,25 +0,0 @@
from flask import Blueprint
from flask_login import login_required
import os
import yaml
services_file = \
os.path.join(os.path.dirname(os.path.abspath(__file__)), 'services.yml')
with open(services_file, 'r') as f:
SERVICES = yaml.safe_load(f)
bp = Blueprint('services', __name__)
@bp.before_request
@login_required
def before_request():
'''
Ensures that the routes in this package can only be visited by users that
are logged in.
'''
pass
from . import routes # noqa

View File

@ -1,192 +0,0 @@
from flask_wtf import FlaskForm
from flask_login import current_user
from flask_wtf.file import FileField, FileRequired
from wtforms import (
BooleanField,
DecimalRangeField,
MultipleFileField,
SelectField,
StringField,
SubmitField,
ValidationError
)
from wtforms.validators import InputRequired, Length
from app.models import SpaCyNLPPipelineModel, TesseractOCRPipelineModel
from . import SERVICES
class CreateJobBaseForm(FlaskForm):
description = StringField(
'Description',
validators=[InputRequired(), Length(max=255)]
)
title = StringField(
'Title',
validators=[InputRequired(), Length(max=32)]
)
version = SelectField('Version', validators=[InputRequired()])
submit = SubmitField()
class CreateFileSetupPipelineJobForm(CreateJobBaseForm):
images = MultipleFileField('File(s)', validators=[InputRequired()])
def validate_images(form, field):
valid_mimetypes = ['image/jpeg', 'image/png', 'image/tiff']
for image in field.data:
if image.mimetype not in valid_mimetypes:
raise ValidationError('JPEG, PNG and TIFF files only!')
def __init__(self, *args, **kwargs):
if 'prefix' not in kwargs:
kwargs['prefix'] = 'create-file-setup-pipeline-job-form'
service_manifest = SERVICES['file-setup-pipeline']
version = kwargs.pop('version', service_manifest['latest_version'])
super().__init__(*args, **kwargs)
self.version.choices = [(x, x) for x in service_manifest['versions']]
self.version.data = version
self.version.default = service_manifest['latest_version']
class CreateTesseractOCRPipelineJobForm(CreateJobBaseForm):
binarization = BooleanField('Binarization')
pdf = FileField('File', validators=[FileRequired()])
model = SelectField('Model', validators=[InputRequired()])
ocropus_nlbin_threshold = DecimalRangeField(
render_kw={'min': 0, 'max': 1, 'step': 0.1, 'start': [0.5], 'disabled': True}
)
def validate_binarization(self, field):
service_info = SERVICES['tesseract-ocr-pipeline']['versions'][self.version.data]
if field.data:
if not('methods' in service_info and 'binarization' in service_info['methods']):
raise ValidationError('Binarization is not available')
def validate_pdf(self, field):
if field.data.mimetype != 'application/pdf':
raise ValidationError('PDF files only!')
def __init__(self, *args, **kwargs):
if 'prefix' not in kwargs:
kwargs['prefix'] = 'create-tesseract-ocr-pipeline-job-form'
service_manifest = SERVICES['tesseract-ocr-pipeline']
version = kwargs.pop('version', service_manifest['latest_version'])
super().__init__(*args, **kwargs)
service_info = service_manifest['versions'][version]
if self.binarization.render_kw is None:
self.binarization.render_kw = {}
self.binarization.render_kw['disabled'] = True
if self.ocropus_nlbin_threshold.render_kw is None:
self.ocropus_nlbin_threshold.render_kw = {}
self.ocropus_nlbin_threshold.render_kw['disabled'] = True
if 'methods' in service_info:
if 'binarization' in service_info['methods']:
del self.binarization.render_kw['disabled']
if 'ocropus_nlbin_threshold' in service_info['methods']:
del self.ocropus_nlbin_threshold.render_kw['disabled']
user_models = [
x for x in current_user.tesseract_ocr_pipeline_models.order_by(TesseractOCRPipelineModel.title).all()
]
models = [
x for x in TesseractOCRPipelineModel.query.order_by(TesseractOCRPipelineModel.title).all()
if version in x.compatible_service_versions and (x.is_public == True or x.user == current_user)
]
self.model.choices = {
'': [('', 'Choose your option')],
'Your models': [(x.hashid, f'{x.title} [{x.version}]') for x in user_models] if user_models else [(0, 'Nothing here yet...')],
'Public models': [(x.hashid, f'{x.title} [{x.version}]') for x in models]
}
self.model.default = ''
self.version.choices = [(x, x) for x in service_manifest['versions']]
self.version.data = version
self.version.default = service_manifest['latest_version']
class CreateTranskribusHTRPipelineJobForm(CreateJobBaseForm):
binarization = BooleanField('Binarization')
pdf = FileField('File', validators=[FileRequired()])
model = SelectField('Model', validators=[InputRequired()])
def validate_binarization(self, field):
service_info = SERVICES['transkribus-htr-pipeline']['versions'][self.version.data]
if field.data:
if(
'methods' not in service_info
or 'binarization' not in service_info['methods']
):
raise ValidationError('Binarization is not available')
def validate_pdf(self, field):
if field.data.mimetype != 'application/pdf':
raise ValidationError('PDF files only!')
def __init__(self, *args, **kwargs):
if 'prefix' not in kwargs:
kwargs['prefix'] = 'create-transkribus-htr-pipeline-job-form'
transkribus_htr_pipeline_models = kwargs.pop('transkribus_htr_pipeline_models', [])
service_manifest = SERVICES['transkribus-htr-pipeline']
version = kwargs.pop('version', service_manifest['latest_version'])
super().__init__(*args, **kwargs)
service_info = service_manifest['versions'][version]
if self.binarization.render_kw is None:
self.binarization.render_kw = {}
self.binarization.render_kw['disabled'] = True
if 'methods' in service_info:
if 'binarization' in service_info['methods']:
del self.binarization.render_kw['disabled']
self.model.choices = [('', 'Choose your option')]
self.model.choices += [(x['modelId'], x['name']) for x in transkribus_htr_pipeline_models]
self.model.default = ''
self.version.choices = [(x, x) for x in service_manifest['versions']]
self.version.data = version
self.version.default = service_manifest['latest_version']
class CreateSpacyNLPPipelineJobForm(CreateJobBaseForm):
encoding_detection = BooleanField('Encoding detection', render_kw={'disabled': True})
txt = FileField('File', validators=[FileRequired()])
model = SelectField('Model', validators=[InputRequired()])
def validate_encoding_detection(self, field):
service_info = SERVICES['spacy-nlp-pipeline']['versions'][self.version.data]
if field.data:
if(
'methods' not in service_info
or 'encoding_detection' not in service_info['methods']
):
raise ValidationError('Encoding detection is not available')
def validate_txt(form, field):
if field.data.mimetype != 'text/plain':
raise ValidationError('Plain text files only!')
def __init__(self, *args, **kwargs):
if 'prefix' not in kwargs:
kwargs['prefix'] = 'create-spacy-nlp-pipeline-job-form'
service_manifest = SERVICES['spacy-nlp-pipeline']
version = kwargs.pop('version', service_manifest['latest_version'])
super().__init__(*args, **kwargs)
service_info = service_manifest['versions'][version]
print(service_info)
if self.encoding_detection.render_kw is None:
self.encoding_detection.render_kw = {}
self.encoding_detection.render_kw['disabled'] = True
if 'methods' in service_info:
if 'encoding_detection' in service_info['methods']:
del self.encoding_detection.render_kw['disabled']
user_models = [
x for x in current_user.spacy_nlp_pipeline_models.order_by(SpaCyNLPPipelineModel.title).all()
]
models = [
x for x in SpaCyNLPPipelineModel.query.filter(SpaCyNLPPipelineModel.user != current_user, SpaCyNLPPipelineModel.is_public == True).order_by(SpaCyNLPPipelineModel.title).all()
if version in x.compatible_service_versions
]
self.model.choices = {
'': [('', 'Choose your option')],
'Your models': [(x.hashid, f'{x.title} [{x.version}]') for x in user_models] if user_models else [(0, 'Nothing here yet...')],
'Public models': [(x.hashid, f'{x.title} [{x.version}]') for x in models]
}
self.model.default = ''
self.version.choices = [(x, x) for x in service_manifest['versions']]
self.version.data = version
self.version.default = version

View File

@ -1,231 +0,0 @@
from flask import abort, current_app, flash, Markup, redirect, render_template, request, url_for
from flask_breadcrumbs import register_breadcrumb
from flask_login import current_user
import requests
from app import db, hashids
from app.models import (
Job,
JobInput,
JobStatus,
TesseractOCRPipelineModel,
SpaCyNLPPipelineModel
)
from . import bp, SERVICES
from .forms import (
CreateFileSetupPipelineJobForm,
CreateTesseractOCRPipelineJobForm,
CreateTranskribusHTRPipelineJobForm,
CreateSpacyNLPPipelineJobForm
)
@bp.route('/services')
@register_breadcrumb(bp, '.', 'Services')
def services():
return redirect(url_for('main.dashboard'))
@bp.route('/file-setup-pipeline', methods=['GET', 'POST'])
@register_breadcrumb(bp, '.file_setup_pipeline', '<i class="nopaque-icons service-icons left" data-service="file-setup-pipeline"></i>File Setup')
def file_setup_pipeline():
service = 'file-setup-pipeline'
service_manifest = SERVICES[service]
version = request.args.get('version', service_manifest['latest_version'])
if version not in service_manifest['versions']:
abort(404)
form = CreateFileSetupPipelineJobForm(prefix='create-job-form', version=version)
if form.is_submitted():
if not form.validate():
response = {'errors': form.errors}
return response, 400
try:
job = Job.create(
title=form.title.data,
description=form.description.data,
service=service,
service_args={},
service_version=form.version.data,
user=current_user
)
except OSError:
abort(500)
for input_file in form.images.data:
try:
JobInput.create(input_file, job=job)
except (AttributeError, OSError):
abort(500)
job.status = JobStatus.SUBMITTED
db.session.commit()
message = Markup(f'Job "<a href="{job.url}">{job.title}</a>" created')
flash(message, 'job')
return {}, 201, {'Location': job.url}
return render_template(
'services/file_setup_pipeline.html.j2',
title=service_manifest['name'],
form=form
)
@bp.route('/tesseract-ocr-pipeline', methods=['GET', 'POST'])
@register_breadcrumb(bp, '.tesseract_ocr_pipeline', '<i class="nopaque-icons service-icons left" data-service="tesseract-ocr-pipeline"></i>Tesseract OCR Pipeline')
def tesseract_ocr_pipeline():
service_name = 'tesseract-ocr-pipeline'
service_manifest = SERVICES[service_name]
version = request.args.get('version', service_manifest['latest_version'])
if version not in service_manifest['versions']:
abort(404)
form = CreateTesseractOCRPipelineJobForm(prefix='create-job-form', version=version)
if form.is_submitted():
if not form.validate():
response = {'errors': form.errors}
return response, 400
try:
job = Job.create(
title=form.title.data,
description=form.description.data,
service=service_name,
service_args={
'binarization': form.binarization.data,
'model': hashids.decode(form.model.data),
'ocropus_nlbin_threshold': float(form.ocropus_nlbin_threshold.data)
},
service_version=form.version.data,
user=current_user
)
except OSError:
abort(500)
try:
JobInput.create(form.pdf.data, job=job)
except (AttributeError, OSError):
abort(500)
job.status = JobStatus.SUBMITTED
db.session.commit()
message = Markup(f'Job "<a href="{job.url}">{job.title}</a>" created')
flash(message, 'job')
return {}, 201, {'Location': job.url}
tesseract_ocr_pipeline_models = [
x for x in TesseractOCRPipelineModel.query.all()
if version in x.compatible_service_versions and (x.is_public == True or x.user == current_user)
]
user_tesseract_ocr_pipeline_models_count = len(current_user.tesseract_ocr_pipeline_models.all())
return render_template(
'services/tesseract_ocr_pipeline.html.j2',
title=service_manifest['name'],
form=form,
tesseract_ocr_pipeline_models=tesseract_ocr_pipeline_models,
user_tesseract_ocr_pipeline_models_count=user_tesseract_ocr_pipeline_models_count
)
@bp.route('/transkribus-htr-pipeline', methods=['GET', 'POST'])
@register_breadcrumb(bp, '.transkribus_htr_pipeline', '<i class="nopaque-icons service-icons left" data-service="transkribus-htr-pipeline"></i>Transkribus HTR Pipeline')
def transkribus_htr_pipeline():
if not current_app.config.get('NOPAQUE_TRANSKRIBUS_ENABLED'):
abort(404)
service = 'transkribus-htr-pipeline'
service_manifest = SERVICES[service]
version = request.args.get('version', service_manifest['latest_version'])
if version not in service_manifest['versions']:
abort(404)
r = requests.get(
'https://transkribus.eu/TrpServer/rest/models/text',
headers={'Accept': 'application/json'}
)
if r.status_code != 200:
abort(500)
transkribus_htr_pipeline_models = r.json()['trpModelMetadata']
transkribus_htr_pipeline_models.append({'modelId': 48513, 'name': 'Caroline Minuscle', 'language': 'lat', 'isoLanguages': ['lat']})
form = CreateTranskribusHTRPipelineJobForm(
prefix='create-job-form',
transkribus_htr_pipeline_models=transkribus_htr_pipeline_models,
version=version
)
if form.is_submitted():
if not form.validate():
response = {'errors': form.errors}
return response, 400
try:
job = Job.create(
title=form.title.data,
description=form.description.data,
service=service,
service_args={
'binarization': form.binarization.data,
'model': form.model.data
},
service_version=form.version.data,
user=current_user
)
except OSError:
abort(500)
try:
JobInput.create(form.pdf.data, job=job)
except (AttributeError, OSError):
abort(500)
job.status = JobStatus.SUBMITTED
db.session.commit()
message = Markup(f'Job "<a href="{job.url}">{job.title}</a>" created')
flash(message, 'job')
return {}, 201, {'Location': job.url}
return render_template(
'services/transkribus_htr_pipeline.html.j2',
title=service_manifest['name'],
form=form,
transkribus_htr_pipeline_models=transkribus_htr_pipeline_models
)
@bp.route('/spacy-nlp-pipeline', methods=['GET', 'POST'])
@register_breadcrumb(bp, '.spacy_nlp_pipeline', '<i class="nopaque-icons service-icons left" data-service="spacy-nlp-pipeline"></i>SpaCy NLP Pipeline')
def spacy_nlp_pipeline():
service = 'spacy-nlp-pipeline'
service_manifest = SERVICES[service]
version = request.args.get('version', SERVICES[service]['latest_version'])
if version not in service_manifest['versions']:
abort(404)
form = CreateSpacyNLPPipelineJobForm(prefix='create-job-form', version=version)
spacy_nlp_pipeline_models = SpaCyNLPPipelineModel.query.all()
user_spacy_nlp_pipeline_models_count = len(current_user.spacy_nlp_pipeline_models.all())
if form.is_submitted():
if not form.validate():
response = {'errors': form.errors}
return response, 400
try:
job = Job.create(
title=form.title.data,
description=form.description.data,
service=service,
service_args={
'encoding_detection': form.encoding_detection.data,
'model': form.model.data
},
service_version=form.version.data,
user=current_user
)
except OSError:
abort(500)
try:
JobInput.create(form.txt.data, job=job)
except (AttributeError, OSError):
abort(500)
job.status = JobStatus.SUBMITTED
db.session.commit()
message = Markup(f'Job "<a href="{job.url}">{job.title}</a>" created')
flash(message, 'job')
return {}, 201, {'Location': job.url}
return render_template(
'services/spacy_nlp_pipeline.html.j2',
title=service_manifest['name'],
form=form,
spacy_nlp_pipeline_models=spacy_nlp_pipeline_models,
user_spacy_nlp_pipeline_models_count=user_spacy_nlp_pipeline_models_count
)
@bp.route('/corpus-analysis')
@register_breadcrumb(bp, '.corpus_analysis', '<i class="nopaque-icons service-icons left" data-service="corpus-analysis"></i>Corpus Analysis')
def corpus_analysis():
return render_template(
'services/corpus_analysis.html.j2',
title='Corpus Analysis'
)

View File

@ -1,60 +0,0 @@
# TODO: This could also be done via GitLab/GitHub APIs
file-setup-pipeline:
name: 'File Setup Pipeline'
publisher: 'Bielefeld University - CRC 1288 - INF'
latest_version: '0.1.0'
versions:
0.1.0:
publishing_year: 2022
url: 'https://gitlab.ub.uni-bielefeld.de/sfb1288inf/file-setup-pipeline/-/releases/v0.1.0'
tesseract-ocr-pipeline:
name: 'Tesseract OCR Pipeline'
publisher: 'Bielefeld University - CRC 1288 - INF'
latest_version: '0.1.1'
versions:
0.1.0:
methods:
- 'binarization'
publishing_year: 2022
url: 'https://gitlab.ub.uni-bielefeld.de/sfb1288inf/tesseract-ocr-pipeline/-/releases/v0.1.0'
0.1.1:
methods:
- 'binarization'
- 'ocropus_nlbin_threshold'
publishing_year: 2022
url: 'https://gitlab.ub.uni-bielefeld.de/sfb1288inf/tesseract-ocr-pipeline/-/releases/v0.1.1'
transkribus-htr-pipeline:
name: 'Transkribus HTR Pipeline'
publisher: 'Bielefeld University - CRC 1288 - INF'
latest_version: '0.1.1'
versions:
0.1.0:
methods:
- 'binarization'
publishing_year: 2022
url: 'https://gitlab.ub.uni-bielefeld.de/sfb1288inf/transkribus-htr-pipeline/-/releases/v0.1.0'
0.1.1:
methods:
- 'binarization'
publishing_year: 2022
url: 'https://gitlab.ub.uni-bielefeld.de/sfb1288inf/transkribus-htr-pipeline/-/releases/v0.1.1'
spacy-nlp-pipeline:
name: 'SpaCy NLP Pipeline'
publisher: 'Bielefeld University - CRC 1288 - INF'
latest_version: '0.1.2'
versions:
0.1.0:
methods:
- 'encoding_detection'
publishing_year: 2022
url: 'https://gitlab.ub.uni-bielefeld.de/sfb1288inf/spacy-nlp-pipeline/-/releases/v0.1.0'
0.1.1:
methods:
- 'encoding_detection'
publishing_year: 2022
url: 'https://gitlab.ub.uni-bielefeld.de/sfb1288inf/spacy-nlp-pipeline/-/releases/v0.1.1'
0.1.2:
methods:
- 'encoding_detection'
publishing_year: 2022
url: 'https://gitlab.ub.uni-bielefeld.de/sfb1288inf/spacy-nlp-pipeline/-/releases/v0.1.2'

View File

@ -1,18 +0,0 @@
from flask import Blueprint
from flask_login import login_required
bp = Blueprint('settings', __name__)
@bp.before_request
@login_required
def before_request():
'''
Ensures that the routes in this package can only be visited by users that
are logged in.
'''
pass
from . import routes

View File

@ -1,12 +0,0 @@
from flask import g, url_for
from flask_breadcrumbs import register_breadcrumb
from flask_login import current_user
from app.users.settings.routes import settings as settings_route
from . import bp
@bp.route('/settings', methods=['GET', 'POST'])
@register_breadcrumb(bp, '.', '<i class="material-icons left">settings</i>Settings')
def settings():
g._nopaque_redirect_location_on_post = url_for('.settings')
return settings_route(current_user.id)

View File

@ -1,290 +0,0 @@
/// Map deep get
/// @author Kitty Giraudel
/// @access public
/// @param {Map} $map - Map
/// @param {Arglist} $keys - Key chain
/// @return {*} - Desired value
@function map-deep-get($map, $keys...) {
@each $key in $keys {
$map: map-get($map, $key);
}
@return $map;
}
$color: (
"baseline": (
"primary": #00426f,
"primary-variant": #1a5c89,
"secondary": #00426f,
"secondary-variant": #1a5c89,
"background": #ffffff,
"surface": #ffffff,
"error": #b00020
),
"social-area": (
"base": #d6ae86,
"darken": #C98536,
"lighten": #EAE2DB
),
"service": (
"corpus-analysis": (
"base": #aa9cc9,
"darken": #6b3f89,
"lighten": #ebe8f6
),
"file-setup-pipeline": (
"base": #d5dc95,
"darken": #a1b300,
"lighten": #f2f3e1
),
"spacy-nlp-pipeline": (
"base": #98acd2,
"darken": #0064a3,
"lighten": #e5e8f5
),
"tesseract-ocr-pipeline": (
"base": #a9d8c8,
"darken": #00a58b,
"lighten": #e7f4f1
),
"transkribus-htr-pipeline": (
"base": #607d8b,
"darken": #37474f,
"lighten": #cfd8dc
)
),
"status": (
"corpus": (
"UNPREPARED": #9e9e9e,
"QUEUED": #2196f3,
"BUILDING": #ffc107,
"BUILT": #4caf50,
"FAILED": #f44336,
"STARTING_ANALYSIS_SESSION": #2196f3,
"RUNNING_ANALYSIS_SESSION": #4caf50,
"CANCELING_ANALYSIS_SESSION": #ff5722
),
"job": (
"INITIALIZING": #9e9e9e,
"SUBMITTED": #9e9e9e,
"QUEUED": #2196f3,
"RUNNING": #ffc107,
"CANCELING": #ff5722,
"CANCELED": #ff5722,
"COMPLETED": #4caf50,
"FAILED": #f44336
)
),
"s-attr": (
"PERSON": #a6e22d,
"PER": #a6e22d,
"NORP": #ef60b4,
"FACILITY": #43c6fc,
"ORG": #43c6fc,
"GPE": #fd9720,
"LOC": #fd9720,
"PRODUCT": #a99dfb,
"MISC": #a99dfb,
"EVENT": #fc0,
"WORK_OF_ART": #fc0,
"LANGUAGE": #fc0,
"DATE": #2fbbab,
"TIME": #2fbbab,
"PERCENT": #bbb,
"MONEY": #bbb,
"QUANTITY": #bbb,
"ORDINAL": #bbb,
"CARDINAL": #bbb
)
);
@each $key, $color-code in map-get($color, "baseline") {
.#{$key}-color {
background-color: $color-code !important;
}
.#{$key}-color-border {
border-color: $color-code !important;
}
.#{$key}-color-text {
color: $color-code !important;
}
}
@each $key, $color-code in map-get($color, "social-area") {
.social-area-color-#{$key} {
background-color: $color-code !important;
}
.social-area-color-border-#{$key} {
border-color: $color-code !important;
}
}
@each $service-name, $color-palette in map-get($color, "service") {
.service-color[data-service="#{$service-name}"] {
background-color: map-get($color-palette, "base") !important;
&.darken {
background-color: map-get($color-palette, "darken") !important;
}
&.lighten {
background-color: map-get($color-palette, "lighten") !important;
}
}
.service-color-border[data-service="#{$service-name}"] {
border-color: map-get($color-palette, "base") !important;
&.border-darken {
border-color: map-get($color-palette, "darken") !important;
}
&.border-lighten {
border-color: map-get($color-palette, "lighten") !important;
}
}
.service-color-text[data-service="#{$service-name}"] {
color: map-get($color-palette, "base") !important;
&.text-darken {
color: map-get($color-palette, "darken") !important;
}
&.text-lighten {
color: map-get($color-palette, "lighten") !important;
}
}
.service-scheme[data-service="#{$service-name}"] {
background-color: map-get($color-palette, "lighten");
.btn, .btn-small, .btn-large, .btn-floating {
background-color: map-get($color-palette, "darken");
&:hover {
background-color: map-get($color-palette, "base");
}
}
.pagination {
li.active {
background-color: map-get($color-palette, "darken");
}
}
.table-of-contents {
a.active, a:hover {
border-color: map-get($color-palette, "darken");
}
}
.tabs {
.tab {
&.disabled {
a {
color: inherit;
&:hover {
color: change-color(map-get($color-palette, "darken"), $alpha: 0.15);
}
}
}
a {
color: inherit;
&:focus, &:hover, &.active {
color: map-get($color-palette, "darken");
}
&:focus, &.active, &.active:focus {
background-color: change-color(map-get($color-palette, "darken"), $alpha: 0.15);
}
}
}
.indicator {
background-color: map-get($color-palette, "darken");
}
}
}
}
@each $ressource-name, $color-palette in map-get($color, "status") {
@each $key, $color-code in $color-palette {
.#{$ressource-name}-status-color[data-status="#{$key}"] {
background-color: $color-code !important;
}
.#{$ressource-name}-status-color-border[data-status="#{$key}"] {
border-color: $color-code !important;
}
.#{$ressource-name}-status-color-text[data-status="#{$key}"] {
color: $color-code !important;
}
}
}
@each $key, $color-code in map-get($color, "s-attr") {
.chip.s-attr[data-s-attr-type="ent"][data-s-attr-ent-type="#{$key}"] {
background-color: $color-code !important;
}
}
main {
.btn, .btn-small, .btn-large, .btn-floating {
background-color: map-deep-get($color, "baseline", "secondary");
&:hover {
background-color: map-deep-get($color, "baseline", "secondary-variant");
}
}
.pagination {
li.active {
background-color: map-deep-get($color, "baseline", "secondary");
}
}
.table-of-contents {
a.active, a:hover {
border-color: map-deep-get($color, "baseline", "secondary");
}
}
.tabs {
.tab {
&.disabled {
a {
color: inherit;
&:hover {
color: change-color(map-deep-get($color, "baseline", "secondary"), $alpha: 0.15);
}
}
}
a {
color: inherit;
&:focus, &:hover, &.active {
color: map-deep-get($color, "baseline", "secondary");
}
&:focus, &.active, &.active:focus {
background-color: change-color(map-deep-get($color, "baseline", "secondary"), $alpha: 0.15);
}
}
}
.indicator {
background-color: map-deep-get($color, "baseline", "secondary");
}
}
}

View File

@ -1,31 +0,0 @@
/*
* Spacing
*/
$spacing-shortcuts: ("margin": "mg", "padding": "pd");
$spacing-directions: ("top": "t", "right": "r", "bottom": "b", "left": "l");
$spacing-values: ("0": 0, "1": 0.25rem, "2": 0.5rem, "3": 0.75rem, "4": 1rem, "5": 1.5rem, "6": 3rem, "auto": auto);
@each $spacing-shortcut-name, $spacing-shortcut-value in $spacing-shortcuts {
@each $spacing-name, $spacing-value in $spacing-values {
// All directions
.#{$spacing-shortcut-value}-#{$spacing-name} {
#{$spacing-shortcut-name}: $spacing-value !important;
}
// Horizontal axis
.#{$spacing-shortcut-value}x-#{$spacing-name} {
#{$spacing-shortcut-name}-left: $spacing-value !important;
#{$spacing-shortcut-name}-right: $spacing-value !important;
}
// Vertical axis
.#{$spacing-shortcut-value}y-#{$spacing-name} {
#{$spacing-shortcut-name}-top: $spacing-value !important;
#{$spacing-shortcut-name}-bottom: $spacing-value !important;
}
// Cardinal directions
@each $spacing-direction-name, $spacing-direction-value in $spacing-directions {
.#{$spacing-shortcut-value}#{$spacing-direction-value}-#{$spacing-name} {
#{$spacing-shortcut-name}-#{$spacing-direction-name}: $spacing-value !important;
}
}
}
}

View File

@ -1,8 +0,0 @@
.parallax-container .parallax {
z-index: 0;
}
.autocomplete-content {
width: 100% !important;
left: 0 !important;
}

View File

@ -1,12 +0,0 @@
/*
* The sidenav-fixed class is used which causes the sidenav to be fixed and open
* on large screens and hides to the regular functionality on smaller screens.
* In order to prevent the sidenav to overlap the content, the content (in our
* case header, main and footer) gets an offset equal to the width of the
* sidenav.
*/
@media only screen and (min-width : 993px) {
header, main, footer {padding-left: 300px;}
.modal:not(.bottom-sheet) {left: 300px;}
.navbar-fixed > nav {width: calc(100% - 300px)}
}

View File

@ -1,18 +0,0 @@
/*
* Sticky Footer: https://materializecss.com/footer.html#sticky-footer
* A sticky footer always stays on the bottom of the page regardless of how
* little content is on the page. However, this footer will be pushed down if
* there is a lot of content, so it is different from a fixed footer.
*
* Note: This may cause issues in Internet Explorer which has weak support for
* flexbox.
*/
body {
display: flex;
flex-direction: column;
min-height: 100vh;
}
main {
flex: 1 0 auto;
}

View File

@ -1,33 +0,0 @@
@font-face {
font-family: 'Nopaque Icons';
font-style: normal;
font-weight: 400;
src: local('nopaque Icons'),
local('NopaqueIcons-Regular'),
url(../fonts/nopaque_icons/NopaqueIcons-Regular.otf) format('opentype');
}
.nopaque-icons {
font-family: 'Nopaque Icons';
font-weight: normal;
font-style: normal;
font-size: 24px; /* Preferred icon size */
display: inline-block;
line-height: 1;
text-transform: none;
letter-spacing: normal;
word-wrap: normal;
white-space: nowrap;
direction: ltr;
/* Support for all WebKit browsers. */
-webkit-font-smoothing: antialiased;
/* Support for Safari and Chrome. */
text-rendering: optimizeLegibility;
/* Support for Firefox. */
-moz-osx-font-smoothing: grayscale;
/* Support for IE. */
font-feature-settings: 'liga';
}

View File

@ -1,146 +0,0 @@
.modal-conent {
overflow-x: hidden;
}
#concordance-query-builder {
width: 70%;
}
#concordance-query-builder nav {
background-color: #6B3F89;
margin-top: -25px;
margin-left: -25px;
width: 105%;
}
#query-builder-nav{
padding-left: 15px;
}
#close-query-builder {
margin-right: 50px;
cursor: pointer;
}
#general-options-query-builder-tutorial-info-icon {
color: black;
}
#your-query {
border-bottom-style: solid;
border-bottom-width: 1px;
}
#insert-query-button {
background-color: #00426f;
text-align: center;
}
#structural-attr h6 {
margin-left: 15px;
}
#add-structural-attribute-tutorial-info-icon {
color: black;
}
#sentence {
background-color:#FD9720;
}
#entity {
background-color: #A6E22D;
}
#text-annotation {
background-color: #2FBBAB;
}
#no-value-metadata-message {
padding-top: 25px;
margin-left: -20px;
}
#token-kind-selector {
background-color: #f2eff7;
padding: 15px;
border-top-style: solid;
border-color: #6B3F89;
}
#token-kind-selector.s5 {
margin-top: 15px;
}
#token-kind-selector h6 {
margin-left: 15px;
}
#token-tutorial-info-icon {
color: black;
}
#no-value-message {
padding-top: 25px;
margin-left: -20px;
}
#token-edit-options h6 {
margin-left: 15px;
}
#edit-options-tutorial-info-icon {
color: black;
}
#incidence-modifiers-button a{
background-color: #2FBBAB;
}
#incidence-modifiers a{
background-color: white;
}
#ignore-case {
margin-left: 5px;
}
#or, #and {
background-color: #fc0;
}
#betweenNM {
width: 60%;
}
#query-builder-tutorial-modal {
width: 60%;
}
#query-builder-tutorial-modal ul {
margin-top: 10px;
}
#query-builder-tutorial {
padding:15px;
}
#scroll-up-button-query-builder-tutorial {
background-color: #28B3D1;
}
[data-type="start-sentence"], [data-type="end-sentence"] {
background-color: #FD9720;
}
[data-type="start-empty-entity"], [data-type="start-entity"], [data-type="end-entity"] {
background-color: #A6E22D;
}
[data-type="start-text-annotation"]{
background-color: #2FBBAB;
}
[data-type="token"] {
background-color: #28B3D1;
}

View File

@ -1,67 +0,0 @@
/* Change navbar height bacause an extended and fixed navbar is used */
.navbar-fixed {
height: 112px;
}
/* Change placholdertext color of file uplaod fields */
::placeholder {
color: #9e9e9e;
opacity: 1;
}
/*
* changes preoloader size etc. to fit visually better with the chip status
* indicator of jobs
*/
.status-spinner {
margin-bottom: -10px;
width: 30px !important;
height: 30px !important;
}
#manual-modal .manual-chapter-title {
display: none;
}
.show-if-only-child:not(:only-child) {
display: none !important;
}
.btn-scale-x2 {
transform: scale(2);
}
.btn-scale-x2 .nopaque-icons.service-icons {
font-size: 2.5rem;
}
/* Fix material icon vertical alignment when nested in various elements */
h1 .nopaque-icons, h2 .nopaque-icons, h3 .nopaque-icons, h4 .nopaque-icons, .tab .nopaque-icons, .tab .material-icons {
line-height: inherit;
}
.corpus-status-text, .job-status-text {text-transform: lowercase;}
.corpus-status-text[data-status]:empty::before, .job-status-text[data-status]:empty::before {content: attr(data-status);}
.service-scheme[data-service="file-setup-pipeline"] .nopaque-icons.service-icons[data-service="inherit"]:empty::before {content: "E";}
.service-scheme[data-service="tesseract-ocr-pipeline"] .nopaque-icons.service-icons[data-service="inherit"]:empty::before {content: "F";}
.service-scheme[data-service="transkribus-htr-pipeline"] .nopaque-icons.service-icons[data-service="inherit"]:empty::before {content: "F";}
.service-scheme[data-service="spacy-nlp-pipeline"] .nopaque-icons.service-icons[data-service="inherit"]:empty::before {content: "G";}
.service-scheme[data-service="corpus-analysis"] .nopaque-icons.service-icons[data-service="inherit"]:empty::before {content: "H";}
.nopaque-icons.service-icons[data-service="file-setup-pipeline"]:empty::before {content: "E";}
.nopaque-icons.service-icons[data-service="tesseract-ocr-pipeline"]:empty::before {content: "F";}
.nopaque-icons.service-icons[data-service="transkribus-htr-pipeline"]:empty::before {content: "F";}
.nopaque-icons.service-icons[data-service="spacy-nlp-pipeline"]:empty::before {content: "G";}
.nopaque-icons.service-icons[data-service="corpus-analysis"]:empty::before {content: "H";}
[draggable="true"] {cursor: move !important;}
.clickable {cursor: pointer !important;}
.chip.s-attr .chip.p-attr {background-color: inherit;}
.width-25 {width: 25%;}
.width-50 {width: 50%;}
.width-75 {width: 75%;}
.width-100 {width: 100%;}

Binary file not shown.

Before

Width:  |  Height:  |  Size: 14 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 16 KiB

Some files were not shown because too many files have changed in this diff Show More