Compare commits
428 Commits
525723818e
...
manual
Author | SHA1 | Date | |
---|---|---|---|
|
48fe7c0702 | ||
|
5a2723b617 | ||
|
4425d50140 | ||
|
39113a6f17 | ||
|
a53f1d216b | ||
|
ffd7a3ad91 | ||
|
5dce269736 | ||
|
13369296d3 | ||
|
4f6e1c121f | ||
|
438a257fe3 | ||
|
2e88d7d035 | ||
|
b338c33d42 | ||
|
d6cebddd92 | ||
|
07fda0e95a | ||
|
3927d9e4cd | ||
|
8f5d5ffdec | ||
|
f02d1619e2 | ||
|
892f1f799e | ||
|
f5e98ae655 | ||
|
f790106e0e | ||
|
c57acc73d2 | ||
|
678a0767b7 | ||
|
17a9338d9f | ||
|
a7cbce1eda | ||
|
fa28c875e1 | ||
|
0927edcceb | ||
|
9c22370eea | ||
|
bdcc80a66f | ||
|
9be5ce6014 | ||
|
00e4c3ade3 | ||
|
79a16cae83 | ||
|
c5aea0be94 | ||
|
afcb890ccf | ||
|
9627708950 | ||
|
1bb1408988 | ||
|
79bafdea89 | ||
|
a2d617718b | ||
|
691b2de5b2 | ||
|
eb0e7c9ba1 | ||
|
ab132746e7 | ||
|
ae5646512d | ||
|
fc66327920 | ||
|
9bfc96ad41 | ||
|
008938b46b | ||
|
4f24e9f9da | ||
|
d0fe4360bb | ||
|
1c18806c9c | ||
|
9487aa7a60 | ||
|
6559051fd5 | ||
|
0882e085a3 | ||
|
ff1bcb40f3 | ||
|
d298b200dc | ||
|
660d7ebc99 | ||
|
df33c7b36d | ||
|
bf8b22fb58 | ||
|
b216ad8a40 | ||
|
4822f6ec02 | ||
|
61be3345be | ||
|
e9ddb85f03 | ||
|
e3166ca54c | ||
|
0565f309f8 | ||
|
1f40002249 | ||
|
1ff9c8bfe3 | ||
|
e8fe67d290 | ||
|
fbb32ef580 | ||
|
985e9b406f | ||
|
0abfe65afa | ||
|
f4d3415c11 | ||
|
965f2854b2 | ||
|
f101a742a9 | ||
|
c046fbfb1e | ||
|
8997d3ad67 | ||
|
bf249193af | ||
|
c40e428eb2 | ||
|
4daf3359b9 | ||
|
d875623a8c | ||
|
067318bb89 | ||
|
a9203cc409 | ||
|
78dd375ef8 | ||
|
82cd384e5f | ||
|
c7dab5e502 | ||
|
d3cfd2cfaf | ||
|
14c10aeab1 | ||
|
2dec17b1b9 | ||
|
9fe38fab52 | ||
|
e20dd01710 | ||
|
1b974f0bbc | ||
|
c6be72d0a7 | ||
|
d3f2d5648e | ||
|
7cae84ffdc | ||
|
1d6834302d | ||
|
53f4400731 | ||
|
f36600f06c | ||
|
068211a72b | ||
|
f566e276a1 | ||
|
c605613d86 | ||
|
d1fc425f48 | ||
|
b8ae221987 | ||
|
b50147a66a | ||
|
18311c8c9c | ||
|
2dc54f4258 | ||
|
bcdc3721ef | ||
|
60bcaa9e01 | ||
|
af89a5776f | ||
|
fcbf9c8cb6 | ||
|
cc6ce6e1f3 | ||
|
4581367d04 | ||
|
d7f00f6337 | ||
|
86947e2cf8 | ||
|
4a9a03e648 | ||
|
45369d4c84 | ||
|
f56e951b71 | ||
|
d776e11fe5 | ||
|
9200837e63 | ||
|
aad347caa0 | ||
|
9ccab8657a | ||
|
fe7f69d596 | ||
|
8a5c94f448 | ||
|
3d38e550a0 | ||
|
1387d80a26 | ||
|
5c00c5740e | ||
|
04575b78cf | ||
|
2951fc6966 | ||
|
bf0213edbc | ||
|
c843fbb437 | ||
|
1dc7d2a1c6 | ||
|
173aea7df4 | ||
|
f1962b3b47 | ||
|
dd04623278 | ||
|
5e8008399d | ||
|
0d92f221cb | ||
|
766c5ba27d | ||
|
661ac7c509 | ||
|
3b390858ff | ||
|
ae8e383085 | ||
|
d0c6b2b9e5 | ||
|
8277e60689 | ||
|
8b887d79ef | ||
|
983400b925 | ||
|
37f9e1281d | ||
|
5eef2292e7 | ||
|
351da5d4e9 | ||
|
27fe4a95e4 | ||
|
0627b27ec7 | ||
|
adfd229e66 | ||
|
ae6a7cb86d | ||
|
2dd6015ba6 | ||
|
f80b635ca3 | ||
|
0e8a87d34e | ||
|
ccf7f449dd | ||
|
dd05657362 | ||
|
cef82d9001 | ||
|
656eef17db | ||
|
104c2fe468 | ||
|
d08f95e944 | ||
|
87e2c2b484 | ||
|
7a925b6a19 | ||
|
e4f435c5ee | ||
|
7721926d6c | ||
|
691d4757ff | ||
|
6c744fc3ba | ||
|
e46f0032bd | ||
|
9da1a6e987 | ||
|
8182cccecd | ||
|
d898cd8516 | ||
|
4ae4b88a44 | ||
|
b7483af8e9 | ||
|
41d8dbad5d | ||
|
203faa4257 | ||
|
960f36c740 | ||
|
c3834ca400 | ||
|
572fdf3a00 | ||
|
22b43a689f | ||
|
deec9e8a76 | ||
|
688b96ffee | ||
|
a9973e9c8e | ||
|
413b6111df | ||
|
a9f05fffdf | ||
|
7936ac270b | ||
|
1eabf18b13 | ||
|
94dc25750c | ||
|
beb157092e | ||
|
1cd9540e5b | ||
|
912bd7da07 | ||
|
e21ef2422d | ||
|
c52c966863 | ||
|
a7a948908f | ||
|
3a97b1a07a | ||
|
315b538c30 | ||
|
07103ee4e5 | ||
|
efa8712cd9 | ||
|
e816a2fb15 | ||
|
6c31788402 | ||
|
1c98c5070a | ||
|
1e33366820 | ||
|
71013f1dc5 | ||
|
142c82cc36 | ||
|
f84ac48975 | ||
|
2739dc4b4f | ||
|
eb2abf8282 | ||
|
529c778772 | ||
|
be51044059 | ||
|
e194ce7541 | ||
|
19e01d6709 | ||
|
d6e17e1554 | ||
|
11b697145b | ||
|
11e1789d83 | ||
|
f037c31b88 | ||
|
972f514e6b | ||
|
e6d8d72e52 | ||
|
91e68360ac | ||
|
c35b2f8674 | ||
|
71359523ba | ||
|
cc508cf4eb | ||
|
b7ca2a2cf6 | ||
|
21d6072f6f | ||
|
b8bcb159a2 | ||
|
15e7fa6dd3 | ||
|
589c4a6c56 | ||
|
9ffc41a133 | ||
|
4944d31dd5 | ||
|
62d20409ea | ||
|
8aeafc33bd | ||
|
a54ff2e35a | ||
|
e93449ba73 | ||
|
c2471e1848 | ||
|
4c277cd685 | ||
|
d6789a0388 | ||
|
793de849ef | ||
|
f4b30433e6 | ||
|
c2a6b9d746 | ||
|
59950aba5b | ||
|
d619795815 | ||
|
0e786803ee | ||
|
ac2f27150b | ||
|
526fd1769e | ||
|
86318b9a7d | ||
|
e326e1ab81 | ||
|
2f3ddc6b81 | ||
|
44d98dfa46 | ||
|
6648b3548b | ||
|
367c2ce5e0 | ||
|
5cc07f2e13 | ||
|
fab259522e | ||
|
b795cfa891 | ||
|
184e78ef0b | ||
|
202700b129 | ||
|
49ff1aa284 | ||
|
ce32b03f4a | ||
|
fbf663fee3 | ||
|
66a54dae10 | ||
|
60a59383c7 | ||
|
0cf955bd2f | ||
|
1c47d2a346 | ||
|
336bbc39e4 | ||
|
595bda98ef | ||
|
91e42d6d92 | ||
|
8c935820e8 | ||
|
e4593d5922 | ||
|
c3306563f0 | ||
|
47b9a90cb6 | ||
|
b07addc5c3 | ||
|
8a85dd9e61 | ||
|
c6db277436 | ||
|
6c76d27a32 | ||
|
817a13dfff | ||
|
078f88d4ec | ||
|
2768a96133 | ||
|
69b5e9b48b | ||
|
a844cdb45b | ||
|
8dd3669af4 | ||
|
e67dc49976 | ||
|
8538fc705f | ||
|
144bb38d75 | ||
|
ac47a9e57f | ||
|
0de14ea5db | ||
|
98b1c15aa0 | ||
|
8e7d44ec57 | ||
|
4ca2c0c873 | ||
|
2fd7e35b99 | ||
|
3a2295487c | ||
|
77fc8a42f1 | ||
|
6abf119c0c | ||
|
719e6da9c8 | ||
|
ca25ea0b80 | ||
|
477e583be9 | ||
|
ee82dafb7c | ||
|
423709b4eb | ||
|
a27caaa8a2 | ||
|
87798f4781 | ||
|
6e6fa49f79 | ||
|
f1d8b81923 | ||
|
c3d429ed83 | ||
|
4d2d4fcc40 | ||
|
2289106ac7 | ||
|
451a0a3955 | ||
|
9167bffa61 | ||
|
491e24f0a3 | ||
|
43751b44ac | ||
|
cff4b2c588 | ||
|
cca0185500 | ||
|
5c776e0fb6 | ||
|
9ce5ff8cba | ||
|
35b239877a | ||
|
e4a8ad911f | ||
|
9b2353105e | ||
|
9de09519d6 | ||
|
b41436c844 | ||
|
09b3afc880 | ||
|
df870c1c7d | ||
|
020de69e45 | ||
|
9e58578761 | ||
|
5b6eae7645 | ||
|
57813b4bc2 | ||
|
622d32fa45 | ||
|
a676475b55 | ||
|
f2bbcdc441 | ||
|
685db81c85 | ||
|
575eeae94a | ||
|
3d4403e997 | ||
|
333e6b268e | ||
|
ff4406aaae | ||
|
41096445a6 | ||
|
823e42faf0 | ||
|
faf5a61808 | ||
|
f8e94a721f | ||
|
7ea7b6d7a7 | ||
|
268d00ce72 | ||
|
666397046d | ||
|
8d21bfe434 | ||
|
743ed52fd6 | ||
|
0f8c1b1cb4 | ||
|
bd86a71222 | ||
|
464ae8ecc3 | ||
|
fac6ba11ed | ||
|
0520caeddd | ||
|
6e9a6fa5a1 | ||
|
4c97929b1b | ||
|
2efc9533ec | ||
|
f1be57e509 | ||
|
777151b2bf | ||
|
c14abf5200 | ||
|
b8e63d2342 | ||
|
8dba78c474 | ||
|
8aebe27aa8 | ||
|
a9767bf3c3 | ||
|
c91004d6ba | ||
|
bac526b927 | ||
|
90ac30bba3 | ||
|
4c05c6cc18 | ||
|
018805a1b6 | ||
|
646f735ab2 | ||
|
eec056e010 | ||
|
f348d1ed23 | ||
|
e03b5258ef | ||
|
ca53974e50 | ||
|
5c2225c43e | ||
|
a1af3e34d2 | ||
|
7e54d56ed5 | ||
|
bda18e64c8 | ||
|
b6f155a06b | ||
|
ecb577628b | ||
|
6ba3f9c849 | ||
|
2529dfeb62 | ||
|
c589fd1f78 | ||
|
21819bfd9b | ||
|
aaf14a9952 | ||
|
57a598ed20 | ||
|
3789f61ca4 | ||
|
42b421c2e0 | ||
|
d6fcaa97bf | ||
|
465a7dc0af | ||
|
59de68e6fa | ||
|
5f27ce2801 | ||
|
92cc2cd419 | ||
|
fdad10991c | ||
|
53bba2afb0 | ||
|
6bb4594937 | ||
|
0d7fca9b0b | ||
|
0e7e5933cc | ||
|
09fdad2162 | ||
|
9272150212 | ||
|
cb830a6f9b | ||
|
7770d4d478 | ||
|
cfa4fa68f2 | ||
|
b98e30022e | ||
|
4fb5f2f2dc | ||
|
fecbb50d39 | ||
|
8a55ce902e | ||
|
b364480de6 | ||
|
e11b2e3c1a | ||
|
2dc7efbc8d | ||
|
c01068e96b | ||
|
73cb566db2 | ||
|
145b80356d | ||
|
ed195af6a2 | ||
|
b1586b3679 | ||
|
ec6d0a6477 | ||
|
d2828cabbe | ||
|
8b01777318 | ||
|
e2ddbf26f1 | ||
|
3147bed90a | ||
|
c565b08f9c | ||
|
ff3ac3658f | ||
|
17ec3e292a | ||
|
122cce98a1 | ||
|
cb31afe723 | ||
|
d0b369efaf | ||
|
b27a1051af | ||
|
0609e2cd72 | ||
|
1d85e96d3a | ||
|
132875bb34 | ||
|
9e5bb7ad90 | ||
|
38d09a3490 | ||
|
a459d6607a | ||
|
5881588160 | ||
|
3ad942f17b | ||
|
1be8a449fe | ||
|
288014969a | ||
|
68dc8de476 | ||
|
4fab75f0e2 | ||
|
726e781692 | ||
|
d699fd09e5 | ||
|
ff238cd823 | ||
|
8d70e93856 | ||
|
8168a2384f | ||
|
2dc41fd387 | ||
|
5837e05024 |
@@ -8,5 +8,6 @@
|
||||
!.flaskenv
|
||||
!boot.sh
|
||||
!config.py
|
||||
!docker-nopaque-entrypoint.sh
|
||||
!nopaque.py
|
||||
!requirements.txt
|
||||
|
210
.env.tpl
@@ -1,204 +1,32 @@
|
||||
################################################################################
|
||||
# Docker #
|
||||
################################################################################
|
||||
# DEFAULT: ./data
|
||||
# NOTE: Use `.` as <project-basedir>
|
||||
# HOST_DATA_DIR=
|
||||
|
||||
# Example: 1000
|
||||
##############################################################################
|
||||
# Variables for use in Docker Compose YAML files #
|
||||
##############################################################################
|
||||
# HINT: Use this bash command `id -u`
|
||||
# NOTE: 0 (= root user) is not allowed
|
||||
HOST_UID=
|
||||
|
||||
# Example: 1000
|
||||
# HINT: Use this bash command `id -g`
|
||||
HOST_GID=
|
||||
|
||||
# Example: 999
|
||||
# HINT: Use this bash command `getent group docker | cut -d: -f3`
|
||||
HOST_DOCKER_GID=
|
||||
|
||||
# DEFAULT: ./logs
|
||||
# NOTES: Use `.` as <project-basedir>
|
||||
# HOST_LOG_DIR=
|
||||
# DEFAULT: nopaque
|
||||
# DOCKER_DEFAULT_NETWORK_NAME=
|
||||
|
||||
# DEFAULT: nopaque_default
|
||||
# DOCKER_NETWORK_NAME=
|
||||
# DEFAULT: ./volumes/db/data
|
||||
# NOTE: Use `.` as <project-basedir>
|
||||
# DOCKER_DB_SERVICE_DATA_VOLUME_SOURCE_PATH=
|
||||
|
||||
################################################################################
|
||||
# Flask #
|
||||
# https://flask.palletsprojects.com/en/1.1.x/config/ #
|
||||
################################################################################
|
||||
# CHOOSE ONE: http, https
|
||||
# DEFAULT: http
|
||||
# PREFERRED_URL_SCHEME=
|
||||
# DEFAULT: ./volumes/mq/data
|
||||
# NOTE: Use `.` as <project-basedir>
|
||||
# DOCKER_MQ_SERVICE_DATA_VOLUME_SOURCE_PATH=
|
||||
|
||||
# DEFAULT: hard to guess string
|
||||
# HINT: Use this bash command `python -c "import uuid; print(uuid.uuid4().hex)"`
|
||||
# SECRET_KEY=
|
||||
# NOTE: This must be a network share and it must be available on all
|
||||
# Docker Swarm nodes, mounted to the same path with the same
|
||||
# user and group ownership.
|
||||
DOCKER_NOPAQUE_SERVICE_DATA_VOLUME_SOURCE_PATH=
|
||||
|
||||
# DEFAULT: localhost:5000
|
||||
# Example: nopaque.example.com or nopaque.example.com:5000
|
||||
# HINT: If your instance is publicly available on a port other than 80/443,
|
||||
# you will have to add this to the server name
|
||||
# SERVER_NAME=
|
||||
|
||||
# CHOOSE ONE: False, True
|
||||
# DEFAULT: False
|
||||
# HINT: Set to true if you redirect http to https
|
||||
# SESSION_COOKIE_SECURE=
|
||||
|
||||
|
||||
################################################################################
|
||||
# Flask-Assets #
|
||||
# https://webassets.readthedocs.io/en/latest/ #
|
||||
################################################################################
|
||||
# CHOOSE ONE: False, True
|
||||
# DEFAULT: False
|
||||
# ASSETS_DEBUG=
|
||||
|
||||
|
||||
################################################################################
|
||||
# Flask-Hashids #
|
||||
# https://github.com/Pevtrick/Flask-Hashids #
|
||||
################################################################################
|
||||
# DEFAULT: 16
|
||||
# HASHIDS_MIN_LENGTH=
|
||||
|
||||
# NOTE: Use this bash command `python -c "import uuid; print(uuid.uuid4().hex)"`
|
||||
# It is strongly recommended that this is NEVER the same as the SECRET_KEY
|
||||
HASHIDS_SALT=
|
||||
|
||||
|
||||
################################################################################
|
||||
# Flask-Login #
|
||||
# https://flask-login.readthedocs.io/en/latest/ #
|
||||
################################################################################
|
||||
# CHOOSE ONE: False, True
|
||||
# DEFAULT: False
|
||||
# HINT: Set to true if you redirect http to https
|
||||
# REMEMBER_COOKIE_SECURE=
|
||||
|
||||
|
||||
################################################################################
|
||||
# Flask-Mail #
|
||||
# https://pythonhosted.org/Flask-Mail/ #
|
||||
################################################################################
|
||||
# EXAMPLE: nopaque Admin <nopaque@example.com>
|
||||
MAIL_DEFAULT_SENDER=
|
||||
|
||||
MAIL_PASSWORD=
|
||||
|
||||
# EXAMPLE: smtp.example.com
|
||||
MAIL_SERVER=
|
||||
|
||||
# EXAMPLE: 587
|
||||
MAIL_PORT=
|
||||
|
||||
# CHOOSE ONE: False, True
|
||||
# DEFAULT: False
|
||||
# MAIL_USE_SSL=
|
||||
|
||||
# CHOOSE ONE: False, True
|
||||
# DEFAULT: False
|
||||
# MAIL_USE_TLS=
|
||||
|
||||
# EXAMPLE: nopaque@example.com
|
||||
MAIL_USERNAME=
|
||||
|
||||
|
||||
################################################################################
|
||||
# Flask-SQLAlchemy #
|
||||
# https://flask-sqlalchemy.palletsprojects.com/en/2.x/config/ #
|
||||
################################################################################
|
||||
# DEFAULT: 'sqlite:///<nopaque-basedir>/data.sqlite'
|
||||
# NOTE: Use `.` as <nopaque-basedir>,
|
||||
# Don't use a SQLite database when using Docker
|
||||
# SQLALCHEMY_DATABASE_URI=
|
||||
|
||||
|
||||
################################################################################
|
||||
# nopaque #
|
||||
################################################################################
|
||||
# An account that is registered with this email address is automatically assigned
|
||||
# the administrator role.
|
||||
# EXAMPLE: admin.nopaque@example.com
|
||||
NOPAQUE_ADMIN=
|
||||
|
||||
# DEFAULT: /mnt/nopaque
|
||||
# NOTE: This must be a network share and it must be available on all Docker
|
||||
# Swarm nodes
|
||||
# NOPAQUE_DATA_DIR=
|
||||
|
||||
# CHOOSE ONE: False, True
|
||||
# DEFAULT: True
|
||||
# NOPAQUE_IS_PRIMARY_INSTANCE=
|
||||
|
||||
# transport://[userid:password]@hostname[:port]/[virtual_host]
|
||||
NOPAQUE_SOCKETIO_MESSAGE_QUEUE_URI=
|
||||
|
||||
# NOTE: Get these from the nopaque development team
|
||||
NOPAQUE_DOCKER_REGISTRY_USERNAME=
|
||||
NOPAQUE_DOCKER_REGISTRY_PASSWORD=
|
||||
|
||||
# DEFAULT: %Y-%m-%d %H:%M:%S
|
||||
# NOPAQUE_LOG_DATE_FORMAT=
|
||||
|
||||
# DEFAULT: [%(asctime)s] %(levelname)s in %(pathname)s (function: %(funcName)s, line: %(lineno)d): %(message)s
|
||||
# NOPAQUE_LOG_FORMAT=
|
||||
|
||||
# DEFAULT: INFO
|
||||
# CHOOSE ONE: CRITICAL, ERROR, WARNING, INFO, DEBUG
|
||||
# NOPAQUE_LOG_LEVEL=
|
||||
|
||||
# CHOOSE ONE: False, True
|
||||
# DEFAULT: True
|
||||
# NOPAQUE_LOG_FILE_ENABLED=
|
||||
|
||||
# DEFAULT: <nopaque-basedir>/logs
|
||||
# NOTE: Use `.` as <nopaque-basedir>
|
||||
# NOPAQUE_LOG_FILE_DIR=
|
||||
|
||||
# DEFAULT: NOPAQUE_LOG_LEVEL
|
||||
# CHOOSE ONE: CRITICAL, ERROR, WARNING, INFO, DEBUG
|
||||
# NOPAQUE_LOG_FILE_LEVEL=
|
||||
|
||||
# CHOOSE ONE: False, True
|
||||
# DEFAULT: False
|
||||
# NOPAQUE_LOG_STDERR_ENABLED=
|
||||
|
||||
# CHOOSE ONE: CRITICAL, ERROR, WARNING, INFO, DEBUG
|
||||
# DEFAULT: NOPAQUE_LOG_LEVEL
|
||||
# NOPAQUE_LOG_STDERR_LEVEL=
|
||||
|
||||
# CHOOSE ONE: False, True
|
||||
# DEFAULT: False
|
||||
# HINT: Set this to True only if you are using a proxy in front of nopaque
|
||||
# NOPAQUE_PROXY_FIX_ENABLED=
|
||||
|
||||
# DEFAULT: 0
|
||||
# Number of values to trust for X-Forwarded-For
|
||||
# NOPAQUE_PROXY_FIX_X_FOR=
|
||||
|
||||
# DEFAULT: 0
|
||||
# Number of values to trust for X-Forwarded-Host
|
||||
# NOPAQUE_PROXY_FIX_X_HOST=
|
||||
|
||||
# DEFAULT: 0
|
||||
# Number of values to trust for X-Forwarded-Port
|
||||
# NOPAQUE_PROXY_FIX_X_PORT=
|
||||
|
||||
# DEFAULT: 0
|
||||
# Number of values to trust for X-Forwarded-Prefix
|
||||
# NOPAQUE_PROXY_FIX_X_PREFIX=
|
||||
|
||||
# DEFAULT: 0
|
||||
# Number of values to trust for X-Forwarded-Proto
|
||||
# NOPAQUE_PROXY_FIX_X_PROTO=
|
||||
|
||||
# CHOOSE ONE: False, True
|
||||
# DEFAULT: False
|
||||
# NOPAQUE_TRANSKRIBUS_ENABLED=
|
||||
|
||||
# READ-COOP account data: https://readcoop.eu/
|
||||
# NOPAQUE_READCOOP_USERNAME=
|
||||
# NOPAQUE_READCOOP_PASSWORD=
|
||||
# DEFAULT: ./volumes/nopaque/logs
|
||||
# NOTE: Use `.` as <project-basedir>
|
||||
# DOCKER_NOPAQUE_SERVICE_LOGS_VOLUME_SOURCE_PATH=.
|
||||
|
4
.gitignore
vendored
@@ -1,11 +1,13 @@
|
||||
# nopaque specifics
|
||||
app/static/gen/
|
||||
data/
|
||||
volumes/
|
||||
docker-compose.override.yml
|
||||
logs/
|
||||
!logs/dummy
|
||||
*.env
|
||||
|
||||
*.pjentsch-testing
|
||||
|
||||
# Byte-compiled / optimized / DLL files
|
||||
__pycache__/
|
||||
*.py[cod]
|
||||
|
84
.gitlab-ci.yml
Normal file
@@ -0,0 +1,84 @@
|
||||
include:
|
||||
- template: Security/Container-Scanning.gitlab-ci.yml
|
||||
|
||||
##############################################################################
|
||||
# Pipeline stages in order of execution #
|
||||
##############################################################################
|
||||
stages:
|
||||
- build
|
||||
- publish
|
||||
- sca
|
||||
|
||||
##############################################################################
|
||||
# Pipeline behavior #
|
||||
##############################################################################
|
||||
workflow:
|
||||
rules:
|
||||
# Run the pipeline on commits to the default branch
|
||||
- if: $CI_COMMIT_BRANCH == $CI_DEFAULT_BRANCH
|
||||
variables:
|
||||
# Set the Docker image tag to `latest`
|
||||
DOCKER_IMAGE: $CI_REGISTRY_IMAGE:latest
|
||||
when: always
|
||||
# Run the pipeline on tag creation
|
||||
- if: $CI_COMMIT_TAG
|
||||
variables:
|
||||
# Set the Docker image tag to the Git tag name
|
||||
DOCKER_IMAGE: $CI_REGISTRY_IMAGE:$CI_COMMIT_REF_NAME
|
||||
when: always
|
||||
# Don't run the pipeline on all other occasions
|
||||
- when: never
|
||||
|
||||
##############################################################################
|
||||
# Default values for pipeline jobs #
|
||||
##############################################################################
|
||||
default:
|
||||
image: docker:24.0.6
|
||||
services:
|
||||
- docker:24.0.6-dind
|
||||
tags:
|
||||
- docker
|
||||
|
||||
##############################################################################
|
||||
# CI/CD variables for all jobs in the pipeline #
|
||||
##############################################################################
|
||||
variables:
|
||||
DOCKER_TLS_CERTDIR: /certs
|
||||
DOCKER_BUILD_PATH: .
|
||||
DOCKERFILE: Dockerfile
|
||||
|
||||
##############################################################################
|
||||
# Pipeline jobs #
|
||||
##############################################################################
|
||||
build:
|
||||
stage: build
|
||||
script:
|
||||
- docker build --tag $DOCKER_IMAGE --file $DOCKERFILE $DOCKER_BUILD_PATH
|
||||
- docker save $DOCKER_IMAGE > docker_image.tar
|
||||
artifacts:
|
||||
paths:
|
||||
- docker_image.tar
|
||||
|
||||
publish:
|
||||
stage: publish
|
||||
before_script:
|
||||
- docker login --username gitlab-ci-token --password $CI_JOB_TOKEN $CI_REGISTRY
|
||||
script:
|
||||
- docker load --input docker_image.tar
|
||||
- docker push $DOCKER_IMAGE
|
||||
after_script:
|
||||
- docker logout $CI_REGISTRY
|
||||
|
||||
container_scanning:
|
||||
stage: sca
|
||||
rules:
|
||||
# Run the job on commits to the default branch
|
||||
- if: $CI_COMMIT_BRANCH == $CI_DEFAULT_BRANCH
|
||||
when: always
|
||||
# Run the job on tag creation
|
||||
- if: $CI_COMMIT_TAG
|
||||
when: always
|
||||
# Don't run the job on all other occasions
|
||||
- when: never
|
||||
variables:
|
||||
CS_IMAGE: $DOCKER_IMAGE
|
5
.vscode/extensions.json
vendored
@@ -1,7 +1,8 @@
|
||||
{
|
||||
"recommendations": [
|
||||
"samuelcolvin.jinjahtml",
|
||||
"irongeek.vscode-env",
|
||||
"ms-azuretools.vscode-docker",
|
||||
"ms-python.python"
|
||||
"ms-python.python",
|
||||
"samuelcolvin.jinjahtml"
|
||||
]
|
||||
}
|
||||
|
20
.vscode/settings.json
vendored
@@ -1 +1,19 @@
|
||||
{}
|
||||
{
|
||||
"editor.rulers": [79],
|
||||
"files.insertFinalNewline": true,
|
||||
"[css]": {
|
||||
"editor.tabSize": 2
|
||||
},
|
||||
"[html]": {
|
||||
"editor.tabSize": 2
|
||||
},
|
||||
"[javascript]": {
|
||||
"editor.tabSize": 2
|
||||
},
|
||||
"[jinja-html]": {
|
||||
"editor.tabSize": 2
|
||||
},
|
||||
"[scss]": {
|
||||
"editor.tabSize": 2
|
||||
}
|
||||
}
|
||||
|
41
Dockerfile
@@ -1,14 +1,9 @@
|
||||
FROM python:3.8.10-slim-buster
|
||||
FROM python:3.10.13-slim-bookworm
|
||||
|
||||
|
||||
LABEL authors="Patrick Jentsch <p.jentsch@uni-bielefeld.de>"
|
||||
|
||||
|
||||
ARG DOCKER_GID
|
||||
ARG UID
|
||||
ARG GID
|
||||
|
||||
|
||||
ENV LANG="C.UTF-8"
|
||||
ENV PYTHONDONTWRITEBYTECODE="1"
|
||||
ENV PYTHONUNBUFFERED="1"
|
||||
@@ -17,34 +12,42 @@ ENV PYTHONUNBUFFERED="1"
|
||||
RUN apt-get update \
|
||||
&& apt-get install --no-install-recommends --yes \
|
||||
build-essential \
|
||||
gosu \
|
||||
libpq-dev \
|
||||
&& rm --recursive /var/lib/apt/lists/*
|
||||
|
||||
|
||||
RUN groupadd --gid "${DOCKER_GID}" docker \
|
||||
&& groupadd --gid "${GID}" nopaque \
|
||||
&& useradd --create-home --gid nopaque --groups "${DOCKER_GID}" --no-log-init --uid "${UID}" nopaque
|
||||
RUN useradd --create-home --no-log-init nopaque \
|
||||
&& groupadd docker \
|
||||
&& usermod --append --groups docker nopaque
|
||||
|
||||
|
||||
USER nopaque
|
||||
WORKDIR /home/nopaque
|
||||
|
||||
|
||||
ENV PYTHON3_VENV_PATH="/home/nopaque/venv"
|
||||
RUN python3 -m venv "${PYTHON3_VENV_PATH}"
|
||||
ENV PATH="${PYTHON3_VENV_PATH}/bin:${PATH}"
|
||||
|
||||
|
||||
COPY --chown=nopaque:nopaque requirements.txt .
|
||||
RUN python3 -m pip install --requirement requirements.txt \
|
||||
&& rm requirements.txt
|
||||
ENV NOPAQUE_PYTHON3_VENV_PATH="/home/nopaque/.venv"
|
||||
RUN python3 -m venv "${NOPAQUE_PYTHON3_VENV_PATH}"
|
||||
ENV PATH="${NOPAQUE_PYTHON3_VENV_PATH}/bin:${PATH}"
|
||||
|
||||
|
||||
COPY --chown=nopaque:nopaque app app
|
||||
COPY --chown=nopaque:nopaque migrations migrations
|
||||
COPY --chown=nopaque:nopaque tests tests
|
||||
COPY --chown=nopaque:nopaque .flaskenv boot.sh config.py nopaque.py ./
|
||||
COPY --chown=nopaque:nopaque .flaskenv boot.sh config.py nopaque.py requirements.txt ./
|
||||
|
||||
|
||||
RUN python3 -m pip install --requirement requirements.txt \
|
||||
&& mkdir logs
|
||||
|
||||
|
||||
USER root
|
||||
|
||||
|
||||
COPY docker-nopaque-entrypoint.sh /usr/local/bin/
|
||||
|
||||
|
||||
EXPOSE 5000
|
||||
|
||||
|
||||
ENTRYPOINT ["./boot.sh"]
|
||||
ENTRYPOINT ["docker-nopaque-entrypoint.sh"]
|
||||
|
@@ -1,5 +1,8 @@
|
||||
# nopaque
|
||||
|
||||

|
||||

|
||||
|
||||
nopaque bundles various tools and services that provide humanities scholars with Digital Humanities (DH) methods and thus can support their various individual research processes. Using nopaque, researchers can subject digitized sources to Optical Character Recognition (OCR). The resulting text files can then be used as a data basis for Natural Language Processing (NLP). The texts are automatically subjected to various linguistic annotations. The data processed via NLP can then be summarized in the web application as corpora and analyzed by means of an information retrieval system through complex search queries. The range of functions of the web application will be successively extended according to the needs of the researchers.
|
||||
|
||||
## Prerequisites and requirements
|
||||
|
@@ -8,7 +8,7 @@
|
||||
pipeline_name: 'ca_core_news_md'
|
||||
version: '3.2.0'
|
||||
compatible_service_versions:
|
||||
- '0.1.0'
|
||||
- '0.1.0'
|
||||
- title: 'German'
|
||||
description: 'German pipeline optimized for CPU. Components: tok2vec, tagger, morphologizer, parser, senter, ner, attribute_ruler, lemmatizer.'
|
||||
url: 'https://github.com/explosion/spacy-models/releases/download/de_core_news_md-3.2.0/de_core_news_md-3.2.0.tar.gz'
|
||||
@@ -19,7 +19,7 @@
|
||||
pipeline_name: 'de_core_news_md'
|
||||
version: '3.2.0'
|
||||
compatible_service_versions:
|
||||
- '0.1.0'
|
||||
- '0.1.0'
|
||||
- title: 'Greek'
|
||||
description: 'Greek pipeline optimized for CPU. Components: tok2vec, morphologizer, parser, senter, ner, attribute_ruler, lemmatizer.'
|
||||
url: 'https://github.com/explosion/spacy-models/releases/download/el_core_news_md-3.2.0/el_core_news_md-3.2.0.tar.gz'
|
||||
@@ -120,7 +120,6 @@
|
||||
version: '3.4.0'
|
||||
compatible_service_versions:
|
||||
- '0.1.1'
|
||||
- '0.1.2'
|
||||
- title: 'German'
|
||||
description: 'German pipeline optimized for CPU. Components: tok2vec, tagger, morphologizer, parser, lemmatizer (trainable_lemmatizer), senter, ner.'
|
||||
url: 'https://github.com/explosion/spacy-models/releases/download/de_core_news_md-3.4.0/de_core_news_md-3.4.0.tar.gz'
|
||||
@@ -132,7 +131,6 @@
|
||||
version: '3.4.0'
|
||||
compatible_service_versions:
|
||||
- '0.1.1'
|
||||
- '0.1.2'
|
||||
- title: 'Greek'
|
||||
description: 'Greek pipeline optimized for CPU. Components: tok2vec, morphologizer, parser, lemmatizer (trainable_lemmatizer), senter, ner, attribute_ruler.'
|
||||
url: 'https://github.com/explosion/spacy-models/releases/download/el_core_news_md-3.4.0/el_core_news_md-3.4.0.tar.gz'
|
||||
@@ -144,7 +142,6 @@
|
||||
version: '3.4.0'
|
||||
compatible_service_versions:
|
||||
- '0.1.1'
|
||||
- '0.1.2'
|
||||
- title: 'English'
|
||||
description: 'English pipeline optimized for CPU. Components: tok2vec, tagger, parser, senter, ner, attribute_ruler, lemmatizer.'
|
||||
url: 'https://github.com/explosion/spacy-models/releases/download/en_core_web_md-3.4.1/en_core_web_md-3.4.1.tar.gz'
|
||||
@@ -156,7 +153,6 @@
|
||||
version: '3.4.1'
|
||||
compatible_service_versions:
|
||||
- '0.1.1'
|
||||
- '0.1.2'
|
||||
- title: 'Spanish'
|
||||
description: 'Spanish pipeline optimized for CPU. Components: tok2vec, morphologizer, parser, senter, ner, attribute_ruler, lemmatizer.'
|
||||
url: 'https://github.com/explosion/spacy-models/releases/download/es_core_news_md-3.4.0/es_core_news_md-3.4.0.tar.gz'
|
||||
@@ -168,7 +164,6 @@
|
||||
version: '3.4.0'
|
||||
compatible_service_versions:
|
||||
- '0.1.1'
|
||||
- '0.1.2'
|
||||
- title: 'French'
|
||||
description: 'French pipeline optimized for CPU. Components: tok2vec, morphologizer, parser, senter, ner, attribute_ruler, lemmatizer.'
|
||||
url: 'https://github.com/explosion/spacy-models/releases/download/fr_core_news_md-3.4.0/fr_core_news_md-3.4.0.tar.gz'
|
||||
@@ -180,7 +175,6 @@
|
||||
version: '3.4.0'
|
||||
compatible_service_versions:
|
||||
- '0.1.1'
|
||||
- '0.1.2'
|
||||
- title: 'Italian'
|
||||
description: 'Italian pipeline optimized for CPU. Components: tok2vec, morphologizer, tagger, parser, lemmatizer (trainable_lemmatizer), senter, ner'
|
||||
url: 'https://github.com/explosion/spacy-models/releases/download/it_core_news_md-3.4.0/it_core_news_md-3.4.0.tar.gz'
|
||||
@@ -192,7 +186,6 @@
|
||||
version: '3.4.0'
|
||||
compatible_service_versions:
|
||||
- '0.1.1'
|
||||
- '0.1.2'
|
||||
- title: 'Polish'
|
||||
description: 'Polish pipeline optimized for CPU. Components: tok2vec, morphologizer, parser, lemmatizer (trainable_lemmatizer), tagger, senter, ner.'
|
||||
url: 'https://github.com/explosion/spacy-models/releases/download/pl_core_news_md-3.4.0/pl_core_news_md-3.4.0.tar.gz'
|
||||
@@ -204,7 +197,6 @@
|
||||
version: '3.4.0'
|
||||
compatible_service_versions:
|
||||
- '0.1.1'
|
||||
- '0.1.2'
|
||||
- title: 'Russian'
|
||||
description: 'Russian pipeline optimized for CPU. Components: tok2vec, morphologizer, parser, senter, ner, attribute_ruler, lemmatizer.'
|
||||
url: 'https://github.com/explosion/spacy-models/releases/download/ru_core_news_md-3.4.0/ru_core_news_md-3.4.0.tar.gz'
|
||||
@@ -216,7 +208,6 @@
|
||||
version: '3.4.0'
|
||||
compatible_service_versions:
|
||||
- '0.1.1'
|
||||
- '0.1.2'
|
||||
- title: 'Chinese'
|
||||
description: 'Chinese pipeline optimized for CPU. Components: tok2vec, tagger, parser, senter, ner, attribute_ruler.'
|
||||
url: 'https://github.com/explosion/spacy-models/releases/download/zh_core_web_md-3.4.0/zh_core_web_md-3.4.0.tar.gz'
|
||||
@@ -228,4 +219,3 @@
|
||||
version: '3.4.0'
|
||||
compatible_service_versions:
|
||||
- '0.1.1'
|
||||
- '0.1.2'
|
||||
|
@@ -9,6 +9,7 @@
|
||||
# compatible_service_versions:
|
||||
# - '0.1.0'
|
||||
# - '0.1.1'
|
||||
# - '0.1.2'
|
||||
# - title: 'Amharic'
|
||||
# description: ''
|
||||
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/amh.traineddata'
|
||||
@@ -20,6 +21,7 @@
|
||||
# compatible_service_versions:
|
||||
# - '0.1.0'
|
||||
# - '0.1.1'
|
||||
# - '0.1.2'
|
||||
- title: 'Arabic'
|
||||
description: ''
|
||||
url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/ara.traineddata'
|
||||
@@ -31,6 +33,7 @@
|
||||
compatible_service_versions:
|
||||
- '0.1.0'
|
||||
- '0.1.1'
|
||||
- '0.1.2'
|
||||
# - title: 'Assamese'
|
||||
# description: ''
|
||||
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/asm.traineddata'
|
||||
@@ -42,6 +45,7 @@
|
||||
# compatible_service_versions:
|
||||
# - '0.1.0'
|
||||
# - '0.1.1'
|
||||
# - '0.1.2'
|
||||
# - title: 'Azerbaijani'
|
||||
# description: ''
|
||||
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/aze.traineddata'
|
||||
@@ -53,6 +57,7 @@
|
||||
# compatible_service_versions:
|
||||
# - '0.1.0'
|
||||
# - '0.1.1'
|
||||
# - '0.1.2'
|
||||
# - title: 'Azerbaijani - Cyrillic'
|
||||
# description: ''
|
||||
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/aze_cyrl.traineddata'
|
||||
@@ -64,6 +69,7 @@
|
||||
# compatible_service_versions:
|
||||
# - '0.1.0'
|
||||
# - '0.1.1'
|
||||
# - '0.1.2'
|
||||
# - title: 'Belarusian'
|
||||
# description: ''
|
||||
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/bel.traineddata'
|
||||
@@ -75,6 +81,7 @@
|
||||
# compatible_service_versions:
|
||||
# - '0.1.0'
|
||||
# - '0.1.1'
|
||||
# - '0.1.2'
|
||||
# - title: 'Bengali'
|
||||
# description: ''
|
||||
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/ben.traineddata'
|
||||
@@ -86,6 +93,7 @@
|
||||
# compatible_service_versions:
|
||||
# - '0.1.0'
|
||||
# - '0.1.1'
|
||||
# - '0.1.2'
|
||||
# - title: 'Tibetan'
|
||||
# description: ''
|
||||
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/bod.traineddata'
|
||||
@@ -97,6 +105,7 @@
|
||||
# compatible_service_versions:
|
||||
# - '0.1.0'
|
||||
# - '0.1.1'
|
||||
# - '0.1.2'
|
||||
# - title: 'Bosnian'
|
||||
# description: ''
|
||||
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/bos.traineddata'
|
||||
@@ -108,6 +117,7 @@
|
||||
# compatible_service_versions:
|
||||
# - '0.1.0'
|
||||
# - '0.1.1'
|
||||
# - '0.1.2'
|
||||
# - title: 'Bulgarian'
|
||||
# description: ''
|
||||
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/bul.traineddata'
|
||||
@@ -119,6 +129,7 @@
|
||||
# compatible_service_versions:
|
||||
# - '0.1.0'
|
||||
# - '0.1.1'
|
||||
# - '0.1.2'
|
||||
# - title: 'Catalan; Valencian'
|
||||
# description: ''
|
||||
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/cat.traineddata'
|
||||
@@ -130,6 +141,7 @@
|
||||
# compatible_service_versions:
|
||||
# - '0.1.0'
|
||||
# - '0.1.1'
|
||||
# - '0.1.2'
|
||||
# - title: 'Cebuano'
|
||||
# description: ''
|
||||
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/ceb.traineddata'
|
||||
@@ -141,6 +153,7 @@
|
||||
# compatible_service_versions:
|
||||
# - '0.1.0'
|
||||
# - '0.1.1'
|
||||
# - '0.1.2'
|
||||
# - title: 'Czech'
|
||||
# description: ''
|
||||
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/ces.traineddata'
|
||||
@@ -152,6 +165,7 @@
|
||||
# compatible_service_versions:
|
||||
# - '0.1.0'
|
||||
# - '0.1.1'
|
||||
# - '0.1.2'
|
||||
# - title: 'Chinese - Simplified'
|
||||
# description: ''
|
||||
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/chi_sim.traineddata'
|
||||
@@ -163,6 +177,7 @@
|
||||
# compatible_service_versions:
|
||||
# - '0.1.0'
|
||||
# - '0.1.1'
|
||||
# - '0.1.2'
|
||||
- title: 'Chinese - Traditional'
|
||||
description: ''
|
||||
url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/chi_tra.traineddata'
|
||||
@@ -174,6 +189,7 @@
|
||||
compatible_service_versions:
|
||||
- '0.1.0'
|
||||
- '0.1.1'
|
||||
- '0.1.2'
|
||||
# - title: 'Cherokee'
|
||||
# description: ''
|
||||
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/chr.traineddata'
|
||||
@@ -185,6 +201,7 @@
|
||||
# compatible_service_versions:
|
||||
# - '0.1.0'
|
||||
# - '0.1.1'
|
||||
# - '0.1.2'
|
||||
# - title: 'Welsh'
|
||||
# description: ''
|
||||
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/cym.traineddata'
|
||||
@@ -196,6 +213,7 @@
|
||||
# compatible_service_versions:
|
||||
# - '0.1.0'
|
||||
# - '0.1.1'
|
||||
# - '0.1.2'
|
||||
- title: 'Danish'
|
||||
description: ''
|
||||
url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/dan.traineddata'
|
||||
@@ -207,6 +225,7 @@
|
||||
compatible_service_versions:
|
||||
- '0.1.0'
|
||||
- '0.1.1'
|
||||
- '0.1.2'
|
||||
- title: 'German'
|
||||
description: ''
|
||||
url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/deu.traineddata'
|
||||
@@ -218,6 +237,7 @@
|
||||
compatible_service_versions:
|
||||
- '0.1.0'
|
||||
- '0.1.1'
|
||||
- '0.1.2'
|
||||
# - title: 'Dzongkha'
|
||||
# description: ''
|
||||
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/dzo.traineddata'
|
||||
@@ -229,6 +249,7 @@
|
||||
# compatible_service_versions:
|
||||
# - '0.1.0'
|
||||
# - '0.1.1'
|
||||
# - '0.1.2'
|
||||
- title: 'Greek, Modern (1453-)'
|
||||
description: ''
|
||||
url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/ell.traineddata'
|
||||
@@ -240,6 +261,7 @@
|
||||
compatible_service_versions:
|
||||
- '0.1.0'
|
||||
- '0.1.1'
|
||||
- '0.1.2'
|
||||
- title: 'English'
|
||||
description: ''
|
||||
url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/eng.traineddata'
|
||||
@@ -251,6 +273,7 @@
|
||||
compatible_service_versions:
|
||||
- '0.1.0'
|
||||
- '0.1.1'
|
||||
- '0.1.2'
|
||||
- title: 'English, Middle (1100-1500)'
|
||||
description: ''
|
||||
url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/enm.traineddata'
|
||||
@@ -262,6 +285,7 @@
|
||||
compatible_service_versions:
|
||||
- '0.1.0'
|
||||
- '0.1.1'
|
||||
- '0.1.2'
|
||||
# - title: 'Esperanto'
|
||||
# description: ''
|
||||
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/epo.traineddata'
|
||||
@@ -273,6 +297,7 @@
|
||||
# compatible_service_versions:
|
||||
# - '0.1.0'
|
||||
# - '0.1.1'
|
||||
# - '0.1.2'
|
||||
# - title: 'Estonian'
|
||||
# description: ''
|
||||
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/est.traineddata'
|
||||
@@ -284,6 +309,7 @@
|
||||
# compatible_service_versions:
|
||||
# - '0.1.0'
|
||||
# - '0.1.1'
|
||||
# - '0.1.2'
|
||||
# - title: 'Basque'
|
||||
# description: ''
|
||||
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/eus.traineddata'
|
||||
@@ -295,6 +321,7 @@
|
||||
# compatible_service_versions:
|
||||
# - '0.1.0'
|
||||
# - '0.1.1'
|
||||
# - '0.1.2'
|
||||
# - title: 'Persian'
|
||||
# description: ''
|
||||
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/fas.traineddata'
|
||||
@@ -306,6 +333,7 @@
|
||||
# compatible_service_versions:
|
||||
# - '0.1.0'
|
||||
# - '0.1.1'
|
||||
# - '0.1.2'
|
||||
# - title: 'Finnish'
|
||||
# description: ''
|
||||
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/fin.traineddata'
|
||||
@@ -317,6 +345,7 @@
|
||||
# compatible_service_versions:
|
||||
# - '0.1.0'
|
||||
# - '0.1.1'
|
||||
# - '0.1.2'
|
||||
- title: 'French'
|
||||
description: ''
|
||||
url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/fra.traineddata'
|
||||
@@ -328,6 +357,7 @@
|
||||
compatible_service_versions:
|
||||
- '0.1.0'
|
||||
- '0.1.1'
|
||||
- '0.1.2'
|
||||
- title: 'German Fraktur'
|
||||
description: ''
|
||||
url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/frk.traineddata'
|
||||
@@ -339,6 +369,7 @@
|
||||
compatible_service_versions:
|
||||
- '0.1.0'
|
||||
- '0.1.1'
|
||||
- '0.1.2'
|
||||
- title: 'French, Middle (ca. 1400-1600)'
|
||||
description: ''
|
||||
url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/frm.traineddata'
|
||||
@@ -350,6 +381,7 @@
|
||||
compatible_service_versions:
|
||||
- '0.1.0'
|
||||
- '0.1.1'
|
||||
- '0.1.2'
|
||||
# - title: 'Irish'
|
||||
# description: ''
|
||||
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/gle.traineddata'
|
||||
@@ -361,6 +393,7 @@
|
||||
# compatible_service_versions:
|
||||
# - '0.1.0'
|
||||
# - '0.1.1'
|
||||
# - '0.1.2'
|
||||
# - title: 'Galician'
|
||||
# description: ''
|
||||
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/glg.traineddata'
|
||||
@@ -372,6 +405,7 @@
|
||||
# compatible_service_versions:
|
||||
# - '0.1.0'
|
||||
# - '0.1.1'
|
||||
# - '0.1.2'
|
||||
- title: 'Greek, Ancient (-1453)'
|
||||
description: ''
|
||||
url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/grc.traineddata'
|
||||
@@ -383,6 +417,7 @@
|
||||
compatible_service_versions:
|
||||
- '0.1.0'
|
||||
- '0.1.1'
|
||||
- '0.1.2'
|
||||
# - title: 'Gujarati'
|
||||
# description: ''
|
||||
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/guj.traineddata'
|
||||
@@ -394,6 +429,7 @@
|
||||
# compatible_service_versions:
|
||||
# - '0.1.0'
|
||||
# - '0.1.1'
|
||||
# - '0.1.2'
|
||||
# - title: 'Haitian; Haitian Creole'
|
||||
# description: ''
|
||||
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/hat.traineddata'
|
||||
@@ -405,6 +441,7 @@
|
||||
# compatible_service_versions:
|
||||
# - '0.1.0'
|
||||
# - '0.1.1'
|
||||
# - '0.1.2'
|
||||
# - title: 'Hebrew'
|
||||
# description: ''
|
||||
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/heb.traineddata'
|
||||
@@ -416,6 +453,7 @@
|
||||
# compatible_service_versions:
|
||||
# - '0.1.0'
|
||||
# - '0.1.1'
|
||||
# - '0.1.2'
|
||||
# - title: 'Hindi'
|
||||
# description: ''
|
||||
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/hin.traineddata'
|
||||
@@ -427,6 +465,7 @@
|
||||
# compatible_service_versions:
|
||||
# - '0.1.0'
|
||||
# - '0.1.1'
|
||||
# - '0.1.2'
|
||||
# - title: 'Croatian'
|
||||
# description: ''
|
||||
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/hrv.traineddata'
|
||||
@@ -438,6 +477,7 @@
|
||||
# compatible_service_versions:
|
||||
# - '0.1.0'
|
||||
# - '0.1.1'
|
||||
# - '0.1.2'
|
||||
# - title: 'Hungarian'
|
||||
# description: ''
|
||||
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/hun.traineddata'
|
||||
@@ -449,6 +489,7 @@
|
||||
# compatible_service_versions:
|
||||
# - '0.1.0'
|
||||
# - '0.1.1'
|
||||
# - '0.1.2'
|
||||
# - title: 'Inuktitut'
|
||||
# description: ''
|
||||
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/iku.traineddata'
|
||||
@@ -460,6 +501,7 @@
|
||||
# compatible_service_versions:
|
||||
# - '0.1.0'
|
||||
# - '0.1.1'
|
||||
# - '0.1.2'
|
||||
# - title: 'Indonesian'
|
||||
# description: ''
|
||||
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/ind.traineddata'
|
||||
@@ -471,6 +513,7 @@
|
||||
# compatible_service_versions:
|
||||
# - '0.1.0'
|
||||
# - '0.1.1'
|
||||
# - '0.1.2'
|
||||
# - title: 'Icelandic'
|
||||
# description: ''
|
||||
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/isl.traineddata'
|
||||
@@ -482,6 +525,7 @@
|
||||
# compatible_service_versions:
|
||||
# - '0.1.0'
|
||||
# - '0.1.1'
|
||||
# - '0.1.2'
|
||||
- title: 'Italian'
|
||||
description: ''
|
||||
url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/ita.traineddata'
|
||||
@@ -493,6 +537,7 @@
|
||||
compatible_service_versions:
|
||||
- '0.1.0'
|
||||
- '0.1.1'
|
||||
- '0.1.2'
|
||||
- title: 'Italian - Old'
|
||||
description: ''
|
||||
url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/ita_old.traineddata'
|
||||
@@ -504,6 +549,7 @@
|
||||
compatible_service_versions:
|
||||
- '0.1.0'
|
||||
- '0.1.1'
|
||||
- '0.1.2'
|
||||
# - title: 'Javanese'
|
||||
# description: ''
|
||||
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/jav.traineddata'
|
||||
@@ -515,6 +561,7 @@
|
||||
# compatible_service_versions:
|
||||
# - '0.1.0'
|
||||
# - '0.1.1'
|
||||
# - '0.1.2'
|
||||
# - title: 'Japanese'
|
||||
# description: ''
|
||||
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/jpn.traineddata'
|
||||
@@ -526,6 +573,7 @@
|
||||
# compatible_service_versions:
|
||||
# - '0.1.0'
|
||||
# - '0.1.1'
|
||||
# - '0.1.2'
|
||||
# - title: 'Kannada'
|
||||
# description: ''
|
||||
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/kan.traineddata'
|
||||
@@ -537,6 +585,7 @@
|
||||
# compatible_service_versions:
|
||||
# - '0.1.0'
|
||||
# - '0.1.1'
|
||||
# - '0.1.2'
|
||||
# - title: 'Georgian'
|
||||
# description: ''
|
||||
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/kat.traineddata'
|
||||
@@ -548,6 +597,7 @@
|
||||
# compatible_service_versions:
|
||||
# - '0.1.0'
|
||||
# - '0.1.1'
|
||||
# - '0.1.2'
|
||||
# - title: 'Georgian - Old'
|
||||
# description: ''
|
||||
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/kat_old.traineddata'
|
||||
@@ -559,6 +609,7 @@
|
||||
# compatible_service_versions:
|
||||
# - '0.1.0'
|
||||
# - '0.1.1'
|
||||
# - '0.1.2'
|
||||
# - title: 'Kazakh'
|
||||
# description: ''
|
||||
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/kaz.traineddata'
|
||||
@@ -570,6 +621,7 @@
|
||||
# compatible_service_versions:
|
||||
# - '0.1.0'
|
||||
# - '0.1.1'
|
||||
# - '0.1.2'
|
||||
# - title: 'Central Khmer'
|
||||
# description: ''
|
||||
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/khm.traineddata'
|
||||
@@ -581,6 +633,7 @@
|
||||
# compatible_service_versions:
|
||||
# - '0.1.0'
|
||||
# - '0.1.1'
|
||||
# - '0.1.2'
|
||||
# - title: 'Kirghiz; Kyrgyz'
|
||||
# description: ''
|
||||
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/kir.traineddata'
|
||||
@@ -592,6 +645,7 @@
|
||||
# compatible_service_versions:
|
||||
# - '0.1.0'
|
||||
# - '0.1.1'
|
||||
# - '0.1.2'
|
||||
# - title: 'Korean'
|
||||
# description: ''
|
||||
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/kor.traineddata'
|
||||
@@ -603,6 +657,7 @@
|
||||
# compatible_service_versions:
|
||||
# - '0.1.0'
|
||||
# - '0.1.1'
|
||||
# - '0.1.2'
|
||||
# - title: 'Kurdish'
|
||||
# description: ''
|
||||
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/kur.traineddata'
|
||||
@@ -614,6 +669,7 @@
|
||||
# compatible_service_versions:
|
||||
# - '0.1.0'
|
||||
# - '0.1.1'
|
||||
# - '0.1.2'
|
||||
# - title: 'Lao'
|
||||
# description: ''
|
||||
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/lao.traineddata'
|
||||
@@ -625,6 +681,7 @@
|
||||
# compatible_service_versions:
|
||||
# - '0.1.0'
|
||||
# - '0.1.1'
|
||||
# - '0.1.2'
|
||||
# - title: 'Latin'
|
||||
# description: ''
|
||||
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/lat.traineddata'
|
||||
@@ -636,6 +693,7 @@
|
||||
# compatible_service_versions:
|
||||
# - '0.1.0'
|
||||
# - '0.1.1'
|
||||
# - '0.1.2'
|
||||
# - title: 'Latvian'
|
||||
# description: ''
|
||||
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/lav.traineddata'
|
||||
@@ -647,6 +705,7 @@
|
||||
# compatible_service_versions:
|
||||
# - '0.1.0'
|
||||
# - '0.1.1'
|
||||
# - '0.1.2'
|
||||
# - title: 'Lithuanian'
|
||||
# description: ''
|
||||
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/lit.traineddata'
|
||||
@@ -658,6 +717,7 @@
|
||||
# compatible_service_versions:
|
||||
# - '0.1.0'
|
||||
# - '0.1.1'
|
||||
# - '0.1.2'
|
||||
# - title: 'Malayalam'
|
||||
# description: ''
|
||||
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/mal.traineddata'
|
||||
@@ -669,6 +729,7 @@
|
||||
# compatible_service_versions:
|
||||
# - '0.1.0'
|
||||
# - '0.1.1'
|
||||
# - '0.1.2'
|
||||
# - title: 'Marathi'
|
||||
# description: ''
|
||||
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/mar.traineddata'
|
||||
@@ -680,6 +741,7 @@
|
||||
# compatible_service_versions:
|
||||
# - '0.1.0'
|
||||
# - '0.1.1'
|
||||
# - '0.1.2'
|
||||
# - title: 'Macedonian'
|
||||
# description: ''
|
||||
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/mkd.traineddata'
|
||||
@@ -691,6 +753,7 @@
|
||||
# compatible_service_versions:
|
||||
# - '0.1.0'
|
||||
# - '0.1.1'
|
||||
# - '0.1.2'
|
||||
# - title: 'Maltese'
|
||||
# description: ''
|
||||
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/mlt.traineddata'
|
||||
@@ -702,6 +765,7 @@
|
||||
# compatible_service_versions:
|
||||
# - '0.1.0'
|
||||
# - '0.1.1'
|
||||
# - '0.1.2'
|
||||
# - title: 'Malay'
|
||||
# description: ''
|
||||
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/msa.traineddata'
|
||||
@@ -713,6 +777,7 @@
|
||||
# compatible_service_versions:
|
||||
# - '0.1.0'
|
||||
# - '0.1.1'
|
||||
# - '0.1.2'
|
||||
# - title: 'Burmese'
|
||||
# description: ''
|
||||
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/mya.traineddata'
|
||||
@@ -724,6 +789,7 @@
|
||||
# compatible_service_versions:
|
||||
# - '0.1.0'
|
||||
# - '0.1.1'
|
||||
# - '0.1.2'
|
||||
# - title: 'Nepali'
|
||||
# description: ''
|
||||
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/nep.traineddata'
|
||||
@@ -735,6 +801,7 @@
|
||||
# compatible_service_versions:
|
||||
# - '0.1.0'
|
||||
# - '0.1.1'
|
||||
# - '0.1.2'
|
||||
# - title: 'Dutch; Flemish'
|
||||
# description: ''
|
||||
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/nld.traineddata'
|
||||
@@ -746,6 +813,7 @@
|
||||
# compatible_service_versions:
|
||||
# - '0.1.0'
|
||||
# - '0.1.1'
|
||||
# - '0.1.2'
|
||||
# - title: 'Norwegian'
|
||||
# description: ''
|
||||
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/nor.traineddata'
|
||||
@@ -757,6 +825,7 @@
|
||||
# compatible_service_versions:
|
||||
# - '0.1.0'
|
||||
# - '0.1.1'
|
||||
# - '0.1.2'
|
||||
# - title: 'Oriya'
|
||||
# description: ''
|
||||
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/ori.traineddata'
|
||||
@@ -768,6 +837,7 @@
|
||||
# compatible_service_versions:
|
||||
# - '0.1.0'
|
||||
# - '0.1.1'
|
||||
# - '0.1.2'
|
||||
# - title: 'Panjabi; Punjabi'
|
||||
# description: ''
|
||||
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/pan.traineddata'
|
||||
@@ -779,6 +849,7 @@
|
||||
# compatible_service_versions:
|
||||
# - '0.1.0'
|
||||
# - '0.1.1'
|
||||
# - '0.1.2'
|
||||
# - title: 'Polish'
|
||||
# description: ''
|
||||
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/pol.traineddata'
|
||||
@@ -790,6 +861,7 @@
|
||||
# compatible_service_versions:
|
||||
# - '0.1.0'
|
||||
# - '0.1.1'
|
||||
# - '0.1.2'
|
||||
- title: 'Portuguese'
|
||||
description: ''
|
||||
url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/por.traineddata'
|
||||
@@ -801,6 +873,7 @@
|
||||
compatible_service_versions:
|
||||
- '0.1.0'
|
||||
- '0.1.1'
|
||||
- '0.1.2'
|
||||
# - title: 'Pushto; Pashto'
|
||||
# description: ''
|
||||
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/pus.traineddata'
|
||||
@@ -812,6 +885,7 @@
|
||||
# compatible_service_versions:
|
||||
# - '0.1.0'
|
||||
# - '0.1.1'
|
||||
# - '0.1.2'
|
||||
# - title: 'Romanian; Moldavian; Moldovan'
|
||||
# description: ''
|
||||
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/ron.traineddata'
|
||||
@@ -823,6 +897,7 @@
|
||||
# compatible_service_versions:
|
||||
# - '0.1.0'
|
||||
# - '0.1.1'
|
||||
# - '0.1.2'
|
||||
- title: 'Russian'
|
||||
description: ''
|
||||
url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/rus.traineddata'
|
||||
@@ -834,6 +909,7 @@
|
||||
compatible_service_versions:
|
||||
- '0.1.0'
|
||||
- '0.1.1'
|
||||
- '0.1.2'
|
||||
# - title: 'Sanskrit'
|
||||
# description: ''
|
||||
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/san.traineddata'
|
||||
@@ -845,6 +921,7 @@
|
||||
# compatible_service_versions:
|
||||
# - '0.1.0'
|
||||
# - '0.1.1'
|
||||
# - '0.1.2'
|
||||
# - title: 'Sinhala; Sinhalese'
|
||||
# description: ''
|
||||
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/sin.traineddata'
|
||||
@@ -856,6 +933,7 @@
|
||||
# compatible_service_versions:
|
||||
# - '0.1.0'
|
||||
# - '0.1.1'
|
||||
# - '0.1.2'
|
||||
# - title: 'Slovak'
|
||||
# description: ''
|
||||
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/slk.traineddata'
|
||||
@@ -867,6 +945,7 @@
|
||||
# compatible_service_versions:
|
||||
# - '0.1.0'
|
||||
# - '0.1.1'
|
||||
# - '0.1.2'
|
||||
# - title: 'Slovenian'
|
||||
# description: ''
|
||||
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/slv.traineddata'
|
||||
@@ -878,6 +957,7 @@
|
||||
# compatible_service_versions:
|
||||
# - '0.1.0'
|
||||
# - '0.1.1'
|
||||
# - '0.1.2'
|
||||
- title: 'Spanish; Castilian'
|
||||
description: ''
|
||||
url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/spa.traineddata'
|
||||
@@ -889,6 +969,7 @@
|
||||
compatible_service_versions:
|
||||
- '0.1.0'
|
||||
- '0.1.1'
|
||||
- '0.1.2'
|
||||
- title: 'Spanish; Castilian - Old'
|
||||
description: ''
|
||||
url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/spa_old.traineddata'
|
||||
@@ -900,6 +981,7 @@
|
||||
compatible_service_versions:
|
||||
- '0.1.0'
|
||||
- '0.1.1'
|
||||
- '0.1.2'
|
||||
# - title: 'Albanian'
|
||||
# description: ''
|
||||
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/sqi.traineddata'
|
||||
@@ -911,6 +993,7 @@
|
||||
# compatible_service_versions:
|
||||
# - '0.1.0'
|
||||
# - '0.1.1'
|
||||
# - '0.1.2'
|
||||
# - title: 'Serbian'
|
||||
# description: ''
|
||||
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/srp.traineddata'
|
||||
@@ -922,6 +1005,7 @@
|
||||
# compatible_service_versions:
|
||||
# - '0.1.0'
|
||||
# - '0.1.1'
|
||||
# - '0.1.2'
|
||||
# - title: 'Serbian - Latin'
|
||||
# description: ''
|
||||
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/srp_latn.traineddata'
|
||||
@@ -933,6 +1017,7 @@
|
||||
# compatible_service_versions:
|
||||
# - '0.1.0'
|
||||
# - '0.1.1'
|
||||
# - '0.1.2'
|
||||
# - title: 'Swahili'
|
||||
# description: ''
|
||||
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/swa.traineddata'
|
||||
@@ -944,6 +1029,7 @@
|
||||
# compatible_service_versions:
|
||||
# - '0.1.0'
|
||||
# - '0.1.1'
|
||||
# - '0.1.2'
|
||||
# - title: 'Swedish'
|
||||
# description: ''
|
||||
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/swe.traineddata'
|
||||
@@ -955,6 +1041,7 @@
|
||||
# compatible_service_versions:
|
||||
# - '0.1.0'
|
||||
# - '0.1.1'
|
||||
# - '0.1.2'
|
||||
# - title: 'Syriac'
|
||||
# description: ''
|
||||
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/syr.traineddata'
|
||||
@@ -966,6 +1053,7 @@
|
||||
# compatible_service_versions:
|
||||
# - '0.1.0'
|
||||
# - '0.1.1'
|
||||
# - '0.1.2'
|
||||
# - title: 'Tamil'
|
||||
# description: ''
|
||||
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/tam.traineddata'
|
||||
@@ -977,6 +1065,7 @@
|
||||
# compatible_service_versions:
|
||||
# - '0.1.0'
|
||||
# - '0.1.1'
|
||||
# - '0.1.2'
|
||||
# - title: 'Telugu'
|
||||
# description: ''
|
||||
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/tel.traineddata'
|
||||
@@ -988,6 +1077,7 @@
|
||||
# compatible_service_versions:
|
||||
# - '0.1.0'
|
||||
# - '0.1.1'
|
||||
# - '0.1.2'
|
||||
# - title: 'Tajik'
|
||||
# description: ''
|
||||
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/tgk.traineddata'
|
||||
@@ -999,6 +1089,7 @@
|
||||
# compatible_service_versions:
|
||||
# - '0.1.0'
|
||||
# - '0.1.1'
|
||||
# - '0.1.2'
|
||||
# - title: 'Tagalog'
|
||||
# description: ''
|
||||
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/tgl.traineddata'
|
||||
@@ -1010,6 +1101,7 @@
|
||||
# compatible_service_versions:
|
||||
# - '0.1.0'
|
||||
# - '0.1.1'
|
||||
# - '0.1.2'
|
||||
# - title: 'Thai'
|
||||
# description: ''
|
||||
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/tha.traineddata'
|
||||
@@ -1021,6 +1113,7 @@
|
||||
# compatible_service_versions:
|
||||
# - '0.1.0'
|
||||
# - '0.1.1'
|
||||
# - '0.1.2'
|
||||
# - title: 'Tigrinya'
|
||||
# description: ''
|
||||
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/tir.traineddata'
|
||||
@@ -1032,6 +1125,7 @@
|
||||
# compatible_service_versions:
|
||||
# - '0.1.0'
|
||||
# - '0.1.1'
|
||||
# - '0.1.2'
|
||||
# - title: 'Turkish'
|
||||
# description: ''
|
||||
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/tur.traineddata'
|
||||
@@ -1043,6 +1137,7 @@
|
||||
# compatible_service_versions:
|
||||
# - '0.1.0'
|
||||
# - '0.1.1'
|
||||
# - '0.1.2'
|
||||
# - title: 'Uighur; Uyghur'
|
||||
# description: ''
|
||||
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/uig.traineddata'
|
||||
@@ -1054,6 +1149,7 @@
|
||||
# compatible_service_versions:
|
||||
# - '0.1.0'
|
||||
# - '0.1.1'
|
||||
# - '0.1.2'
|
||||
# - title: 'Ukrainian'
|
||||
# description: ''
|
||||
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/ukr.traineddata'
|
||||
@@ -1065,6 +1161,7 @@
|
||||
# compatible_service_versions:
|
||||
# - '0.1.0'
|
||||
# - '0.1.1'
|
||||
# - '0.1.2'
|
||||
# - title: 'Urdu'
|
||||
# description: ''
|
||||
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/urd.traineddata'
|
||||
@@ -1076,6 +1173,7 @@
|
||||
# compatible_service_versions:
|
||||
# - '0.1.0'
|
||||
# - '0.1.1'
|
||||
# - '0.1.2'
|
||||
# - title: 'Uzbek'
|
||||
# description: ''
|
||||
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/uzb.traineddata'
|
||||
@@ -1087,6 +1185,7 @@
|
||||
# compatible_service_versions:
|
||||
# - '0.1.0'
|
||||
# - '0.1.1'
|
||||
# - '0.1.2'
|
||||
# - title: 'Uzbek - Cyrillic'
|
||||
# description: ''
|
||||
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/uzb_cyrl.traineddata'
|
||||
@@ -1098,6 +1197,7 @@
|
||||
# compatible_service_versions:
|
||||
# - '0.1.0'
|
||||
# - '0.1.1'
|
||||
# - '0.1.2'
|
||||
# - title: 'Vietnamese'
|
||||
# description: ''
|
||||
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/vie.traineddata'
|
||||
@@ -1109,6 +1209,7 @@
|
||||
# compatible_service_versions:
|
||||
# - '0.1.0'
|
||||
# - '0.1.1'
|
||||
# - '0.1.2'
|
||||
# - title: 'Yiddish'
|
||||
# description: ''
|
||||
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/yid.traineddata'
|
||||
@@ -1120,3 +1221,4 @@
|
||||
# compatible_service_versions:
|
||||
# - '0.1.0'
|
||||
# - '0.1.1'
|
||||
# - '0.1.2'
|
||||
|
@@ -4,6 +4,7 @@ from docker import DockerClient
|
||||
from flask import Flask
|
||||
from flask_apscheduler import APScheduler
|
||||
from flask_assets import Environment
|
||||
from flask_breadcrumbs import Breadcrumbs, default_breadcrumb_root
|
||||
from flask_login import LoginManager
|
||||
from flask_mail import Mail
|
||||
from flask_marshmallow import Marshmallow
|
||||
@@ -16,6 +17,7 @@ from flask_hashids import Hashids
|
||||
|
||||
apifairy = APIFairy()
|
||||
assets = Environment()
|
||||
breadcrumbs = Breadcrumbs()
|
||||
db = SQLAlchemy()
|
||||
docker_client = DockerClient()
|
||||
hashids = Hashids()
|
||||
@@ -33,7 +35,7 @@ socketio = SocketIO()
|
||||
|
||||
def create_app(config: Config = Config) -> Flask:
|
||||
''' Creates an initialized Flask (WSGI Application) object. '''
|
||||
app: Flask = Flask(__name__)
|
||||
app = Flask(__name__)
|
||||
app.config.from_object(config)
|
||||
config.init_app(app)
|
||||
docker_client.login(
|
||||
@@ -44,6 +46,7 @@ def create_app(config: Config = Config) -> Flask:
|
||||
|
||||
apifairy.init_app(app)
|
||||
assets.init_app(app)
|
||||
breadcrumbs.init_app(app)
|
||||
db.init_app(app)
|
||||
hashids.init_app(app)
|
||||
login.init_app(app)
|
||||
@@ -55,36 +58,50 @@ def create_app(config: Config = Config) -> Flask:
|
||||
socketio.init_app(app, message_queue=app.config['NOPAQUE_SOCKETIO_MESSAGE_QUEUE_URI']) # noqa
|
||||
|
||||
from .admin import bp as admin_blueprint
|
||||
default_breadcrumb_root(admin_blueprint, '.admin')
|
||||
app.register_blueprint(admin_blueprint, url_prefix='/admin')
|
||||
|
||||
from .api import bp as api_blueprint
|
||||
app.register_blueprint(api_blueprint, url_prefix='/api')
|
||||
|
||||
from .auth import bp as auth_blueprint
|
||||
app.register_blueprint(auth_blueprint, url_prefix='/auth')
|
||||
default_breadcrumb_root(auth_blueprint, '.')
|
||||
app.register_blueprint(auth_blueprint)
|
||||
|
||||
from .contributions import bp as contributions_blueprint
|
||||
default_breadcrumb_root(contributions_blueprint, '.contributions')
|
||||
app.register_blueprint(contributions_blueprint, url_prefix='/contributions')
|
||||
|
||||
from .corpora import bp as corpora_blueprint
|
||||
app.register_blueprint(corpora_blueprint, url_prefix='/corpora')
|
||||
from .corpora.cqi_over_sio import CQiNamespace
|
||||
default_breadcrumb_root(corpora_blueprint, '.corpora')
|
||||
app.register_blueprint(corpora_blueprint, cli_group='corpus', url_prefix='/corpora')
|
||||
socketio.on_namespace(CQiNamespace('/cqi_over_sio'))
|
||||
|
||||
from .errors import bp as errors_blueprint
|
||||
app.register_blueprint(errors_blueprint)
|
||||
from .errors import bp as errors_bp
|
||||
app.register_blueprint(errors_bp)
|
||||
|
||||
from .jobs import bp as jobs_blueprint
|
||||
default_breadcrumb_root(jobs_blueprint, '.jobs')
|
||||
app.register_blueprint(jobs_blueprint, url_prefix='/jobs')
|
||||
|
||||
from .main import bp as main_blueprint
|
||||
app.register_blueprint(main_blueprint, url_prefix='/')
|
||||
default_breadcrumb_root(main_blueprint, '.')
|
||||
app.register_blueprint(main_blueprint, cli_group=None)
|
||||
|
||||
from .services import bp as services_blueprint
|
||||
default_breadcrumb_root(services_blueprint, '.services')
|
||||
app.register_blueprint(services_blueprint, url_prefix='/services')
|
||||
|
||||
from .settings import bp as settings_blueprint
|
||||
default_breadcrumb_root(settings_blueprint, '.settings')
|
||||
app.register_blueprint(settings_blueprint, url_prefix='/settings')
|
||||
|
||||
from .users import bp as users_blueprint
|
||||
default_breadcrumb_root(users_blueprint, '.users')
|
||||
app.register_blueprint(users_blueprint, url_prefix='/users')
|
||||
|
||||
from .workshops import bp as workshops_blueprint
|
||||
app.register_blueprint(workshops_blueprint, url_prefix='/workshops')
|
||||
|
||||
return app
|
||||
|
@@ -1,5 +1,20 @@
|
||||
from flask import Blueprint
|
||||
from flask_login import login_required
|
||||
from app.decorators import admin_required
|
||||
|
||||
|
||||
bp = Blueprint('admin', __name__)
|
||||
from . import routes
|
||||
|
||||
|
||||
@bp.before_request
|
||||
@login_required
|
||||
@admin_required
|
||||
def before_request():
|
||||
'''
|
||||
Ensures that the routes in this package can be visited only by users with
|
||||
administrator privileges (login_required and admin_required).
|
||||
'''
|
||||
pass
|
||||
|
||||
|
||||
from . import json_routes, routes
|
||||
|
@@ -1,13 +1,16 @@
|
||||
from app.models import Role
|
||||
from flask_wtf import FlaskForm
|
||||
from wtforms import BooleanField, SelectField, SubmitField
|
||||
from wtforms import SelectField, SubmitField
|
||||
from app.models import Role
|
||||
|
||||
|
||||
class AdminEditUserForm(FlaskForm):
|
||||
confirmed = BooleanField('Confirmed')
|
||||
class UpdateUserForm(FlaskForm):
|
||||
role = SelectField('Role')
|
||||
submit = SubmitField('Submit')
|
||||
submit = SubmitField()
|
||||
|
||||
def __init__(self, *args, **kwargs):
|
||||
def __init__(self, user, *args, **kwargs):
|
||||
if 'data' not in kwargs:
|
||||
kwargs['data'] = {'role': user.role.hashid}
|
||||
if 'prefix' not in kwargs:
|
||||
kwargs['prefix'] = 'update-user-form'
|
||||
super().__init__(*args, **kwargs)
|
||||
self.role.choices = [(x.hashid, x.name) for x in Role.query.all()]
|
||||
|
23
app/admin/json_routes.py
Normal file
@@ -0,0 +1,23 @@
|
||||
from flask import abort, request
|
||||
from app import db
|
||||
from app.decorators import content_negotiation
|
||||
from app.models import User
|
||||
from . import bp
|
||||
|
||||
|
||||
@bp.route('/users/<hashid:user_id>/confirmed', methods=['PUT'])
|
||||
@content_negotiation(consumes='application/json', produces='application/json')
|
||||
def update_user_role(user_id):
|
||||
confirmed = request.json
|
||||
if not isinstance(confirmed, bool):
|
||||
abort(400)
|
||||
user = User.query.get_or_404(user_id)
|
||||
user.confirmed = confirmed
|
||||
db.session.commit()
|
||||
response_data = {
|
||||
'message': (
|
||||
f'User "{user.username}" is now '
|
||||
f'{"confirmed" if confirmed else "unconfirmed"}'
|
||||
)
|
||||
}
|
||||
return response_data, 200
|
@@ -1,111 +1,146 @@
|
||||
from flask import current_app, flash, redirect, render_template, url_for
|
||||
from flask_login import login_required
|
||||
from threading import Thread
|
||||
from flask import abort, flash, redirect, render_template, url_for
|
||||
from flask_breadcrumbs import register_breadcrumb
|
||||
from app import db, hashids
|
||||
from app.decorators import admin_required
|
||||
from app.models import Role, User, UserSettingJobStatusMailNotificationLevel
|
||||
from app.settings.forms import (
|
||||
EditNotificationSettingsForm
|
||||
from app.models import Avatar, Corpus, Role, User
|
||||
from app.users.settings.forms import (
|
||||
UpdateAvatarForm,
|
||||
UpdatePasswordForm,
|
||||
UpdateNotificationsForm,
|
||||
UpdateAccountInformationForm,
|
||||
UpdateProfileInformationForm
|
||||
)
|
||||
from app.users.forms import EditProfileSettingsForm
|
||||
from . import bp
|
||||
from .forms import AdminEditUserForm
|
||||
|
||||
|
||||
@bp.before_request
|
||||
@login_required
|
||||
@admin_required
|
||||
def before_request():
|
||||
'''
|
||||
Ensures that the routes in this package can be visited only by users with
|
||||
administrator privileges (login_required and admin_required).
|
||||
'''
|
||||
pass
|
||||
from .forms import UpdateUserForm
|
||||
from app.users.utils import (
|
||||
user_endpoint_arguments_constructor as user_eac,
|
||||
user_dynamic_list_constructor as user_dlc
|
||||
)
|
||||
|
||||
|
||||
@bp.route('')
|
||||
def index():
|
||||
return redirect(url_for('.users'))
|
||||
@register_breadcrumb(bp, '.', '<i class="material-icons left">admin_panel_settings</i>Administration')
|
||||
def admin():
|
||||
return render_template(
|
||||
'admin/admin.html.j2',
|
||||
title='Administration'
|
||||
)
|
||||
|
||||
|
||||
@bp.route('/corpora')
|
||||
@register_breadcrumb(bp, '.corpora', 'Corpora')
|
||||
def corpora():
|
||||
corpora = Corpus.query.all()
|
||||
return render_template(
|
||||
'admin/corpora.html.j2',
|
||||
title='Corpora',
|
||||
corpora=corpora
|
||||
)
|
||||
|
||||
|
||||
@bp.route('/users')
|
||||
@register_breadcrumb(bp, '.users', '<i class="material-icons left">group</i>Users')
|
||||
def users():
|
||||
users = [x.to_json_serializeable(backrefs=True) for x in User.query.all()]
|
||||
users = User.query.all()
|
||||
return render_template(
|
||||
'admin/users.html.j2',
|
||||
users=users,
|
||||
title='Users'
|
||||
title='Users',
|
||||
users=users
|
||||
)
|
||||
|
||||
|
||||
@bp.route('/users/<hashid:user_id>')
|
||||
@register_breadcrumb(bp, '.users.entity', '', dynamic_list_constructor=user_dlc)
|
||||
def user(user_id):
|
||||
user = User.query.get_or_404(user_id)
|
||||
return render_template('admin/user.html.j2', title='User', user=user)
|
||||
corpora = Corpus.query.filter(Corpus.user == user).all()
|
||||
return render_template(
|
||||
'admin/user.html.j2',
|
||||
title=user.username,
|
||||
user=user,
|
||||
corpora=corpora
|
||||
)
|
||||
|
||||
|
||||
@bp.route('/users/<hashid:user_id>/edit', methods=['GET', 'POST'])
|
||||
def edit_user(user_id):
|
||||
@bp.route('/users/<hashid:user_id>/settings', methods=['GET', 'POST'])
|
||||
@register_breadcrumb(bp, '.users.entity.settings', '<i class="material-icons left">settings</i>Settings')
|
||||
def user_settings(user_id):
|
||||
user = User.query.get_or_404(user_id)
|
||||
admin_edit_user_form = AdminEditUserForm(
|
||||
data={'confirmed': user.confirmed, 'role': user.role.hashid},
|
||||
prefix='admin-edit-user-form'
|
||||
)
|
||||
edit_profile_settings_form = EditProfileSettingsForm(
|
||||
user,
|
||||
data=user.to_json_serializeable(),
|
||||
prefix='edit-profile-settings-form'
|
||||
)
|
||||
edit_notification_settings_form = EditNotificationSettingsForm(
|
||||
data=user.to_json_serializeable(),
|
||||
prefix='edit-notification-settings-form'
|
||||
)
|
||||
if (admin_edit_user_form.submit.data
|
||||
and admin_edit_user_form.validate()):
|
||||
user.confirmed = admin_edit_user_form.confirmed.data
|
||||
role_id = hashids.decode(admin_edit_user_form.role.data)
|
||||
update_account_information_form = UpdateAccountInformationForm(user)
|
||||
update_profile_information_form = UpdateProfileInformationForm(user)
|
||||
update_avatar_form = UpdateAvatarForm()
|
||||
update_password_form = UpdatePasswordForm(user)
|
||||
update_notifications_form = UpdateNotificationsForm(user)
|
||||
update_user_form = UpdateUserForm(user)
|
||||
|
||||
# region handle update profile information form
|
||||
if update_profile_information_form.submit.data and update_profile_information_form.validate():
|
||||
user.about_me = update_profile_information_form.about_me.data
|
||||
user.location = update_profile_information_form.location.data
|
||||
user.organization = update_profile_information_form.organization.data
|
||||
user.website = update_profile_information_form.website.data
|
||||
user.full_name = update_profile_information_form.full_name.data
|
||||
db.session.commit()
|
||||
flash('Your changes have been saved')
|
||||
return redirect(url_for('.user_settings', user_id=user.id))
|
||||
# endregion handle update profile information form
|
||||
|
||||
# region handle update avatar form
|
||||
if update_avatar_form.submit.data and update_avatar_form.validate():
|
||||
try:
|
||||
Avatar.create(
|
||||
update_avatar_form.avatar.data,
|
||||
user=user
|
||||
)
|
||||
except (AttributeError, OSError):
|
||||
abort(500)
|
||||
db.session.commit()
|
||||
flash('Your changes have been saved')
|
||||
return redirect(url_for('.user_settings', user_id=user.id))
|
||||
# endregion handle update avatar form
|
||||
|
||||
# region handle update account information form
|
||||
if update_account_information_form.submit.data and update_account_information_form.validate():
|
||||
user.email = update_account_information_form.email.data
|
||||
user.username = update_account_information_form.username.data
|
||||
db.session.commit()
|
||||
flash('Profile settings updated')
|
||||
return redirect(url_for('.user_settings', user_id=user.id))
|
||||
# endregion handle update account information form
|
||||
|
||||
# region handle update password form
|
||||
if update_password_form.submit.data and update_password_form.validate():
|
||||
user.password = update_password_form.new_password.data
|
||||
db.session.commit()
|
||||
flash('Your changes have been saved')
|
||||
return redirect(url_for('.user_settings', user_id=user.id))
|
||||
# endregion handle update password form
|
||||
|
||||
# region handle update notifications form
|
||||
if update_notifications_form.submit.data and update_notifications_form.validate():
|
||||
user.setting_job_status_mail_notification_level = \
|
||||
update_notifications_form.job_status_mail_notification_level.data
|
||||
db.session.commit()
|
||||
flash('Your changes have been saved')
|
||||
return redirect(url_for('.user_settings', user_id=user.id))
|
||||
# endregion handle update notifications form
|
||||
|
||||
# region handle update user form
|
||||
if update_user_form.submit.data and update_user_form.validate():
|
||||
role_id = hashids.decode(update_user_form.role.data)
|
||||
user.role = Role.query.get(role_id)
|
||||
db.session.commit()
|
||||
flash('Your changes have been saved')
|
||||
return redirect(url_for('.edit_user', user_id=user.id))
|
||||
if (edit_profile_settings_form.submit.data
|
||||
and edit_profile_settings_form.validate()):
|
||||
user.email = edit_profile_settings_form.email.data
|
||||
user.username = edit_profile_settings_form.username.data
|
||||
db.session.commit()
|
||||
flash('Your changes have been saved')
|
||||
return redirect(url_for('.edit_user', user_id=user.id))
|
||||
if (edit_notification_settings_form.submit.data
|
||||
and edit_notification_settings_form.validate()):
|
||||
user.setting_job_status_mail_notification_level = \
|
||||
UserSettingJobStatusMailNotificationLevel[
|
||||
edit_notification_settings_form.job_status_mail_notification_level.data # noqa
|
||||
]
|
||||
db.session.commit()
|
||||
flash('Your changes have been saved')
|
||||
return redirect(url_for('.edit_user', user_id=user.id))
|
||||
return redirect(url_for('.user_settings', user_id=user.id))
|
||||
# endregion handle update user form
|
||||
|
||||
return render_template(
|
||||
'admin/edit_user.html.j2',
|
||||
admin_edit_user_form=admin_edit_user_form,
|
||||
edit_profile_settings_form=edit_profile_settings_form,
|
||||
edit_notification_settings_form=edit_notification_settings_form,
|
||||
title='Edit user',
|
||||
'admin/user_settings.html.j2',
|
||||
title='Settings',
|
||||
update_account_information_form=update_account_information_form,
|
||||
update_avatar_form=update_avatar_form,
|
||||
update_notifications_form=update_notifications_form,
|
||||
update_password_form=update_password_form,
|
||||
update_profile_information_form=update_profile_information_form,
|
||||
update_user_form=update_user_form,
|
||||
user=user
|
||||
)
|
||||
|
||||
|
||||
@bp.route('/users/<hashid:user_id>/delete', methods=['DELETE'])
|
||||
def delete_user(user_id):
|
||||
def _delete_user(app, user_id):
|
||||
with app.app_context():
|
||||
user = User.query.get(user_id)
|
||||
user.delete()
|
||||
db.session.commit()
|
||||
|
||||
User.query.get_or_404(user_id)
|
||||
thread = Thread(
|
||||
target=_delete_user,
|
||||
args=(current_app._get_current_object(), user_id)
|
||||
)
|
||||
thread.start()
|
||||
return {}, 202
|
||||
|
@@ -2,7 +2,6 @@ from apifairy.fields import FileField
|
||||
from marshmallow import validate, validates, ValidationError
|
||||
from marshmallow.decorators import post_dump
|
||||
from app import ma
|
||||
from app.auth import USERNAME_REGEX
|
||||
from app.models import (
|
||||
Job,
|
||||
JobStatus,
|
||||
@@ -142,7 +141,10 @@ class UserSchema(ma.SQLAlchemySchema):
|
||||
username = ma.auto_field(
|
||||
validate=[
|
||||
validate.Length(min=1, max=64),
|
||||
validate.Regexp(USERNAME_REGEX, error='Usernames must have only letters, numbers, dots or underscores')
|
||||
validate.Regexp(
|
||||
User.username_pattern,
|
||||
error='Usernames must have only letters, numbers, dots or underscores'
|
||||
)
|
||||
]
|
||||
)
|
||||
email = ma.auto_field(validate=validate.Email())
|
||||
|
@@ -1,8 +1,5 @@
|
||||
from flask import Blueprint
|
||||
|
||||
|
||||
USERNAME_REGEX = '^[A-Za-zÄÖÜäöüß0-9_.]*$'
|
||||
|
||||
|
||||
bp = Blueprint('auth', __name__)
|
||||
from . import routes
|
||||
|
@@ -8,7 +8,6 @@ from wtforms import (
|
||||
)
|
||||
from wtforms.validators import InputRequired, Email, EqualTo, Length, Regexp
|
||||
from app.models import User
|
||||
from . import USERNAME_REGEX
|
||||
|
||||
|
||||
class RegistrationForm(FlaskForm):
|
||||
@@ -22,7 +21,7 @@ class RegistrationForm(FlaskForm):
|
||||
InputRequired(),
|
||||
Length(max=64),
|
||||
Regexp(
|
||||
USERNAME_REGEX,
|
||||
User.username_pattern,
|
||||
message=(
|
||||
'Usernames must have only letters, numbers, dots or '
|
||||
'underscores'
|
||||
@@ -44,8 +43,17 @@ class RegistrationForm(FlaskForm):
|
||||
EqualTo('password', message='Passwords must match')
|
||||
]
|
||||
)
|
||||
terms_of_use_accepted = BooleanField(
|
||||
'I have read and accept the terms of use',
|
||||
validators=[InputRequired()]
|
||||
)
|
||||
submit = SubmitField()
|
||||
|
||||
def __init__(self, *args, **kwargs):
|
||||
if 'prefix' not in kwargs:
|
||||
kwargs['prefix'] = 'registration-form'
|
||||
super().__init__(*args, **kwargs)
|
||||
|
||||
def validate_email(self, field):
|
||||
if User.query.filter_by(email=field.data.lower()).first():
|
||||
raise ValidationError('Email already registered')
|
||||
@@ -61,11 +69,21 @@ class LoginForm(FlaskForm):
|
||||
remember_me = BooleanField('Keep me logged in')
|
||||
submit = SubmitField()
|
||||
|
||||
def __init__(self, *args, **kwargs):
|
||||
if 'prefix' not in kwargs:
|
||||
kwargs['prefix'] = 'login-form'
|
||||
super().__init__(*args, **kwargs)
|
||||
|
||||
|
||||
class ResetPasswordRequestForm(FlaskForm):
|
||||
email = StringField('Email', validators=[InputRequired(), Email()])
|
||||
submit = SubmitField()
|
||||
|
||||
def __init__(self, *args, **kwargs):
|
||||
if 'prefix' not in kwargs:
|
||||
kwargs['prefix'] = 'reset-password-request-form'
|
||||
super().__init__(*args, **kwargs)
|
||||
|
||||
|
||||
class ResetPasswordForm(FlaskForm):
|
||||
password = PasswordField(
|
||||
@@ -83,3 +101,8 @@ class ResetPasswordForm(FlaskForm):
|
||||
]
|
||||
)
|
||||
submit = SubmitField()
|
||||
|
||||
def __init__(self, *args, **kwargs):
|
||||
if 'prefix' not in kwargs:
|
||||
kwargs['prefix'] = 'reset-password-form'
|
||||
super().__init__(*args, **kwargs)
|
||||
|
@@ -1,11 +1,5 @@
|
||||
from flask import (
|
||||
abort,
|
||||
flash,
|
||||
redirect,
|
||||
render_template,
|
||||
request,
|
||||
url_for
|
||||
)
|
||||
from flask import abort, flash, redirect, render_template, request, url_for
|
||||
from flask_breadcrumbs import register_breadcrumb
|
||||
from flask_login import current_user, login_user, login_required, logout_user
|
||||
from app import db
|
||||
from app.email import create_message, send
|
||||
@@ -36,16 +30,18 @@ def before_request():
|
||||
|
||||
|
||||
@bp.route('/register', methods=['GET', 'POST'])
|
||||
@register_breadcrumb(bp, '.register', 'Register')
|
||||
def register():
|
||||
if current_user.is_authenticated:
|
||||
return redirect(url_for('main.dashboard'))
|
||||
form = RegistrationForm(prefix='registration-form')
|
||||
form = RegistrationForm()
|
||||
if form.validate_on_submit():
|
||||
try:
|
||||
user = User.create(
|
||||
email=form.email.data.lower(),
|
||||
password=form.password.data,
|
||||
username=form.username.data
|
||||
username=form.username.data,
|
||||
terms_of_use_accepted=form.terms_of_use_accepted.data
|
||||
)
|
||||
except OSError:
|
||||
flash('Internal Server Error', category='error')
|
||||
@@ -65,16 +61,17 @@ def register():
|
||||
return redirect(url_for('.login'))
|
||||
return render_template(
|
||||
'auth/register.html.j2',
|
||||
form=form,
|
||||
title='Register'
|
||||
title='Register',
|
||||
form=form
|
||||
)
|
||||
|
||||
|
||||
@bp.route('/login', methods=['GET', 'POST'])
|
||||
@register_breadcrumb(bp, '.login', 'Login')
|
||||
def login():
|
||||
if current_user.is_authenticated:
|
||||
return redirect(url_for('main.dashboard'))
|
||||
form = LoginForm(prefix='login-form')
|
||||
form = LoginForm()
|
||||
if form.validate_on_submit():
|
||||
user = User.query.filter((User.email == form.user.data.lower()) | (User.username == form.user.data)).first()
|
||||
if user and user.verify_password(form.password.data):
|
||||
@@ -85,7 +82,11 @@ def login():
|
||||
flash('You have been logged in')
|
||||
return redirect(next)
|
||||
flash('Invalid email/username or password', category='error')
|
||||
return render_template('auth/login.html.j2', form=form, title='Log in')
|
||||
return render_template(
|
||||
'auth/login.html.j2',
|
||||
title='Log in',
|
||||
form=form
|
||||
)
|
||||
|
||||
|
||||
@bp.route('/logout')
|
||||
@@ -97,14 +98,18 @@ def logout():
|
||||
|
||||
|
||||
@bp.route('/unconfirmed')
|
||||
@register_breadcrumb(bp, '.unconfirmed', 'Unconfirmed')
|
||||
@login_required
|
||||
def unconfirmed():
|
||||
if current_user.confirmed:
|
||||
return redirect(url_for('main.dashboard'))
|
||||
return render_template('auth/unconfirmed.html.j2', title='Unconfirmed')
|
||||
return render_template(
|
||||
'auth/unconfirmed.html.j2',
|
||||
title='Unconfirmed'
|
||||
)
|
||||
|
||||
|
||||
@bp.route('/confirm')
|
||||
@bp.route('/confirm-request')
|
||||
@login_required
|
||||
def confirm_request():
|
||||
if current_user.confirmed:
|
||||
@@ -135,11 +140,12 @@ def confirm(token):
|
||||
return redirect(url_for('.unconfirmed'))
|
||||
|
||||
|
||||
@bp.route('/reset_password', methods=['GET', 'POST'])
|
||||
@bp.route('/reset-password-request', methods=['GET', 'POST'])
|
||||
@register_breadcrumb(bp, '.reset_password_request', 'Password Reset')
|
||||
def reset_password_request():
|
||||
if current_user.is_authenticated:
|
||||
return redirect(url_for('main.dashboard'))
|
||||
form = ResetPasswordRequestForm(prefix='reset-password-request-form')
|
||||
form = ResetPasswordRequestForm()
|
||||
if form.validate_on_submit():
|
||||
user = User.query.filter_by(email=form.email.data.lower()).first()
|
||||
if user is not None:
|
||||
@@ -159,16 +165,17 @@ def reset_password_request():
|
||||
return redirect(url_for('.login'))
|
||||
return render_template(
|
||||
'auth/reset_password_request.html.j2',
|
||||
form=form,
|
||||
title='Password Reset'
|
||||
title='Password Reset',
|
||||
form=form
|
||||
)
|
||||
|
||||
|
||||
@bp.route('/reset_password/<token>', methods=['GET', 'POST'])
|
||||
@bp.route('/reset-password/<token>', methods=['GET', 'POST'])
|
||||
@register_breadcrumb(bp, '.reset_password', 'Password Reset')
|
||||
def reset_password(token):
|
||||
if current_user.is_authenticated:
|
||||
return redirect(url_for('main.dashboard'))
|
||||
form = ResetPasswordForm(prefix='reset-password-form')
|
||||
form = ResetPasswordForm()
|
||||
if form.validate_on_submit():
|
||||
if User.reset_password(token, form.password.data):
|
||||
db.session.commit()
|
||||
@@ -177,7 +184,7 @@ def reset_password(token):
|
||||
return redirect(url_for('main.index'))
|
||||
return render_template(
|
||||
'auth/reset_password.html.j2',
|
||||
form=form,
|
||||
title='Password Reset',
|
||||
form=form,
|
||||
token=token
|
||||
)
|
||||
|
72
app/cli.py
@@ -1,72 +0,0 @@
|
||||
from flask import current_app
|
||||
from flask_migrate import upgrade
|
||||
import click
|
||||
import os
|
||||
from app.models import (
|
||||
Role,
|
||||
User,
|
||||
TesseractOCRPipelineModel,
|
||||
SpaCyNLPPipelineModel
|
||||
)
|
||||
|
||||
|
||||
def _make_default_dirs():
|
||||
base_dir = current_app.config['NOPAQUE_DATA_DIR']
|
||||
|
||||
default_directories = [
|
||||
os.path.join(base_dir, 'tmp'),
|
||||
os.path.join(base_dir, 'users')
|
||||
]
|
||||
for directory in default_directories:
|
||||
if os.path.exists(directory):
|
||||
if not os.path.isdir(directory):
|
||||
raise NotADirectoryError(f'{directory} is not a directory')
|
||||
else:
|
||||
os.mkdir(directory)
|
||||
|
||||
|
||||
def register(app):
|
||||
@app.cli.command()
|
||||
def deploy():
|
||||
''' Run deployment tasks. '''
|
||||
# Make default directories
|
||||
_make_default_dirs()
|
||||
|
||||
# migrate database to latest revision
|
||||
upgrade()
|
||||
|
||||
# Insert/Update default database values
|
||||
current_app.logger.info('Insert/Update default roles')
|
||||
Role.insert_defaults()
|
||||
current_app.logger.info('Insert/Update default users')
|
||||
User.insert_defaults()
|
||||
current_app.logger.info('Insert/Update default SpaCyNLPPipelineModels')
|
||||
SpaCyNLPPipelineModel.insert_defaults()
|
||||
current_app.logger.info('Insert/Update default TesseractOCRPipelineModels')
|
||||
TesseractOCRPipelineModel.insert_defaults()
|
||||
|
||||
@app.cli.group()
|
||||
def converter():
|
||||
''' Converter commands. '''
|
||||
pass
|
||||
|
||||
@converter.command()
|
||||
@click.argument('json_db')
|
||||
@click.argument('data_dir')
|
||||
def sandpaper(json_db, data_dir):
|
||||
''' Sandpaper converter '''
|
||||
from app.converters.sandpaper import convert
|
||||
convert(json_db, data_dir)
|
||||
|
||||
@app.cli.group()
|
||||
def test():
|
||||
''' Test commands. '''
|
||||
pass
|
||||
|
||||
@test.command('run')
|
||||
def run_test():
|
||||
''' Run unit tests. '''
|
||||
from unittest import TestLoader, TextTestRunner
|
||||
from unittest.suite import TestSuite
|
||||
tests: TestSuite = TestLoader().discover('tests')
|
||||
TextTestRunner(verbosity=2).run(tests)
|
@@ -1,5 +1,23 @@
|
||||
from flask import Blueprint
|
||||
from flask_login import login_required
|
||||
|
||||
|
||||
bp = Blueprint('contributions', __name__)
|
||||
from . import routes
|
||||
|
||||
|
||||
@bp.before_request
|
||||
@login_required
|
||||
def before_request():
|
||||
'''
|
||||
Ensures that the routes in this package can only be visited by users that
|
||||
are logged in.
|
||||
'''
|
||||
pass
|
||||
|
||||
|
||||
from . import (
|
||||
routes,
|
||||
spacy_nlp_pipeline_models,
|
||||
tesseract_ocr_pipeline_models,
|
||||
transkribus_htr_pipeline_models
|
||||
)
|
||||
|
@@ -1,16 +1,11 @@
|
||||
from flask import current_app
|
||||
from flask_wtf import FlaskForm
|
||||
from flask_wtf.file import FileField, FileRequired
|
||||
from wtforms import (
|
||||
BooleanField,
|
||||
StringField,
|
||||
SubmitField,
|
||||
SelectMultipleField,
|
||||
IntegerField,
|
||||
ValidationError
|
||||
IntegerField
|
||||
)
|
||||
from wtforms.validators import InputRequired, Length
|
||||
from app.services import SERVICES
|
||||
|
||||
|
||||
class ContributionBaseForm(FlaskForm):
|
||||
@@ -48,74 +43,5 @@ class ContributionBaseForm(FlaskForm):
|
||||
submit = SubmitField()
|
||||
|
||||
|
||||
class CreateTesseractOCRPipelineModelForm(ContributionBaseForm):
|
||||
tesseract_model_file = FileField(
|
||||
'File',
|
||||
validators=[FileRequired()]
|
||||
)
|
||||
|
||||
def validate_tesseract_model_file(self, field):
|
||||
if not field.data.filename.lower().endswith('.traineddata'):
|
||||
raise ValidationError('traineddata files only!')
|
||||
|
||||
def __init__(self, *args, **kwargs):
|
||||
service_manifest = SERVICES['tesseract-ocr-pipeline']
|
||||
super().__init__(*args, **kwargs)
|
||||
self.compatible_service_versions.choices = [('', 'Choose your option')]
|
||||
self.compatible_service_versions.choices += [
|
||||
(x, x) for x in service_manifest['versions'].keys()
|
||||
]
|
||||
self.compatible_service_versions.default = ''
|
||||
|
||||
|
||||
class CreateSpaCyNLPPipelineModelForm(ContributionBaseForm):
|
||||
spacy_model_file = FileField(
|
||||
'File',
|
||||
validators=[FileRequired()]
|
||||
)
|
||||
pipeline_name = StringField(
|
||||
'Pipeline name',
|
||||
validators=[InputRequired(), Length(max=64)]
|
||||
)
|
||||
|
||||
def validate_spacy_model_file(self, field):
|
||||
if not field.data.filename.lower().endswith('.tar.gz'):
|
||||
raise ValidationError('.tar.gz files only!')
|
||||
|
||||
def __init__(self, *args, **kwargs):
|
||||
super().__init__(*args, **kwargs)
|
||||
service_manifest = SERVICES['spacy-nlp-pipeline']
|
||||
self.compatible_service_versions.choices = [('', 'Choose your option')]
|
||||
self.compatible_service_versions.choices += [
|
||||
(x, x) for x in service_manifest['versions'].keys()
|
||||
]
|
||||
self.compatible_service_versions.default = ''
|
||||
|
||||
|
||||
class EditContributionBaseForm(ContributionBaseForm):
|
||||
class UpdateContributionBaseForm(ContributionBaseForm):
|
||||
pass
|
||||
|
||||
class EditTesseractOCRPipelineModelForm(EditContributionBaseForm):
|
||||
def __init__(self, *args, **kwargs):
|
||||
service_manifest = SERVICES['tesseract-ocr-pipeline']
|
||||
super().__init__(*args, **kwargs)
|
||||
self.compatible_service_versions.choices = [('', 'Choose your option')]
|
||||
self.compatible_service_versions.choices += [
|
||||
(x, x) for x in service_manifest['versions'].keys()
|
||||
]
|
||||
self.compatible_service_versions.default = ''
|
||||
|
||||
|
||||
class EditSpaCyNLPPipelineModelForm(EditContributionBaseForm):
|
||||
pipeline_name = StringField(
|
||||
'Pipeline name',
|
||||
validators=[InputRequired(), Length(max=64)]
|
||||
)
|
||||
def __init__(self, *args, **kwargs):
|
||||
super().__init__(*args, **kwargs)
|
||||
service_manifest = SERVICES['spacy-nlp-pipeline']
|
||||
self.compatible_service_versions.choices = [('', 'Choose your option')]
|
||||
self.compatible_service_versions.choices += [
|
||||
(x, x) for x in service_manifest['versions'].keys()
|
||||
]
|
||||
self.compatible_service_versions.default = ''
|
||||
|
@@ -1,233 +1,9 @@
|
||||
from flask import (
|
||||
abort,
|
||||
current_app,
|
||||
flash,
|
||||
Markup,
|
||||
redirect,
|
||||
render_template,
|
||||
url_for
|
||||
)
|
||||
from flask_login import login_required, current_user
|
||||
from threading import Thread
|
||||
from app import db
|
||||
from app.decorators import permission_required
|
||||
from app.models import (
|
||||
Permission,
|
||||
SpaCyNLPPipelineModel,
|
||||
TesseractOCRPipelineModel
|
||||
)
|
||||
from flask import redirect, url_for
|
||||
from flask_breadcrumbs import register_breadcrumb
|
||||
from . import bp
|
||||
from .forms import (
|
||||
CreateSpaCyNLPPipelineModelForm,
|
||||
CreateTesseractOCRPipelineModelForm,
|
||||
EditSpaCyNLPPipelineModelForm,
|
||||
EditTesseractOCRPipelineModelForm
|
||||
)
|
||||
|
||||
|
||||
@bp.before_request
|
||||
@login_required
|
||||
def before_request():
|
||||
pass
|
||||
|
||||
|
||||
@bp.route('/')
|
||||
@bp.route('')
|
||||
@register_breadcrumb(bp, '.', '<i class="material-icons left">new_label</i>My Contributions')
|
||||
def contributions():
|
||||
return render_template(
|
||||
'contributions/contributions.html.j2',
|
||||
title='Contributions'
|
||||
)
|
||||
|
||||
|
||||
@bp.route('/tesseract-ocr-pipeline-models')
|
||||
def tesseract_ocr_pipeline_models():
|
||||
return render_template(
|
||||
'contributions/tesseract_ocr_pipeline_models.html.j2',
|
||||
title='Tesseract OCR Pipeline Models'
|
||||
)
|
||||
|
||||
|
||||
@bp.route('/tesseract-ocr-pipeline-models/<hashid:tesseract_ocr_pipeline_model_id>', methods=['GET', 'POST'])
|
||||
def tesseract_ocr_pipeline_model(tesseract_ocr_pipeline_model_id):
|
||||
tesseract_ocr_pipeline_model = TesseractOCRPipelineModel.query.get_or_404(tesseract_ocr_pipeline_model_id)
|
||||
form = EditTesseractOCRPipelineModelForm(
|
||||
data=tesseract_ocr_pipeline_model.to_json_serializeable(),
|
||||
prefix='edit-tesseract-ocr-pipeline-model-form'
|
||||
)
|
||||
if form.validate_on_submit():
|
||||
form.populate_obj(tesseract_ocr_pipeline_model)
|
||||
if db.session.is_modified(tesseract_ocr_pipeline_model):
|
||||
message = Markup(f'Tesseract OCR Pipeline model "<a href="{tesseract_ocr_pipeline_model.url}">{tesseract_ocr_pipeline_model.title}</a>" updated')
|
||||
flash(message)
|
||||
db.session.commit()
|
||||
return redirect(url_for('.tesseract_ocr_pipeline_models'))
|
||||
return render_template(
|
||||
'contributions/tesseract_ocr_pipeline_model.html.j2',
|
||||
form=form,
|
||||
tesseract_ocr_pipeline_model=tesseract_ocr_pipeline_model,
|
||||
title=f'{tesseract_ocr_pipeline_model.title} {tesseract_ocr_pipeline_model.version}'
|
||||
)
|
||||
|
||||
|
||||
@bp.route('/tesseract-ocr-pipeline-models/<hashid:tesseract_ocr_pipeline_model_id>', methods=['DELETE'])
|
||||
def delete_tesseract_model(tesseract_ocr_pipeline_model_id):
|
||||
def _delete_tesseract_ocr_pipeline_model(app, tesseract_ocr_pipeline_model_id):
|
||||
with app.app_context():
|
||||
tesseract_ocr_pipeline_model = TesseractOCRPipelineModel.query.get(tesseract_ocr_pipeline_model_id)
|
||||
tesseract_ocr_pipeline_model.delete()
|
||||
db.session.commit()
|
||||
|
||||
tesseract_ocr_pipeline_model = TesseractOCRPipelineModel.query.get_or_404(tesseract_ocr_pipeline_model_id)
|
||||
if not (tesseract_ocr_pipeline_model.user == current_user or current_user.is_administrator()):
|
||||
abort(403)
|
||||
thread = Thread(
|
||||
target=_delete_tesseract_ocr_pipeline_model,
|
||||
args=(current_app._get_current_object(), tesseract_ocr_pipeline_model_id)
|
||||
)
|
||||
thread.start()
|
||||
return {}, 202
|
||||
|
||||
|
||||
@bp.route('/tesseract-ocr-pipeline-models/create', methods=['GET', 'POST'])
|
||||
def create_tesseract_ocr_pipeline_model():
|
||||
form = CreateTesseractOCRPipelineModelForm(prefix='create-tesseract-ocr-pipeline-model-form')
|
||||
if form.is_submitted():
|
||||
if not form.validate():
|
||||
response = {'errors': form.errors}
|
||||
return response, 400
|
||||
try:
|
||||
tesseract_ocr_pipeline_model = TesseractOCRPipelineModel.create(
|
||||
form.tesseract_model_file.data,
|
||||
compatible_service_versions=form.compatible_service_versions.data,
|
||||
description=form.description.data,
|
||||
publisher=form.publisher.data,
|
||||
publisher_url=form.publisher_url.data,
|
||||
publishing_url=form.publishing_url.data,
|
||||
publishing_year=form.publishing_year.data,
|
||||
is_public=False,
|
||||
title=form.title.data,
|
||||
version=form.version.data,
|
||||
user=current_user
|
||||
)
|
||||
except OSError:
|
||||
abort(500)
|
||||
db.session.commit()
|
||||
tesseract_ocr_pipeline_model_url = url_for(
|
||||
'.tesseract_ocr_pipeline_model',
|
||||
tesseract_ocr_pipeline_model_id=tesseract_ocr_pipeline_model.id
|
||||
)
|
||||
message = Markup(f'Tesseract OCR Pipeline model "<a href="{tesseract_ocr_pipeline_model_url}">{tesseract_ocr_pipeline_model.title}</a>" created')
|
||||
flash(message)
|
||||
return {}, 201, {'Location': tesseract_ocr_pipeline_model_url}
|
||||
return render_template(
|
||||
'contributions/create_tesseract_ocr_pipeline_model.html.j2',
|
||||
form=form,
|
||||
title='Create Tesseract OCR Pipeline Model'
|
||||
)
|
||||
|
||||
@bp.route('/tesseract-ocr-pipeline-models/<hashid:tesseract_ocr_pipeline_model_id>/toggle-public-status', methods=['POST'])
|
||||
@permission_required(Permission.CONTRIBUTE)
|
||||
def toggle_tesseract_ocr_pipeline_model_public_status(tesseract_ocr_pipeline_model_id):
|
||||
tesseract_ocr_pipeline_model = TesseractOCRPipelineModel.query.get_or_404(tesseract_ocr_pipeline_model_id)
|
||||
if not (tesseract_ocr_pipeline_model.user == current_user or current_user.is_administrator()):
|
||||
abort(403)
|
||||
tesseract_ocr_pipeline_model.is_public = not tesseract_ocr_pipeline_model.is_public
|
||||
db.session.commit()
|
||||
return {}, 201
|
||||
|
||||
|
||||
@bp.route('/spacy-nlp-pipeline-models')
|
||||
def spacy_nlp_pipeline_models():
|
||||
return render_template(
|
||||
'contributions/spacy_nlp_pipeline_models.html.j2',
|
||||
title='SpaCy NLP Pipeline Models'
|
||||
)
|
||||
|
||||
|
||||
@bp.route('/spacy-nlp-pipeline-models/<hashid:spacy_nlp_pipeline_model_id>', methods=['GET', 'POST'])
|
||||
def spacy_nlp_pipeline_model(spacy_nlp_pipeline_model_id):
|
||||
spacy_nlp_pipeline_model = SpaCyNLPPipelineModel.query.get_or_404(spacy_nlp_pipeline_model_id)
|
||||
form = EditSpaCyNLPPipelineModelForm(
|
||||
data=spacy_nlp_pipeline_model.to_json_serializeable(),
|
||||
prefix='edit-spacy-nlp-pipeline-model-form'
|
||||
)
|
||||
if form.validate_on_submit():
|
||||
form.populate_obj(spacy_nlp_pipeline_model)
|
||||
if db.session.is_modified(spacy_nlp_pipeline_model):
|
||||
message = Markup(f'SpaCy NLP Pipeline model "<a href="{spacy_nlp_pipeline_model.url}">{spacy_nlp_pipeline_model.title}</a>" updated')
|
||||
flash(message)
|
||||
db.session.commit()
|
||||
return redirect(url_for('.spacy_nlp_pipeline_models'))
|
||||
return render_template(
|
||||
'contributions/spacy_nlp_pipeline_model.html.j2',
|
||||
form=form,
|
||||
spacy_nlp_pipeline_model=spacy_nlp_pipeline_model,
|
||||
title=f'{spacy_nlp_pipeline_model.title} {spacy_nlp_pipeline_model.version}'
|
||||
)
|
||||
|
||||
@bp.route('/spacy-nlp-pipeline-models/<hashid:spacy_nlp_pipeline_model_id>', methods=['DELETE'])
|
||||
def delete_spacy_model(spacy_nlp_pipeline_model_id):
|
||||
def _delete_spacy_model(app, spacy_nlp_pipeline_model_id):
|
||||
with app.app_context():
|
||||
spacy_nlp_pipeline_model = SpaCyNLPPipelineModel.query.get(spacy_nlp_pipeline_model_id)
|
||||
spacy_nlp_pipeline_model.delete()
|
||||
db.session.commit()
|
||||
|
||||
spacy_nlp_pipeline_model = SpaCyNLPPipelineModel.query.get_or_404(spacy_nlp_pipeline_model_id)
|
||||
if not (spacy_nlp_pipeline_model.user == current_user or current_user.is_administrator()):
|
||||
abort(403)
|
||||
thread = Thread(
|
||||
target=_delete_spacy_model,
|
||||
args=(current_app._get_current_object(), spacy_nlp_pipeline_model_id)
|
||||
)
|
||||
thread.start()
|
||||
return {}, 202
|
||||
|
||||
|
||||
@bp.route('/spacy-nlp-pipeline-models/create', methods=['GET', 'POST'])
|
||||
def create_spacy_nlp_pipeline_model():
|
||||
form = CreateSpaCyNLPPipelineModelForm(prefix='create-spacy-nlp-pipeline-model-form')
|
||||
if form.is_submitted():
|
||||
if not form.validate():
|
||||
response = {'errors': form.errors}
|
||||
return response, 400
|
||||
try:
|
||||
spacy_nlp_pipeline_model = SpaCyNLPPipelineModel.create(
|
||||
form.spacy_model_file.data,
|
||||
compatible_service_versions=form.compatible_service_versions.data,
|
||||
description=form.description.data,
|
||||
pipeline_name=form.pipeline_name.data,
|
||||
publisher=form.publisher.data,
|
||||
publisher_url=form.publisher_url.data,
|
||||
publishing_url=form.publishing_url.data,
|
||||
publishing_year=form.publishing_year.data,
|
||||
is_public=False,
|
||||
title=form.title.data,
|
||||
version=form.version.data,
|
||||
user=current_user
|
||||
)
|
||||
except OSError:
|
||||
abort(500)
|
||||
db.session.commit()
|
||||
spacy_nlp_pipeline_model_url = url_for(
|
||||
'.spacy_nlp_pipeline_model',
|
||||
spacy_nlp_pipeline_model_id=spacy_nlp_pipeline_model.id
|
||||
)
|
||||
message = Markup(f'SpaCy NLP Pipeline model "<a href="{spacy_nlp_pipeline_model_url}">{spacy_nlp_pipeline_model.title}</a>" created')
|
||||
flash(message)
|
||||
return {}, 201, {'Location': spacy_nlp_pipeline_model_url}
|
||||
return render_template(
|
||||
'contributions/create_spacy_nlp_pipeline_model.html.j2',
|
||||
form=form,
|
||||
title='Create SpaCy NLP Pipeline Model'
|
||||
)
|
||||
|
||||
@bp.route('/spacy-nlp-pipeline-models/<hashid:spacy_nlp_pipeline_model_id>/toggle-public-status', methods=['POST'])
|
||||
@permission_required(Permission.CONTRIBUTE)
|
||||
def toggle_spacy_nlp_pipeline_model_public_status(spacy_nlp_pipeline_model_id):
|
||||
spacy_nlp_pipeline_model = SpaCyNLPPipelineModel.query.get_or_404(spacy_nlp_pipeline_model_id)
|
||||
if not (spacy_nlp_pipeline_model.user == current_user or current_user.is_administrator()):
|
||||
abort(403)
|
||||
spacy_nlp_pipeline_model.is_public = not spacy_nlp_pipeline_model.is_public
|
||||
db.session.commit()
|
||||
return {}, 201
|
||||
return redirect(url_for('main.dashboard', _anchor='contributions'))
|
||||
|
2
app/contributions/spacy_nlp_pipeline_models/__init__.py
Normal file
@@ -0,0 +1,2 @@
|
||||
from .. import bp
|
||||
from . import json_routes, routes
|
48
app/contributions/spacy_nlp_pipeline_models/forms.py
Normal file
@@ -0,0 +1,48 @@
|
||||
from flask_wtf.file import FileField, FileRequired
|
||||
from wtforms import StringField, ValidationError
|
||||
from wtforms.validators import InputRequired, Length
|
||||
from app.services import SERVICES
|
||||
from ..forms import ContributionBaseForm, UpdateContributionBaseForm
|
||||
|
||||
|
||||
class CreateSpaCyNLPPipelineModelForm(ContributionBaseForm):
    '''
    Form for uploading a new SpaCy NLP Pipeline model.

    Adds a required model archive upload and a pipeline name to the fields
    inherited from ContributionBaseForm.
    '''
    spacy_model_file = FileField('File', validators=[FileRequired()])
    pipeline_name = StringField(
        'Pipeline name',
        validators=[InputRequired(), Length(max=64)]
    )

    def validate_spacy_model_file(self, field):
        ''' Reject uploads whose file name does not end in ".tar.gz". '''
        uploaded_name = field.data.filename.lower()
        if uploaded_name.endswith('.tar.gz'):
            return
        raise ValidationError('.tar.gz files only!')

    def __init__(self, *args, **kwargs):
        '''
        Apply the default form prefix (unless the caller passed one) and
        fill the service version choices from the service manifest.
        '''
        kwargs.setdefault('prefix', 'create-spacy-nlp-pipeline-model-form')
        super().__init__(*args, **kwargs)
        available_versions = SERVICES['spacy-nlp-pipeline']['versions']
        versions_field = self.compatible_service_versions
        # Empty placeholder entry first, then one entry per known version.
        versions_field.choices = [('', 'Choose your option')] + [
            (version, version) for version in available_versions.keys()
        ]
        versions_field.default = ''
|
||||
|
||||
|
||||
class UpdateSpaCyNLPPipelineModelForm(UpdateContributionBaseForm):
    '''
    Form for editing the metadata of an existing SpaCy NLP Pipeline model.

    Adds the pipeline name to the fields inherited from
    UpdateContributionBaseForm.
    '''
    pipeline_name = StringField(
        'Pipeline name',
        validators=[InputRequired(), Length(max=64)]
    )

    def __init__(self, *args, **kwargs):
        '''
        Apply the default form prefix (unless the caller passed one) and
        fill the service version choices from the service manifest.
        '''
        kwargs.setdefault('prefix', 'edit-spacy-nlp-pipeline-model-form')
        super().__init__(*args, **kwargs)
        available_versions = SERVICES['spacy-nlp-pipeline']['versions']
        versions_field = self.compatible_service_versions
        # Empty placeholder entry first, then one entry per known version.
        versions_field.choices = [('', 'Choose your option')] + [
            (version, version) for version in available_versions.keys()
        ]
        versions_field.default = ''
|
52
app/contributions/spacy_nlp_pipeline_models/json_routes.py
Normal file
@@ -0,0 +1,52 @@
|
||||
from flask import abort, current_app, request
|
||||
from flask_login import current_user
|
||||
from threading import Thread
|
||||
from app import db
|
||||
from app.decorators import content_negotiation, permission_required
|
||||
from app.models import SpaCyNLPPipelineModel
|
||||
from .. import bp
|
||||
|
||||
|
||||
@bp.route('/spacy-nlp-pipeline-models/<hashid:spacy_nlp_pipeline_model_id>', methods=['DELETE'])
@content_negotiation(produces='application/json')
def delete_spacy_model(spacy_nlp_pipeline_model_id):
    '''
    Delete a SpaCy NLP Pipeline model in a background thread.

    Responds with 404 if the model does not exist and with 403 if the
    current user is neither its owner nor an administrator. Otherwise the
    deletion is handed to a worker thread and a JSON message is returned
    with status 202.
    '''
    def _delete_spacy_model(app, spacy_nlp_pipeline_model_id):
        # The worker runs outside of the request, so it needs its own
        # application context.
        with app.app_context():
            snpm = SpaCyNLPPipelineModel.query.get(spacy_nlp_pipeline_model_id)
            if snpm is None:
                # query.get() yields None when the row is gone already, e.g.
                # because a concurrent DELETE request finished first.
                return
            snpm.delete()
            db.session.commit()

    snpm = SpaCyNLPPipelineModel.query.get_or_404(spacy_nlp_pipeline_model_id)
    if not (snpm.user == current_user or current_user.is_administrator()):
        abort(403)
    # Build the response before the worker starts so that it never reads
    # from an object that is being deleted concurrently.
    response_data = {
        'message': \
            f'SpaCy NLP Pipeline Model "{snpm.title}" marked for deletion'
    }
    thread = Thread(
        target=_delete_spacy_model,
        args=(current_app._get_current_object(), snpm.id)
    )
    thread.start()
    return response_data, 202
|
||||
|
||||
|
||||
@bp.route('/spacy-nlp-pipeline-models/<hashid:spacy_nlp_pipeline_model_id>/is_public', methods=['PUT'])
@permission_required('CONTRIBUTE')
@content_negotiation(consumes='application/json', produces='application/json')
def update_spacy_nlp_pipeline_model_is_public(spacy_nlp_pipeline_model_id):
    '''
    Set the ``is_public`` flag of a SpaCy NLP Pipeline model.

    The JSON request body must be a bare boolean, otherwise 400 is sent.
    Responds with 404 for an unknown model and 403 if the current user is
    neither its owner nor an administrator.
    '''
    is_public = request.json
    if not isinstance(is_public, bool):
        abort(400)
    model = SpaCyNLPPipelineModel.query.get_or_404(spacy_nlp_pipeline_model_id)
    may_edit = model.user == current_user or current_user.is_administrator()
    if not may_edit:
        abort(403)
    model.is_public = is_public
    db.session.commit()
    message = (
        f'SpaCy NLP Pipeline Model "{model.title}"'
        f' is now {"public" if is_public else "private"}'
    )
    return {'message': message}, 200
|
77
app/contributions/spacy_nlp_pipeline_models/routes.py
Normal file
@@ -0,0 +1,77 @@
|
||||
from flask import abort, flash, redirect, render_template, url_for
|
||||
from flask_breadcrumbs import register_breadcrumb
|
||||
from flask_login import current_user
|
||||
from app import db
|
||||
from app.models import SpaCyNLPPipelineModel
|
||||
from . import bp
|
||||
from .forms import (
|
||||
CreateSpaCyNLPPipelineModelForm,
|
||||
UpdateSpaCyNLPPipelineModelForm
|
||||
)
|
||||
from .utils import (
|
||||
spacy_nlp_pipeline_model_dlc as spacy_nlp_pipeline_model_dlc
|
||||
)
|
||||
|
||||
|
||||
@bp.route('/spacy-nlp-pipeline-models')
@register_breadcrumb(bp, '.spacy_nlp_pipeline_models', 'SpaCy NLP Pipeline Models')
def spacy_nlp_pipeline_models():
    ''' Render the overview page of SpaCy NLP Pipeline models. '''
    template = 'contributions/spacy_nlp_pipeline_models/spacy_nlp_pipeline_models.html.j2'
    return render_template(template, title='SpaCy NLP Pipeline Models')
|
||||
|
||||
|
||||
@bp.route('/spacy-nlp-pipeline-models/create', methods=['GET', 'POST'])
@register_breadcrumb(bp, '.spacy_nlp_pipeline_models.create', 'Create')
def create_spacy_nlp_pipeline_model():
    '''
    Show the creation form or, when it was submitted, create the model.

    A submission that fails validation is answered with the form errors
    and status 400. An OSError raised while creating the model results in
    a 500. On success an empty body with status 201 and a Location header
    pointing to the model overview is returned.
    '''
    form = CreateSpaCyNLPPipelineModelForm()
    if not form.is_submitted():
        return render_template(
            'contributions/spacy_nlp_pipeline_models/create.html.j2',
            title='Create SpaCy NLP Pipeline Model',
            form=form
        )
    if not form.validate():
        return {'errors': form.errors}, 400
    model_attributes = dict(
        compatible_service_versions=form.compatible_service_versions.data,
        description=form.description.data,
        pipeline_name=form.pipeline_name.data,
        publisher=form.publisher.data,
        publisher_url=form.publisher_url.data,
        publishing_url=form.publishing_url.data,
        publishing_year=form.publishing_year.data,
        is_public=False,
        title=form.title.data,
        version=form.version.data,
        user=current_user
    )
    try:
        snpm = SpaCyNLPPipelineModel.create(
            form.spacy_model_file.data,
            **model_attributes
        )
    except OSError:
        abort(500)
    db.session.commit()
    flash(f'SpaCy NLP Pipeline model "{snpm.title}" created')
    overview_url = url_for('.spacy_nlp_pipeline_models')
    return {}, 201, {'Location': overview_url}
|
||||
|
||||
|
||||
@bp.route('/spacy-nlp-pipeline-models/<hashid:spacy_nlp_pipeline_model_id>', methods=['GET', 'POST'])
@register_breadcrumb(bp, '.spacy_nlp_pipeline_models.entity', '', dynamic_list_constructor=spacy_nlp_pipeline_model_dlc)
def spacy_nlp_pipeline_model(spacy_nlp_pipeline_model_id):
    '''
    Show and update a single SpaCy NLP Pipeline model.

    Responds with 404 for an unknown model and 403 if the current user is
    neither its owner nor an administrator. A valid submission is written
    to the model and the user is redirected to the model overview.
    '''
    snpm = SpaCyNLPPipelineModel.query.get_or_404(spacy_nlp_pipeline_model_id)
    may_edit = snpm.user == current_user or current_user.is_administrator()
    if not may_edit:
        abort(403)
    form = UpdateSpaCyNLPPipelineModelForm(data=snpm.to_json_serializeable())
    if not form.validate_on_submit():
        return render_template(
            'contributions/spacy_nlp_pipeline_models/spacy_nlp_pipeline_model.html.j2',
            title=f'{snpm.title} {snpm.version}',
            form=form,
            spacy_nlp_pipeline_model=snpm
        )
    form.populate_obj(snpm)
    # Only notify and commit when the submission actually changed something.
    if db.session.is_modified(snpm):
        flash(f'SpaCy NLP Pipeline model "{snpm.title}" updated')
        db.session.commit()
    return redirect(url_for('.spacy_nlp_pipeline_models'))
|
13
app/contributions/spacy_nlp_pipeline_models/utils.py
Normal file
@@ -0,0 +1,13 @@
|
||||
from flask import request, url_for
|
||||
from app.models import SpaCyNLPPipelineModel
|
||||
|
||||
|
||||
def spacy_nlp_pipeline_model_dlc():
    '''
    Dynamic list constructor for the breadcrumb of a single model.

    Reads the model id from the view arguments of the current request and
    returns a one-element list holding the breadcrumb text and URL.
    Responds with 404 if the model does not exist.
    '''
    model_id = request.view_args['spacy_nlp_pipeline_model_id']
    model = SpaCyNLPPipelineModel.query.get_or_404(model_id)
    breadcrumb = {
        'text': f'{model.title} {model.version}',
        'url': url_for('.spacy_nlp_pipeline_model', spacy_nlp_pipeline_model_id=model_id)
    }
    return [breadcrumb]
|
@@ -0,0 +1,2 @@
|
||||
from .. import bp
|
||||
from . import json_routes, routes
|
39
app/contributions/tesseract_ocr_pipeline_models/forms.py
Normal file
@@ -0,0 +1,39 @@
|
||||
from flask_wtf.file import FileField, FileRequired
|
||||
from wtforms import ValidationError
|
||||
from app.services import SERVICES
|
||||
from ..forms import ContributionBaseForm, UpdateContributionBaseForm
|
||||
|
||||
|
||||
class CreateTesseractOCRPipelineModelForm(ContributionBaseForm):
    '''
    Form for uploading a new Tesseract OCR Pipeline model.

    Adds a required model file upload to the fields inherited from
    ContributionBaseForm.
    '''
    tesseract_model_file = FileField('File', validators=[FileRequired()])

    def validate_tesseract_model_file(self, field):
        ''' Reject uploads whose file name does not end in ".traineddata". '''
        uploaded_name = field.data.filename.lower()
        if uploaded_name.endswith('.traineddata'):
            return
        raise ValidationError('traineddata files only!')

    def __init__(self, *args, **kwargs):
        '''
        Apply the default form prefix (unless the caller passed one) and
        fill the service version choices from the service manifest.
        '''
        kwargs.setdefault('prefix', 'create-tesseract-ocr-pipeline-model-form')
        # The manifest is looked up before the base class is initialised.
        available_versions = SERVICES['tesseract-ocr-pipeline']['versions']
        super().__init__(*args, **kwargs)
        versions_field = self.compatible_service_versions
        # Empty placeholder entry first, then one entry per known version.
        versions_field.choices = [('', 'Choose your option')] + [
            (version, version) for version in available_versions.keys()
        ]
        versions_field.default = ''
|
||||
|
||||
|
||||
class UpdateTesseractOCRPipelineModelForm(UpdateContributionBaseForm):
    '''
    Form for editing the metadata of an existing Tesseract OCR Pipeline
    model. It only uses the fields inherited from
    UpdateContributionBaseForm.
    '''

    def __init__(self, *args, **kwargs):
        '''
        Apply the default form prefix (unless the caller passed one) and
        fill the service version choices from the service manifest.
        '''
        kwargs.setdefault('prefix', 'edit-tesseract-ocr-pipeline-model-form')
        # The manifest is looked up before the base class is initialised.
        available_versions = SERVICES['tesseract-ocr-pipeline']['versions']
        super().__init__(*args, **kwargs)
        versions_field = self.compatible_service_versions
        # Empty placeholder entry first, then one entry per known version.
        versions_field.choices = [('', 'Choose your option')] + [
            (version, version) for version in available_versions.keys()
        ]
        versions_field.default = ''
|
@@ -0,0 +1,52 @@
|
||||
from flask import abort, current_app, request
|
||||
from flask_login import current_user
|
||||
from threading import Thread
|
||||
from app import db
|
||||
from app.decorators import content_negotiation, permission_required
|
||||
from app.models import TesseractOCRPipelineModel
|
||||
from . import bp
|
||||
|
||||
|
||||
@bp.route('/tesseract-ocr-pipeline-models/<hashid:tesseract_ocr_pipeline_model_id>', methods=['DELETE'])
@content_negotiation(produces='application/json')
def delete_tesseract_model(tesseract_ocr_pipeline_model_id):
    '''
    Delete a Tesseract OCR Pipeline model in a background thread.

    Responds with 404 if the model does not exist and with 403 if the
    current user is neither its owner nor an administrator. Otherwise the
    deletion is handed to a worker thread and a JSON message is returned
    with status 202.
    '''
    def _delete_tesseract_ocr_pipeline_model(app, tesseract_ocr_pipeline_model_id):
        # The worker runs outside of the request, so it needs its own
        # application context.
        with app.app_context():
            topm = TesseractOCRPipelineModel.query.get(tesseract_ocr_pipeline_model_id)
            if topm is None:
                # query.get() yields None when the row is gone already, e.g.
                # because a concurrent DELETE request finished first.
                return
            topm.delete()
            db.session.commit()

    topm = TesseractOCRPipelineModel.query.get_or_404(tesseract_ocr_pipeline_model_id)
    if not (topm.user == current_user or current_user.is_administrator()):
        abort(403)
    # Build the response before the worker starts so that it never reads
    # from an object that is being deleted concurrently.
    response_data = {
        'message': \
            f'Tesseract OCR Pipeline Model "{topm.title}" marked for deletion'
    }
    thread = Thread(
        target=_delete_tesseract_ocr_pipeline_model,
        args=(current_app._get_current_object(), topm.id)
    )
    thread.start()
    return response_data, 202
|
||||
|
||||
|
||||
@bp.route('/tesseract-ocr-pipeline-models/<hashid:tesseract_ocr_pipeline_model_id>/is_public', methods=['PUT'])
@permission_required('CONTRIBUTE')
@content_negotiation(consumes='application/json', produces='application/json')
def update_tesseract_ocr_pipeline_model_is_public(tesseract_ocr_pipeline_model_id):
    '''
    Set the ``is_public`` flag of a Tesseract OCR Pipeline model.

    The JSON request body must be a bare boolean, otherwise 400 is sent.
    Responds with 404 for an unknown model and 403 if the current user is
    neither its owner nor an administrator.
    '''
    is_public = request.json
    if not isinstance(is_public, bool):
        abort(400)
    model = TesseractOCRPipelineModel.query.get_or_404(tesseract_ocr_pipeline_model_id)
    may_edit = model.user == current_user or current_user.is_administrator()
    if not may_edit:
        abort(403)
    model.is_public = is_public
    db.session.commit()
    message = (
        f'Tesseract OCR Pipeline Model "{model.title}"'
        f' is now {"public" if is_public else "private"}'
    )
    return {'message': message}, 200
|
76
app/contributions/tesseract_ocr_pipeline_models/routes.py
Normal file
@@ -0,0 +1,76 @@
|
||||
from flask import abort, flash, redirect, render_template, url_for
|
||||
from flask_breadcrumbs import register_breadcrumb
|
||||
from flask_login import current_user
|
||||
from app import db
|
||||
from app.models import TesseractOCRPipelineModel
|
||||
from . import bp
|
||||
from .forms import (
|
||||
CreateTesseractOCRPipelineModelForm,
|
||||
UpdateTesseractOCRPipelineModelForm
|
||||
)
|
||||
from .utils import (
|
||||
tesseract_ocr_pipeline_model_dlc as tesseract_ocr_pipeline_model_dlc
|
||||
)
|
||||
|
||||
|
||||
@bp.route('/tesseract-ocr-pipeline-models')
@register_breadcrumb(bp, '.tesseract_ocr_pipeline_models', 'Tesseract OCR Pipeline Models')
def tesseract_ocr_pipeline_models():
    ''' Render the overview page of Tesseract OCR Pipeline models. '''
    template = 'contributions/tesseract_ocr_pipeline_models/tesseract_ocr_pipeline_models.html.j2'
    return render_template(template, title='Tesseract OCR Pipeline Models')
|
||||
|
||||
|
||||
@bp.route('/tesseract-ocr-pipeline-models/create', methods=['GET', 'POST'])
@register_breadcrumb(bp, '.tesseract_ocr_pipeline_models.create', 'Create')
def create_tesseract_ocr_pipeline_model():
    '''
    Show the creation form or, when it was submitted, create the model.

    A submission that fails validation is answered with the form errors
    and status 400. An OSError raised while creating the model results in
    a 500. On success an empty body with status 201 and a Location header
    pointing to the model overview is returned.
    '''
    form = CreateTesseractOCRPipelineModelForm()
    if not form.is_submitted():
        return render_template(
            'contributions/tesseract_ocr_pipeline_models/create.html.j2',
            title='Create Tesseract OCR Pipeline Model',
            form=form
        )
    if not form.validate():
        return {'errors': form.errors}, 400
    model_attributes = dict(
        compatible_service_versions=form.compatible_service_versions.data,
        description=form.description.data,
        publisher=form.publisher.data,
        publisher_url=form.publisher_url.data,
        publishing_url=form.publishing_url.data,
        publishing_year=form.publishing_year.data,
        is_public=False,
        title=form.title.data,
        version=form.version.data,
        user=current_user
    )
    try:
        topm = TesseractOCRPipelineModel.create(
            form.tesseract_model_file.data,
            **model_attributes
        )
    except OSError:
        abort(500)
    db.session.commit()
    flash(f'Tesseract OCR Pipeline model "{topm.title}" created')
    overview_url = url_for('.tesseract_ocr_pipeline_models')
    return {}, 201, {'Location': overview_url}
|
||||
|
||||
|
||||
@bp.route('/tesseract-ocr-pipeline-models/<hashid:tesseract_ocr_pipeline_model_id>', methods=['GET', 'POST'])
@register_breadcrumb(bp, '.tesseract_ocr_pipeline_models.entity', '', dynamic_list_constructor=tesseract_ocr_pipeline_model_dlc)
def tesseract_ocr_pipeline_model(tesseract_ocr_pipeline_model_id):
    '''
    Show and update a single Tesseract OCR Pipeline model.

    Responds with 404 for an unknown model and 403 if the current user is
    neither its owner nor an administrator. A valid submission is written
    to the model and the user is redirected to the model overview.
    '''
    topm = TesseractOCRPipelineModel.query.get_or_404(tesseract_ocr_pipeline_model_id)
    may_edit = topm.user == current_user or current_user.is_administrator()
    if not may_edit:
        abort(403)
    form = UpdateTesseractOCRPipelineModelForm(data=topm.to_json_serializeable())
    if not form.validate_on_submit():
        return render_template(
            'contributions/tesseract_ocr_pipeline_models/tesseract_ocr_pipeline_model.html.j2',
            title=f'{topm.title} {topm.version}',
            form=form,
            tesseract_ocr_pipeline_model=topm
        )
    form.populate_obj(topm)
    # Only notify and commit when the submission actually changed something.
    if db.session.is_modified(topm):
        flash(f'Tesseract OCR Pipeline model "{topm.title}" updated')
        db.session.commit()
    return redirect(url_for('.tesseract_ocr_pipeline_models'))
|
13
app/contributions/tesseract_ocr_pipeline_models/utils.py
Normal file
@@ -0,0 +1,13 @@
|
||||
from flask import request, url_for
|
||||
from app.models import TesseractOCRPipelineModel
|
||||
|
||||
|
||||
def tesseract_ocr_pipeline_model_dlc():
    '''
    Dynamic list constructor for the breadcrumb of a single model.

    Reads the model id from the view arguments of the current request and
    returns a one-element list holding the breadcrumb text and URL.
    Responds with 404 if the model does not exist.
    '''
    model_id = request.view_args['tesseract_ocr_pipeline_model_id']
    model = TesseractOCRPipelineModel.query.get_or_404(model_id)
    breadcrumb = {
        'text': f'{model.title} {model.version}',
        'url': url_for('.tesseract_ocr_pipeline_model', tesseract_ocr_pipeline_model_id=model_id)
    }
    return [breadcrumb]
|
@@ -0,0 +1,2 @@
|
||||
from .. import bp
|
||||
from . import routes
|
@@ -0,0 +1,7 @@
|
||||
from flask import abort
|
||||
from . import bp
|
||||
|
||||
|
||||
@bp.route('/transkribus_htr_pipeline_models')
def transkribus_htr_pipeline_models():
    ''' Always answer with 503; this view has no other behavior. '''
    service_unavailable = 503
    return abort(service_unavailable)
|
22
app/converters/cli.py
Normal file
@@ -0,0 +1,22 @@
|
||||
import click
|
||||
from . import bp
|
||||
from .sandpaper import SandpaperConverter
|
||||
|
||||
|
||||
@bp.cli.group('converter')
def converter():
    ''' Converter commands. '''
    # Pure command group: the docstring above doubles as the CLI help text.
|
||||
|
||||
@converter.group('sandpaper')
def sandpaper_converter():
    ''' Sandpaper converter commands. '''
    # Pure command group: the docstring above doubles as the CLI help text.
|
||||
|
||||
@sandpaper_converter.command('run')
@click.argument('json_db_file')
@click.argument('data_dir')
def run_sandpaper_converter(json_db_file, data_dir):
    ''' Run the sandpaper converter. '''
    # Build the converter from the two CLI arguments and run it right away.
    SandpaperConverter(json_db_file, data_dir).run()
|
@@ -7,101 +7,106 @@ import os
|
||||
import shutil
|
||||
|
||||
|
||||
def convert(json_db_file, data_dir):
|
||||
with open(json_db_file, 'r') as f:
|
||||
json_db = json.loads(f.read())
|
||||
class SandpaperConverter:
|
||||
def __init__(self, json_db_file, data_dir):
|
||||
self.json_db_file = json_db_file
|
||||
self.data_dir = data_dir
|
||||
|
||||
for json_user in json_db:
|
||||
if not json_user['confirmed']:
|
||||
current_app.logger.info(f'Skip unconfirmed user {json_user["username"]}')
|
||||
continue
|
||||
user_dir = os.path.join(data_dir, str(json_user['id']))
|
||||
convert_user(json_user, user_dir)
|
||||
db.session.commit()
|
||||
def run(self):
|
||||
with open(self.json_db_file, 'r') as f:
|
||||
json_db = json.loads(f.read())
|
||||
|
||||
for json_user in json_db:
|
||||
if not json_user['confirmed']:
|
||||
current_app.logger.info(f'Skip unconfirmed user {json_user["username"]}')
|
||||
continue
|
||||
user_dir = os.path.join(self.data_dir, str(json_user['id']))
|
||||
self.convert_user(json_user, user_dir)
|
||||
db.session.commit()
|
||||
|
||||
|
||||
def convert_user(json_user, user_dir):
|
||||
current_app.logger.info(f'Create User {json_user["username"]}...')
|
||||
user = User(
|
||||
confirmed=json_user['confirmed'],
|
||||
email=json_user['email'],
|
||||
last_seen=datetime.fromtimestamp(json_user['last_seen']),
|
||||
member_since=datetime.fromtimestamp(json_user['member_since']),
|
||||
password_hash=json_user['password_hash'], # TODO: Needs to be added manually
|
||||
username=json_user['username']
|
||||
)
|
||||
db.session.add(user)
|
||||
db.session.flush(objects=[user])
|
||||
db.session.refresh(user)
|
||||
try:
|
||||
user.makedirs()
|
||||
except OSError as e:
|
||||
current_app.logger.error(e)
|
||||
db.session.rollback()
|
||||
raise Exception('Internal Server Error')
|
||||
for json_corpus in json_user['corpora'].values():
|
||||
if not json_corpus['files'].values():
|
||||
current_app.logger.info(f'Skip empty corpus {json_corpus["title"]}')
|
||||
continue
|
||||
corpus_dir = os.path.join(user_dir, 'corpora', str(json_corpus['id']))
|
||||
convert_corpus(json_corpus, user, corpus_dir)
|
||||
current_app.logger.info('Done')
|
||||
|
||||
|
||||
def convert_corpus(json_corpus, user, corpus_dir):
|
||||
current_app.logger.info(f'Create Corpus {json_corpus["title"]}...')
|
||||
corpus = Corpus(
|
||||
user=user,
|
||||
creation_date=datetime.fromtimestamp(json_corpus['creation_date']),
|
||||
description=json_corpus['description'],
|
||||
title=json_corpus['title']
|
||||
)
|
||||
db.session.add(corpus)
|
||||
db.session.flush(objects=[corpus])
|
||||
db.session.refresh(corpus)
|
||||
try:
|
||||
corpus.makedirs()
|
||||
except OSError as e:
|
||||
current_app.logger.error(e)
|
||||
db.session.rollback()
|
||||
raise Exception('Internal Server Error')
|
||||
for json_corpus_file in json_corpus['files'].values():
|
||||
convert_corpus_file(json_corpus_file, corpus, corpus_dir)
|
||||
current_app.logger.info('Done')
|
||||
|
||||
|
||||
def convert_corpus_file(json_corpus_file, corpus, corpus_dir):
|
||||
current_app.logger.info(f'Create CorpusFile {json_corpus_file["title"]}...')
|
||||
corpus_file = CorpusFile(
|
||||
corpus=corpus,
|
||||
address=json_corpus_file['address'],
|
||||
author=json_corpus_file['author'],
|
||||
booktitle=json_corpus_file['booktitle'],
|
||||
chapter=json_corpus_file['chapter'],
|
||||
editor=json_corpus_file['editor'],
|
||||
filename=json_corpus_file['filename'],
|
||||
institution=json_corpus_file['institution'],
|
||||
journal=json_corpus_file['journal'],
|
||||
mimetype='application/vrt+xml',
|
||||
pages=json_corpus_file['pages'],
|
||||
publisher=json_corpus_file['publisher'],
|
||||
publishing_year=json_corpus_file['publishing_year'],
|
||||
school=json_corpus_file['school'],
|
||||
title=json_corpus_file['title']
|
||||
)
|
||||
db.session.add(corpus_file)
|
||||
db.session.flush(objects=[corpus_file])
|
||||
db.session.refresh(corpus_file)
|
||||
try:
|
||||
shutil.copy2(
|
||||
os.path.join(corpus_dir, json_corpus_file['filename']),
|
||||
corpus_file.path
|
||||
def convert_user(self, json_user, user_dir):
|
||||
current_app.logger.info(f'Create User {json_user["username"]}...')
|
||||
user = User(
|
||||
confirmed=json_user['confirmed'],
|
||||
email=json_user['email'],
|
||||
last_seen=datetime.fromtimestamp(json_user['last_seen']),
|
||||
member_since=datetime.fromtimestamp(json_user['member_since']),
|
||||
password_hash=json_user['password_hash'], # TODO: Needs to be added manually
|
||||
username=json_user['username']
|
||||
)
|
||||
except:
|
||||
current_app.logger.warning(
|
||||
'Can not convert corpus file: '
|
||||
f'{os.path.join(corpus_dir, json_corpus_file["filename"])}'
|
||||
' -> '
|
||||
f'{corpus_file.path}'
|
||||
db.session.add(user)
|
||||
db.session.flush(objects=[user])
|
||||
db.session.refresh(user)
|
||||
try:
|
||||
user.makedirs()
|
||||
except OSError as e:
|
||||
current_app.logger.error(e)
|
||||
db.session.rollback()
|
||||
raise Exception('Internal Server Error')
|
||||
for json_corpus in json_user['corpora'].values():
|
||||
if not json_corpus['files'].values():
|
||||
current_app.logger.info(f'Skip empty corpus {json_corpus["title"]}')
|
||||
continue
|
||||
corpus_dir = os.path.join(user_dir, 'corpora', str(json_corpus['id']))
|
||||
self.convert_corpus(json_corpus, user, corpus_dir)
|
||||
current_app.logger.info('Done')
|
||||
|
||||
|
||||
def convert_corpus(self, json_corpus, user, corpus_dir):
|
||||
current_app.logger.info(f'Create Corpus {json_corpus["title"]}...')
|
||||
corpus = Corpus(
|
||||
user=user,
|
||||
creation_date=datetime.fromtimestamp(json_corpus['creation_date']),
|
||||
description=json_corpus['description'],
|
||||
title=json_corpus['title']
|
||||
)
|
||||
current_app.logger.info('Done')
|
||||
db.session.add(corpus)
|
||||
db.session.flush(objects=[corpus])
|
||||
db.session.refresh(corpus)
|
||||
try:
|
||||
corpus.makedirs()
|
||||
except OSError as e:
|
||||
current_app.logger.error(e)
|
||||
db.session.rollback()
|
||||
raise Exception('Internal Server Error')
|
||||
for json_corpus_file in json_corpus['files'].values():
|
||||
self.convert_corpus_file(json_corpus_file, corpus, corpus_dir)
|
||||
current_app.logger.info('Done')
|
||||
|
||||
|
||||
def convert_corpus_file(self, json_corpus_file, corpus, corpus_dir):
    '''
    Create a CorpusFile from its JSON description and copy its data file.

    :param json_corpus_file: dict holding the corpus file metadata (the
        keys read here are listed in the CorpusFile call below)
    :param corpus: the Corpus the new file is attached to
    :param corpus_dir: directory that contains the file to be copied

    The copy step is best effort: a failure is logged as a warning and the
    conversion continues.
    '''
    current_app.logger.info(f'Create CorpusFile {json_corpus_file["title"]}...')
    corpus_file = CorpusFile(
        corpus=corpus,
        address=json_corpus_file['address'],
        author=json_corpus_file['author'],
        booktitle=json_corpus_file['booktitle'],
        chapter=json_corpus_file['chapter'],
        editor=json_corpus_file['editor'],
        filename=json_corpus_file['filename'],
        institution=json_corpus_file['institution'],
        journal=json_corpus_file['journal'],
        mimetype='application/vrt+xml',
        pages=json_corpus_file['pages'],
        publisher=json_corpus_file['publisher'],
        publishing_year=json_corpus_file['publishing_year'],
        school=json_corpus_file['school'],
        title=json_corpus_file['title']
    )
    db.session.add(corpus_file)
    # Flush and refresh before corpus_file.path is read for the copy below.
    db.session.flush(objects=[corpus_file])
    db.session.refresh(corpus_file)
    try:
        shutil.copy2(
            os.path.join(corpus_dir, json_corpus_file['filename']),
            corpus_file.path
        )
    except Exception:
        # Stay best effort, but no longer swallow KeyboardInterrupt and
        # SystemExit the way the former bare "except:" did.
        current_app.logger.warning(
            'Can not convert corpus file: '
            f'{os.path.join(corpus_dir, json_corpus_file["filename"])}'
            ' -> '
            f'{corpus_file.path}'
        )
    current_app.logger.info('Done')
|
||||
|
@@ -1,5 +1,19 @@
|
||||
from flask import Blueprint
|
||||
from flask_login import login_required
|
||||
|
||||
|
||||
bp = Blueprint('corpora', __name__)
|
||||
from . import cqi_over_socketio, routes # noqa
|
||||
bp.cli.short_help = 'Corpus commands.'
|
||||
|
||||
|
||||
@bp.before_request
@login_required
def before_request():
    '''
    Require a logged in user for every route of this blueprint.

    The function body is intentionally empty, the check is done by the
    login_required decorator.
    '''
|
||||
|
||||
|
||||
from . import cli, files, followers, routes, json_routes
|
||||
|
27
app/corpora/cli.py
Normal file
@@ -0,0 +1,27 @@
|
||||
from app.models import Corpus, CorpusStatus
|
||||
import os
|
||||
import shutil
|
||||
from app import db
|
||||
from . import bp
|
||||
|
||||
|
||||
@bp.cli.command('reset')
def reset():
    ''' Reset built corpora. '''
    resettable_states = (
        CorpusStatus.QUEUED,
        CorpusStatus.BUILDING,
        CorpusStatus.BUILT,
        CorpusStatus.STARTING_ANALYSIS_SESSION,
        CorpusStatus.RUNNING_ANALYSIS_SESSION,
        CorpusStatus.CANCELING_ANALYSIS_SESSION
    )
    for corpus in Corpus.query.all():
        if corpus.status not in resettable_states:
            continue
        print(f'Resetting corpus {corpus}')
        cwb_dir = os.path.join(corpus.path, 'cwb')
        # Throw away the old build and recreate the empty directory layout.
        shutil.rmtree(cwb_dir, ignore_errors=True)
        for directory in (
            cwb_dir,
            os.path.join(cwb_dir, 'data'),
            os.path.join(cwb_dir, 'registry')
        ):
            os.mkdir(directory)
        corpus.status = CorpusStatus.UNPREPARED
        corpus.num_analysis_sessions = 0
    db.session.commit()
|
206
app/corpora/cqi_over_sio/__init__.py
Normal file
@@ -0,0 +1,206 @@
|
||||
from cqi import CQiClient
|
||||
from cqi.errors import CQiException
|
||||
from cqi.status import CQiStatus
|
||||
from docker.models.containers import Container
|
||||
from flask import current_app, session
|
||||
from flask_login import current_user
|
||||
from flask_socketio import Namespace
|
||||
from inspect import signature
|
||||
from threading import Lock
|
||||
from typing import Callable, Dict, List, Optional
|
||||
from app import db, docker_client, hashids, socketio
|
||||
from app.decorators import socketio_login_required
|
||||
from app.models import Corpus, CorpusStatus
|
||||
from . import extensions
|
||||
|
||||
|
||||
'''
|
||||
This package tunnels the Corpus Query interface (CQi) protocol through
|
||||
Socket.IO (SIO) by tunneling CQi API calls through an event called "exec".
|
||||
|
||||
Basic concept:
|
||||
1. A client connects to the "/cqi_over_sio" namespace.
|
||||
2. The client emits the "init" event and provides a corpus id for the corpus
|
||||
that should be analysed in this session.
|
||||
1.1 The analysis session counter of the corpus is incremented.
|
||||
1.2 A CQiClient and a (Mutex) Lock belonging to it is created.
|
||||
1.3 Wait until the CQP server is running.
|
||||
1.4 Connect the CQiClient to the server.
|
||||
1.5 Save the CQiClient, the Lock and the corpus id in the session for
|
||||
subsequent use.
|
||||
3. The client emits the "exec" event and provides the name of a CQi API function
|
||||
and its arguments (optional).
|
||||
- The event "exec" handler will execute the function, make sure that the
|
||||
result is serializable and returns the result back to the client.
|
||||
4. Wait for more events
|
||||
5. The client disconnects from the "/cqi_over_sio" namespace
|
||||
1.1 The analysis session counter of the corpus is decremented.
|
||||
1.2 The CQiClient and (Mutex) Lock belonging to it are torn down.
|
||||
'''
|
||||
|
||||
CQI_API_FUNCTION_NAMES: List[str] = [
|
||||
'ask_feature_cl_2_3',
|
||||
'ask_feature_cqi_1_0',
|
||||
'ask_feature_cqp_2_3',
|
||||
'cl_alg2cpos',
|
||||
'cl_attribute_size',
|
||||
'cl_cpos2alg',
|
||||
'cl_cpos2id',
|
||||
'cl_cpos2lbound',
|
||||
'cl_cpos2rbound',
|
||||
'cl_cpos2str',
|
||||
'cl_cpos2struc',
|
||||
'cl_drop_attribute',
|
||||
'cl_id2cpos',
|
||||
'cl_id2freq',
|
||||
'cl_id2str',
|
||||
'cl_idlist2cpos',
|
||||
'cl_lexicon_size',
|
||||
'cl_regex2id',
|
||||
'cl_str2id',
|
||||
'cl_struc2cpos',
|
||||
'cl_struc2str',
|
||||
'corpus_alignment_attributes',
|
||||
'corpus_charset',
|
||||
'corpus_drop_corpus',
|
||||
'corpus_full_name',
|
||||
'corpus_info',
|
||||
'corpus_list_corpora',
|
||||
'corpus_positional_attributes',
|
||||
'corpus_properties',
|
||||
'corpus_structural_attribute_has_values',
|
||||
'corpus_structural_attributes',
|
||||
'cqp_drop_subcorpus',
|
||||
'cqp_dump_subcorpus',
|
||||
'cqp_fdist_1',
|
||||
'cqp_fdist_2',
|
||||
'cqp_list_subcorpora',
|
||||
'cqp_query',
|
||||
'cqp_subcorpus_has_field',
|
||||
'cqp_subcorpus_size',
|
||||
'ctrl_bye',
|
||||
'ctrl_connect',
|
||||
'ctrl_last_general_error',
|
||||
'ctrl_ping',
|
||||
'ctrl_user_abort'
|
||||
]
|
||||
|
||||
|
||||
class CQiNamespace(Namespace):
|
||||
@socketio_login_required
def on_connect(self):
    '''
    Accept the connection. Nothing happens here besides the check done by
    the socketio_login_required decorator.
    '''
    return None
|
||||
|
||||
@socketio_login_required
def on_init(self, db_corpus_hashid: str):
    '''
    Start an analysis session for the corpus identified by the hashid.

    Returns a dict with "code" and "msg": 404 for an unknown corpus, 403 if
    the current user may not access it, 424 if the corpus is in a status
    that cannot be analysed, 408 if it does not reach the running status
    in time and 200 once the CQi client is stored in the session.
    '''
    corpus_id: int = hashids.decode(db_corpus_hashid)
    corpus: Optional[Corpus] = Corpus.query.get(corpus_id)
    if corpus is None:
        return {'code': 404, 'msg': 'Not Found'}
    may_access = (
        corpus.user == current_user
        or current_user.is_following_corpus(corpus)
        or current_user.is_administrator()
    )
    if not may_access:
        return {'code': 403, 'msg': 'Forbidden'}
    analysable_states = (
        CorpusStatus.BUILT,
        CorpusStatus.STARTING_ANALYSIS_SESSION,
        CorpusStatus.RUNNING_ANALYSIS_SESSION,
        CorpusStatus.CANCELING_ANALYSIS_SESSION
    )
    if corpus.status not in analysable_states:
        return {'code': 424, 'msg': 'Failed Dependency'}
    if corpus.num_analysis_sessions is None:
        corpus.num_analysis_sessions = 0
        db.session.commit()
    # Assigning the column expression makes the database do the increment.
    corpus.num_analysis_sessions = Corpus.num_analysis_sessions + 1
    db.session.commit()
    attempts_left: int = 20
    while corpus.status != CorpusStatus.RUNNING_ANALYSIS_SESSION:
        if attempts_left == 0:
            # Give up and take back the session counted above.
            corpus.num_analysis_sessions = Corpus.num_analysis_sessions - 1
            db.session.commit()
            return {'code': 408, 'msg': 'Request Timeout'}
        socketio.sleep(3)
        attempts_left -= 1
        db.session.refresh(corpus)
    # The client connects to the IP address of the container within the
    # configured Docker network instead of using the container name.
    container: Container = docker_client.containers.get(f'cqpserver_{corpus_id}')
    networks = container.attrs['NetworkSettings']['Networks']
    network_name = current_app.config['NOPAQUE_DOCKER_NETWORK_NAME']
    host: str = networks[network_name]['IPAddress']
    session['cqi_over_sio'] = {
        'cqi_client': CQiClient(host),
        'cqi_client_lock': Lock(),
        'db_corpus_id': corpus_id
    }
    return {'code': 200, 'msg': 'OK'}
|
||||
|
||||
@socketio_login_required
def on_exec(self, fn_name: str, fn_args: Optional[Dict] = None):
    '''
    Execute a CQi API or extension function for the current session.

    :param fn_name: name of the function, it must be listed in
        CQI_API_FUNCTION_NAMES or extensions.CQI_EXTENSION_FUNCTION_NAMES
    :param fn_args: keyword arguments for the function (optional)
    :returns: a dict with "code" and "msg". 424 if "init" did not run for
        this session, 400 for an unknown function or unusable arguments,
        500 on a broken pipe, 502 (with error payload) on a CQiException
        and 200 with the result as "payload" otherwise.
    '''
    try:
        cqi_client: CQiClient = session['cqi_over_sio']['cqi_client']
        cqi_client_lock: Lock = session['cqi_over_sio']['cqi_client_lock']
    except KeyError:
        return {'code': 424, 'msg': 'Failed Dependency'}
    # The arguments are sent by the client. Accept a missing value as "no
    # arguments" and reject anything that is not a dict.
    if fn_args is None:
        fn_args = {}
    elif not isinstance(fn_args, dict):
        return {'code': 400, 'msg': 'Bad Request'}
    if fn_name in CQI_API_FUNCTION_NAMES:
        fn: Callable = getattr(cqi_client.api, fn_name)
    elif fn_name in extensions.CQI_EXTENSION_FUNCTION_NAMES:
        fn: Callable = getattr(extensions, fn_name)
    else:
        return {'code': 400, 'msg': 'Bad Request'}
    fn_parameters = signature(fn).parameters
    # Unexpected argument names would raise a TypeError in the call below,
    # so they are rejected unless the function takes arbitrary keywords.
    accepts_any_keyword = any(
        param.kind is param.VAR_KEYWORD for param in fn_parameters.values()
    )
    if not accepts_any_keyword:
        if any(name not in fn_parameters for name in fn_args):
            return {'code': 400, 'msg': 'Bad Request'}
    for param in fn_parameters.values():
        if param.name not in fn_args:
            if param.default is param.empty:
                # A required argument is missing.
                return {'code': 400, 'msg': 'Bad Request'}
            continue
        # The type of a given argument must match the annotation exactly.
        if type(fn_args[param.name]) is not param.annotation:
            return {'code': 400, 'msg': 'Bad Request'}
    # The lock serializes the calls that use this session's client.
    with cqi_client_lock:
        try:
            fn_return_value = fn(**fn_args)
        except BrokenPipeError:
            return {'code': 500, 'msg': 'Internal Server Error'}
        except CQiException as e:
            return {
                'code': 502,
                'msg': 'Bad Gateway',
                'payload': {
                    'code': e.code,
                    'desc': e.description,
                    'msg': e.__class__.__name__
                }
            }
    if isinstance(fn_return_value, CQiStatus):
        # Status objects are reduced to their code and class name.
        payload = {
            'code': fn_return_value.code,
            'msg': fn_return_value.__class__.__name__
        }
    else:
        payload = fn_return_value
    return {'code': 200, 'msg': 'OK', 'payload': payload}
|
||||
|
||||
def on_disconnect(self):
    """Tear down the analysis session when the client disconnects.

    Does nothing if no session state was stored. Otherwise removes the
    state from the session, sends ``ctrl_bye`` to the CQP server (errors
    from a dead connection are ignored) and decrements the corpus'
    analysis session counter.
    """
    try:
        cqi_client = session['cqi_over_sio']['cqi_client']
        cqi_client_lock = session['cqi_over_sio']['cqi_client_lock']
        db_corpus_id: int = session['cqi_over_sio']['db_corpus_id']
    except KeyError:
        return

    # Hold the lock via a context manager so it is released even if
    # ctrl_bye() raises something other than the expected errors.
    with cqi_client_lock:
        session.pop('cqi_over_sio', None)
        try:
            cqi_client.api.ctrl_bye()
        except (BrokenPipeError, CQiException):
            # Best effort: the server side may already be gone.
            pass

    db_corpus: Optional[Corpus] = Corpus.query.get(db_corpus_id)
    if db_corpus is None:
        return
    db_corpus.num_analysis_sessions = Corpus.num_analysis_sessions - 1
    db.session.commit()
|
288
app/corpora/cqi_over_sio/extensions.py
Normal file
@@ -0,0 +1,288 @@
|
||||
from collections import Counter
|
||||
from cqi import CQiClient
|
||||
from cqi.models.corpora import Corpus as CQiCorpus
|
||||
from cqi.models.subcorpora import Subcorpus as CQiSubcorpus
|
||||
from cqi.models.attributes import (
|
||||
PositionalAttribute as CQiPositionalAttribute,
|
||||
StructuralAttribute as CQiStructuralAttribute
|
||||
)
|
||||
from cqi.status import StatusOk as CQiStatusOk
|
||||
from flask import session
|
||||
from typing import Dict, List
|
||||
import gzip
|
||||
import json
|
||||
import math
|
||||
import os
|
||||
from app import db
|
||||
from app.models import Corpus
|
||||
from .utils import lookups_by_cpos, partial_export_subcorpus, export_subcorpus
|
||||
|
||||
|
||||
# Names of the extension functions defined in this module. Each entry
# corresponds to a module-level function of the same name below.
CQI_EXTENSION_FUNCTION_NAMES: List[str] = [
    'ext_corpus_update_db',
    'ext_corpus_static_data',
    'ext_corpus_paginate_corpus',
    'ext_cqp_paginate_subcorpus',
    'ext_cqp_partial_export_subcorpus',
    'ext_cqp_export_subcorpus',
]
|
||||
|
||||
|
||||
def ext_corpus_update_db(corpus: str) -> CQiStatusOk:
    """Copy the size of the CQi corpus into the database corpus record.

    Args:
        corpus: Name of the CQi corpus to read the size from.

    Returns:
        A CQiStatusOk instance after the change has been committed.
    """
    client: CQiClient = session['cqi_over_sio']['cqi_client']
    corpus_id: int = session['cqi_over_sio']['db_corpus_id']
    corpus_record: Corpus = Corpus.query.get(corpus_id)
    remote_corpus: CQiCorpus = client.corpora.get(corpus)
    # Store the token count reported by the CQP server.
    corpus_record.num_tokens = remote_corpus.size
    db.session.commit()
    return CQiStatusOk()
|
||||
|
||||
|
||||
def ext_corpus_static_data(corpus: str) -> bytes:
    """Return the gzip-compressed JSON "static data" of a corpus.

    The data is cached in ``<corpus path>/cwb/static.json.gz``. If that file
    exists its raw bytes are returned. Otherwise the data is collected via
    CQi, written to the cache file and the file's bytes are returned.

    Args:
        corpus: Name of the CQi corpus to collect the data from.

    Returns:
        The raw (still gzip-compressed) bytes of the cache file.
    """
    db_corpus_id: int = session['cqi_over_sio']['db_corpus_id']
    db_corpus: Corpus = Corpus.query.get(db_corpus_id)

    static_data_file_path: str = os.path.join(db_corpus.path, 'cwb', 'static.json.gz')
    if os.path.exists(static_data_file_path):
        with open(static_data_file_path, 'rb') as f:
            return f.read()

    cqi_client: CQiClient = session['cqi_over_sio']['cqi_client']
    cqi_corpus: CQiCorpus = cqi_client.corpora.get(corpus)
    cqi_p_attrs: List[CQiPositionalAttribute] = cqi_corpus.positional_attributes.list()
    cqi_s_attrs: List[CQiStructuralAttribute] = cqi_corpus.structural_attributes.list()

    static_data = {
        'corpus': {
            'bounds': [0, cqi_corpus.size - 1],
            'freqs': {}
        },
        'p_attrs': {},
        's_attrs': {},
        'values': {'p_attrs': {}, 's_attrs': {}}
    }

    for p_attr in cqi_p_attrs:
        # Frequency of every lexicon entry of this positional attribute.
        print(f'corpus.freqs.{p_attr.name}')
        p_attr_id_list: List[int] = list(range(p_attr.lexicon_size))
        static_data['corpus']['freqs'][p_attr.name] = list(p_attr.freqs_by_ids(p_attr_id_list))

        # Lexicon id for every corpus position.
        print(f'p_attrs.{p_attr.name}')
        cpos_list: List[int] = list(range(cqi_corpus.size))
        static_data['p_attrs'][p_attr.name] = list(p_attr.ids_by_cpos(cpos_list))
        del cpos_list

        # String value of every lexicon entry.
        print(f'values.p_attrs.{p_attr.name}')
        static_data['values']['p_attrs'][p_attr.name] = list(p_attr.values_by_ids(p_attr_id_list))
        del p_attr_id_list

    for s_attr in cqi_s_attrs:
        # Attributes with values are handled as sub attributes of their
        # parent further down.
        if s_attr.has_values:
            continue

        static_data['s_attrs'][s_attr.name] = {'lexicon': [], 'values': None}

        if s_attr.name in ['s', 'ent']:
            ##############################################################
            # A faster way to get cpos boundaries for smaller s_attrs    #
            # Note: Needs more testing, don't use it in production       #
            ##############################################################
            cqi_corpus.query('Last', f'<{s_attr.name}> []* </{s_attr.name}>;')
            cqi_subcorpus: CQiSubcorpus = cqi_corpus.subcorpora.get('Last')
            first_match: int = 0
            last_match: int = cqi_subcorpus.size - 1
            # Both dumps are evaluated here, before the subcorpus is dropped.
            match_boundaries = zip(
                range(first_match, last_match + 1),
                cqi_subcorpus.dump(
                    cqi_subcorpus.fields['match'],
                    first_match,
                    last_match
                ),
                cqi_subcorpus.dump(
                    cqi_subcorpus.fields['matchend'],
                    first_match,
                    last_match
                )
            )
            cqi_subcorpus.drop()
            del cqi_subcorpus, first_match, last_match
            for id, lbound, rbound in match_boundaries:
                static_data['s_attrs'][s_attr.name]['lexicon'].append({})
                print(f's_attrs.{s_attr.name}.lexicon.{id}.bounds')
                static_data['s_attrs'][s_attr.name]['lexicon'][id]['bounds'] = [lbound, rbound]
            del match_boundaries

        if s_attr.name != 'text':
            continue

        for id in range(0, s_attr.size):
            static_data['s_attrs'][s_attr.name]['lexicon'].append({})
            # This is a very slow operation, that's why we only use it for
            # the text attribute
            lbound, rbound = s_attr.cpos_by_id(id)
            print(f's_attrs.{s_attr.name}.lexicon.{id}.bounds')
            static_data['s_attrs'][s_attr.name]['lexicon'][id]['bounds'] = [lbound, rbound]
            static_data['s_attrs'][s_attr.name]['lexicon'][id]['freqs'] = {}
            cpos_list: List[int] = list(range(lbound, rbound + 1))
            for p_attr in cqi_p_attrs:
                p_attr_ids: List[int] = list(p_attr.ids_by_cpos(cpos_list))
                print(f's_attrs.{s_attr.name}.lexicon.{id}.freqs.{p_attr.name}')
                static_data['s_attrs'][s_attr.name]['lexicon'][id]['freqs'][p_attr.name] = dict(Counter(p_attr_ids))
                del p_attr_ids
            del cpos_list

        sub_s_attrs: List[CQiStructuralAttribute] = cqi_corpus.structural_attributes.list(filters={'part_of': s_attr})
        print(f's_attrs.{s_attr.name}.values')
        # Sub attribute names are "<parent>_<name>"; keep only "<name>".
        static_data['s_attrs'][s_attr.name]['values'] = [
            sub_s_attr.name[(len(s_attr.name) + 1):]
            for sub_s_attr in sub_s_attrs
        ]
        s_attr_id_list: List[int] = list(range(s_attr.size))
        # One list of values per sub attribute, indexed by s_attr id.
        sub_s_attr_values: List[List[str]] = [
            list(sub_s_attr.values_by_ids(s_attr_id_list))
            for sub_s_attr in sub_s_attrs
        ]
        del s_attr_id_list
        print(f'values.s_attrs.{s_attr.name}')
        static_data['values']['s_attrs'][s_attr.name] = [
            {
                s_attr_value_name: sub_s_attr_values[s_attr_value_name_idx][s_attr_id]
                for s_attr_value_name_idx, s_attr_value_name in enumerate(
                    static_data['s_attrs'][s_attr.name]['values']
                )
            } for s_attr_id in range(0, s_attr.size)
        ]
        del sub_s_attr_values

    print('Saving static data to file')
    # Write to a temporary file and move it into place afterwards, so an
    # interrupted run never leaves a truncated cache file behind that later
    # calls would serve as-is.
    tmp_file_path: str = f'{static_data_file_path}.tmp'
    with gzip.open(tmp_file_path, 'wt') as f:
        json.dump(static_data, f)
    os.replace(tmp_file_path, static_data_file_path)
    del static_data
    print('Sending static data to client')
    with open(static_data_file_path, 'rb') as f:
        return f.read()
|
||||
|
||||
|
||||
def ext_corpus_paginate_corpus(
    corpus: str,
    page: int = 1,
    per_page: int = 20
) -> Dict:
    """Return one page of corpus positions together with their lookups.

    Args:
        corpus: Name of the CQi corpus.
        page: 1-indexed page number.
        per_page: Number of corpus positions per page.

    Returns:
        A pagination dict (items, lookups, total, per_page, pages, page,
        has_prev, has_next, prev_num, next_num), or
        ``{'code': 416, 'msg': 'Range Not Satisfiable'}`` if the requested
        page is out of range.
    """
    client: CQiClient = session['cqi_over_sio']['cqi_client']
    cqi_corpus = client.corpora.get(corpus)

    # Sanity checks; per_page is validated first so the division is safe.
    if per_page < 1 or page < 1:
        return {'code': 416, 'msg': 'Range Not Satisfiable'}
    if cqi_corpus.size > 0 and page > math.ceil(cqi_corpus.size / per_page):
        return {'code': 416, 'msg': 'Range Not Satisfiable'}

    start = (page - 1) * per_page
    stop = min(cqi_corpus.size, start + per_page)
    cpos_list = list(range(start, stop))
    lookups = lookups_by_cpos(cqi_corpus, cpos_list)

    total = cqi_corpus.size
    pages = math.ceil(total / per_page)
    # An empty corpus has no current page.
    current = page if pages > 0 else None
    has_prev = current > 1 if current else False
    has_next = current < pages if current else False

    return {
        # the items for the current page
        'items': [cpos_list],
        # the lookups for the items
        'lookups': lookups,
        # the total number of items
        'total': total,
        # the number of items to be displayed on a page
        'per_page': per_page,
        # the total number of pages
        'pages': pages,
        # the current page number (1 indexed)
        'page': current,
        # True if a previous page exists
        'has_prev': has_prev,
        # True if a next page exists
        'has_next': has_next,
        # number of the previous page
        'prev_num': current - 1 if has_prev else None,
        # number of the next page
        'next_num': current + 1 if has_next else None
    }
|
||||
|
||||
|
||||
def ext_cqp_paginate_subcorpus(
    subcorpus: str,
    context: int = 50,
    page: int = 1,
    per_page: int = 20
) -> Dict:
    """Return one page of subcorpus matches together with their lookups.

    Args:
        subcorpus: "<corpus name>:<subcorpus name>".
        context: Context size passed on to export_subcorpus.
        page: 1-indexed page number.
        per_page: Number of matches per page.

    Returns:
        A pagination dict (items, lookups, total, per_page, pages, page,
        has_prev, has_next, prev_num, next_num), or
        ``{'code': 416, 'msg': 'Range Not Satisfiable'}`` if the requested
        page is out of range.
    """
    corpus_name, subcorpus_name = subcorpus.split(':', 1)
    client: CQiClient = session['cqi_over_sio']['cqi_client']
    cqi_subcorpus = client.corpora.get(corpus_name).subcorpora.get(subcorpus_name)

    # Sanity checks; per_page is validated first so the division is safe.
    if per_page < 1 or page < 1:
        return {'code': 416, 'msg': 'Range Not Satisfiable'}
    if cqi_subcorpus.size > 0 and page > math.ceil(cqi_subcorpus.size / per_page):
        return {'code': 416, 'msg': 'Range Not Satisfiable'}

    export = export_subcorpus(
        cqi_subcorpus,
        context=context,
        cutoff=per_page,
        offset=(page - 1) * per_page
    )
    # What remains in the export after removing the matches are the lookups.
    items = export.pop('matches')

    total = cqi_subcorpus.size
    pages = math.ceil(total / per_page)
    # An empty subcorpus has no current page.
    current = page if pages > 0 else None
    has_prev = current > 1 if current else False
    has_next = current < pages if current else False

    return {
        # the items for the current page
        'items': items,
        # the lookups for the items
        'lookups': export,
        # the total number of items matching the query
        'total': total,
        # the number of items to be displayed on a page
        'per_page': per_page,
        # the total number of pages
        'pages': pages,
        # the current page number (1 indexed)
        'page': current,
        # True if a previous page exists
        'has_prev': has_prev,
        # True if a next page exists
        'has_next': has_next,
        # number of the previous page
        'prev_num': current - 1 if has_prev else None,
        # number of the next page
        'next_num': current + 1 if has_next else None
    }
|
||||
|
||||
|
||||
def ext_cqp_partial_export_subcorpus(
    subcorpus: str,
    match_id_list: list,
    context: int = 50
) -> Dict:
    """Export selected matches of a subcorpus.

    Args:
        subcorpus: "<corpus name>:<subcorpus name>".
        match_id_list: Ids of the matches to export.
        context: Context size passed on to partial_export_subcorpus.

    Returns:
        The result of partial_export_subcorpus for the resolved subcorpus.
    """
    corpus_name, subcorpus_name = subcorpus.split(':', 1)
    client: CQiClient = session['cqi_over_sio']['cqi_client']
    target = client.corpora.get(corpus_name).subcorpora.get(subcorpus_name)
    return partial_export_subcorpus(target, match_id_list, context=context)
|
||||
|
||||
|
||||
def ext_cqp_export_subcorpus(
    subcorpus: str,
    context: int = 50
) -> Dict:
    """Export all matches of a subcorpus.

    Args:
        subcorpus: "<corpus name>:<subcorpus name>".
        context: Context size passed on to export_subcorpus.

    Returns:
        The result of export_subcorpus for the resolved subcorpus.
    """
    corpus_name, subcorpus_name = subcorpus.split(':', 1)
    client: CQiClient = session['cqi_over_sio']['cqi_client']
    target = client.corpora.get(corpus_name).subcorpora.get(subcorpus_name)
    return export_subcorpus(target, context=context)
|
131
app/corpora/cqi_over_sio/utils.py
Normal file
@@ -0,0 +1,131 @@
|
||||
from cqi.models.corpora import Corpus as CQiCorpus
|
||||
from cqi.models.subcorpora import Subcorpus as CQiSubcorpus
|
||||
from typing import Dict, List
|
||||
|
||||
|
||||
def lookups_by_cpos(corpus: CQiCorpus, cpos_list: List[int]) -> Dict:
    """Build lookup tables for a list of corpus positions.

    Returns a dict with:
      - 'cpos_lookup': cpos -> {attribute name -> value or id}; positional
        attributes contribute their value, structural attributes without
        values contribute their id (skipped where the id is -1).
      - '<s_attr>_lookup' for every such structural attribute that occurs
        and has sub attributes: id -> {sub attribute name -> value}.
    """
    by_cpos = {cpos: {} for cpos in cpos_list}
    result = {'cpos_lookup': by_cpos}

    for p_attr in corpus.positional_attributes.list():
        values: List[str] = p_attr.values_by_cpos(cpos_list)
        for idx, cpos in enumerate(cpos_list):
            by_cpos[cpos][p_attr.name] = values[idx]

    for s_attr in corpus.structural_attributes.list():
        # Only non sub attributes are iterated here; those are the ones
        # with has_values == False.
        if s_attr.has_values:
            continue
        ids: List[int] = s_attr.ids_by_cpos(cpos_list)
        for idx, cpos in enumerate(cpos_list):
            # -1 entries are skipped.
            if ids[idx] != -1:
                by_cpos[cpos][s_attr.name] = ids[idx]
        seen_ids = [attr_id for attr_id in set(ids) if attr_id != -1]
        if len(seen_ids) == 0:
            continue
        children = corpus.structural_attributes.list(filters={'part_of': s_attr})
        if len(children) == 0:
            continue
        table = {attr_id: {} for attr_id in seen_ids}
        result[f'{s_attr.name}_lookup'] = table
        # Sub attribute names are "<parent>_<name>"; strip the prefix.
        prefix_len = len(s_attr.name) + 1
        for child in children:
            short_name = child.name[prefix_len:]
            for idx, value in enumerate(child.values_by_ids(seen_ids)):
                table[seen_ids[idx]][short_name] = value
    return result
|
||||
|
||||
|
||||
def partial_export_subcorpus(
    subcorpus: CQiSubcorpus,
    match_id_list: List[int],
    context: int = 25
) -> Dict:
    """Export the given matches of a subcorpus including context.

    Match ids outside ``0 .. subcorpus.size - 1`` are ignored.

    Returns:
        ``{'matches': [...], **lookups}`` where every match is a dict with
        'num', 'lc', 'c' and 'rc'. 'c' is the (start, end) cpos pair of the
        match; 'lc'/'rc' are (start, end) pairs of the left/right context,
        or None when there is no context on that side. The lookups come
        from lookups_by_cpos for all covered corpus positions. For an empty
        subcorpus only ``{"matches": []}`` is returned.
    """
    if subcorpus.size == 0:
        return {"matches": []}

    covered_cpos = set()
    exported = []
    for match_id in match_id_list:
        if match_id < 0 or match_id >= subcorpus.size:
            continue
        start = subcorpus.dump(subcorpus.fields['match'], match_id, match_id)[0]
        end = subcorpus.dump(subcorpus.fields['matchend'], match_id, match_id)[0]
        last_cpos = subcorpus.collection.corpus.size - 1

        # Left context: none at the corpus start or when context is disabled.
        if start == 0 or context == 0:
            left = None
            first_covered = start
        else:
            first_covered = max(0, (start - context))
            left = (first_covered, start - 1)

        # Right context: none at the corpus end or when context is disabled.
        if end == last_cpos or context == 0:
            right = None
            last_covered = end
        else:
            last_covered = min((end + context), last_cpos)
            right = (end + 1, last_covered)

        exported.append({'num': match_id, 'lc': left, 'c': (start, end), 'rc': right})
        covered_cpos.update(range(first_covered, last_covered + 1))

    lookups = lookups_by_cpos(subcorpus.collection.corpus, list(covered_cpos))
    return {'matches': exported, **lookups}
|
||||
|
||||
|
||||
def export_subcorpus(
    subcorpus: CQiSubcorpus,
    context: int = 25,
    cutoff: float = float('inf'),
    offset: int = 0
) -> Dict:
    """Export a range of matches of a subcorpus including context.

    Args:
        subcorpus: The subcorpus to export from.
        context: Number of corpus positions of context on each side.
        cutoff: Maximum number of matches to export.
        offset: Number of the first match to export.

    Returns:
        ``{'matches': [...], **lookups}`` where every match is a dict with
        'num', 'lc', 'c' and 'rc'. 'c' is the (start, end) cpos pair of the
        match; 'lc'/'rc' are (start, end) pairs of the left/right context,
        or None when there is no context on that side. The lookups come
        from lookups_by_cpos for all covered corpus positions. For an empty
        subcorpus only ``{"matches": []}`` is returned.
    """
    if subcorpus.size == 0:
        return {"matches": []}

    first_match = max(0, offset)
    last_match = min((offset + cutoff - 1), (subcorpus.size - 1))
    match_nums = range(first_match, last_match + 1)
    starts = subcorpus.dump(subcorpus.fields['match'], first_match, last_match)
    ends = subcorpus.dump(subcorpus.fields['matchend'], first_match, last_match)

    covered_cpos = set()
    exported = []
    for match_num, start, end in zip(match_nums, starts, ends):
        last_cpos = subcorpus.collection.corpus.size - 1

        # Left context: none at the corpus start or when context is disabled.
        if start == 0 or context == 0:
            left = None
            first_covered = start
        else:
            first_covered = max(0, (start - context))
            left = (first_covered, start - 1)

        # Right context: none at the corpus end or when context is disabled.
        if end == last_cpos or context == 0:
            right = None
            last_covered = end
        else:
            last_covered = min((end + context), last_cpos)
            right = (end + 1, last_covered)

        exported.append({'num': match_num, 'lc': left, 'c': (start, end), 'rc': right})
        covered_cpos.update(range(first_covered, last_covered + 1))

    lookups = lookups_by_cpos(subcorpus.collection.corpus, list(covered_cpos))
    return {'matches': exported, **lookups}
|
@@ -1,115 +0,0 @@
|
||||
from flask import session
|
||||
from flask_login import current_user
|
||||
from flask_socketio import ConnectionRefusedError
|
||||
from threading import Lock
|
||||
import cqi
|
||||
from app import db, hashids, socketio
|
||||
from app.decorators import socketio_login_required
|
||||
from app.models import Corpus, CorpusStatus
|
||||
|
||||
|
||||
'''
|
||||
This package tunnels the Corpus Query interface (CQi) protocol through
|
||||
Socket.IO (SIO) by wrapping each CQi function in a separate SIO event.
|
||||
|
||||
This module only handles the SIO connect/disconnect, which handles the setup
|
||||
and teardown of necessary resources for later use. Each CQi function has a
|
||||
corresponding SIO event. The event handlers are spread across the different
|
||||
modules within this package.
|
||||
|
||||
Basic concept:
|
||||
1. A client connects to the SIO namespace and provides the id of a corpus to be
|
||||
analysed.
|
||||
1.1 The analysis session counter of the corpus is incremented.
|
||||
1.2 A CQiClient and a (Mutex) Lock belonging to it is created.
|
||||
1.3 Wait until the CQP server is running.
|
||||
1.4 Connect the CQiClient to the server.
|
||||
1.5 Save the CQiClient and the Lock in the session for subsequent use.
|
||||
2. A client emits an event and may provide a single json object with necessary
|
||||
arguments for the targeted CQi function.
|
||||
3. A SIO event handler (decorated with cqi_over_socketio) gets executed.
|
||||
- The event handler function defines all arguments. Hence the client
|
||||
is sent as a single json object, the decorator decomposes it to fit
|
||||
the functions signature. This also includes type checking and proper
|
||||
use of the lock (acquire/release) mechanism.
|
||||
4. Wait for more events
|
||||
5. The client disconnects from the SIO namespace
|
||||
1.1 The analysis session counter of the corpus is decremented.
|
||||
1.2 The CQiClient and (Mutex) Lock belonging to it are torn down.
|
||||
'''
|
||||
|
||||
|
||||
# Socket.IO namespace under which the connect/disconnect handlers of this
# module are registered.
NAMESPACE = '/corpora/corpus/corpus_analysis'
|
||||
|
||||
|
||||
# Import all CQi over Socket.IO event handlers
|
||||
from .cqi_corpora_corpus_subcorpora import * # noqa
|
||||
from .cqi_corpora_corpus_structural_attributes import * # noqa
|
||||
from .cqi_corpora_corpus_positional_attributes import * # noqa
|
||||
from .cqi_corpora_corpus_alignment_attributes import * # noqa
|
||||
from .cqi_corpora_corpus import * # noqa
|
||||
from .cqi_corpora import * # noqa
|
||||
from .cqi import * # noqa
|
||||
|
||||
|
||||
@socketio.on('connect', namespace=NAMESPACE)
@socketio_login_required
def connect(auth):
    """Set up a corpus analysis session when a client connects.

    The auth variable is used in a hacky way: it contains the (hashid
    encoded) id of the corpus for which an analysis session should be
    started.

    Raises:
        ConnectionRefusedError: If the corpus does not exist ('Not Found'),
            the user may not access it ('Forbidden'), its status does not
            allow analysis ('Failed Dependency') or its status did not
            become RUNNING_ANALYSIS_SESSION in time ('Request Timeout').
    """
    corpus_id = hashids.decode(auth['corpus_id'])
    corpus = Corpus.query.get(corpus_id)
    if corpus is None:
        raise ConnectionRefusedError('Not Found')
    if not (corpus.user == current_user
            or current_user.is_following_corpus(corpus)
            or current_user.is_administrator()):
        raise ConnectionRefusedError('Forbidden')
    if corpus.status not in [
        CorpusStatus.BUILT,
        CorpusStatus.STARTING_ANALYSIS_SESSION,
        CorpusStatus.RUNNING_ANALYSIS_SESSION,
        CorpusStatus.CANCELING_ANALYSIS_SESSION
    ]:
        raise ConnectionRefusedError('Failed Dependency')
    # Initialise the counter first so the SQL-side increment below does not
    # operate on NULL.
    if corpus.num_analysis_sessions is None:
        corpus.num_analysis_sessions = 0
        db.session.commit()
    corpus.num_analysis_sessions = Corpus.num_analysis_sessions + 1
    db.session.commit()
    # Poll the corpus status: up to 20 waits of 3 seconds each.
    retry_counter = 20
    while corpus.status != CorpusStatus.RUNNING_ANALYSIS_SESSION:
        if retry_counter == 0:
            corpus.num_analysis_sessions = Corpus.num_analysis_sessions - 1
            db.session.commit()
            # Returning a (truthy) dict from a connect handler would accept
            # the connection; refuse it like the other failure cases.
            raise ConnectionRefusedError('Request Timeout')
        socketio.sleep(3)
        retry_counter -= 1
        db.session.refresh(corpus)
    cqi_client = cqi.CQiClient(f'cqpserver_{corpus_id}')
    session['d'] = {
        'corpus_id': corpus_id,
        'cqi_client': cqi_client,
        'cqi_client_lock': Lock(),
    }
|
||||
|
||||
|
||||
@socketio.on('disconnect', namespace=NAMESPACE)
def disconnect():
    """Tear down the corpus analysis session when a client disconnects.

    Does nothing if no session state was stored on connect. Otherwise the
    state is removed from the session, the CQiClient is disconnected
    (errors from a dead connection are ignored) and the corpus' analysis
    session counter is decremented.
    """
    # Remove the state up front so it is gone even if a later step fails.
    state = session.pop('d', None)
    if state is None:
        return
    # The context manager releases the lock on every exit path.
    with state['cqi_client_lock']:
        try:
            state['cqi_client'].disconnect()
        except (BrokenPipeError, cqi.errors.CQiException):
            # Best effort: the server side may already be gone.
            pass
    corpus = Corpus.query.get(state['corpus_id'])
    if corpus is None:
        # The corpus was deleted in the meantime; nothing to decrement.
        return
    corpus.num_analysis_sessions = Corpus.num_analysis_sessions - 1
    db.session.commit()
|
@@ -1,43 +0,0 @@
|
||||
from socket import gaierror
|
||||
import cqi
|
||||
from app import socketio
|
||||
from app.decorators import socketio_login_required
|
||||
from . import NAMESPACE as ns
|
||||
from .utils import cqi_over_socketio
|
||||
|
||||
|
||||
@socketio.on('cqi.connect', namespace=ns)
@socketio_login_required
@cqi_over_socketio
def cqi_connect(cqi_client: cqi.CQiClient):
    """Connect the CQi client and report the resulting CQi status.

    Returns a 200 response whose payload holds the status code and its
    name, or a 500 response carrying the gaierror arguments if the
    connection attempt raised one.
    """
    try:
        status = cqi_client.connect()
    except gaierror as err:
        error_payload = {'code': err.args[0], 'desc': err.args[1]}
        return {
            'code': 500,
            'msg': 'Internal Server Error',
            'payload': error_payload
        }
    status_name = cqi.api.specification.lookup[status]
    return {
        'code': 200,
        'msg': 'OK',
        'payload': {'code': status, 'msg': status_name}
    }
|
||||
|
||||
|
||||
@socketio.on('cqi.disconnect', namespace=ns)
@socketio_login_required
@cqi_over_socketio
def cqi_disconnect(cqi_client: cqi.CQiClient):
    """Disconnect the CQi client and report the resulting CQi status."""
    status = cqi_client.disconnect()
    status_name = cqi.api.specification.lookup[status]
    return {
        'code': 200,
        'msg': 'OK',
        'payload': {'code': status, 'msg': status_name}
    }
|
||||
|
||||
|
||||
@socketio.on('cqi.ping', namespace=ns)
@socketio_login_required
@cqi_over_socketio
def cqi_ping(cqi_client: cqi.CQiClient):
    """Ping the CQP server and report the resulting CQi status."""
    status = cqi_client.ping()
    status_name = cqi.api.specification.lookup[status]
    return {
        'code': 200,
        'msg': 'OK',
        'payload': {'code': status, 'msg': status_name}
    }
|
@@ -1,22 +0,0 @@
|
||||
import cqi
|
||||
from app import socketio
|
||||
from app.decorators import socketio_login_required
|
||||
from . import NAMESPACE as ns
|
||||
from .utils import cqi_over_socketio
|
||||
|
||||
|
||||
@socketio.on('cqi.corpora.get', namespace=ns)
@socketio_login_required
@cqi_over_socketio
def cqi_corpora_get(cqi_client: cqi.CQiClient, corpus_name: str):
    """Return a copy of the attrs of the named corpus as payload."""
    found = cqi_client.corpora.get(corpus_name)
    return {'code': 200, 'msg': 'OK', 'payload': {**found.attrs}}
|
||||
|
||||
|
||||
@socketio.on('cqi.corpora.list', namespace=ns)
@socketio_login_required
@cqi_over_socketio
def cqi_corpora_list(cqi_client: cqi.CQiClient):
    """Return copies of the attrs of all corpora as payload."""
    attrs_list = []
    for entry in cqi_client.corpora.list():
        attrs_list.append({**entry.attrs})
    return {'code': 200, 'msg': 'OK', 'payload': attrs_list}
|
@@ -1,85 +0,0 @@
|
||||
from flask import session
|
||||
import cqi
|
||||
import math
|
||||
from app import db, socketio
|
||||
from app.decorators import socketio_login_required
|
||||
from app.models import Corpus
|
||||
from . import NAMESPACE as ns
|
||||
from .utils import cqi_over_socketio, lookups_by_cpos
|
||||
|
||||
|
||||
@socketio.on('cqi.corpora.corpus.drop', namespace=ns)
@socketio_login_required
@cqi_over_socketio
def cqi_corpora_corpus_drop(cqi_client: cqi.CQiClient, corpus_name: str):
    """Drop the named corpus and report the resulting CQi status."""
    status = cqi_client.corpora.get(corpus_name).drop()
    status_name = cqi.api.specification.lookup[status]
    return {
        'code': 200,
        'msg': 'OK',
        'payload': {'code': status, 'msg': status_name}
    }
|
||||
|
||||
|
||||
@socketio.on('cqi.corpora.corpus.query', namespace=ns)
@socketio_login_required
@cqi_over_socketio
def cqi_corpora_corpus_query(
    cqi_client: cqi.CQiClient,
    corpus_name: str,
    subcorpus_name: str,
    query: str
):
    """Run a query on the named corpus and report the CQi status.

    The query result is stored under ``subcorpus_name``.
    """
    status = cqi_client.corpora.get(corpus_name).query(subcorpus_name, query)
    status_name = cqi.api.specification.lookup[status]
    return {
        'code': 200,
        'msg': 'OK',
        'payload': {'code': status, 'msg': status_name}
    }
|
||||
|
||||
|
||||
###############################################################################
|
||||
# nopaque specific CQi extensions #
|
||||
###############################################################################
|
||||
@socketio.on('cqi.corpora.corpus.update_db', namespace=ns)
@socketio_login_required
@cqi_over_socketio
def cqi_corpora_corpus_update_db(cqi_client: cqi.CQiClient, corpus_name: str):
    """Copy the size of the named CQi corpus into the database corpus.

    The database corpus is the one whose id was stored in the session on
    connect.
    """
    corpus = Corpus.query.get(session['d']['corpus_id'])
    # Use the corpus named by the caller, like the sibling handlers do,
    # instead of a hard-coded 'CORPUS'.
    corpus.num_tokens = cqi_client.corpora.get(corpus_name).attrs['size']
    db.session.commit()
    # NOTE(review): unlike the sibling handlers this returns no response
    # dict; left unchanged - confirm what cqi_over_socketio does with None.
|
||||
|
||||
|
||||
@socketio.on('cqi.corpora.corpus.paginate', namespace=ns)
@socketio_login_required
@cqi_over_socketio
def cqi_corpora_corpus_paginate(
    cqi_client: cqi.CQiClient,
    corpus_name: str,
    page: int = 1,
    per_page: int = 20
):
    """Return one page of corpus positions together with their lookups.

    Returns a 200 response whose payload is a pagination dict (items,
    lookups, total, per_page, pages, page, has_prev, has_next, prev_num,
    next_num), or a 416 response if the requested page is out of range.
    """
    cqi_corpus = cqi_client.corpora.get(corpus_name)
    total = cqi_corpus.attrs['size']

    # Sanity checks; per_page is validated first so the division is safe.
    if per_page < 1 or page < 1:
        return {'code': 416, 'msg': 'Range Not Satisfiable'}
    if total > 0 and page > math.ceil(total / per_page):
        return {'code': 416, 'msg': 'Range Not Satisfiable'}

    start = (page - 1) * per_page
    stop = min(total, start + per_page)
    cpos_list = list(range(start, stop))
    lookups = lookups_by_cpos(cqi_corpus, cpos_list)

    pages = math.ceil(total / per_page)
    # An empty corpus has no current page.
    current = page if pages > 0 else None
    has_prev = current > 1 if current else False
    has_next = current < pages if current else False

    payload = {
        # the items for the current page
        'items': [cpos_list],
        # the lookups for the items
        'lookups': lookups,
        # the total number of items
        'total': total,
        # the number of items to be displayed on a page
        'per_page': per_page,
        # the total number of pages
        'pages': pages,
        # the current page number (1 indexed)
        'page': current,
        # True if a previous page exists
        'has_prev': has_prev,
        # True if a next page exists
        'has_next': has_next,
        # number of the previous page
        'prev_num': current - 1 if has_prev else None,
        # number of the next page
        'next_num': current + 1 if has_next else None
    }
    return {'code': 200, 'msg': 'OK', 'payload': payload}
|
@@ -1,24 +0,0 @@
|
||||
import cqi
|
||||
from app import socketio
|
||||
from app.decorators import socketio_login_required
|
||||
from . import NAMESPACE as ns
|
||||
from .utils import cqi_over_socketio
|
||||
|
||||
|
||||
@socketio.on('cqi.corpora.corpus.alignment_attributes.get', namespace=ns)
@socketio_login_required
@cqi_over_socketio
def cqi_corpora_corpus_alignment_attributes_get(
    cqi_client: cqi.CQiClient,
    corpus_name: str,
    alignment_attribute_name: str
):
    """Return a copy of the attrs of one alignment attribute as payload."""
    attributes = cqi_client.corpora.get(corpus_name).alignment_attributes
    found = attributes.get(alignment_attribute_name)
    return {'code': 200, 'msg': 'OK', 'payload': {**found.attrs}}
|
||||
|
||||
|
||||
@socketio.on('cqi.corpora.corpus.alignment_attributes.list', namespace=ns)
@socketio_login_required
@cqi_over_socketio
def cqi_corpora_corpus_alignment_attributes_list(
    cqi_client: cqi.CQiClient,
    corpus_name: str
):
    """Return copies of the attrs of all alignment attributes as payload."""
    attributes = cqi_client.corpora.get(corpus_name).alignment_attributes
    attrs_list = []
    for entry in attributes.list():
        attrs_list.append({**entry.attrs})
    return {'code': 200, 'msg': 'OK', 'payload': attrs_list}
|
@@ -1,24 +0,0 @@
|
||||
import cqi
|
||||
from app import socketio
|
||||
from app.decorators import socketio_login_required
|
||||
from . import NAMESPACE as ns
|
||||
from .utils import cqi_over_socketio
|
||||
|
||||
|
||||
@socketio.on('cqi.corpora.corpus.positional_attributes.get', namespace=ns)
@socketio_login_required
@cqi_over_socketio
def cqi_corpora_corpus_positional_attributes_get(cqi_client: cqi.CQiClient, corpus_name: str, positional_attribute_name: str):  # noqa
    """Look up one positional attribute of a corpus and return a copy of its attrs."""
    attribute = (
        cqi_client.corpora.get(corpus_name)
        .positional_attributes.get(positional_attribute_name)
    )
    return {'code': 200, 'msg': 'OK', 'payload': dict(attribute.attrs)}
|
||||
|
||||
|
||||
@socketio.on('cqi.corpora.corpus.positional_attributes.list', namespace=ns)
@socketio_login_required
@cqi_over_socketio
def cqi_corpora_corpus_positional_attributes_list(cqi_client: cqi.CQiClient, corpus_name: str):  # noqa
    """Return a copy of the attrs of every positional attribute of a corpus."""
    attributes = cqi_client.corpora.get(corpus_name).positional_attributes.list()
    return {
        'code': 200,
        'msg': 'OK',
        'payload': [dict(attribute.attrs) for attribute in attributes]
    }
|
@@ -1,24 +0,0 @@
|
||||
import cqi
|
||||
from app import socketio
|
||||
from app.decorators import socketio_login_required
|
||||
from . import NAMESPACE as ns
|
||||
from .utils import cqi_over_socketio
|
||||
|
||||
|
||||
@socketio.on('cqi.corpora.corpus.structural_attributes.get', namespace=ns)
@socketio_login_required
@cqi_over_socketio
def cqi_corpora_corpus_structural_attributes_get(cqi_client: cqi.CQiClient, corpus_name: str, structural_attribute_name: str):  # noqa
    """Look up one structural attribute of a corpus and return a copy of its attrs."""
    attribute = (
        cqi_client.corpora.get(corpus_name)
        .structural_attributes.get(structural_attribute_name)
    )
    return {'code': 200, 'msg': 'OK', 'payload': dict(attribute.attrs)}
|
||||
|
||||
|
||||
@socketio.on('cqi.corpora.corpus.structural_attributes.list', namespace=ns)
@socketio_login_required
@cqi_over_socketio
def cqi_corpora_corpus_structural_attributes_list(cqi_client: cqi.CQiClient, corpus_name: str):  # noqa
    """Return a copy of the attrs of every structural attribute of a corpus."""
    attributes = cqi_client.corpora.get(corpus_name).structural_attributes.list()
    return {
        'code': 200,
        'msg': 'OK',
        'payload': [dict(attribute.attrs) for attribute in attributes]
    }
|
@@ -1,125 +0,0 @@
|
||||
import cqi
|
||||
import math
|
||||
from app import socketio
|
||||
from app.decorators import socketio_login_required
|
||||
from . import NAMESPACE as ns
|
||||
from .utils import cqi_over_socketio, export_subcorpus, partial_export_subcorpus
|
||||
|
||||
|
||||
@socketio.on('cqi.corpora.corpus.subcorpora.get', namespace=ns)
@socketio_login_required
@cqi_over_socketio
def cqi_corpora_corpus_subcorpora_get(cqi_client: cqi.CQiClient, corpus_name: str, subcorpus_name: str):  # noqa
    """Look up one subcorpus of a corpus and return a copy of its attrs."""
    subcorpus = cqi_client.corpora.get(corpus_name).subcorpora.get(subcorpus_name)
    return {'code': 200, 'msg': 'OK', 'payload': dict(subcorpus.attrs)}
|
||||
|
||||
|
||||
@socketio.on('cqi.corpora.corpus.subcorpora.list', namespace=ns)
@socketio_login_required
@cqi_over_socketio
def cqi_corpora_corpus_subcorpora_list(cqi_client: cqi.CQiClient, corpus_name: str):  # noqa
    """Return a copy of the attrs of every subcorpus of a corpus."""
    subcorpora = cqi_client.corpora.get(corpus_name).subcorpora.list()
    return {
        'code': 200,
        'msg': 'OK',
        'payload': [dict(subcorpus.attrs) for subcorpus in subcorpora]
    }
|
||||
|
||||
|
||||
@socketio.on('cqi.corpora.corpus.subcorpora.subcorpus.drop', namespace=ns)
@socketio_login_required
@cqi_over_socketio
def cqi_corpora_corpus_subcorpora_subcorpus_drop(cqi_client: cqi.CQiClient, corpus_name: str, subcorpus_name: str):  # noqa
    """Drop a subcorpus and report the status returned by the drop call.

    The payload carries the raw status code plus its entry from
    ``cqi.api.specification.lookup``.
    """
    subcorpus = cqi_client.corpora.get(corpus_name).subcorpora.get(subcorpus_name)
    status = subcorpus.drop()
    return {
        'code': 200,
        'msg': 'OK',
        'payload': {
            'code': status,
            'msg': cqi.api.specification.lookup[status]
        }
    }
|
||||
|
||||
|
||||
@socketio.on('cqi.corpora.corpus.subcorpora.subcorpus.dump', namespace=ns)
@socketio_login_required
@cqi_over_socketio
def cqi_corpora_corpus_subcorpora_subcorpus_dump(cqi_client: cqi.CQiClient):
    """Placeholder handler: always answers with 501 Not Implemented."""
    return dict(code=501, msg='Not Implemented')
|
||||
|
||||
|
||||
@socketio.on('cqi.corpora.corpus.subcorpora.subcorpus.fdist_1', namespace=ns)
@socketio_login_required
@cqi_over_socketio
def cqi_corpora_corpus_subcorpora_subcorpus_fdist_1(cqi_client: cqi.CQiClient):
    """Placeholder handler: always answers with 501 Not Implemented."""
    return dict(code=501, msg='Not Implemented')
|
||||
|
||||
|
||||
@socketio.on('cqi.corpora.corpus.subcorpora.subcorpus.fdist_2', namespace=ns)
@socketio_login_required
@cqi_over_socketio
def cqi_corpora_corpus_subcorpora_subcorpus_fdist_2(cqi_client: cqi.CQiClient):
    """Placeholder handler: always answers with 501 Not Implemented."""
    return dict(code=501, msg='Not Implemented')
|
||||
|
||||
|
||||
###############################################################################
|
||||
# nopaque specific CQi extensions #
|
||||
###############################################################################
|
||||
@socketio.on('cqi.corpora.corpus.subcorpora.subcorpus.paginate', namespace=ns)
@socketio_login_required
@cqi_over_socketio
def cqi_corpora_corpus_subcorpora_subcorpus_paginate(cqi_client: cqi.CQiClient, corpus_name: str, subcorpus_name: str, context: int = 50, page: int = 1, per_page: int = 20):  # noqa
    """Return one page of matches of a subcorpus plus pagination metadata.

    Args:
        context: number of corpus positions exported on each side of a match.
        page: 1-indexed page number.
        per_page: number of matches per page.

    Returns:
        ``{'code': 416, ...}`` when ``page``/``per_page`` are below 1 or the
        page lies beyond the last page of a non-empty subcorpus; otherwise a
        200 envelope whose payload holds ``items``, ``lookups``, ``total``,
        ``per_page``, ``pages``, ``page``, ``has_prev``, ``has_next``,
        ``prev_num`` and ``next_num``.
    """
    cqi_subcorpus = (
        cqi_client.corpora.get(corpus_name).subcorpora.get(subcorpus_name)
    )
    out_of_range = {'code': 416, 'msg': 'Range Not Satisfiable'}
    # Sanity checks
    if per_page < 1 or page < 1:
        return out_of_range
    size = cqi_subcorpus.attrs['size']
    if size > 0 and page > math.ceil(size / per_page):
        return out_of_range
    export = export_subcorpus(
        cqi_subcorpus,
        context=context,
        cutoff=per_page,
        offset=(page - 1) * per_page
    )
    # Everything left in the export after removing the matches are lookups
    items = export.pop('matches')
    total = cqi_subcorpus.attrs['size']
    pages = math.ceil(total / per_page)
    # An empty subcorpus has no current page
    current_page = page if pages > 0 else None
    has_prev = current_page > 1 if current_page else False
    has_next = current_page < pages if current_page else False
    payload = {
        'items': items,
        'lookups': export,
        'total': total,
        'per_page': per_page,
        'pages': pages,
        'page': current_page,
        'has_prev': has_prev,
        'has_next': has_next,
        'prev_num': current_page - 1 if has_prev else None,
        'next_num': current_page + 1 if has_next else None
    }
    return {'code': 200, 'msg': 'OK', 'payload': payload}
|
||||
|
||||
|
||||
@socketio.on('cqi.corpora.corpus.subcorpora.subcorpus.partial_export', namespace=ns)
@socketio_login_required
@cqi_over_socketio
def cqi_corpora_corpus_subcorpora_subcorpus_partial_export(cqi_client: cqi.CQiClient, corpus_name: str, subcorpus_name: str, match_id_list: list, context: int = 50):  # noqa
    """Export only the matches of a subcorpus whose ids are in ``match_id_list``."""
    subcorpus = cqi_client.corpora.get(corpus_name).subcorpora.get(subcorpus_name)
    return {
        'code': 200,
        'msg': 'OK',
        'payload': partial_export_subcorpus(
            subcorpus, match_id_list, context=context
        )
    }
|
||||
|
||||
|
||||
@socketio.on('cqi.corpora.corpus.subcorpora.subcorpus.export', namespace=ns)
@socketio_login_required
@cqi_over_socketio
def cqi_corpora_corpus_subcorpora_subcorpus_export(cqi_client: cqi.CQiClient, corpus_name: str, subcorpus_name: str, context: int = 50):  # noqa
    """Export all matches of a subcorpus together with their lookups."""
    subcorpus = cqi_client.corpora.get(corpus_name).subcorpora.get(subcorpus_name)
    return {
        'code': 200,
        'msg': 'OK',
        'payload': export_subcorpus(subcorpus, context=context)
    }
|
@@ -1,178 +0,0 @@
|
||||
from flask import session
|
||||
from functools import wraps
|
||||
from inspect import signature
|
||||
import cqi
|
||||
|
||||
|
||||
def cqi_over_socketio(f):
    """Adapt a Socket.IO event handler to a keyword-argument CQi handler.

    The wrapped handler receives its arguments as follows:

    * ``corpus_name`` is always ``NOPAQUE_<corpus_id>`` taken from
      ``session['d']``; a value sent by the client is ignored.
    * ``cqi_client`` is always ``session['d']['cqi_client']``.
    * Every other parameter is read from the first positional event
      argument, which has to be a dict. A value is only accepted when its
      exact type is the parameter's annotation.

    Returns (from the wrapper):
        ``{'code': 424, ...}`` if the session holds no ``'d'`` entry,
        ``{'code': 400, ...}`` if a required argument is missing or any
        argument has the wrong type, ``{'code': 500, ...}`` if the handler
        raises ``BrokenPipeError`` or ``cqi.errors.CQiException`` (the latter
        with the CQi error details as payload), otherwise the handler's own
        return value.
    """
    # The handler's signature is fixed, so inspect it once at decoration
    # time instead of on every event.
    parameters = tuple(signature(f).parameters.values())

    @wraps(f)
    def wrapped(*args):
        if 'd' not in session:
            return {'code': 424, 'msg': 'Failed Dependency'}
        # A client may emit the event without data or with a non-object
        # payload. Treat both as "no arguments given" so that required
        # parameters yield a 400 instead of an IndexError/TypeError.
        data = args[0] if args and isinstance(args[0], dict) else {}
        f_args = {}
        # Check for missing args and if all provided args are of the right type
        for param in parameters:
            if param.name == 'corpus_name':
                f_args[param.name] = f'NOPAQUE_{session["d"]["corpus_id"]}'
                continue
            if param.name == 'cqi_client':
                f_args[param.name] = session['d']['cqi_client']
                continue
            if param.name not in data:
                if param.default is param.empty:
                    # Required argument is missing
                    return {'code': 400, 'msg': 'Bad Request'}
                # Optional argument: fall back to the handler's default
                continue
            arg = data[param.name]
            if type(arg) is not param.annotation:
                return {'code': 400, 'msg': 'Bad Request'}
            f_args[param.name] = arg
        # Calls on the session's CQi client are serialized with this lock
        cqi_client_lock = session['d']['cqi_client_lock']
        cqi_client_lock.acquire()
        try:
            return_value = f(**f_args)
        except BrokenPipeError:
            return_value = {
                'code': 500,
                'msg': 'Internal Server Error'
            }
        except cqi.errors.CQiException as e:
            return_value = {
                'code': 500,
                'msg': 'Internal Server Error',
                'payload': {
                    'code': e.code,
                    'desc': e.description,
                    'msg': e.name
                }
            }
        finally:
            cqi_client_lock.release()
        return return_value
    return wrapped
|
||||
|
||||
|
||||
def lookups_by_cpos(corpus, cpos_list):
    """Collect attribute lookups for a list of corpus positions.

    Returns a dict with

    * ``'cpos_lookup'``: for every position a dict holding the value of each
      positional attribute and, where the position lies inside one, the id
      of each structural attribute without values.
    * ``'<name>_lookup'`` for every such structural attribute that occurs
      and has sub-attributes: per occurring id a dict of sub-attribute
      values, keyed by the sub-attribute name without the ``<name>_`` prefix.
    """
    cpos_lookup = {cpos: {} for cpos in cpos_list}
    result = {'cpos_lookup': cpos_lookup}
    for p_attr in corpus.positional_attributes.list():
        values = p_attr.values_by_cpos(cpos_list)
        for index, cpos in enumerate(cpos_list):
            cpos_lookup[cpos][p_attr.attrs['name']] = values[index]
    for s_attr in corpus.structural_attributes.list():
        # Sub-attributes (has_values == True) are handled through their
        # parent attribute below
        if s_attr.attrs['has_values']:
            continue
        ids = s_attr.ids_by_cpos(cpos_list)
        for index, cpos in enumerate(cpos_list):
            # -1 marks a position outside of any region of this attribute
            if ids[index] != -1:
                cpos_lookup[cpos][s_attr.attrs['name']] = ids[index]
        seen_ids = [region_id for region_id in set(ids) if region_id != -1]
        if not seen_ids:
            continue
        children = corpus.structural_attributes.list(
            filters={'part_of': s_attr}
        )
        if not children:
            continue
        region_lookup = {region_id: {} for region_id in seen_ids}
        result[f'{s_attr.attrs["name"]}_lookup'] = region_lookup
        for child in children:
            # Strip the "<parent name>_" prefix from the sub-attribute name
            prefix_length = len(s_attr.attrs['name']) + 1
            short_name = child.attrs['name'][prefix_length:]
            for index, value in enumerate(child.values_by_ids(seen_ids)):
                region_lookup[seen_ids[index]][short_name] = value
    return result
|
||||
|
||||
|
||||
def partial_export_subcorpus(subcorpus, match_id_list, context=25):
    """Export selected matches of a subcorpus with context and lookups.

    Args:
        subcorpus: subcorpus object providing ``attrs``, ``dump`` and
            ``collection.corpus``.
        match_id_list: ids of the matches to export; ids outside of
            ``0 .. size - 1`` are silently skipped.
        context: number of corpus positions to include on each side.

    Returns:
        ``{'matches': [...], **lookups}``. Each match is a dict with ``num``,
        ``c`` (match start/end) and ``lc``/``rc`` (inclusive left/right
        context bounds, or ``None`` when there is no context).
    """
    if subcorpus.attrs['size'] == 0:
        return {"matches": []}
    boundaries = []
    for match_id in match_id_list:
        if match_id < 0 or match_id >= subcorpus.attrs['size']:
            continue
        start = subcorpus.dump(
            subcorpus.attrs['fields']['match'], match_id, match_id
        )[0]
        end = subcorpus.dump(
            subcorpus.attrs['fields']['matchend'], match_id, match_id
        )[0]
        boundaries.append((match_id, start, end))
    needed_cpos = set()
    matches = []
    for match_num, match_start, match_end in boundaries:
        last_cpos = subcorpus.collection.corpus.attrs['size'] - 1
        # Left context: none at the very beginning of the corpus
        if match_start == 0 or context == 0:
            lc = None
        else:
            lc = (max(0, match_start - context), match_start - 1)
        # Right context: none at the very end of the corpus
        if match_end == last_cpos or context == 0:
            rc = None
        else:
            rc = (match_end + 1, min(match_end + context, last_cpos))
        matches.append({
            'num': match_num,
            'lc': lc,
            'c': (match_start, match_end),
            'rc': rc
        })
        first_needed = match_start if lc is None else lc[0]
        last_needed = match_end if rc is None else rc[1]
        needed_cpos.update(range(first_needed, last_needed + 1))
    lookups = lookups_by_cpos(subcorpus.collection.corpus, list(needed_cpos))
    return {'matches': matches, **lookups}
|
||||
|
||||
|
||||
def export_subcorpus(subcorpus, context=25, cutoff=float('inf'), offset=0):
    """Export a window of matches of a subcorpus with context and lookups.

    Args:
        subcorpus: subcorpus object providing ``attrs``, ``dump`` and
            ``collection.corpus``.
        context: number of corpus positions to include on each side.
        cutoff: maximum number of matches to export.
        offset: index of the first match to export; negative values are
            clamped to 0.

    Returns:
        ``{'matches': [...], **lookups}``. Each match is a dict with ``num``,
        ``c`` (match start/end) and ``lc``/``rc`` (inclusive left/right
        context bounds, or ``None`` when there is no context). When the
        requested window contains no match only ``{'matches': []}`` is
        returned.
    """
    if subcorpus.attrs['size'] == 0:
        return {"matches": []}
    first_match = max(0, offset)
    # Derive the end of the window from the clamped start so that a negative
    # offset does not shrink the window.
    last_match = min(
        (first_match + cutoff - 1),
        (subcorpus.attrs['size'] - 1)
    )
    if first_match > last_match:
        # Empty window (offset past the last match or cutoff < 1): do not
        # ask the subcorpus for an inverted range.
        return {"matches": []}
    match_boundaries = zip(
        list(range(first_match, last_match + 1)),
        subcorpus.dump(
            subcorpus.attrs['fields']['match'], first_match, last_match
        ),
        subcorpus.dump(
            subcorpus.attrs['fields']['matchend'], first_match, last_match
        )
    )
    cpos_set = set()
    matches = []
    for match_num, match_start, match_end in match_boundaries:
        last_cpos = subcorpus.collection.corpus.attrs['size'] - 1
        c = (match_start, match_end)
        # Left context: none at the very beginning of the corpus
        if match_start == 0 or context == 0:
            lc = None
            cpos_list_lbound = match_start
        else:
            lc = (max(0, (match_start - context)), match_start - 1)
            cpos_list_lbound = lc[0]
        # Right context: none at the very end of the corpus
        if match_end == last_cpos or context == 0:
            rc = None
            cpos_list_rbound = match_end
        else:
            rc = (match_end + 1, min((match_end + context), last_cpos))
            cpos_list_rbound = rc[1]
        matches.append({'num': match_num, 'lc': lc, 'c': c, 'rc': rc})
        cpos_set.update(range(cpos_list_lbound, cpos_list_rbound + 1))
    lookups = lookups_by_cpos(subcorpus.collection.corpus, list(cpos_set))
    return {'matches': matches, **lookups}
|
33
app/corpora/decorators.py
Normal file
@@ -0,0 +1,33 @@
|
||||
from flask import abort
|
||||
from flask_login import current_user
|
||||
from functools import wraps
|
||||
from app.models import Corpus, CorpusFollowerAssociation
|
||||
|
||||
|
||||
def corpus_follower_permission_required(*permissions):
    """Build a view decorator that enforces corpus follower permissions.

    The decorated view must receive ``corpus_id`` as keyword argument. The
    corpus owner and administrators always pass. Everyone else needs a
    follower association with the corpus whose role has every permission in
    ``permissions``; otherwise the request is aborted with 403. An unknown
    corpus results in 404.
    """
    def decorator(f):
        @wraps(f)
        def decorated_function(*args, **kwargs):
            corpus_id = kwargs.get('corpus_id')
            corpus = Corpus.query.get_or_404(corpus_id)
            privileged = (
                corpus.user == current_user
                or current_user.is_administrator()
            )
            if not privileged:
                cfa = CorpusFollowerAssociation.query.filter_by(
                    corpus_id=corpus_id,
                    follower_id=current_user.id
                ).first()
                if cfa is None:
                    abort(403)
                missing = [
                    p for p in permissions if not cfa.role.has_permission(p)
                ]
                if missing:
                    abort(403)
            return f(*args, **kwargs)
        return decorated_function
    return decorator
|
||||
|
||||
|
||||
def corpus_owner_or_admin_required(f):
    """View decorator that only lets the corpus owner or an administrator pass.

    The decorated view must receive ``corpus_id`` as keyword argument. An
    unknown corpus results in 404, any other user in 403.
    """
    @wraps(f)
    def decorated_function(*args, **kwargs):
        corpus = Corpus.query.get_or_404(kwargs.get('corpus_id'))
        if corpus.user == current_user or current_user.is_administrator():
            return f(*args, **kwargs)
        abort(403)
    return decorated_function
|
||||
|
45
app/corpora/events.py
Normal file
@@ -0,0 +1,45 @@
|
||||
from flask_login import current_user
|
||||
from flask_socketio import join_room
|
||||
from app import hashids, socketio
|
||||
from app.decorators import socketio_login_required
|
||||
from app.models import Corpus
|
||||
|
||||
|
||||
@socketio.on('GET /corpora/<corpus_id>')
@socketio_login_required
def get_corpus(corpus_hashid):
    """Return the JSON representation of a corpus in an HTTP-like envelope.

    Responds with status 404 if the corpus does not exist and with 403 unless
    the corpus is public, owned by the current user, or the current user is
    an administrator.
    """
    corpus_id = hashids.decode(corpus_hashid)
    corpus = Corpus.query.get(corpus_id)
    if corpus is None:
        return {'options': {'status': 404, 'statusText': 'Not found'}}
    if not (
        corpus.is_public
        or corpus.user == current_user
        or current_user.is_administrator()
    ):
        return {'options': {'status': 403, 'statusText': 'Forbidden'}}
    return {
        'body': corpus.to_json_serializable(),
        'options': {
            'status': 200,
            'statusText': 'OK',
            # Header name -> value mapping. The previous literal
            # {'Content-Type: application/json'} was a set, which cannot be
            # serialized to JSON.
            'headers': {'Content-Type': 'application/json'}
        }
    }
|
||||
|
||||
|
||||
@socketio.on('SUBSCRIBE /corpora/<corpus_id>')
@socketio_login_required
def subscribe_corpus(corpus_hashid):
    """Join the Socket.IO room of a corpus the current user may access.

    Responds with status 404 if the corpus does not exist and with 403 unless
    the corpus is public, owned by the current user, or the current user is
    an administrator.
    """
    corpus = Corpus.query.get(hashids.decode(corpus_hashid))
    if corpus is None:
        return {'options': {'status': 404, 'statusText': 'Not found'}}
    may_access = (
        corpus.is_public
        or corpus.user == current_user
        or current_user.is_administrator()
    )
    if not may_access:
        return {'options': {'status': 403, 'statusText': 'Forbidden'}}
    join_room(f'/corpora/{corpus.hashid}')
    return {'options': {'status': 200, 'statusText': 'OK'}}
|
2
app/corpora/files/__init__.py
Normal file
@@ -0,0 +1,2 @@
|
||||
from .. import bp
|
||||
from . import json_routes, routes
|
54
app/corpora/files/forms.py
Normal file
@@ -0,0 +1,54 @@
|
||||
from flask_wtf import FlaskForm
|
||||
from flask_wtf.file import FileField, FileRequired
|
||||
from wtforms import (
|
||||
StringField,
|
||||
SubmitField,
|
||||
ValidationError,
|
||||
IntegerField
|
||||
)
|
||||
from wtforms.validators import InputRequired, Length
|
||||
|
||||
|
||||
class CorpusFileBaseForm(FlaskForm):
    """Bibliographic metadata fields shared by the corpus file forms.

    ``author``, ``publishing_year`` and ``title`` are required; all other
    fields are optional. Every text field is limited to 255 characters.
    """
    author = StringField(
        'Author',
        validators=[InputRequired(), Length(max=255)]
    )
    publishing_year = IntegerField(
        'Publishing year',
        validators=[InputRequired()]
    )
    title = StringField(
        'Title',
        validators=[InputRequired(), Length(max=255)]
    )
    # Label corrected from the misspelled 'Adress'
    address = StringField('Address', validators=[Length(max=255)])
    booktitle = StringField('Booktitle', validators=[Length(max=255)])
    chapter = StringField('Chapter', validators=[Length(max=255)])
    editor = StringField('Editor', validators=[Length(max=255)])
    institution = StringField('Institution', validators=[Length(max=255)])
    journal = StringField('Journal', validators=[Length(max=255)])
    pages = StringField('Pages', validators=[Length(max=255)])
    publisher = StringField('Publisher', validators=[Length(max=255)])
    school = StringField('School', validators=[Length(max=255)])
    submit = SubmitField()
|
||||
|
||||
|
||||
class CreateCorpusFileForm(CorpusFileBaseForm):
    """Form for adding a new corpus file; requires a ``.vrt`` upload."""
    vrt = FileField('File', validators=[FileRequired()])

    def __init__(self, *args, **kwargs):
        # Use the form specific prefix unless the caller supplies one
        kwargs.setdefault('prefix', 'create-corpus-file-form')
        super().__init__(*args, **kwargs)

    def validate_vrt(self, field):
        """Reject uploads whose file name does not end with ``.vrt``."""
        filename = field.data.filename.lower()
        if not filename.endswith('.vrt'):
            raise ValidationError('VRT files only!')
|
||||
|
||||
|
||||
class UpdateCorpusFileForm(CorpusFileBaseForm):
    """Form for editing the metadata of an existing corpus file."""

    def __init__(self, *args, **kwargs):
        # Use the form specific prefix unless the caller supplies one
        kwargs.setdefault('prefix', 'update-corpus-file-form')
        super().__init__(*args, **kwargs)
|
30
app/corpora/files/json_routes.py
Normal file
@@ -0,0 +1,30 @@
|
||||
from flask import abort, current_app
|
||||
from threading import Thread
|
||||
from app import db
|
||||
from app.decorators import content_negotiation
|
||||
from app.models import CorpusFile
|
||||
from ..decorators import corpus_follower_permission_required
|
||||
from . import bp
|
||||
|
||||
|
||||
@bp.route('/<hashid:corpus_id>/files/<hashid:corpus_file_id>', methods=['DELETE'])
@corpus_follower_permission_required('MANAGE_FILES')
@content_negotiation(produces='application/json')
def delete_corpus_file(corpus_id, corpus_file_id):
    """Delete a corpus file in a background thread.

    Responds with 404 if the file does not belong to the corpus, otherwise
    with 202 and a message; the deletion itself happens asynchronously.
    """
    def _delete_corpus_file(app, corpus_file_id):
        # Runs outside of the request, so an own app context is needed
        with app.app_context():
            corpus_file = CorpusFile.query.get(corpus_file_id)
            if corpus_file is None:
                # Already removed in the meantime, e.g. by a concurrent
                # DELETE request; nothing left to do.
                return
            corpus_file.delete()
            db.session.commit()

    corpus_file = CorpusFile.query.filter_by(corpus_id=corpus_id, id=corpus_file_id).first_or_404()
    thread = Thread(
        target=_delete_corpus_file,
        args=(current_app._get_current_object(), corpus_file.id)
    )
    thread.start()
    response_data = {
        'message': f'Corpus File "{corpus_file.title}" marked for deletion',
        'category': 'corpus'
    }
    return response_data, 202
|
100
app/corpora/files/routes.py
Normal file
@@ -0,0 +1,100 @@
|
||||
from flask import (
|
||||
abort,
|
||||
flash,
|
||||
redirect,
|
||||
render_template,
|
||||
send_from_directory,
|
||||
url_for
|
||||
)
|
||||
from flask_breadcrumbs import register_breadcrumb
|
||||
import os
|
||||
from app import db
|
||||
from app.models import Corpus, CorpusFile, CorpusStatus
|
||||
from ..decorators import corpus_follower_permission_required
|
||||
from ..utils import corpus_endpoint_arguments_constructor as corpus_eac
|
||||
from . import bp
|
||||
from .forms import CreateCorpusFileForm, UpdateCorpusFileForm
|
||||
from .utils import corpus_file_dynamic_list_constructor as corpus_file_dlc
|
||||
|
||||
|
||||
@bp.route('/<hashid:corpus_id>/files')
@register_breadcrumb(bp, '.entity.files', 'Files', endpoint_arguments_constructor=corpus_eac)
def corpus_files(corpus_id):
    """Redirect to the ``files`` anchor of the corpus page."""
    target = url_for('.corpus', _anchor='files', corpus_id=corpus_id)
    return redirect(target)
|
||||
|
||||
|
||||
@bp.route('/<hashid:corpus_id>/files/create', methods=['GET', 'POST'])
@register_breadcrumb(bp, '.entity.files.create', 'Create', endpoint_arguments_constructor=corpus_eac)
@corpus_follower_permission_required('MANAGE_FILES')
def create_corpus_file(corpus_id):
    """Render the "add corpus file" form and handle its submission.

    A submitted but invalid form yields 400 with the form errors. A valid
    form creates the corpus file, marks the corpus as unprepared and
    responds with 201 and the corpus URL as ``Location`` header. Errors
    while creating the file abort with 500.
    """
    corpus = Corpus.query.get_or_404(corpus_id)
    form = CreateCorpusFileForm()
    if not form.is_submitted():
        return render_template(
            'corpora/files/create.html.j2',
            title='Add corpus file',
            form=form,
            corpus=corpus
        )
    if not form.validate():
        return {'errors': form.errors}, 400
    metadata_fields = (
        'address', 'author', 'booktitle', 'chapter', 'editor', 'institution',
        'journal', 'pages', 'publisher', 'publishing_year', 'school', 'title'
    )
    try:
        # Reading the form data stays inside the try block so that an
        # AttributeError raised there is turned into a 500 as well
        metadata = {
            name: getattr(form, name).data for name in metadata_fields
        }
        corpus_file = CorpusFile.create(
            form.vrt.data,
            mimetype='application/vrt+xml',
            corpus=corpus,
            **metadata
        )
    except (AttributeError, OSError):
        abort(500)
    corpus.status = CorpusStatus.UNPREPARED
    db.session.commit()
    flash(f'Corpus File "{corpus_file.filename}" added', category='corpus')
    return '', 201, {'Location': corpus.url}
|
||||
|
||||
|
||||
@bp.route('/<hashid:corpus_id>/files/<hashid:corpus_file_id>', methods=['GET', 'POST'])
@register_breadcrumb(bp, '.entity.files.entity', '', dynamic_list_constructor=corpus_file_dlc)
@corpus_follower_permission_required('MANAGE_FILES')
def corpus_file(corpus_id, corpus_file_id):
    """Render the "edit corpus file" form and apply a valid submission.

    When the submission changes the corpus file, the corpus is marked as
    unprepared before committing. Responds with 404 if the file does not
    belong to the corpus.
    """
    cf = CorpusFile.query.filter_by(corpus_id=corpus_id, id=corpus_file_id).first_or_404()
    form = UpdateCorpusFileForm(data=cf.to_json_serializeable())
    if not form.validate_on_submit():
        return render_template(
            'corpora/files/corpus_file.html.j2',
            title='Edit corpus file',
            form=form,
            corpus=cf.corpus,
            corpus_file=cf
        )
    form.populate_obj(cf)
    if db.session.is_modified(cf):
        cf.corpus.status = CorpusStatus.UNPREPARED
        db.session.commit()
    flash(f'Corpus file "{cf.filename}" updated', category='corpus')
    return redirect(cf.corpus.url)
|
||||
|
||||
|
||||
@bp.route('/<hashid:corpus_id>/files/<hashid:corpus_file_id>/download')
@corpus_follower_permission_required('VIEW')
def download_corpus_file(corpus_id, corpus_file_id):
    """Send a corpus file as attachment under its original file name.

    Responds with 404 if the file does not belong to the corpus.
    """
    cf = CorpusFile.query.filter_by(corpus_id=corpus_id, id=corpus_file_id).first_or_404()
    directory, stored_name = os.path.split(cf.path)
    return send_from_directory(
        directory,
        stored_name,
        as_attachment=True,
        attachment_filename=cf.filename,
        mimetype=cf.mimetype
    )
|
15
app/corpora/files/utils.py
Normal file
@@ -0,0 +1,15 @@
|
||||
from flask import request, url_for
|
||||
from app.models import CorpusFile
|
||||
from ..utils import corpus_endpoint_arguments_constructor as corpus_eac
|
||||
|
||||
|
||||
def corpus_file_dynamic_list_constructor():
    """Build the breadcrumb entry for the corpus file of the current request.

    Reads ``corpus_id`` and ``corpus_file_id`` from the request's view
    arguments; responds with 404 if the file does not belong to the corpus.
    """
    view_args = request.view_args
    corpus_id = view_args['corpus_id']
    corpus_file_id = view_args['corpus_file_id']
    cf = CorpusFile.query.filter_by(corpus_id=corpus_id, id=corpus_file_id).first_or_404()
    breadcrumb = {
        'text': f'{cf.author}: {cf.title} ({cf.publishing_year})',
        'url': url_for('.corpus_file', corpus_id=corpus_id, corpus_file_id=corpus_file_id)
    }
    return [breadcrumb]
|
2
app/corpora/followers/__init__.py
Normal file
@@ -0,0 +1,2 @@
|
||||
from .. import bp
|
||||
from . import json_routes
|
76
app/corpora/followers/json_routes.py
Normal file
@@ -0,0 +1,76 @@
|
||||
from flask import abort, flash, jsonify, make_response, request
|
||||
from flask_login import current_user
|
||||
from app import db
|
||||
from app.decorators import content_negotiation
|
||||
from app.models import (
|
||||
Corpus,
|
||||
CorpusFollowerAssociation,
|
||||
CorpusFollowerRole,
|
||||
User
|
||||
)
|
||||
from ..decorators import corpus_follower_permission_required
|
||||
from . import bp
|
||||
|
||||
|
||||
@bp.route('/<hashid:corpus_id>/followers', methods=['POST'])
@corpus_follower_permission_required('MANAGE_FOLLOWERS')
@content_negotiation(consumes='application/json', produces='application/json')
def create_corpus_followers(corpus_id):
    """Make the given users followers of a corpus.

    Expects a JSON list of usernames as request body and aborts with 400 for
    anything else. Each username must belong to a public user, otherwise the
    request is aborted with 404 before anything is committed.
    """
    usernames = request.json
    # The body must be a list AND every element must be a string. The
    # previous `or` accepted lists of non-strings and raised a TypeError for
    # non-iterable bodies.
    if not (isinstance(usernames, list) and all(isinstance(u, str) for u in usernames)):
        abort(400)
    corpus = Corpus.query.get_or_404(corpus_id)
    for username in usernames:
        user = User.query.filter_by(username=username, is_public=True).first_or_404()
        user.follow_corpus(corpus)
    db.session.commit()
    response_data = {
        'message': f'Users are now following "{corpus.title}"',
        'category': 'corpus'
    }
    return response_data, 200
|
||||
|
||||
|
||||
@bp.route('/<hashid:corpus_id>/followers/<hashid:follower_id>/role', methods=['PUT'])
@corpus_follower_permission_required('MANAGE_FOLLOWERS')
@content_negotiation(consumes='application/json', produces='application/json')
def update_corpus_follower_role(corpus_id, follower_id):
    """Assign a new role to a follower of a corpus.

    Expects the role name as JSON string. Aborts with 400 if the body is not
    a string or names no existing role, and with 404 if the user does not
    follow the corpus.
    """
    role_name = request.json
    if not isinstance(role_name, str):
        abort(400)
    new_role = CorpusFollowerRole.query.filter_by(name=role_name).first()
    if new_role is None:
        abort(400)
    association = CorpusFollowerAssociation.query.filter_by(corpus_id=corpus_id, follower_id=follower_id).first_or_404()
    association.role = new_role
    db.session.commit()
    message = f'User "{association.follower.username}" is now {association.role.name}'
    return {'message': message, 'category': 'corpus'}, 200
|
||||
|
||||
|
||||
@bp.route('/<hashid:corpus_id>/followers/<hashid:follower_id>', methods=['DELETE'])
def delete_corpus_follower(corpus_id, follower_id):
    """Remove a follower from a corpus.

    Allowed for the follower themself, the corpus owner, administrators and
    followers whose role has the ``MANAGE_FOLLOWERS`` permission; everyone
    else gets 403. Responds with 404 if the user does not follow the corpus.
    A user removing themself gets a flashed message and an empty 204
    response, otherwise a JSON message with status 200 is returned.
    """
    cfa = CorpusFollowerAssociation.query.filter_by(corpus_id=corpus_id, follower_id=follower_id).first_or_404()
    is_self = current_user.id == follower_id
    allowed = (
        is_self
        or current_user == cfa.corpus.user
        or current_user.is_administrator()
    )
    if not allowed:
        # The current user may not follow the corpus at all, in which case
        # there is no association and therefore no role to ask.
        own_cfa = CorpusFollowerAssociation.query.filter_by(corpus_id=corpus_id, follower_id=current_user.id).first()
        allowed = (
            own_cfa is not None
            and own_cfa.role.has_permission('MANAGE_FOLLOWERS')
        )
    if not allowed:
        abort(403)
    if is_self:
        flash(f'You are no longer following "{cfa.corpus.title}"', 'corpus')
        response = make_response()
        response.status_code = 204
    else:
        response_data = {
            'message': f'"{cfa.follower.username}" is not following "{cfa.corpus.title}" anymore',
            'category': 'corpus'
        }
        response = jsonify(response_data)
        response.status_code = 200
    cfa.follower.unfollow_corpus(cfa.corpus)
    db.session.commit()
    return response
|
@@ -1,13 +1,5 @@
|
||||
from flask_wtf import FlaskForm
|
||||
from flask_wtf.file import FileField, FileRequired
|
||||
from wtforms import (
|
||||
BooleanField,
|
||||
StringField,
|
||||
SubmitField,
|
||||
TextAreaField,
|
||||
ValidationError,
|
||||
IntegerField
|
||||
)
|
||||
from wtforms import StringField, SubmitField, TextAreaField
|
||||
from wtforms.validators import InputRequired, Length
|
||||
|
||||
|
||||
@@ -34,50 +26,8 @@ class UpdateCorpusForm(CorpusBaseForm):
|
||||
super().__init__(*args, **kwargs)
|
||||
|
||||
|
||||
class CorpusFileBaseForm(FlaskForm):
|
||||
author = StringField(
|
||||
'Author',
|
||||
validators=[InputRequired(), Length(max=255)]
|
||||
)
|
||||
publishing_year = IntegerField(
|
||||
'Publishing year',
|
||||
validators=[InputRequired()]
|
||||
)
|
||||
title = StringField(
|
||||
'Title',
|
||||
validators=[InputRequired(), Length(max=255)]
|
||||
)
|
||||
address = StringField('Adress', validators=[Length(max=255)])
|
||||
booktitle = StringField('Booktitle', validators=[Length(max=255)])
|
||||
chapter = StringField('Chapter', validators=[Length(max=255)])
|
||||
editor = StringField('Editor', validators=[Length(max=255)])
|
||||
institution = StringField('Institution', validators=[Length(max=255)])
|
||||
journal = StringField('Journal', validators=[Length(max=255)])
|
||||
pages = StringField('Pages', validators=[Length(max=255)])
|
||||
publisher = StringField('Publisher', validators=[Length(max=255)])
|
||||
school = StringField('School', validators=[Length(max=255)])
|
||||
submit = SubmitField()
|
||||
|
||||
|
||||
class CreateCorpusFileForm(CorpusFileBaseForm):
|
||||
vrt = FileField('File', validators=[FileRequired()])
|
||||
|
||||
def __init__(self, *args, **kwargs):
|
||||
if 'prefix' not in kwargs:
|
||||
kwargs['prefix'] = 'create-corpus-file-form'
|
||||
super().__init__(*args, **kwargs)
|
||||
|
||||
def validate_vrt(self, field):
|
||||
if not field.data.filename.lower().endswith('.vrt'):
|
||||
raise ValidationError('VRT files only!')
|
||||
|
||||
|
||||
class UpdateCorpusFileForm(CorpusFileBaseForm):
|
||||
def __init__(self, *args, **kwargs):
|
||||
if 'prefix' not in kwargs:
|
||||
kwargs['prefix'] = 'update-corpus-file-form'
|
||||
super().__init__(*args, **kwargs)
|
||||
|
||||
|
||||
class ImportCorpusForm(FlaskForm):
|
||||
pass
|
||||
def __init__(self, *args, **kwargs):
|
||||
if 'prefix' not in kwargs:
|
||||
kwargs['prefix'] = 'import-corpus-form'
|
||||
super().__init__(*args, **kwargs)
|
||||
|
125
app/corpora/json_routes.py
Normal file
@@ -0,0 +1,125 @@
|
||||
from datetime import datetime
|
||||
from flask import abort, current_app, request, url_for
|
||||
from flask_login import current_user
|
||||
from threading import Thread
|
||||
from app import db
|
||||
from app.decorators import content_negotiation
|
||||
from app.models import Corpus, CorpusFollowerRole
|
||||
from . import bp
|
||||
from .decorators import corpus_follower_permission_required, corpus_owner_or_admin_required
|
||||
import nltk
|
||||
from string import punctuation
|
||||
|
||||
|
||||
@bp.route('/<hashid:corpus_id>', methods=['DELETE'])
@corpus_owner_or_admin_required
@content_negotiation(produces='application/json')
def delete_corpus(corpus_id):
    '''
    Schedule the deletion of a corpus and answer right away.

    The corpus is looked up in the request (404 if it does not exist). The
    deletion itself and the commit happen in a separate thread that opens
    its own application context.

    Returns a JSON-serializable dict with a message and a category together
    with the status code 200.
    '''
    def _delete_corpus(app, corpus_id):
        # Worker executed in the spawned thread; it re-fetches the corpus
        # by id inside a fresh application context.
        with app.app_context():
            doomed_corpus = Corpus.query.get(corpus_id)
            doomed_corpus.delete()
            db.session.commit()

    corpus = Corpus.query.get_or_404(corpus_id)
    # Hand the real application object (not the proxy) to the thread.
    app = current_app._get_current_object()
    Thread(target=_delete_corpus, args=(app, corpus.id)).start()
    return {
        'message': f'Corpus "{corpus.title}" marked for deletion',
        'category': 'corpus'
    }, 200
|
||||
|
||||
|
||||
@bp.route('/<hashid:corpus_id>/build', methods=['POST'])
@corpus_follower_permission_required('MANAGE_FILES')
@content_negotiation(produces='application/json')
def build_corpus(corpus_id):
    '''
    Schedule the build of a corpus and answer right away.

    Aborts with 404 if the corpus does not exist and with 409 if it has no
    files. Otherwise the build and the commit run in a separate thread with
    its own application context.

    Returns a JSON-serializable dict with a message and a category together
    with the status code 202.
    '''
    def _build_corpus(app, corpus_id):
        # Worker executed in the spawned thread; it re-fetches the corpus
        # by id inside a fresh application context.
        with app.app_context():
            target_corpus = Corpus.query.get(corpus_id)
            target_corpus.build()
            db.session.commit()

    corpus = Corpus.query.get_or_404(corpus_id)
    # A corpus without any files cannot be built.
    if not corpus.files.all():
        abort(409)
    # Hand the real application object (not the proxy) to the thread.
    app = current_app._get_current_object()
    Thread(target=_build_corpus, args=(app, corpus_id)).start()
    return {
        'message': f'Corpus "{corpus.title}" marked for building',
        'category': 'corpus'
    }, 202
|
||||
|
||||
@bp.route('/stopwords')
@content_negotiation(produces='application/json')
def get_stopwords():
    '''
    Return stop word lists as a JSON object.

    The object maps each supported language name to its NLTK stop word
    list and additionally contains:
      - 'punctuation': string.punctuation plus a few extra dash and quote
        characters
      - 'user_stopwords': an empty list

    The response is complete when it is sent, so the status code is 200.
    '''
    # Only download the NLTK stop word data when it is not available
    # locally, instead of calling the downloader on every request.
    try:
        nltk.data.find('corpora/stopwords')
    except LookupError:
        nltk.download('stopwords', quiet=True)
    languages = ["german", "english", "catalan", "greek", "spanish", "french", "italian", "russian", "chinese"]
    stopwords = {
        language: nltk.corpus.stopwords.words(language)
        for language in languages
    }
    stopwords['punctuation'] = list(punctuation) + ['—', '|', '–', '“', '„', '--']
    stopwords['user_stopwords'] = []
    return stopwords, 200
|
||||
|
||||
@bp.route('/<hashid:corpus_id>/generate-share-link', methods=['POST'])
@corpus_follower_permission_required('MANAGE_FOLLOWERS')
@content_negotiation(consumes='application/json', produces='application/json')
def generate_corpus_share_link(corpus_id):
    '''
    Create a link that lets another user follow the corpus with a role.

    Expects a JSON object with the string members:
      - 'expiration': a date in the format '%b %d, %Y' (e.g. 'Jan 31, 2024')
      - 'role': the name of an existing CorpusFollowerRole

    Aborts with 400 if the payload is not an object, a member is missing or
    not a string, the date cannot be parsed or the role is unknown, and
    with 404 if the corpus does not exist.

    Returns a JSON-serializable dict containing the external share link
    under 'corpusShareLink' together with the status code 200.
    '''
    data = request.json
    if not isinstance(data, dict):
        abort(400)
    expiration = data.get('expiration')
    if not isinstance(expiration, str):
        abort(400)
    role_name = data.get('role')
    if not isinstance(role_name, str):
        abort(400)
    # The date string comes from the client; a malformed value is a client
    # error and must not surface as an unhandled ValueError (HTTP 500).
    try:
        expiration_date = datetime.strptime(expiration, '%b %d, %Y')
    except ValueError:
        abort(400)
    cfr = CorpusFollowerRole.query.filter_by(name=role_name).first()
    if cfr is None:
        abort(400)
    corpus = Corpus.query.get_or_404(corpus_id)
    token = current_user.generate_follow_corpus_token(corpus.hashid, role_name, expiration_date)
    corpus_share_link = url_for(
        'corpora.follow_corpus',
        corpus_id=corpus_id,
        token=token,
        _external=True
    )
    response_data = {
        'message': 'Corpus share link generated',
        'category': 'corpus',
        'corpusShareLink': corpus_share_link
    }
    return response_data, 200
|
||||
|
||||
|
||||
|
||||
@bp.route('/<hashid:corpus_id>/is_public', methods=['PUT'])
@corpus_owner_or_admin_required
@content_negotiation(consumes='application/json', produces='application/json')
def update_corpus_is_public(corpus_id):
    '''
    Set the is_public flag of a corpus.

    The request body must be a bare JSON boolean; anything else aborts with
    400. Aborts with 404 if the corpus does not exist.

    Returns a JSON-serializable dict with a message and a category together
    with the status code 200.
    '''
    is_public = request.json
    if not isinstance(is_public, bool):
        abort(400)
    corpus = Corpus.query.get_or_404(corpus_id)
    corpus.is_public = is_public
    db.session.commit()
    visibility = "public" if is_public else "private"
    message = f'Corpus "{corpus.title}" is now' + f' {visibility}'
    return {'message': message, 'category': 'corpus'}, 200
|
@@ -1,139 +1,30 @@
|
||||
from datetime import datetime
|
||||
from flask import (
|
||||
abort,
|
||||
current_app,
|
||||
flash,
|
||||
Markup,
|
||||
redirect,
|
||||
render_template,
|
||||
request,
|
||||
send_from_directory,
|
||||
url_for
|
||||
from flask import abort, flash, redirect, render_template, url_for
|
||||
from flask_breadcrumbs import register_breadcrumb
|
||||
from flask_login import current_user
|
||||
from app import db
|
||||
from app.models import (
|
||||
Corpus,
|
||||
CorpusFollowerAssociation,
|
||||
CorpusFollowerRole,
|
||||
User
|
||||
)
|
||||
from flask_login import current_user, login_required
|
||||
from threading import Thread
|
||||
import jwt
|
||||
import os
|
||||
from app import db, hashids
|
||||
from app.models import Corpus, CorpusFile, CorpusStatus, User
|
||||
from . import bp
|
||||
from .forms import (
|
||||
CreateCorpusFileForm,
|
||||
CreateCorpusForm,
|
||||
UpdateCorpusFileForm
|
||||
from .decorators import corpus_follower_permission_required
|
||||
from .forms import CreateCorpusForm
|
||||
from .utils import (
|
||||
corpus_endpoint_arguments_constructor as corpus_eac,
|
||||
corpus_dynamic_list_constructor as corpus_dlc
|
||||
)
|
||||
|
||||
|
||||
# @bp.route('/share/<token>', methods=['GET', 'POST'])
|
||||
# def share_corpus(token):
|
||||
# try:
|
||||
# payload = jwt.decode(
|
||||
# token,
|
||||
# current_app.config['SECRET_KEY'],
|
||||
# algorithms=['HS256'],
|
||||
# issuer=current_app.config['SERVER_NAME'],
|
||||
# options={'require': ['iat', 'iss', 'sub']}
|
||||
# )
|
||||
# except jwt.PyJWTError:
|
||||
# return False
|
||||
# corpus_hashid = payload.get('sub')
|
||||
# corpus_id = hashids.decode(corpus_hashid)
|
||||
# return redirect(url_for('.corpus', corpus_id=corpus_id))
|
||||
|
||||
|
||||
@bp.route('/<hashid:corpus_id>/enable_is_public', methods=['POST'])
|
||||
@login_required
|
||||
def enable_corpus_is_public(corpus_id):
|
||||
corpus = Corpus.query.get_or_404(corpus_id)
|
||||
if not (corpus.user == current_user or current_user.is_administrator()):
|
||||
abort(403)
|
||||
corpus.is_public = True
|
||||
db.session.commit()
|
||||
return '', 204
|
||||
|
||||
|
||||
@bp.route('/<hashid:corpus_id>/disable_is_public', methods=['POST'])
|
||||
@login_required
|
||||
def disable_corpus_is_public(corpus_id):
|
||||
corpus = Corpus.query.get_or_404(corpus_id)
|
||||
if not (corpus.user == current_user or current_user.is_administrator()):
|
||||
abort(403)
|
||||
corpus.is_public = False
|
||||
db.session.commit()
|
||||
return '', 204
|
||||
|
||||
|
||||
# @bp.route('/<hashid:corpus_id>/follow', methods=['GET', 'POST'])
|
||||
# @login_required
|
||||
# def follow_corpus(corpus_id):
|
||||
# corpus = Corpus.query.get_or_404(corpus_id)
|
||||
# user_hashid = request.args.get('user_id')
|
||||
# if user_hashid is None:
|
||||
# user = current_user
|
||||
# else:
|
||||
# if not current_user.is_administrator():
|
||||
# abort(403)
|
||||
# else:
|
||||
# user_id = hashids.decode(user_hashid)
|
||||
# user = User.query.get_or_404(user_id)
|
||||
# if not user.is_following_corpus(corpus):
|
||||
# user.follow_corpus(corpus)
|
||||
# db.session.commit()
|
||||
# flash(f'You are following {corpus.title} now', category='corpus')
|
||||
# return {}, 202
|
||||
|
||||
|
||||
@bp.route('/<hashid:corpus_id>/unfollow', methods=['GET', 'POST'])
|
||||
@login_required
|
||||
def unfollow_corpus(corpus_id):
|
||||
corpus = Corpus.query.get_or_404(corpus_id)
|
||||
user_hashid = request.args.get('user_id')
|
||||
if user_hashid is None:
|
||||
user = current_user
|
||||
elif current_user.is_administrator():
|
||||
user_id = hashids.decode(user_hashid)
|
||||
user = User.query.get_or_404(user_id)
|
||||
else:
|
||||
abort(403)
|
||||
if user.is_following_corpus(corpus):
|
||||
user.unfollow_corpus(corpus)
|
||||
db.session.commit()
|
||||
flash(f'You are not following {corpus.title} anymore', category='corpus')
|
||||
return {}, 202
|
||||
|
||||
|
||||
# @bp.route('/add_permission/<hashid:corpus_id>/<hashid:user_id>/<int:permission>')
|
||||
# def add_permission(corpus_id, user_id, permission):
|
||||
# a = CorpusFollowerAssociation.query.filter_by(followed_corpus_id=corpus_id, following_user_id=user_id).first_or_404()
|
||||
# a.add_permission(permission)
|
||||
# db.session.commit()
|
||||
# return 'ok'
|
||||
|
||||
|
||||
# @bp.route('/remove_permission/<hashid:corpus_id>/<hashid:user_id>/<int:permission>')
|
||||
# def remove_permission(corpus_id, user_id, permission):
|
||||
# a = CorpusFollowerAssociation.query.filter_by(followed_corpus_id=corpus_id, following_user_id=user_id).first_or_404()
|
||||
# a.remove_permission(permission)
|
||||
# db.session.commit()
|
||||
# return 'ok'
|
||||
|
||||
|
||||
@bp.route('/public')
|
||||
@login_required
|
||||
def public_corpora():
|
||||
corpora = [
|
||||
c.to_json_serializeable()
|
||||
for c in Corpus.query.filter(Corpus.is_public == True).all()
|
||||
]
|
||||
return render_template(
|
||||
'corpora/public_corpora.html.j2',
|
||||
corpora=corpora,
|
||||
title='Corpora'
|
||||
)
|
||||
@bp.route('')
|
||||
@register_breadcrumb(bp, '.', '<i class="nopaque-icons left">I</i>My Corpora')
|
||||
def corpora():
|
||||
return redirect(url_for('main.dashboard', _anchor='corpora'))
|
||||
|
||||
|
||||
@bp.route('/create', methods=['GET', 'POST'])
|
||||
@login_required
|
||||
@register_breadcrumb(bp, '.create', 'Create')
|
||||
def create_corpus():
|
||||
form = CreateCorpusForm()
|
||||
if form.validate_on_submit():
|
||||
@@ -146,224 +37,84 @@ def create_corpus():
|
||||
except OSError:
|
||||
abort(500)
|
||||
db.session.commit()
|
||||
message = Markup(
|
||||
f'Corpus "<a href="{corpus.url}">{corpus.title}</a>" created'
|
||||
)
|
||||
flash(message, 'corpus')
|
||||
flash(f'Corpus "{corpus.title}" created', 'corpus')
|
||||
return redirect(corpus.url)
|
||||
return render_template(
|
||||
'corpora/create_corpus.html.j2',
|
||||
form=form,
|
||||
title='Create corpus'
|
||||
'corpora/create.html.j2',
|
||||
title='Create corpus',
|
||||
form=form
|
||||
)
|
||||
|
||||
|
||||
@bp.route('/<hashid:corpus_id>', methods=['GET', 'POST'])
|
||||
@login_required
|
||||
@bp.route('/<hashid:corpus_id>')
|
||||
@register_breadcrumb(bp, '.entity', '', dynamic_list_constructor=corpus_dlc)
|
||||
def corpus(corpus_id):
|
||||
corpus = Corpus.query.get_or_404(corpus_id)
|
||||
cfrs = CorpusFollowerRole.query.all()
|
||||
# TODO: Better solution for filtering admin
|
||||
users = User.query.filter(User.is_public == True, User.id != current_user.id, User.id != corpus.user.id, User.role_id < 4).all()
|
||||
cfa = CorpusFollowerAssociation.query.filter_by(corpus_id=corpus_id, follower_id=current_user.id).first()
|
||||
if cfa is None:
|
||||
if corpus.user == current_user or current_user.is_administrator():
|
||||
cfr = CorpusFollowerRole.query.filter_by(name='Administrator').first()
|
||||
else:
|
||||
cfr = CorpusFollowerRole.query.filter_by(name='Anonymous').first()
|
||||
else:
|
||||
cfr = cfa.role
|
||||
if corpus.user == current_user or current_user.is_administrator():
|
||||
# now = datetime.utcnow()
|
||||
# payload = {
|
||||
# 'exp': now + timedelta(weeks=1),
|
||||
# 'iat': now,
|
||||
# 'iss': current_app.config['SERVER_NAME'],
|
||||
# 'sub': corpus.hashid
|
||||
# }
|
||||
# token = jwt.encode(
|
||||
# payload,
|
||||
# current_app.config['SECRET_KEY'],
|
||||
# algorithm='HS256'
|
||||
# )
|
||||
return render_template(
|
||||
'corpora/corpus.html.j2',
|
||||
title=corpus.title,
|
||||
corpus=corpus,
|
||||
# token=token,
|
||||
title='Corpus'
|
||||
cfr=cfr,
|
||||
cfrs=cfrs,
|
||||
users=users
|
||||
)
|
||||
if current_user.is_following_corpus(corpus) or corpus.is_public:
|
||||
corpus_files = [x.to_json_serializeable() for x in corpus.files]
|
||||
if (current_user.is_following_corpus(corpus) or corpus.is_public):
|
||||
cfas = CorpusFollowerAssociation.query.filter(Corpus.id == corpus_id, CorpusFollowerAssociation.follower_id != corpus.user.id).all()
|
||||
return render_template(
|
||||
'corpora/public_corpus.html.j2',
|
||||
title=corpus.title,
|
||||
corpus=corpus,
|
||||
corpus_files=corpus_files,
|
||||
title='Corpus'
|
||||
cfrs=cfrs,
|
||||
cfr=cfr,
|
||||
cfas=cfas,
|
||||
users=users
|
||||
)
|
||||
abort(403)
|
||||
|
||||
|
||||
@bp.route('/<hashid:corpus_id>', methods=['DELETE'])
|
||||
@login_required
|
||||
def delete_corpus(corpus_id):
|
||||
def _delete_corpus(app, corpus_id):
|
||||
with app.app_context():
|
||||
corpus = Corpus.query.get(corpus_id)
|
||||
corpus.delete()
|
||||
db.session.commit()
|
||||
|
||||
@bp.route('/<hashid:corpus_id>/analysis')
|
||||
@corpus_follower_permission_required('VIEW')
|
||||
@register_breadcrumb(bp, '.entity.analysis', 'Analysis', endpoint_arguments_constructor=corpus_eac)
|
||||
def analysis(corpus_id):
|
||||
corpus = Corpus.query.get_or_404(corpus_id)
|
||||
if not (corpus.user == current_user or current_user.is_administrator()):
|
||||
abort(403)
|
||||
thread = Thread(
|
||||
target=_delete_corpus,
|
||||
args=(current_app._get_current_object(), corpus_id)
|
||||
)
|
||||
thread.start()
|
||||
return {}, 202
|
||||
|
||||
|
||||
@bp.route('/<hashid:corpus_id>/analyse')
|
||||
@login_required
|
||||
def analyse_corpus(corpus_id):
|
||||
corpus = Corpus.query.get_or_404(corpus_id)
|
||||
if not (corpus.user == current_user
|
||||
or current_user.is_administrator()
|
||||
or current_user.is_following_corpus(corpus)):
|
||||
abort(403)
|
||||
return render_template(
|
||||
'corpora/analyse_corpus.html.j2',
|
||||
'corpora/analysis.html.j2',
|
||||
corpus=corpus,
|
||||
title=f'Analyse Corpus {corpus.title}'
|
||||
)
|
||||
|
||||
|
||||
@bp.route('/<hashid:corpus_id>/build', methods=['POST'])
|
||||
@login_required
|
||||
def build_corpus(corpus_id):
|
||||
def _build_corpus(app, corpus_id):
|
||||
with app.app_context():
|
||||
corpus = Corpus.query.get(corpus_id)
|
||||
corpus.build()
|
||||
db.session.commit()
|
||||
|
||||
corpus = Corpus.query.get_or_404(corpus_id)
|
||||
if not (corpus.user == current_user or current_user.is_administrator()):
|
||||
abort(403)
|
||||
# Check if the corpus has corpus files
|
||||
if not corpus.files.all():
|
||||
response = {'errors': {'message': 'Corpus file(s) required'}}
|
||||
return response, 409
|
||||
thread = Thread(
|
||||
target=_build_corpus,
|
||||
args=(current_app._get_current_object(), corpus_id)
|
||||
)
|
||||
thread.start()
|
||||
return {}, 202
|
||||
|
||||
|
||||
@bp.route('/<hashid:corpus_id>/files/create', methods=['GET', 'POST'])
|
||||
@login_required
|
||||
def create_corpus_file(corpus_id):
|
||||
corpus = Corpus.query.get_or_404(corpus_id)
|
||||
if not (corpus.user == current_user or current_user.is_administrator()):
|
||||
abort(403)
|
||||
form = CreateCorpusFileForm()
|
||||
if form.is_submitted():
|
||||
if not form.validate():
|
||||
response = {'errors': form.errors}
|
||||
return response, 400
|
||||
try:
|
||||
corpus_file = CorpusFile.create(
|
||||
form.vrt.data,
|
||||
address=form.address.data,
|
||||
author=form.author.data,
|
||||
booktitle=form.booktitle.data,
|
||||
chapter=form.chapter.data,
|
||||
editor=form.editor.data,
|
||||
institution=form.institution.data,
|
||||
journal=form.journal.data,
|
||||
pages=form.pages.data,
|
||||
publisher=form.publisher.data,
|
||||
publishing_year=form.publishing_year.data,
|
||||
school=form.school.data,
|
||||
title=form.title.data,
|
||||
mimetype='application/vrt+xml',
|
||||
corpus=corpus
|
||||
)
|
||||
except (AttributeError, OSError):
|
||||
abort(500)
|
||||
corpus.status = CorpusStatus.UNPREPARED
|
||||
db.session.commit()
|
||||
message = Markup(
|
||||
'Corpus file'
|
||||
f'"<a href="{corpus_file.url}">{corpus_file.filename}</a>" added'
|
||||
)
|
||||
flash(message, category='corpus')
|
||||
return {}, 201, {'Location': corpus.url}
|
||||
return render_template(
|
||||
'corpora/create_corpus_file.html.j2',
|
||||
corpus=corpus,
|
||||
form=form,
|
||||
title='Add corpus file'
|
||||
)
|
||||
|
||||
|
||||
@bp.route('/<hashid:corpus_id>/files/<hashid:corpus_file_id>', methods=['GET', 'POST'])
|
||||
@login_required
|
||||
def corpus_file(corpus_id, corpus_file_id):
|
||||
corpus_file = CorpusFile.query.filter_by(corpus_id = corpus_id, id=corpus_file_id).first_or_404()
|
||||
if not (corpus_file.corpus.user == current_user or current_user.is_administrator()):
|
||||
abort(403)
|
||||
form = UpdateCorpusFileForm(data=corpus_file.to_json_serializeable())
|
||||
if form.validate_on_submit():
|
||||
form.populate_obj(corpus_file)
|
||||
if db.session.is_modified(corpus_file):
|
||||
corpus_file.corpus.status = CorpusStatus.UNPREPARED
|
||||
db.session.commit()
|
||||
message = Markup(f'Corpus file "<a href="{corpus_file.url}">{corpus_file.filename}</a>" updated')
|
||||
flash(message, category='corpus')
|
||||
return redirect(corpus_file.corpus.url)
|
||||
return render_template(
|
||||
'corpora/corpus_file.html.j2',
|
||||
corpus=corpus_file.corpus,
|
||||
corpus_file=corpus_file,
|
||||
form=form,
|
||||
title='Edit corpus file'
|
||||
)
|
||||
|
||||
|
||||
@bp.route('/<hashid:corpus_id>/files/<hashid:corpus_file_id>', methods=['DELETE'])
|
||||
@login_required
|
||||
def delete_corpus_file(corpus_id, corpus_file_id):
|
||||
def _delete_corpus_file(app, corpus_file_id):
|
||||
with app.app_context():
|
||||
corpus_file = CorpusFile.query.get(corpus_file_id)
|
||||
corpus_file.delete()
|
||||
db.session.commit()
|
||||
|
||||
corpus_file = CorpusFile.query.filter_by(corpus_id = corpus_id, id=corpus_file_id).first_or_404()
|
||||
if not (corpus_file.corpus.user == current_user or current_user.is_administrator()):
|
||||
abort(403)
|
||||
thread = Thread(
|
||||
target=_delete_corpus_file,
|
||||
args=(current_app._get_current_object(), corpus_file_id)
|
||||
)
|
||||
thread.start()
|
||||
return {}, 202
|
||||
|
||||
|
||||
@bp.route('/<hashid:corpus_id>/files/<hashid:corpus_file_id>/download')
|
||||
@login_required
|
||||
def download_corpus_file(corpus_id, corpus_file_id):
|
||||
corpus_file = CorpusFile.query.filter_by(corpus_id = corpus_id, id=corpus_file_id).first_or_404()
|
||||
if not (corpus_file.corpus.user == current_user or current_user.is_administrator()):
|
||||
abort(403)
|
||||
return send_from_directory(
|
||||
os.path.dirname(corpus_file.path),
|
||||
os.path.basename(corpus_file.path),
|
||||
as_attachment=True,
|
||||
attachment_filename=corpus_file.filename,
|
||||
mimetype=corpus_file.mimetype
|
||||
)
|
||||
# @bp.route('/<hashid:corpus_id>/follow/<token>')
|
||||
# def follow_corpus(corpus_id, token):
|
||||
# corpus = Corpus.query.get_or_404(corpus_id)
|
||||
# if current_user.follow_corpus_by_token(token):
|
||||
# db.session.commit()
|
||||
# flash(f'You are following "{corpus.title}" now', category='corpus')
|
||||
# return redirect(url_for('corpora.corpus', corpus_id=corpus_id))
|
||||
# abort(403)
|
||||
|
||||
|
||||
@bp.route('/import', methods=['GET', 'POST'])
|
||||
@login_required
|
||||
@register_breadcrumb(bp, '.import', 'Import')
|
||||
def import_corpus():
|
||||
abort(503)
|
||||
|
||||
|
||||
@bp.route('/<hashid:corpus_id>/export')
|
||||
@login_required
|
||||
@corpus_follower_permission_required('VIEW')
|
||||
@register_breadcrumb(bp, '.entity.export', 'Export', endpoint_arguments_constructor=corpus_eac)
|
||||
def export_corpus(corpus_id):
|
||||
abort(503)
|
||||
|
17
app/corpora/utils.py
Normal file
@@ -0,0 +1,17 @@
|
||||
from flask import request, url_for
|
||||
from app.models import Corpus
|
||||
|
||||
|
||||
def corpus_endpoint_arguments_constructor():
    '''
    Build the endpoint arguments for a corpus breadcrumb from the view
    arguments of the current request.
    '''
    view_args = request.view_args
    return dict(corpus_id=view_args['corpus_id'])
|
||||
|
||||
|
||||
def corpus_dynamic_list_constructor():
    '''
    Build the one-element breadcrumb list for the corpus addressed by the
    current request.

    Aborts with 404 if the corpus does not exist. The entry consists of a
    'text' (icon markup followed by the corpus title) and the 'url' of the
    corpus page.
    '''
    corpus_id = request.view_args['corpus_id']
    corpus = Corpus.query.get_or_404(corpus_id)
    breadcrumb = {}
    breadcrumb['text'] = f'<i class="material-icons left">book</i>{corpus.title}'
    breadcrumb['url'] = url_for('.corpus', corpus_id=corpus_id)
    return [breadcrumb]
|
@@ -28,24 +28,24 @@ def _create_build_corpus_service(corpus):
|
||||
''' ## Command ## '''
|
||||
command = ['bash', '-c']
|
||||
command.append(
|
||||
f'mkdir /corpora/data/nopaque_{corpus.id}'
|
||||
f'mkdir /corpora/data/nopaque-{corpus.hashid.lower()}'
|
||||
' && '
|
||||
'cwb-encode'
|
||||
' -c utf8'
|
||||
f' -d /corpora/data/nopaque_{corpus.id}'
|
||||
f' -d /corpora/data/nopaque-{corpus.hashid.lower()}'
|
||||
' -f /root/files/corpus.vrt'
|
||||
f' -R /usr/local/share/cwb/registry/nopaque_{corpus.id}'
|
||||
f' -R /usr/local/share/cwb/registry/nopaque-{corpus.hashid.lower()}'
|
||||
' -P pos -P lemma -P simple_pos'
|
||||
' -S ent:0+type -S s:0'
|
||||
' -S text:0+address+author+booktitle+chapter+editor+institution+journal+pages+publisher+publishing_year+school+title'
|
||||
' -xsB -9'
|
||||
' && '
|
||||
f'cwb-make -V NOPAQUE_{corpus.id}'
|
||||
f'cwb-make -V NOPAQUE-{corpus.hashid.upper()}'
|
||||
)
|
||||
''' ## Constraints ## '''
|
||||
constraints = ['node.role==worker']
|
||||
''' ## Image ## '''
|
||||
image = f'{current_app.config["NOPAQUE_DOCKER_IMAGE_PREFIX"]}cwb:r1702'
|
||||
image = f'{current_app.config["NOPAQUE_DOCKER_IMAGE_PREFIX"]}cwb:r1879'
|
||||
''' ## Labels ## '''
|
||||
labels = {
|
||||
'origin': current_app.config['SERVER_NAME'],
|
||||
@@ -139,21 +139,25 @@ def _create_cqpserver_container(corpus):
|
||||
''' ## Entrypoint ## '''
|
||||
entrypoint = ['bash', '-c']
|
||||
''' ## Image ## '''
|
||||
image = f'{current_app.config["NOPAQUE_DOCKER_IMAGE_PREFIX"]}cwb:r1702'
|
||||
image = f'{current_app.config["NOPAQUE_DOCKER_IMAGE_PREFIX"]}cwb:r1879'
|
||||
''' ## Name ## '''
|
||||
name = f'cqpserver_{corpus.id}'
|
||||
''' ## Network ## '''
|
||||
network = f'{current_app.config["DOCKER_NETWORK_NAME"]}'
|
||||
network = f'{current_app.config["NOPAQUE_DOCKER_NETWORK_NAME"]}'
|
||||
''' ## Volumes ## '''
|
||||
volumes = []
|
||||
''' ### Corpus data volume ### '''
|
||||
data_volume_source = os.path.join(corpus.path, 'cwb', 'data')
|
||||
data_volume_target = '/corpora/data'
|
||||
# data_volume_source = os.path.join(corpus.path, 'cwb', 'data', f'nopaque_{corpus.id}')
|
||||
# data_volume_target = f'/corpora/data/nopaque_{corpus.hashid.lower()}'
|
||||
data_volume = f'{data_volume_source}:{data_volume_target}:rw'
|
||||
volumes.append(data_volume)
|
||||
''' ### Corpus registry volume ### '''
|
||||
registry_volume_source = os.path.join(corpus.path, 'cwb', 'registry')
|
||||
registry_volume_target = '/usr/local/share/cwb/registry'
|
||||
# registry_volume_source = os.path.join(corpus.path, 'cwb', 'registry', f'nopaque_{corpus.id}')
|
||||
# registry_volume_target = f'/usr/local/share/cwb/registry/nopaque_{corpus.hashid.lower()}'
|
||||
registry_volume = f'{registry_volume_source}:{registry_volume_target}:rw'
|
||||
volumes.append(registry_volume)
|
||||
# Check if a cqpserver container already exists. If this is the case,
|
||||
|
@@ -1,7 +1,9 @@
|
||||
from flask import abort, current_app
|
||||
from flask import abort, current_app, request
|
||||
from flask_login import current_user
|
||||
from functools import wraps
|
||||
from threading import Thread
|
||||
from typing import List, Union
|
||||
from werkzeug.exceptions import NotAcceptable
|
||||
from app.models import Permission
|
||||
|
||||
|
||||
@@ -61,3 +63,37 @@ def background(f):
|
||||
thread.start()
|
||||
return thread
|
||||
return wrapped
|
||||
|
||||
|
||||
def content_negotiation(
    produces: Union[str, List[str], None] = None,
    consumes: Union[str, List[str], None] = None
):
    '''
    Decorator factory that restricts a view to given media types.

    :param produces: Media type(s) the view can respond with. If given, at
        least one of them must be listed in the Accept header of the
        request, otherwise NotAcceptable (406) is raised.
    :param consumes: Media type(s) the view can read. If given, the media
        type of the request body must be one of them, otherwise
        UnsupportedMediaType (415) is raised.
    :raises TypeError: If an argument is neither None, a string nor a list
        of strings. This is checked once, when the decorator is created.
    '''
    def _as_mimetype_set(value):
        # Normalize None / str / list of str to None or a set of str.
        if value is None:
            return None
        if isinstance(value, str):
            return {value}
        if isinstance(value, list) and all(isinstance(x, str) for x in value):
            return {*value}
        raise TypeError(
            'Expected None, a string or a list of strings, '
            f'got {type(value).__name__}'
        )

    # The arguments never change between requests, so validate and
    # normalize them here instead of on every call of the view.
    produceables = _as_mimetype_set(produces)
    consumeables = _as_mimetype_set(consumes)

    def decorator(f):
        @wraps(f)
        def decorated_function(*args, **kwargs):
            # Imported locally so this block does not depend on a changed
            # module level import.
            from werkzeug.exceptions import UnsupportedMediaType

            provided = request.mimetype
            # NOTE(review): this is an exact comparison of media type
            # strings, wildcard entries such as "*/*" in the Accept header
            # do not match a concrete produceable type.
            accepted = {*request.accept_mimetypes.values()}
            if produceables is not None and len(produceables & accepted) == 0:
                raise NotAcceptable()
            if consumeables is not None and provided not in consumeables:
                # A request body in a format the view cannot read is a
                # 415, not a 406 (which is about the response format).
                raise UnsupportedMediaType()
            return f(*args, **kwargs)
        return decorated_function
    return decorator
|
||||
|
@@ -1,11 +1,14 @@
|
||||
from flask import render_template, request
|
||||
from flask import jsonify, render_template, request
|
||||
from werkzeug.exceptions import HTTPException
|
||||
from . import bp
|
||||
|
||||
|
||||
@bp.errorhandler(HTTPException)
|
||||
def generic_error_handler(e):
|
||||
if (request.accept_mimetypes.accept_json
|
||||
and not request.accept_mimetypes.accept_html):
|
||||
return {'errors': {'message': e.description}}, e.code
|
||||
return render_template('errors/error.html.j2', error=e), e.code
|
||||
@bp.app_errorhandler(HTTPException)
|
||||
def handle_http_exception(error):
|
||||
''' Generic HTTP exception handler '''
|
||||
accept_json = request.accept_mimetypes.accept_json
|
||||
accept_html = request.accept_mimetypes.accept_html
|
||||
if accept_json and not accept_html:
|
||||
response = jsonify(str(error))
|
||||
return response, error.code
|
||||
return render_template('errors/error.html.j2', error=error), error.code
|
||||
|
@@ -1,5 +1,18 @@
|
||||
from flask import Blueprint
|
||||
from flask_login import login_required
|
||||
|
||||
|
||||
bp = Blueprint('jobs', __name__)
|
||||
from . import routes
|
||||
|
||||
|
||||
@bp.before_request
|
||||
@login_required
|
||||
def before_request():
|
||||
'''
|
||||
Ensures that the routes in this package can only be visited by users that
|
||||
are logged in.
|
||||
'''
|
||||
pass
|
||||
|
||||
|
||||
from . import routes, json_routes
|
||||
|
73
app/jobs/json_routes.py
Normal file
@@ -0,0 +1,73 @@
|
||||
from flask import abort, current_app
|
||||
from flask_login import current_user
|
||||
from threading import Thread
|
||||
import os
|
||||
from app import db
|
||||
from app.decorators import admin_required, content_negotiation
|
||||
from app.models import Job, JobStatus
|
||||
from . import bp
|
||||
|
||||
|
||||
@bp.route('/<hashid:job_id>', methods=['DELETE'])
@content_negotiation(produces='application/json')
def delete_job(job_id):
    '''
    Schedule the deletion of a job and answer right away.

    Aborts with 404 if the job does not exist and with 403 if the current
    user is neither the owner of the job nor an administrator. The deletion
    and the commit run in a separate thread with its own application
    context.

    Returns a JSON-serializable dict with a message together with the
    status code 202.
    '''
    def _delete_job(app, job_id):
        # Worker executed in the spawned thread; it re-fetches the job by
        # id inside a fresh application context.
        with app.app_context():
            doomed_job = Job.query.get(job_id)
            doomed_job.delete()
            db.session.commit()

    job = Job.query.get_or_404(job_id)
    is_owner_or_admin = job.user == current_user or current_user.is_administrator()
    if not is_owner_or_admin:
        abort(403)
    # Hand the real application object (not the proxy) to the thread.
    app = current_app._get_current_object()
    Thread(target=_delete_job, args=(app, job_id)).start()
    return {'message': f'Job "{job.title}" marked for deletion'}, 202
|
||||
|
||||
|
||||
@bp.route('/<hashid:job_id>/log')
@admin_required
@content_negotiation(produces='application/json')
def job_log(job_id):
    '''
    Return the pyflow log of a completed or failed job.

    Aborts with 404 if the job does not exist. Responds with 409 and an
    error object if the job is neither completed nor failed.

    Returns a JSON-serializable dict with the log text under 'jobLog'
    together with the status code 200.
    '''
    job = Job.query.get_or_404(job_id)
    if job.status not in (JobStatus.COMPLETED, JobStatus.FAILED):
        return {'errors': {'message': 'Job status is not completed or failed'}}, 409
    log_path = os.path.join(job.path, 'pipeline_data', 'logs', 'pyflow_log.txt')
    with open(log_path) as log_file:
        log = log_file.read()
    return {'jobLog': log}, 200
|
||||
|
||||
|
||||
@bp.route('/<hashid:job_id>/restart', methods=['POST'])
@content_negotiation(produces='application/json')
def restart_job(job_id):
    '''
    Schedule the restart of a failed job and answer right away.

    Aborts with 404 if the job does not exist and with 403 if the current
    user is neither the owner of the job nor an administrator. Responds
    with 409 and an error object if the job is not in the failed state.
    Otherwise the restart and the commit run in a separate thread with its
    own application context.

    Returns a JSON-serializable dict with a message together with the
    status code 202.
    '''
    def _restart_job(app, job_id):
        # Worker executed in the spawned thread; it re-fetches the job by
        # id inside a fresh application context.
        with app.app_context():
            job = Job.query.get(job_id)
            job.restart()
            db.session.commit()

    job = Job.query.get_or_404(job_id)
    if not (job.user == current_user or current_user.is_administrator()):
        abort(403)
    # Only failed jobs may be restarted. The original condition used "=="
    # and thereby rejected exactly the jobs that are allowed to restart.
    if job.status != JobStatus.FAILED:
        response = {'errors': {'message': 'Job status is not "failed"'}}
        return response, 409
    thread = Thread(
        target=_restart_job,
        args=(current_app._get_current_object(), job_id)
    )
    thread.start()
    response_data = {
        'message': f'Job "{job.title}" marked for restarting'
    }
    return response_data, 202
|
@@ -1,93 +1,40 @@
|
||||
from flask import (
|
||||
abort,
|
||||
current_app,
|
||||
redirect,
|
||||
render_template,
|
||||
send_from_directory
|
||||
send_from_directory,
|
||||
url_for
|
||||
)
|
||||
from flask_login import current_user, login_required
|
||||
from threading import Thread
|
||||
from flask_breadcrumbs import register_breadcrumb
|
||||
from flask_login import current_user
|
||||
import os
|
||||
from app import db
|
||||
from app.decorators import admin_required
|
||||
from app.models import Job, JobInput, JobResult, JobStatus
|
||||
from app.models import Job, JobInput, JobResult
|
||||
from . import bp
|
||||
from .utils import job_dynamic_list_constructor as job_dlc
|
||||
|
||||
|
||||
@bp.route('')
|
||||
@register_breadcrumb(bp, '.', '<i class="nopaque-icons left">J</i>My Jobs')
|
||||
def corpora():
|
||||
return redirect(url_for('main.dashboard', _anchor='jobs'))
|
||||
|
||||
|
||||
@bp.route('/<hashid:job_id>')
|
||||
@login_required
|
||||
@register_breadcrumb(bp, '.entity', '', dynamic_list_constructor=job_dlc)
|
||||
def job(job_id):
|
||||
job = Job.query.get_or_404(job_id)
|
||||
if not (job.user == current_user or current_user.is_administrator()):
|
||||
abort(403)
|
||||
return render_template(
|
||||
'jobs/job.html.j2',
|
||||
job=job,
|
||||
title='Job'
|
||||
title='Job',
|
||||
job=job
|
||||
)
|
||||
|
||||
|
||||
@bp.route('/<hashid:job_id>', methods=['DELETE'])
|
||||
@login_required
|
||||
def delete_job(job_id):
|
||||
def _delete_job(app, job_id):
|
||||
with app.app_context():
|
||||
job = Job.query.get(job_id)
|
||||
job.delete()
|
||||
db.session.commit()
|
||||
|
||||
job = Job.query.get_or_404(job_id)
|
||||
if not (job.user == current_user or current_user.is_administrator()):
|
||||
abort(403)
|
||||
thread = Thread(
|
||||
target=_delete_job,
|
||||
args=(current_app._get_current_object(), job_id)
|
||||
)
|
||||
thread.start()
|
||||
return {}, 202
|
||||
|
||||
|
||||
@bp.route('/<hashid:job_id>/log')
|
||||
@login_required
|
||||
@admin_required
|
||||
def job_log(job_id):
|
||||
job = Job.query.get_or_404(job_id)
|
||||
if job.status not in [JobStatus.COMPLETED, JobStatus.FAILED]:
|
||||
response = {'errors': {'message': 'Job status is not completed or failed'}}
|
||||
return response, 409
|
||||
with open(os.path.join(job.path, 'pipeline_data', 'logs', 'pyflow_log.txt')) as log_file:
|
||||
log = log_file.read()
|
||||
return log, 200, {'Content-Type': 'text/plain; charset=utf-8'}
|
||||
|
||||
|
||||
@bp.route('/<hashid:job_id>/restart', methods=['POST'])
@login_required
def restart_job(job_id):
    '''
    Restart a failed job asynchronously.

    Responds with 404 if the job does not exist, 403 if the current user is
    neither the job's owner nor an administrator, and 409 (JSON error body)
    if the job is not in the FAILED state. Otherwise the restart is handed
    to a background thread and an empty body with status 202 is returned.
    '''
    def _restart_job(app, job_id):
        # Executed in a separate thread: push an application context and
        # load the job again by its id before restarting it.
        with app.app_context():
            job = Job.query.get(job_id)
            job.restart()
            db.session.commit()

    job = Job.query.get_or_404(job_id)
    if not (job.user == current_user or current_user.is_administrator()):
        abort(403)
    # Only failed jobs can be restarted. The comparison used to be `==`,
    # which rejected exactly the jobs the error message says are allowed.
    if job.status != JobStatus.FAILED:
        response = {'errors': {'message': 'Job status is not "failed"'}}
        return response, 409
    thread = Thread(
        target=_restart_job,
        # Pass the real application object, not the context-bound proxy.
        args=(current_app._get_current_object(), job_id)
    )
    thread.start()
    return {}, 202
|
||||
|
||||
|
||||
@bp.route('/<hashid:job_id>/inputs/<hashid:job_input_id>/download')
|
||||
@login_required
|
||||
def download_job_input(job_id, job_input_id):
|
||||
job_input = JobInput.query.get_or_404(job_input_id)
|
||||
if job_input.job.id != job_id:
|
||||
abort(404)
|
||||
job_input = JobInput.query.filter_by(job_id=job_id, id=job_input_id).first_or_404()
|
||||
if not (job_input.job.user == current_user or current_user.is_administrator()):
|
||||
abort(403)
|
||||
return send_from_directory(
|
||||
@@ -100,11 +47,8 @@ def download_job_input(job_id, job_input_id):
|
||||
|
||||
|
||||
@bp.route('/<hashid:job_id>/results/<hashid:job_result_id>/download')
|
||||
@login_required
|
||||
def download_job_result(job_id, job_result_id):
|
||||
job_result = JobResult.query.get_or_404(job_result_id)
|
||||
if job_result.job.id != job_id:
|
||||
abort(404)
|
||||
job_result = JobResult.query.filter_by(job_id=job_id, id=job_result_id).first_or_404()
|
||||
if not (job_result.job.user == current_user or current_user.is_administrator()):
|
||||
abort(403)
|
||||
return send_from_directory(
|
||||
|
13
app/jobs/utils.py
Normal file
@@ -0,0 +1,13 @@
|
||||
from flask import request, url_for
|
||||
from app.models import Job
|
||||
|
||||
|
||||
def job_dynamic_list_constructor():
    '''
    Build the breadcrumb entry for the job addressed by the current request.

    Reads ``job_id`` from the request's view arguments, loads the job
    (aborting with 404 if it does not exist) and returns a one-element list
    holding a dict with the breadcrumb's ``text`` (an HTML snippet) and
    ``url``.
    '''
    from html import escape

    job_id = request.view_args['job_id']
    job = Job.query.get_or_404(job_id)
    # The breadcrumb text is an HTML fragment, so values taken from the job
    # must be escaped before they are interpolated into it.
    service = escape(str(job.service))
    title = escape(str(job.title))
    return [
        {
            'text': f'<i class="nopaque-icons left service-icons" data-service="{service}"></i>{title}',
            'url': url_for('.job', job_id=job_id)
        }
    ]
|
@@ -1,5 +1,5 @@
|
||||
from flask import Blueprint
|
||||
|
||||
|
||||
bp = Blueprint('main', __name__)
|
||||
from . import routes
|
||||
bp = Blueprint('main', __name__, cli_group=None)
|
||||
from . import cli, routes
|
||||
|
47
app/main/cli.py
Normal file
@@ -0,0 +1,47 @@
|
||||
from flask import current_app
|
||||
from flask_migrate import upgrade
|
||||
import os
|
||||
from app.models import (
|
||||
CorpusFollowerRole,
|
||||
Role,
|
||||
SpaCyNLPPipelineModel,
|
||||
TesseractOCRPipelineModel,
|
||||
User
|
||||
)
|
||||
from . import bp
|
||||
|
||||
|
||||
@bp.cli.command('deploy')
def deploy():
    ''' Run deployment tasks. '''
    # Step 1: make sure the default directories below the data directory
    # exist. An existing path that is not a directory is an error.
    print('Make default directories')
    data_root = current_app.config['NOPAQUE_DATA_DIR']
    for subdirectory in ('tmp', 'users'):
        path = os.path.join(data_root, subdirectory)
        if not os.path.exists(path):
            os.mkdir(path)
        elif not os.path.isdir(path):
            raise NotADirectoryError(f'{path} is not a directory')

    # Step 2: migrate database to latest revision
    print('Migrate database to latest revision')
    upgrade()

    # Step 3: insert/update default database values, in this fixed order.
    default_steps = (
        ('Insert/Update default Roles', Role),
        ('Insert/Update default Users', User),
        ('Insert/Update default CorpusFollowerRoles', CorpusFollowerRole),
        ('Insert/Update default SpaCyNLPPipelineModels', SpaCyNLPPipelineModel),
        ('Insert/Update default TesseractOCRPipelineModels', TesseractOCRPipelineModel)
    )
    for message, model in default_steps:
        print(message)
        model.insert_defaults()

    # TODO: Implement checks for if the nopaque network exists
|
@@ -1,13 +1,16 @@
|
||||
from flask import flash, redirect, render_template, url_for
|
||||
from flask_breadcrumbs import register_breadcrumb
|
||||
from flask_login import current_user, login_required, login_user
|
||||
from app.auth.forms import LoginForm
|
||||
from app.models import Corpus, User
|
||||
from sqlalchemy import or_
|
||||
from . import bp
|
||||
|
||||
|
||||
@bp.route('', methods=['GET', 'POST'])
|
||||
@bp.route('/', methods=['GET', 'POST'])
|
||||
@register_breadcrumb(bp, '.', '<i class="material-icons">home</i>')
|
||||
def index():
|
||||
form = LoginForm(prefix='login-form')
|
||||
form = LoginForm()
|
||||
if form.validate_on_submit():
|
||||
user = User.query.filter((User.email == form.user.data.lower()) | (User.username == form.user.data)).first()
|
||||
if user and user.verify_password(form.password.data):
|
||||
@@ -16,54 +19,70 @@ def index():
|
||||
return redirect(url_for('.dashboard'))
|
||||
flash('Invalid email/username or password', category='error')
|
||||
redirect(url_for('.index'))
|
||||
return render_template('main/index.html.j2', form=form, title='nopaque')
|
||||
|
||||
|
||||
@bp.route('/faq')
|
||||
def faq():
|
||||
return render_template('main/faq.html.j2', title='Frequently Asked Questions')
|
||||
|
||||
|
||||
@bp.route('/dashboard')
|
||||
@login_required
|
||||
def dashboard():
|
||||
# users = [
|
||||
# u.to_json_serializeable(filter_by_privacy_settings=True) for u
|
||||
# in User.query.filter(User.is_public == True, User.id != current_user.id).all()
|
||||
# ]
|
||||
# corpora = [
|
||||
# c.to_json_serializeable() for c
|
||||
# in Corpus.query.filter(Corpus.is_public == True, Corpus.user != current_user).all()
|
||||
# ]
|
||||
return render_template(
|
||||
'main/dashboard.html.j2',
|
||||
title='Dashboard',
|
||||
# users=users,
|
||||
# corpora=corpora
|
||||
'main/index.html.j2',
|
||||
title='nopaque',
|
||||
form=form
|
||||
)
|
||||
|
||||
|
||||
@bp.route('/dashboard2')
|
||||
@bp.route('/faq')
|
||||
@register_breadcrumb(bp, '.faq', 'Frequently Asked Questions')
|
||||
def faq():
|
||||
return render_template(
|
||||
'main/faq.html.j2',
|
||||
title='Frequently Asked Questions'
|
||||
)
|
||||
|
||||
|
||||
@bp.route('/dashboard')
|
||||
@register_breadcrumb(bp, '.dashboard', '<i class="material-icons left">dashboard</i>Dashboard')
|
||||
@login_required
|
||||
def dashboard2():
|
||||
return render_template('main/dashboard2.html.j2', title='Dashboard')
|
||||
|
||||
|
||||
@bp.route('/user_manual')
|
||||
def user_manual():
|
||||
return render_template('main/user_manual.html.j2', title='User manual')
|
||||
def dashboard():
|
||||
return render_template(
|
||||
'main/dashboard.html.j2',
|
||||
title='Dashboard'
|
||||
)
|
||||
|
||||
|
||||
@bp.route('/news')
|
||||
@register_breadcrumb(bp, '.news', '<i class="material-icons left">email</i>News')
|
||||
def news():
|
||||
return render_template('main/news.html.j2', title='News')
|
||||
return render_template(
|
||||
'main/news.html.j2',
|
||||
title='News'
|
||||
)
|
||||
|
||||
|
||||
@bp.route('/privacy_policy')
|
||||
@register_breadcrumb(bp, '.privacy_policy', 'Private statement (GDPR)')
|
||||
def privacy_policy():
|
||||
return render_template('main/privacy_policy.html.j2', title='Privacy statement (GDPR)')
|
||||
return render_template(
|
||||
'main/privacy_policy.html.j2',
|
||||
title='Privacy statement (GDPR)'
|
||||
)
|
||||
|
||||
|
||||
@bp.route('/terms_of_use')
|
||||
@register_breadcrumb(bp, '.terms_of_use', 'Terms of Use')
|
||||
def terms_of_use():
|
||||
return render_template('main/terms_of_use.html.j2', title='Terms of Use')
|
||||
return render_template(
|
||||
'main/terms_of_use.html.j2',
|
||||
title='Terms of Use'
|
||||
)
|
||||
|
||||
|
||||
@bp.route('/social-area')
@register_breadcrumb(bp, '.social_area', '<i class="material-icons left">group</i>Social Area')
@login_required
def social_area():
    '''
    Render the social area page.

    Passes to the template all public corpora that do not belong to the
    current user and all public users other than the current user.
    '''
    # `== True` is intentional: these are SQL column expressions, not
    # Python booleans. Leftover debug print() calls were removed.
    corpora = Corpus.query.filter(Corpus.is_public == True, Corpus.user != current_user).all()  # noqa: E712
    users = User.query.filter(User.is_public == True, User.id != current_user.id).all()  # noqa: E712
    return render_template(
        'main/social_area.html.j2',
        title='Social Area',
        corpora=corpora,
        users=users
    )
|
||||
|
649
app/models.py
@@ -1,16 +1,18 @@
|
||||
from datetime import datetime, timedelta
|
||||
from enum import Enum, IntEnum
|
||||
from flask import current_app, url_for
|
||||
from flask import abort, current_app, url_for
|
||||
from flask_hashids import HashidMixin
|
||||
from flask_login import UserMixin
|
||||
from sqlalchemy.ext.associationproxy import association_proxy
|
||||
from time import sleep
|
||||
from tqdm import tqdm
|
||||
from typing import Union
|
||||
from werkzeug.security import generate_password_hash, check_password_hash
|
||||
from werkzeug.utils import secure_filename
|
||||
import json
|
||||
import jwt
|
||||
import os
|
||||
import re
|
||||
import requests
|
||||
import secrets
|
||||
import shutil
|
||||
@@ -36,6 +38,16 @@ class CorpusStatus(IntEnum):
|
||||
RUNNING_ANALYSIS_SESSION = 8
|
||||
CANCELING_ANALYSIS_SESSION = 9
|
||||
|
||||
@staticmethod
def get(corpus_status: Union['CorpusStatus', int, str]) -> 'CorpusStatus':
    '''
    Coerce a value to a CorpusStatus member.

    A CorpusStatus is returned unchanged, an int is looked up by value and
    a str is looked up by member name. Any other type raises TypeError.
    '''
    if isinstance(corpus_status, CorpusStatus):
        member = corpus_status
    elif isinstance(corpus_status, int):
        member = CorpusStatus(corpus_status)
    elif isinstance(corpus_status, str):
        member = CorpusStatus[corpus_status]
    else:
        raise TypeError('corpus_status must be CorpusStatus, int, or str')
    return member
|
||||
|
||||
|
||||
class JobStatus(IntEnum):
|
||||
INITIALIZING = 1
|
||||
@@ -47,6 +59,16 @@ class JobStatus(IntEnum):
|
||||
COMPLETED = 7
|
||||
FAILED = 8
|
||||
|
||||
@staticmethod
def get(job_status: Union['JobStatus', int, str]) -> 'JobStatus':
    '''
    Coerce a value to a JobStatus member.

    A JobStatus is returned unchanged, an int is looked up by value and a
    str is looked up by member name. Any other type raises TypeError.
    '''
    if isinstance(job_status, JobStatus):
        member = job_status
    elif isinstance(job_status, int):
        member = JobStatus(job_status)
    elif isinstance(job_status, str):
        member = JobStatus[job_status]
    else:
        raise TypeError('job_status must be JobStatus, int, or str')
    return member
|
||||
|
||||
|
||||
class Permission(IntEnum):
|
||||
'''
|
||||
@@ -57,6 +79,16 @@ class Permission(IntEnum):
|
||||
CONTRIBUTE = 2
|
||||
USE_API = 4
|
||||
|
||||
@staticmethod
def get(permission: Union['Permission', int, str]) -> 'Permission':
    '''
    Coerce a value to a Permission member.

    A Permission is returned unchanged, an int is looked up by value and a
    str is looked up by member name. Any other type raises TypeError.
    '''
    if isinstance(permission, Permission):
        member = permission
    elif isinstance(permission, int):
        member = Permission(permission)
    elif isinstance(permission, str):
        member = Permission[permission]
    else:
        raise TypeError('permission must be Permission, int, or str')
    return member
|
||||
|
||||
|
||||
class UserSettingJobStatusMailNotificationLevel(IntEnum):
|
||||
NONE = 1
|
||||
@@ -69,10 +101,31 @@ class ProfilePrivacySettings(IntEnum):
|
||||
SHOW_LAST_SEEN = 2
|
||||
SHOW_MEMBER_SINCE = 4
|
||||
|
||||
class CorpusFollowPermission(IntEnum):
|
||||
@staticmethod
def get(profile_privacy_setting: Union['ProfilePrivacySettings', int, str]) -> 'ProfilePrivacySettings':
    '''
    Coerce a value to a ProfilePrivacySettings member.

    A ProfilePrivacySettings is returned unchanged, an int is looked up by
    value and a str is looked up by member name. Any other type raises
    TypeError.
    '''
    if isinstance(profile_privacy_setting, ProfilePrivacySettings):
        member = profile_privacy_setting
    elif isinstance(profile_privacy_setting, int):
        member = ProfilePrivacySettings(profile_privacy_setting)
    elif isinstance(profile_privacy_setting, str):
        member = ProfilePrivacySettings[profile_privacy_setting]
    else:
        raise TypeError('profile_privacy_setting must be ProfilePrivacySettings, int, or str')
    return member
|
||||
|
||||
class CorpusFollowerPermission(IntEnum):
|
||||
VIEW = 1
|
||||
CONTRIBUTE = 2
|
||||
ADMINISTRATE = 4
|
||||
MANAGE_FILES = 2
|
||||
MANAGE_FOLLOWERS = 4
|
||||
MANAGE_CORPUS = 8
|
||||
|
||||
@staticmethod
def get(corpus_follower_permission: Union['CorpusFollowerPermission', int, str]) -> 'CorpusFollowerPermission':
    '''
    Coerce a value to a CorpusFollowerPermission member.

    A CorpusFollowerPermission is returned unchanged, an int is looked up
    by value and a str is looked up by member name. Any other type raises
    TypeError.
    '''
    if isinstance(corpus_follower_permission, CorpusFollowerPermission):
        member = corpus_follower_permission
    elif isinstance(corpus_follower_permission, int):
        member = CorpusFollowerPermission(corpus_follower_permission)
    elif isinstance(corpus_follower_permission, str):
        member = CorpusFollowerPermission[corpus_follower_permission]
    else:
        raise TypeError('corpus_follower_permission must be CorpusFollowerPermission, int, or str')
    return member
|
||||
# endregion enums
|
||||
|
||||
|
||||
@@ -180,16 +233,19 @@ class Role(HashidMixin, db.Model):
|
||||
def __repr__(self):
|
||||
return f'<Role {self.name}>'
|
||||
|
||||
def add_permission(self, permission):
|
||||
if not self.has_permission(permission):
|
||||
self.permissions += permission
|
||||
|
||||
def has_permission(self, permission):
|
||||
return self.permissions & permission == permission
|
||||
|
||||
def remove_permission(self, permission):
|
||||
if self.has_permission(permission):
|
||||
self.permissions -= permission
|
||||
def has_permission(self, permission: Union[Permission, int, str]):
|
||||
p = Permission.get(permission)
|
||||
return self.permissions & p.value == p.value
|
||||
|
||||
def add_permission(self, permission: Union[Permission, int, str]):
|
||||
p = Permission.get(permission)
|
||||
if not self.has_permission(p):
|
||||
self.permissions += p.value
|
||||
|
||||
def remove_permission(self, permission: Union[Permission, int, str]):
|
||||
p = Permission.get(permission)
|
||||
if self.has_permission(p):
|
||||
self.permissions -= p.value
|
||||
|
||||
def reset_permissions(self):
    '''Clear all permission bits of this role.'''
    no_permissions = 0
    self.permissions = no_permissions
|
||||
@@ -199,8 +255,13 @@ class Role(HashidMixin, db.Model):
|
||||
'id': self.hashid,
|
||||
'default': self.default,
|
||||
'name': self.name,
|
||||
'permissions': self.permissions
|
||||
'permissions': [
|
||||
x.name for x in Permission
|
||||
if self.has_permission(x.value)
|
||||
]
|
||||
}
|
||||
if backrefs:
|
||||
pass
|
||||
if relationships:
|
||||
json_serializeable['users'] = {
|
||||
x.hashid: x.to_json_serializeable(relationships=True)
|
||||
@@ -252,6 +313,27 @@ class Token(db.Model):
|
||||
self.access_expiration = datetime.utcnow()
|
||||
self.refresh_expiration = datetime.utcnow()
|
||||
|
||||
def to_json_serializeable(self, backrefs=False, relationships=False):
    '''
    Return a JSON-serializable dict describing this token.

    Expiration timestamps are rendered as ISO 8601 strings with a trailing
    ``Z`` or as None when unset. With ``backrefs`` the owning user is
    embedded under ``user``. ``relationships`` is accepted but adds
    nothing.
    '''
    # NOTE(review): `self.hashid` is read here, but the class header visible
    # in this diff does not list HashidMixin as a base. Confirm it exists.
    def _iso_or_none(moment):
        return None if moment is None else f'{moment.isoformat()}Z'

    json_serializeable = {
        'id': self.hashid,
        'access_token': self.access_token,
        'access_expiration': _iso_or_none(self.access_expiration),
        'refresh_token': self.refresh_token,
        'refresh_expiration': _iso_or_none(self.refresh_expiration)
    }
    if backrefs:
        owner = self.user
        json_serializeable['user'] = owner.to_json_serializeable(backrefs=True)
    return json_serializeable
|
||||
|
||||
@staticmethod
|
||||
def clean():
|
||||
"""Remove any tokens that have been expired for more than a day."""
|
||||
@@ -284,35 +366,143 @@ class Avatar(HashidMixin, FileMixin, db.Model):
|
||||
'id': self.hashid,
|
||||
**self.file_mixin_to_json_serializeable()
|
||||
}
|
||||
if backrefs:
|
||||
json_serializeable['user'] = \
|
||||
self.user.to_json_serializeable(backrefs=True)
|
||||
if relationships:
|
||||
pass
|
||||
return json_serializeable
|
||||
|
||||
|
||||
class CorpusFollowerAssociation(db.Model):
|
||||
class CorpusFollowerRole(HashidMixin, db.Model):
    '''
    A named set of CorpusFollowerPermission bits that can be assigned to a
    corpus follower through a CorpusFollowerAssociation.
    '''
    __tablename__ = 'corpus_follower_roles'
    # Primary key
    id = db.Column(db.Integer, primary_key=True)
    # Fields
    name = db.Column(db.String(64), unique=True)
    default = db.Column(db.Boolean, default=False, index=True)
    # Bit field combining CorpusFollowerPermission values
    permissions = db.Column(db.Integer, default=0)
    # Relationships
    corpus_follower_associations = db.relationship(
        'CorpusFollowerAssociation',
        back_populates='role'
    )

    def __repr__(self):
        return f'<CorpusFollowerRole {self.name}>'

    def has_permission(self, permission: Union[CorpusFollowerPermission, int, str]):
        '''Return True if every bit of `permission` is set on this role.'''
        perm = CorpusFollowerPermission.get(permission)
        return self.permissions & perm.value == perm.value

    def add_permission(self, permission: Union[CorpusFollowerPermission, int, str]):
        '''Set the bit(s) of `permission` on this role.'''
        perm = CorpusFollowerPermission.get(permission)
        self.permissions |= perm.value

    def remove_permission(self, permission: Union[CorpusFollowerPermission, int, str]):
        '''Clear the bit(s) of `permission`, but only if all of them are set.'''
        perm = CorpusFollowerPermission.get(permission)
        if self.has_permission(perm):
            self.permissions &= ~perm.value

    def reset_permissions(self):
        '''Clear all permission bits.'''
        self.permissions = 0

    def to_json_serializeable(self, backrefs=False, relationships=False):
        '''
        Return a JSON-serializable dict describing this role.

        Permissions are listed by member name. With ``relationships`` the
        role's corpus follower associations are embedded, keyed by hashid.
        ``backrefs`` is accepted but adds nothing.
        '''
        json_serializeable = {
            'id': self.hashid,
            'default': self.default,
            'name': self.name,
            'permissions': [
                x.name
                for x in CorpusFollowerPermission
                if self.has_permission(x)
            ]
        }
        if backrefs:
            pass
        if relationships:
            # The relationship attribute is `corpus_follower_associations`
            # (plural). The singular spelling used before raised
            # AttributeError. The dict key is kept as it was for consumers.
            json_serializeable['corpus_follower_association'] = {
                x.hashid: x.to_json_serializeable(relationships=True)
                for x in self.corpus_follower_associations
            }
        return json_serializeable

    @staticmethod
    def insert_defaults():
        '''
        Create or update the default roles and commit them.

        Existing roles are matched by name and their permissions are
        rebuilt from scratch. 'Viewer' is flagged as the default role.
        '''
        roles = {
            'Anonymous': [],
            'Viewer': [
                CorpusFollowerPermission.VIEW
            ],
            'Contributor': [
                CorpusFollowerPermission.VIEW,
                CorpusFollowerPermission.MANAGE_FILES
            ],
            'Administrator': [
                CorpusFollowerPermission.VIEW,
                CorpusFollowerPermission.MANAGE_FILES,
                CorpusFollowerPermission.MANAGE_FOLLOWERS,
                CorpusFollowerPermission.MANAGE_CORPUS
            ]
        }
        default_role_name = 'Viewer'
        for role_name, permissions in roles.items():
            role = CorpusFollowerRole.query.filter_by(name=role_name).first()
            if role is None:
                role = CorpusFollowerRole(name=role_name)
            role.reset_permissions()
            for permission in permissions:
                role.add_permission(permission)
            role.default = role.name == default_role_name
            db.session.add(role)
        db.session.commit()
|
||||
|
||||
|
||||
class CorpusFollowerAssociation(HashidMixin, db.Model):
|
||||
__tablename__ = 'corpus_follower_associations'
|
||||
# Primary key
|
||||
id = db.Column(db.Integer, primary_key=True)
|
||||
# Foreign keys
|
||||
following_user_id = db.Column(db.Integer, db.ForeignKey('users.id'))
|
||||
followed_corpus_id = db.Column(db.Integer, db.ForeignKey('corpora.id'))
|
||||
# Fields
|
||||
permissions = db.Column(db.Integer, default=0, nullable=False)
|
||||
corpus_id = db.Column(db.Integer, db.ForeignKey('corpora.id'))
|
||||
follower_id = db.Column(db.Integer, db.ForeignKey('users.id'))
|
||||
role_id = db.Column(db.Integer, db.ForeignKey('corpus_follower_roles.id'))
|
||||
# Relationships
|
||||
followed_corpus = db.relationship('Corpus', back_populates='following_user_associations')
|
||||
following_user = db.relationship('User', back_populates='followed_corpus_associations')
|
||||
corpus = db.relationship(
|
||||
'Corpus',
|
||||
back_populates='corpus_follower_associations'
|
||||
)
|
||||
follower = db.relationship(
|
||||
'User',
|
||||
back_populates='corpus_follower_associations'
|
||||
)
|
||||
role = db.relationship(
|
||||
'CorpusFollowerRole',
|
||||
back_populates='corpus_follower_associations'
|
||||
)
|
||||
|
||||
def __init__(self, **kwargs):
|
||||
if 'role' not in kwargs:
|
||||
kwargs['role'] = CorpusFollowerRole.query.filter_by(default=True).first()
|
||||
super().__init__(**kwargs)
|
||||
|
||||
def __repr__(self):
|
||||
return f'<CorpusFollowerAssociation {self.following_user.__repr__()} ~ {self.followed_corpus.__repr__()}>'
|
||||
return f'<CorpusFollowerAssociation {self.follower.__repr__()} ~ {self.role.__repr__()} ~ {self.corpus.__repr__()}>'
|
||||
|
||||
def to_json_serializeable(self, backrefs=False, relationships=False):
|
||||
json_serializeable = {
|
||||
'id': self.hashid,
|
||||
'corpus': self.corpus.to_json_serializeable(backrefs=True),
|
||||
'follower': self.follower.to_json_serializeable(),
|
||||
'role': self.role.to_json_serializeable()
|
||||
}
|
||||
if backrefs:
|
||||
pass
|
||||
if relationships:
|
||||
pass
|
||||
return json_serializeable
|
||||
|
||||
def has_permission(self, permission):
|
||||
return self.permissions & permission == permission
|
||||
|
||||
def add_permission(self, permission):
|
||||
if not self.has_permission(permission):
|
||||
self.permissions += permission
|
||||
|
||||
def remove_permission(self, permission):
|
||||
if self.has_permission(permission):
|
||||
self.permissions -= permission
|
||||
|
||||
class User(HashidMixin, UserMixin, db.Model):
|
||||
__tablename__ = 'users'
|
||||
@@ -323,8 +513,10 @@ class User(HashidMixin, UserMixin, db.Model):
|
||||
# Fields
|
||||
email = db.Column(db.String(254), index=True, unique=True)
|
||||
username = db.Column(db.String(64), index=True, unique=True)
|
||||
username_pattern = re.compile(r'^[A-Za-zÄÖÜäöüß0-9_.]*$')
|
||||
password_hash = db.Column(db.String(128))
|
||||
confirmed = db.Column(db.Boolean, default=False)
|
||||
terms_of_use_accepted = db.Column(db.Boolean, default=False)
|
||||
member_since = db.Column(db.DateTime(), default=datetime.utcnow)
|
||||
setting_job_status_mail_notification_level = db.Column(
|
||||
IntEnumColumn(UserSettingJobStatusMailNotificationLevel),
|
||||
@@ -351,14 +543,15 @@ class User(HashidMixin, UserMixin, db.Model):
|
||||
cascade='all, delete-orphan',
|
||||
lazy='dynamic'
|
||||
)
|
||||
followed_corpus_associations = db.relationship(
|
||||
corpus_follower_associations = db.relationship(
|
||||
'CorpusFollowerAssociation',
|
||||
back_populates='following_user'
|
||||
back_populates='follower',
|
||||
cascade='all, delete-orphan'
|
||||
)
|
||||
followed_corpora = association_proxy(
|
||||
'followed_corpus_associations',
|
||||
'followed_corpus',
|
||||
creator=lambda c: CorpusFollowerAssociation(followed_corpus=c)
|
||||
'corpus_follower_associations',
|
||||
'corpus',
|
||||
creator=lambda c: CorpusFollowerAssociation(corpus=c)
|
||||
)
|
||||
jobs = db.relationship(
|
||||
'Job',
|
||||
@@ -388,15 +581,15 @@ class User(HashidMixin, UserMixin, db.Model):
|
||||
cascade='all, delete-orphan',
|
||||
lazy='dynamic'
|
||||
)
|
||||
|
||||
|
||||
def __init__(self, **kwargs):
|
||||
if 'role' not in kwargs:
|
||||
kwargs['role'] = (
|
||||
Role.query.filter_by(name='Administrator').first()
|
||||
if kwargs['email'] == current_app.config['NOPAQUE_ADMIN']
|
||||
else Role.query.filter_by(default=True).first()
|
||||
)
|
||||
super().__init__(**kwargs)
|
||||
if self.role is not None:
|
||||
return
|
||||
if self.email == current_app.config['NOPAQUE_ADMIN']:
|
||||
self.role = Role.query.filter_by(name='Administrator').first()
|
||||
else:
|
||||
self.role = Role.query.filter_by(default=True).first()
|
||||
|
||||
def __repr__(self):
|
||||
return f'<User {self.username}>'
|
||||
@@ -495,7 +688,7 @@ class User(HashidMixin, UserMixin, db.Model):
|
||||
db.session.commit()
|
||||
|
||||
def can(self, permission):
|
||||
return self.role.has_permission(permission)
|
||||
return self.role is not None and self.role.has_permission(permission)
|
||||
|
||||
def confirm(self, confirmation_token):
|
||||
try:
|
||||
@@ -506,7 +699,6 @@ class User(HashidMixin, UserMixin, db.Model):
|
||||
issuer=current_app.config['SERVER_NAME'],
|
||||
options={'require': ['exp', 'iat', 'iss', 'purpose', 'sub']}
|
||||
)
|
||||
current_app.logger.warning(payload)
|
||||
except jwt.PyJWTError:
|
||||
return False
|
||||
if payload.get('purpose') != 'user.confirm':
|
||||
@@ -577,42 +769,97 @@ class User(HashidMixin, UserMixin, db.Model):
|
||||
|
||||
#region Profile Privacy settings
|
||||
def has_profile_privacy_setting(self, setting):
|
||||
return self.profile_privacy_settings & setting == setting
|
||||
s = ProfilePrivacySettings.get(setting)
|
||||
return self.profile_privacy_settings & s.value == s.value
|
||||
|
||||
def add_profile_privacy_setting(self, setting):
|
||||
if not self.has_profile_privacy_setting(setting):
|
||||
self.profile_privacy_settings += setting
|
||||
s = ProfilePrivacySettings.get(setting)
|
||||
if not self.has_profile_privacy_setting(s):
|
||||
self.profile_privacy_settings += s.value
|
||||
|
||||
def remove_profile_privacy_setting(self, setting):
|
||||
if self.has_profile_privacy_setting(setting):
|
||||
self.profile_privacy_settings -= setting
|
||||
s = ProfilePrivacySettings.get(setting)
|
||||
if self.has_profile_privacy_setting(s):
|
||||
self.profile_privacy_settings -= s.value
|
||||
|
||||
def reset_profile_privacy_settings(self):
|
||||
self.profile_privacy_settings = 0
|
||||
#endregion Profile Privacy settings
|
||||
|
||||
def follow_corpus(self, corpus):
|
||||
if not self.is_following_corpus(corpus):
|
||||
self.followed_corpora.append(corpus)
|
||||
def follow_corpus(self, corpus, role=None):
|
||||
if role is None:
|
||||
cfr = CorpusFollowerRole.query.filter_by(default=True).first()
|
||||
else:
|
||||
cfr = role
|
||||
if self.is_following_corpus(corpus):
|
||||
cfa = CorpusFollowerAssociation.query.filter_by(corpus=corpus, follower=self).first()
|
||||
if cfa.role != cfr:
|
||||
cfa.role = cfr
|
||||
else:
|
||||
cfa = CorpusFollowerAssociation(corpus=corpus, role=cfr, follower=self)
|
||||
db.session.add(cfa)
|
||||
|
||||
def unfollow_corpus(self, corpus):
|
||||
if self.is_following_corpus(corpus):
|
||||
self.followed_corpora.remove(corpus)
|
||||
if not self.is_following_corpus(corpus):
|
||||
return
|
||||
self.followed_corpora.remove(corpus)
|
||||
|
||||
def is_following_corpus(self, corpus):
    '''Return whether `corpus` is contained in this user's followed corpora.'''
    followed = self.followed_corpora
    return corpus in followed
|
||||
|
||||
|
||||
def generate_follow_corpus_token(self, corpus_hashid, role_name, expiration=7):
    '''
    Create a signed token that lets its bearer follow a corpus.

    :param corpus_hashid: hashid of the corpus, stored as the ``sub`` claim.
    :param role_name: name of the CorpusFollowerRole to grant, stored as
        the ``role_name`` claim.
    :param expiration: either a ``datetime`` giving the expiry instant
        (used unchanged) or a number of days from now (default: 7).
    :returns: the HS256-signed JWT produced by ``jwt.encode``.
    '''
    now = datetime.utcnow()
    # A bare number used to be written straight into `exp`, where it is read
    # as seconds since the epoch. The default of 7 therefore produced tokens
    # that were already expired. Numbers are now a lifetime in days.
    if isinstance(expiration, datetime):
        expires_at = expiration
    else:
        expires_at = now + timedelta(days=expiration)
    payload = {
        'exp': expires_at,
        'iat': now,
        'iss': current_app.config['SERVER_NAME'],
        'purpose': 'User.follow_corpus',
        'role_name': role_name,
        'sub': corpus_hashid
    }
    return jwt.encode(
        payload,
        current_app.config['SECRET_KEY'],
        algorithm='HS256'
    )
|
||||
|
||||
def follow_corpus_by_token(self, token):
    '''
    Follow the corpus named in a token from generate_follow_corpus_token.

    The token must be a valid, unexpired HS256 JWT issued by this server
    with the purpose ``User.follow_corpus``. Its ``sub`` claim names the
    corpus by hashid and its ``role_name`` claim names the role to grant.

    :returns: True if the follow was applied. False if the token is
        invalid, or the corpus or role it names does not exist.
    '''
    try:
        payload = jwt.decode(
            token,
            current_app.config['SECRET_KEY'],
            algorithms=['HS256'],
            issuer=current_app.config['SERVER_NAME'],
            options={'require': ['exp', 'iat', 'iss', 'purpose', 'role_name', 'sub']}
        )
    except jwt.PyJWTError:
        return False
    if payload.get('purpose') != 'User.follow_corpus':
        return False
    corpus_hashid = payload.get('sub')
    corpus_id = hashids.decode(corpus_hashid)
    # Use get() rather than get_or_404(): an unknown corpus must make this
    # method return False. get_or_404() aborted the request instead and
    # left the None check below unreachable.
    corpus = Corpus.query.get(corpus_id)
    if corpus is None:
        return False
    role_name = payload.get('role_name')
    role = CorpusFollowerRole.query.filter_by(name=role_name).first()
    if role is None:
        return False
    # No commit is issued in this method.
    self.follow_corpus(corpus, role)
    return True
|
||||
|
||||
def to_json_serializeable(self, backrefs=False, relationships=False, filter_by_privacy_settings=False):
|
||||
json_serializeable = {
|
||||
'id': self.hashid,
|
||||
'confirmed': self.confirmed,
|
||||
'avatar': url_for('users.user_avatar', user_id=self.id),
|
||||
'email': self.email,
|
||||
'last_seen': (
|
||||
None if self.last_seen is None
|
||||
else self.last_seen.strftime('%Y-%m-%d %H:%M')
|
||||
else f'{self.last_seen.isoformat()}Z'
|
||||
),
|
||||
'member_since': self.member_since.strftime('%Y-%m-%d'),
|
||||
'member_since': f'{self.member_since.isoformat()}Z',
|
||||
'username': self.username,
|
||||
'full_name': self.full_name,
|
||||
'about_me': self.about_me,
|
||||
@@ -621,19 +868,21 @@ class User(HashidMixin, UserMixin, db.Model):
|
||||
'organization': self.organization,
|
||||
'job_status_mail_notification_level': \
|
||||
self.setting_job_status_mail_notification_level.name,
|
||||
'is_public': self.is_public,
|
||||
'show_email': self.has_profile_privacy_setting(ProfilePrivacySettings.SHOW_EMAIL),
|
||||
'show_last_seen': self.has_profile_privacy_setting(ProfilePrivacySettings.SHOW_LAST_SEEN),
|
||||
'show_member_since': self.has_profile_privacy_setting(ProfilePrivacySettings.SHOW_MEMBER_SINCE)
|
||||
'profile_privacy_settings': {
|
||||
'is_public': self.is_public,
|
||||
'show_email': self.has_profile_privacy_setting(ProfilePrivacySettings.SHOW_EMAIL),
|
||||
'show_last_seen': self.has_profile_privacy_setting(ProfilePrivacySettings.SHOW_LAST_SEEN),
|
||||
'show_member_since': self.has_profile_privacy_setting(ProfilePrivacySettings.SHOW_MEMBER_SINCE)
|
||||
}
|
||||
}
|
||||
json_serializeable['avatar'] = (
|
||||
None if self.avatar is None
|
||||
else self.avatar.to_json_serializeable(relationships=True)
|
||||
)
|
||||
if backrefs:
|
||||
json_serializeable['role'] = \
|
||||
self.role.to_json_serializeable(backrefs=True)
|
||||
if relationships:
|
||||
json_serializeable['corpus_follower_associations'] = {
|
||||
x.hashid: x.to_json_serializeable()
|
||||
for x in self.corpus_follower_associations
|
||||
}
|
||||
json_serializeable['corpora'] = {
|
||||
x.hashid: x.to_json_serializeable(relationships=True)
|
||||
for x in self.corpora
|
||||
@@ -650,10 +899,6 @@ class User(HashidMixin, UserMixin, db.Model):
|
||||
x.hashid: x.to_json_serializeable(relationships=True)
|
||||
for x in self.spacy_nlp_pipeline_models
|
||||
}
|
||||
json_serializeable['followed_corpora'] = {
|
||||
x.hashid: x.to_json_serializeable(relationships=True)
|
||||
for x in self.followed_corpora
|
||||
}
|
||||
|
||||
if filter_by_privacy_settings:
|
||||
if not self.has_profile_privacy_setting(ProfilePrivacySettings.SHOW_EMAIL):
|
||||
@@ -708,7 +953,7 @@ class TesseractOCRPipelineModel(FileMixin, HashidMixin, db.Model):
|
||||
return self.user.hashid
|
||||
|
||||
@staticmethod
|
||||
def insert_defaults():
|
||||
def insert_defaults(force_download=False):
|
||||
nopaque_user = User.query.filter_by(username='nopaque').first()
|
||||
defaults_file = os.path.join(
|
||||
os.path.dirname(os.path.abspath(__file__)),
|
||||
@@ -721,6 +966,7 @@ class TesseractOCRPipelineModel(FileMixin, HashidMixin, db.Model):
|
||||
if model is not None:
|
||||
model.compatible_service_versions = m['compatible_service_versions']
|
||||
model.description = m['description']
|
||||
model.filename = f'{model.id}.traineddata'
|
||||
model.publisher = m['publisher']
|
||||
model.publisher_url = m['publisher_url']
|
||||
model.publishing_url = m['publishing_url']
|
||||
@@ -728,38 +974,39 @@ class TesseractOCRPipelineModel(FileMixin, HashidMixin, db.Model):
|
||||
model.is_public = True
|
||||
model.title = m['title']
|
||||
model.version = m['version']
|
||||
continue
|
||||
model = TesseractOCRPipelineModel(
|
||||
compatible_service_versions=m['compatible_service_versions'],
|
||||
description=m['description'],
|
||||
publisher=m['publisher'],
|
||||
publisher_url=m['publisher_url'],
|
||||
publishing_url=m['publishing_url'],
|
||||
publishing_year=m['publishing_year'],
|
||||
is_public=True,
|
||||
title=m['title'],
|
||||
user=nopaque_user,
|
||||
version=m['version']
|
||||
)
|
||||
db.session.add(model)
|
||||
db.session.flush(objects=[model])
|
||||
db.session.refresh(model)
|
||||
model.filename = f'{model.id}.traineddata'
|
||||
r = requests.get(m['url'], stream=True)
|
||||
pbar = tqdm(
|
||||
desc=f'{model.title} ({model.filename})',
|
||||
unit="B",
|
||||
unit_scale=True,
|
||||
unit_divisor=1024,
|
||||
total=int(r.headers['Content-Length'])
|
||||
)
|
||||
pbar.clear()
|
||||
with open(model.path, 'wb') as f:
|
||||
for chunk in r.iter_content(chunk_size=1024):
|
||||
if chunk: # filter out keep-alive new chunks
|
||||
pbar.update(len(chunk))
|
||||
f.write(chunk)
|
||||
pbar.close()
|
||||
else:
|
||||
model = TesseractOCRPipelineModel(
|
||||
compatible_service_versions=m['compatible_service_versions'],
|
||||
description=m['description'],
|
||||
publisher=m['publisher'],
|
||||
publisher_url=m['publisher_url'],
|
||||
publishing_url=m['publishing_url'],
|
||||
publishing_year=m['publishing_year'],
|
||||
is_public=True,
|
||||
title=m['title'],
|
||||
user=nopaque_user,
|
||||
version=m['version']
|
||||
)
|
||||
db.session.add(model)
|
||||
db.session.flush(objects=[model])
|
||||
db.session.refresh(model)
|
||||
model.filename = f'{model.id}.traineddata'
|
||||
if not os.path.exists(model.path) or force_download:
|
||||
r = requests.get(m['url'], stream=True)
|
||||
pbar = tqdm(
|
||||
desc=f'{model.title} ({model.filename})',
|
||||
unit="B",
|
||||
unit_scale=True,
|
||||
unit_divisor=1024,
|
||||
total=int(r.headers['Content-Length'])
|
||||
)
|
||||
pbar.clear()
|
||||
with open(model.path, 'wb') as f:
|
||||
for chunk in r.iter_content(chunk_size=1024):
|
||||
if chunk: # filter out keep-alive new chunks
|
||||
pbar.update(len(chunk))
|
||||
f.write(chunk)
|
||||
pbar.close()
|
||||
db.session.commit()
|
||||
|
||||
def delete(self):
|
||||
@@ -786,6 +1033,8 @@ class TesseractOCRPipelineModel(FileMixin, HashidMixin, db.Model):
|
||||
if backrefs:
|
||||
json_serializeable['user'] = \
|
||||
self.user.to_json_serializeable(backrefs=True)
|
||||
if relationships:
|
||||
pass
|
||||
return json_serializeable
|
||||
|
||||
|
||||
@@ -833,7 +1082,7 @@ class SpaCyNLPPipelineModel(FileMixin, HashidMixin, db.Model):
|
||||
return self.user.hashid
|
||||
|
||||
@staticmethod
|
||||
def insert_defaults():
|
||||
def insert_defaults(force_download=False):
|
||||
nopaque_user = User.query.filter_by(username='nopaque').first()
|
||||
defaults_file = os.path.join(
|
||||
os.path.dirname(os.path.abspath(__file__)),
|
||||
@@ -846,6 +1095,7 @@ class SpaCyNLPPipelineModel(FileMixin, HashidMixin, db.Model):
|
||||
if model is not None:
|
||||
model.compatible_service_versions = m['compatible_service_versions']
|
||||
model.description = m['description']
|
||||
model.filename = m['url'].split('/')[-1]
|
||||
model.publisher = m['publisher']
|
||||
model.publisher_url = m['publisher_url']
|
||||
model.publishing_url = m['publishing_url']
|
||||
@@ -854,39 +1104,40 @@ class SpaCyNLPPipelineModel(FileMixin, HashidMixin, db.Model):
|
||||
model.title = m['title']
|
||||
model.version = m['version']
|
||||
model.pipeline_name = m['pipeline_name']
|
||||
continue
|
||||
model = SpaCyNLPPipelineModel(
|
||||
compatible_service_versions=m['compatible_service_versions'],
|
||||
description=m['description'],
|
||||
publisher=m['publisher'],
|
||||
publisher_url=m['publisher_url'],
|
||||
publishing_url=m['publishing_url'],
|
||||
publishing_year=m['publishing_year'],
|
||||
is_public=True,
|
||||
title=m['title'],
|
||||
user=nopaque_user,
|
||||
version=m['version'],
|
||||
pipeline_name=m['pipeline_name']
|
||||
)
|
||||
db.session.add(model)
|
||||
db.session.flush(objects=[model])
|
||||
db.session.refresh(model)
|
||||
model.filename = m['url'].split('/')[-1]
|
||||
r = requests.get(m['url'], stream=True)
|
||||
pbar = tqdm(
|
||||
desc=f'{model.title} ({model.filename})',
|
||||
unit="B",
|
||||
unit_scale=True,
|
||||
unit_divisor=1024,
|
||||
total=int(r.headers['Content-Length'])
|
||||
)
|
||||
pbar.clear()
|
||||
with open(model.path, 'wb') as f:
|
||||
for chunk in r.iter_content(chunk_size=1024):
|
||||
if chunk: # filter out keep-alive new chunks
|
||||
pbar.update(len(chunk))
|
||||
f.write(chunk)
|
||||
pbar.close()
|
||||
else:
|
||||
model = SpaCyNLPPipelineModel(
|
||||
compatible_service_versions=m['compatible_service_versions'],
|
||||
description=m['description'],
|
||||
filename=m['url'].split('/')[-1],
|
||||
publisher=m['publisher'],
|
||||
publisher_url=m['publisher_url'],
|
||||
publishing_url=m['publishing_url'],
|
||||
publishing_year=m['publishing_year'],
|
||||
is_public=True,
|
||||
title=m['title'],
|
||||
user=nopaque_user,
|
||||
version=m['version'],
|
||||
pipeline_name=m['pipeline_name']
|
||||
)
|
||||
db.session.add(model)
|
||||
db.session.flush(objects=[model])
|
||||
db.session.refresh(model)
|
||||
if not os.path.exists(model.path) or force_download:
|
||||
r = requests.get(m['url'], stream=True)
|
||||
pbar = tqdm(
|
||||
desc=f'{model.title} ({model.filename})',
|
||||
unit="B",
|
||||
unit_scale=True,
|
||||
unit_divisor=1024,
|
||||
total=int(r.headers['Content-Length'])
|
||||
)
|
||||
pbar.clear()
|
||||
with open(model.path, 'wb') as f:
|
||||
for chunk in r.iter_content(chunk_size=1024):
|
||||
if chunk: # filter out keep-alive new chunks
|
||||
pbar.update(len(chunk))
|
||||
f.write(chunk)
|
||||
pbar.close()
|
||||
db.session.commit()
|
||||
|
||||
def delete(self):
|
||||
@@ -912,7 +1163,10 @@ class SpaCyNLPPipelineModel(FileMixin, HashidMixin, db.Model):
|
||||
**self.file_mixin_to_json_serializeable()
|
||||
}
|
||||
if backrefs:
|
||||
json_serializeable['user'] = self.user.to_json_serializeable(backrefs=True)
|
||||
json_serializeable['user'] = \
|
||||
self.user.to_json_serializeable(backrefs=True)
|
||||
if relationships:
|
||||
pass
|
||||
return json_serializeable
|
||||
|
||||
|
||||
@@ -971,6 +1225,8 @@ class JobInput(FileMixin, HashidMixin, db.Model):
|
||||
if backrefs:
|
||||
json_serializeable['job'] = \
|
||||
self.job.to_json_serializeable(backrefs=True)
|
||||
if relationships:
|
||||
pass
|
||||
return json_serializeable
|
||||
|
||||
|
||||
@@ -1035,6 +1291,8 @@ class JobResult(FileMixin, HashidMixin, db.Model):
|
||||
if backrefs:
|
||||
json_serializeable['job'] = \
|
||||
self.job.to_json_serializeable(backrefs=True)
|
||||
if relationships:
|
||||
pass
|
||||
return json_serializeable
|
||||
|
||||
|
||||
@@ -1114,7 +1372,6 @@ class Job(HashidMixin, db.Model):
|
||||
raise e
|
||||
return job
|
||||
|
||||
|
||||
def delete(self):
|
||||
''' Delete the job and its inputs and results from the database. '''
|
||||
if self.status not in [JobStatus.COMPLETED, JobStatus.FAILED]: # noqa
|
||||
@@ -1159,8 +1416,7 @@ class Job(HashidMixin, db.Model):
|
||||
'service_args': self.service_args,
|
||||
'service_version': self.service_version,
|
||||
'status': self.status.name,
|
||||
'title': self.title,
|
||||
'url': self.url
|
||||
'title': self.title
|
||||
}
|
||||
if backrefs:
|
||||
json_serializeable['user'] = \
|
||||
@@ -1246,9 +1502,9 @@ class CorpusFile(FileMixin, HashidMixin, db.Model):
|
||||
def to_json_serializeable(self, backrefs=False, relationships=False):
|
||||
json_serializeable = {
|
||||
'id': self.hashid,
|
||||
'url': self.url,
|
||||
'address': self.address,
|
||||
'author': self.author,
|
||||
'description': self.description,
|
||||
'booktitle': self.booktitle,
|
||||
'chapter': self.chapter,
|
||||
'editor': self.editor,
|
||||
@@ -1267,6 +1523,8 @@ class CorpusFile(FileMixin, HashidMixin, db.Model):
|
||||
if backrefs:
|
||||
json_serializeable['corpus'] = \
|
||||
self.corpus.to_json_serializeable(backrefs=True)
|
||||
if relationships:
|
||||
pass
|
||||
return json_serializeable
|
||||
|
||||
|
||||
@@ -1297,14 +1555,15 @@ class Corpus(HashidMixin, db.Model):
|
||||
lazy='dynamic',
|
||||
cascade='all, delete-orphan'
|
||||
)
|
||||
following_user_associations = db.relationship(
|
||||
corpus_follower_associations = db.relationship(
|
||||
'CorpusFollowerAssociation',
|
||||
back_populates='followed_corpus'
|
||||
back_populates='corpus',
|
||||
cascade='all, delete-orphan'
|
||||
)
|
||||
following_users = association_proxy(
|
||||
'following_user_associations',
|
||||
'following_user',
|
||||
creator=lambda u: CorpusFollowerAssociation(following_user=u)
|
||||
followers = association_proxy(
|
||||
'corpus_follower_associations',
|
||||
'follower',
|
||||
creator=lambda u: CorpusFollowerAssociation(follower=u)
|
||||
)
|
||||
user = db.relationship('User', back_populates='corpora')
|
||||
# "static" attributes
|
||||
@@ -1315,7 +1574,7 @@ class Corpus(HashidMixin, db.Model):
|
||||
|
||||
@property
|
||||
def analysis_url(self):
|
||||
return url_for('corpora.analyse_corpus', corpus_id=self.id)
|
||||
return url_for('corpora.analysis', corpus_id=self.id)
|
||||
|
||||
@property
|
||||
def jsonpatch_path(self):
|
||||
@@ -1352,9 +1611,14 @@ class Corpus(HashidMixin, db.Model):
|
||||
return corpus
|
||||
|
||||
def build(self):
|
||||
build_dir = os.path.join(self.path, 'cwb')
|
||||
shutil.rmtree(build_dir, ignore_errors=True)
|
||||
os.mkdir(build_dir)
|
||||
os.mkdir(os.path.join(build_dir, 'data'))
|
||||
os.mkdir(os.path.join(build_dir, 'registry'))
|
||||
corpus_element = ET.fromstring('<corpus>\n</corpus>')
|
||||
for corpus_file in self.files:
|
||||
normalized_vrt_path = os.path.join(self.path, 'cwb', f'{corpus_file.id}.norm.vrt')
|
||||
normalized_vrt_path = os.path.join(build_dir, f'{corpus_file.id}.norm.vrt')
|
||||
try:
|
||||
normalize_vrt_file(corpus_file.path, normalized_vrt_path)
|
||||
except:
|
||||
@@ -1381,7 +1645,7 @@ class Corpus(HashidMixin, db.Model):
|
||||
# corpus_element.insert(1, text_element)
|
||||
corpus_element.append(text_element)
|
||||
ET.ElementTree(corpus_element).write(
|
||||
os.path.join(self.path, 'cwb', 'corpus.vrt'),
|
||||
os.path.join(build_dir, 'corpus.vrt'),
|
||||
encoding='utf-8'
|
||||
)
|
||||
self.status = CorpusStatus.SUBMITTED
|
||||
@@ -1403,8 +1667,13 @@ class Corpus(HashidMixin, db.Model):
|
||||
'is_public': self.is_public
|
||||
}
|
||||
if backrefs:
|
||||
json_serializeable['user'] = self.user.to_json_serializeable(backrefs=True)
|
||||
json_serializeable['user'] = \
|
||||
self.user.to_json_serializeable(backrefs=True)
|
||||
if relationships:
|
||||
json_serializeable['corpus_follower_associations'] = {
|
||||
x.hashid: x.to_json_serializeable()
|
||||
for x in self.corpus_follower_associations
|
||||
}
|
||||
json_serializeable['files'] = {
|
||||
x.hashid: x.to_json_serializeable(relationships=True)
|
||||
for x in self.files
|
||||
@@ -1424,11 +1693,27 @@ class Corpus(HashidMixin, db.Model):
|
||||
@db.event.listens_for(JobResult, 'after_delete')
|
||||
@db.event.listens_for(SpaCyNLPPipelineModel, 'after_delete')
|
||||
@db.event.listens_for(TesseractOCRPipelineModel, 'after_delete')
|
||||
def ressource_after_delete(mapper, connection, ressource):
|
||||
jsonpatch = [{'op': 'remove', 'path': ressource.jsonpatch_path}]
|
||||
room = f'users.{ressource.user_hashid}'
|
||||
socketio.emit('users.patch', jsonpatch, room=room)
|
||||
room = f'/users/{ressource.user_hashid}'
|
||||
def resource_after_delete(mapper, connection, resource):
|
||||
jsonpatch = [
|
||||
{
|
||||
'op': 'remove',
|
||||
'path': resource.jsonpatch_path
|
||||
}
|
||||
]
|
||||
room = f'/users/{resource.user_hashid}'
|
||||
socketio.emit('PATCH', jsonpatch, room=room)
|
||||
|
||||
|
||||
@db.event.listens_for(CorpusFollowerAssociation, 'after_delete')
|
||||
def cfa_after_delete_handler(mapper, connection, cfa):
|
||||
jsonpatch_path = f'/users/{cfa.corpus.user.hashid}/corpora/{cfa.corpus.hashid}/corpus_follower_associations/{cfa.hashid}'
|
||||
jsonpatch = [
|
||||
{
|
||||
'op': 'remove',
|
||||
'path': jsonpatch_path
|
||||
}
|
||||
]
|
||||
room = f'/users/{cfa.corpus.user.hashid}'
|
||||
socketio.emit('PATCH', jsonpatch, room=room)
|
||||
|
||||
|
||||
@@ -1439,14 +1724,33 @@ def ressource_after_delete(mapper, connection, ressource):
|
||||
@db.event.listens_for(JobResult, 'after_insert')
|
||||
@db.event.listens_for(SpaCyNLPPipelineModel, 'after_insert')
|
||||
@db.event.listens_for(TesseractOCRPipelineModel, 'after_insert')
|
||||
def ressource_after_insert_handler(mapper, connection, ressource):
|
||||
value = ressource.to_json_serializeable()
|
||||
def resource_after_insert_handler(mapper, connection, resource):
|
||||
jsonpatch_value = resource.to_json_serializeable()
|
||||
for attr in mapper.relationships:
|
||||
value[attr.key] = {}
|
||||
jsonpatch_value[attr.key] = {}
|
||||
jsonpatch = [
|
||||
{'op': 'add', 'path': ressource.jsonpatch_path, 'value': value}
|
||||
{
|
||||
'op': 'add',
|
||||
'path': resource.jsonpatch_path,
|
||||
'value': jsonpatch_value
|
||||
}
|
||||
]
|
||||
room = f'/users/{ressource.user_hashid}'
|
||||
room = f'/users/{resource.user_hashid}'
|
||||
socketio.emit('PATCH', jsonpatch, room=room)
|
||||
|
||||
|
||||
@db.event.listens_for(CorpusFollowerAssociation, 'after_insert')
|
||||
def cfa_after_insert_handler(mapper, connection, cfa):
|
||||
jsonpatch_value = cfa.to_json_serializeable()
|
||||
jsonpatch_path = f'/users/{cfa.corpus.user.hashid}/corpora/{cfa.corpus.hashid}/corpus_follower_associations/{cfa.hashid}'
|
||||
jsonpatch = [
|
||||
{
|
||||
'op': 'add',
|
||||
'path': jsonpatch_path,
|
||||
'value': jsonpatch_value
|
||||
}
|
||||
]
|
||||
room = f'/users/{cfa.corpus.user.hashid}'
|
||||
socketio.emit('PATCH', jsonpatch, room=room)
|
||||
|
||||
|
||||
@@ -1457,28 +1761,29 @@ def ressource_after_insert_handler(mapper, connection, ressource):
|
||||
@db.event.listens_for(JobResult, 'after_update')
|
||||
@db.event.listens_for(SpaCyNLPPipelineModel, 'after_update')
|
||||
@db.event.listens_for(TesseractOCRPipelineModel, 'after_update')
|
||||
def ressource_after_update_handler(mapper, connection, ressource):
|
||||
def resource_after_update_handler(mapper, connection, resource):
|
||||
jsonpatch = []
|
||||
for attr in db.inspect(ressource).attrs:
|
||||
for attr in db.inspect(resource).attrs:
|
||||
if attr.key in mapper.relationships:
|
||||
continue
|
||||
if not attr.load_history().has_changes():
|
||||
continue
|
||||
jsonpatch_path = f'{resource.jsonpatch_path}/{attr.key}'
|
||||
if isinstance(attr.value, datetime):
|
||||
value = f'{attr.value.isoformat()}Z'
|
||||
jsonpatch_value = f'{attr.value.isoformat()}Z'
|
||||
elif isinstance(attr.value, Enum):
|
||||
value = attr.value.name
|
||||
jsonpatch_value = attr.value.name
|
||||
else:
|
||||
value = attr.value
|
||||
jsonpatch_value = attr.value
|
||||
jsonpatch.append(
|
||||
{
|
||||
'op': 'replace',
|
||||
'path': f'{ressource.jsonpatch_path}/{attr.key}',
|
||||
'value': value
|
||||
'path': jsonpatch_path,
|
||||
'value': jsonpatch_value
|
||||
}
|
||||
)
|
||||
if jsonpatch:
|
||||
room = f'/users/{ressource.user_hashid}'
|
||||
room = f'/users/{resource.user_hashid}'
|
||||
socketio.emit('PATCH', jsonpatch, room=room)
|
||||
|
||||
|
||||
|
@@ -1,4 +1,5 @@
|
||||
from flask import Blueprint
|
||||
from flask_login import login_required
|
||||
import os
|
||||
import yaml
|
||||
|
||||
@@ -9,4 +10,16 @@ with open(services_file, 'r') as f:
|
||||
SERVICES = yaml.safe_load(f)
|
||||
|
||||
bp = Blueprint('services', __name__)
|
||||
|
||||
|
||||
@bp.before_request
|
||||
@login_required
|
||||
def before_request():
|
||||
'''
|
||||
Ensures that the routes in this package can only be visited by users that
|
||||
are logged in.
|
||||
'''
|
||||
pass
|
||||
|
||||
|
||||
from . import routes # noqa
|
||||
|
@@ -1,12 +1,17 @@
|
||||
from flask_login import current_user
|
||||
from flask_wtf import FlaskForm
|
||||
from flask_login import current_user
|
||||
from flask_wtf.file import FileField, FileRequired
|
||||
from wtforms import (BooleanField, DecimalRangeField, MultipleFileField,
|
||||
SelectField, StringField, SubmitField, ValidationError)
|
||||
from wtforms import (
|
||||
BooleanField,
|
||||
DecimalRangeField,
|
||||
MultipleFileField,
|
||||
SelectField,
|
||||
StringField,
|
||||
SubmitField,
|
||||
ValidationError
|
||||
)
|
||||
from wtforms.validators import InputRequired, Length
|
||||
|
||||
from app.models import SpaCyNLPPipelineModel, TesseractOCRPipelineModel
|
||||
|
||||
from . import SERVICES
|
||||
|
||||
|
||||
@@ -33,6 +38,8 @@ class CreateFileSetupPipelineJobForm(CreateJobBaseForm):
|
||||
raise ValidationError('JPEG, PNG and TIFF files only!')
|
||||
|
||||
def __init__(self, *args, **kwargs):
|
||||
if 'prefix' not in kwargs:
|
||||
kwargs['prefix'] = 'create-file-setup-pipeline-job-form'
|
||||
service_manifest = SERVICES['file-setup-pipeline']
|
||||
version = kwargs.pop('version', service_manifest['latest_version'])
|
||||
super().__init__(*args, **kwargs)
|
||||
@@ -60,6 +67,8 @@ class CreateTesseractOCRPipelineJobForm(CreateJobBaseForm):
|
||||
raise ValidationError('PDF files only!')
|
||||
|
||||
def __init__(self, *args, **kwargs):
|
||||
if 'prefix' not in kwargs:
|
||||
kwargs['prefix'] = 'create-tesseract-ocr-pipeline-job-form'
|
||||
service_manifest = SERVICES['tesseract-ocr-pipeline']
|
||||
version = kwargs.pop('version', service_manifest['latest_version'])
|
||||
super().__init__(*args, **kwargs)
|
||||
@@ -75,12 +84,18 @@ class CreateTesseractOCRPipelineJobForm(CreateJobBaseForm):
|
||||
del self.binarization.render_kw['disabled']
|
||||
if 'ocropus_nlbin_threshold' in service_info['methods']:
|
||||
del self.ocropus_nlbin_threshold.render_kw['disabled']
|
||||
user_models = [
|
||||
x for x in current_user.tesseract_ocr_pipeline_models.order_by(TesseractOCRPipelineModel.title).all()
|
||||
]
|
||||
models = [
|
||||
x for x in TesseractOCRPipelineModel.query.order_by(TesseractOCRPipelineModel.title).all()
|
||||
if version in x.compatible_service_versions and (x.is_public == True or x.user == current_user)
|
||||
]
|
||||
self.model.choices = [('', 'Choose your option')]
|
||||
self.model.choices += [(x.hashid, f'{x.title} [{x.version}]') for x in models]
|
||||
self.model.choices = {
|
||||
'': [('', 'Choose your option')],
|
||||
'Your models': [(x.hashid, f'{x.title} [{x.version}]') for x in user_models] if user_models else [(0, 'Nothing here yet...')],
|
||||
'Public models': [(x.hashid, f'{x.title} [{x.version}]') for x in models]
|
||||
}
|
||||
self.model.default = ''
|
||||
self.version.choices = [(x, x) for x in service_manifest['versions']]
|
||||
self.version.data = version
|
||||
@@ -106,6 +121,8 @@ class CreateTranskribusHTRPipelineJobForm(CreateJobBaseForm):
|
||||
raise ValidationError('PDF files only!')
|
||||
|
||||
def __init__(self, *args, **kwargs):
|
||||
if 'prefix' not in kwargs:
|
||||
kwargs['prefix'] = 'create-transkribus-htr-pipeline-job-form'
|
||||
transkribus_htr_pipeline_models = kwargs.pop('transkribus_htr_pipeline_models', [])
|
||||
service_manifest = SERVICES['transkribus-htr-pipeline']
|
||||
version = kwargs.pop('version', service_manifest['latest_version'])
|
||||
@@ -144,6 +161,8 @@ class CreateSpacyNLPPipelineJobForm(CreateJobBaseForm):
|
||||
raise ValidationError('Plain text files only!')
|
||||
|
||||
def __init__(self, *args, **kwargs):
|
||||
if 'prefix' not in kwargs:
|
||||
kwargs['prefix'] = 'create-spacy-nlp-pipeline-job-form'
|
||||
service_manifest = SERVICES['spacy-nlp-pipeline']
|
||||
version = kwargs.pop('version', service_manifest['latest_version'])
|
||||
super().__init__(*args, **kwargs)
|
||||
@@ -155,12 +174,18 @@ class CreateSpacyNLPPipelineJobForm(CreateJobBaseForm):
|
||||
if 'methods' in service_info:
|
||||
if 'encoding_detection' in service_info['methods']:
|
||||
del self.encoding_detection.render_kw['disabled']
|
||||
models = [
|
||||
x for x in SpaCyNLPPipelineModel.query.order_by(SpaCyNLPPipelineModel.title).all()
|
||||
if version in x.compatible_service_versions and (x.is_public == True or x.user == current_user)
|
||||
user_models = [
|
||||
x for x in current_user.spacy_nlp_pipeline_models.order_by(SpaCyNLPPipelineModel.title).all()
|
||||
]
|
||||
self.model.choices = [('', 'Choose your option')]
|
||||
self.model.choices += [(x.hashid, f'{x.title} [{x.version}]') for x in models]
|
||||
models = [
|
||||
x for x in SpaCyNLPPipelineModel.query.filter(SpaCyNLPPipelineModel.user != current_user, SpaCyNLPPipelineModel.is_public == True).order_by(SpaCyNLPPipelineModel.title).all()
|
||||
if version in x.compatible_service_versions
|
||||
]
|
||||
self.model.choices = {
|
||||
'': [('', 'Choose your option')],
|
||||
'Your models': [(x.hashid, f'{x.title} [{x.version}]') for x in user_models] if user_models else [(0, 'Nothing here yet...')],
|
||||
'Public models': [(x.hashid, f'{x.title} [{x.version}]') for x in models]
|
||||
}
|
||||
self.model.default = ''
|
||||
self.version.choices = [(x, x) for x in service_manifest['versions']]
|
||||
self.version.data = version
|
||||
|
@@ -1,5 +1,6 @@
|
||||
from flask import abort, current_app, flash, make_response, Markup, render_template, request
|
||||
from flask_login import current_user, login_required
|
||||
from flask import abort, current_app, flash, Markup, redirect, render_template, request, url_for
|
||||
from flask_breadcrumbs import register_breadcrumb
|
||||
from flask_login import current_user
|
||||
import requests
|
||||
from app import db, hashids
|
||||
from app.models import (
|
||||
@@ -18,8 +19,14 @@ from .forms import (
|
||||
)
|
||||
|
||||
|
||||
@bp.route('/services')
|
||||
@register_breadcrumb(bp, '.', 'Services')
|
||||
def services():
|
||||
return redirect(url_for('main.dashboard'))
|
||||
|
||||
|
||||
@bp.route('/file-setup-pipeline', methods=['GET', 'POST'])
|
||||
@login_required
|
||||
@register_breadcrumb(bp, '.file_setup_pipeline', '<i class="nopaque-icons service-icons left" data-service="file-setup-pipeline"></i>File Setup')
|
||||
def file_setup_pipeline():
|
||||
service = 'file-setup-pipeline'
|
||||
service_manifest = SERVICES[service]
|
||||
@@ -54,13 +61,13 @@ def file_setup_pipeline():
|
||||
return {}, 201, {'Location': job.url}
|
||||
return render_template(
|
||||
'services/file_setup_pipeline.html.j2',
|
||||
form=form,
|
||||
title=service_manifest['name']
|
||||
title=service_manifest['name'],
|
||||
form=form
|
||||
)
|
||||
|
||||
|
||||
@bp.route('/tesseract-ocr-pipeline', methods=['GET', 'POST'])
|
||||
@login_required
|
||||
@register_breadcrumb(bp, '.tesseract_ocr_pipeline', '<i class="nopaque-icons service-icons left" data-service="tesseract-ocr-pipeline"></i>Tesseract OCR Pipeline')
|
||||
def tesseract_ocr_pipeline():
|
||||
service_name = 'tesseract-ocr-pipeline'
|
||||
service_manifest = SERVICES[service_name]
|
||||
@@ -100,16 +107,18 @@ def tesseract_ocr_pipeline():
|
||||
x for x in TesseractOCRPipelineModel.query.all()
|
||||
if version in x.compatible_service_versions and (x.is_public == True or x.user == current_user)
|
||||
]
|
||||
user_tesseract_ocr_pipeline_models_count = len(current_user.tesseract_ocr_pipeline_models.all())
|
||||
return render_template(
|
||||
'services/tesseract_ocr_pipeline.html.j2',
|
||||
title=service_manifest['name'],
|
||||
form=form,
|
||||
tesseract_ocr_pipeline_models=tesseract_ocr_pipeline_models,
|
||||
title=service_manifest['name']
|
||||
user_tesseract_ocr_pipeline_models_count=user_tesseract_ocr_pipeline_models_count
|
||||
)
|
||||
|
||||
|
||||
@bp.route('/transkribus-htr-pipeline', methods=['GET', 'POST'])
|
||||
@login_required
|
||||
@register_breadcrumb(bp, '.transkribus_htr_pipeline', '<i class="nopaque-icons service-icons left" data-service="transkribus-htr-pipeline"></i>Transkribus HTR Pipeline')
|
||||
def transkribus_htr_pipeline():
|
||||
if not current_app.config.get('NOPAQUE_TRANSKRIBUS_ENABLED'):
|
||||
abort(404)
|
||||
@@ -126,10 +135,9 @@ def transkribus_htr_pipeline():
|
||||
abort(500)
|
||||
transkribus_htr_pipeline_models = r.json()['trpModelMetadata']
|
||||
transkribus_htr_pipeline_models.append({'modelId': 48513, 'name': 'Caroline Minuscle', 'language': 'lat', 'isoLanguages': ['lat']})
|
||||
print(transkribus_htr_pipeline_models[len(transkribus_htr_pipeline_models)-1])
|
||||
form = CreateTranskribusHTRPipelineJobForm(
|
||||
transkribus_htr_pipeline_models=transkribus_htr_pipeline_models,
|
||||
prefix='create-job-form',
|
||||
transkribus_htr_pipeline_models=transkribus_htr_pipeline_models,
|
||||
version=version
|
||||
)
|
||||
if form.is_submitted():
|
||||
@@ -161,14 +169,14 @@ def transkribus_htr_pipeline():
|
||||
return {}, 201, {'Location': job.url}
|
||||
return render_template(
|
||||
'services/transkribus_htr_pipeline.html.j2',
|
||||
form=form,
|
||||
title=service_manifest['name'],
|
||||
form=form,
|
||||
transkribus_htr_pipeline_models=transkribus_htr_pipeline_models
|
||||
)
|
||||
|
||||
|
||||
@bp.route('/spacy-nlp-pipeline', methods=['GET', 'POST'])
|
||||
@login_required
|
||||
@register_breadcrumb(bp, '.spacy_nlp_pipeline', '<i class="nopaque-icons service-icons left" data-service="spacy-nlp-pipeline"></i>SpaCy NLP Pipeline')
|
||||
def spacy_nlp_pipeline():
|
||||
service = 'spacy-nlp-pipeline'
|
||||
service_manifest = SERVICES[service]
|
||||
@@ -177,6 +185,7 @@ def spacy_nlp_pipeline():
|
||||
abort(404)
|
||||
form = CreateSpacyNLPPipelineJobForm(prefix='create-job-form', version=version)
|
||||
spacy_nlp_pipeline_models = SpaCyNLPPipelineModel.query.all()
|
||||
user_spacy_nlp_pipeline_models_count = len(current_user.spacy_nlp_pipeline_models.all())
|
||||
if form.is_submitted():
|
||||
if not form.validate():
|
||||
response = {'errors': form.errors}
|
||||
@@ -206,16 +215,17 @@ def spacy_nlp_pipeline():
|
||||
return {}, 201, {'Location': job.url}
|
||||
return render_template(
|
||||
'services/spacy_nlp_pipeline.html.j2',
|
||||
title=service_manifest['name'],
|
||||
form=form,
|
||||
spacy_nlp_pipeline_models=spacy_nlp_pipeline_models,
|
||||
title=service_manifest['name']
|
||||
user_spacy_nlp_pipeline_models_count=user_spacy_nlp_pipeline_models_count
|
||||
)
|
||||
|
||||
|
||||
@bp.route('/corpus-analysis')
|
||||
@login_required
|
||||
@register_breadcrumb(bp, '.corpus_analysis', '<i class="nopaque-icons service-icons left" data-service="corpus-analysis"></i>Corpus Analysis')
|
||||
def corpus_analysis():
|
||||
return render_template(
|
||||
'services/corpus_analysis.html.j2',
|
||||
title='Corpus analysis'
|
||||
title='Corpus Analysis'
|
||||
)
|
||||
|
@@ -10,7 +10,7 @@ file-setup-pipeline:
|
||||
tesseract-ocr-pipeline:
|
||||
name: 'Tesseract OCR Pipeline'
|
||||
publisher: 'Bielefeld University - CRC 1288 - INF'
|
||||
latest_version: '0.1.1'
|
||||
latest_version: '0.1.2'
|
||||
versions:
|
||||
0.1.0:
|
||||
methods:
|
||||
@@ -23,6 +23,12 @@ tesseract-ocr-pipeline:
|
||||
- 'ocropus_nlbin_threshold'
|
||||
publishing_year: 2022
|
||||
url: 'https://gitlab.ub.uni-bielefeld.de/sfb1288inf/tesseract-ocr-pipeline/-/releases/v0.1.1'
|
||||
0.1.2:
|
||||
methods:
|
||||
- 'binarization'
|
||||
- 'ocropus_nlbin_threshold'
|
||||
publishing_year: 2023
|
||||
url: 'https://gitlab.ub.uni-bielefeld.de/sfb1288inf/tesseract-ocr-pipeline/-/releases/v0.1.2'
|
||||
transkribus-htr-pipeline:
|
||||
name: 'Transkribus HTR Pipeline'
|
||||
publisher: 'Bielefeld University - CRC 1288 - INF'
|
||||
@@ -41,7 +47,7 @@ transkribus-htr-pipeline:
|
||||
spacy-nlp-pipeline:
|
||||
name: 'SpaCy NLP Pipeline'
|
||||
publisher: 'Bielefeld University - CRC 1288 - INF'
|
||||
latest_version: '0.1.2'
|
||||
latest_version: '0.1.1'
|
||||
versions:
|
||||
0.1.0:
|
||||
methods:
|
||||
@@ -53,8 +59,3 @@ spacy-nlp-pipeline:
|
||||
- 'encoding_detection'
|
||||
publishing_year: 2022
|
||||
url: 'https://gitlab.ub.uni-bielefeld.de/sfb1288inf/spacy-nlp-pipeline/-/releases/v0.1.1'
|
||||
0.1.2:
|
||||
methods:
|
||||
- 'encoding_detection'
|
||||
publishing_year: 2022
|
||||
url: 'https://gitlab.ub.uni-bielefeld.de/sfb1288inf/spacy-nlp-pipeline/-/releases/v0.1.2'
|
||||
|
@@ -1,5 +1,18 @@
|
||||
from flask import Blueprint
|
||||
from flask_login import login_required
|
||||
|
||||
|
||||
bp = Blueprint('settings', __name__)
|
||||
from . import routes # noqa
|
||||
|
||||
|
||||
@bp.before_request
|
||||
@login_required
|
||||
def before_request():
|
||||
'''
|
||||
Ensures that the routes in this package can only be visited by users that
|
||||
are logged in.
|
||||
'''
|
||||
pass
|
||||
|
||||
|
||||
from . import routes
|
||||
|
@@ -1,43 +0,0 @@
|
||||
from flask_wtf import FlaskForm
|
||||
from wtforms import PasswordField, SelectField, SubmitField, ValidationError
|
||||
from wtforms.validators import DataRequired, EqualTo
|
||||
from app.models import UserSettingJobStatusMailNotificationLevel
|
||||
|
||||
|
||||
class ChangePasswordForm(FlaskForm):
|
||||
password = PasswordField('Old password', validators=[DataRequired()])
|
||||
new_password = PasswordField(
|
||||
'New password',
|
||||
validators=[
|
||||
DataRequired(),
|
||||
EqualTo('new_password_2', message='Passwords must match')
|
||||
]
|
||||
)
|
||||
new_password_2 = PasswordField(
|
||||
'New password confirmation',
|
||||
validators=[
|
||||
DataRequired(),
|
||||
EqualTo('new_password', message='Passwords must match')
|
||||
]
|
||||
)
|
||||
submit = SubmitField()
|
||||
|
||||
def __init__(self, user, *args, **kwargs):
|
||||
super().__init__(*args, **kwargs)
|
||||
self.user = user
|
||||
|
||||
def validate_current_password(self, field):
|
||||
if not self.user.verify_password(field.data):
|
||||
raise ValidationError('Invalid password')
|
||||
|
||||
|
||||
class EditNotificationSettingsForm(FlaskForm):
|
||||
job_status_mail_notification_level = SelectField(
|
||||
'Job status mail notification level',
|
||||
choices=[
|
||||
(x.name, x.name.capitalize())
|
||||
for x in UserSettingJobStatusMailNotificationLevel
|
||||
],
|
||||
validators=[DataRequired()]
|
||||
)
|
||||
submit = SubmitField()
|
@@ -1,39 +1,12 @@
|
||||
from flask import flash, redirect, render_template, url_for
|
||||
from flask_login import current_user, login_required
|
||||
from app import db
|
||||
from app.models import UserSettingJobStatusMailNotificationLevel
|
||||
from flask import g, url_for
|
||||
from flask_breadcrumbs import register_breadcrumb
|
||||
from flask_login import current_user
|
||||
from app.users.settings.routes import settings as settings_route
|
||||
from . import bp
|
||||
from .forms import ChangePasswordForm, EditNotificationSettingsForm
|
||||
|
||||
|
||||
@bp.route('', methods=['GET', 'POST'])
|
||||
@login_required
|
||||
@bp.route('/settings', methods=['GET', 'POST'])
|
||||
@register_breadcrumb(bp, '.', '<i class="material-icons left">settings</i>Settings')
|
||||
def settings():
|
||||
change_password_form = ChangePasswordForm(
|
||||
current_user,
|
||||
prefix='change-password-form'
|
||||
)
|
||||
edit_notification_settings_form = EditNotificationSettingsForm(
|
||||
data=current_user.to_json_serializeable(),
|
||||
prefix='edit-notification-settings-form'
|
||||
)
|
||||
# region handle change_password_form POST
|
||||
if change_password_form.submit.data and change_password_form.validate():
|
||||
current_user.password = change_password_form.new_password.data
|
||||
db.session.commit()
|
||||
flash('Your changes have been saved')
|
||||
return redirect(url_for('.settings'))
|
||||
# endregion handle change_password_form POST
|
||||
# region handle edit_notification_settings_form POST
|
||||
if edit_notification_settings_form.submit and edit_notification_settings_form.validate():
|
||||
current_user.setting_job_status_mail_notification_level = edit_notification_settings_form.job_status_mail_notification_level.data
|
||||
db.session.commit()
|
||||
flash('Your changes have been saved')
|
||||
return redirect(url_for('.settings'))
|
||||
# endregion handle edit_notification_settings_form POST
|
||||
return render_template(
|
||||
'settings/settings.html.j2',
|
||||
change_password_form=change_password_form,
|
||||
edit_notification_settings_form=edit_notification_settings_form,
|
||||
title='Settings'
|
||||
)
|
||||
g._nopaque_redirect_location_on_post = url_for('.settings')
|
||||
return settings_route(current_user.id)
|
||||
|
@@ -22,6 +22,11 @@ $color: (
|
||||
"surface": #ffffff,
|
||||
"error": #b00020
|
||||
),
|
||||
"social-area": (
|
||||
"base": #d6ae86,
|
||||
"darken": #C98536,
|
||||
"lighten": #EAE2DB
|
||||
),
|
||||
"service": (
|
||||
"corpus-analysis": (
|
||||
"base": #aa9cc9,
|
||||
@@ -108,6 +113,16 @@ $color: (
|
||||
}
|
||||
}
|
||||
|
||||
@each $key, $color-code in map-get($color, "social-area") {
|
||||
.social-area-color-#{$key} {
|
||||
background-color: $color-code !important;
|
||||
}
|
||||
|
||||
.social-area-color-border-#{$key} {
|
||||
border-color: $color-code !important;
|
||||
}
|
||||
}
|
||||
|
||||
@each $service-name, $color-palette in map-get($color, "service") {
|
||||
.service-color[data-service="#{$service-name}"] {
|
||||
background-color: map-get($color-palette, "base") !important;
|
||||
|
@@ -1,3 +1,8 @@
|
||||
.parallax-container .parallax {
|
||||
z-index: 0;
|
||||
}
|
||||
|
||||
.autocomplete-content {
|
||||
width: 100% !important;
|
||||
left: 0 !important;
|
||||
}
|
||||
|
@@ -1,132 +1,108 @@
|
||||
.modal-conent {
|
||||
#corpus-analysis-concordance-query-builder-input-field {
|
||||
border-bottom: #9E9E9E 1px solid;
|
||||
min-height: 38px;
|
||||
margin-top: 23px;
|
||||
}
|
||||
|
||||
#corpus-analysis-concordance-query-builder-input-field-placeholder {
|
||||
color: #9E9E9E;
|
||||
}
|
||||
|
||||
.modal-content {
|
||||
overflow-x: hidden;
|
||||
}
|
||||
|
||||
#concordance-query-builder {
|
||||
#corpus-analysis-concordance-positional-attr-modal, #corpus-analysis-concordance-corpus-analysis-concordance-structural-attr-modal {
|
||||
width: 70%;
|
||||
}
|
||||
|
||||
#concordance-query-builder nav {
|
||||
background-color: #6B3F89;
|
||||
margin-top: -25px;
|
||||
margin-left: -25px;
|
||||
width: 105%;
|
||||
}
|
||||
|
||||
#query-builder-nav{
|
||||
padding-left: 15px;
|
||||
}
|
||||
|
||||
#close-query-builder {
|
||||
margin-right: 50px;
|
||||
cursor: pointer;
|
||||
}
|
||||
|
||||
#general-options-query-builder-tutorial-info-icon {
|
||||
#corpus-analysis-concordance-general-options-query-builder-tutorial-info-icon {
|
||||
color: black;
|
||||
}
|
||||
|
||||
#your-query {
|
||||
border-bottom-style: solid;
|
||||
border-bottom-width: 1px;
|
||||
}
|
||||
|
||||
#insert-query-button {
|
||||
#corpus-analysis-concordance-insert-query-button {
|
||||
background-color: #00426f;
|
||||
text-align: center;
|
||||
}
|
||||
|
||||
#structural-attr h6 {
|
||||
margin-left: 15px;
|
||||
}
|
||||
|
||||
#add-structural-attribute-tutorial-info-icon {
|
||||
color: black;
|
||||
}
|
||||
|
||||
#sentence {
|
||||
background-color:#FD9720;
|
||||
}
|
||||
|
||||
#entity {
|
||||
background-color: #A6E22D;
|
||||
}
|
||||
|
||||
#text-annotation {
|
||||
background-color: #2FBBAB;
|
||||
}
|
||||
|
||||
#no-value-metadata-message {
|
||||
padding-top: 25px;
|
||||
margin-left: -20px;
|
||||
}
|
||||
|
||||
#token-kind-selector {
|
||||
.attr-modal-header {
|
||||
background-color: #f2eff7;
|
||||
padding: 15px;
|
||||
border-top-style: solid;
|
||||
border-color: #6B3F89;
|
||||
padding-left: 25px;
|
||||
border-top: 10px solid #6B3F89;
|
||||
margin-left: -24px;
|
||||
margin-top: -24px;
|
||||
margin-right: -24px;
|
||||
}
|
||||
|
||||
#token-kind-selector.s5 {
|
||||
margin-top: 15px;
|
||||
}
|
||||
|
||||
#token-kind-selector h6 {
|
||||
.attr-modal-header h6 {
|
||||
margin-left: 15px;
|
||||
}
|
||||
|
||||
#token-tutorial-info-icon {
|
||||
#corpus-analysis-concordance-add-structural-attribute-tutorial-info-icon {
|
||||
color: black;
|
||||
}
|
||||
|
||||
#no-value-message {
|
||||
[data-structural-attr-modal-action-button="sentence"]{
|
||||
background-color:#FD9720 !important;
|
||||
}
|
||||
|
||||
[data-structural-attr-modal-action-button="entity"]{
|
||||
background-color: #A6E22D !important;
|
||||
}
|
||||
|
||||
[data-structural-attr-modal-action-button="meta-data"]{
|
||||
background-color: #2FBBAB !important;
|
||||
}
|
||||
|
||||
#corpus-analysis-concordance-no-value-metadata-message {
|
||||
padding-top: 25px;
|
||||
margin-left: -20px;
|
||||
}
|
||||
|
||||
#token-edit-options h6 {
|
||||
margin-left: 15px;
|
||||
.attr-modal-header.input-field {
|
||||
margin-left: 41px;
|
||||
}
|
||||
|
||||
#edit-options-tutorial-info-icon {
|
||||
#corpus-analysis-concordance-token-attr {
|
||||
margin-left: 41px;
|
||||
}
|
||||
|
||||
#corpus-analysis-concordance-token-tutorial-info-icon {
|
||||
color: black;
|
||||
}
|
||||
|
||||
#incidence-modifiers-button a{
|
||||
background-color: #2FBBAB;
|
||||
#corpus-analysis-concordance-no-value-message {
|
||||
padding-top: 25px;
|
||||
margin-left: -20px;
|
||||
}
|
||||
|
||||
#incidence-modifiers a{
|
||||
background-color: white;
|
||||
#corpus-analysis-concordance-token-edit-options h6 {
|
||||
margin-left: 15px;
|
||||
}
|
||||
|
||||
#ignore-case {
|
||||
margin-left: 5px;
|
||||
#corpus-analysis-concordance-edit-options-tutorial-info-icon {
|
||||
color: black;
|
||||
}
|
||||
|
||||
#or, #and {
|
||||
background-color: #fc0;
|
||||
[data-toggle-area="input-field-options"] a {
|
||||
margin-right: 10px;
|
||||
}
|
||||
|
||||
#betweenNM {
|
||||
width: 60%;
|
||||
[data-target="corpus-analysis-concordance-character-incidence-modifiers-dropdown"], [data-target="corpus-analysis-concordance-token-incidence-modifiers-dropdown"] {
|
||||
background-color: #2FBBAB !important;
|
||||
}
|
||||
|
||||
#query-builder-tutorial-modal {
|
||||
width: 60%;
|
||||
#corpus-analysis-concordance-exactly-n-token-modal, #corpus-analysis-concordance-between-nm-token-modal {
|
||||
width: 30%;
|
||||
}
|
||||
|
||||
#query-builder-tutorial-modal ul {
|
||||
margin-top: 10px;
|
||||
[data-modal-id="corpus-analysis-concordance-exactly-n-token-modal"], [data-modal-id="corpus-analysis-concordance-between-nm-token-modal"] {
|
||||
margin-top: 15px !important;
|
||||
}
|
||||
|
||||
#query-builder-tutorial {
|
||||
padding:15px;
|
||||
}
|
||||
|
||||
#scroll-up-button-query-builder-tutorial {
|
||||
background-color: #28B3D1;
|
||||
[data-options-action="and"], [data-options-action="or"] {
|
||||
background-color: #fc0 !important;
|
||||
}
|
||||
|
||||
[data-type="start-sentence"], [data-type="end-sentence"] {
|
||||
@@ -134,13 +110,18 @@
|
||||
}
|
||||
|
||||
[data-type="start-empty-entity"], [data-type="start-entity"], [data-type="end-entity"] {
|
||||
background-color: #A6E22D;
|
||||
background-color: #a6e22d;
|
||||
}
|
||||
|
||||
[data-type="start-text-annotation"]{
|
||||
[data-type="text-annotation"]{
|
||||
background-color: #2FBBAB;
|
||||
}
|
||||
|
||||
[data-type="token"] {
|
||||
background-color: #28B3D1;
|
||||
}
|
||||
|
||||
[data-type="token-incidence-modifier"] {
|
||||
background-color: #4db6ac;
|
||||
color: white;
|
||||
}
|
||||
|
@@ -19,6 +19,10 @@
|
||||
height: 30px !important;
|
||||
}
|
||||
|
||||
#manual-modal .manual-chapter-title {
|
||||
display: none;
|
||||
}
|
||||
|
||||
.show-if-only-child:not(:only-child) {
|
||||
display: none !important;
|
||||
}
|
||||
|
Before Width: | Height: | Size: 222 KiB After Width: | Height: | Size: 123 KiB |
Before Width: | Height: | Size: 378 KiB After Width: | Height: | Size: 402 KiB |
BIN
app/static/images/manual/query_builder/editing_chips.gif
Normal file
After Width: | Height: | Size: 720 KiB |
Before Width: | Height: | Size: 854 KiB After Width: | Height: | Size: 589 KiB |
BIN
app/static/images/manual/query_builder/expert_mode.gif
Normal file
After Width: | Height: | Size: 436 KiB |
BIN
app/static/images/manual/query_builder/incidence_modifier.gif
Normal file
After Width: | Height: | Size: 189 KiB |
Before Width: | Height: | Size: 511 KiB After Width: | Height: | Size: 381 KiB |
Before Width: | Height: | Size: 1009 KiB After Width: | Height: | Size: 759 KiB |
Before Width: | Height: | Size: 903 KiB After Width: | Height: | Size: 750 KiB |
Before Width: | Height: | Size: 413 KiB After Width: | Height: | Size: 524 KiB |
BIN
app/static/images/nopaque_slogan_transparent.png
Normal file
After Width: | Height: | Size: 23 KiB |
Before Width: | Height: | Size: 2.0 KiB After Width: | Height: | Size: 34 KiB |
After Width: | Height: | Size: 160 KiB |
BIN
app/static/images/workshops/fgho_sommerschule_2023/corpus.png
Normal file
After Width: | Height: | Size: 182 KiB |