mirror of
				https://gitlab.ub.uni-bielefeld.de/sfb1288inf/nopaque.git
				synced 2025-10-26 08:21:14 +00:00 
			
		
		
		
	Compare commits
	
		
			33 Commits
		
	
	
		
			manual
			...
			82d6f6003f
		
	
	| Author | SHA1 | Date | |
|---|---|---|---|
|  | 82d6f6003f | ||
|  | 9da74c1c6f | ||
|  | ec23bd94ee | ||
|  | 55a62053b0 | ||
|  | a1e5bd61e0 | ||
|  | cf8c164d60 | ||
|  | 05ab204e5a | ||
|  | 9f188afd16 | ||
|  | dc77ac7b76 | ||
|  | 84276af322 | ||
|  | d9d4067536 | ||
|  | ba65cf5911 | ||
|  | 69a1edc51e | ||
|  | 32ad8c7359 | ||
|  | 8c0843d2d0 | ||
|  | d4c9ab5821 | ||
|  | 518a245133 | ||
|  | b6864b355a | ||
|  | 0a45e1bb65 | ||
|  | 08ca938333 | ||
|  | cfdef8d1fa | ||
|  | 9ac626c64d | ||
|  | c9ad538bee | ||
|  | baf70750e8 | ||
|  | 525723818e | ||
|  | 20c0678d3e | ||
|  | c323c53f37 | ||
|  | 2d8cef64e8 | ||
|  | 9b9edf501d | ||
|  | 903310c17f | ||
|  | bc92fd249f | ||
|  | 422415065d | ||
|  | 07ec01ae2e | 
							
								
								
									
										27
									
								
								Dockerfile
									
									
									
									
									
								
							
							
						
						
									
										27
									
								
								Dockerfile
									
									
									
									
									
								
							| @@ -4,11 +4,13 @@ FROM python:3.10.13-slim-bookworm | |||||||
| LABEL authors="Patrick Jentsch <p.jentsch@uni-bielefeld.de>" | LABEL authors="Patrick Jentsch <p.jentsch@uni-bielefeld.de>" | ||||||
|  |  | ||||||
|  |  | ||||||
|  | # Set environment variables | ||||||
| ENV LANG="C.UTF-8" | ENV LANG="C.UTF-8" | ||||||
| ENV PYTHONDONTWRITEBYTECODE="1" | ENV PYTHONDONTWRITEBYTECODE="1" | ||||||
| ENV PYTHONUNBUFFERED="1" | ENV PYTHONUNBUFFERED="1" | ||||||
|  |  | ||||||
|  |  | ||||||
|  | # Install system dependencies | ||||||
| RUN apt-get update \ | RUN apt-get update \ | ||||||
|  && apt-get install --no-install-recommends --yes \ |  && apt-get install --no-install-recommends --yes \ | ||||||
|       build-essential \ |       build-essential \ | ||||||
| @@ -17,37 +19,42 @@ RUN apt-get update \ | |||||||
|  && rm --recursive /var/lib/apt/lists/* |  && rm --recursive /var/lib/apt/lists/* | ||||||
|  |  | ||||||
|  |  | ||||||
|  | # Create a non-root user | ||||||
| RUN useradd --create-home --no-log-init nopaque \ | RUN useradd --create-home --no-log-init nopaque \ | ||||||
|  && groupadd docker \ |  && groupadd docker \ | ||||||
|  && usermod --append --groups docker nopaque |  && usermod --append --groups docker nopaque | ||||||
|  |  | ||||||
|  |  | ||||||
| USER nopaque | USER nopaque | ||||||
| WORKDIR /home/nopaque | WORKDIR /home/nopaque | ||||||
|  |  | ||||||
|  |  | ||||||
|  | # Create a Python virtual environment | ||||||
| ENV NOPAQUE_PYTHON3_VENV_PATH="/home/nopaque/.venv" | ENV NOPAQUE_PYTHON3_VENV_PATH="/home/nopaque/.venv" | ||||||
| RUN python3 -m venv "${NOPAQUE_PYTHON3_VENV_PATH}" | RUN python3 -m venv "${NOPAQUE_PYTHON3_VENV_PATH}" | ||||||
| ENV PATH="${NOPAQUE_PYTHON3_VENV_PATH}/bin:${PATH}" | ENV PATH="${NOPAQUE_PYTHON3_VENV_PATH}/bin:${PATH}" | ||||||
|  |  | ||||||
|  |  | ||||||
|  | # Install Python dependencies | ||||||
|  | COPY --chown=nopaque:nopaque requirements.txt requirements.txt | ||||||
|  | RUN python3 -m pip install --requirement requirements.txt \ | ||||||
|  |  && rm requirements.txt | ||||||
|  |  | ||||||
|  |  | ||||||
|  | # Install the application | ||||||
|  | COPY docker-nopaque-entrypoint.sh /usr/local/bin/ | ||||||
|  |  | ||||||
| COPY --chown=nopaque:nopaque app app | COPY --chown=nopaque:nopaque app app | ||||||
| COPY --chown=nopaque:nopaque migrations migrations | COPY --chown=nopaque:nopaque migrations migrations | ||||||
| COPY --chown=nopaque:nopaque tests tests | COPY --chown=nopaque:nopaque tests tests | ||||||
| COPY --chown=nopaque:nopaque .flaskenv boot.sh config.py nopaque.py requirements.txt ./ | COPY --chown=nopaque:nopaque .flaskenv boot.sh config.py nopaque.py requirements.txt ./ | ||||||
|  |  | ||||||
|  | RUN mkdir logs | ||||||
| RUN python3 -m pip install --requirement requirements.txt \ |  | ||||||
|  && mkdir logs |  | ||||||
|  |  | ||||||
|  |  | ||||||
| USER root |  | ||||||
|  |  | ||||||
|  |  | ||||||
| COPY docker-nopaque-entrypoint.sh /usr/local/bin/ |  | ||||||
|  |  | ||||||
|  |  | ||||||
| EXPOSE 5000 | EXPOSE 5000 | ||||||
|  |  | ||||||
|  |  | ||||||
|  | USER root | ||||||
|  |  | ||||||
|  |  | ||||||
| ENTRYPOINT ["docker-nopaque-entrypoint.sh"] | ENTRYPOINT ["docker-nopaque-entrypoint.sh"] | ||||||
|   | |||||||
| @@ -57,6 +57,9 @@ def create_app(config: Config = Config) -> Flask: | |||||||
|     scheduler.init_app(app) |     scheduler.init_app(app) | ||||||
|     socketio.init_app(app, message_queue=app.config['NOPAQUE_SOCKETIO_MESSAGE_QUEUE_URI'])  # noqa |     socketio.init_app(app, message_queue=app.config['NOPAQUE_SOCKETIO_MESSAGE_QUEUE_URI'])  # noqa | ||||||
|  |  | ||||||
|  |     from .models.event_listeners import register_event_listeners | ||||||
|  |     register_event_listeners() | ||||||
|  |  | ||||||
|     from .admin import bp as admin_blueprint |     from .admin import bp as admin_blueprint | ||||||
|     default_breadcrumb_root(admin_blueprint, '.admin') |     default_breadcrumb_root(admin_blueprint, '.admin') | ||||||
|     app.register_blueprint(admin_blueprint, url_prefix='/admin') |     app.register_blueprint(admin_blueprint, url_prefix='/admin') | ||||||
| @@ -99,7 +102,7 @@ def create_app(config: Config = Config) -> Flask: | |||||||
|  |  | ||||||
|     from .users import bp as users_blueprint |     from .users import bp as users_blueprint | ||||||
|     default_breadcrumb_root(users_blueprint, '.users') |     default_breadcrumb_root(users_blueprint, '.users') | ||||||
|     app.register_blueprint(users_blueprint, url_prefix='/users') |     app.register_blueprint(users_blueprint, cli_group='user', url_prefix='/users') | ||||||
|  |  | ||||||
|     from .workshops import bp as workshops_blueprint |     from .workshops import bp as workshops_blueprint | ||||||
|     app.register_blueprint(workshops_blueprint, url_prefix='/workshops') |     app.register_blueprint(workshops_blueprint, url_prefix='/workshops') | ||||||
|   | |||||||
| @@ -16,8 +16,8 @@ class CreateSpaCyNLPPipelineModelForm(ContributionBaseForm): | |||||||
|     ) |     ) | ||||||
|  |  | ||||||
|     def validate_spacy_model_file(self, field): |     def validate_spacy_model_file(self, field): | ||||||
|         if not field.data.filename.lower().endswith('.tar.gz'): |         if not field.data.filename.lower().endswith(('.tar.gz', ('.whl'))): | ||||||
|             raise ValidationError('.tar.gz files only!') |             raise ValidationError('.tar.gz or .whl files only!') | ||||||
|  |  | ||||||
|     def __init__(self, *args, **kwargs): |     def __init__(self, *args, **kwargs): | ||||||
|         if 'prefix' not in kwargs: |         if 'prefix' not in kwargs: | ||||||
|   | |||||||
| @@ -2,32 +2,34 @@ from flask import current_app | |||||||
| from app import db | from app import db | ||||||
| from app.models import User, Corpus, CorpusFile | from app.models import User, Corpus, CorpusFile | ||||||
| from datetime import datetime | from datetime import datetime | ||||||
|  | from pathlib import Path | ||||||
|  | from typing import Dict, List | ||||||
| import json | import json | ||||||
| import os |  | ||||||
| import shutil | import shutil | ||||||
|  |  | ||||||
|  |  | ||||||
| class SandpaperConverter: | class SandpaperConverter: | ||||||
|     def __init__(self, json_db_file, data_dir): |     def __init__(self, json_db_file: Path, data_dir: Path): | ||||||
|         self.json_db_file = json_db_file |         self.json_db_file = json_db_file | ||||||
|         self.data_dir = data_dir |         self.data_dir = data_dir | ||||||
|  |  | ||||||
|     def run(self): |     def run(self): | ||||||
|         with open(self.json_db_file, 'r') as f: |         with self.json_db_file.open('r') as f: | ||||||
|             json_db = json.loads(f.read()) |             json_db: List[Dict] = json.load(f) | ||||||
|  |  | ||||||
|         for json_user in json_db: |         for json_user in json_db: | ||||||
|             if not json_user['confirmed']: |             if not json_user['confirmed']: | ||||||
|                 current_app.logger.info(f'Skip unconfirmed user {json_user["username"]}') |                 current_app.logger.info(f'Skip unconfirmed user {json_user["username"]}') | ||||||
|                 continue |                 continue | ||||||
|             user_dir = os.path.join(self.data_dir, str(json_user['id'])) |             user_dir = self.data_dir / f'{json_user["id"]}' | ||||||
|             self.convert_user(json_user, user_dir) |             self.convert_user(json_user, user_dir) | ||||||
|             db.session.commit() |             db.session.commit() | ||||||
|  |  | ||||||
|  |  | ||||||
|     def convert_user(self, json_user, user_dir): |     def convert_user(self, json_user: Dict, user_dir: Path): | ||||||
|         current_app.logger.info(f'Create User {json_user["username"]}...') |         current_app.logger.info(f'Create User {json_user["username"]}...') | ||||||
|         user = User( |         try: | ||||||
|  |             user = User.create( | ||||||
|                 confirmed=json_user['confirmed'], |                 confirmed=json_user['confirmed'], | ||||||
|                 email=json_user['email'], |                 email=json_user['email'], | ||||||
|                 last_seen=datetime.fromtimestamp(json_user['last_seen']), |                 last_seen=datetime.fromtimestamp(json_user['last_seen']), | ||||||
| @@ -35,47 +37,34 @@ class SandpaperConverter: | |||||||
|                 password_hash=json_user['password_hash'],  # TODO: Needs to be added manually |                 password_hash=json_user['password_hash'],  # TODO: Needs to be added manually | ||||||
|                 username=json_user['username'] |                 username=json_user['username'] | ||||||
|             ) |             ) | ||||||
|         db.session.add(user) |         except OSError: | ||||||
|         db.session.flush(objects=[user]) |  | ||||||
|         db.session.refresh(user) |  | ||||||
|         try: |  | ||||||
|             user.makedirs() |  | ||||||
|         except OSError as e: |  | ||||||
|             current_app.logger.error(e) |  | ||||||
|             db.session.rollback() |  | ||||||
|             raise Exception('Internal Server Error') |             raise Exception('Internal Server Error') | ||||||
|         for json_corpus in json_user['corpora'].values(): |         for json_corpus in json_user['corpora'].values(): | ||||||
|             if not json_corpus['files'].values(): |             if not json_corpus['files'].values(): | ||||||
|                 current_app.logger.info(f'Skip empty corpus {json_corpus["title"]}') |                 current_app.logger.info(f'Skip empty corpus {json_corpus["title"]}') | ||||||
|                 continue |                 continue | ||||||
|             corpus_dir = os.path.join(user_dir, 'corpora', str(json_corpus['id'])) |             corpus_dir = user_dir / 'corpora' / f'{json_corpus["id"]}' | ||||||
|             self.convert_corpus(json_corpus, user, corpus_dir) |             self.convert_corpus(json_corpus, user, corpus_dir) | ||||||
|         current_app.logger.info('Done') |         current_app.logger.info('Done') | ||||||
|  |  | ||||||
|  |  | ||||||
|     def convert_corpus(self, json_corpus, user, corpus_dir): |     def convert_corpus(self, json_corpus: Dict, user: User, corpus_dir: Path): | ||||||
|         current_app.logger.info(f'Create Corpus {json_corpus["title"]}...') |         current_app.logger.info(f'Create Corpus {json_corpus["title"]}...') | ||||||
|         corpus = Corpus( |         try: | ||||||
|  |             corpus = Corpus.create( | ||||||
|                 user=user, |                 user=user, | ||||||
|                 creation_date=datetime.fromtimestamp(json_corpus['creation_date']), |                 creation_date=datetime.fromtimestamp(json_corpus['creation_date']), | ||||||
|                 description=json_corpus['description'], |                 description=json_corpus['description'], | ||||||
|                 title=json_corpus['title'] |                 title=json_corpus['title'] | ||||||
|             ) |             ) | ||||||
|         db.session.add(corpus) |         except OSError: | ||||||
|         db.session.flush(objects=[corpus]) |  | ||||||
|         db.session.refresh(corpus) |  | ||||||
|         try: |  | ||||||
|             corpus.makedirs() |  | ||||||
|         except OSError as e: |  | ||||||
|             current_app.logger.error(e) |  | ||||||
|             db.session.rollback() |  | ||||||
|             raise Exception('Internal Server Error') |             raise Exception('Internal Server Error') | ||||||
|         for json_corpus_file in json_corpus['files'].values(): |         for json_corpus_file in json_corpus['files'].values(): | ||||||
|             self.convert_corpus_file(json_corpus_file, corpus, corpus_dir) |             self.convert_corpus_file(json_corpus_file, corpus, corpus_dir) | ||||||
|         current_app.logger.info('Done') |         current_app.logger.info('Done') | ||||||
|  |  | ||||||
|  |  | ||||||
|     def convert_corpus_file(self, json_corpus_file, corpus, corpus_dir): |     def convert_corpus_file(self, json_corpus_file: Dict, corpus: Corpus, corpus_dir: Path): | ||||||
|         current_app.logger.info(f'Create CorpusFile {json_corpus_file["title"]}...') |         current_app.logger.info(f'Create CorpusFile {json_corpus_file["title"]}...') | ||||||
|         corpus_file = CorpusFile( |         corpus_file = CorpusFile( | ||||||
|             corpus=corpus, |             corpus=corpus, | ||||||
| @@ -99,13 +88,13 @@ class SandpaperConverter: | |||||||
|         db.session.refresh(corpus_file) |         db.session.refresh(corpus_file) | ||||||
|         try: |         try: | ||||||
|             shutil.copy2( |             shutil.copy2( | ||||||
|                 os.path.join(corpus_dir, json_corpus_file['filename']), |                 corpus_dir / json_corpus_file['filename'], | ||||||
|                 corpus_file.path |                 corpus_file.path | ||||||
|             ) |             ) | ||||||
|         except: |         except: | ||||||
|             current_app.logger.warning( |             current_app.logger.warning( | ||||||
|                 'Can not convert corpus file: ' |                 'Can not convert corpus file: ' | ||||||
|                 f'{os.path.join(corpus_dir, json_corpus_file["filename"])}' |                 f'{corpus_dir / json_corpus_file["filename"]}' | ||||||
|                 ' -> ' |                 ' -> ' | ||||||
|                 f'{corpus_file.path}' |                 f'{corpus_file.path}' | ||||||
|             ) |             ) | ||||||
|   | |||||||
| @@ -1,7 +1,7 @@ | |||||||
| from app.models import Corpus, CorpusStatus | from flask import current_app | ||||||
| import os |  | ||||||
| import shutil | import shutil | ||||||
| from app import db | from app import db | ||||||
|  | from app.models import Corpus, CorpusStatus | ||||||
| from . import bp | from . import bp | ||||||
|  |  | ||||||
|  |  | ||||||
| @@ -18,10 +18,17 @@ def reset(): | |||||||
|     ] |     ] | ||||||
|     for corpus in [x for x in Corpus.query.all() if x.status in status]: |     for corpus in [x for x in Corpus.query.all() if x.status in status]: | ||||||
|         print(f'Resetting corpus {corpus}') |         print(f'Resetting corpus {corpus}') | ||||||
|         shutil.rmtree(os.path.join(corpus.path, 'cwb'), ignore_errors=True) |         corpus_cwb_dir = corpus.path / 'cwb' | ||||||
|         os.mkdir(os.path.join(corpus.path, 'cwb')) |         corpus_cwb_data_dir = corpus_cwb_dir / 'data' | ||||||
|         os.mkdir(os.path.join(corpus.path, 'cwb', 'data')) |         corpus_cwb_registry_dir = corpus_cwb_dir / 'registry' | ||||||
|         os.mkdir(os.path.join(corpus.path, 'cwb', 'registry')) |         try: | ||||||
|  |             shutil.rmtree(corpus.path / 'cwb', ignore_errors=True) | ||||||
|  |             corpus_cwb_dir.mkdir() | ||||||
|  |             corpus_cwb_data_dir.mkdir() | ||||||
|  |             corpus_cwb_registry_dir.mkdir() | ||||||
|  |         except OSError as e: | ||||||
|  |             current_app.logger.error(e) | ||||||
|  |             raise | ||||||
|         corpus.status = CorpusStatus.UNPREPARED |         corpus.status = CorpusStatus.UNPREPARED | ||||||
|         corpus.num_analysis_sessions = 0 |         corpus.num_analysis_sessions = 0 | ||||||
|     db.session.commit() |     db.session.commit() | ||||||
|   | |||||||
| @@ -12,7 +12,6 @@ from typing import Dict, List | |||||||
| import gzip | import gzip | ||||||
| import json | import json | ||||||
| import math | import math | ||||||
| import os |  | ||||||
| from app import db | from app import db | ||||||
| from app.models import Corpus | from app.models import Corpus | ||||||
| from .utils import lookups_by_cpos, partial_export_subcorpus, export_subcorpus | from .utils import lookups_by_cpos, partial_export_subcorpus, export_subcorpus | ||||||
| @@ -42,9 +41,9 @@ def ext_corpus_static_data(corpus: str) -> Dict: | |||||||
|     db_corpus_id: int = session['cqi_over_sio']['db_corpus_id'] |     db_corpus_id: int = session['cqi_over_sio']['db_corpus_id'] | ||||||
|     db_corpus: Corpus = Corpus.query.get(db_corpus_id) |     db_corpus: Corpus = Corpus.query.get(db_corpus_id) | ||||||
|  |  | ||||||
|     static_data_file_path: str = os.path.join(db_corpus.path, 'cwb', 'static.json.gz') |     static_data_file_path = db_corpus.path / 'cwb' / 'static.json.gz' | ||||||
|     if os.path.exists(static_data_file_path): |     if static_data_file_path.exists(): | ||||||
|         with open(static_data_file_path, 'rb') as f: |         with static_data_file_path.open('rb') as f: | ||||||
|             return f.read() |             return f.read() | ||||||
|  |  | ||||||
|     cqi_client: CQiClient = session['cqi_over_sio']['cqi_client'] |     cqi_client: CQiClient = session['cqi_over_sio']['cqi_client'] | ||||||
|   | |||||||
| @@ -7,7 +7,6 @@ from flask import ( | |||||||
|     url_for |     url_for | ||||||
| ) | ) | ||||||
| from flask_breadcrumbs import register_breadcrumb | from flask_breadcrumbs import register_breadcrumb | ||||||
| import os |  | ||||||
| from app import db | from app import db | ||||||
| from app.models import Corpus, CorpusFile, CorpusStatus | from app.models import Corpus, CorpusFile, CorpusStatus | ||||||
| from ..decorators import corpus_follower_permission_required | from ..decorators import corpus_follower_permission_required | ||||||
| @@ -92,8 +91,8 @@ def corpus_file(corpus_id, corpus_file_id): | |||||||
| def download_corpus_file(corpus_id, corpus_file_id): | def download_corpus_file(corpus_id, corpus_file_id): | ||||||
|     corpus_file = CorpusFile.query.filter_by(corpus_id=corpus_id, id=corpus_file_id).first_or_404() |     corpus_file = CorpusFile.query.filter_by(corpus_id=corpus_id, id=corpus_file_id).first_or_404() | ||||||
|     return send_from_directory( |     return send_from_directory( | ||||||
|         os.path.dirname(corpus_file.path), |         corpus_file.path.parent, | ||||||
|         os.path.basename(corpus_file.path), |         corpus_file.path.name, | ||||||
|         as_attachment=True, |         as_attachment=True, | ||||||
|         attachment_filename=corpus_file.filename, |         attachment_filename=corpus_file.filename, | ||||||
|         mimetype=corpus_file.mimetype |         mimetype=corpus_file.mimetype | ||||||
|   | |||||||
| @@ -97,14 +97,14 @@ def analysis(corpus_id): | |||||||
|     ) |     ) | ||||||
|  |  | ||||||
|  |  | ||||||
| # @bp.route('/<hashid:corpus_id>/follow/<token>') | @bp.route('/<hashid:corpus_id>/follow/<token>') | ||||||
| # def follow_corpus(corpus_id, token): | def follow_corpus(corpus_id, token): | ||||||
| #     corpus = Corpus.query.get_or_404(corpus_id) |     corpus = Corpus.query.get_or_404(corpus_id) | ||||||
| #     if current_user.follow_corpus_by_token(token): |     if current_user.follow_corpus_by_token(token): | ||||||
| #         db.session.commit() |         db.session.commit() | ||||||
| #         flash(f'You are following "{corpus.title}" now', category='corpus') |         flash(f'You are following "{corpus.title}" now', category='corpus') | ||||||
| #         return redirect(url_for('corpora.corpus', corpus_id=corpus_id)) |         return redirect(url_for('corpora.corpus', corpus_id=corpus_id)) | ||||||
| #     abort(403) |     abort(403) | ||||||
|  |  | ||||||
|  |  | ||||||
| @bp.route('/import', methods=['GET', 'POST']) | @bp.route('/import', methods=['GET', 'POST']) | ||||||
|   | |||||||
							
								
								
									
										2
									
								
								app/ext/flask_sqlalchemy/__init__.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										2
									
								
								app/ext/flask_sqlalchemy/__init__.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,2 @@ | |||||||
|  | from .container_column import ContainerColumn | ||||||
|  | from .int_enum_column import IntEnumColumn | ||||||
							
								
								
									
										21
									
								
								app/ext/flask_sqlalchemy/container_column.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										21
									
								
								app/ext/flask_sqlalchemy/container_column.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,21 @@ | |||||||
|  | import json | ||||||
|  | from app import db | ||||||
|  |  | ||||||
|  |  | ||||||
class ContainerColumn(db.TypeDecorator):
    """String-backed column type that stores a container as JSON text.

    The column is declared with the Python container type it should hold
    (``container_type``); any remaining positional and keyword arguments
    are forwarded to the underlying ``db.String`` implementation.
    """

    impl = db.String

    def __init__(self, container_type, *args, **kwargs):
        """Remember the accepted container type and configure the string impl.

        Args:
            container_type: Type (or tuple of types) checked with
                ``isinstance`` when a value is bound.
            *args: Forwarded to the ``db.TypeDecorator`` initializer.
            **kwargs: Forwarded to the ``db.TypeDecorator`` initializer.
        """
        super().__init__(*args, **kwargs)
        self.container_type = container_type

    def process_bind_param(self, value, dialect):
        """Convert a Python value into the JSON string written to the database.

        Args:
            value: A ``container_type`` instance, a JSON string that decodes
                to a ``container_type`` instance, or ``None``.
            dialect: The dialect in use (unused).

        Returns:
            The JSON string to store, or ``None`` for NULL.

        Raises:
            TypeError: If ``value`` is neither of the accepted forms.
            ValueError: If ``value`` is a string that is not valid JSON
                (raised by ``json.loads``).
        """
        # SQLAlchemy hands NULL values to this hook as None; pass them
        # through untouched instead of treating them as a type error.
        if value is None:
            return None
        if isinstance(value, self.container_type):
            return json.dumps(value)
        if isinstance(value, str) and isinstance(json.loads(value), self.container_type):
            # Already serialized: store the caller's string as-is.
            return value
        # The exception must be raised, not returned: a returned exception
        # instance would be passed on to the database driver as the value.
        raise TypeError(
            f'Expected {self.container_type!r} or a JSON string encoding it, '
            f'got {type(value).__name__}'
        )

    def process_result_value(self, value, dialect):
        """Decode the stored JSON string back into a Python object.

        Args:
            value: The raw string read from the database, or ``None``.
            dialect: The dialect in use (unused).

        Returns:
            The decoded object, or ``None`` when the column is NULL.
        """
        if value is None:
            return None
        return json.loads(value)
							
								
								
									
										22
									
								
								app/ext/flask_sqlalchemy/int_enum_column.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										22
									
								
								app/ext/flask_sqlalchemy/int_enum_column.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,22 @@ | |||||||
|  | from app import db | ||||||
|  |  | ||||||
|  |  | ||||||
class IntEnumColumn(db.TypeDecorator):
    """Integer-backed column type that maps to members of an enum class.

    The column is declared with the enum class it represents
    (``enum_type``); any remaining positional and keyword arguments are
    forwarded to the underlying ``db.Integer`` implementation.
    """

    impl = db.Integer

    def __init__(self, enum_type, *args, **kwargs):
        """Remember the enum class and configure the integer impl.

        Args:
            enum_type: Enum class used to validate and convert values.
            *args: Forwarded to the ``db.TypeDecorator`` initializer.
            **kwargs: Forwarded to the ``db.TypeDecorator`` initializer.
        """
        super().__init__(*args, **kwargs)
        self.enum_type = enum_type

    def process_bind_param(self, value, dialect):
        """Convert a Python value into the integer written to the database.

        Args:
            value: An ``enum_type`` member with an integer value, a plain
                integer that is a valid member value, a member name given
                as a string, or ``None``.
            dialect: The dialect in use (unused).

        Returns:
            The integer value of the matching enum member, or ``None`` for
            NULL.

        Raises:
            TypeError: If ``value`` is none of the accepted forms.
            ValueError: If an integer does not correspond to a member
                (raised by the ``enum_type(value)`` lookup).
            KeyError: If a string does not name a member (raised by the
                ``enum_type[value]`` lookup).
        """
        # SQLAlchemy hands NULL values to this hook as None; pass them
        # through untouched instead of treating them as a type error.
        if value is None:
            return None
        if isinstance(value, self.enum_type) and isinstance(value.value, int):
            return value.value
        if isinstance(value, int):
            # Round-trip through the enum so unknown integers are rejected.
            return self.enum_type(value).value
        if isinstance(value, str):
            # Strings are interpreted as member names.
            return self.enum_type[value].value
        # The exception must be raised, not returned: a returned exception
        # instance would be passed on to the database driver as the value.
        raise TypeError(
            f'Expected {self.enum_type!r} member, int or member name, '
            f'got {type(value).__name__}'
        )

    def process_result_value(self, value, dialect):
        """Convert the stored integer back into an ``enum_type`` member.

        Args:
            value: The raw integer read from the database, or ``None``.
            dialect: The dialect in use (unused).

        Returns:
            The matching enum member, or ``None`` when the column is NULL.
        """
        if value is None:
            return None
        return self.enum_type(value)
| @@ -1,7 +1,6 @@ | |||||||
| from flask import abort, current_app | from flask import abort, current_app | ||||||
| from flask_login import current_user | from flask_login import current_user | ||||||
| from threading import Thread | from threading import Thread | ||||||
| import os |  | ||||||
| from app import db | from app import db | ||||||
| from app.decorators import admin_required, content_negotiation | from app.decorators import admin_required, content_negotiation | ||||||
| from app.models import Job, JobStatus | from app.models import Job, JobStatus | ||||||
| @@ -39,7 +38,7 @@ def job_log(job_id): | |||||||
|     if job.status not in [JobStatus.COMPLETED, JobStatus.FAILED]: |     if job.status not in [JobStatus.COMPLETED, JobStatus.FAILED]: | ||||||
|         response = {'errors': {'message': 'Job status is not completed or failed'}} |         response = {'errors': {'message': 'Job status is not completed or failed'}} | ||||||
|         return response, 409 |         return response, 409 | ||||||
|     with open(os.path.join(job.path, 'pipeline_data', 'logs', 'pyflow_log.txt')) as log_file: |     with open(job.path / 'pipeline_data' / 'logs' / 'pyflow_log.txt') as log_file: | ||||||
|         log = log_file.read() |         log = log_file.read() | ||||||
|     response_data = { |     response_data = { | ||||||
|         'jobLog': log |         'jobLog': log | ||||||
|   | |||||||
| @@ -7,7 +7,6 @@ from flask import ( | |||||||
| ) | ) | ||||||
| from flask_breadcrumbs import register_breadcrumb | from flask_breadcrumbs import register_breadcrumb | ||||||
| from flask_login import current_user | from flask_login import current_user | ||||||
| import os |  | ||||||
| from app.models import Job, JobInput, JobResult | from app.models import Job, JobInput, JobResult | ||||||
| from . import bp | from . import bp | ||||||
| from .utils import job_dynamic_list_constructor as job_dlc | from .utils import job_dynamic_list_constructor as job_dlc | ||||||
| @@ -38,8 +37,8 @@ def download_job_input(job_id, job_input_id): | |||||||
|     if not (job_input.job.user == current_user or current_user.is_administrator()): |     if not (job_input.job.user == current_user or current_user.is_administrator()): | ||||||
|         abort(403) |         abort(403) | ||||||
|     return send_from_directory( |     return send_from_directory( | ||||||
|         os.path.dirname(job_input.path), |         job_input.path.parent, | ||||||
|         os.path.basename(job_input.path), |         job_input.path.name, | ||||||
|         as_attachment=True, |         as_attachment=True, | ||||||
|         attachment_filename=job_input.filename, |         attachment_filename=job_input.filename, | ||||||
|         mimetype=job_input.mimetype |         mimetype=job_input.mimetype | ||||||
| @@ -52,8 +51,8 @@ def download_job_result(job_id, job_result_id): | |||||||
|     if not (job_result.job.user == current_user or current_user.is_administrator()): |     if not (job_result.job.user == current_user or current_user.is_administrator()): | ||||||
|         abort(403) |         abort(403) | ||||||
|     return send_from_directory( |     return send_from_directory( | ||||||
|         os.path.dirname(job_result.path), |         job_result.path.parent, | ||||||
|         os.path.basename(job_result.path), |         job_result.path.name, | ||||||
|         as_attachment=True, |         as_attachment=True, | ||||||
|         attachment_filename=job_result.filename, |         attachment_filename=job_result.filename, | ||||||
|         mimetype=job_result.mimetype |         mimetype=job_result.mimetype | ||||||
|   | |||||||
| @@ -1,6 +1,7 @@ | |||||||
| from flask import current_app | from flask import current_app | ||||||
| from flask_migrate import upgrade | from flask_migrate import upgrade | ||||||
| import os | from pathlib import Path | ||||||
|  | from typing import List | ||||||
| from app.models import ( | from app.models import ( | ||||||
|     CorpusFollowerRole, |     CorpusFollowerRole, | ||||||
|     Role, |     Role, | ||||||
| @@ -17,16 +18,15 @@ def deploy(): | |||||||
|     # Make default directories |     # Make default directories | ||||||
|     print('Make default directories') |     print('Make default directories') | ||||||
|     base_dir = current_app.config['NOPAQUE_DATA_DIR'] |     base_dir = current_app.config['NOPAQUE_DATA_DIR'] | ||||||
|     default_dirs = [ |     default_dirs: List[Path] = [ | ||||||
|         os.path.join(base_dir, 'tmp'), |         base_dir / 'tmp', | ||||||
|         os.path.join(base_dir, 'users') |         base_dir / 'users' | ||||||
|     ] |     ] | ||||||
|     for dir in default_dirs: |     for default_dir in default_dirs: | ||||||
|         if os.path.exists(dir): |         if not default_dir.exists(): | ||||||
|             if not os.path.isdir(dir): |             default_dir.mkdir() | ||||||
|                 raise NotADirectoryError(f'{dir} is not a directory') |         if not default_dir.is_dir(): | ||||||
|         else: |             raise NotADirectoryError(f'{default_dir} is not a directory') | ||||||
|             os.mkdir(dir) |  | ||||||
|  |  | ||||||
|     # migrate database to latest revision |     # migrate database to latest revision | ||||||
|     print('Migrate database to latest revision') |     print('Migrate database to latest revision') | ||||||
|   | |||||||
							
								
								
									
										1819
									
								
								app/models.py
									
									
									
									
									
								
							
							
						
						
									
										1819
									
								
								app/models.py
									
									
									
									
									
								
							
										
											
												File diff suppressed because it is too large
												Load Diff
											
										
									
								
							
							
								
								
									
										19
									
								
								app/models/__init__.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										19
									
								
								app/models/__init__.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,19 @@ | |||||||
|  | from .avatar import * | ||||||
|  | from .corpus_file import * | ||||||
|  | from .corpus_follower_association import * | ||||||
|  | from .corpus_follower_role import * | ||||||
|  | from .corpus import * | ||||||
|  | from .job_input import * | ||||||
|  | from .job_result import * | ||||||
|  | from .job import * | ||||||
|  | from .role import * | ||||||
|  | from .spacy_nlp_pipeline_model import * | ||||||
|  | from .tesseract_ocr_pipeline_model import * | ||||||
|  | from .token import * | ||||||
|  | from .user import * | ||||||
|  | from app import login | ||||||
|  |  | ||||||
|  |  | ||||||
@login.user_loader
def load_user(user_id):
    """Look up the user for the id that Flask-Login stored in the session.

    Args:
        user_id: The user id as handed over by Flask-Login.

    Returns:
        The matching ``User``, or ``None`` if the id is not a valid integer
        or no such user exists.
    """
    # A user loader is expected to return None for an invalid id rather
    # than raise, so a malformed session value must not surface as an
    # unhandled ValueError/TypeError.
    try:
        primary_key = int(user_id)
    except (TypeError, ValueError):
        return None
    return User.query.get(primary_key)
							
								
								
									
										40
									
								
								app/models/avatar.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										40
									
								
								app/models/avatar.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,40 @@ | |||||||
|  | from flask import current_app | ||||||
|  | from flask_hashids import HashidMixin | ||||||
|  | from pathlib import Path | ||||||
|  | from app import db | ||||||
|  | from .file_mixin import FileMixin | ||||||
|  |  | ||||||
|  |  | ||||||
class Avatar(HashidMixin, FileMixin, db.Model):
    """Database model for a user's avatar file (table ``avatars``).

    The file itself lives at ``<user.path>/avatar``; file metadata comes
    from ``FileMixin`` (defined elsewhere in the package).
    """

    __tablename__ = 'avatars'

    # Primary key
    id = db.Column(db.Integer, primary_key=True)

    # Foreign keys
    user_id = db.Column(db.Integer, db.ForeignKey('users.id'))

    # Relationships
    user = db.relationship('User', back_populates='avatar')

    @property
    def path(self) -> Path:
        """Location of the avatar file inside the owning user's directory."""
        return self.user.path / 'avatar'

    def delete(self):
        """Remove the avatar file and mark this row for deletion.

        A missing file is not an error. Any other ``OSError`` is logged and
        re-raised, in which case the row is not marked for deletion.
        """
        try:
            self.path.unlink(missing_ok=True)
        except OSError as error:
            current_app.logger.error(error)
            raise
        db.session.delete(self)

    def to_json_serializeable(self, backrefs=False, relationships=False):
        """Build a JSON-serializable dict describing this avatar.

        Args:
            backrefs: When true, include the owning user under ``'user'``.
            relationships: Accepted for interface parity; this model adds
                nothing for it.

        Returns:
            A dict with the hashid under ``'id'`` plus the fields supplied
            by ``file_mixin_to_json_serializeable()``.
        """
        serialized = {'id': self.hashid}
        serialized.update(self.file_mixin_to_json_serializeable())
        if backrefs:
            serialized['user'] = self.user.to_json_serializeable(backrefs=True)
        return serialized
							
								
								
									
										200
									
								
								app/models/corpus.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										200
									
								
								app/models/corpus.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,200 @@ | |||||||
|  | from datetime import datetime | ||||||
|  | from enum import IntEnum | ||||||
|  | from flask import current_app, url_for | ||||||
|  | from flask_hashids import HashidMixin | ||||||
|  | from sqlalchemy.ext.associationproxy import association_proxy | ||||||
|  | from typing import Union | ||||||
|  | from pathlib import Path | ||||||
|  | import shutil | ||||||
|  | import xml.etree.ElementTree as ET | ||||||
|  | from app import db | ||||||
|  | from app.converters.vrt import normalize_vrt_file | ||||||
|  | from app.ext.flask_sqlalchemy import IntEnumColumn | ||||||
|  | from .corpus_follower_association import CorpusFollowerAssociation | ||||||
|  |  | ||||||
|  |  | ||||||
class CorpusStatus(IntEnum):
    '''
    Status values a corpus can have.
    '''
    UNPREPARED = 1
    SUBMITTED = 2
    QUEUED = 3
    BUILDING = 4
    BUILT = 5
    FAILED = 6
    STARTING_ANALYSIS_SESSION = 7
    RUNNING_ANALYSIS_SESSION = 8
    CANCELING_ANALYSIS_SESSION = 9

    @staticmethod
    def get(corpus_status: Union['CorpusStatus', int, str]) -> 'CorpusStatus':
        '''
        Coerce a member, an integer value or a member name to a member.

        Raises ``TypeError`` for any other input type, ``ValueError`` for an
        unknown integer value and ``KeyError`` for an unknown name.
        '''
        if not isinstance(corpus_status, (int, str)):
            raise TypeError('corpus_status must be CorpusStatus, int, or str')
        if isinstance(corpus_status, str):
            # Strings are looked up by member name
            return CorpusStatus[corpus_status]
        # Members are ints themselves; looking a member up by value hands
        # back that very member, so this covers both remaining cases.
        return CorpusStatus(corpus_status)
|  |  | ||||||
|  |  | ||||||
class Corpus(HashidMixin, db.Model):
    '''
    Class to define a corpus.

    A corpus belongs to a user, owns a set of corpus files and can be
    followed by other users through corpus follower associations. Its data
    lives on disk below ``path``.
    '''
    __tablename__ = 'corpora'
    # Primary key
    id = db.Column(db.Integer, primary_key=True)
    # Foreign keys
    user_id = db.Column(db.Integer, db.ForeignKey('users.id'))
    # Fields
    creation_date = db.Column(db.DateTime(), default=datetime.utcnow)
    description = db.Column(db.String(255))
    status = db.Column(
        IntEnumColumn(CorpusStatus),
        default=CorpusStatus.UNPREPARED
    )
    title = db.Column(db.String(32))
    num_analysis_sessions = db.Column(db.Integer, default=0)
    num_tokens = db.Column(db.Integer, default=0)
    is_public = db.Column(db.Boolean, default=False)
    # Relationships
    files = db.relationship(
        'CorpusFile',
        back_populates='corpus',
        lazy='dynamic',
        cascade='all, delete-orphan'
    )
    corpus_follower_associations = db.relationship(
        'CorpusFollowerAssociation',
        back_populates='corpus',
        cascade='all, delete-orphan'
    )
    followers = association_proxy(
        'corpus_follower_associations',
        'follower',
        creator=lambda u: CorpusFollowerAssociation(follower=u)
    )
    user = db.relationship('User', back_populates='corpora')
    # "static" attributes
    # 2 ** 31 - 1
    max_num_tokens = 2_147_483_647

    def __repr__(self):
        return f'<Corpus {self.title}>'

    @property
    def analysis_url(self):
        '''URL of the ``corpora.analysis`` endpoint for this corpus.'''
        return url_for('corpora.analysis', corpus_id=self.id)

    @property
    def jsonpatch_path(self):
        '''Location of this corpus below the owning user's jsonpatch path.'''
        return f'{self.user.jsonpatch_path}/corpora/{self.hashid}'

    @property
    def path(self) -> Path:
        '''Directory of this corpus: ``<user path>/corpora/<id>``.'''
        return self.user.path / 'corpora' / f'{self.id}'

    @property
    def url(self):
        '''URL of the ``corpora.corpus`` endpoint for this corpus.'''
        return url_for('corpora.corpus', corpus_id=self.id)

    @property
    def user_hashid(self):
        '''Hashid of the owning user.'''
        return self.user.hashid

    @staticmethod
    def create(**kwargs):
        '''
        Create a corpus row together with its directory layout.

        ``kwargs`` are handed to the ``Corpus`` constructor. The new object is
        added to the session, flushed and refreshed so that its id is
        available for building the path. Afterwards the directories
        ``files``, ``cwb``, ``cwb/data`` and ``cwb/registry`` are created
        below the corpus path.

        On an ``OSError`` the error is logged, the session is rolled back and
        the error is re-raised. The session is not committed here.
        '''
        corpus = Corpus(**kwargs)
        db.session.add(corpus)
        db.session.flush(objects=[corpus])
        db.session.refresh(corpus)
        corpus_files_dir = corpus.path / 'files'
        corpus_cwb_dir = corpus.path / 'cwb'
        corpus_cwb_data_dir = corpus_cwb_dir / 'data'
        corpus_cwb_registry_dir = corpus_cwb_dir / 'registry'
        try:
            corpus.path.mkdir()
            corpus_files_dir.mkdir()
            corpus_cwb_dir.mkdir()
            corpus_cwb_data_dir.mkdir()
            corpus_cwb_registry_dir.mkdir()
        except OSError as e:
            # TODO: Potential leftover cleanup
            current_app.logger.error(e)
            db.session.rollback()
            raise
        return corpus

    def build(self):
        '''
        Assemble ``cwb/corpus.vrt`` from all files of this corpus.

        The ``cwb`` directory is wiped and recreated first. Every corpus file
        is normalized into ``cwb/<file id>.norm.vrt``, the root element of the
        normalized file is annotated with the file's metadata and appended to
        a common ``<corpus>`` element, which is finally written to
        ``cwb/corpus.vrt``. On success the status becomes ``SUBMITTED``.

        If the directories can not be prepared the status becomes ``FAILED``
        and the ``OSError`` is re-raised. If normalizing a file fails the
        error is logged, the status becomes ``FAILED`` and the method returns
        without raising.
        '''
        corpus_cwb_dir = self.path / 'cwb'
        corpus_cwb_data_dir = corpus_cwb_dir / 'data'
        corpus_cwb_registry_dir = corpus_cwb_dir / 'registry'
        try:
            shutil.rmtree(corpus_cwb_dir, ignore_errors=True)
            corpus_cwb_dir.mkdir()
            corpus_cwb_data_dir.mkdir()
            corpus_cwb_registry_dir.mkdir()
        except OSError as e:
            current_app.logger.error(e)
            self.status = CorpusStatus.FAILED
            raise
        # Metadata that may be missing; written as 'NULL' when empty
        optional_attributes = (
            'address',
            'booktitle',
            'chapter',
            'editor',
            'institution',
            'journal',
            'pages',
            'publisher',
            'school'
        )
        corpus_element = ET.fromstring('<corpus>\n</corpus>')
        for corpus_file in self.files:
            normalized_vrt_path = corpus_cwb_dir / f'{corpus_file.id}.norm.vrt'
            try:
                normalize_vrt_file(corpus_file.path, normalized_vrt_path)
            except Exception as e:
                # Only catch regular errors (not KeyboardInterrupt/SystemExit)
                # and leave a trace of what went wrong before giving up.
                current_app.logger.error(e)
                self.status = CorpusStatus.FAILED
                return
            element_tree = ET.parse(normalized_vrt_path)
            text_element = element_tree.getroot()
            text_element.set('author', corpus_file.author)
            text_element.set('title', corpus_file.title)
            text_element.set(
                'publishing_year',
                f'{corpus_file.publishing_year}'
            )
            for attribute in optional_attributes:
                # Test the raw value: formatting it first would turn a missing
                # value into the truthy string 'None' and skip the fallback.
                text_element.set(
                    attribute,
                    getattr(corpus_file, attribute) or 'NULL'
                )
            text_element.tail = '\n'
            corpus_element.append(text_element)
        ET.ElementTree(corpus_element).write(
            corpus_cwb_dir / 'corpus.vrt',
            encoding='utf-8'
        )
        self.status = CorpusStatus.SUBMITTED

    def delete(self):
        '''
        Remove the corpus directory and mark the row for deletion.

        Errors while removing the directory tree are ignored. The session is
        not committed here.
        '''
        shutil.rmtree(self.path, ignore_errors=True)
        db.session.delete(self)

    def to_json_serializeable(self, backrefs=False, relationships=False):
        '''
        Build a dict representation of this corpus.

        With ``backrefs`` the owning user is embedded under ``user``. With
        ``relationships`` the follower associations and the files are
        embedded as dicts keyed by their hashids.
        '''
        json_serializeable = {
            'id': self.hashid,
            'creation_date': f'{self.creation_date.isoformat()}Z',
            'description': self.description,
            'max_num_tokens': self.max_num_tokens,
            'num_analysis_sessions': self.num_analysis_sessions,
            'num_tokens': self.num_tokens,
            'status': self.status.name,
            'title': self.title,
            'is_public': self.is_public
        }
        if backrefs:
            json_serializeable['user'] = \
                self.user.to_json_serializeable(backrefs=True)
        if relationships:
            json_serializeable['corpus_follower_associations'] = {
                x.hashid: x.to_json_serializeable()
                for x in self.corpus_follower_associations
            }
            json_serializeable['files'] = {
                x.hashid: x.to_json_serializeable(relationships=True)
                for x in self.files
            }
        return json_serializeable
							
								
								
									
										102
									
								
								app/models/corpus_file.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										102
									
								
								app/models/corpus_file.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,102 @@ | |||||||
|  | from flask import current_app, url_for | ||||||
|  | from flask_hashids import HashidMixin | ||||||
|  | from pathlib import Path | ||||||
|  | from app import db | ||||||
|  | from .corpus import CorpusStatus | ||||||
|  | from .file_mixin import FileMixin | ||||||
|  |  | ||||||
|  |  | ||||||
class CorpusFile(FileMixin, HashidMixin, db.Model):
    '''
    Database model for a single file of a corpus, including its
    bibliographic metadata.
    '''
    __tablename__ = 'corpus_files'
    # Primary key
    id = db.Column(db.Integer, primary_key=True)
    # Foreign keys
    corpus_id = db.Column(db.Integer, db.ForeignKey('corpora.id'))
    # Fields
    author = db.Column(db.String(255))
    description = db.Column(db.String(255))
    publishing_year = db.Column(db.Integer)
    title = db.Column(db.String(255))
    address = db.Column(db.String(255))
    booktitle = db.Column(db.String(255))
    chapter = db.Column(db.String(255))
    editor = db.Column(db.String(255))
    institution = db.Column(db.String(255))
    journal = db.Column(db.String(255))
    pages = db.Column(db.String(255))
    publisher = db.Column(db.String(255))
    school = db.Column(db.String(255))
    # Relationships
    corpus = db.relationship('Corpus', back_populates='files')

    @property
    def download_url(self):
        '''URL of the ``corpora.download_corpus_file`` endpoint.'''
        endpoint = 'corpora.download_corpus_file'
        return url_for(
            endpoint,
            corpus_id=self.corpus_id,
            corpus_file_id=self.id
        )

    @property
    def jsonpatch_path(self):
        '''Location of this file below the corpus' jsonpatch path.'''
        parent_path = self.corpus.jsonpatch_path
        return f'{parent_path}/files/{self.hashid}'

    @property
    def path(self) -> Path:
        '''File location: ``<corpus path>/files/<id>``.'''
        files_dir = self.corpus.path / 'files'
        return files_dir / f'{self.id}'

    @property
    def url(self):
        '''URL of the ``corpora.corpus_file`` endpoint.'''
        endpoint = 'corpora.corpus_file'
        return url_for(
            endpoint,
            corpus_id=self.corpus_id,
            corpus_file_id=self.id
        )

    @property
    def user_hashid(self):
        '''Hashid of the user owning the parent corpus.'''
        return self.corpus.user.hashid

    @property
    def user_id(self):
        '''Id of the user owning the parent corpus.'''
        return self.corpus.user_id

    def delete(self):
        '''
        Remove the file from disk and mark the row for deletion.

        A missing file is not an error. Any other ``OSError`` is logged and
        re-raised. On success the parent corpus is set back to
        ``UNPREPARED``. The session is not committed here.
        '''
        try:
            self.path.unlink(missing_ok=True)
        except OSError as error:
            current_app.logger.error(error)
            raise
        db.session.delete(self)
        self.corpus.status = CorpusStatus.UNPREPARED

    def to_json_serializeable(self, backrefs=False, relationships=False):
        '''
        Build a dict representation of this corpus file.

        The dict holds the hashid, the metadata columns and everything the
        file mixin contributes. With ``backrefs`` the parent corpus is
        embedded under ``corpus``. ``relationships`` is only passed on to the
        file mixin.
        '''
        # Kept in this order so that the resulting dict is laid out exactly
        # like before
        column_names = (
            'address',
            'author',
            'description',
            'booktitle',
            'chapter',
            'editor',
            'institution',
            'journal',
            'pages',
            'publisher',
            'publishing_year',
            'school',
            'title'
        )
        serialized = {'id': self.hashid}
        for column_name in column_names:
            serialized[column_name] = getattr(self, column_name)
        serialized.update(
            self.file_mixin_to_json_serializeable(
                backrefs=backrefs,
                relationships=relationships
            )
        )
        if backrefs:
            serialized['corpus'] = self.corpus.to_json_serializeable(
                backrefs=True
            )
        return serialized
							
								
								
									
										47
									
								
								app/models/corpus_follower_association.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										47
									
								
								app/models/corpus_follower_association.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,47 @@ | |||||||
|  | from flask_hashids import HashidMixin | ||||||
|  | from app import db | ||||||
|  | from .corpus_follower_role import CorpusFollowerRole | ||||||
|  |  | ||||||
|  |  | ||||||
class CorpusFollowerAssociation(HashidMixin, db.Model):
    '''
    Association object that links a following user to a corpus and assigns
    the role the follower has for that corpus.
    '''
    __tablename__ = 'corpus_follower_associations'
    # Primary key
    id = db.Column(db.Integer, primary_key=True)
    # Foreign keys
    corpus_id = db.Column(db.Integer, db.ForeignKey('corpora.id'))
    follower_id = db.Column(db.Integer, db.ForeignKey('users.id'))
    role_id = db.Column(db.Integer, db.ForeignKey('corpus_follower_roles.id'))
    # Relationships
    corpus = db.relationship(
        'Corpus',
        back_populates='corpus_follower_associations'
    )
    follower = db.relationship(
        'User',
        back_populates='corpus_follower_associations'
    )
    role = db.relationship(
        'CorpusFollowerRole',
        back_populates='corpus_follower_associations'
    )

    def __init__(self, **kwargs):
        '''
        Create an association; without an explicit ``role`` the first role
        flagged as default is used.
        '''
        if 'role' in kwargs:
            super().__init__(**kwargs)
            return
        default_role = CorpusFollowerRole.query.filter_by(default=True).first()
        super().__init__(**kwargs, role=default_role)

    def __repr__(self):
        return (
            '<CorpusFollowerAssociation '
            f'{self.follower!r} ~ {self.role!r} ~ {self.corpus!r}>'
        )

    def to_json_serializeable(self, backrefs=False, relationships=False):
        '''
        Build a dict representation of this association.

        Corpus (including its backrefs), follower and role are always
        embedded. ``backrefs`` and ``relationships`` are accepted but add
        nothing.
        '''
        serialized = {'id': self.hashid}
        serialized['corpus'] = self.corpus.to_json_serializeable(backrefs=True)
        serialized['follower'] = self.follower.to_json_serializeable()
        serialized['role'] = self.role.to_json_serializeable()
        return serialized
							
								
								
									
										107
									
								
								app/models/corpus_follower_role.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										107
									
								
								app/models/corpus_follower_role.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,107 @@ | |||||||
|  | from flask_hashids import HashidMixin | ||||||
|  | from enum import IntEnum | ||||||
|  | from typing import Union | ||||||
|  | from app import db | ||||||
|  |  | ||||||
|  |  | ||||||
class CorpusFollowerPermission(IntEnum):
    '''
    Permissions a corpus follower role can grant. Every member is a distinct
    power of two, so several of them can be combined in one integer.
    '''
    VIEW = 1
    MANAGE_FILES = 2
    MANAGE_FOLLOWERS = 4
    MANAGE_CORPUS = 8

    @staticmethod
    def get(
        corpus_follower_permission: Union['CorpusFollowerPermission', int, str]
    ) -> 'CorpusFollowerPermission':
        '''
        Coerce a member, an integer value or a member name to a member.

        Raises ``TypeError`` for any other input type, ``ValueError`` for an
        unknown integer value and ``KeyError`` for an unknown name.
        '''
        if not isinstance(corpus_follower_permission, (int, str)):
            raise TypeError('corpus_follower_permission must be CorpusFollowerPermission, int, or str')
        if isinstance(corpus_follower_permission, str):
            # Strings are looked up by member name
            return CorpusFollowerPermission[corpus_follower_permission]
        # Members are ints themselves; looking a member up by value hands
        # back that very member, so this covers both remaining cases.
        return CorpusFollowerPermission(corpus_follower_permission)
|  |  | ||||||
|  |  | ||||||
class CorpusFollowerRole(HashidMixin, db.Model):
    '''
    Role of a corpus follower. The granted permissions are stored as a bit
    mask of ``CorpusFollowerPermission`` values in ``permissions``.
    '''
    __tablename__ = 'corpus_follower_roles'
    # Primary key
    id = db.Column(db.Integer, primary_key=True)
    # Fields
    name = db.Column(db.String(64), unique=True)
    default = db.Column(db.Boolean, default=False, index=True)
    permissions = db.Column(db.Integer, default=0)
    # Relationships
    corpus_follower_associations = db.relationship(
        'CorpusFollowerAssociation',
        back_populates='role'
    )

    def __repr__(self):
        return f'<CorpusFollowerRole {self.name}>'

    def has_permission(self, permission: Union[CorpusFollowerPermission, int, str]):
        '''Tell whether the bit of ``permission`` is set for this role.'''
        perm = CorpusFollowerPermission.get(permission)
        return (self.permissions & perm.value) == perm.value

    def add_permission(self, permission: Union[CorpusFollowerPermission, int, str]):
        '''Set the bit of ``permission``; a no-op if it is already set.'''
        perm = CorpusFollowerPermission.get(permission)
        self.permissions |= perm.value

    def remove_permission(self, permission: Union[CorpusFollowerPermission, int, str]):
        '''Clear the bit of ``permission``; a no-op if it is not set.'''
        perm = CorpusFollowerPermission.get(permission)
        self.permissions &= ~perm.value

    def reset_permissions(self):
        '''Revoke every permission.'''
        self.permissions = 0

    def to_json_serializeable(self, backrefs=False, relationships=False):
        '''
        Build a dict representation of this role.

        ``permissions`` lists the names of all granted permissions. With
        ``relationships`` the associations using this role are embedded as a
        dict keyed by their hashids. ``backrefs`` is accepted but adds
        nothing.
        '''
        json_serializeable = {
            'id': self.hashid,
            'default': self.default,
            'name': self.name,
            'permissions': [
                x.name
                for x in CorpusFollowerPermission
                if self.has_permission(x)
            ]
        }
        if backrefs:
            pass
        if relationships:
            # The relationship attribute is the plural
            # ``corpus_follower_associations``; the key of the resulting
            # entry is left as it was.
            json_serializeable['corpus_follower_association'] = {
                x.hashid: x.to_json_serializeable(relationships=True)
                for x in self.corpus_follower_associations
            }
        return json_serializeable

    @staticmethod
    def insert_defaults():
        '''
        Create or update the predefined roles and commit the session.

        Existing roles are looked up by name and get their permissions reset
        to the predefined set. ``Viewer`` is flagged as the default role, all
        other predefined roles are flagged as non-default.
        '''
        roles = {
            'Anonymous': [],
            'Viewer': [
                CorpusFollowerPermission.VIEW
            ],
            'Contributor': [
                CorpusFollowerPermission.VIEW,
                CorpusFollowerPermission.MANAGE_FILES
            ],
            'Administrator': [
                CorpusFollowerPermission.VIEW,
                CorpusFollowerPermission.MANAGE_FILES,
                CorpusFollowerPermission.MANAGE_FOLLOWERS,
                CorpusFollowerPermission.MANAGE_CORPUS
            ]
        }
        default_role_name = 'Viewer'
        for role_name, permissions in roles.items():
            role = CorpusFollowerRole.query.filter_by(name=role_name).first()
            if role is None:
                role = CorpusFollowerRole(name=role_name)
            role.reset_permissions()
            for permission in permissions:
                role.add_permission(permission)
            role.default = role.name == default_role_name
            db.session.add(role)
        db.session.commit()
| @@ -120,6 +120,7 @@ | |||||||
|   version: '3.4.0' |   version: '3.4.0' | ||||||
|   compatible_service_versions: |   compatible_service_versions: | ||||||
|     - '0.1.1' |     - '0.1.1' | ||||||
|  |     - '0.1.2' | ||||||
| - title: 'German' | - title: 'German' | ||||||
|   description: 'German pipeline optimized for CPU. Components: tok2vec, tagger, morphologizer, parser, lemmatizer (trainable_lemmatizer), senter, ner.' |   description: 'German pipeline optimized for CPU. Components: tok2vec, tagger, morphologizer, parser, lemmatizer (trainable_lemmatizer), senter, ner.' | ||||||
|   url: 'https://github.com/explosion/spacy-models/releases/download/de_core_news_md-3.4.0/de_core_news_md-3.4.0.tar.gz' |   url: 'https://github.com/explosion/spacy-models/releases/download/de_core_news_md-3.4.0/de_core_news_md-3.4.0.tar.gz' | ||||||
| @@ -131,6 +132,7 @@ | |||||||
|   version: '3.4.0' |   version: '3.4.0' | ||||||
|   compatible_service_versions: |   compatible_service_versions: | ||||||
|     - '0.1.1' |     - '0.1.1' | ||||||
|  |     - '0.1.2' | ||||||
| - title: 'Greek' | - title: 'Greek' | ||||||
|   description: 'Greek pipeline optimized for CPU. Components: tok2vec, morphologizer, parser, lemmatizer (trainable_lemmatizer), senter, ner, attribute_ruler.' |   description: 'Greek pipeline optimized for CPU. Components: tok2vec, morphologizer, parser, lemmatizer (trainable_lemmatizer), senter, ner, attribute_ruler.' | ||||||
|   url: 'https://github.com/explosion/spacy-models/releases/download/el_core_news_md-3.4.0/el_core_news_md-3.4.0.tar.gz' |   url: 'https://github.com/explosion/spacy-models/releases/download/el_core_news_md-3.4.0/el_core_news_md-3.4.0.tar.gz' | ||||||
| @@ -142,6 +144,7 @@ | |||||||
|   version: '3.4.0' |   version: '3.4.0' | ||||||
|   compatible_service_versions: |   compatible_service_versions: | ||||||
|     - '0.1.1' |     - '0.1.1' | ||||||
|  |     - '0.1.2' | ||||||
| - title: 'English' | - title: 'English' | ||||||
|   description: 'English pipeline optimized for CPU. Components: tok2vec, tagger, parser, senter, ner, attribute_ruler, lemmatizer.' |   description: 'English pipeline optimized for CPU. Components: tok2vec, tagger, parser, senter, ner, attribute_ruler, lemmatizer.' | ||||||
|   url: 'https://github.com/explosion/spacy-models/releases/download/en_core_web_md-3.4.1/en_core_web_md-3.4.1.tar.gz' |   url: 'https://github.com/explosion/spacy-models/releases/download/en_core_web_md-3.4.1/en_core_web_md-3.4.1.tar.gz' | ||||||
| @@ -153,6 +156,7 @@ | |||||||
|   version: '3.4.1' |   version: '3.4.1' | ||||||
|   compatible_service_versions: |   compatible_service_versions: | ||||||
|     - '0.1.1' |     - '0.1.1' | ||||||
|  |     - '0.1.2' | ||||||
| - title: 'Spanish' | - title: 'Spanish' | ||||||
|   description: 'Spanish pipeline optimized for CPU. Components: tok2vec, morphologizer, parser, senter, ner, attribute_ruler, lemmatizer.' |   description: 'Spanish pipeline optimized for CPU. Components: tok2vec, morphologizer, parser, senter, ner, attribute_ruler, lemmatizer.' | ||||||
|   url: 'https://github.com/explosion/spacy-models/releases/download/es_core_news_md-3.4.0/es_core_news_md-3.4.0.tar.gz' |   url: 'https://github.com/explosion/spacy-models/releases/download/es_core_news_md-3.4.0/es_core_news_md-3.4.0.tar.gz' | ||||||
| @@ -164,6 +168,7 @@ | |||||||
|   version: '3.4.0' |   version: '3.4.0' | ||||||
|   compatible_service_versions: |   compatible_service_versions: | ||||||
|     - '0.1.1' |     - '0.1.1' | ||||||
|  |     - '0.1.2' | ||||||
| - title: 'French' | - title: 'French' | ||||||
|   description: 'French pipeline optimized for CPU. Components: tok2vec, morphologizer, parser, senter, ner, attribute_ruler, lemmatizer.' |   description: 'French pipeline optimized for CPU. Components: tok2vec, morphologizer, parser, senter, ner, attribute_ruler, lemmatizer.' | ||||||
|   url: 'https://github.com/explosion/spacy-models/releases/download/fr_core_news_md-3.4.0/fr_core_news_md-3.4.0.tar.gz' |   url: 'https://github.com/explosion/spacy-models/releases/download/fr_core_news_md-3.4.0/fr_core_news_md-3.4.0.tar.gz' | ||||||
| @@ -175,6 +180,7 @@ | |||||||
|   version: '3.4.0' |   version: '3.4.0' | ||||||
|   compatible_service_versions: |   compatible_service_versions: | ||||||
|     - '0.1.1' |     - '0.1.1' | ||||||
|  |     - '0.1.2' | ||||||
| - title: 'Italian' | - title: 'Italian' | ||||||
|   description: 'Italian pipeline optimized for CPU. Components: tok2vec, morphologizer, tagger, parser, lemmatizer (trainable_lemmatizer), senter, ner' |   description: 'Italian pipeline optimized for CPU. Components: tok2vec, morphologizer, tagger, parser, lemmatizer (trainable_lemmatizer), senter, ner' | ||||||
|   url: 'https://github.com/explosion/spacy-models/releases/download/it_core_news_md-3.4.0/it_core_news_md-3.4.0.tar.gz' |   url: 'https://github.com/explosion/spacy-models/releases/download/it_core_news_md-3.4.0/it_core_news_md-3.4.0.tar.gz' | ||||||
| @@ -186,6 +192,7 @@ | |||||||
|   version: '3.4.0' |   version: '3.4.0' | ||||||
|   compatible_service_versions: |   compatible_service_versions: | ||||||
|     - '0.1.1' |     - '0.1.1' | ||||||
|  |     - '0.1.2' | ||||||
| - title: 'Polish' | - title: 'Polish' | ||||||
|   description: 'Polish pipeline optimized for CPU. Components: tok2vec, morphologizer, parser, lemmatizer (trainable_lemmatizer), tagger, senter, ner.' |   description: 'Polish pipeline optimized for CPU. Components: tok2vec, morphologizer, parser, lemmatizer (trainable_lemmatizer), tagger, senter, ner.' | ||||||
|   url: 'https://github.com/explosion/spacy-models/releases/download/pl_core_news_md-3.4.0/pl_core_news_md-3.4.0.tar.gz' |   url: 'https://github.com/explosion/spacy-models/releases/download/pl_core_news_md-3.4.0/pl_core_news_md-3.4.0.tar.gz' | ||||||
| @@ -197,6 +204,7 @@ | |||||||
|   version: '3.4.0' |   version: '3.4.0' | ||||||
|   compatible_service_versions: |   compatible_service_versions: | ||||||
|     - '0.1.1' |     - '0.1.1' | ||||||
|  |     - '0.1.2' | ||||||
| - title: 'Russian' | - title: 'Russian' | ||||||
|   description: 'Russian pipeline optimized for CPU. Components: tok2vec, morphologizer, parser, senter, ner, attribute_ruler, lemmatizer.' |   description: 'Russian pipeline optimized for CPU. Components: tok2vec, morphologizer, parser, senter, ner, attribute_ruler, lemmatizer.' | ||||||
|   url: 'https://github.com/explosion/spacy-models/releases/download/ru_core_news_md-3.4.0/ru_core_news_md-3.4.0.tar.gz' |   url: 'https://github.com/explosion/spacy-models/releases/download/ru_core_news_md-3.4.0/ru_core_news_md-3.4.0.tar.gz' | ||||||
| @@ -208,6 +216,7 @@ | |||||||
|   version: '3.4.0' |   version: '3.4.0' | ||||||
|   compatible_service_versions: |   compatible_service_versions: | ||||||
|     - '0.1.1' |     - '0.1.1' | ||||||
|  |     - '0.1.2' | ||||||
| - title: 'Chinese' | - title: 'Chinese' | ||||||
|   description: 'Chinese pipeline optimized for CPU. Components: tok2vec, tagger, parser, senter, ner, attribute_ruler.' |   description: 'Chinese pipeline optimized for CPU. Components: tok2vec, tagger, parser, senter, ner, attribute_ruler.' | ||||||
|   url: 'https://github.com/explosion/spacy-models/releases/download/zh_core_web_md-3.4.0/zh_core_web_md-3.4.0.tar.gz' |   url: 'https://github.com/explosion/spacy-models/releases/download/zh_core_web_md-3.4.0/zh_core_web_md-3.4.0.tar.gz' | ||||||
| @@ -219,3 +228,4 @@ | |||||||
|   version: '3.4.0' |   version: '3.4.0' | ||||||
|   compatible_service_versions: |   compatible_service_versions: | ||||||
|     - '0.1.1' |     - '0.1.1' | ||||||
|  |     - '0.1.2' | ||||||
							
								
								
									
										133
									
								
								app/models/event_listeners.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										133
									
								
								app/models/event_listeners.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,133 @@ | |||||||
|  | from datetime import datetime | ||||||
|  | from enum import Enum | ||||||
|  | from app import db, mail, socketio | ||||||
|  | from app.email import create_message | ||||||
|  | from .corpus_file import CorpusFile | ||||||
|  | from .corpus_follower_association import CorpusFollowerAssociation | ||||||
|  | from .corpus import Corpus | ||||||
|  | from .job_input import JobInput | ||||||
|  | from .job_result import JobResult | ||||||
|  | from .job import Job, JobStatus | ||||||
|  | from .spacy_nlp_pipeline_model import SpaCyNLPPipelineModel | ||||||
|  | from .tesseract_ocr_pipeline_model import TesseractOCRPipelineModel | ||||||
|  | from .user import UserSettingJobStatusMailNotificationLevel | ||||||
|  |  | ||||||
|  |  | ||||||
def register_event_listeners():
    '''
    Attach the mapper-event handlers defined in this module to the models.

    Every resource model receives the generic delete/insert/update handlers,
    CorpusFollowerAssociation receives its dedicated delete/insert handlers
    and Job additionally receives job_after_update.
    '''
    resource_models = (
        Corpus,
        CorpusFile,
        Job,
        JobInput,
        JobResult,
        SpaCyNLPPipelineModel,
        TesseractOCRPipelineModel
    )
    resource_handlers = (
        ('after_delete', resource_after_delete),
        ('after_insert', resource_after_insert),
        ('after_update', resource_after_update)
    )
    for model in resource_models:
        for event_name, handler in resource_handlers:
            db.event.listen(model, event_name, handler)

    cfa_handlers = (
        ('after_delete', cfa_after_delete),
        ('after_insert', cfa_after_insert)
    )
    for event_name, handler in cfa_handlers:
        db.event.listen(CorpusFollowerAssociation, event_name, handler)

    # Registered in addition to (and after) the generic update handler
    db.event.listen(Job, 'after_update', job_after_update)
|  |  | ||||||
|  |  | ||||||
def resource_after_delete(mapper, connection, resource):
    '''
    Emit a JSON Patch "remove" operation for a deleted resource.

    The patch targets resource.jsonpatch_path and is emitted as a 'PATCH'
    event to the room of the user given by resource.user_hashid.
    '''
    remove_operation = {'op': 'remove', 'path': resource.jsonpatch_path}
    socketio.emit(
        'PATCH',
        [remove_operation],
        room=f'/users/{resource.user_hashid}'
    )
|  |  | ||||||
|  |  | ||||||
def cfa_after_delete(mapper, connection, cfa):
    '''
    Emit a JSON Patch "remove" operation for a deleted corpus follower
    association.

    The patch is emitted as a 'PATCH' event to the room of the user that
    owns the followed corpus.
    '''
    corpus = cfa.corpus
    owner_hashid = corpus.user.hashid
    remove_operation = {
        'op': 'remove',
        'path': f'/users/{owner_hashid}/corpora/{corpus.hashid}/corpus_follower_associations/{cfa.hashid}'
    }
    socketio.emit('PATCH', [remove_operation], room=f'/users/{owner_hashid}')
|  |  | ||||||
|  |  | ||||||
def resource_after_insert(mapper, connection, resource):
    '''
    Emit a JSON Patch "add" operation for a newly inserted resource.

    The value is the resource's JSON representation, with one empty object
    added per relationship of the mapper.
    '''
    payload = resource.to_json_serializeable()
    # Relationships are announced as empty objects
    payload.update({relationship.key: {} for relationship in mapper.relationships})
    add_operation = {
        'op': 'add',
        'path': resource.jsonpatch_path,
        'value': payload
    }
    socketio.emit(
        'PATCH',
        [add_operation],
        room=f'/users/{resource.user_hashid}'
    )
|  |  | ||||||
|  |  | ||||||
def cfa_after_insert(mapper, connection, cfa):
    '''
    Emit a JSON Patch "add" operation for a newly inserted corpus follower
    association.

    The patch is emitted as a 'PATCH' event to the room of the user that
    owns the followed corpus.
    '''
    payload = cfa.to_json_serializeable()
    corpus = cfa.corpus
    owner_hashid = corpus.user.hashid
    add_operation = {
        'op': 'add',
        'path': f'/users/{owner_hashid}/corpora/{corpus.hashid}/corpus_follower_associations/{cfa.hashid}',
        'value': payload
    }
    socketio.emit('PATCH', [add_operation], room=f'/users/{owner_hashid}')
|  |  | ||||||
|  |  | ||||||
def resource_after_update(mapper, connection, resource):
    '''
    Emit JSON Patch "replace" operations for the changed attributes of an
    updated resource.

    Relationship attributes and attributes without changes are skipped.
    Nothing is emitted when no operation remains.
    '''
    def to_patch_value(value):
        # datetimes are sent as ISO 8601 strings with a trailing "Z",
        # enum members are sent by name, everything else is sent as is
        if isinstance(value, datetime):
            return f'{value.isoformat()}Z'
        if isinstance(value, Enum):
            return value.name
        return value

    changed_attrs = (
        attr for attr in db.inspect(resource).attrs
        if attr.key not in mapper.relationships
        and attr.load_history().has_changes()
    )
    operations = [
        {
            'op': 'replace',
            'path': f'{resource.jsonpatch_path}/{attr.key}',
            'value': to_patch_value(attr.value)
        }
        for attr in changed_attrs
    ]
    if not operations:
        return
    socketio.emit('PATCH', operations, room=f'/users/{resource.user_hashid}')
|  |  | ||||||
|  |  | ||||||
def job_after_update(mapper, connection, job):
    '''
    Send a status notification mail to the job's user after a status change.

    No mail is sent when the status did not change, when the user's
    notification level is NONE, or when the level is END and the new status
    is neither COMPLETED nor FAILED.
    '''
    status_attr = next(
        (attr for attr in db.inspect(job).attrs if attr.key == 'status'),
        None
    )
    if status_attr is None:
        return
    if not status_attr.load_history().has_changes():
        return

    notification_level = job.user.setting_job_status_mail_notification_level
    if notification_level == UserSettingJobStatusMailNotificationLevel.NONE:
        return
    only_on_end = \
        notification_level == UserSettingJobStatusMailNotificationLevel.END
    if only_on_end and job.status not in [JobStatus.COMPLETED, JobStatus.FAILED]:
        return

    message = create_message(
        job.user.email,
        f'Status update for your Job "{job.title}"',
        'tasks/email/notification',
        job=job
    )
    mail.send(message)
							
								
								
									
										40
									
								
								app/models/file_mixin.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										40
									
								
								app/models/file_mixin.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,40 @@ | |||||||
|  | from datetime import datetime | ||||||
|  | from flask import current_app | ||||||
|  | from werkzeug.utils import secure_filename | ||||||
|  | from app import db | ||||||
|  |  | ||||||
|  |  | ||||||
class FileMixin:
    '''
    Mixin with the columns and helpers shared by file related db.Model
    classes.

    create() stores the uploaded file at ``obj.path``, so classes using this
    mixin have to provide a ``path`` attribute.
    '''
    creation_date = db.Column(db.DateTime, default=datetime.utcnow)
    filename = db.Column(db.String(255))
    mimetype = db.Column(db.String(255))

    def file_mixin_to_json_serializeable(self, backrefs=False, relationships=False):
        ''' Return the mixin's columns as a JSON serializeable dict. '''
        iso_creation_date = self.creation_date.isoformat()
        return {
            'creation_date': f'{iso_creation_date}Z',
            'filename': self.filename,
            'mimetype': self.mimetype
        }

    @classmethod
    def create(cls, file_storage, **kwargs):
        '''
        Create a database object for file_storage and save the file to disk.

        ``filename`` and ``mimetype`` may be overridden via kwargs, all other
        kwargs are passed to the model constructor. If saving the file fails,
        the error is logged, the session is rolled back and the error is
        re-raised.
        '''
        requested_filename = kwargs.pop('filename', file_storage.filename)
        requested_mimetype = kwargs.pop('mimetype', file_storage.mimetype)
        obj = cls(
            filename=secure_filename(requested_filename),
            mimetype=requested_mimetype,
            **kwargs
        )
        db.session.add(obj)
        # Flush and refresh first so that the object is populated before
        # obj.path is evaluated
        db.session.flush(objects=[obj])
        db.session.refresh(obj)
        try:
            file_storage.save(obj.path)
        except (AttributeError, OSError) as error:
            current_app.logger.error(error)
            db.session.rollback()
            raise
        return obj
							
								
								
									
										172
									
								
								app/models/job.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										172
									
								
								app/models/job.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,172 @@ | |||||||
|  | from datetime import datetime | ||||||
|  | from enum import IntEnum | ||||||
|  | from flask import current_app, url_for | ||||||
|  | from flask_hashids import HashidMixin | ||||||
|  | from time import sleep | ||||||
|  | from typing import Union | ||||||
|  | from pathlib import Path | ||||||
|  | import shutil | ||||||
|  | from app import db | ||||||
|  | from app.ext.flask_sqlalchemy import ContainerColumn, IntEnumColumn | ||||||
|  |  | ||||||
|  |  | ||||||
class JobStatus(IntEnum):
    '''
    States a Job can be in, represented as integers.
    '''
    INITIALIZING = 1
    SUBMITTED = 2
    QUEUED = 3
    RUNNING = 4
    CANCELING = 5
    CANCELED = 6
    COMPLETED = 7
    FAILED = 8

    @staticmethod
    def get(job_status: Union['JobStatus', int, str]) -> 'JobStatus':
        '''
        Convert a JobStatus member, an integer value or a member name into
        the corresponding JobStatus member.

        Raises TypeError for any other input type. Unknown integer values
        raise ValueError and unknown names raise KeyError.
        '''
        if isinstance(job_status, str):
            return JobStatus[job_status]
        if isinstance(job_status, int):
            # Covers JobStatus members as well: they are ints and calling
            # JobStatus() with a member returns that member
            return JobStatus(job_status)
        raise TypeError('job_status must be JobStatus, int, or str')
|  |  | ||||||
|  |  | ||||||
class Job(HashidMixin, db.Model):
    '''
    Class to define Jobs.

    A job belongs to a user, has input and result files and owns a directory
    below the user's directory (see the ``path`` property).
    '''
    __tablename__ = 'jobs'
    # Primary key
    id = db.Column(db.Integer, primary_key=True)
    # Foreign keys
    user_id = db.Column(db.Integer, db.ForeignKey('users.id'))
    # Fields
    creation_date = \
        db.Column(db.DateTime(), default=datetime.utcnow)
    description = db.Column(db.String(255))
    end_date = db.Column(db.DateTime())
    service = db.Column(db.String(64))
    service_args = db.Column(ContainerColumn(dict, 255))
    service_version = db.Column(db.String(16))
    status = db.Column(
        IntEnumColumn(JobStatus),
        default=JobStatus.INITIALIZING
    )
    title = db.Column(db.String(32))
    # Relationships
    inputs = db.relationship(
        'JobInput',
        back_populates='job',
        cascade='all, delete-orphan',
        lazy='dynamic'
    )
    results = db.relationship(
        'JobResult',
        back_populates='job',
        cascade='all, delete-orphan',
        lazy='dynamic'
    )
    user = db.relationship(
        'User',
        back_populates='jobs'
    )

    def __repr__(self):
        return f'<Job {self.title}>'

    @property
    def jsonpatch_path(self):
        ''' JSON Patch path of this job below its user's path. '''
        return f'{self.user.jsonpatch_path}/jobs/{self.hashid}'

    @property
    def path(self) -> Path:
        ''' Directory of this job below its user's directory. '''
        return self.user.path / 'jobs' / f'{self.id}'

    @property
    def url(self):
        return url_for('jobs.job', job_id=self.id)

    @property
    def user_hashid(self):
        return self.user.hashid

    @staticmethod
    def create(**kwargs):
        '''
        Create a job and its directory structure.

        The job is added to the session and flushed, then the job directory
        and its 'inputs', 'pipeline_data' and 'results' subdirectories are
        created. If creating a directory fails, the directories created by
        this call are removed again, the session is rolled back and the
        OSError is re-raised.
        '''
        job = Job(**kwargs)
        db.session.add(job)
        db.session.flush(objects=[job])
        db.session.refresh(job)
        job_dir = job.path
        directories = [
            job_dir,
            job_dir / 'inputs',
            job_dir / 'pipeline_data',
            job_dir / 'results'
        ]
        created_directories = []
        try:
            for directory in directories:
                directory.mkdir()
                created_directories.append(directory)
        except OSError as e:
            current_app.logger.error(e)
            # Remove only what this call created (innermost first), so that
            # a directory that already existed beforehand is never touched
            for directory in reversed(created_directories):
                try:
                    directory.rmdir()
                except OSError as cleanup_error:
                    current_app.logger.error(cleanup_error)
            db.session.rollback()
            raise
        return job

    def delete(self):
        '''
        Delete the job and its inputs and results from the database.

        Unless the job is completed or failed, its status is set to
        CANCELING and this method waits until the status becomes CANCELED.
        Afterwards the job directory is removed and the job is marked for
        deletion in the session (the caller has to commit).
        '''
        if self.status not in [JobStatus.COMPLETED, JobStatus.FAILED]:  # noqa
            self.status = JobStatus.CANCELING
            db.session.commit()
            # NOTE(review): this loop has no timeout, it blocks until
            # something else sets the status to CANCELED
            while self.status != JobStatus.CANCELED:
                # In case the daemon handled a job in any way
                if self.status != JobStatus.CANCELING:
                    self.status = JobStatus.CANCELING
                    db.session.commit()
                sleep(1)
                db.session.refresh(self)
        try:
            shutil.rmtree(self.path)
        except OSError as e:
            current_app.logger.error(e)
            db.session.rollback()
            raise e
        db.session.delete(self)

    def restart(self):
        ''' Restart a job - only if the status is failed '''
        if self.status != JobStatus.FAILED:
            raise Exception('Job status is not "failed"')
        # NOTE(review): create() makes a 'pipeline_data' directory while
        # 'pyflow.data' is removed here, and 'results' is not recreated -
        # confirm that this is intended
        shutil.rmtree(self.path / 'results', ignore_errors=True)
        shutil.rmtree(self.path / 'pyflow.data', ignore_errors=True)
        for result in self.results:
            db.session.delete(result)
        self.end_date = None
        self.status = JobStatus.SUBMITTED

    def to_json_serializeable(self, backrefs=False, relationships=False):
        '''
        Return a JSON serializeable dict representation of the job.

        With backrefs the owning user is included, with relationships the
        inputs and results are included, keyed by their hashids.
        '''
        json_serializeable = {
            'id': self.hashid,
            'creation_date': f'{self.creation_date.isoformat()}Z',
            'description': self.description,
            'end_date': (
                None if self.end_date is None
                else f'{self.end_date.isoformat()}Z'
            ),
            'service': self.service,
            'service_args': self.service_args,
            'service_version': self.service_version,
            'status': self.status.name,
            'title': self.title
        }
        if backrefs:
            json_serializeable['user'] = \
                self.user.to_json_serializeable(backrefs=True)
        if relationships:
            json_serializeable['inputs'] = {
                x.hashid: x.to_json_serializeable(relationships=True)
                for x in self.inputs
            }
            json_serializeable['results'] = {
                x.hashid: x.to_json_serializeable(relationships=True)
                for x in self.results
            }
        return json_serializeable
							
								
								
									
										65
									
								
								app/models/job_input.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										65
									
								
								app/models/job_input.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,65 @@ | |||||||
|  | from flask import url_for | ||||||
|  | from flask_hashids import HashidMixin | ||||||
|  | from pathlib import Path | ||||||
|  | from app import db | ||||||
|  | from .file_mixin import FileMixin | ||||||
|  |  | ||||||
|  |  | ||||||
class JobInput(FileMixin, HashidMixin, db.Model):
    '''
    Input file of a Job.
    '''
    __tablename__ = 'job_inputs'
    # Primary key
    id = db.Column(db.Integer, primary_key=True)
    # Foreign keys
    job_id = db.Column(db.Integer, db.ForeignKey('jobs.id'))
    # Relationships
    job = db.relationship('Job', back_populates='inputs')

    def __repr__(self):
        return f'<JobInput {self.filename}>'

    @property
    def content_url(self):
        ''' URL of the download endpoint for this input file. '''
        url_values = {'job_id': self.job.id, 'job_input_id': self.id}
        return url_for('jobs.download_job_input', **url_values)

    @property
    def jsonpatch_path(self):
        ''' JSON Patch path of this input below its job's path. '''
        return f'{self.job.jsonpatch_path}/inputs/{self.hashid}'

    @property
    def path(self) -> Path:
        ''' Location of the file inside the job's 'inputs' directory. '''
        inputs_dir = self.job.path / 'inputs'
        return inputs_dir / f'{self.id}'

    @property
    def url(self):
        ''' URL of the job page, anchored at this input. '''
        anchor = f'job-{self.job.hashid}-input-{self.hashid}'
        return url_for('jobs.job', job_id=self.job_id, _anchor=anchor)

    @property
    def user_hashid(self):
        return self.job.user.hashid

    @property
    def user_id(self):
        return self.job.user.id

    def to_json_serializeable(self, backrefs=False, relationships=False):
        '''
        Return a JSON serializeable dict representation of the input.

        With backrefs the job is included. There are no relationships to
        include, so the relationships flag has no effect.
        '''
        json_serializeable = {'id': self.hashid}
        json_serializeable.update(self.file_mixin_to_json_serializeable())
        if backrefs:
            json_serializeable['job'] = \
                self.job.to_json_serializeable(backrefs=True)
        return json_serializeable
							
								
								
									
										71
									
								
								app/models/job_result.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										71
									
								
								app/models/job_result.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,71 @@ | |||||||
|  | from flask import url_for | ||||||
|  | from flask_hashids import HashidMixin | ||||||
|  | from pathlib import Path | ||||||
|  | from app import db | ||||||
|  | from .file_mixin import FileMixin | ||||||
|  |  | ||||||
|  |  | ||||||
class JobResult(FileMixin, HashidMixin, db.Model):
    '''
    Result file of a Job.
    '''
    __tablename__ = 'job_results'
    # Primary key
    id = db.Column(db.Integer, primary_key=True)
    # Foreign keys
    job_id = db.Column(db.Integer, db.ForeignKey('jobs.id'))
    # Fields
    description = db.Column(db.String(255))
    # Relationships
    job = db.relationship('Job', back_populates='results')

    def __repr__(self):
        return f'<JobResult {self.filename}>'

    @property
    def download_url(self):
        ''' URL of the download endpoint for this result file. '''
        url_values = {'job_id': self.job_id, 'job_result_id': self.id}
        return url_for('jobs.download_job_result', **url_values)

    @property
    def jsonpatch_path(self):
        ''' JSON Patch path of this result below its job's path. '''
        return f'{self.job.jsonpatch_path}/results/{self.hashid}'

    @property
    def path(self) -> Path:
        ''' Location of the file inside the job's 'results' directory. '''
        results_dir = self.job.path / 'results'
        return results_dir / f'{self.id}'

    @property
    def url(self):
        ''' URL of the job page, anchored at this result. '''
        anchor = f'job-{self.job.hashid}-result-{self.hashid}'
        return url_for('jobs.job', job_id=self.job_id, _anchor=anchor)

    @property
    def user_hashid(self):
        return self.job.user.hashid

    @property
    def user_id(self):
        return self.job.user.id

    def to_json_serializeable(self, backrefs=False, relationships=False):
        '''
        Return a JSON serializeable dict representation of the result.

        With backrefs the job is included. There are no relationships to
        include, so the relationships flag is only passed on to the mixin.
        '''
        file_values = self.file_mixin_to_json_serializeable(
            backrefs=backrefs,
            relationships=relationships
        )
        json_serializeable = {
            'id': self.hashid,
            'description': self.description
        }
        json_serializeable.update(file_values)
        if backrefs:
            json_serializeable['job'] = \
                self.job.to_json_serializeable(backrefs=True)
        return json_serializeable
							
								
								
									
										100
									
								
								app/models/role.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										100
									
								
								app/models/role.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,100 @@ | |||||||
|  | from enum import IntEnum | ||||||
|  | from flask_hashids import HashidMixin | ||||||
|  | from typing import Union | ||||||
|  | from app import db | ||||||
|  |  | ||||||
|  |  | ||||||
class Permission(IntEnum):
    '''
    User permissions, each represented by a distinct power of 2, so that a
    set of permissions can be stored in one integer and tested with the
    bitwise operator &.
    '''
    ADMINISTRATE = 1
    CONTRIBUTE = 2
    USE_API = 4

    @staticmethod
    def get(permission: Union['Permission', int, str]) -> 'Permission':
        '''
        Convert a Permission member, an integer value or a member name into
        the corresponding Permission member.

        Raises TypeError for any other input type. Unknown integer values
        raise ValueError and unknown names raise KeyError.
        '''
        if isinstance(permission, str):
            return Permission[permission]
        if isinstance(permission, int):
            # Covers Permission members as well: they are ints and calling
            # Permission() with a member returns that member
            return Permission(permission)
        raise TypeError('permission must be Permission, int, or str')
|  |  | ||||||
|  |  | ||||||
class Role(HashidMixin, db.Model):
    '''
    Role of a user. The role's permissions are stored as one integer in
    which each Permission occupies one bit.
    '''
    __tablename__ = 'roles'
    # Primary key
    id = db.Column(db.Integer, primary_key=True)
    # Fields
    name = db.Column(db.String(64), unique=True)
    default = db.Column(db.Boolean, default=False, index=True)
    permissions = db.Column(db.Integer, default=0)
    # Relationships
    users = db.relationship('User', back_populates='role', lazy='dynamic')

    def __repr__(self):
        return f'<Role {self.name}>'

    def has_permission(self, permission: Union[Permission, int, str]):
        ''' Return whether the given permission is set for this role. '''
        mask = Permission.get(permission).value
        return (self.permissions & mask) == mask

    def add_permission(self, permission: Union[Permission, int, str]):
        ''' Set the given permission, if it is not set already. '''
        flag = Permission.get(permission)
        if self.has_permission(flag):
            return
        self.permissions |= flag.value

    def remove_permission(self, permission: Union[Permission, int, str]):
        ''' Clear the given permission, if it is set. '''
        flag = Permission.get(permission)
        if not self.has_permission(flag):
            return
        self.permissions &= ~flag.value

    def reset_permissions(self):
        ''' Remove all permissions. '''
        self.permissions = 0

    def to_json_serializeable(self, backrefs=False, relationships=False):
        '''
        Return a JSON serializeable dict representation of the role.

        Permissions are listed by name. There are no backrefs to include;
        with relationships the role's users are included, keyed by their
        hashids.
        '''
        granted_permission_names = [
            permission.name for permission in Permission
            if self.has_permission(permission.value)
        ]
        json_serializeable = {
            'id': self.hashid,
            'default': self.default,
            'name': self.name,
            'permissions': granted_permission_names
        }
        if relationships:
            json_serializeable['users'] = {
                user.hashid: user.to_json_serializeable(relationships=True)
                for user in self.users
            }
        return json_serializeable

    @staticmethod
    def insert_defaults():
        '''
        Create or update the default roles and commit them.

        Existing roles (looked up by name) get their permissions reset to
        the defaults defined here. The 'User' role is flagged as default.
        '''
        default_role_name = 'User'
        permissions_by_role_name = {
            'User': [],
            'API user': [Permission.USE_API],
            'Contributor': [Permission.CONTRIBUTE],
            'Administrator': [
                Permission.ADMINISTRATE,
                Permission.CONTRIBUTE,
                Permission.USE_API
            ],
            'System user': []
        }
        for role_name, role_permissions in permissions_by_role_name.items():
            existing_role = Role.query.filter_by(name=role_name).first()
            role = (
                Role(name=role_name) if existing_role is None
                else existing_role
            )
            role.reset_permissions()
            for role_permission in role_permissions:
                role.add_permission(role_permission)
            role.default = role.name == default_role_name
            db.session.add(role)
        db.session.commit()
							
								
								
									
										136
									
								
								app/models/spacy_nlp_pipeline_model.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										136
									
								
								app/models/spacy_nlp_pipeline_model.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,136 @@ | |||||||
|  | from flask import current_app, url_for | ||||||
|  | from flask_hashids import HashidMixin | ||||||
|  | from tqdm import tqdm | ||||||
|  | from pathlib import Path | ||||||
|  | import requests | ||||||
|  | import yaml | ||||||
|  | from app import db | ||||||
|  | from app.ext.flask_sqlalchemy import ContainerColumn | ||||||
|  | from .file_mixin import FileMixin | ||||||
|  | from .user import User | ||||||
|  |  | ||||||
|  |  | ||||||
class SpaCyNLPPipelineModel(FileMixin, HashidMixin, db.Model):
    """Metadata record for a spaCy NLP pipeline model file owned by a user.

    The row stores the descriptive metadata; the model file itself lives on
    disk at :attr:`path`.
    """
    __tablename__ = 'spacy_nlp_pipeline_models'
    # Primary key
    id = db.Column(db.Integer, primary_key=True)
    # Foreign keys
    user_id = db.Column(db.Integer, db.ForeignKey('users.id'))
    # Fields
    title = db.Column(db.String(64))
    description = db.Column(db.String(255))
    version = db.Column(db.String(16))
    compatible_service_versions = db.Column(ContainerColumn(list, 255))
    publisher = db.Column(db.String(128))
    publisher_url = db.Column(db.String(512))
    publishing_url = db.Column(db.String(512))
    publishing_year = db.Column(db.Integer)
    pipeline_name = db.Column(db.String(64))
    is_public = db.Column(db.Boolean, default=False)
    # Relationships
    user = db.relationship('User', back_populates='spacy_nlp_pipeline_models')

    @property
    def path(self) -> Path:
        """Location of the model file inside the owning user's directory."""
        return self.user.path / 'spacy_nlp_pipeline_models' / f'{self.id}'

    @property
    def jsonpatch_path(self):
        """JSON Patch path of this model below the owning user's path."""
        return f'{self.user.jsonpatch_path}/spacy_nlp_pipeline_models/{self.hashid}'

    @property
    def url(self):
        """URL of the ``contributions.spacy_nlp_pipeline_model`` endpoint."""
        return url_for(
            'contributions.spacy_nlp_pipeline_model',
            spacy_nlp_pipeline_model_id=self.id
        )

    @property
    def user_hashid(self):
        """Hashid of the owning user."""
        return self.user.hashid

    @staticmethod
    def _download(url, destination, description, timeout=30):
        """Stream ``url`` into ``destination`` while showing a progress bar.

        The payload is first written to a sibling ``<name>.part`` file and
        only moved over ``destination`` after the transfer completed, so a
        failed download never leaves a truncated file that would later be
        mistaken for a finished one.

        Raises:
            requests.RequestException: on connection problems, timeouts or
                a non-success HTTP status.
            OSError: if the file cannot be written or moved into place.
        """
        partial_file = destination.with_name(f'{destination.name}.part')
        try:
            with requests.get(url, stream=True, timeout=timeout) as response:
                # Refuse to store an HTTP error page as model data.
                response.raise_for_status()
                # The header is missing for chunked responses; tqdm accepts
                # ``total=None`` and then shows a bar without a total.
                content_length = response.headers.get('Content-Length')
                total = None if content_length is None else int(content_length)
                with tqdm(
                    desc=description,
                    unit="B",
                    unit_scale=True,
                    unit_divisor=1024,
                    total=total
                ) as pbar:
                    pbar.clear()
                    with partial_file.open('wb') as f:
                        for chunk in response.iter_content(chunk_size=1024):
                            if chunk:  # filter out keep-alive new chunks
                                pbar.update(len(chunk))
                                f.write(chunk)
            partial_file.replace(destination)
        finally:
            # No-op after a successful replace; removes leftovers otherwise.
            partial_file.unlink(missing_ok=True)

    @staticmethod
    def insert_defaults(force_download=False):
        """Create or update the default models listed in the bundled YAML.

        Records are matched by ``title`` and ``version``. Existing rows get
        their metadata refreshed, missing rows are created for the
        ``nopaque`` user. The model file is downloaded when it is not on
        disk yet or when ``force_download`` is true. All changes are
        committed at the end.
        """
        nopaque_user = User.query.filter_by(username='nopaque').first()
        default_records_file = Path(__file__).parent / 'default_records' / 'spacy_nlp_pipeline_model.yml'
        with default_records_file.open('r') as f:
            # An empty YAML document loads as None.
            default_records = yaml.safe_load(f) or []
        for m in default_records:
            fields = {
                'compatible_service_versions': m['compatible_service_versions'],
                'description': m['description'],
                'filename': m['url'].split('/')[-1],
                'publisher': m['publisher'],
                'publisher_url': m['publisher_url'],
                'publishing_url': m['publishing_url'],
                'publishing_year': m['publishing_year'],
                'is_public': True,
                'title': m['title'],
                'version': m['version'],
                'pipeline_name': m['pipeline_name']
            }
            model = SpaCyNLPPipelineModel.query.filter_by(title=m['title'], version=m['version']).first()  # noqa
            if model is None:
                model = SpaCyNLPPipelineModel(user=nopaque_user, **fields)
                db.session.add(model)
                # Flush and refresh so that the id (used by ``path``) is set.
                db.session.flush(objects=[model])
                db.session.refresh(model)
            else:
                for name, value in fields.items():
                    setattr(model, name, value)
            if force_download or not model.path.exists():
                SpaCyNLPPipelineModel._download(
                    m['url'],
                    model.path,
                    f'{model.title} ({model.filename})'
                )
        db.session.commit()

    def delete(self):
        """Remove the model file from disk and mark the row for deletion.

        An ``OSError`` while unlinking is logged and re-raised; in that case
        the row is not deleted. The session is not committed here.
        """
        try:
            self.path.unlink(missing_ok=True)
        except OSError as e:
            current_app.logger.error(e)
            raise
        db.session.delete(self)

    def to_json_serializeable(self, backrefs=False, relationships=False):
        """Return the model as a dict of JSON-serializable values.

        With ``backrefs`` the owning user is embedded under ``'user'``.
        ``relationships`` is accepted but adds nothing for this model.
        """
        json_serializeable = {
            'id': self.hashid,
            'compatible_service_versions': self.compatible_service_versions,
            'description': self.description,
            'publisher': self.publisher,
            'publisher_url': self.publisher_url,
            'publishing_url': self.publishing_url,
            'publishing_year': self.publishing_year,
            'pipeline_name': self.pipeline_name,
            'is_public': self.is_public,
            'title': self.title,
            'version': self.version,
            **self.file_mixin_to_json_serializeable()
        }
        if backrefs:
            json_serializeable['user'] = \
                self.user.to_json_serializeable(backrefs=True)
        if relationships:
            pass
        return json_serializeable
							
								
								
									
										132
									
								
								app/models/tesseract_ocr_pipeline_model.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										132
									
								
								app/models/tesseract_ocr_pipeline_model.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,132 @@ | |||||||
|  | from flask import current_app, url_for | ||||||
|  | from flask_hashids import HashidMixin | ||||||
|  | from tqdm import tqdm | ||||||
|  | from pathlib import Path | ||||||
|  | import requests | ||||||
|  | import yaml | ||||||
|  | from app import db | ||||||
|  | from app.ext.flask_sqlalchemy import ContainerColumn | ||||||
|  | from .file_mixin import FileMixin | ||||||
|  | from .user import User | ||||||
|  |  | ||||||
|  |  | ||||||
class TesseractOCRPipelineModel(FileMixin, HashidMixin, db.Model):
    """Metadata record for a Tesseract OCR pipeline model file owned by a user.

    The row stores the descriptive metadata; the model file itself lives on
    disk at :attr:`path`.
    """
    __tablename__ = 'tesseract_ocr_pipeline_models'
    # Primary key
    id = db.Column(db.Integer, primary_key=True)
    # Foreign keys
    user_id = db.Column(db.Integer, db.ForeignKey('users.id'))
    # Fields
    title = db.Column(db.String(64))
    description = db.Column(db.String(255))
    version = db.Column(db.String(16))
    compatible_service_versions = db.Column(ContainerColumn(list, 255))
    publisher = db.Column(db.String(128))
    publisher_url = db.Column(db.String(512))
    publishing_url = db.Column(db.String(512))
    publishing_year = db.Column(db.Integer)
    is_public = db.Column(db.Boolean, default=False)
    # Relationships
    user = db.relationship('User', back_populates='tesseract_ocr_pipeline_models')

    @property
    def path(self) -> Path:
        """Location of the model file inside the owning user's directory."""
        return self.user.path / 'tesseract_ocr_pipeline_models' / f'{self.id}'

    @property
    def jsonpatch_path(self):
        """JSON Patch path of this model below the owning user's path."""
        return f'{self.user.jsonpatch_path}/tesseract_ocr_pipeline_models/{self.hashid}'

    @property
    def url(self):
        """URL of the ``contributions.tesseract_ocr_pipeline_model`` endpoint."""
        return url_for(
            'contributions.tesseract_ocr_pipeline_model',
            tesseract_ocr_pipeline_model_id=self.id
        )

    @property
    def user_hashid(self):
        """Hashid of the owning user."""
        return self.user.hashid

    @staticmethod
    def _download(url, destination, description, timeout=30):
        """Stream ``url`` into ``destination`` while showing a progress bar.

        The payload is first written to a sibling ``<name>.part`` file and
        only moved over ``destination`` after the transfer completed, so a
        failed download never leaves a truncated file that would later be
        mistaken for a finished one.

        Raises:
            requests.RequestException: on connection problems, timeouts or
                a non-success HTTP status.
            OSError: if the file cannot be written or moved into place.
        """
        partial_file = destination.with_name(f'{destination.name}.part')
        try:
            with requests.get(url, stream=True, timeout=timeout) as response:
                # Refuse to store an HTTP error page as model data.
                response.raise_for_status()
                # The header is missing for chunked responses; tqdm accepts
                # ``total=None`` and then shows a bar without a total.
                content_length = response.headers.get('Content-Length')
                total = None if content_length is None else int(content_length)
                with tqdm(
                    desc=description,
                    unit="B",
                    unit_scale=True,
                    unit_divisor=1024,
                    total=total
                ) as pbar:
                    pbar.clear()
                    with partial_file.open('wb') as f:
                        for chunk in response.iter_content(chunk_size=1024):
                            if chunk:  # filter out keep-alive new chunks
                                pbar.update(len(chunk))
                                f.write(chunk)
            partial_file.replace(destination)
        finally:
            # No-op after a successful replace; removes leftovers otherwise.
            partial_file.unlink(missing_ok=True)

    @staticmethod
    def insert_defaults(force_download=False):
        """Create or update the default models listed in the bundled YAML.

        Records are matched by ``title`` and ``version``. Existing rows get
        their metadata refreshed, missing rows are created for the
        ``nopaque`` user. The stored filename is always
        ``<id>.traineddata``. The model file is downloaded when it is not on
        disk yet or when ``force_download`` is true. All changes are
        committed at the end.
        """
        nopaque_user = User.query.filter_by(username='nopaque').first()
        default_records_file = Path(__file__).parent / 'default_records' / 'tesseract_ocr_pipeline_model.yml'
        with default_records_file.open('r') as f:
            # An empty YAML document loads as None.
            default_records = yaml.safe_load(f) or []
        for m in default_records:
            fields = {
                'compatible_service_versions': m['compatible_service_versions'],
                'description': m['description'],
                'publisher': m['publisher'],
                'publisher_url': m['publisher_url'],
                'publishing_url': m['publishing_url'],
                'publishing_year': m['publishing_year'],
                'is_public': True,
                'title': m['title'],
                'version': m['version']
            }
            model = TesseractOCRPipelineModel.query.filter_by(title=m['title'], version=m['version']).first()  # noqa
            if model is None:
                model = TesseractOCRPipelineModel(user=nopaque_user, **fields)
                db.session.add(model)
                # Flush and refresh so that the id (used below) is set.
                db.session.flush(objects=[model])
                db.session.refresh(model)
            else:
                for name, value in fields.items():
                    setattr(model, name, value)
            # The filename depends on the id, which only exists after a flush.
            model.filename = f'{model.id}.traineddata'
            if force_download or not model.path.exists():
                TesseractOCRPipelineModel._download(
                    m['url'],
                    model.path,
                    f'{model.title} ({model.filename})'
                )
        db.session.commit()

    def delete(self):
        """Remove the model file from disk and mark the row for deletion.

        An ``OSError`` while unlinking is logged and re-raised; in that case
        the row is not deleted. The session is not committed here.
        """
        try:
            self.path.unlink(missing_ok=True)
        except OSError as e:
            current_app.logger.error(e)
            raise
        db.session.delete(self)

    def to_json_serializeable(self, backrefs=False, relationships=False):
        """Return the model as a dict of JSON-serializable values.

        With ``backrefs`` the owning user is embedded under ``'user'``.
        ``relationships`` is accepted but adds nothing for this model.
        """
        json_serializeable = {
            'id': self.hashid,
            'compatible_service_versions': self.compatible_service_versions,
            'description': self.description,
            'publisher': self.publisher,
            'publisher_url': self.publisher_url,
            'publishing_url': self.publishing_url,
            'publishing_year': self.publishing_year,
            'is_public': self.is_public,
            'title': self.title,
            'version': self.version,
            **self.file_mixin_to_json_serializeable()
        }
        if backrefs:
            json_serializeable['user'] = \
                self.user.to_json_serializeable(backrefs=True)
        if relationships:
            pass
        return json_serializeable
							
								
								
									
										48
									
								
								app/models/token.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										48
									
								
								app/models/token.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,48 @@ | |||||||
|  | from datetime import datetime, timedelta | ||||||
|  | from app import db | ||||||
|  |  | ||||||
|  |  | ||||||
class Token(db.Model):
    """Access/refresh token pair belonging to a user."""
    __tablename__ = 'tokens'
    # Primary key
    id = db.Column(db.Integer, primary_key=True)
    # Foreign keys
    user_id = db.Column(db.Integer, db.ForeignKey('users.id'))
    # Fields
    access_token = db.Column(db.String(64), index=True)
    access_expiration = db.Column(db.DateTime)
    refresh_token = db.Column(db.String(64), index=True)
    refresh_expiration = db.Column(db.DateTime)
    # Relationships
    user = db.relationship('User', back_populates='tokens')

    def expire(self):
        """Set both expiration timestamps to the current UTC time."""
        # One timestamp for both fields keeps them exactly equal.
        now = datetime.utcnow()
        self.access_expiration = now
        self.refresh_expiration = now

    def to_json_serializeable(self, backrefs=False, relationships=False):
        """Return the token as a dict of JSON-serializable values.

        Expiration timestamps are rendered as ISO 8601 strings with a
        trailing ``Z`` (or ``None`` when unset). With ``backrefs`` the
        owning user is embedded under ``'user'``. ``relationships`` is
        accepted but adds nothing for this model.
        """
        # This class does not mix in HashidMixin, so ``self.hashid`` is not
        # available here; encode the id with the application's hashids
        # extension instead. Imported locally because the module only
        # imports ``db`` from ``app``.
        from app import hashids
        json_serializeable = {
            'id': hashids.encode(self.id),
            'access_token': self.access_token,
            'access_expiration': (
                None if self.access_expiration is None
                else f'{self.access_expiration.isoformat()}Z'
            ),
            'refresh_token': self.refresh_token,
            'refresh_expiration': (
                None if self.refresh_expiration is None
                else f'{self.refresh_expiration.isoformat()}Z'
            )
        }
        if backrefs:
            json_serializeable['user'] = \
                self.user.to_json_serializeable(backrefs=True)
        if relationships:
            pass
        return json_serializeable

    @staticmethod
    def clean():
        """Remove any tokens that have been expired for more than a day.

        Only issues the delete; committing the session is left to the
        caller.
        """
        yesterday = datetime.utcnow() - timedelta(days=1)
        Token.query.filter(Token.refresh_expiration < yesterday).delete()
							
								
								
									
										452
									
								
								app/models/user.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										452
									
								
								app/models/user.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,452 @@ | |||||||
|  | from datetime import datetime, timedelta | ||||||
|  | from enum import IntEnum | ||||||
|  | from flask import current_app, url_for | ||||||
|  | from flask_hashids import HashidMixin | ||||||
|  | from flask_login import UserMixin | ||||||
|  | from sqlalchemy.ext.associationproxy import association_proxy | ||||||
|  | from pathlib import Path | ||||||
|  | from typing import Union | ||||||
|  | from werkzeug.security import generate_password_hash, check_password_hash | ||||||
|  | import jwt | ||||||
|  | import re | ||||||
|  | import secrets | ||||||
|  | import shutil | ||||||
|  | from app import db, hashids | ||||||
|  | from app.ext.flask_sqlalchemy import IntEnumColumn | ||||||
|  | from .corpus import Corpus | ||||||
|  | from .corpus_follower_association import CorpusFollowerAssociation | ||||||
|  | from .corpus_follower_role import CorpusFollowerRole | ||||||
|  | from .role import Permission, Role | ||||||
|  | from .token import Token | ||||||
|  |  | ||||||
|  |  | ||||||
class ProfilePrivacySettings(IntEnum):
    """Flags naming the profile details a user may choose to show.

    The values are distinct powers of two so that several of them can be
    combined in a single integer.
    """
    SHOW_EMAIL = 1
    SHOW_LAST_SEEN = 2
    SHOW_MEMBER_SINCE = 4

    @staticmethod
    def get(profile_privacy_setting: Union['ProfilePrivacySettings', int, str]) -> 'ProfilePrivacySettings':
        """Coerce a member, a member value or a member name into a member.

        Raises:
            KeyError: for a string that is not a member name.
            ValueError: for an integer that is not a member value.
            TypeError: for any other kind of argument.
        """
        candidate = profile_privacy_setting
        if isinstance(candidate, str):
            # Lookup by member name, e.g. 'SHOW_EMAIL'.
            return ProfilePrivacySettings[candidate]
        if isinstance(candidate, int):
            # Covers plain integers as well as members themselves: calling
            # the enum with one of its members hands back that same member.
            return ProfilePrivacySettings(candidate)
        raise TypeError('profile_privacy_setting must be ProfilePrivacySettings, int, or str')
|  |  | ||||||
|  |  | ||||||
class UserSettingJobStatusMailNotificationLevel(IntEnum):
    """Levels for a user's job status mail notification setting.

    NOTE(review): presumably NONE sends no mails, END only a mail when a
    job has ended and ALL a mail on every status change; confirm against
    the code that sends the notifications.
    """
    NONE = 1
    END = 2
    ALL = 3
|  |  | ||||||
|  |  | ||||||
|  | class User(HashidMixin, UserMixin, db.Model): | ||||||
|  |     __tablename__ = 'users' | ||||||
|  |     # Primary key | ||||||
|  |     id = db.Column(db.Integer, primary_key=True) | ||||||
|  |     # Foreign keys | ||||||
|  |     role_id = db.Column(db.Integer, db.ForeignKey('roles.id')) | ||||||
|  |     # Fields | ||||||
|  |     email = db.Column(db.String(254), index=True, unique=True) | ||||||
|  |     username = db.Column(db.String(64), index=True, unique=True) | ||||||
|  |     username_pattern = re.compile(r'^[A-Za-zÄÖÜäöüß0-9_.]*$') | ||||||
|  |     password_hash = db.Column(db.String(128)) | ||||||
|  |     confirmed = db.Column(db.Boolean, default=False) | ||||||
|  |     terms_of_use_accepted = db.Column(db.Boolean, default=False) | ||||||
|  |     member_since = db.Column(db.DateTime(), default=datetime.utcnow) | ||||||
|  |     setting_job_status_mail_notification_level = db.Column( | ||||||
|  |         IntEnumColumn(UserSettingJobStatusMailNotificationLevel), | ||||||
|  |         default=UserSettingJobStatusMailNotificationLevel.END | ||||||
|  |     ) | ||||||
|  |     last_seen = db.Column(db.DateTime()) | ||||||
|  |     full_name = db.Column(db.String(64)) | ||||||
|  |     about_me = db.Column(db.String(256)) | ||||||
|  |     location = db.Column(db.String(64)) | ||||||
|  |     website = db.Column(db.String(128)) | ||||||
|  |     organization = db.Column(db.String(128)) | ||||||
|  |     is_public = db.Column(db.Boolean, default=False) | ||||||
|  |     profile_privacy_settings = db.Column(db.Integer(), default=0) | ||||||
|  |     # Relationships | ||||||
|  |     avatar = db.relationship( | ||||||
|  |         'Avatar', | ||||||
|  |         back_populates='user', | ||||||
|  |         cascade='all, delete-orphan', | ||||||
|  |         uselist=False | ||||||
|  |     ) | ||||||
|  |     corpora = db.relationship( | ||||||
|  |         'Corpus', | ||||||
|  |         back_populates='user', | ||||||
|  |         cascade='all, delete-orphan', | ||||||
|  |         lazy='dynamic' | ||||||
|  |     ) | ||||||
|  |     corpus_follower_associations = db.relationship( | ||||||
|  |         'CorpusFollowerAssociation', | ||||||
|  |         back_populates='follower', | ||||||
|  |         cascade='all, delete-orphan' | ||||||
|  |     ) | ||||||
|  |     followed_corpora = association_proxy( | ||||||
|  |         'corpus_follower_associations', | ||||||
|  |         'corpus', | ||||||
|  |         creator=lambda c: CorpusFollowerAssociation(corpus=c) | ||||||
|  |     ) | ||||||
|  |     jobs = db.relationship( | ||||||
|  |         'Job', | ||||||
|  |         back_populates='user', | ||||||
|  |         cascade='all, delete-orphan', | ||||||
|  |         lazy='dynamic' | ||||||
|  |     ) | ||||||
|  |     role = db.relationship( | ||||||
|  |         'Role', | ||||||
|  |         back_populates='users' | ||||||
|  |     ) | ||||||
|  |     spacy_nlp_pipeline_models = db.relationship( | ||||||
|  |         'SpaCyNLPPipelineModel', | ||||||
|  |         back_populates='user', | ||||||
|  |         cascade='all, delete-orphan', | ||||||
|  |         lazy='dynamic' | ||||||
|  |     ) | ||||||
|  |     tesseract_ocr_pipeline_models = db.relationship( | ||||||
|  |         'TesseractOCRPipelineModel', | ||||||
|  |         back_populates='user', | ||||||
|  |         cascade='all, delete-orphan', | ||||||
|  |         lazy='dynamic' | ||||||
|  |     ) | ||||||
|  |     tokens = db.relationship( | ||||||
|  |         'Token', | ||||||
|  |         back_populates='user', | ||||||
|  |         cascade='all, delete-orphan', | ||||||
|  |         lazy='dynamic' | ||||||
|  |     ) | ||||||
|  |  | ||||||
|  |     def __init__(self, **kwargs): | ||||||
|  |         if 'role' not in kwargs: | ||||||
|  |             kwargs['role'] = ( | ||||||
|  |                 Role.query.filter_by(name='Administrator').first() | ||||||
|  |                 if kwargs['email'] == current_app.config['NOPAQUE_ADMIN'] | ||||||
|  |                 else Role.query.filter_by(default=True).first() | ||||||
|  |             ) | ||||||
|  |         super().__init__(**kwargs) | ||||||
|  |  | ||||||
|  |     def __repr__(self): | ||||||
|  |         return f'<User {self.username}>' | ||||||
|  |  | ||||||
|  |     @property | ||||||
|  |     def jsonpatch_path(self): | ||||||
|  |         return f'/users/{self.hashid}' | ||||||
|  |  | ||||||
|  |     @property | ||||||
|  |     def password(self): | ||||||
|  |         raise AttributeError('password is not a readable attribute') | ||||||
|  |  | ||||||
|  |     @password.setter | ||||||
|  |     def password(self, password): | ||||||
|  |         self.password_hash = generate_password_hash(password) | ||||||
|  |  | ||||||
|  |     @property | ||||||
|  |     def path(self) -> Path: | ||||||
|  |         return current_app.config.get('NOPAQUE_DATA_DIR') / 'users' / f'{self.id}' | ||||||
|  |  | ||||||
|  |     @staticmethod | ||||||
|  |     def create(**kwargs): | ||||||
|  |         user = User(**kwargs) | ||||||
|  |         db.session.add(user) | ||||||
|  |         db.session.flush(objects=[user]) | ||||||
|  |         db.session.refresh(user) | ||||||
|  |         user_spacy_nlp_pipeline_models_dir = user.path / 'spacy_nlp_pipeline_models' | ||||||
|  |         user_tesseract_ocr_pipeline_models_dir = user.path / 'tesseract_ocr_pipeline_models' | ||||||
|  |         user_corpora_dir = user.path / 'corpora' | ||||||
|  |         user_jobs_dir = user.path / 'jobs' | ||||||
|  |         try: | ||||||
|  |             user.path.mkdir() | ||||||
|  |             user_spacy_nlp_pipeline_models_dir.mkdir() | ||||||
|  |             user_tesseract_ocr_pipeline_models_dir.mkdir() | ||||||
|  |             user_corpora_dir.mkdir() | ||||||
|  |             user_jobs_dir.mkdir() | ||||||
|  |         except OSError as e: | ||||||
|  |             # TODO: Potential leftover cleanup | ||||||
|  |             current_app.logger.error(e) | ||||||
|  |             db.session.rollback() | ||||||
|  |             raise | ||||||
|  |         return user | ||||||
|  |  | ||||||
|  |     @staticmethod | ||||||
|  |     def insert_defaults(): | ||||||
|  |         nopaque_user = User.query.filter_by(username='nopaque').first() | ||||||
|  |         system_user_role = Role.query.filter_by(name='System user').first() | ||||||
|  |         if nopaque_user is None: | ||||||
|  |             nopaque_user = User.create( | ||||||
|  |                 username='nopaque', | ||||||
|  |                 role=system_user_role | ||||||
|  |             ) | ||||||
|  |             db.session.add(nopaque_user) | ||||||
|  |         elif nopaque_user.role != system_user_role: | ||||||
|  |             nopaque_user.role = system_user_role | ||||||
|  |         db.session.commit() | ||||||
|  |  | ||||||
|  |     @staticmethod | ||||||
|  |     def reset_password(token, new_password): | ||||||
|  |         try: | ||||||
|  |             payload = jwt.decode( | ||||||
|  |                 token, | ||||||
|  |                 current_app.config['SECRET_KEY'], | ||||||
|  |                 algorithms=['HS256'], | ||||||
|  |                 issuer=current_app.config['SERVER_NAME'], | ||||||
|  |                 options={'require': ['exp', 'iat', 'iss', 'purpose', 'sub']} | ||||||
|  |             ) | ||||||
|  |         except jwt.PyJWTError: | ||||||
|  |             return False | ||||||
|  |         if payload.get('purpose') != 'User.reset_password': | ||||||
|  |             return False | ||||||
|  |         user_hashid = payload.get('sub') | ||||||
|  |         user_id = hashids.decode(user_hashid) | ||||||
|  |         user = User.query.get(user_id) | ||||||
|  |         if user is None: | ||||||
|  |             return False | ||||||
|  |         user.password = new_password | ||||||
|  |         db.session.add(user) | ||||||
|  |         return True | ||||||
|  |  | ||||||
|  |     @staticmethod | ||||||
|  |     def verify_access_token(access_token, refresh_token=None): | ||||||
|  |         token = Token.query.filter(Token.access_token == access_token).first() | ||||||
|  |         if token is not None: | ||||||
|  |             if token.access_expiration > datetime.utcnow(): | ||||||
|  |                 token.user.ping() | ||||||
|  |                 db.session.commit() | ||||||
|  |                 if token.user.role.name != 'System user': | ||||||
|  |                     return token.user | ||||||
|  |  | ||||||
|  |     @staticmethod | ||||||
|  |     def verify_refresh_token(refresh_token, access_token): | ||||||
|  |         token = Token.query.filter((Token.refresh_token == refresh_token) & (Token.access_token == access_token)).first() | ||||||
|  |         if token is not None: | ||||||
|  |             if token.refresh_expiration > datetime.utcnow(): | ||||||
|  |                 return token | ||||||
|  |             # someone tried to refresh with an expired token | ||||||
|  |             # revoke all tokens from this user as a precaution | ||||||
|  |             token.user.revoke_auth_tokens() | ||||||
|  |             db.session.commit() | ||||||
|  |  | ||||||
|  |     def can(self, permission): | ||||||
|  |         return self.role is not None and self.role.has_permission(permission) | ||||||
|  |  | ||||||
|  |     def confirm(self, confirmation_token): | ||||||
|  |         try: | ||||||
|  |             payload = jwt.decode( | ||||||
|  |                 confirmation_token, | ||||||
|  |                 current_app.config['SECRET_KEY'], | ||||||
|  |                 algorithms=['HS256'], | ||||||
|  |                 issuer=current_app.config['SERVER_NAME'], | ||||||
|  |                 options={'require': ['exp', 'iat', 'iss', 'purpose', 'sub']} | ||||||
|  |             ) | ||||||
|  |         except jwt.PyJWTError: | ||||||
|  |             return False | ||||||
|  |         if payload.get('purpose') != 'user.confirm': | ||||||
|  |             return False | ||||||
|  |         if payload.get('sub') != self.hashid: | ||||||
|  |             return False | ||||||
|  |         self.confirmed = True | ||||||
|  |         db.session.add(self) | ||||||
|  |         return True | ||||||
|  |  | ||||||
|  |     def delete(self): | ||||||
|  |         shutil.rmtree(self.path, ignore_errors=True) | ||||||
|  |         db.session.delete(self) | ||||||
|  |  | ||||||
|  |     def generate_auth_token(self): | ||||||
|  |         return Token( | ||||||
|  |             access_token=secrets.token_urlsafe(), | ||||||
|  |             access_expiration=datetime.utcnow() + timedelta(minutes=15), | ||||||
|  |             refresh_token=secrets.token_urlsafe(), | ||||||
|  |             refresh_expiration=datetime.utcnow() + timedelta(days=7), | ||||||
|  |             user=self | ||||||
|  |         ) | ||||||
|  |  | ||||||
|  |     def generate_confirm_token(self, expiration=3600): | ||||||
|  |         now = datetime.utcnow() | ||||||
|  |         payload = { | ||||||
|  |             'exp': now + timedelta(seconds=expiration), | ||||||
|  |             'iat': now, | ||||||
|  |             'iss': current_app.config['SERVER_NAME'], | ||||||
|  |             'purpose': 'user.confirm', | ||||||
|  |             'sub': self.hashid | ||||||
|  |         } | ||||||
|  |         return jwt.encode( | ||||||
|  |             payload, | ||||||
|  |             current_app.config['SECRET_KEY'], | ||||||
|  |             algorithm='HS256' | ||||||
|  |         ) | ||||||
|  |  | ||||||
|  |     def generate_reset_password_token(self, expiration=3600): | ||||||
|  |         now = datetime.utcnow() | ||||||
|  |         payload = { | ||||||
|  |             'exp': now + timedelta(seconds=expiration), | ||||||
|  |             'iat': now, | ||||||
|  |             'iss': current_app.config['SERVER_NAME'], | ||||||
|  |             'purpose': 'User.reset_password', | ||||||
|  |             'sub': self.hashid | ||||||
|  |         } | ||||||
|  |         return jwt.encode( | ||||||
|  |             payload, | ||||||
|  |             current_app.config['SECRET_KEY'], | ||||||
|  |             algorithm='HS256' | ||||||
|  |         ) | ||||||
|  |  | ||||||
|  |     def is_administrator(self): | ||||||
|  |         return self.can(Permission.ADMINISTRATE) | ||||||
|  |  | ||||||
|  |     def ping(self): | ||||||
|  |         self.last_seen = datetime.utcnow() | ||||||
|  |  | ||||||
|  |     def revoke_auth_tokens(self): | ||||||
|  |         for token in self.tokens: | ||||||
|  |             db.session.delete(token) | ||||||
|  |  | ||||||
|  |     def verify_password(self, password): | ||||||
|  |         if self.role.name == 'System user': | ||||||
|  |             return False | ||||||
|  |         return check_password_hash(self.password_hash, password) | ||||||
|  |  | ||||||
|  |     #region Profile Privacy settings | ||||||
|  |     def has_profile_privacy_setting(self, setting): | ||||||
|  |         s = ProfilePrivacySettings.get(setting) | ||||||
|  |         return self.profile_privacy_settings & s.value == s.value | ||||||
|  |      | ||||||
|  |     def add_profile_privacy_setting(self, setting): | ||||||
|  |         s = ProfilePrivacySettings.get(setting) | ||||||
|  |         if not self.has_profile_privacy_setting(s): | ||||||
|  |             self.profile_privacy_settings += s.value | ||||||
|  |  | ||||||
|  |     def remove_profile_privacy_setting(self, setting): | ||||||
|  |         s = ProfilePrivacySettings.get(setting) | ||||||
|  |         if self.has_profile_privacy_setting(s): | ||||||
|  |             self.profile_privacy_settings -= s.value | ||||||
|  |  | ||||||
|  |     def reset_profile_privacy_settings(self): | ||||||
|  |         self.profile_privacy_settings = 0 | ||||||
|  |     #endregion Profile Privacy settings | ||||||
|  |  | ||||||
|  |     def follow_corpus(self, corpus, role=None): | ||||||
|  |         if role is None: | ||||||
|  |             cfr = CorpusFollowerRole.query.filter_by(default=True).first() | ||||||
|  |         else: | ||||||
|  |             cfr = role | ||||||
|  |         if self.is_following_corpus(corpus): | ||||||
|  |             cfa = CorpusFollowerAssociation.query.filter_by(corpus=corpus, follower=self).first() | ||||||
|  |             if cfa.role != cfr: | ||||||
|  |                 cfa.role = cfr | ||||||
|  |         else: | ||||||
|  |             cfa = CorpusFollowerAssociation(corpus=corpus, role=cfr, follower=self) | ||||||
|  |             db.session.add(cfa) | ||||||
|  |  | ||||||
|  |     def unfollow_corpus(self, corpus): | ||||||
|  |         if not self.is_following_corpus(corpus): | ||||||
|  |             return | ||||||
|  |         self.followed_corpora.remove(corpus) | ||||||
|  |  | ||||||
|  |     def is_following_corpus(self, corpus): | ||||||
|  |         return corpus in self.followed_corpora | ||||||
|  |      | ||||||
|  |     def generate_follow_corpus_token(self, corpus_hashid, role_name, expiration=7): | ||||||
|  |         now = datetime.utcnow() | ||||||
|  |         payload = { | ||||||
|  |             'exp': expiration, | ||||||
|  |             'iat': now, | ||||||
|  |             'iss': current_app.config['SERVER_NAME'], | ||||||
|  |             'purpose': 'User.follow_corpus', | ||||||
|  |             'role_name': role_name, | ||||||
|  |             'sub': corpus_hashid | ||||||
|  |         } | ||||||
|  |         return jwt.encode( | ||||||
|  |             payload, | ||||||
|  |             current_app.config['SECRET_KEY'], | ||||||
|  |             algorithm='HS256' | ||||||
|  |         ) | ||||||
|  |      | ||||||
|  |     def follow_corpus_by_token(self, token): | ||||||
|  |         try: | ||||||
|  |             payload = jwt.decode( | ||||||
|  |                 token, | ||||||
|  |                 current_app.config['SECRET_KEY'], | ||||||
|  |                 algorithms=['HS256'], | ||||||
|  |                 issuer=current_app.config['SERVER_NAME'], | ||||||
|  |                 options={'require': ['exp', 'iat', 'iss', 'purpose', 'role_name', 'sub']} | ||||||
|  |             ) | ||||||
|  |         except jwt.PyJWTError: | ||||||
|  |             return False | ||||||
|  |         if payload.get('purpose') != 'User.follow_corpus': | ||||||
|  |             return False | ||||||
|  |         corpus_hashid = payload.get('sub') | ||||||
|  |         corpus_id = hashids.decode(corpus_hashid) | ||||||
|  |         corpus = Corpus.query.get_or_404(corpus_id) | ||||||
|  |         if corpus is None: | ||||||
|  |             return False | ||||||
|  |         role_name = payload.get('role_name') | ||||||
|  |         role = CorpusFollowerRole.query.filter_by(name=role_name).first() | ||||||
|  |         if role is None: | ||||||
|  |             return False | ||||||
|  |         self.follow_corpus(corpus, role) | ||||||
|  |         # db.session.add(self) | ||||||
|  |         return True | ||||||
|  |  | ||||||
|  |     def to_json_serializeable(self, backrefs=False, relationships=False, filter_by_privacy_settings=False): | ||||||
|  |         json_serializeable = { | ||||||
|  |             'id': self.hashid, | ||||||
|  |             'confirmed': self.confirmed, | ||||||
|  |             'avatar': url_for('users.user_avatar', user_id=self.id), | ||||||
|  |             'email': self.email, | ||||||
|  |             'last_seen': ( | ||||||
|  |                 None if self.last_seen is None | ||||||
|  |                 else f'{self.last_seen.isoformat()}Z' | ||||||
|  |             ), | ||||||
|  |             'member_since': f'{self.member_since.isoformat()}Z', | ||||||
|  |             'username': self.username, | ||||||
|  |             'full_name': self.full_name, | ||||||
|  |             'about_me': self.about_me, | ||||||
|  |             'website': self.website, | ||||||
|  |             'location': self.location, | ||||||
|  |             'organization': self.organization, | ||||||
|  |             'job_status_mail_notification_level': \ | ||||||
|  |                     self.setting_job_status_mail_notification_level.name, | ||||||
|  |             'profile_privacy_settings': { | ||||||
|  |                 'is_public': self.is_public, | ||||||
|  |                 'show_email': self.has_profile_privacy_setting(ProfilePrivacySettings.SHOW_EMAIL), | ||||||
|  |                 'show_last_seen': self.has_profile_privacy_setting(ProfilePrivacySettings.SHOW_LAST_SEEN), | ||||||
|  |                 'show_member_since': self.has_profile_privacy_setting(ProfilePrivacySettings.SHOW_MEMBER_SINCE) | ||||||
|  |             } | ||||||
|  |         } | ||||||
|  |         if backrefs: | ||||||
|  |             json_serializeable['role'] = \ | ||||||
|  |                 self.role.to_json_serializeable(backrefs=True) | ||||||
|  |         if relationships: | ||||||
|  |             json_serializeable['corpus_follower_associations'] = { | ||||||
|  |                 x.hashid: x.to_json_serializeable() | ||||||
|  |                 for x in self.corpus_follower_associations | ||||||
|  |             } | ||||||
|  |             json_serializeable['corpora'] = { | ||||||
|  |                 x.hashid: x.to_json_serializeable(relationships=True) | ||||||
|  |                 for x in self.corpora | ||||||
|  |             } | ||||||
|  |             json_serializeable['jobs'] = { | ||||||
|  |                 x.hashid: x.to_json_serializeable(relationships=True) | ||||||
|  |                 for x in self.jobs | ||||||
|  |             } | ||||||
|  |             json_serializeable['tesseract_ocr_pipeline_models'] = { | ||||||
|  |                 x.hashid: x.to_json_serializeable(relationships=True) | ||||||
|  |                 for x in self.tesseract_ocr_pipeline_models | ||||||
|  |             } | ||||||
|  |             json_serializeable['spacy_nlp_pipeline_models'] = { | ||||||
|  |                 x.hashid: x.to_json_serializeable(relationships=True) | ||||||
|  |                 for x in self.spacy_nlp_pipeline_models | ||||||
|  |             } | ||||||
|  |  | ||||||
|  |         if filter_by_privacy_settings: | ||||||
|  |             if not self.has_profile_privacy_setting(ProfilePrivacySettings.SHOW_EMAIL): | ||||||
|  |                 json_serializeable.pop('email') | ||||||
|  |             if not self.has_profile_privacy_setting(ProfilePrivacySettings.SHOW_LAST_SEEN): | ||||||
|  |                 json_serializeable.pop('last_seen') | ||||||
|  |             if not self.has_profile_privacy_setting(ProfilePrivacySettings.SHOW_MEMBER_SINCE): | ||||||
|  |                 json_serializeable.pop('member_since') | ||||||
|  |         return json_serializeable | ||||||
| @@ -1,12 +1,11 @@ | |||||||
| from flask import Blueprint | from flask import Blueprint | ||||||
| from flask_login import login_required | from flask_login import login_required | ||||||
| import os | from pathlib import Path | ||||||
| import yaml | import yaml | ||||||
|  |  | ||||||
|  |  | ||||||
| services_file = \ | services_file = Path(__file__).parent / 'services.yml' | ||||||
|     os.path.join(os.path.dirname(os.path.abspath(__file__)), 'services.yml') | with services_file.open('r') as f: | ||||||
| with open(services_file, 'r') as f: |  | ||||||
|     SERVICES = yaml.safe_load(f) |     SERVICES = yaml.safe_load(f) | ||||||
|  |  | ||||||
| bp = Blueprint('services', __name__) | bp = Blueprint('services', __name__) | ||||||
|   | |||||||
| @@ -59,3 +59,8 @@ spacy-nlp-pipeline: | |||||||
|         - 'encoding_detection' |         - 'encoding_detection' | ||||||
|       publishing_year: 2022 |       publishing_year: 2022 | ||||||
|       url: 'https://gitlab.ub.uni-bielefeld.de/sfb1288inf/spacy-nlp-pipeline/-/releases/v0.1.1' |       url: 'https://gitlab.ub.uni-bielefeld.de/sfb1288inf/spacy-nlp-pipeline/-/releases/v0.1.1' | ||||||
|  |     0.1.2: | ||||||
|  |       methods: | ||||||
|  |         - 'encoding_detection' | ||||||
|  |       publishing_year: 2024 | ||||||
|  |       url: 'https://gitlab.ub.uni-bielefeld.de/sfb1288inf/spacy-nlp-pipeline/-/releases/v0.1.2' | ||||||
|   | |||||||
										
											Binary file not shown.
										
									
								
							| Before Width: | Height: | Size: 34 KiB After Width: | Height: | Size: 30 KiB | 
| @@ -1,34 +1,9 @@ | |||||||
| <h3 class="manual-chapter-title">Introduction</h3> | <h3 class="manual-chapter-title">Introduction</h3> | ||||||
| <h4>Introduction</h4> |  | ||||||
| <p> | <p> | ||||||
|   Nopaque is a web application that offers different services and tools to support  |   nopaque is a web-based digital working environment. It implements a | ||||||
|   researchers working with image and text-based data. These services are logically  |   workflow based on the research process in the humanities and supports its | ||||||
|   connected and build upon each other. They include: |   users in processing their data in order to subsequently apply digital | ||||||
|  |   analysis methods to them. All processes are implemented in a specially | ||||||
|  |   provided cloud environment with established open source software. This | ||||||
|  |   always ensures that no personal data of the users is disclosed. | ||||||
| </p> | </p> | ||||||
|  <ol style="list-style-type:disc; margin-left:2em; padding-bottom:0;"> |  | ||||||
|   <li><b>File setup</b>, which converts and merges different data  (e.g., books, letters)  |  | ||||||
|   for further processing.</li> |  | ||||||
|   <li><b>Image-to-text conversion tools:</b></li> |  | ||||||
|     <ol style="list-style-type:circle; margin-left:1em; padding-bottom:0;"><li><b>Optical Character Recognition</b> converts photos and  |  | ||||||
|     scans into text data, making them machine-readable.</li> |  | ||||||
|     <li><b>Transkribus HTR (Handwritten Text Recognition) Pipeline</b> (currently deactivated)*  |  | ||||||
|     also converts images into text data, making them machine-readable.</li> |  | ||||||
|     </ol> |  | ||||||
|   <li><b>Natural Language Processing</b> extracts information from your text via  |  | ||||||
|   computational linguistic data processing (tokenization, lemmatization, part-of-speech  |  | ||||||
|   tagging and named-entity recognition).</li> |  | ||||||
|   <li><b>Corpus analysis</b> makes use of CQP Query Language to search through text  |  | ||||||
|   corpora with the aid of metadata and Natural Language Processing tags.</li> |  | ||||||
|  </ol> |  | ||||||
|  |  | ||||||
| Nopaque also features a <b>Social Area</b>, where researchers can create a personal profile, connect with other users and share corpora if desired. |  | ||||||
| These services can be accessed from the sidebar in nopaque. |  | ||||||
| All processes are implemented in a specially provided cloud environment with established open-source software.  |  | ||||||
| This always ensures that no personal data of the users is disclosed. |  | ||||||
| <p> |  | ||||||
| *Note: the Transkribus HTR Pipeline is currently  |  | ||||||
| deactivated; we are working on an alternative solution. You can try using Tesseract OCR,  |  | ||||||
| though the results will likely be poor. |  | ||||||
| </p> |  | ||||||
|  |  | ||||||
|  |  | ||||||
|   | |||||||
| @@ -1,104 +0,0 @@ | |||||||
| <h3 class="manual-chapter-title">Getting Started</h3> |  | ||||||
| <h4>Getting Started</h4> |  | ||||||
| <p> |  | ||||||
| In this section, we will take you through all the steps you need to start analyzing your data with nopaque. |  | ||||||
| </p> |  | ||||||
|  |  | ||||||
| <div style="border: 1px solid; padding-left: 20px; margin-right: 400px; margin-bottom: 40px;"> |  | ||||||
|   <h5>Content</h5> |  | ||||||
|   <ol style="list-style-type:disc"> |  | ||||||
|     <li><a href="#registration-and-login">Registration and login</a></li> |  | ||||||
|     <li><a href="#preparing-files">Preparing files for analysis</a></li> |  | ||||||
|     <li><a href="#converting-a-pdf-into-text">Converting a PDF into text data</a></li> |  | ||||||
|     <li><a href="#extracting-linguistic-data">Extracting linguistic data from text</a></li> |  | ||||||
|     <li><a href="#creating-a-corpus">Creating a corpus</a></li> |  | ||||||
|     <li><a href="#analyzing-a-corpus">Analyzing a corpus</a></li> |  | ||||||
|   </ol> |  | ||||||
| </div> |  | ||||||
|  |  | ||||||
| <p></p> |  | ||||||
|  |  | ||||||
| <h5 id="registration-and-login">Registration and login</h5> |  | ||||||
| <p>Before you can begin using nopaque, you will need to create a personal user account.  |  | ||||||
| Open the menu (three dots) at the top right of the screen and choose “Register.” Enter  |  | ||||||
| the required details listed on the registration page (username, password, email address).  |  | ||||||
| After verifying your account via the link sent to your email, you can log in.</p> |  | ||||||
|  |  | ||||||
| <h5 id="preparing-files">Preparing files for analysis</h5> |  | ||||||
| <p>A few steps need to be taken before images, scans, or other text data are ready for  |  | ||||||
| analysis in nopaque. The SpaCy NLP Pipeline service can only extract linguistic data  |  | ||||||
| from texts in plain text (.txt) format. If your text is already in this format, you  |  | ||||||
| can skip the next steps and go directly to <b>Extracting linguistic data from text</b>.  |  | ||||||
| Otherwise, the next steps assume that you are starting off with image data.</p> |  | ||||||
| <p> |  | ||||||
| First, all data needs to be converted into PDF format. Using the <b>File Setup</b> service,  |  | ||||||
| you can bundle images together – even of different formats – and convert them all into  |  | ||||||
| one PDF file. Note that the File Setup service will sort the images based on their file  |  | ||||||
| name in ascending order. It is thus recommended to name them accordingly, for example:  |  | ||||||
| page-01.png, page-02.jpg, page-03.tiff. |  | ||||||
| </p> |  | ||||||
| <p> |  | ||||||
| Add a title and description to your job and select the File Setup version* you want to use. |  | ||||||
| After uploading the images and completing the File Setup job, the list of files added  |  | ||||||
| can be seen under “Inputs.” Further below, under “Results,” you can find and download  |  | ||||||
| the PDF output.</p> |  | ||||||
|  |  | ||||||
| <h5 id="converting-a-pdf-into-text">Converting a PDF into text data</h5> |  | ||||||
| <p>Select an image-to-text conversion tool depending on whether your PDF is primarily  |  | ||||||
| composed of handwritten text or printed text. For printed text, select the <b>Tesseract OCR  |  | ||||||
| Pipeline</b>. For handwritten text, select the <b>Transkribus HTR Pipeline</b>. Select the desired  |  | ||||||
| language model or upload your own. Select the version* of Tesseract OCR you want to use  |  | ||||||
| and click on submit to start the conversion. When the job is finished, various output  |  | ||||||
| files can be seen and downloaded further below, under “Results.” You may want to review  |  | ||||||
| the text output for errors and coherence. (Note: the Transkribus HTR Pipeline is currently  |  | ||||||
| deactivated; we are working on an alternative solution. You can try using Tesseract OCR,  |  | ||||||
| though the results will likely be poor.) |  | ||||||
| </p> |  | ||||||
|  |  | ||||||
| <h5 id="extracting-linguistic-data">Extracting linguistic data from text</h5> |  | ||||||
| <p>The <b>SpaCy NLP Pipeline</b> service extracts linguistic information from plain text files  |  | ||||||
| (in .txt format). Select the corresponding .txt file, the language model, and the  |  | ||||||
| version* you want to use. When the job is finished, find and download the files in  |  | ||||||
| <b>.json</b> and <b>.vrt</b> format under “Results.”</p> |  | ||||||
|  |  | ||||||
| <h5 id="creating-a-corpus">Creating a corpus</h5> |  | ||||||
| <p>Now, using the files in .vrt format, you can create a corpus. This can be done  |  | ||||||
| in the <a href="{{ url_for('main.dashboard') }}">Dashboard</a> or  |  | ||||||
| <a href="{{ url_for('services.corpus_analysis') }}">Corpus Analysis</a> sections under “My Corpora.” Click on “Create corpus”  |  | ||||||
| and add a title and description for your corpus. After submitting, you will automatically  |  | ||||||
| be taken to the corpus overview page (which can be called up again via the corpus lists)  |  | ||||||
| of your new, still empty corpus. </p> |  | ||||||
| <p> |  | ||||||
| Further down in the “Corpus files” section, you can add texts in .vrt format  |  | ||||||
| (results of the NLP service) to your new corpus. To do this, use the "Add Corpus File"  |  | ||||||
| button and fill in the form that appears. Here, you can add  |  | ||||||
| metadata to each text. After adding all texts to the corpus, it must  |  | ||||||
| be prepared for analysis. This process can be initiated by clicking on the  |  | ||||||
| "Build" button under "Actions".  |  | ||||||
| On the corpus overview page, you can see information about the current status of  |  | ||||||
| the corpus in the upper right corner. After the build process, the status "built" should be shown here. |  | ||||||
| Now, your corpus is ready for analysis.</p> |  | ||||||
|  |  | ||||||
| <h5 id="analyzing-a-corpus">Analyzing a corpus</h5> |  | ||||||
| <p>Navigate to the corpus you would like to analyze and click on the Analyze button.  |  | ||||||
| This will take you to an analysis overview page for your corpus. Here, you can find a  |  | ||||||
| visualization of general linguistic information of your corpus, including tokens,  |  | ||||||
| sentences, unique words, unique lemmas, unique parts of speech and unique simple parts  |  | ||||||
| of speech. You will also find a pie chart of the proportional textual makeup of your  |  | ||||||
| corpus and can view the linguistic information for each individual text file. A more  |  | ||||||
| detailed visualization of token frequencies with a search option is also on this page.</p> |  | ||||||
| <p>From the corpus analysis overview page, you can navigate to other analysis modules:  |  | ||||||
| the <b>Query Builder</b> (under <b>Concordance</b>) and the <b>Reader</b>. With the Reader, you can read  |  | ||||||
| your corpus texts tokenized with the associated linguistic information. The tokens can  |  | ||||||
| be shown as lemmas, parts of speech, words, and can be displayed in different ways:  |  | ||||||
| visually as plain text with the option of highlighted entities or as chips.</p> |  | ||||||
| <p>The <b>Concordance</b> module allows for more specific, query-oriented text analyses.  |  | ||||||
| Here, you can filter out text parameters and structural attributes in different  |  | ||||||
| combinations. This is explained in more detail in the Query Builder section of the  |  | ||||||
| manual.</p> |  | ||||||
|  |  | ||||||
| <br> |  | ||||||
| <br> |  | ||||||
| *For all services, it is recommended to use the latest version unless you need a model  |  | ||||||
| only available in an earlier version or are looking to reproduce data that was originally generated  |  | ||||||
| using an older version. |  | ||||||
| @@ -0,0 +1,18 @@ | |||||||
|  | <h3 class="manual-chapter-title">Registration and Log in</h3> | ||||||
|  | <div class="row"> | ||||||
|  |   <div class="col s12 m4"> | ||||||
|  |     <img alt="Registration and Log in" class="materialboxed responsive-img" src="{{ url_for('static', filename='images/manual/registration-and-log-in.png') }}"> | ||||||
|  |   </div> | ||||||
|  |   <div class="col s12 m8"> | ||||||
|  |     <p> | ||||||
|  |       Before you can start using the web platform, you need to create a user | ||||||
|  |       account. This requires only a few details: just a user name, an e-mail | ||||||
|  |       address and a password are needed. In order to register yourself, fill out | ||||||
|  |       the form on the <a href="{{ url_for('auth.register') }}">registration page</a>. After successful registration, the | ||||||
|  |       created account must be verified. To do this, follow the instructions | ||||||
|  |       given in the automatically sent e-mail. Afterwards, you can log in as | ||||||
|  |       usual with your username/email address and password in the log-in form | ||||||
|  |       located next to the registration button. | ||||||
|  |     </p> | ||||||
|  |   </div> | ||||||
|  | </div> | ||||||
| @@ -1,22 +1,15 @@ | |||||||
| <h3 class="manual-chapter-title">Dashboard</h3> | <h3 class="manual-chapter-title">Dashboard</h3> | ||||||
| <h4>About the dashboard</h4> |  | ||||||
| <br> |  | ||||||
| <div class="row"> | <div class="row"> | ||||||
|   <div class="col s12 m4"> |   <div class="col s12 m4"> | ||||||
|     <img alt="Dashboard" class="materialboxed responsive-img" src="{{ url_for('static', filename='images/manual/dashboard.png') }}"> |     <img alt="Dashboard" class="materialboxed responsive-img" src="{{ url_for('static', filename='images/manual/dashboard.png') }}"> | ||||||
|   </div> |   </div> | ||||||
|   <div class="col s12 m8"> |   <div class="col s12 m8"> | ||||||
|     <p> |     <p> | ||||||
|       The <a href="{{ url_for('main.dashboard') }}">dashboard</a> provides a central  |       The <a href="{{ url_for('main.dashboard') }}">dashboard</a> provides a central overview of all resources assigned to the | ||||||
|       overview of all user-specific resources. |       user. These are <a href="{{ url_for('main.dashboard', _anchor='corpora') }}">corpora</a> and created <a href="{{ url_for('main.dashboard', _anchor='jobs') }}">jobs</a>. Corpora are freely composable | ||||||
|       These are <a href="{{ url_for('main.dashboard', _anchor='corpora') }}">corpora</a>,  |       annotated text collections and jobs are the initiated file processing | ||||||
|       created <a href="{{ url_for('main.dashboard', _anchor='jobs') }}">jobs</a>, and  |       procedures. One can search for jobs as well as corpus listings using | ||||||
|       model <a href="{{ url_for('main.dashboard', _anchor='contributions') }}">contributions</a>.  |       the search field displayed above them. | ||||||
|       A <b>corpus</b> is a freely composable annotated text collection.  |  | ||||||
|       A <b>job</b> is an initiated file processing procedure.  |  | ||||||
|       A <b>model</b> is a mathematical system for pattern recognition based on data examples that have been processed by AI. One can search for jobs as  |  | ||||||
|       well as corpus listings using the search field displayed above them on the dashboard.  |  | ||||||
|       Uploaded models can be found and edited by clicking on the corresponding service under <b>My Contributions</b>.       |  | ||||||
|     </p> |     </p> | ||||||
|   </div> |   </div> | ||||||
|   <div class="col s12"> </div> |   <div class="col s12"> </div> | ||||||
|   | |||||||
| @@ -1,107 +1,52 @@ | |||||||
| <h3 class="manual-chapter-title">Services</h3> | <h3 class="manual-chapter-title">Services</h3> | ||||||
| <h4>Services</h4> |  | ||||||
| <p> |  | ||||||
| In this section, we will describe the different services nopaque has to offer. |  | ||||||
| </p> |  | ||||||
|  |  | ||||||
| <div class="row"> | <div class="row"> | ||||||
|   <div class="col s12 m4"> |   <div class="col s12 m4"> | ||||||
|     <img alt="Services" class="materialboxed responsive-img" src="{{ url_for('static', filename='images/manual/services.png') }}"> |     <img alt="Services" class="materialboxed responsive-img" src="{{ url_for('static', filename='images/manual/services.png') }}"> | ||||||
|   </div> |   </div> | ||||||
|   <div class="col s12 m8"> |   <div class="col s12 m8"> | ||||||
|     <p> |     <p> | ||||||
|       Nopaque was designed to be modular. Its modules are implemented in  |       nopaque was designed from the ground up to be modular. This modularity | ||||||
|       self-contained <b>services</b>, each of which represents a step in the  |       means that the offered workflow provides variable entry and exit points, | ||||||
|       workflow. The typical workflow involves using services one after another,  |       so that different starting points and goals can be flexibly addressed. | ||||||
|       consecutively. |       Each of these modules are implemented in a self-contained service, each of | ||||||
|       The typical workflow order can be taken from the listing of the  |       which represents a step in the workflow. The services are coordinated in | ||||||
|       services in the left sidebar or from the nopaque manual (accessible via the pink  |       such a way that they can be used consecutively. The order can either be | ||||||
|       button in the upper right corner).  |       taken from the listing of the services in the left sidebar or from the | ||||||
|       The services can also be applied at different starting and ending points,  |       roadmap (accessible via the pink compass in the upper right corner). All | ||||||
|       which allows you to conduct your work flexibly. |       services are versioned, so the data generated with nopaque is always | ||||||
|       All services are versioned, so the data generated with nopaque is always |  | ||||||
|       reproducible. |       reproducible. | ||||||
|        |  | ||||||
|       <p>For all services, it is recommended to use the latest version (selected  |  | ||||||
|       in the drop-down menu on the service page) unless you need a model  |  | ||||||
|       only available in an earlier version or are looking to reproduce data that was originally generated  |  | ||||||
|       using an older version.</p> |  | ||||||
|     </p> |     </p> | ||||||
|   </div> |   </div> | ||||||
| </div> | </div> | ||||||
|  |  | ||||||
|  | <h4 class="manual-chapter-title">File Setup</h4> | ||||||
|  |  | ||||||
| <h4>File Setup</h4> |  | ||||||
| <p> | <p> | ||||||
|   The <a href="{{ url_for('services.file_setup_pipeline') }}">File Setup Service</a> bundles image data, such as scans and photos, |   The <a href="{{ url_for('services.file_setup_pipeline') }}">File Setup Service</a> bundles image data, such as scans and photos, | ||||||
|   together in a handy PDF file. To use this service, use the job form to |   together in a handy PDF file. To use this service, use the job form to | ||||||
|   select the images to be bundled, choose the desired service version, and |   select the images to be bundled, choose the desired service version, and | ||||||
|   specify a title and description. |   specify a title and description. Please note that the service sorts the | ||||||
|   Note that the File Setup service will sort the images based on their file name in  |   images into the resulting PDF file based on the file names. So naming the | ||||||
|   ascending order. It is thus important and highly recommended to name  |   images correctly is of great importance. It has proven to be a good practice | ||||||
|   them accordingly, for example:  |   to name the files according to the following scheme: | ||||||
|   page-01.png, page-02.jpg, page-03.tiff. Generally, you can assume |   page-01.png, page-02.jpg, page-03.tiff, etc. In general, you can assume | ||||||
|   that the images will be sorted in the order in which the file explorer of |   that the images will be sorted in the order in which the file explorer of | ||||||
|   your operating system lists them when you view the files in a folder |   your operating system lists them when you view the files in a folder | ||||||
|   sorted in ascending order by file name. |   sorted in ascending order by file name. | ||||||
| </p> | </p> | ||||||
|  |  | ||||||
| <h4>Optical Character Recognition (OCR)</h4> | <h4>Optical Character Recognition (OCR)</h4> | ||||||
| <p> | <p>Coming soon...</p> | ||||||
|   The <a href="{{ url_for('services.tesseract_ocr_pipeline') }}">Tesseract OCR Pipeline</a>  |  | ||||||
|   converts image data - like photos and scans - into text data, making them machine-readable.  |  | ||||||
|   This step enables you to proceed with the computational analysis of your documents.  |  | ||||||
|   To use this service, use the job form to select the file you want to convert into text data.  |  | ||||||
|   Then, choose the language model and service version you would like to use. Enter a title and description for your file and then  |  | ||||||
|   submit your job. Once the job is finished, the results can be found and downloaded further below on the page, under  |  | ||||||
|   the section labeled "Inputs." |  | ||||||
|  |  | ||||||
| </p> |  | ||||||
|  |  | ||||||
| <h4>Handwritten Text Recognition (HTR)</h4> | <h4>Handwritten Text Recognition (HTR)</h4> | ||||||
| <p>The Transkribus HTR Pipeline is currently  | <p>Coming soon...</p> | ||||||
| deactivated. We are working on an alternative solution. In the meantime, you can  |  | ||||||
| try using Tesseract OCR, though the results will likely be poor.</p> |  | ||||||
|  |  | ||||||
| <h4>Natural Language Processing (NLP)</h4> | <h4>Natural Language Processing (NLP)</h4> | ||||||
| <p>The <a href="{{ url_for('services.spacy_nlp_pipeline') }}">SpaCy NLP Pipeline</a> extracts  | <p>Coming soon...</p> | ||||||
| information from plain text files (.txt format) via computational linguistic data processing  |  | ||||||
| (tokenization, lemmatization, part-of-speech tagging and named-entity recognition).  |  | ||||||
| To use this service, select the .txt file that you want to extract this information from.  |  | ||||||
| Then select the language model and the version you want to use. Once the job is finished, you can find and download the files in  |  | ||||||
| <b>.json</b> and <b>.vrt</b> format under the section labeled “Results.”</p> |  | ||||||
|  |  | ||||||
| <h4>Corpus Analysis</h4> | <h4>Corpus Analysis</h4> | ||||||
| <p> | <p> | ||||||
|   With the <a href="{{ url_for('services.corpus_analysis') }}">Corpus Analysis</a>  |   With the corpus analysis service, it is possible to create a text corpus | ||||||
|   service, it is possible to create a text corpus |   and then explore it in an analysis session. The analysis session is realized | ||||||
|   and then explore through it with analytical tools. The analysis session is realized |  | ||||||
|   on the server side by the Open Corpus Workbench software, which enables |   on the server side by the Open Corpus Workbench software, which enables | ||||||
|   efficient and complex searches with the help of the CQP Query Language.</p> |   efficient and complex searches with the help of the CQP Query Language. | ||||||
|   <p> |  | ||||||
|   To use this service, navigate to the corpus you would like to analyze and click on the Analyze button.  |  | ||||||
|   This will take you to an analysis overview page for your corpus. Here, you can find  |  | ||||||
|   a visualization of general linguistic information of your corpus, including tokens,  |  | ||||||
|   sentences, unique words, unique lemmas, unique parts of speech and unique simple  |  | ||||||
|   parts of speech. You will also find a pie chart of the proportional textual makeup  |  | ||||||
|   of your corpus and can view the linguistic information for each individual text file.  |  | ||||||
|   A more detailed visualization of token frequencies with a search option is also on  |  | ||||||
|   this page. |  | ||||||
|   </p> |  | ||||||
|   <p> |  | ||||||
|   From the corpus analysis overview page, you can navigate to other analysis modules:  |  | ||||||
|   the Query Builder (under Concordance) and the Reader.  |  | ||||||
|   </p> |  | ||||||
|   <p> |  | ||||||
|   With the <b>Reader</b>, you can read your corpus texts tokenized with the associated linguistic information. The tokens  |  | ||||||
|   can be shown as lemmas, parts of speech, words, and can be displayed in different  |  | ||||||
|   ways: visually as plain text with the option of highlighted entities or as chips. |  | ||||||
|   </p> |  | ||||||
|   <p> |  | ||||||
|   The Concordance module allows for more specific, query-oriented text analyses.  |  | ||||||
|   Here, you can filter out text parameters and structural attributes in different  |  | ||||||
|   combinations. This is explained in more detail in the <b>Query Builder</b> section of the  |  | ||||||
|   manual. |  | ||||||
|   </p> |  | ||||||
| </p> | </p> | ||||||
|   | |||||||
| @@ -1,22 +1,5 @@ | |||||||
| <h3 class="manual-chapter-title">CQP Query Language</h3> | <h3 class="manual-chapter-title">CQP Query Language</h3> | ||||||
| <h4 id="cqp-query-language">CQP Query Language</h4> | <p>Within the Corpus Query Language, a distinction is made between two types of annotations: positional attributes and structural attributes. Positional attributes refer to a token, e.g. the word "book" is assigned the part-of-speech tag "NN", the lemma "book" and the simplified part-of-speech tag "NOUN" within the token structure. Structural attributes refer to text structure-giving elements such as sentence and entity markup. For example, the markup of a sentence is represented in the background as follows:</p> | ||||||
| <p>In this section, we will provide some functional explanations of the properties of the Corpus Query Language. This includes  |  | ||||||
| the types of linguistic attributes one can work with and how to use them in your query.</p> |  | ||||||
|  |  | ||||||
| <div style="border: 1px solid; padding-left: 20px; margin-right: 400px; margin-bottom: 40px;"> |  | ||||||
|   <h5>Content</h5> |  | ||||||
|   <ol style="list-style-type:disc"> |  | ||||||
|     <li><a href="#overview-annotations">Overview of annotation types</a></li> |  | ||||||
|     <li><a href="#positional-attributes">Positional attributes</a></li> |  | ||||||
|     <li><a href="#searching-positional-attributes">How to search for positional attributes</a></li> |  | ||||||
|     <li><a href="#structural-attributes">Structural attributes</a></li> |  | ||||||
|     <li><a href="#searching-structural-attributes">How to search for structural attributes</a></li> |  | ||||||
|  |  | ||||||
|   </ol> |  | ||||||
| </div> |  | ||||||
|  |  | ||||||
| <h4 id="overview-annotations">Overview of annotation types</h4> |  | ||||||
| <p>Within the Corpus Query Language, a distinction is made between two types of annotations: <b>positional attributes</b> and <b>structural attributes</b>. Positional attributes refer to a token, e.g. the word "book" is assigned the part-of-speech tag "NN", the lemma "book" and the simplified part-of-speech tag "NOUN" within the token structure. Structural attributes refer to text structure-giving elements such as sentence and entity markup. For example, the markup of a sentence is represented in the background as follows:</p> |  | ||||||
| <pre> | <pre> | ||||||
|   <code> |   <code> | ||||||
|     <span class="green-text"><s>                                     structural attribute</span> |     <span class="green-text"><s>                                     structural attribute</span> | ||||||
| @@ -30,7 +13,7 @@ the types of linguistic attributes one can work with and how to use them in your | |||||||
|   </code> |   </code> | ||||||
| </pre> | </pre> | ||||||
|  |  | ||||||
| <h4 id="positional-attributes">Positional attributes</h4> | <h4>Positional attributes</h4> | ||||||
| <p>Before you can start searching for positional attributes (also called tokens), it is necessary to know what properties they contain.</p> | <p>Before you can start searching for positional attributes (also called tokens), it is necessary to know what properties they contain.</p> | ||||||
| <ol> | <ol> | ||||||
|   <li><span class="blue-text"><b>word</b></span>: The string as it is also found in the original text</li> |   <li><span class="blue-text"><b>word</b></span>: The string as it is also found in the original text</li> | ||||||
| @@ -50,7 +33,7 @@ the types of linguistic attributes one can work with and how to use them in your | |||||||
|   </li> |   </li> | ||||||
| </ol> | </ol> | ||||||
|  |  | ||||||
| <h5 id="searching-positional-attributes">How to search for positional attributes</h5> | <h5>Searching for positional attributes</h5> | ||||||
| <div> | <div> | ||||||
|   <p> |   <p> | ||||||
|     <b>Token with no condition on any property (also called <span class="blue-text">wildcard token</span>)</b><br> |     <b>Token with no condition on any property (also called <span class="blue-text">wildcard token</span>)</b><br> | ||||||
| @@ -135,7 +118,7 @@ the types of linguistic attributes one can work with and how to use them in your | |||||||
|   <pre style="margin-top: 0;"   ><code>         ^             ^ the braces indicate the start and end of an option group</code></pre> |   <pre style="margin-top: 0;"   ><code>         ^             ^ the braces indicate the start and end of an option group</code></pre> | ||||||
| </div> | </div> | ||||||
|  |  | ||||||
| <h4 id="structural-attributes">Structural attributes</h4> | <h4>Structural attributes</h4> | ||||||
| <p>nopaque provides several structural attributes for query. A distinction is made between attributes with and without value.</p> | <p>nopaque provides several structural attributes for query. A distinction is made between attributes with and without value.</p> | ||||||
| <ol> | <ol> | ||||||
|   <li><span class="green-text"><b>s</b></span>: Annotates a sentence</li> |   <li><span class="green-text"><b>s</b></span>: Annotates a sentence</li> | ||||||
| @@ -170,7 +153,7 @@ the types of linguistic attributes one can work with and how to use them in your | |||||||
|   </li> |   </li> | ||||||
| </ol> | </ol> | ||||||
|  |  | ||||||
| <h5 id="searching-structural-attributes">How to search for structural attributes</h5> | <h5>Searching for structural attributes</h5> | ||||||
| <pre><code><ent> [] </ent>;                       A one token long entity of any type</code></pre> | <pre><code><ent> [] </ent>;                       A one token long entity of any type</code></pre> | ||||||
| <pre><code><ent_type="PERSON"> [] </ent_type>;     A one token long entity of type PERSON</code></pre> | <pre><code><ent_type="PERSON"> [] </ent_type>;     A one token long entity of type PERSON</code></pre> | ||||||
| <pre><code><ent_type="PERSON"> []* </ent_type>;    Entity of any length of type PERSON</code></pre> | <pre><code><ent_type="PERSON"> []* </ent_type>;    Entity of any length of type PERSON</code></pre> | ||||||
|   | |||||||
| @@ -1,45 +1,33 @@ | |||||||
| <h3 class="manual-chapter-title">Query Builder Tutorial</h3> | <h3 class="manual-chapter-title">Query Builder Tutorial</h3> | ||||||
| <h4>Query Builder</h4> | <h4>Overview</h4> | ||||||
| <p>In this section, we will provide you with more detailed instructions on how to use the Query Builder -  | <p>The query builder can be accessed via "My Corpora" or "Corpus Analysis" in the sidebar options.  | ||||||
| nopaque's main user-friendly tool for finding and analyzing different linguistic elements of your texts.</p> | Select the desired corpus and click on the "Analyze" and then "Concordance" | ||||||
|  | buttons to open the query builder.</p> | ||||||
| <div style="border: 1px solid; padding-left: 20px; margin-right: 400px; margin-bottom: 40px;"> | <p>The query builder uses the Corpus Query Language (CQL) to help you make a query for analyzing your texts.  | ||||||
|   <h5>Content</h5> |  | ||||||
|   <ol style="list-style-type:disc"> |  | ||||||
|     <li><a href="#general-overview">General Overview</a></li> |  | ||||||
|     <li><a href="#add-new-token-tutorial">Add a new token to your query</a></li> |  | ||||||
|     <li><a href="#edit-options-tutorial">Options for editing your query</a></li> |  | ||||||
|     <li><a href="#add-structural-attribute-tutorial">Add structural attributes to your query</a></li> |  | ||||||
|     <li><a href="#general-options-query-builder">General options</a></li> |  | ||||||
|   </ol> |  | ||||||
| </div> |  | ||||||
|  |  | ||||||
| <h4 id="general-overview">General Overview</h4> |  | ||||||
| <p>The Query Builder can be accessed via <a href=" {{ url_for('main.dashboard') }}">My Corpora</a> or <a href=" {{ url_for('services.corpus_analysis') }}">Corpus Analysis</a> in the sidebar options.  |  | ||||||
| Click on the corpus you wish to analyze. You will be sent to its corpus overview page. |  | ||||||
| Here, click on <b>Analyze</b> to reach the analysis page. |  | ||||||
| The analysis page features different options for analyzing your corpus, including  |  | ||||||
| visualizations and a <b>Reader</b> module. In this case, we want to open the query builder.  |  | ||||||
| To do so, click on the <b>Concordance</b> button on the top of the page.</p> |  | ||||||
| <p>The query builder uses the <b>Corpus Query Language (CQL)</b> to help you make a query for analyzing your texts. |  | ||||||
| In this way, it is possible to filter out various types of text parameters, for  | In this way, it is possible to filter out various types of text parameters, for  | ||||||
| example, a specific word, a lemma, or you can set part-of-speech  | example, a specific word, a lemma, or you can set part-of-speech  | ||||||
| tags (pos) that indicate the type of word you are looking for (a noun, an  | tags (pos) that indicate the type of word you are looking for (a noun, an  | ||||||
| adjective, etc.). In addition, you can also search for structural attributes,  | adjective, etc.). In addition, you can also search for structural attributes,  | ||||||
| or specify your query for a token (word, lemma, pos) via entity typing. And of  | or specify your query for a token (word, lemma, pos) via entity typing. And of  | ||||||
| course, the different text parameters can be combined.</p> | course, the different text parameters can be combined.</p> | ||||||
| <p>Tokens and structural attributes can be added by clicking on the <b>"+"</b> button | <p>Tokens and structural attributes can be added by clicking on the "+" button | ||||||
| (what we call the "input marker") in the input field or the labeled buttons below it. Elements  | (the "input marker") in the input field or the labeled buttons below it. Elements  | ||||||
| added are shown as chips. These can be reorganized using drag and drop. The input  | added are shown as chips. These can be reorganized using drag and drop. The input  | ||||||
| marker can also be moved in this way. Its position shows where new elements will be added. <br> | marker can also be moved in this way. Its position shows where new elements will be added. <br> | ||||||
| A "translation" of your query into Corpus Query Language (CQL) will be displayed underneath the query field.</p> | A "translation" of your query into Corpus Query Language (CQL) is shown below.</p> | ||||||
| <p>For more information, see our <b>manual section for the Corpus Query Language.</b> | <p>Advanced users can make direct use of the Corpus Query Language (CQL) by switching to "expert mode" via the toggle button.</p> | ||||||
| <br> |  | ||||||
| Advanced users can make direct use of CQL by switching to <b>expert mode</b> via the toggle button. |  | ||||||
| </p> |  | ||||||
| <p>The entire input field can be cleared using the red trash icon on the right.</p> | <p>The entire input field can be cleared using the red trash icon on the right.</p> | ||||||
| <br> | <br> | ||||||
|  |  | ||||||
|  | <div style="border: 1px solid; padding-left: 20px; margin-right: 400px; margin-bottom: 40px;"> | ||||||
|  |   <h5>Content</h5> | ||||||
|  |   <ol style="list-style-type:disc"> | ||||||
|  |     <li><a href="#add-new-token-tutorial">Add a new token to your query</a></li> | ||||||
|  |     <li><a href="#edit-options-tutorial">Options for editing your query</a></li> | ||||||
|  |     <li><a href="#add-structural-attribute-tutorial">Add structural attributes to your query</a></li> | ||||||
|  |     <li><a href="#general-options-query-builder">General options</a></li> | ||||||
|  |   </ol> | ||||||
|  | </div> | ||||||
|  |  | ||||||
| {# Add Token Tutorial #} | {# Add Token Tutorial #} | ||||||
| <div> | <div> | ||||||
| @@ -49,8 +37,8 @@ Advanced users can make direct use of CQL by switching to <b>expert mode</b> via | |||||||
|   <h4 id="add-new-token-tutorial">Add new token to your Query</h4> |   <h4 id="add-new-token-tutorial">Add new token to your Query</h4> | ||||||
|   <p>If you are only looking for a specific token, you can click on the left  |   <p>If you are only looking for a specific token, you can click on the left  | ||||||
|   button and select the type of token you are looking for from the drop-down menu.  |   button and select the type of token you are looking for from the drop-down menu.  | ||||||
|   "Word" is selected by default. </p> |   By default "Word" is selected. </p> | ||||||
|    |   <br> | ||||||
|   <h5>Word and Lemma</h5> |   <h5>Word and Lemma</h5> | ||||||
|   <p>If you want to search for a specific word or lemma and the respective  |   <p>If you want to search for a specific word or lemma and the respective  | ||||||
|   category is selected in the drop-down menu, you can type in the word or lemma  |   category is selected in the drop-down menu, you can type in the word or lemma  | ||||||
|   | |||||||
| @@ -3,22 +3,21 @@ | |||||||
|     <h2>Manual</h2> |     <h2>Manual</h2> | ||||||
|     <ul class="tabs" id="manual-modal-toc"> |     <ul class="tabs" id="manual-modal-toc"> | ||||||
|       <li class="tab"><a href="#manual-modal-introduction">Introduction</a></li> |       <li class="tab"><a href="#manual-modal-introduction">Introduction</a></li> | ||||||
|       <li class="tab"><a href="#manual-modal-getting-started">Getting Started</a></li> |       <li class="tab"><a href="#manual-modal-registration-and-log-in">Registration and Log in</a></li> | ||||||
|       <li class="tab"><a href="#manual-modal-dashboard">Dashboard</a></li> |       <li class="tab"><a href="#manual-modal-dashboard">Dashboard</a></li> | ||||||
|       <li class="tab"><a href="#manual-modal-services">Services</a></li> |       <li class="tab"><a href="#manual-modal-services">Services</a></li> | ||||||
|       <!-- <li class="tab"><a href="#manual-modal-a-closer-look-at-the-corpus-analysis">A closer look at the Corpus Analysis</a></li> --> |       <li class="tab"><a href="#manual-modal-a-closer-look-at-the-corpus-analysis">A closer look at the Corpus Analysis</a></li> | ||||||
|       <li class="tab"><a href="#manual-modal-query-builder">Query Builder</a></li> |  | ||||||
|       <li class="tab"><a href="#manual-modal-cqp-query-language">CQP Query Language</a></li> |       <li class="tab"><a href="#manual-modal-cqp-query-language">CQP Query Language</a></li> | ||||||
|  |       <li class="tab"><a href="#manual-modal-query-builder">Query Builder</a></li> | ||||||
|       <li class="tab"><a href="#manual-modal-tagsets">Tagsets</a></li> |       <li class="tab"><a href="#manual-modal-tagsets">Tagsets</a></li> | ||||||
|     </ul> |     </ul> | ||||||
|     <div id="manual-modal-introduction"> |     <div id="manual-modal-introduction"> | ||||||
|       <br> |       <br> | ||||||
|       {% include "_base/_modals/_manual/01_introduction.html.j2" %} |       {% include "_base/_modals/_manual/01_introduction.html.j2" %} | ||||||
|     </div> |     </div> | ||||||
|     <div id="manual-modal-getting-started"> |     <div id="manual-modal-registration-and-log-in"> | ||||||
|       <br> |       <br> | ||||||
|       {% include "_base/_modals/_manual/02_getting_started.html.j2" %} |       {% include "_base/_modals/_manual/02_registration_and_log_in.html.j2" %} | ||||||
|     </div> |     </div> | ||||||
|     <div id="manual-modal-dashboard"> |     <div id="manual-modal-dashboard"> | ||||||
|       <br> |       <br> | ||||||
| @@ -28,10 +27,10 @@ | |||||||
|       <br> |       <br> | ||||||
|       {% include "_base/_modals/_manual/06_services.html.j2" %} |       {% include "_base/_modals/_manual/06_services.html.j2" %} | ||||||
|     </div> |     </div> | ||||||
|     <!-- <div id="manual-modal-a-closer-look-at-the-corpus-analysis"> |     <div id="manual-modal-a-closer-look-at-the-corpus-analysis"> | ||||||
|       <br> |       <br> | ||||||
|       {% include "_base/_modals/_manual/07_a_closer_look_at_the_corpus_analysis.html.j2" %} |       {% include "_base/_modals/_manual/07_a_closer_look_at_the_corpus_analysis.html.j2" %} | ||||||
|     </div> --> |     </div> | ||||||
|     <div id="manual-modal-cqp-query-language"> |     <div id="manual-modal-cqp-query-language"> | ||||||
|       <br> |       <br> | ||||||
|       {% include "_base/_modals/_manual/08_cqp_query_language.html.j2" %} |       {% include "_base/_modals/_manual/08_cqp_query_language.html.j2" %} | ||||||
|   | |||||||
| @@ -5,111 +5,28 @@ | |||||||
|         <div class="col s12"> |         <div class="col s12"> | ||||||
|           <h1 id="title">Terms of use</h1> |           <h1 id="title">Terms of use</h1> | ||||||
|         </div> |         </div> | ||||||
|  |  | ||||||
|         <div class="col s12"> |         <div class="col s12"> | ||||||
|           <p>With the usage of the nopaque platform you declare your acceptance of the General Terms of Use and that you have taken note of the legal framework and the data protection declaration.</p> |           <div class="switch"> | ||||||
|  |             <label> | ||||||
|  |               DE | ||||||
|  |               <input type="checkbox" id="terms-of-use-modal-switch"> | ||||||
|  |               <span class="lever"></span> | ||||||
|  |               EN | ||||||
|  |             </label> | ||||||
|           </div> |           </div> | ||||||
|  |           <br> | ||||||
|         <div class="col s12"> |  | ||||||
|           <div class="card"> |  | ||||||
|             <div class="card-content"> |  | ||||||
|               <span class="card-title">§ 1 Scope</span> |  | ||||||
|               <p>The General Terms of Use for the nopaque platform apply to everyone who uses the system as an authorised user in the sense of <b>§ 2</b> (1) of the General Terms of Use. By using the system and with your consent you accept these terms of use.</p> |  | ||||||
|         </div> |         </div> | ||||||
|  |         <div class="terms-of-use-modal-content hide"> | ||||||
|  |           {% include "main/terms_of_use_en.html.j2" %} | ||||||
|         </div> |         </div> | ||||||
|  |         <div class="terms-of-use-modal-content"> | ||||||
|  |           {% include "main/terms_of_use_de.html.j2" %} | ||||||
|         </div> |         </div> | ||||||
|  |  | ||||||
|         <div class="col s12"> |  | ||||||
|           <div class="card"> |  | ||||||
|             <div class="card-content"> |  | ||||||
|               <span class="card-title">§ 2 Right of use</span> |  | ||||||
|               <p>(1) The nopaque platform is available to users exclusively for the purposes of teaching and research. Any other use, especially for business or commercial purposes, is not permitted. The following groups shall be entitled to use the nopaque platform:</p> |  | ||||||
|               <ul class="browser-default"> |  | ||||||
|                 <li>students, teaching staff and employees at Bielefeld University</li> |  | ||||||
|                 <li>external researchers from outside the University Bielefeld</li> |  | ||||||
|               </ul> |  | ||||||
|               <p> </p> |  | ||||||
|               <p>(2) The use of the system is free of charge.</p> |  | ||||||
|               <p> </p> |  | ||||||
|               <p>(3) The duration of the right of use ends with the deletion of the user account by the user (see <b>§ 7</b>)</p> |  | ||||||
|             </div> |  | ||||||
|           </div> |  | ||||||
|         </div> |  | ||||||
|  |  | ||||||
|         <div class="col s12"> |  | ||||||
|           <div class="card"> |  | ||||||
|             <div class="card-content"> |  | ||||||
|               <span class="card-title">§ 3 Purpose of the Services</span> |  | ||||||
|               <p>nopaque is a custom-built web application which serves as a platform for preprocessing and analysing digital copies of various text based research data (books, letters, etc.) in different files and formats. nopaque converts image data – like photos or scans – into text data through OCR making it machine readable. This step makes it possible to proceed with further computational analysis of the documents. By means of computational linguistic data processing (tokenization, lemmatization, part-of-speech tagging and named-entity recognition) nopaque extracts additional information from texts.</p> |  | ||||||
|               <p> </p> |  | ||||||
|               <p>(1) Change of service</p> |  | ||||||
|               <p>The provider of the nopaque platform is entitled to change and supplement the scope of functions of nopaque without prior notice. This could result from a thematic and scientific reorientation of the project.</p> |  | ||||||
|               <p> </p> |  | ||||||
|               <p>(2) Support</p> |  | ||||||
|               <p>On nopaque, a contact form is available. As far as possible the SFB 1288 INF staff will try to provide user support.</p> |  | ||||||
|             </div> |  | ||||||
|           </div> |  | ||||||
|         </div> |  | ||||||
|  |  | ||||||
|         <div class="col s12"> |  | ||||||
|           <div class="card"> |  | ||||||
|             <div class="card-content"> |  | ||||||
|               <span class="card-title">§ 4 Obligations of the User</span> |  | ||||||
|               <p>(1) The system is suitable for normal security requirements. Data with a high need for protection (e.g. health data) may not be stored or processed in the nopaque platform.</p> |  | ||||||
|               <p> </p> |  | ||||||
|               <p>(2) Users of nopaque are responsible for their own entered contents. The uploading of illegal content, especially content that violates criminal, personal, data protection or copyright regulations (including § 60a) is not permitted.</p> |  | ||||||
|               <p> </p> |  | ||||||
|               <p>(3) Users undertake to indemnify Bielefeld University from all claims by third parties based on the data they use and to reimburse Bielefeld University for any costs incurred by the latter due to possible infringements of rights. This also includes the costs incurred by Bielefeld University in defending itself against such claims in and out of court.</p> |  | ||||||
|               <p> </p> |  | ||||||
|               <p>(4) Exclusion from use</p> |  | ||||||
|               <p>Bielefeld University is entitled to immediately block access to the service if there are reasonable grounds to suspect that the stored data is unlawful (e.g. harmful files uploaded via file upload) and/or violates the rights of third parties. Other infringements of the provisions of these Terms of Use, in particular the obligations under §6 also entitle Bielefeld University to block the user. Bielefeld University shall immediately notify the user of the block and the reason for the block. The block must be lifted as soon as the suspicion is invalidated.</p> |  | ||||||
|               <p> </p> |  | ||||||
|               <p>(5) Usage of Data</p> |  | ||||||
|               <p>The data stored by the user on the storage space intended for him may be legally protected, the responsibility for the processing of the data from these points of view lies solely with the user. By using nopaque, the user grants Bielefeld the right to process the data with the corresponding tools. At all times during processing in nopaque, data remains in the user's private storage location and will not be passed on to third parties.</p> |  | ||||||
|               <p> </p> |  | ||||||
|               <p>(6) Release of Bielefeld University from Third-Party Claims</p> |  | ||||||
|               <p>The user is responsible for the data stored by him/her in nopaque. Furthermore he/she is responsible for entering and maintaining the data and information required to use nopaque.</p> |  | ||||||
|               <p> </p> |  | ||||||
|               <p>The user is obliged to indemnify Bielefeld University against all claims by third parties based on the data stored by him/her and to reimburse Bielefeld University for any costs incurred as a result of possible legal infringements. This also includes the costs incurred by Bielefeld University for extrajudicial and judicial defense against these claims.</p> |  | ||||||
|             </div> |  | ||||||
|           </div> |  | ||||||
|         </div> |  | ||||||
|  |  | ||||||
|         <div class="col s12"> |  | ||||||
|           <div class="card"> |  | ||||||
|             <div class="card-content"> |  | ||||||
|               <span class="card-title">§ 5 Liability of Bielefeld University</span> |  | ||||||
|               <p>Claims for damages against Bielefeld University are excluded irrespective of the legal grounds. Bielefeld University shall not be liable for loss of data and information or other „indirect“ damages, e.g. loss of profit, loss of production, or other indirect damages. Bielefeld University shall not be liable for the loss of data to the extent that the damage is due to the fact that the user has failed to back up the data and thereby ensure that lost data can be restored with justifiable effort.</p> |  | ||||||
|               <p> </p> |  | ||||||
|               <p>nopaque is available in accordance with normal operational care based on the "Best Effort" practice. No liability is assumed for the consequences of failures or errors of the nopaque platform. Bielefeld University does not guarantee that the systems will run error-free and without interruption at all times. Bielefeld University accepts no responsibility for technical quality. Nor is it liable for the content, in particular for the accuracy, completeness, and timeliness of information to which it merely provides access for use.</p> |  | ||||||
|             </div> |  | ||||||
|           </div> |  | ||||||
|         </div> |  | ||||||
|  |  | ||||||
|         <div class="col s12"> |  | ||||||
|           <div class="card"> |  | ||||||
|             <div class="card-content"> |  | ||||||
|               <span class="card-title">§ 6 Data Protection</span> |  | ||||||
|               <p>Information on the handling of personal data during the operation of the service can be found in the separate data protection policy.</p> |  | ||||||
|             </div> |  | ||||||
|           </div> |  | ||||||
|         </div> |  | ||||||
|  |  | ||||||
|         <div class="col s12"> |  | ||||||
|           <div class="card"> |  | ||||||
|             <div class="card-content"> |  | ||||||
|               <span class="card-title">§ 7 Duration and Termination</span> |  | ||||||
|               <p>The user may terminate the use of nopaque by deleting his/her account at any time without giving reasons. After deletion of the account, all of the user's data will be automatically deleted and access to the service blocked. This does not affect the user's right to delete data under data protection law.</p> |  | ||||||
|               <p> </p> |  | ||||||
|               <p>Bielefeld University may exclude the user from using the service without notice for an important reason. Important reasons include, in particular, repeated violations of the provisions of these Terms of Use or of applicable laws.</p> |  | ||||||
|             </div> |  | ||||||
|           </div> |  | ||||||
|         </div> |  | ||||||
|  |  | ||||||
|       </div> |       </div> | ||||||
|     </div> |     </div> | ||||||
|   </div> |   </div> | ||||||
|   <div class="modal-footer"> |   <div class="modal-footer"> | ||||||
|     <a href="#!" class="modal-close waves-effect waves-green btn-flat">Accept</a> |     <span style="margin-right:20px;">I have taken note of the new GTC and agree to their validity in the context of my further use.</span> | ||||||
|  |     <a href="#!" class="modal-close waves-effect waves-green btn">Yes</a> | ||||||
|   </div> |   </div> | ||||||
| </div> | </div> | ||||||
|   | |||||||
| @@ -12,7 +12,7 @@ | |||||||
|         <li> |         <li> | ||||||
|           <a class="dropdown-trigger no-autoinit" data-target="nav-more-dropdown" href="#!" id="nav-more-dropdown-trigger"> |           <a class="dropdown-trigger no-autoinit" data-target="nav-more-dropdown" href="#!" id="nav-more-dropdown-trigger"> | ||||||
|             {% if current_user.is_authenticated %} |             {% if current_user.is_authenticated %} | ||||||
|             <img src="{{ url_for('users.user_avatar', user_id=current_user.id) }}" alt="avatar" class="circle left" style="height: 54px; padding: 10px 10px 0 0;"> |             <img src="{{ url_for('users.user_avatar', user_id=current_user.id) }}" alt="avatar" class="circle left" style="height: 54px; padding:8px;"> | ||||||
|             {{ current_user.username }} ({{ current_user.email }}) |             {{ current_user.username }} ({{ current_user.email }}) | ||||||
|             {% else %} |             {% else %} | ||||||
|             <i class="material-icons left">more_vert</i> |             <i class="material-icons left">more_vert</i> | ||||||
|   | |||||||
| @@ -131,3 +131,15 @@ | |||||||
|     app.flash(message, message); |     app.flash(message, message); | ||||||
|   } |   } | ||||||
| </script> | </script> | ||||||
|  |  | ||||||
|  | <script> | ||||||
|  |   let languageModalSwitch = document.querySelector('#terms-of-use-modal-switch'); | ||||||
|  |   let termsOfUseModalContent = document.querySelectorAll('.terms-of-use-modal-content'); | ||||||
|  |   if (languageModalSwitch) { | ||||||
|  |     languageModalSwitch.addEventListener('change', function() { | ||||||
|  |       termsOfUseModalContent.forEach(content => { | ||||||
|  |         content.classList.toggle('hide'); | ||||||
|  |       }); | ||||||
|  |     }); | ||||||
|  |   } | ||||||
|  | </script> | ||||||
|   | |||||||
| @@ -42,7 +42,7 @@ | |||||||
|             {{ form.hidden_tag() }} |             {{ form.hidden_tag() }} | ||||||
|             <div class="row"> |             <div class="row"> | ||||||
|               <div class="col s12 l5"> |               <div class="col s12 l5"> | ||||||
|                 {{ wtf.render_field(form.spacy_model_file, accept='.tar.gz', placeholder='Choose a .tar.gz file') }} |                 {{ wtf.render_field(form.spacy_model_file, accept='.tar.gz,.whl', placeholder='Choose a .tar.gz or .whl file') }} | ||||||
|               </div> |               </div> | ||||||
|               <div class="col s12 l7"> |               <div class="col s12 l7"> | ||||||
|                 {{ wtf.render_field(form.title, material_icon='title') }} |                 {{ wtf.render_field(form.title, material_icon='title') }} | ||||||
|   | |||||||
| @@ -273,7 +273,7 @@ publicCorpusFollowerList.add( | |||||||
| {% if cfr.has_permission('MANAGE_FILES') %} | {% if cfr.has_permission('MANAGE_FILES') %} | ||||||
| let followerBuildRequest = document.querySelector('#follower-build-request'); | let followerBuildRequest = document.querySelector('#follower-build-request'); | ||||||
| followerBuildRequest.addEventListener('click', () => { | followerBuildRequest.addEventListener('click', () => { | ||||||
|   requests.corpora.entity.build({{ corpus.hashid|tojson }}) |   nopaque.requests.corpora.entity.build({{ corpus.hashid|tojson }}) | ||||||
|     .then((response) => { |     .then((response) => { | ||||||
|       window.location.reload(); |       window.location.reload(); | ||||||
|     }); |     }); | ||||||
| @@ -285,7 +285,7 @@ followerBuildRequest.addEventListener('click', () => { | |||||||
| {% if current_user.is_following_corpus(corpus) %} | {% if current_user.is_following_corpus(corpus) %} | ||||||
|   let unfollowRequestElement = document.querySelector('.action-button[data-action="unfollow-request"]'); |   let unfollowRequestElement = document.querySelector('.action-button[data-action="unfollow-request"]'); | ||||||
|   unfollowRequestElement.addEventListener('click', () => { |   unfollowRequestElement.addEventListener('click', () => { | ||||||
|     requests.corpora.entity.followers.entity.delete({{ corpus.hashid|tojson }}, {{ current_user.hashid|tojson }}) |     nopaque.requests.corpora.entity.followers.entity.delete({{ corpus.hashid|tojson }}, {{ current_user.hashid|tojson }}) | ||||||
|       .then((response) => { |       .then((response) => { | ||||||
|         window.location.reload(); |         window.location.reload(); | ||||||
|       }); |       }); | ||||||
| @@ -335,7 +335,7 @@ M.Modal.init( | |||||||
|  |  | ||||||
| inviteUserModalInviteButtonElement.addEventListener('click', (event) => { | inviteUserModalInviteButtonElement.addEventListener('click', (event) => { | ||||||
|   let usernames = inviteUserModalSearch.chipsData.map((chipData) => chipData.tag); |   let usernames = inviteUserModalSearch.chipsData.map((chipData) => chipData.tag); | ||||||
|   requests.corpora.entity.followers.add({{ corpus.hashid|tojson }}, usernames) |   nopaque.requests.corpora.entity.followers.add({{ corpus.hashid|tojson }}, usernames) | ||||||
|     .then((response) => { |     .then((response) => { | ||||||
|       window.location.reload(); |       window.location.reload(); | ||||||
|     }); |     }); | ||||||
| @@ -383,7 +383,7 @@ M.Modal.init( | |||||||
| shareLinkModalCreateButtonElement.addEventListener('click', (event) => { | shareLinkModalCreateButtonElement.addEventListener('click', (event) => { | ||||||
|   let role = shareLinkModalCorpusFollowerRoleSelectElement.value; |   let role = shareLinkModalCorpusFollowerRoleSelectElement.value; | ||||||
|   let expiration = shareLinkModalExpirationDateDatepickerElement.value |   let expiration = shareLinkModalExpirationDateDatepickerElement.value | ||||||
|   requests.corpora.entity.generateShareLink({{ corpus.hashid|tojson }}, role, expiration) |   nopaque.requests.corpora.entity.generateShareLink({{ corpus.hashid|tojson }}, role, expiration) | ||||||
|     .then((response) => { |     .then((response) => { | ||||||
|       response.json() |       response.json() | ||||||
|         .then((json) => { |         .then((json) => { | ||||||
|   | |||||||
| @@ -8,117 +8,177 @@ | |||||||
|     </div> |     </div> | ||||||
|  |  | ||||||
|     <div class="col s12"> |     <div class="col s12"> | ||||||
|       <div class="card" id="april-2022-update"> |       <div class="card" id="news-post-january-2024"> | ||||||
|         <div class="card-content"> |         <div class="card-content"> | ||||||
|           <span class="card-title">April 2022 update</span> |           <h6 style="font-weight: 300;">January 2024</h6> | ||||||
|           <p>Dear users</p> |           <span class="card-title">Looking back on 2023 - new changes to nopaque</span> | ||||||
|           <br> |           <br> | ||||||
|  |           <p>Hello nopaque users!</p> | ||||||
|  |           <p>First of all, the nopaque team would like to wish everyone a good start to 2024! We hope you found the time to relax over the winter break.</p> | ||||||
|  |           <p>Now that the new year has come around and we’re all back in the office, we wanted to take the opportunity to tell you about the most important things we’ve worked on in nopaque in 2023 – things we’ve incorporated into our <b>latest nopaque update</b> as of late <b>December 2023</b>. You may have noticed some of them as you’ve returned to your projects on nopaque.</p> | ||||||
|  |           <br> | ||||||
|  |           <h6 style="font-weight: 300;">Changes to the Query Builder</h6> | ||||||
|           <p> |           <p> | ||||||
|             with the April 2022 update we have improved nopaque in all places. |             The Query Builder has undergone changes to make it more intuitive to use and is now the standard option for creating queries. | ||||||
|             We have significantly reworked our backend code to utilize our servers more efficiently, |             Individual elements of a query can now be easily modified and edited by clicking on them.  | ||||||
|             integrated a new service, updated all previously existing ones, rewrote a lot of code and made a few minor design improvements. |             An input marker shows your position in the inquiry and where new elements will be added. This and all other elements can be moved around via drag and drop.  | ||||||
|  |             A new toggle button enables users to easily switch between the Query Builder and Expert Mode if they prefer to work with the plain Corpus Query Language (CQL) instead. This can be done in the middle of an existing query – existing chips will be “translated” into CQL. | ||||||
|  |             This also works the other way around – if you want to switch back, your query in CQL will be parsed into chips. | ||||||
|  |             More details and instructions on how to use the new Query Builder can be found in the manual. | ||||||
|           </p> |           </p> | ||||||
|           <br> |           <br> | ||||||
|  |           <h6 style="font-weight: 300;">Community Update</h6> | ||||||
|           <span class="card-title">Where is my Job data?</span> |  | ||||||
|           <p> |           <p> | ||||||
|             At the beginning of the year, we realized that our storage limit had been reached. |             The most extensive changes to nopaque have taken place in the Social Area. We want nopaque to be a platform where researchers can connect with each other, so we’ve added some more features to make this possible.  | ||||||
|             This was the time when some users may have noticed system instabilities. |             Users can now update their personal profiles to be publicly visible to others on nopaque, including a short “About me” section and options to share your website, organization, location, and add an avatar that others can see. | ||||||
|             We were fortunately able to temporarily solve this problem without data loss |             It is also possible to share corpora with other researchers via share links, access invitations, or by setting corpus visibility to Public. Other users can only see the meta data of public corpora – further access can be granted upon request.  | ||||||
|             by deleting some non-nopaque related data on our system (yes we also do <a href="https://digital-history.uni-bielefeld.de">other things then nopaque</a>). |             The extent of access to these shared corpora is managed by assigning the roles of Viewer, Contributor, and Administrator. Viewers may only download the files. Contributors can download and edit files and their metadata as well as analyze and build the corpus. Administrators can manage users, followers and visibility, in addition to all of the above. | ||||||
|             In order to not face the same problem again, we had to dedicate ourselves to a long-term solution. |  | ||||||
|             This consists of deleting all previous job data with this update and henceforth storing new job data |  | ||||||
|             only for three months after job creation (important note: <b>corpora are not affected</b>). |  | ||||||
|             All job data prior to this update has been backed up for you, |  | ||||||
|             feel free to contact us at nopaque@uni-bielefeld.de if you would like to get this data back. |  | ||||||
|           </p> |           </p> | ||||||
|           <br> |           <br> | ||||||
|  |         </div> | ||||||
|  |       </div> | ||||||
|  |     </div> | ||||||
|  |  | ||||||
|           <span class="card-title">What's new?</span> |     <div class="col s12"> | ||||||
|  |       <div class="card" id="news-post-july-2023"> | ||||||
|  |         <div class="card-content"> | ||||||
|  |           <h6 style="font-weight: 300;">July 2023</h6> | ||||||
|  |           <span class="card-title">Visualization Update (beta) - new analysis features</span> | ||||||
|  |           <br> | ||||||
|  |           <p>Hey users,</p> | ||||||
|           <p> |           <p> | ||||||
|             By partnering up with <a href="https://readcoop.eu/transkribus/?sc=Transkribus">Transkribus</a> we reached one of our long term goals: integrate a HTR service into nopaque. |             we wanted to give you some news on updates we’re making to nopaque.  | ||||||
|             The <a href="{{ url_for('services.transkribus_htr_pipeline') }}">Transkribus HTR Pipeline</a> service is implemented as a kind of proxied service where the work is split between Transkribus and us. |             Since we want to make it easier for users to grasp and work with different elements of their data,  | ||||||
|  |             we’ve been working on adding some visualization features into the Corpus Analysis service. Currently, the two main modules,  | ||||||
|  |             “Reader” and “Concordance” have been expanded with an additional “Static Visualizations” module, but there’s more to come! | ||||||
|  |           </p> | ||||||
|  |           <p> | ||||||
|  |             With the Static Visualizations module, it’s now possible to view information  | ||||||
|  |             about your corpus, such as the number of (unique) tokens, sentences, lemmata,  | ||||||
|  |             corresponding information on individual texts, the distribution of these elements  | ||||||
|  |             within your corpus, as well as searchable lists of word frequencies with stopwords  | ||||||
|  |             that can be preset and modified. In the future, this area will be extended with more advanced visualization options. | ||||||
|  |           </p> | ||||||
|  |           <p> | ||||||
|  |             We’ll keep you posted about further visualization updates. Until then, we hope the latest update improves  | ||||||
|  |             your research experience with nopaque. And as always, if you have any ideas for nopaque or need assistance,  | ||||||
|  |             don’t hesitate to contact us! | ||||||
|  |           </p> | ||||||
|  |           <br> | ||||||
|  |         </div> | ||||||
|  |       </div> | ||||||
|  |     </div> | ||||||
|  |  | ||||||
|  |     <div class="col s12"> | ||||||
|  |       <div class="card" id="news-post-november-2022"> | ||||||
|  |         <div class="card-content"> | ||||||
|  |           <h6 style="font-weight: 300;">November 2022</h6> | ||||||
|  |           <span class="card-title">Contribution Update</span> | ||||||
|  |           <br> | ||||||
|  |           <p>Dear users,</p> | ||||||
|  |           <p> | ||||||
|  |             users can now upload their own language models into nopaque. This is useful for working with different languages that are not available as standard in nopaque or if a user wants to work with a language model they have developed themselves. Tesseract models can be uploaded in .traineddata format; spaCy models can be uploaded in tar.gz format. We are also working on the option to upload models in .whl format in the future. | ||||||
|  |             Uploaded models can be found in the model list of the corresponding service and can be used immediately. Models can also be made public if you have a role of Contributor in nopaque.  | ||||||
|  |           </p> | ||||||
|  |           <br> | ||||||
|  |           <p><b>Please note:</b> The Contributor role must be requested from the nopaque admins if you would like to make a model public for all users.</p> | ||||||
|  |           <br> | ||||||
|  |         </div> | ||||||
|  |       </div> | ||||||
|  |     </div> | ||||||
|  |    | ||||||
|  |     <div class="col s12"> | ||||||
|  |       <div class="card" id="news-post-april-2022"> | ||||||
|  |         <div class="card-content"> | ||||||
|  |           <h6 style="font-weight: 300;">April 2022</h6> | ||||||
|  |           <span class="card-title">April updates – more features, faster operation</span> | ||||||
|  |           <br> | ||||||
|  |           <p>Hello everyone,</p> | ||||||
|  |           <p> | ||||||
|  |             in April 2022, we released an update improving many elements of nopaque. We rewrote a lot of our code,  | ||||||
|  |             including a significant reworking of our backend code for more efficient use of our servers.  | ||||||
|  |             We integrated a new service, updated the existing ones, and made some minor design improvements. | ||||||
|  |           </p> | ||||||
|  |           <br> | ||||||
|  |           <h6 style="font-weight: 300;">Database Cleanup</h6> | ||||||
|  |           <p> | ||||||
|  |             We may be a bit late with our spring cleaning, but we’ve tidied up our  | ||||||
|  |             database system and deleted old, empty corpora, unconfirmed user accounts and  | ||||||
|  |             unnecessary data fields. | ||||||
|  |           </p> | ||||||
|  |           <h6 style="font-weight: 300;">What's new?</h6> | ||||||
|  |           <p> | ||||||
|  |             By partnering with Transkribus, we’ve reached one of our long-term goals: to integrate a  | ||||||
|  |             Handwritten Text Recognition (HTR) service into nopaque. The Transkribus HTR Pipeline service is implemented as a  | ||||||
|  |             kind of proxied service where the work is split between us and Transkribus.  | ||||||
|             That means we do the preprocessing, storage and postprocessing, while Transkribus handles the HTR itself. |             That means we do the preprocessing, storage and postprocessing, while Transkribus handles the HTR itself. | ||||||
|           </p> |           </p> | ||||||
|           <br> |  | ||||||
|  |  | ||||||
|           <p> |           <p> | ||||||
|             One of the changes in the background was to fix our performance issues. While implementing the <a href="{{ url_for('services.transkribus_htr_pipeline') }}">Transkribus HTR Pipeline</a> service we |             One change we needed to make in the background was to fix our performance issues.  | ||||||
|             found some optimization potential within different steps of our processing routine. These optimizations are now also |             While implementing the Transkribus HTR Pipeline service, we saw optimization potential  | ||||||
|             available in our <a href="{{ url_for('services.transkribus_htr_pipeline') }}">Tesseract OCR Pipeline</a> service, resulting in a speed up of about 4x. |             in different steps of our processing routine. These optimizations are now also available  | ||||||
|             For now we are done with the most obvious optimizations but we may include more in the near future, so stay tuned! |             in our Tesseract OCR Pipeline service and result in speeds that are about four times faster  | ||||||
|  |             than before. We’re now finished with the major optimizations, but there could be more soon,  | ||||||
|  |             so stay tuned! | ||||||
|  |           </p> | ||||||
|  |           <p> | ||||||
|  |             Next, we reorganized our Corpus Analysis code. It was a bit messy, but after a complete rewrite,  | ||||||
|  |             we are now able to query a corpus without long loading times and with better error handling,  | ||||||
|  |             making the user experience much more stable. The Corpus Analysis service is now modularized and comes with two modules  | ||||||
|  |             that recreate and extend the functionality of the old service. | ||||||
|  |           </p> | ||||||
|  |           <p> | ||||||
|  |             The Query Result viewer had to be temporarily disabled, as the code was based on the old Corpus Analysis service.  | ||||||
|  |             It will be reintegrated as a module to the Corpus Analysis. | ||||||
|  |           </p> | ||||||
|  |           <p> | ||||||
|  |             The spaCy NLP Pipeline service was also taken care of with some smaller updates. This is important preliminary work  | ||||||
|  |             for support of more models/languages missing the full set of linguistic features (lemma, ner, pos, simple_pos).  | ||||||
|  |             It still needs some testing and adjustments but will be ready soon!  | ||||||
|  |           </p> | ||||||
|  |           <p> | ||||||
|  |             Last, but not least, we made some design changes. Now, you can find color in places that were previously in black and white.  | ||||||
|  |             Nothing big, but the new colors can aid in identifying resources more efficiently.  | ||||||
|  |           </p> | ||||||
|  |           <h6 style="font-weight: 300;">Where is my job data?</h6> | ||||||
|  |           <p> | ||||||
|  |             We reached our storage limit at the beginning of the year.  | ||||||
|  |             At this time, some users may have noticed system instability.  | ||||||
|  |             Fortunately, we found a solution that avoided data loss by deleting some  | ||||||
|  |             non-nopaque related data in our system (yes, <a href="https://www.uni-bielefeld.de/fakultaeten/geschichtswissenschaft/abteilung/arbeitsbereiche/digital-history/">we also do things other than nopaque</a>).  | ||||||
|  |             To avoid facing the same problem again, we had to find a long-term solution.  | ||||||
|  |             In the end, this involved the deletion of all previous job data with this update and,  | ||||||
|  |             going forward, only keeping new job data for three months after job creation  | ||||||
|  |             (<b>important note:</b> corpora are not affected). All job data created prior to this  | ||||||
|  |             update has been backed up for you. Feel free to contact us at <a href="mailto:nopaque@uni-bielefeld.de">nopaque@uni-bielefeld.de</a>  | ||||||
|  |             if you would like to get this data back. | ||||||
|           </p> |           </p> | ||||||
|           <br> |           <br> | ||||||
|  |  | ||||||
|           <p> |  | ||||||
|             The next step was to reorganize our <a href="{{ url_for('services.corpus_analysis') }}">Corpus Analysis</a> code. Unfortunatly it was a bit messy, after a complete rewrite we are |  | ||||||
|             now able to query a corpus without long loading times and with better error handling, resulting in way more stable user experience. |  | ||||||
|             The Corpus Analysis service is now modularized and comes with 2 modules that recreate and extend the functionality of the old service.<br> |  | ||||||
|             For now we had to disable the Query Result viewer, the code was based on the old Corpus Analysis service and will be reintegrated as a module to the Corpus Analysis. |  | ||||||
|           </p> |  | ||||||
|           <br> |  | ||||||
|  |  | ||||||
|           <p> |  | ||||||
|             The <a href="{{ url_for('services.spacy_nlp_pipeline') }}">spaCy NLP Pipeline</a> service got some love in the form of smaller updates too. |  | ||||||
|             This is important preliminary work to support more models/languages that does not provide the full set of linguistic features (lemma, ner, pos, simple_pos). It still needs some testing and tweaking but will be ready soon! |  | ||||||
|           </p> |  | ||||||
|           <br> |  | ||||||
|  |  | ||||||
|           <p> |  | ||||||
|             Last but not least we made some design changes. Now you can find colors in places where we had just black and white before. |  | ||||||
|             Nothing big but the new colors will help you identify ressources more efficient! |  | ||||||
|           </p> |  | ||||||
|           <br> |  | ||||||
|  |  | ||||||
|           <span class="card-title">Database cleanup</span> |  | ||||||
|           <p> |  | ||||||
|             We may be a bit late with our spring cleaning but with this update we tidied up within our database system. |  | ||||||
|             This means we deleted old corpora with no corpus files, unconfirmed user accounts and in general unnecessary data fields. |  | ||||||
|           </p> |  | ||||||
|           <br> |  | ||||||
|  |  | ||||||
|           <p> |  | ||||||
|             That's it, thank you for using nopaque! We hope you like the update and appreciate all your past and future feedback. |  | ||||||
|           </p> |  | ||||||
|         </div> |         </div> | ||||||
|       </div> |       </div> | ||||||
|     </div> |     </div> | ||||||
|  |  | ||||||
|     <div class="col s12"> |     <div class="col s12"> | ||||||
|       <div class="card" id="maintenance"> |       <div class="card" id="news-post-september-2021"> | ||||||
|         <div class="card-content"> |  | ||||||
|           <span class="card-title">Maintenance</span> |  | ||||||
|           <p>Dear users</p> |  | ||||||
|           <br> |  | ||||||
|           <p>Currently we are rewriting big parts of our project infrastructure. Due to this the following features are not available:</p> |  | ||||||
|           <ul> |  | ||||||
|             <li>Corpus export and import</li> |  | ||||||
|             <li>Query result export, import and view</li> |  | ||||||
|           </ul> |  | ||||||
|           <p>We hope to add these features back in the near future, until then check out our updated corpus analysis.</p> |  | ||||||
|         </div> |  | ||||||
|       </div> |  | ||||||
|     </div> |  | ||||||
|  |  | ||||||
|     <div class="col s12"> |  | ||||||
|       <div class="card" id="nlp-removed-language-support"> |  | ||||||
|         <div class="card-content"> |  | ||||||
|           <span class="card-title">Natural Language Processing removed language support</span> |  | ||||||
|           <p>Dear users</p> |  | ||||||
|           <br> |  | ||||||
|           <p>Not all language models support all features we utizlize in our NLP service. Thats why we had to drop them, as soon as they meet our requirements we will add them back!</p> |  | ||||||
|         </div> |  | ||||||
|       </div> |  | ||||||
|     </div> |  | ||||||
|  |  | ||||||
|     <div class="col s12"> |  | ||||||
|       <div class="card" id="beta-launch"> |  | ||||||
|         <div class="card-content"> |         <div class="card-content"> | ||||||
|  |           <h6 style="font-weight: 300;">September 2021</h6> | ||||||
|           <span class="card-title">nopaque's beta launch</span> |           <span class="card-title">nopaque's beta launch</span> | ||||||
|           <p>Dear users</p> |  | ||||||
|           <br> |           <br> | ||||||
|           <p>A few days ago we went live with nopaque. Right now nopaque is still in its Beta phase. So some bugs are to be expected. If you encounter any bugs or some feature is not working as expected please send as an email using the feedback button at the botton of the page in the footer!</p> |           <p>Hello to all our users!</p> | ||||||
|           <p>We are happy to help you with any issues and will use the feedback to fix all mentioned bugs!</p> |           <p>The BETA version of our web platform, nopaque, is now available! Nopaque is a web application that offers different services and tools to support researchers working with image and text-based data. These services include:</p> | ||||||
|  |           <ul> | ||||||
|  |             <li>File Setup, which converts and merges different data (e.g., books, letters) for further processing</li> | ||||||
|  |             <li>Optical Character Recognition, which converts photos and scans into text data for machine readability</li> | ||||||
|  |             <li>Natural Language Processing, which extracts information from your text via computational linguistic data processing (tokenization, lemmatization, part-of-speech tagging and named-entity recognition)</li> | ||||||
|  |             <li>Corpus analysis, which makes use of CQP Query Language to search through text corpora with the aid of metadata and Natural Language Processing tags.</li> | ||||||
|  |           </ul> | ||||||
|  |           <p> | ||||||
|  |             Nopaque was created based on our experiences working with other subprojects and a prototype user study in the  | ||||||
|  |             first phase of funding. The platform is open source under the terms of the MIT license (<a href="https://gitlab.ub.uni-bielefeld.de/sfb1288inf/nopaque">https://gitlab.ub.uni-bielefeld.de/sfb1288inf/nopaque</a>).  | ||||||
|  |             Language support and functions are currently limited – extensions can be requested by sending an email to <a href="mailto:nopaque@uni-bielefeld.de">nopaque@uni-bielefeld.de</a>.  | ||||||
|  |             Because we are still in the beta phase, some bugs are to be expected. If you encounter any problems, please let us know!  | ||||||
|  |             We are thankful for all feedback we receive. | ||||||
|  |           </p> | ||||||
|         </div> |         </div> | ||||||
|       </div> |       </div> | ||||||
|     </div> |     </div> | ||||||
|   | |||||||
| @@ -8,14 +8,14 @@ | |||||||
|     </div> |     </div> | ||||||
|  |  | ||||||
|     <div class="col s12"> |     <div class="col s12"> | ||||||
|       <p>With these data protection notices, Bielefeld University fulfils its obligation to provide information in accordance with Articles 13 & 14 of the EU General Data Protection Regulation (GDPR) on the above-mentioned processing of personal data. Terms such as "personal data", "processing", "data controller", "third party", etc. are used as defined in Article 4 GDPR.</p> |       <p>With these data protection notices, Bielefeld University fulfils its obligation to provide information in accordance with Articles 13 & 14 of the <b>EU General Data Protection Regulation (GDPR)</b> on the above-mentioned processing of personal data. Terms such as "personal data", "processing", "data controller", "third party", etc. are used as defined in <b>Article 4 GDPR</b>.</p> | ||||||
|     </div> |     </div> | ||||||
|  |  | ||||||
|     <div class="col s12"> |     <div class="col s12"> | ||||||
|       <div class="card"> |       <div class="card"> | ||||||
|         <div class="card-content"> |         <div class="card-content"> | ||||||
|           <span class="card-title">§ 1 Contact Details</span> |           <span class="card-title">§ 1 Contact Details</span> | ||||||
|           <p>Bielefeld University, a legal entity under public law established by the state of North Rhine-Westphalia (NRW), is responsible for processing the data. It is represented by its rector, Prof. Dr. Ing. Gerhard Sagerer.</p> |           <p>Bielefeld University, a legal entity under public law established by the state of North Rhine-Westphalia (NRW), is responsible for processing the data. It is represented by its rector, Prof. Dr. Angelika Epple.</p> | ||||||
|           <h6>§ 1.1. Contact details of the data controller</h6> |           <h6>§ 1.1. Contact details of the data controller</h6> | ||||||
|           <ul class="browser-default"> |           <ul class="browser-default"> | ||||||
|             <li>Data protection officer of the Faculty of History</li> |             <li>Data protection officer of the Faculty of History</li> | ||||||
| @@ -34,10 +34,10 @@ | |||||||
|           <ul class="browser-default"> |           <ul class="browser-default"> | ||||||
|             <li>Dr. Johanna Vompras</li> |             <li>Dr. Johanna Vompras</li> | ||||||
|             <li>Email: |             <li>Email: | ||||||
|               <a href="mailto:johanna.vompras@uni-bielefeld.de">johanna.vompras@uni-bielefeld.de</a> |               <a href="mailto:nopaque@uni-bielefeld.de">nopaque@uni-bielefeld.de</a> | ||||||
|             </li> |             </li> | ||||||
|             <li>Web.: |             <li>Web.: | ||||||
|               <a href="https://www.uni-bielefeld.de/(en)/sfb1288/projekte/inf.html">https://www.uni-bielefeld.de/(en)/sfb1288/projekte/inf.html</a> |               <a href="https://www.uni-bielefeld.de/sfb/sfb1288/projektbereiche/inf/">https://www.uni-bielefeld.de/sfb/sfb1288/projektbereiche/inf/</a> | ||||||
|             </li> |             </li> | ||||||
|           </ul> |           </ul> | ||||||
|           <h6>§ 1.2. Contact details of the data protection officer</h6> |           <h6>§ 1.2. Contact details of the data protection officer</h6> | ||||||
| @@ -56,40 +56,55 @@ | |||||||
|       <div class="card"> |       <div class="card"> | ||||||
|         <div class="card-content"> |         <div class="card-content"> | ||||||
|           <span class="card-title">§ 2 General information on data processing and its purpose</span> |           <span class="card-title">§ 2 General information on data processing and its purpose</span> | ||||||
|           <p>We process the personal data of our users only to the extent necessary to provide a functioning website and its functionalities.</p> |           <p>We process the personal data of our users only to the extent necessary to provide a functioning website and its functionalities. Collecting this information enables us to better diagnose problems with the application, provide support more effectively as well as ensure the continuous functionality of the service.</p> | ||||||
|           <p>The following personal data is collected and stored within the system:</p> |           <p> The following (personal) data is collected and stored within the system:</p> | ||||||
|  |            | ||||||
|           <h6>Master Data</h6> |           <h6>Master Data</h6> | ||||||
|           <p>Within the scope of user authentication the following personal data is collected and processed:</p> |           <p>Within the scope of user authentication, the following personal data is collected and processed: </p> | ||||||
|           <ul class="browser-default"> |           <ul class="browser-default"> | ||||||
|             <li>User name</li> |             <li>User name</li> | ||||||
|             <li>E-Mail</li> |             <li>E-Mail</li> | ||||||
|           </ul> |           </ul> | ||||||
|           <p>Registration of the user is required for the provision of certain content and services within nopaque.</p> |           <p>The registration of users is required for the provision of access to services within NOPAQUE. The freely selectable username and the corresponding email address are used to persistently identify you in NOPAQUE. The provided email address might be used to contact you in case we notice a malfunction, to announce maintenance, or to spread important information regarding Nopaque. If you reset your password, Nopaque will use your email address to send you reset instructions via email. Nopaque does not show your email address and the username to other Nopaque users by default – except if the user has given permission to do so, according to <b>§ 7 paragraph 1</b> of the General Terms of Use for the use of NOPAQUE.</p> | ||||||
|           <h6>Protocol Data</h6> |            | ||||||
|  |           <h6>Protocol and administrative data</h6> | ||||||
|           <p>In general, when a website is visited, for technical reasons information is automatically sent from the browser to the server and stored there in access protocols. When using a web application, additional protocol data is also generated, which is necessary for tracking technical errors. This information includes:</p> |           <p>In general, when a website is visited, for technical reasons information is automatically sent from the browser to the server and stored there in access protocols. When using a web application, additional protocol data is also generated, which is necessary for tracking technical errors. This information includes:</p> | ||||||
|           <ul class="browser-default"> |           <ul class="browser-default"> | ||||||
|             <li>IP address</li> |             <li>IP address</li> | ||||||
|             <li>User account</li> |  | ||||||
|             <li>Complete HTTP request URL</li> |             <li>Complete HTTP request URL</li> | ||||||
|             <li>HTTP action (e.g. GET: call up a page, POST: send form data)</li> |             <li>HTTP action (e.g. GET: call up a page, POST: send form data)</li> | ||||||
|             <li>Access status (HTTP status code)</li> |             <li>Access status (HTTP status code)</li> | ||||||
|             <li>data volume retrieved</li> |  | ||||||
|             <li>Date and time of the action</li> |             <li>Date and time of the action</li> | ||||||
|             <li>User-Agent string</li> |  | ||||||
|           </ul> |           </ul> | ||||||
|           <p>Locally logged data will be used by the development team in order to debug and improve tools. This data can only be viewed by the technical administration and by the employees responsible for the nopaque platform. Data is stored for seven days to ensure proper technical operation and to find the cause of errors and is deleted <u>afterwards</u>.</p> |           <br> | ||||||
|           <p>Logged data may be used to understand how researchers are using the nopaque platform. To be able to use the data for research purposes, we reserve the right to store it in an anonymous and aggregated form for a longer period of time (up to two years after completion of the SFB 1288 INF project).</p> |           <p>Protocol data will be used by the development team in order to debug and improve the included tools. This data can only be viewed by the technical administration and by the employees responsible for the NOPAQUE platform. Protocol data is not directly associated with a user account. NOPAQUE deletes all this information when it gets more than six months old.</p> | ||||||
|           <h6>Cookies</h6> |           <br> | ||||||
|           <p>Browsers store so-called cookies. Cookies are files that can be stored by the provider of a website in the directory of the browser program on the user's computer. These files contain text information and can be read again by the provider when the page is called up again. The provider can use these cookies, for example, to always deliver pages in the theme selected by the user.</p> |           <p>Further administrative data, which is essentially needed – to enable a proper user registration and user management functionalities:</p> | ||||||
|           <p>The storage of cookies can be switched off in the browser settings or provided with an expiry time. By deactivating cookies, however, some functions that are controlled by cookies can then only be used to a limited extent or not at all.</p> |  | ||||||
|           <p>NOPAQUE uses cookies for the following purposes:</p> |  | ||||||
|           <ul class="browser-default"> |           <ul class="browser-default"> | ||||||
|             <li>Recognition of a user during a session in order to assign personal content and other user-defined settings.</li> |             <li>Attributes <b>member_since</b> and <b>last_seen</b>: for system cleaning purposes and detection of fake accounts</li> | ||||||
|             <li>Login Script with ‘Remember Me’ feature allows the user to preserve their logged in status. When the user checks the Remember Me option, then the logged in status is serialized in the session and stored in cookies in an encrypted way.</li> |             <li>Attribute <b>confirmed</b>: This attribute indicates whether the account has been activated via e-mail. This type of activation ensures that the stored e-mail address actually exists.</li> | ||||||
|  |             <li>Attribute <b>terms_of_use_accepted</b>: To verify that the terms of use have been accepted by the user. This attribute is set to false when the terms of use have changed. After the user has logged in again, the user is asked to accept the new terms of use.</li> | ||||||
|           </ul> |           </ul> | ||||||
|  |  | ||||||
|  |           <h6>Cookies</h6> | ||||||
|  |           <p>Cookies: Browsers store so-called cookies. Cookies are files that can be stored by the provider of a website in the directory of the browser program on the user's computer. These files contain text information and can be read again by the provider when the page is called up again. The provider can use these cookies, for example, to always deliver pages in the theme selected by the user.</p> | ||||||
|  |           <p>The storage of cookies can be switched off in the browser settings or provided with an expiry time. By deactivating cookies, however, some functions that are controlled by cookies can then only be used to a limited extent or not at all.</p> | ||||||
|  |           <br> | ||||||
|  |           <p>NOPAQUE uses <b>functionality cookies</b> which provide certain functions and allow the platform to remember important information and the user’s preferences:</p> | ||||||
|  |           <ul class="browser-default"> | ||||||
|  |             <li><b>session</b>: Recognition of a user during a session in order to assign personal content and other user-defined settings. The session cookie is deleted after closing the browser session.</li> | ||||||
|  |             <li><b>remember_token</b>: Login script with ‘remember me’ feature allowing the user to preserve their logged in status. When the user checks the Remember Me option, then the logged in status is serialized in the session and stored in cookies in an encrypted way.</li> | ||||||
|  |           </ul> | ||||||
|  |           <p>Cookies used by NOPAQUE do not collect personal information of the users.</p> | ||||||
|  |            | ||||||
|           <h6>Content Data</h6> |           <h6>Content Data</h6> | ||||||
|           <p>The content data includes all data that is entered or created by users themselves in the system. This data is listed here because it is assigned to individual authors and may contain personal data. This may include: uploaded files, images, texts or other media files. Please note that files and scans submitted to NOPAQUE are stored in order to allow persistent access during a work session and between work sessions.</p> |           <p>The content data includes all data that is entered or created by users themselves in the system. This data is listed here because it is assigned to individual authors and may contain personal data. This may include: <b>uploaded files, images, text documents, other media files</b> and <b>(interim) results</b> after data processing and computations. Please note that files and scans submitted to NOPAQUE are safely stored on the NOPAQUE server in order to allow persistent access during a work session and between work sessions.</p> | ||||||
|           <p>According to § 4 paragraph 2 of the General Terms of Use for the use of NOPAQUE at Bielefeld University, the users themselves are responsible for the content they post and must comply with the legal provisions of data protection. This includes in particular the deletion of personal data that may no longer be processed.</p> |           <br> | ||||||
|  |           <p>According to <b>§ 4 paragraph 1 - 3</b> of the General Terms of Use for the use of NOPAQUE at Bielefeld University, the users themselves are responsible for the content they upload and must comply with the legal provisions of data protection and copyright law. This includes in particular the deletion of personal data that may no longer be processed.</p> | ||||||
|  |            | ||||||
|  |           <h6>User-added Information (optional)</h6> | ||||||
|  |           <p>NOPAQUE also stores optionally user-added personal information, like user’s profile information (full name, affiliation) and user’s added profile photo (avatar).</p> | ||||||
|  |          | ||||||
|         </div> |         </div> | ||||||
|       </div> |       </div> | ||||||
|     </div> |     </div> | ||||||
| @@ -107,9 +122,14 @@ | |||||||
|     <div class="col s12"> |     <div class="col s12"> | ||||||
|       <div class="card"> |       <div class="card"> | ||||||
|         <div class="card-content"> |         <div class="card-content"> | ||||||
|           <span class="card-title">§ 4 Data transmissions</span> |           <span class="card-title">§ 4 Data transmissions and sharing of your data</span> | ||||||
|           <p>Your personal data, which are processed by Bielefeld University for the purposes mentioned under 2. will not be transferred to third parties.</p> |           <p>Your personal data, which are processed by Bielefeld University for the purposes mentioned under <b>2 A - D</b> will not be transferred to third parties.</p> | ||||||
|  |           <br> | ||||||
|           <p>In individual cases, data may also be legally transmitted to third parties, for example, to law enforcement authorities for the investigation of criminal offences within the framework of the <b>Code of Criminal Procedure (StPO)</b>. If technical service providers are given access to personal data, this is done on the basis of a contract in accordance with <b>Article 28 GDPR</b>.</p> |           <p>In individual cases, data may also be legally transmitted to third parties, for example, to law enforcement authorities for the investigation of criminal offences within the framework of the <b>Code of Criminal Procedure (StPO)</b>. If technical service providers are given access to personal data, this is done on the basis of a contract in accordance with <b>Article 28 GDPR</b>.</p> | ||||||
|  |           <br> | ||||||
|  |           <p>NOPAQUE centrally bundles sophisticated data processing functionalities related to Digital Humanities (for example: pattern recognition or text mining) on its platform which are offered by third parties on external platforms. No personal data is passed on to third parties - only content data which is sent anonymously to the APIs of the third-party tools. The results of the calculations are then sent back to NOPAQUE and saved as results in the user’s account.</p> | ||||||
|  |           <br> | ||||||
|  |           <p>For personal data mentioned under <b>2 E</b> the following applies: we do not share your personal data with third parties unless you have given your consent. This consent is given if the user actively makes his profile public in NOPAQUE. As a consequence, user-added information (as listed under point 2 E of this declaration) and, if applicable, a listing of public corpora with metadata is then made visible to other NOPAQUE users.</p> | ||||||
|         </div> |         </div> | ||||||
|       </div> |       </div> | ||||||
|     </div> |     </div> | ||||||
| @@ -117,8 +137,8 @@ | |||||||
|     <div class="col s12"> |     <div class="col s12"> | ||||||
|       <div class="card"> |       <div class="card"> | ||||||
|         <div class="card-content"> |         <div class="card-content"> | ||||||
|           <span class="card-title">§ 5 Duration of processing / data deletion</span> |           <span class="card-title">§ 5 Duration of processing of your data and data retention</span> | ||||||
|           <p>Data processed for user authentication are deleted immediately after account deletion.</p> |           <p>Data processed for user authentication, all personal data and contents uploaded by the user (listed in <b>2</b>) are deleted immediately after account deletion.</p> | ||||||
|         </div> |         </div> | ||||||
|       </div> |       </div> | ||||||
|     </div> |     </div> | ||||||
| @@ -126,7 +146,7 @@ | |||||||
|     <div class="col s12"> |     <div class="col s12"> | ||||||
|       <div class="card"> |       <div class="card"> | ||||||
|         <div class="card-content"> |         <div class="card-content"> | ||||||
|           <span class="card-title">§ 6 Your rights as a data subject</span> |           <span class="card-title">§ 6 Your data protection rights and choices as NOPAQUE user</span> | ||||||
|           <p>As a data subject, you have certain rights under <b>GDPR</b> that you may assert at any time:</p> |           <p>As a data subject, you have certain rights under <b>GDPR</b> that you may assert at any time:</p> | ||||||
|           <ul class="browser-default"> |           <ul class="browser-default"> | ||||||
|             <li>the right to access information about whether or not personal data concerning you is processed, and if so, what categories of data are being processed (<b>Article 15 GDPR</b>),</li> |             <li>the right to access information about whether or not personal data concerning you is processed, and if so, what categories of data are being processed (<b>Article 15 GDPR</b>),</li> | ||||||
|   | |||||||
| @@ -6,106 +6,36 @@ | |||||||
|     <div class="col s12"> |     <div class="col s12"> | ||||||
|       <h1 id="title">{{ title }}</h1> |       <h1 id="title">{{ title }}</h1> | ||||||
|     </div> |     </div> | ||||||
|  |  | ||||||
|     <div class="col s12"> |     <div class="col s12"> | ||||||
|       <p>With the usage of the nopaque platform you declare your acceptance of the General Terms of Use and that you have taken note of the legal framework and the data protection declaration.</p> |       <div class="switch"> | ||||||
|  |         <label> | ||||||
|  |           DE | ||||||
|  |           <input type="checkbox" id="terms-of-use-page-switch"> | ||||||
|  |           <span class="lever"></span> | ||||||
|  |           EN | ||||||
|  |         </label> | ||||||
|       </div> |       </div> | ||||||
|  |       <br> | ||||||
|     <div class="col s12"> |  | ||||||
|       <div class="card"> |  | ||||||
|         <div class="card-content"> |  | ||||||
|           <span class="card-title">§ 1 Scope</span> |  | ||||||
|           <p>The General Terms of Use for the nopaque platform apply to everyone who uses the system as an authorised user in the sense of <b>§ 2</b> (1) of the General Terms of Use. By using the system and with your consent you accept these terms of use.</p> |  | ||||||
|         </div> |  | ||||||
|       </div> |  | ||||||
|     </div> |  | ||||||
|  |  | ||||||
|     <div class="col s12"> |  | ||||||
|       <div class="card"> |  | ||||||
|         <div class="card-content"> |  | ||||||
|           <span class="card-title">§ 2 Right of use</span> |  | ||||||
|           <p>(1) The nopaque platform is available to users exclusively for the purposes of teaching and research. Any other use, especially for business, commercial is not permitted. The following groups shall be entitled to use the nopaque platform:</p> |  | ||||||
|           <ul class="browser-default"> |  | ||||||
|             <li>students, teaching staff and employees at Bielefeld University</li> |  | ||||||
|             <li>external researchers from outside the University Bielefeld</li> |  | ||||||
|           </ul> |  | ||||||
|           <p> </p> |  | ||||||
|           <p>(2) The use of the system is free of charge.</p> |  | ||||||
|           <p> </p> |  | ||||||
|           <p>(3) The duration of the right of use ends with the deletion of the user account by the user (see <b>§ 7</b>)</p> |  | ||||||
|         </div> |  | ||||||
|       </div> |  | ||||||
|     </div> |  | ||||||
|  |  | ||||||
|     <div class="col s12"> |  | ||||||
|       <div class="card"> |  | ||||||
|         <div class="card-content"> |  | ||||||
|           <span class="card-title">§ 3 Purpose of the Services</span> |  | ||||||
|           <p>nopaque custom-built web application which serves as a platform for preprocessing and analysing digital copies of various text based research data (books, letters, etc.) in different files and formats. nopaque converts image data – like photos or scans – into text data through OCR making it machine readable. This step enables to proceed with further computational analysis of the documents. By means of computational linguistic data processing (tokenization, lemmatization, part-of-speech tagging and named-entity recognition) nopaque extracts additional information from texts.</p> |  | ||||||
|           <p> </p> |  | ||||||
|           <p>(1) Change of service</p> |  | ||||||
|           <p>The provider of the nopaque platform is entitled to change and supplement the scope of functions of nopaque without prior notice. This could result from a thematic and scientific reorientation of the project.</p> |  | ||||||
|           <p> </p> |  | ||||||
|           <p>(2) Support</p> |  | ||||||
|           <p>On nopaque, a contact form is available. As far as possible the SFB 1288 INF staff will try to provide user support.</p> |  | ||||||
|         </div> |  | ||||||
|       </div> |  | ||||||
|     </div> |  | ||||||
|  |  | ||||||
|     <div class="col s12"> |  | ||||||
|       <div class="card"> |  | ||||||
|         <div class="card-content"> |  | ||||||
|           <span class="card-title">§ 4 Obligations of the User</span> |  | ||||||
|           <p>(1) The system is suitable for normal security requirements. Data with a high need for protection (e.g. health data) may not be stored or processed in the nopaque platform.</p> |  | ||||||
|           <p> </p> |  | ||||||
|           <p>(2) Users of nopaque are responsible for their own entered contents. The uploading of illegal content, especially content that violates criminal, personal, data protection or copyright regulations (including § 60a) is not permitted.</p> |  | ||||||
|           <p> </p> |  | ||||||
|           <p>(3) Users undertake to indemnify Bielefeld University from all claims by third parties based on the data they use and to reimburse Bielefeld University for any costs incurred by the latter due to possible infringements of rights. This also includes the costs incurred by Bielefeld University in defending itself against such claims in and out of court.</p> |  | ||||||
|           <p> </p> |  | ||||||
|           <p>(4) Exclusion from use</p> |  | ||||||
|           <p>Bielefeld University is entitled to immediately block access to the service if there are reasonable grounds to suspect that the stored data is unlawful (e.g upload harmful files via file upload) and/or violates the rights of third parties. Other infringements of the provisions of these Terms of Use, in particular the obligations under §6 also entitle Bielefeld University to block the user. Bielefeld University shall immediately notify the user of the block and the reason for the block. The block must be lifted as soon as the suspicion is invalidated.</p> |  | ||||||
|           <p> </p> |  | ||||||
|           <p>(5) Usage of Data</p> |  | ||||||
|           <p>The data stored by the user on the storage space intended for him may be legally protected, the responsibility for the processing of the data from these points of view lies solely with the user. By using nopaque, the user grants Bielefeld the right to process the data with the corresponding tools. At all times during processing in nopaque, data remains in the user's private storage location and will not passed on to third parties.</p> |  | ||||||
|           <p> </p> |  | ||||||
|           <p>(6) Release of Bielefeld University from Third-Party Claims</p> |  | ||||||
|           <p>The user is responsible for the data stored by him/her in nopaque. Furthermore he/she is responsible for entering and maintaining the data and information required to use nopaque.</p> |  | ||||||
|           <p> </p> |  | ||||||
|           <p>The user is obliged to indemnify Bielefeld University against all claims by third parties based on the data stored by him/her and to reimburse Bielefeld University for any costs incurred as a result of possible legal infringements. This also includes the costs incurred by Bielefeld University for extrajudicial and judicial defense against these claims.</p> |  | ||||||
|         </div> |  | ||||||
|       </div> |  | ||||||
|     </div> |  | ||||||
|  |  | ||||||
|     <div class="col s12"> |  | ||||||
|       <div class="card"> |  | ||||||
|         <div class="card-content"> |  | ||||||
|           <span class="card-title">§ 5 Liability of Bielefeld University</span> |  | ||||||
|           <p>Claims for damages against Bielefeld University are excluded irrespective of the legal grounds. Bielefeld University shall not be liable for loss of data and information or other „indirect“ damages, e.g. loss of profit, loss of production, or other indirect damages. Bielefeld University shall not be liable for the loss of data to the extent that the damage is due to the fact that the user has failed to back up the data and thereby ensure that lost data can be restored with justifiable effort.</p> |  | ||||||
|           <p> </p> |  | ||||||
|           <p>nopaque is available in accordance with normal operational care based on the "Best Effort" practice. No liability is assumed for the consequences of failures or errors of the nopaque platform. Bielefeld University does not guarantee that the systems will run error-free and without interruption at all times. Bielefeld University accepts no responsibility for technical quality. Nor is it liable for the content, in particular for the accuracy, completeness, and timeliness of information to which it merely provides access for use.</p> |  | ||||||
|         </div> |  | ||||||
|       </div> |  | ||||||
|     </div> |  | ||||||
|  |  | ||||||
|     <div class="col s12"> |  | ||||||
|       <div class="card"> |  | ||||||
|         <div class="card-content"> |  | ||||||
|           <span class="card-title">§ 6 Data Protection</span> |  | ||||||
|           <p>Information on the handling of personal data during the operation of the service can be found in the separate data protection policy.</p> |  | ||||||
|         </div> |  | ||||||
|       </div> |  | ||||||
|     </div> |  | ||||||
|  |  | ||||||
|     <div class="col s12"> |  | ||||||
|       <div class="card"> |  | ||||||
|         <div class="card-content"> |  | ||||||
|           <span class="card-title">§ 7 Duration and Termination</span> |  | ||||||
|           <p>The user may terminate the use nopaque by deleting his/her account at any time without giving reasons. After deletion of the account, all users‘ data will be automatically deleted and access to the service blocked. This does not affect the user's right to delete data under data protection law.</p> |  | ||||||
|           <p> </p> |  | ||||||
|           <p>Bielefeld University may exclude the user from using the service without notice for an important reason. Important reasons include, in particular, repeated violations of the provisions of these Terms of Use or of applicable laws.</p> |  | ||||||
|     </div> |     </div> | ||||||
|  |     <div class="terms-of-use-page-content hide"> | ||||||
|  |       {% include "main/terms_of_use_en.html.j2" %} | ||||||
|     </div> |     </div> | ||||||
|  |     <div class="terms-of-use-page-content"> | ||||||
|  |       {% include "main/terms_of_use_de.html.j2" %} | ||||||
|     </div> |     </div> | ||||||
|   </div> |   </div> | ||||||
| </div> | </div> | ||||||
| {% endblock page_content %} | {% endblock page_content %} | ||||||
|  |  | ||||||
|  | {% block scripts %} | ||||||
|  | {{ super() }} | ||||||
|  | <script> | ||||||
|  |   let languagePageSwitch = document.querySelector('#terms-of-use-page-switch'); | ||||||
|  |   let termsOfUsePageContent = document.querySelectorAll('.terms-of-use-page-content'); | ||||||
|  |   languagePageSwitch.addEventListener('change', function() { | ||||||
|  |     termsOfUsePageContent.forEach(content => { | ||||||
|  |       content.classList.toggle('hide'); | ||||||
|  |     }); | ||||||
|  |   }); | ||||||
|  | </script> | ||||||
|  | {% endblock scripts %} | ||||||
|   | |||||||
							
								
								
									
										141
									
								
								app/templates/main/terms_of_use_de.html.j2
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										141
									
								
								app/templates/main/terms_of_use_de.html.j2
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,141 @@ | |||||||
|  | <div class="col s12"> | ||||||
|  |   <p>Mit Nutzung der Plattform nopaque stimmen Sie den Allgemeinen Geschäftsbedingungen zu und bestätigen, den rechtlichen Rahmen und die Datenschutzerklärung zur Kenntnis genommen zu haben.</p> | ||||||
|  | </div> | ||||||
|  |  | ||||||
|  | <div class="col s12"> | ||||||
|  |   <div class="card"> | ||||||
|  |     <div class="card-content"> | ||||||
|  |       <span class="card-title">§ 1 Inhalt und Umfang des Serviceangebots</span> | ||||||
|  |       <p>(1)	Diese Allgemeinen Geschäftsbedingungen gelten für die Nutzung der Angebote des browserbasierten Datenverarbeitungs- und Textanalysetools „nopaque“ (nachfolgend „Serviceangebot“), der Universität Bielefeld (nachfolgend Universität), das von berechtigten Nutzern kostenlos über die Internetseite www.nopaque.uni-bielefeld.de verwendet werden kann.</p> | ||||||
|  |       <p>(2)	Berechtigt zur Nutzung des Serviceangebotes sind lediglich</p> | ||||||
|  |       <ol type="a"> | ||||||
|  |         <li>Studierende, Lehrende und Beschäftigte der Universität Bielefeld</li> | ||||||
|  |         <li>Externe Wissenschaftler*innen  im Rahmen nichtkommerzieller Forschung</li> | ||||||
|  |       </ol> | ||||||
|  |     </div> | ||||||
|  |   </div> | ||||||
|  | </div> | ||||||
|  |  | ||||||
|  | <div class="col s12"> | ||||||
|  |   <div class="card"> | ||||||
|  |     <div class="card-content"> | ||||||
|  |       <span class="card-title">§ 2 Serviceangebot</span> | ||||||
|  |       <p>(1)	Im Rahmen des Serviceangebots können von den Nutzern hochzuladende Daten automatisiert verarbeitet und analysiert werden.  Dies ist in Form von folgenden, einzeln ansteuerbaren, Services umgesetzt:</p> | ||||||
|  |       <ol type="a"> | ||||||
|  |         <li><b>File Setup</b>: Digitale Kopien von textbasierten Forschungsdaten (Bücher, Briefe usw.) bestehen oft aus verschiedenen Dateien und Formaten. Nopaque konvertiert und führt diese Dateien zusammen, um die Weiterverarbeitung und die Anwendung anderer Dienste zu erleichtern.</li> | ||||||
|  |         <li><b>Optical Character Recognition</b> (OCR): nopaque wandelt Ihre Bilddaten - wie Fotos oder Scans - durch OCR in Textdaten um und macht sie maschinenlesbar. Dieser Schritt ermöglicht es Ihnen, mit der weiteren rechnerischen Analyse Ihrer Dokumente fortzufahren.</li> | ||||||
|  |         <li><b>Handwritten Text Recognition</b> (HTR): nopaque wandelt Ihre Bilddaten von handschriftlichen Texten - wie Fotos oder Scans - mittels HTR in Textdaten um und macht sie maschinenlesbar. Dieser Schritt ermöglicht es Ihnen, mit der weiteren rechnerischen Analyse Ihrer Dokumente fortzufahren.</li> | ||||||
|  |         <li><b>Natural Language Processing</b> (NLP): Mittels computergestützter linguistischer Datenverarbeitung (Tokenisierung, Lemmatisierung, Part-of-Speech-Tagging und Named-Entity-Erkennung) extrahiert nopaque zusätzliche Informationen aus Ihrem Text.</li> | ||||||
|  |         <li><b>Corpus analysis</b>: Mit nopaque können Sie so viele Textkorpora erstellen und hochladen, wie Sie möchten. Es nutzt die CQP Query Language, die komplexe Suchanfragen mit Hilfe von Metadaten und NLP-Tags ermöglicht.</li> | ||||||
|  |       </ol> | ||||||
|  |       <p>(2)	Der Nutzer ist auch berechtigt, im Rahmen des Serviceangebots erstellte Textaufbereitungen (Corpora) innerhalb der Nutzerplattform des Serviceangebots durch die „Teilen“-Funktionen mit anderen registrierten Nutzern zu teilen. Ein Zugang zu fremden Corpora kann jeweils mit Leserechten (Viewer), mit Lese-und Bearbeitungsrechten (Contributor) oder Co-Administratorrechten (Administrator) eingeräumt werden.</p> | ||||||
|  |       <p>(3)	Die Nutzer können im Rahmen ihres Nutzerkontos eigene Sprachmodelle hochladen und diese zur Analyse hochgeladener Dateien verwenden. Nach Freischaltung durch einen Administrator können Nutzer ihre hochgeladenen Sprachmodelle auch öffentlich im Rahmen der Plattform zur Verfügung stellen.</p> | ||||||
|  |     </div> | ||||||
|  |   </div> | ||||||
|  | </div> | ||||||
|  |  | ||||||
|  | <div class="col s12"> | ||||||
|  |   <div class="card"> | ||||||
|  |     <div class="card-content"> | ||||||
|  |       <span class="card-title">§ 3 Zugang zum Serviceangebot</span> | ||||||
|  |       <p>(1) Die Nutzer registrieren sich über die Eingabe der abgefragten Daten in der Anmeldemaske. Danach muss der Nutzer einen von ihm gewählten Benutzernamen und ein Passwort bestimmen und eingeben.</p> | ||||||
|  |       <p>(2) Der Nutzer ist für die Geheimhaltung des Benutzernamens sowie des Passwortes selbst verantwortlich.</p> | ||||||
|  |       <p>(3) Nach Freischaltung durch einen Administrator können Nutzer durch eine API-Schnittstelle auch persönliche Zugangstoken erstellen und damit auf alle Daten zugreifen, auf die der Nutzer selbst zugreifen kann.</p> | ||||||
|  |     </div> | ||||||
|  |   </div> | ||||||
|  | </div> | ||||||
|  |  | ||||||
|  | <div class="col s12"> | ||||||
|  |   <div class="card"> | ||||||
|  |     <div class="card-content"> | ||||||
|  |       <span class="card-title">§ 4 Allgemeine Pflichten der Nutzer</span> | ||||||
|  |       <p>(1)	Mit der Registrierung erklären die Nutzer ihre Zustimmung zu den Nutzungsbedingungen. Das rechtswidrige Hochladen, Verarbeiten und Teilen von Inhalten insbesondere entgegen strafrechtlicher, persönlichkeitsrechtlicher, datenschutzrechtlicher oder urheberrechtlicher Regelungen ist nicht gestattet. Durch das Hochladen, Bearbeiten und/oder Teilen entsprechender Texte, Corpora und/oder Analyseergebnisse erklärt der jeweilige Nutzer, Inhaber aller dazu jeweils erforderlichen Rechte zu sein.</p> | ||||||
|  |       <p>(2)	Die Nutzer verpflichten sich, auch im Rahmen der Gestaltung des persönlichen Nutzerprofils und der Nutzung von Foren der Plattform, Medien und Texte nur auf gesetzlich erlaubte Art und Weise zu verwenden und illegale Äußerungen zu unterlassen.</p> | ||||||
|  |       <p>(3)	Das Serviceangebot ist lediglich zur Verarbeitung von Daten ohne erhöhte Schutzbedürfnisse geeignet. Daten, welche besonderen datenschutzrechtlichen Anforderungen unterliegen (bspw. Gesundheitsdaten, siehe Art. 9 DSGVO) dürfen nicht gespeichert oder verarbeitet werden.</p> | ||||||
|  |     </div> | ||||||
|  |   </div> | ||||||
|  | </div> | ||||||
|  |  | ||||||
|  | <div class="col s12"> | ||||||
|  |   <div class="card"> | ||||||
|  |     <div class="card-content"> | ||||||
|  |       <span class="card-title">§ 5 Haftung der Nutzer</span> | ||||||
|  |       <p>(1)	Sofern aufgrund der Nutzung eines nicht berechtigten Nutzers (siehe hierzu § 1 Abs. 2) oder aufgrund einer unerlaubten Nutzung (siehe hierzu § 4 Abs. 1, 2) von Texten, Analyseergebnissen oder sonstigen Medien (bspw. einem hochgeladenen Profilbild) im Rahmen des Serviceangebots Ansprüche Dritter gegen die Universität Bielefeld geltend gemacht werden, stellt der für die Rechtsverletzung verantwortliche Nutzer die Universität von diesen Ansprüchen frei.</p> | ||||||
|  |       <p>(2)	Die Nutzer haften bzgl. der Geheimhaltung und Weitergabe von Zugangsdaten für jegliche Schäden, die ihnen entstehen. Sofern aufgrund einer unerlaubten Verwendung von Zugangsdaten/Token Ansprüche Dritter gegen die Universität Bielefeld geltend gemacht werden, stellt der für die Rechtsverletzung verantwortliche Nutzer die Universität von diesen Ansprüchen frei.</p> | ||||||
|  |     </div> | ||||||
|  |   </div> | ||||||
|  | </div> | ||||||
|  |  | ||||||
|  | <div class="col s12"> | ||||||
|  |   <div class="card"> | ||||||
|  |     <div class="card-content"> | ||||||
|  |       <span class="card-title">§ 6 Nutzungsrechte des Nutzers bzgl. erstellter Corpora und Analyseberichte</span> | ||||||
|  |       <p>(1)	Der Nutzer ist in den Grenzen des § 4 berechtigt, Texte mit der Webanwendung aufzubereiten, im Rahmen der Webanwendung einzusehen und herunterzuladen. Regelungen zum Teilen der so erstellten Corpora ergeben sich nach § 7 dieser AGB.</p> | ||||||
|  |       <p>(2)	Der Nutzer ist auch berechtigt, den jeweils auf Grundlage der aufbereiteten Texte zur Verfügung gestellten Analysebericht zu Zwecken von Forschung und Lehre in den Grenzen des § 4 im Rahmen der Online-Anwendung zu lesen, herunterzuladen, zu vervielfältigen und zu veröffentlichen. Die Berechtigung steht nur denjenigen Nutzern zu, welche die entsprechenden Analyseberichte generieren. Die Berechtigung steht unter der auflösenden Bedingung, dass im Rahmen entsprechender Vervielfältigungen/Veröffentlichungen von Analyseberichten oder deren Ausschnitten als Quellenangabe vermerkt wird: nopaque [Software]. (2020). SFB 1288 – Teilprojekt INF, Universität Bielefeld. https://nopaque.uni-bielefeld.de/</p> | ||||||
|  |     </div> | ||||||
|  |   </div> | ||||||
|  | </div> | ||||||
|  |  | ||||||
|  | <div class="col s12"> | ||||||
|  |   <div class="card"> | ||||||
|  |     <div class="card-content"> | ||||||
|  |       <span class="card-title">§ 7 Teilen von zuvor erstellten Corpora</span> | ||||||
|  |       <p>(1)	Das Teilen von Corpora wird auf folgende Art und Weise ermöglicht:</p> | ||||||
|  |       <ol type="a">  | ||||||
|  |         <li>Es kann ein Link generiert werden, welcher anderen angemeldeten Nutzern Zugang zu den auf den Servern des Serviceangebots gespeicherten Corpora eines Nutzers ermöglicht.</li> | ||||||
|  |         <li>Corpora können in eine durch alle Nutzer einsehbare Liste eingefügt werden, in deren Rahmen sich andere Nutzer über den vorhandenen Bestand informieren und durch Kontaktierung des jeweiligen Erstellers einen Zugang erbitten können. Der Ersteller entscheidet dann in eigener Verantwortung über die Zulässigkeit des erbetenen Zugangs und ermöglicht bis zum Widerruf den Zugriff auf die Corpora durch die jeweiligen anderen Nutzer durch das Teilen eines Links im obigen Sinne.</li> | ||||||
|  |       </ol> | ||||||
|  |       <p>(2)	In Bezug auf das Teilen von Corpora, welche urheberrechtlich geschützte Medien enthalten, ist insb. § 60d UrhG einzuhalten. Es sind durch den hochladenden Nutzer insb. die dort genannten Löschfristen einzuhalten. Es wird ausdrücklich darauf hingewiesen, dass das regelwidrige Verbreiten von Corpora oder Ausgangstexten zu Pflichtverletzungen nach § 4 führen kann, deren finanzieller Schaden von den jeweiligen Nutzern zu tragen ist.</p> | ||||||
|  |     </div> | ||||||
|  |   </div> | ||||||
|  | </div> | ||||||
|  |  | ||||||
|  | <div class="col s12"> | ||||||
|  |   <div class="card"> | ||||||
|  |     <div class="card-content"> | ||||||
|  |       <span class="card-title">§ 8 Verfügbarkeit und Support</span> | ||||||
|  |       <p>(1)	Die Universität ist berechtigt, soweit es im Interesse des Nutzers erforderlich ist, Wartungsarbeiten  auch während der Betriebszeit vorzunehmen. Hierbei kann es zu Störungen des Datenabrufs kommen, die die Universität möglichst geringhalten wird.</p> | ||||||
|  |       <p>(2)	Die Universität ist berechtigt, den Funktionsumfang des Serviceangebots zu ändern und/oder zu modifizieren, ohne die Nutzer zuvor zu benachrichtigen. Die Universität kann, ohne hierzu verpflichtet zu sein, die Software jederzeit aktualisieren oder weiterentwickeln und insbesondere aufgrund geänderter Rechtslage, technischer Entwicklungen, aufgrund einer wissenschaftlichen oder thematischen Umstrukturierung des Projekts oder zur Verbesserung der IT-Sicherheit anpassen.</p> | ||||||
|  |       <p>(3)  Nutzern wird im Rahmen des Webauftritts ein Supportformular zur Verfügung gestellt.</p> | ||||||
|  |     </div> | ||||||
|  |   </div> | ||||||
|  | </div> | ||||||
|  |  | ||||||
|  | <div class="col s12"> | ||||||
|  |   <div class="card"> | ||||||
|  |     <div class="card-content"> | ||||||
|  |       <span class="card-title">§ 9 Mängelrechte, Haftung der Universität</span> | ||||||
|  |       <p>(1)  Die Universität übernimmt keine Gewähr dafür, dass die durch das Serviceangebot zur Verfügung gestellten Informationen inhaltlich richtig, aktuell oder brauchbar sind oder zu einem gewünschten Erfolg führen. Insoweit ist jegliche Haftung der Universität ausgeschlossen.</p> | ||||||
|  |       <p>(2)  Das Serviceangebot wird auf Basis der Praxis „Best Effort“ nach betriebsüblicher Sorgfalt zur Verfügung gestellt. Die Universität Bielefeld übernimmt keine Garantie dafür, dass die Systeme fehlerfrei und ohne Unterbrechung laufen. Eventuelle Datenverluste infolge technischer Störungen können nicht ausgeschlossen werden. Für den Verlust von Daten haftet die Universität Bielefeld insoweit nicht, als der Schaden darauf beruht, dass es die Nutzer*innen unterlassen haben, Datensicherungen durchzuführen und dadurch sicherzustellen, dass verloren gegangene Daten mit vertretbarem Aufwand wiederhergestellt werden können.</p> | ||||||
|  |       <p>(3)  Im Übrigen haftet die Universität nur bei Vorsatz oder grober Fahrlässigkeit ihrer Mitarbeiter*innen, es sei denn, dass eine schuldhafte Verletzung wesentlicher Pflichten im Sinne dieser Nutzungsbedingungen vorliegt.  In diesem Fall ist die Haftung auf typische, bei Begründung des Nutzungsverhältnisses vorhersehbare Schäden begrenzt; die Universität haftet nicht für andere Schäden, z.B. für entgangenen Gewinn, für Produktionsausfall, für sonstige mittelbare Schäden oder für Verlust von Daten und Informationen. Die Haftung für Schäden aus der Verletzung des Lebens, des Körpers oder der Gesundheit bleibt unberührt.</p> | ||||||
|  |       <p>(4)  Mögliche Amtshaftungsansprüche gegen die Hochschule bleiben von den vorstehenden Regelungen unberührt.</p> | ||||||
|  |     </div> | ||||||
|  |   </div> | ||||||
|  | </div> | ||||||
|  |  | ||||||
|  | <div class="col s12"> | ||||||
|  |   <div class="card"> | ||||||
|  |     <div class="card-content"> | ||||||
|  |       <span class="card-title">§ 10 Missbräuchliche Nutzung</span> | ||||||
|  |       <p>Die Universität behält sich vor, bei Verdacht einer missbräuchlichen Nutzung oder wesentlicher Verletzungen dieser Nutzungsbedingungen diesen Vorgängen nachzugehen, entsprechende Vorkehrungen zu treffen und bei einem begründeten Verdacht gegebenenfalls den Zugang des Nutzers zum Serviceangebot – mindestens bis zu einer Verdachtsausräumung seitens des Nutzers – zu sperren und/oder gegebenenfalls bei besonders schwerwiegenden Verstößen auch das Nutzerkonto zu löschen. Soweit der Nutzer den Verdacht ausräumt, wird die Sperrung aufgehoben. Hinweise auf eine missbräuchliche Nutzung des Serviceangebotes hat jeder Nutzer der Universität unverzüglich mitzuteilen.</p> | ||||||
|  |     </div> | ||||||
|  |   </div> | ||||||
|  | </div> | ||||||
|  |  | ||||||
|  | <div class="col s12"> | ||||||
|  |   <div class="card"> | ||||||
|  |     <div class="card-content"> | ||||||
|  |       <span class="card-title">§ 11 Anwendbares Recht</span> | ||||||
|  |       <p>Es gilt ausschließlich das Recht der Bundesrepublik Deutschland. Unberührt davon bleiben die zwingenden Bestimmungen des Staates, in dem Sie Ihren gewöhnlichen Aufenthalt haben.</p> | ||||||
|  |     </div> | ||||||
|  |   </div> | ||||||
|  | </div> | ||||||
|  |  | ||||||
|  | <div class="col s12"> | ||||||
|  |   <div class="card"> | ||||||
|  |     <div class="card-content"> | ||||||
|  |       <span class="card-title">§ 12 Gerichtsstand</span> | ||||||
|  |       <p>Sofern Sie keinen Wohnsitz in der Bundesrepublik Deutschland haben oder nach Anmeldung Ihren Wohnsitz ins Ausland verlegen oder Ihr Wohnsitz zum Zeitpunkt der Klageerhebung nicht bekannt ist, ist Gerichtsstand für alle Streitigkeiten aus und im Zusammenhang mit dem Nutzungsverhältnis das Amts- oder Landgericht Bielefeld.</p> | ||||||
|  |     </div> | ||||||
|  |   </div> | ||||||
|  | </div> | ||||||
							
								
								
									
										150
									
								
								app/templates/main/terms_of_use_en.html.j2
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										150
									
								
								app/templates/main/terms_of_use_en.html.j2
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,150 @@ | |||||||
|  | <div class="col s12"> | ||||||
|  |   <div class="card red darken-2"> | ||||||
|  |     <div class="card-content white-text"> | ||||||
|  |       <span class="card-title">Notice</span> | ||||||
|  |       <p>This page is a translation provided for the convenience of English-speaking users. However, only the German version of the Terms of Use, in accordance with German law, applies.</p> | ||||||
|  |     </div> | ||||||
|  |   </div> | ||||||
|  | </div> | ||||||
|  |  | ||||||
|  | <div class="col s12"> | ||||||
|  |   <p>With the usage of the nopaque platform you declare your acceptance of the General Terms of Use and that you have taken note of the legal framework and the data protection declaration.</p> | ||||||
|  | </div> | ||||||
|  |  | ||||||
|  | <div class="col s12"> | ||||||
|  |   <div class="card"> | ||||||
|  |     <div class="card-content"> | ||||||
|  |       <span class="card-title">§ 1 Content and scope of the Services offered</span> | ||||||
|  |       <p>(1)	These General Terms and Conditions apply to the use of the Services offered in the browser-based data processing and text analysis tool "nopaque" (hereinafter "Services") by Bielefeld University (hereinafter "University"), which can be used free of charge by authorized Users via the website www.nopaque.uni-bielefeld.de.</p> | ||||||
|  |       <p>(2)	The authorized use of these services is exclusively granted to:</p> | ||||||
|  |       <ol type="a"> | ||||||
|  |         <li>Students, teaching staff, and employees of Bielefeld University</li> | ||||||
|  |         <li>External researchers for use in non-commercial research</li> | ||||||
|  |       </ol> | ||||||
|  |     </div> | ||||||
|  |   </div> | ||||||
|  | </div> | ||||||
|  |  | ||||||
|  | <div class="col s12"> | ||||||
|  |   <div class="card"> | ||||||
|  |     <div class="card-content"> | ||||||
|  |       <span class="card-title">§ 2 Services</span> | ||||||
|  |       <p>(1)	As part of the Services offered, user-uploaded data can be processed and analyzed automatically. This is implemented in the form of the following Services, each of which can be executed individually:</p> | ||||||
|  |       <ol type="a"> | ||||||
|  |         <li><b>File Setup</b>: Digital copies of text-based research data (books, letters, etc.) often consist of different files and formats. nopaque converts and merges these files to facilitate further processing and the use of other services.</li> | ||||||
|  |         <li><b>Optical Character Recognition</b> (OCR): nopaque converts image data - such as photos or scans - into text data using OCR and makes it machine-readable. This step enables the further computational analysis of documents.</li> | ||||||
|  |         <li><b>Handwritten Text Recognition</b> (HTR): nopaque converts image data from handwritten text - such as photos or scans - into text data using HTR and makes it machine-readable. This step enables further computational analysis of documents.</li> | ||||||
|  |         <li><b>Natural Language Processing</b> (NLP): Using computer-aided linguistic data processing (tokenization, lemmatization, part-of-speech tagging and named entity recognition), nopaque extracts additional information from text.</li> | ||||||
|  |         <li><b>Corpus analysis</b>: With nopaque, one can create and upload as many text corpora as desired. It uses the CQP Query Language, which enables complex search queries using metadata and NLP tags.</li> | ||||||
|  |       </ol> | ||||||
|  |       <p>(2)	The User is also entitled to share text edits (corpora) created as part of the Services offered with other registered Users within the Service’s user platform using the "Share" functions. Access to third-party corpora can be granted with reading rights (viewer), reading and editing rights (contributor) or co-administrator rights (administrator).</p> | ||||||
|  |       <p>(3)	Users can upload their own language models as part of their user account and use them to analyze uploaded files. After activation by an administrator, Users can also make their uploaded language models publicly available on the platform.</p> | ||||||
|  |     </div> | ||||||
|  |   </div> | ||||||
|  | </div> | ||||||
|  |  | ||||||
|  | <div class="col s12"> | ||||||
|  |   <div class="card"> | ||||||
|  |     <div class="card-content"> | ||||||
|  |       <span class="card-title">§ 3 Access to the Services</span> | ||||||
|  |       <p>(1)  Users register by entering the requested data in the login screen. The User must then select and enter a username and password of their choice.</p> | ||||||
|  |       <p>(2) Users are responsible for keeping their username and password confidential.</p> | ||||||
|  |       <p>(3) After activation by an administrator, Users can create personal access tokens via an application programming interface (API) and access all user-accessible data. </p> | ||||||
|  |     </div> | ||||||
|  |   </div> | ||||||
|  | </div> | ||||||
|  |  | ||||||
|  | <div class="col s12"> | ||||||
|  |   <div class="card"> | ||||||
|  |     <div class="card-content"> | ||||||
|  |       <span class="card-title">§ 4 General User Obligations</span> | ||||||
|  |       <p>(1)	By registering, Users declare their consent to the terms of use. The unlawful uploading, processing, and sharing of content, in particular contrary to criminal law, personal rights regulations, data protection law or copyright regulations, is not permitted. By uploading, editing and/or sharing corresponding texts, corpora and/or analysis results, the respective User declares to be the owner of all rights required in each case.</p> | ||||||
|  |       <p>(2)	Users are required to use media and texts only in a legally permitted manner and refrain from making illegal statements. This also applies to the design of their personal user profile and to the use of the forums on the nopaque platform.</p> | ||||||
|  |       <p>(3)	The Services offered are only suitable for the processing of data without heightened security requirements. Data that is subject to special data protection requirements (e.g. health data, see Art. 9 GDPR) may not be stored or processed.</p> | ||||||
|  |     </div> | ||||||
|  |   </div> | ||||||
|  | </div> | ||||||
|  |  | ||||||
|  | <div class="col s12"> | ||||||
|  |   <div class="card"> | ||||||
|  |     <div class="card-content"> | ||||||
|  |       <span class="card-title">§ 5 User liability</span> | ||||||
|  |       <p>(1)	If claims are asserted against Bielefeld University by third parties due to use by an unauthorized User (see § 1 para. 2) or due to unauthorized use (see § 4 para. 1, 2) of texts, analysis results or other media (e.g. an uploaded profile picture) within the scope of the Services offered, the User responsible for the infringement shall indemnify Bielefeld University against these claims.</p> | ||||||
|  |       <p>(2)	Users are liable for any damage they incur regarding the confidentiality and disclosure of access data. If claims are asserted against Bielefeld University by third parties due to unauthorized use of access data/tokens, the User responsible for the infringement shall indemnify Bielefeld University against these claims.</p> | ||||||
|  |     </div> | ||||||
|  |   </div> | ||||||
|  | </div> | ||||||
|  |  | ||||||
|  | <div class="col s12"> | ||||||
|  |   <div class="card"> | ||||||
|  |     <div class="card-content"> | ||||||
|  |       <span class="card-title">§ 6 User rights of use regarding generated corpora and analysis reports</span> | ||||||
|  |       <p>(1)	Within the limits of § 4, the User is entitled to prepare texts with the web application, to view them within the web application and to download them. Regulations on sharing the corpora created in this way are set out in § 7 of these GTC.</p> | ||||||
|  |       <p>(2)	The User is also entitled to display, download, reproduce, and publish analysis reports generated based on the prepared texts for the purposes of teaching and research within the limits of § 4. These rights are only granted to the Users that generate the corresponding analysis reports. The authorization is subject to the resolutive condition that in the context of corresponding reproductions/publications of analysis reports or their excerpts, the following is noted as the source: nopaque [Software]. (2020). SFB 1288 - Subproject INF, Bielefeld University. https://nopaque.uni-bielefeld.de/.</p> | ||||||
|  |     </div> | ||||||
|  |   </div> | ||||||
|  | </div> | ||||||
|  |  | ||||||
|  | <div class="col s12"> | ||||||
|  |   <div class="card"> | ||||||
|  |     <div class="card-content"> | ||||||
|  |       <span class="card-title">§ 7 Sharing previously created corpora</span> | ||||||
|  |       <p>(1)	Sharing of corpora is possible in the following ways:</p> | ||||||
|  |       <ol type="a">  | ||||||
|  |         <li>A link can be generated allowing other registered Users to access another User’s corpora stored on the servers of the Service.</li> | ||||||
|  |         <li>Corpora can be added to a list visible to all other Users. In the context of this list, other Users can find information about the available content and request access by contacting the relevant creator. The creator can then decide, on their own responsibility, whether the requested access is permissible and can allow the requesting User to access the corpora – until this access is revoked – by sharing a link as described above.</li> | ||||||
|  |       </ol> | ||||||
|  |       <p>(2)	With regard to the sharing of corpora containing copyright-protected media, § 60d UrhG in particular must be observed. In particular, the uploading User is required to comply with the deletion periods stated therein. It is expressly pointed out that the improper distribution of corpora or source texts can lead to breaches of duty in accordance with § 4, the financial loss of which is to be borne by the respective Users.</p> | ||||||
|  |     </div> | ||||||
|  |   </div> | ||||||
|  | </div> | ||||||
|  |  | ||||||
|  | <div class="col s12"> | ||||||
|  |   <div class="card"> | ||||||
|  |     <div class="card-content"> | ||||||
|  |       <span class="card-title">§ 8 Availability and support</span> | ||||||
|  |       <p>(1)	The University has the right to carry out maintenance work during operating hours as well, insofar as this is necessary in the interest of the User. This may cause disruptions to data access, which the University will keep to a minimum.</p> | ||||||
|  |       <p>(2)	The University has the right to change and/or modify the range of functions of the Services without prior notification of its Users. The University can, without being required to do so, update or develop the software at any time, especially due to changed laws and regulations, technical developments, scientific or thematic restructuring of the project or for IT security improvements.</p> | ||||||
|  |       <p>(3)  Users are provided with a support contact form on the website.</p> | ||||||
|  |     </div> | ||||||
|  |   </div> | ||||||
|  | </div> | ||||||
|  |  | ||||||
|  | <div class="col s12"> | ||||||
|  |   <div class="card"> | ||||||
|  |     <div class="card-content"> | ||||||
|  |       <span class="card-title">§ 9 Defect rights and liability of the University</span> | ||||||
|  |       <p>(1)  The University does not guarantee that the information provided by the service is correct, up-to-date or usable or that it will lead to the desired results. In this respect, the University accepts no liability whatsoever.</p> | ||||||
|  |       <p>(2)  The Services are provided based on “best effort” practices according to customary operational diligence. The University does not guarantee uninterrupted, error-free system operation. Possible data loss due to technical disruptions cannot be ruled out. The University is not liable for data loss insofar as the damage results from Users' failure to create data backups and thereby ensure that lost data can be restored with reasonable effort.</p> | ||||||
|  |       <p>(3)  Otherwise, the University shall only be liable in the event of wrongful intent or gross negligence on the part of its employees, unless there is a culpable breach of material obligations within the scope of these Terms and Conditions of use.  In this case, liability is limited to typical damages foreseeable at the time the user relationship was established; the University is not liable for other damages, e.g. for loss of profit, for loss of production, for other indirect damages or for loss of data and information. Liability for damage resulting from injury to life, body or health remains unaffected.</p> | ||||||
|  |       <p>(4)  Possible official liability claims against the university remain unaffected by the above provisions.</p> | ||||||
|  |     </div> | ||||||
|  |   </div> | ||||||
|  | </div> | ||||||
|  |  | ||||||
|  | <div class="col s12"> | ||||||
|  |   <div class="card"> | ||||||
|  |     <div class="card-content"> | ||||||
|  |       <span class="card-title">§ 10 Wrongful use</span> | ||||||
|  |       <p>The University reserves the right to investigate suspected misuse or significant violations of these Terms and Conditions, to take appropriate precautions and, in the event of reasonable suspicion, to block the User's access to the Services – at least until the user has cleared up the suspicion – and/or, if necessary, to delete the user account in the event of particularly serious violations. If the User clears up the suspicion, the block will be lifted. Every User must notify the University immediately of any indications of misuse of the service.</p> | ||||||
|  |     </div> | ||||||
|  |   </div> | ||||||
|  | </div> | ||||||
|  |  | ||||||
|  | <div class="col s12"> | ||||||
|  |   <div class="card"> | ||||||
|  |     <div class="card-content"> | ||||||
|  |       <span class="card-title">§ 11 Applicable law</span> | ||||||
|  |       <p>The law of the Federal Republic of Germany shall apply exclusively. The mandatory provisions of the country in which you have your habitual residence remain unaffected.</p> | ||||||
|  |     </div> | ||||||
|  |   </div> | ||||||
|  | </div> | ||||||
|  |  | ||||||
|  | <div class="col s12"> | ||||||
|  |   <div class="card"> | ||||||
|  |     <div class="card-content"> | ||||||
|  |       <span class="card-title">§ 12 Place of jurisdiction</span> | ||||||
|  |       <p>If you do not have a place of residence in the Federal Republic of Germany, if you move your place of residence abroad after registration, or if your place of residence is not known at the time the action is filed, the place of jurisdiction for all disputes arising from and in connection with the user relationship shall be the local ("Amtsgericht") or regional ("Landgericht") court of Bielefeld.</p> | ||||||
|  |     </div> | ||||||
|  |   </div> | ||||||
|  | </div> | ||||||
| @@ -15,4 +15,4 @@ def before_request(): | |||||||
|     pass |     pass | ||||||
|  |  | ||||||
|  |  | ||||||
| from . import events, json_routes, routes, settings | from . import cli, events, json_routes, routes, settings | ||||||
|   | |||||||
							
								
								
									
										12
									
								
								app/users/cli.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										12
									
								
								app/users/cli.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,12 @@ | |||||||
|  | from app.models import User | ||||||
|  | from app import db | ||||||
|  | from . import bp | ||||||
|  |  | ||||||
|  |  | ||||||
|  | @bp.cli.command('reset') | ||||||
|  | def reset(): | ||||||
|  |     ''' Reset terms of use accept ''' | ||||||
|  |     for user in [x for x in User.query.all() if x.terms_of_use_accepted]: | ||||||
|  |         print(f'Resetting user {user.username}') | ||||||
|  |         user.terms_of_use_accepted = False | ||||||
|  |     db.session.commit() | ||||||
| @@ -7,7 +7,6 @@ from flask import ( | |||||||
| ) | ) | ||||||
| from flask_breadcrumbs import register_breadcrumb | from flask_breadcrumbs import register_breadcrumb | ||||||
| from flask_login import current_user | from flask_login import current_user | ||||||
| import os |  | ||||||
| from app.models import User | from app.models import User | ||||||
| from . import bp | from . import bp | ||||||
| from .utils import user_dynamic_list_constructor as user_dlc | from .utils import user_dynamic_list_constructor as user_dlc | ||||||
| @@ -40,8 +39,8 @@ def user_avatar(user_id): | |||||||
|     if user.avatar is None: |     if user.avatar is None: | ||||||
|         return redirect(url_for('static', filename='images/user_avatar.png')) |         return redirect(url_for('static', filename='images/user_avatar.png')) | ||||||
|     return send_from_directory( |     return send_from_directory( | ||||||
|         os.path.dirname(user.avatar.path), |         user.avatar.path.parent, | ||||||
|         os.path.basename(user.avatar.path), |         user.avatar.path.name, | ||||||
|         as_attachment=True, |         as_attachment=True, | ||||||
|         attachment_filename=user.avatar.filename, |         attachment_filename=user.avatar.filename, | ||||||
|         mimetype=user.avatar.mimetype |         mimetype=user.avatar.mimetype | ||||||
|   | |||||||
| @@ -1,6 +1,7 @@ | |||||||
| from dotenv import load_dotenv | from dotenv import load_dotenv | ||||||
| from flask import Flask | from flask import Flask | ||||||
| from logging.handlers import RotatingFileHandler | from logging.handlers import RotatingFileHandler | ||||||
|  | from pathlib import Path | ||||||
| from werkzeug.middleware.proxy_fix import ProxyFix | from werkzeug.middleware.proxy_fix import ProxyFix | ||||||
| import logging | import logging | ||||||
| import os | import os | ||||||
| @@ -57,8 +58,7 @@ class Config: | |||||||
|  |  | ||||||
|     ''' # nopaque # ''' |     ''' # nopaque # ''' | ||||||
|     NOPAQUE_ADMIN = os.environ.get('NOPAQUE_ADMIN') |     NOPAQUE_ADMIN = os.environ.get('NOPAQUE_ADMIN') | ||||||
|     NOPAQUE_DATA_DIR = \ |     NOPAQUE_DATA_DIR = Path(os.environ.get('NOPAQUE_DATA_PATH', '/mnt/nopaque')) | ||||||
|         os.path.abspath(os.environ.get('NOPAQUE_DATA_PATH', '/mnt/nopaque')) |  | ||||||
|     NOPAQUE_IS_PRIMARY_INSTANCE = \ |     NOPAQUE_IS_PRIMARY_INSTANCE = \ | ||||||
|         os.environ.get('NOPAQUE_IS_PRIMARY_INSTANCE', 'true').lower() == 'true' |         os.environ.get('NOPAQUE_IS_PRIMARY_INSTANCE', 'true').lower() == 'true' | ||||||
|     NOPAQUE_MAIL_SUBJECT_PREFIX = '[nopaque]' |     NOPAQUE_MAIL_SUBJECT_PREFIX = '[nopaque]' | ||||||
| @@ -115,7 +115,7 @@ class Config: | |||||||
|     NOPAQUE_READCOOP_USERNAME = os.environ.get('NOPAQUE_READCOOP_USERNAME') |     NOPAQUE_READCOOP_USERNAME = os.environ.get('NOPAQUE_READCOOP_USERNAME') | ||||||
|     NOPAQUE_READCOOP_PASSWORD = os.environ.get('NOPAQUE_READCOOP_PASSWORD') |     NOPAQUE_READCOOP_PASSWORD = os.environ.get('NOPAQUE_READCOOP_PASSWORD') | ||||||
|  |  | ||||||
|     NOPAQUE_VERSION='1.0.0' |     NOPAQUE_VERSION='1.0.2' | ||||||
|  |  | ||||||
|     @staticmethod |     @staticmethod | ||||||
|     def init_app(app: Flask): |     def init_app(app: Flask): | ||||||
|   | |||||||
| @@ -2,7 +2,7 @@ apifairy | |||||||
| cqi>=0.1.7 | cqi>=0.1.7 | ||||||
| dnspython==2.2.1 | dnspython==2.2.1 | ||||||
| docker | docker | ||||||
| eventlet | eventlet==0.34.2 | ||||||
| Flask==2.1.3 | Flask==2.1.3 | ||||||
| Flask-APScheduler | Flask-APScheduler | ||||||
| Flask-Assets==2.0 | Flask-Assets==2.0 | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user