Compare commits

..

2 Commits

Author SHA1 Message Date
Patrick Jentsch
82d6f6003f Restructure Dockerfile for better caching 2024-03-13 12:58:39 +01:00
Patrick Jentsch
9da74c1c6f Use pathlib where possible 2024-03-07 15:49:04 +01:00
22 changed files with 181 additions and 177 deletions

View File

@ -4,11 +4,13 @@ FROM python:3.10.13-slim-bookworm
LABEL authors="Patrick Jentsch <p.jentsch@uni-bielefeld.de>"
# Set environment variables
ENV LANG="C.UTF-8"
ENV PYTHONDONTWRITEBYTECODE="1"
ENV PYTHONUNBUFFERED="1"
# Install system dependencies
RUN apt-get update \
&& apt-get install --no-install-recommends --yes \
build-essential \
@ -17,37 +19,42 @@ RUN apt-get update \
&& rm --recursive /var/lib/apt/lists/*
# Create a non-root user
RUN useradd --create-home --no-log-init nopaque \
&& groupadd docker \
&& usermod --append --groups docker nopaque
USER nopaque
WORKDIR /home/nopaque
# Create a Python virtual environment
ENV NOPAQUE_PYTHON3_VENV_PATH="/home/nopaque/.venv"
RUN python3 -m venv "${NOPAQUE_PYTHON3_VENV_PATH}"
ENV PATH="${NOPAQUE_PYTHON3_VENV_PATH}/bin:${PATH}"
# Install Python dependencies
COPY --chown=nopaque:nopaque requirements.txt requirements.txt
RUN python3 -m pip install --requirement requirements.txt \
&& rm requirements.txt
# Install the application
COPY docker-nopaque-entrypoint.sh /usr/local/bin/
COPY --chown=nopaque:nopaque app app
COPY --chown=nopaque:nopaque migrations migrations
COPY --chown=nopaque:nopaque tests tests
COPY --chown=nopaque:nopaque .flaskenv boot.sh config.py nopaque.py requirements.txt ./
RUN python3 -m pip install --requirement requirements.txt \
&& mkdir logs
USER root
COPY docker-nopaque-entrypoint.sh /usr/local/bin/
RUN mkdir logs
EXPOSE 5000
USER root
ENTRYPOINT ["docker-nopaque-entrypoint.sh"]

View File

@ -2,32 +2,34 @@ from flask import current_app
from app import db
from app.models import User, Corpus, CorpusFile
from datetime import datetime
from pathlib import Path
from typing import Dict, List
import json
import os
import shutil
class SandpaperConverter:
def __init__(self, json_db_file, data_dir):
def __init__(self, json_db_file: Path, data_dir: Path):
self.json_db_file = json_db_file
self.data_dir = data_dir
def run(self):
with open(self.json_db_file, 'r') as f:
json_db = json.loads(f.read())
with self.json_db_file.open('r') as f:
json_db: List[Dict] = json.load(f)
for json_user in json_db:
if not json_user['confirmed']:
current_app.logger.info(f'Skip unconfirmed user {json_user["username"]}')
continue
user_dir = os.path.join(self.data_dir, str(json_user['id']))
user_dir = self.data_dir / f'{json_user["id"]}'
self.convert_user(json_user, user_dir)
db.session.commit()
def convert_user(self, json_user, user_dir):
def convert_user(self, json_user: Dict, user_dir: Path):
current_app.logger.info(f'Create User {json_user["username"]}...')
user = User(
try:
user = User.create(
confirmed=json_user['confirmed'],
email=json_user['email'],
last_seen=datetime.fromtimestamp(json_user['last_seen']),
@ -35,47 +37,34 @@ class SandpaperConverter:
password_hash=json_user['password_hash'], # TODO: Needs to be added manually
username=json_user['username']
)
db.session.add(user)
db.session.flush(objects=[user])
db.session.refresh(user)
try:
user.makedirs()
except OSError as e:
current_app.logger.error(e)
db.session.rollback()
except OSError:
raise Exception('Internal Server Error')
for json_corpus in json_user['corpora'].values():
if not json_corpus['files'].values():
current_app.logger.info(f'Skip empty corpus {json_corpus["title"]}')
continue
corpus_dir = os.path.join(user_dir, 'corpora', str(json_corpus['id']))
corpus_dir = user_dir / 'corpora' / f'{json_corpus["id"]}'
self.convert_corpus(json_corpus, user, corpus_dir)
current_app.logger.info('Done')
def convert_corpus(self, json_corpus, user, corpus_dir):
def convert_corpus(self, json_corpus: Dict, user: User, corpus_dir: Path):
current_app.logger.info(f'Create Corpus {json_corpus["title"]}...')
corpus = Corpus(
try:
corpus = Corpus.create(
user=user,
creation_date=datetime.fromtimestamp(json_corpus['creation_date']),
description=json_corpus['description'],
title=json_corpus['title']
)
db.session.add(corpus)
db.session.flush(objects=[corpus])
db.session.refresh(corpus)
try:
corpus.makedirs()
except OSError as e:
current_app.logger.error(e)
db.session.rollback()
except OSError:
raise Exception('Internal Server Error')
for json_corpus_file in json_corpus['files'].values():
self.convert_corpus_file(json_corpus_file, corpus, corpus_dir)
current_app.logger.info('Done')
def convert_corpus_file(self, json_corpus_file, corpus, corpus_dir):
def convert_corpus_file(self, json_corpus_file: Dict, corpus: Corpus, corpus_dir: Path):
current_app.logger.info(f'Create CorpusFile {json_corpus_file["title"]}...')
corpus_file = CorpusFile(
corpus=corpus,
@ -99,13 +88,13 @@ class SandpaperConverter:
db.session.refresh(corpus_file)
try:
shutil.copy2(
os.path.join(corpus_dir, json_corpus_file['filename']),
corpus_dir / json_corpus_file['filename'],
corpus_file.path
)
except:
current_app.logger.warning(
'Can not convert corpus file: '
f'{os.path.join(corpus_dir, json_corpus_file["filename"])}'
f'{corpus_dir / json_corpus_file["filename"]}'
' -> '
f'{corpus_file.path}'
)

View File

@ -1,7 +1,7 @@
from app.models import Corpus, CorpusStatus
import os
from flask import current_app
import shutil
from app import db
from app.models import Corpus, CorpusStatus
from . import bp
@ -18,10 +18,17 @@ def reset():
]
for corpus in [x for x in Corpus.query.all() if x.status in status]:
print(f'Resetting corpus {corpus}')
shutil.rmtree(os.path.join(corpus.path, 'cwb'), ignore_errors=True)
os.mkdir(os.path.join(corpus.path, 'cwb'))
os.mkdir(os.path.join(corpus.path, 'cwb', 'data'))
os.mkdir(os.path.join(corpus.path, 'cwb', 'registry'))
corpus_cwb_dir = corpus.path / 'cwb'
corpus_cwb_data_dir = corpus_cwb_dir / 'data'
corpus_cwb_registry_dir = corpus_cwb_dir / 'registry'
try:
shutil.rmtree(corpus.path / 'cwb', ignore_errors=True)
corpus_cwb_dir.mkdir()
corpus_cwb_data_dir.mkdir()
corpus_cwb_registry_dir.mkdir()
except OSError as e:
current_app.logger.error(e)
raise
corpus.status = CorpusStatus.UNPREPARED
corpus.num_analysis_sessions = 0
db.session.commit()

View File

@ -12,7 +12,6 @@ from typing import Dict, List
import gzip
import json
import math
import os
from app import db
from app.models import Corpus
from .utils import lookups_by_cpos, partial_export_subcorpus, export_subcorpus
@ -42,9 +41,9 @@ def ext_corpus_static_data(corpus: str) -> Dict:
db_corpus_id: int = session['cqi_over_sio']['db_corpus_id']
db_corpus: Corpus = Corpus.query.get(db_corpus_id)
static_data_file_path: str = os.path.join(db_corpus.path, 'cwb', 'static.json.gz')
if os.path.exists(static_data_file_path):
with open(static_data_file_path, 'rb') as f:
static_data_file_path = db_corpus.path / 'cwb' / 'static.json.gz'
if static_data_file_path.exists():
with static_data_file_path.open('rb') as f:
return f.read()
cqi_client: CQiClient = session['cqi_over_sio']['cqi_client']

View File

@ -7,7 +7,6 @@ from flask import (
url_for
)
from flask_breadcrumbs import register_breadcrumb
import os
from app import db
from app.models import Corpus, CorpusFile, CorpusStatus
from ..decorators import corpus_follower_permission_required
@ -92,8 +91,8 @@ def corpus_file(corpus_id, corpus_file_id):
def download_corpus_file(corpus_id, corpus_file_id):
corpus_file = CorpusFile.query.filter_by(corpus_id=corpus_id, id=corpus_file_id).first_or_404()
return send_from_directory(
os.path.dirname(corpus_file.path),
os.path.basename(corpus_file.path),
corpus_file.path.parent,
corpus_file.path.name,
as_attachment=True,
attachment_filename=corpus_file.filename,
mimetype=corpus_file.mimetype

View File

@ -1,7 +1,6 @@
from flask import abort, current_app
from flask_login import current_user
from threading import Thread
import os
from app import db
from app.decorators import admin_required, content_negotiation
from app.models import Job, JobStatus
@ -39,7 +38,7 @@ def job_log(job_id):
if job.status not in [JobStatus.COMPLETED, JobStatus.FAILED]:
response = {'errors': {'message': 'Job status is not completed or failed'}}
return response, 409
with open(os.path.join(job.path, 'pipeline_data', 'logs', 'pyflow_log.txt')) as log_file:
with open(job.path / 'pipeline_data' / 'logs' / 'pyflow_log.txt') as log_file:
log = log_file.read()
response_data = {
'jobLog': log

View File

@ -7,7 +7,6 @@ from flask import (
)
from flask_breadcrumbs import register_breadcrumb
from flask_login import current_user
import os
from app.models import Job, JobInput, JobResult
from . import bp
from .utils import job_dynamic_list_constructor as job_dlc
@ -38,8 +37,8 @@ def download_job_input(job_id, job_input_id):
if not (job_input.job.user == current_user or current_user.is_administrator()):
abort(403)
return send_from_directory(
os.path.dirname(job_input.path),
os.path.basename(job_input.path),
job_input.path.parent,
job_input.path.name,
as_attachment=True,
attachment_filename=job_input.filename,
mimetype=job_input.mimetype
@ -52,8 +51,8 @@ def download_job_result(job_id, job_result_id):
if not (job_result.job.user == current_user or current_user.is_administrator()):
abort(403)
return send_from_directory(
os.path.dirname(job_result.path),
os.path.basename(job_result.path),
job_result.path.parent,
job_result.path.name,
as_attachment=True,
attachment_filename=job_result.filename,
mimetype=job_result.mimetype

View File

@ -1,6 +1,7 @@
from flask import current_app
from flask_migrate import upgrade
import os
from pathlib import Path
from typing import List
from app.models import (
CorpusFollowerRole,
Role,
@ -17,16 +18,15 @@ def deploy():
# Make default directories
print('Make default directories')
base_dir = current_app.config['NOPAQUE_DATA_DIR']
default_dirs = [
os.path.join(base_dir, 'tmp'),
os.path.join(base_dir, 'users')
default_dirs: List[Path] = [
base_dir / 'tmp',
base_dir / 'users'
]
for dir in default_dirs:
if os.path.exists(dir):
if not os.path.isdir(dir):
raise NotADirectoryError(f'{dir} is not a directory')
else:
os.mkdir(dir)
for default_dir in default_dirs:
if not default_dir.exists():
default_dir.mkdir()
if not default_dir.is_dir():
raise NotADirectoryError(f'{default_dir} is not a directory')
# migrate database to latest revision
print('Migrate database to latest revision')

View File

@ -11,6 +11,7 @@ from .spacy_nlp_pipeline_model import *
from .tesseract_ocr_pipeline_model import *
from .token import *
from .user import *
from app import login
@login.user_loader

View File

@ -1,6 +1,6 @@
from flask import current_app
from flask_hashids import HashidMixin
import os
from pathlib import Path
from app import db
from .file_mixin import FileMixin
@ -15,14 +15,16 @@ class Avatar(HashidMixin, FileMixin, db.Model):
user = db.relationship('User', back_populates='avatar')
@property
def path(self):
return os.path.join(self.user.path, 'avatar')
def path(self) -> Path:
return self.user.path / 'avatar'
# return os.path.join(self.user.path, 'avatar')
def delete(self):
try:
os.remove(self.path)
self.path.unlink(missing_ok=True)
except OSError as e:
current_app.logger.error(e)
raise
db.session.delete(self)
def to_json_serializeable(self, backrefs=False, relationships=False):

View File

@ -4,7 +4,7 @@ from flask import current_app, url_for
from flask_hashids import HashidMixin
from sqlalchemy.ext.associationproxy import association_proxy
from typing import Union
import os
from pathlib import Path
import shutil
import xml.etree.ElementTree as ET
from app import db
@ -88,8 +88,8 @@ class Corpus(HashidMixin, db.Model):
return f'{self.user.jsonpatch_path}/corpora/{self.hashid}'
@property
def path(self):
return os.path.join(self.user.path, 'corpora', str(self.id))
def path(self) -> Path:
return self.user.path / 'corpora' / f'{self.id}'
@property
def url(self):
@ -105,27 +105,39 @@ class Corpus(HashidMixin, db.Model):
db.session.add(corpus)
db.session.flush(objects=[corpus])
db.session.refresh(corpus)
corpus_files_dir = corpus.path / 'files'
corpus_cwb_dir = corpus.path / 'cwb'
corpus_cwb_data_dir = corpus_cwb_dir / 'data'
corpus_cwb_registry_dir = corpus_cwb_dir / 'registry'
try:
os.mkdir(corpus.path)
os.mkdir(os.path.join(corpus.path, 'files'))
os.mkdir(os.path.join(corpus.path, 'cwb'))
os.mkdir(os.path.join(corpus.path, 'cwb', 'data'))
os.mkdir(os.path.join(corpus.path, 'cwb', 'registry'))
corpus.path.mkdir()
corpus_files_dir.mkdir()
corpus_cwb_dir.mkdir()
corpus_cwb_data_dir.mkdir()
corpus_cwb_registry_dir.mkdir()
except OSError as e:
# TODO: Potential leftover cleanup
current_app.logger.error(e)
db.session.rollback()
raise e
raise
return corpus
def build(self):
build_dir = os.path.join(self.path, 'cwb')
shutil.rmtree(build_dir, ignore_errors=True)
os.mkdir(build_dir)
os.mkdir(os.path.join(build_dir, 'data'))
os.mkdir(os.path.join(build_dir, 'registry'))
corpus_cwb_dir = self.path / 'cwb'
corpus_cwb_data_dir = corpus_cwb_dir / 'data'
corpus_cwb_registry_dir = corpus_cwb_dir / 'registry'
try:
shutil.rmtree(corpus_cwb_dir, ignore_errors=True)
corpus_cwb_dir.mkdir()
corpus_cwb_data_dir.mkdir()
corpus_cwb_registry_dir.mkdir()
except OSError as e:
current_app.logger.error(e)
self.status = CorpusStatus.FAILED
raise
corpus_element = ET.fromstring('<corpus>\n</corpus>')
for corpus_file in self.files:
normalized_vrt_path = os.path.join(build_dir, f'{corpus_file.id}.norm.vrt')
normalized_vrt_path = corpus_cwb_dir / f'{corpus_file.id}.norm.vrt'
try:
normalize_vrt_file(corpus_file.path, normalized_vrt_path)
except:
@ -152,7 +164,7 @@ class Corpus(HashidMixin, db.Model):
# corpus_element.insert(1, text_element)
corpus_element.append(text_element)
ET.ElementTree(corpus_element).write(
os.path.join(build_dir, 'corpus.vrt'),
corpus_cwb_dir / 'corpus.vrt',
encoding='utf-8'
)
self.status = CorpusStatus.SUBMITTED

View File

@ -1,6 +1,6 @@
from flask import current_app, url_for
from flask_hashids import HashidMixin
import os
from pathlib import Path
from app import db
from .corpus import CorpusStatus
from .file_mixin import FileMixin
@ -45,8 +45,8 @@ class CorpusFile(FileMixin, HashidMixin, db.Model):
return f'{self.corpus.jsonpatch_path}/files/{self.hashid}'
@property
def path(self):
return os.path.join(self.corpus.path, 'files', str(self.id))
def path(self) -> Path:
return self.corpus.path / 'files' / f'{self.id}'
@property
def url(self):
@ -66,9 +66,10 @@ class CorpusFile(FileMixin, HashidMixin, db.Model):
def delete(self):
try:
os.remove(self.path)
self.path.unlink(missing_ok=True)
except OSError as e:
current_app.logger.error(e)
raise
db.session.delete(self)
self.corpus.status = CorpusStatus.UNPREPARED

View File

@ -4,7 +4,7 @@ from flask import current_app, url_for
from flask_hashids import HashidMixin
from time import sleep
from typing import Union
import os
from pathlib import Path
import shutil
from app import db
from app.ext.flask_sqlalchemy import ContainerColumn, IntEnumColumn
@ -79,8 +79,8 @@ class Job(HashidMixin, db.Model):
return f'{self.user.jsonpatch_path}/jobs/{self.hashid}'
@property
def path(self):
return os.path.join(self.user.path, 'jobs', str(self.id))
def path(self) -> Path:
return self.user.path / 'jobs' / f'{self.id}'
@property
def url(self):
@ -96,15 +96,19 @@ class Job(HashidMixin, db.Model):
db.session.add(job)
db.session.flush(objects=[job])
db.session.refresh(job)
job_inputs_dir = job.path / 'inputs'
job_pipeline_data_dir = job.path / 'pipeline_data'
job_results_dir = job.path / 'results'
try:
os.mkdir(job.path)
os.mkdir(os.path.join(job.path, 'inputs'))
os.mkdir(os.path.join(job.path, 'pipeline_data'))
os.mkdir(os.path.join(job.path, 'results'))
job.path.mkdir()
job_inputs_dir.mkdir()
job_pipeline_data_dir.mkdir()
job_results_dir.mkdir()
except OSError as e:
# TODO: Potential leftover cleanup
current_app.logger.error(e)
db.session.rollback()
raise e
raise
return job
def delete(self):
@ -131,8 +135,8 @@ class Job(HashidMixin, db.Model):
''' Restart a job - only if the status is failed '''
if self.status != JobStatus.FAILED:
raise Exception('Job status is not "failed"')
shutil.rmtree(os.path.join(self.path, 'results'), ignore_errors=True)
shutil.rmtree(os.path.join(self.path, 'pyflow.data'), ignore_errors=True)
shutil.rmtree(self.path / 'results', ignore_errors=True)
shutil.rmtree(self.path / 'pyflow.data', ignore_errors=True)
for result in self.results:
db.session.delete(result)
self.end_date = None

View File

@ -1,6 +1,6 @@
from flask import url_for
from flask_hashids import HashidMixin
import os
from pathlib import Path
from app import db
from .file_mixin import FileMixin
@ -33,8 +33,8 @@ class JobInput(FileMixin, HashidMixin, db.Model):
return f'{self.job.jsonpatch_path}/inputs/{self.hashid}'
@property
def path(self):
return os.path.join(self.job.path, 'inputs', str(self.id))
def path(self) -> Path:
return self.job.path / 'inputs' / f'{self.id}'
@property
def url(self):

View File

@ -1,6 +1,6 @@
from flask import url_for
from flask_hashids import HashidMixin
import os
from pathlib import Path
from app import db
from .file_mixin import FileMixin
@ -35,8 +35,8 @@ class JobResult(FileMixin, HashidMixin, db.Model):
return f'{self.job.jsonpatch_path}/results/{self.hashid}'
@property
def path(self):
return os.path.join(self.job.path, 'results', str(self.id))
def path(self) -> Path:
return self.job.path / 'results' / f'{self.id}'
@property
def url(self):

View File

@ -1,8 +1,7 @@
from flask import abort, current_app, url_for
from flask import current_app, url_for
from flask_hashids import HashidMixin
from time import sleep
from tqdm import tqdm
import os
from pathlib import Path
import requests
import yaml
from app import db
@ -32,12 +31,8 @@ class SpaCyNLPPipelineModel(FileMixin, HashidMixin, db.Model):
user = db.relationship('User', back_populates='spacy_nlp_pipeline_models')
@property
def path(self):
return os.path.join(
self.user.path,
'spacy_nlp_pipeline_models',
str(self.id)
)
def path(self) -> Path:
return self.user.path / 'spacy_nlp_pipeline_models' / f'{self.id}'
@property
def jsonpatch_path(self):
@ -57,14 +52,10 @@ class SpaCyNLPPipelineModel(FileMixin, HashidMixin, db.Model):
@staticmethod
def insert_defaults(force_download=False):
nopaque_user = User.query.filter_by(username='nopaque').first()
defaults_file = os.path.join(
os.path.dirname(os.path.abspath(__file__)),
'default_records',
'spacy_nlp_pipeline_model.yml'
)
with open(defaults_file, 'r') as f:
defaults = yaml.safe_load(f)
for m in defaults:
default_records_file = Path(__file__).parent / 'default_records' / 'spacy_nlp_pipeline_model.yml'
with default_records_file.open('r') as f:
default_records = yaml.safe_load(f)
for m in default_records:
model = SpaCyNLPPipelineModel.query.filter_by(title=m['title'], version=m['version']).first() # noqa
if model is not None:
model.compatible_service_versions = m['compatible_service_versions']
@ -96,7 +87,7 @@ class SpaCyNLPPipelineModel(FileMixin, HashidMixin, db.Model):
db.session.add(model)
db.session.flush(objects=[model])
db.session.refresh(model)
if not os.path.exists(model.path) or force_download:
if not model.path.exists() or force_download:
r = requests.get(m['url'], stream=True)
pbar = tqdm(
desc=f'{model.title} ({model.filename})',
@ -116,9 +107,10 @@ class SpaCyNLPPipelineModel(FileMixin, HashidMixin, db.Model):
def delete(self):
try:
os.remove(self.path)
self.path.unlink(missing_ok=True)
except OSError as e:
current_app.logger.error(e)
raise
db.session.delete(self)
def to_json_serializeable(self, backrefs=False, relationships=False):

View File

@ -1,7 +1,7 @@
from flask import current_app, url_for
from flask_hashids import HashidMixin
from tqdm import tqdm
import os
from pathlib import Path
import requests
import yaml
from app import db
@ -30,12 +30,8 @@ class TesseractOCRPipelineModel(FileMixin, HashidMixin, db.Model):
user = db.relationship('User', back_populates='tesseract_ocr_pipeline_models')
@property
def path(self):
return os.path.join(
self.user.path,
'tesseract_ocr_pipeline_models',
str(self.id)
)
def path(self) -> Path:
return self.user.path / 'tesseract_ocr_pipeline_models' / f'{self.id}'
@property
def jsonpatch_path(self):
@ -55,14 +51,10 @@ class TesseractOCRPipelineModel(FileMixin, HashidMixin, db.Model):
@staticmethod
def insert_defaults(force_download=False):
nopaque_user = User.query.filter_by(username='nopaque').first()
defaults_file = os.path.join(
os.path.dirname(os.path.abspath(__file__)),
'default_records',
'tesseract_ocr_pipeline_model.yml'
)
with open(defaults_file, 'r') as f:
defaults = yaml.safe_load(f)
for m in defaults:
default_records_file = Path(__file__).parent / 'default_records' / 'tesseract_ocr_pipeline_model.yml'
with default_records_file.open('r') as f:
default_records = yaml.safe_load(f)
for m in default_records:
model = TesseractOCRPipelineModel.query.filter_by(title=m['title'], version=m['version']).first() # noqa
if model is not None:
model.compatible_service_versions = m['compatible_service_versions']
@ -92,7 +84,7 @@ class TesseractOCRPipelineModel(FileMixin, HashidMixin, db.Model):
db.session.flush(objects=[model])
db.session.refresh(model)
model.filename = f'{model.id}.traineddata'
if not os.path.exists(model.path) or force_download:
if not model.path.exists() or force_download:
r = requests.get(m['url'], stream=True)
pbar = tqdm(
desc=f'{model.title} ({model.filename})',
@ -112,9 +104,10 @@ class TesseractOCRPipelineModel(FileMixin, HashidMixin, db.Model):
def delete(self):
try:
os.remove(self.path)
self.path.unlink(missing_ok=True)
except OSError as e:
current_app.logger.error(e)
raise
db.session.delete(self)
def to_json_serializeable(self, backrefs=False, relationships=False):

View File

@ -4,14 +4,14 @@ from flask import current_app, url_for
from flask_hashids import HashidMixin
from flask_login import UserMixin
from sqlalchemy.ext.associationproxy import association_proxy
from pathlib import Path
from typing import Union
from werkzeug.security import generate_password_hash, check_password_hash
import jwt
import os
import re
import secrets
import shutil
from app import db, hashids, login
from app import db, hashids
from app.ext.flask_sqlalchemy import IntEnumColumn
from .corpus import Corpus
from .corpus_follower_association import CorpusFollowerAssociation
@ -145,9 +145,8 @@ class User(HashidMixin, UserMixin, db.Model):
self.password_hash = generate_password_hash(password)
@property
def path(self):
return os.path.join(
current_app.config.get('NOPAQUE_DATA_DIR'), 'users', str(self.id))
def path(self) -> Path:
return current_app.config.get('NOPAQUE_DATA_DIR') / 'users' / f'{self.id}'
@staticmethod
def create(**kwargs):
@ -155,16 +154,21 @@ class User(HashidMixin, UserMixin, db.Model):
db.session.add(user)
db.session.flush(objects=[user])
db.session.refresh(user)
user_spacy_nlp_pipeline_models_dir = user.path / 'spacy_nlp_pipeline_models'
user_tesseract_ocr_pipeline_models_dir = user.path / 'tesseract_ocr_pipeline_models'
user_corpora_dir = user.path / 'corpora'
user_jobs_dir = user.path / 'jobs'
try:
os.mkdir(user.path)
os.mkdir(os.path.join(user.path, 'spacy_nlp_pipeline_models'))
os.mkdir(os.path.join(user.path, 'tesseract_ocr_pipeline_models'))
os.mkdir(os.path.join(user.path, 'corpora'))
os.mkdir(os.path.join(user.path, 'jobs'))
user.path.mkdir()
user_spacy_nlp_pipeline_models_dir.mkdir()
user_tesseract_ocr_pipeline_models_dir.mkdir()
user_corpora_dir.mkdir()
user_jobs_dir.mkdir()
except OSError as e:
# TODO: Potential leftover cleanup
current_app.logger.error(e)
db.session.rollback()
raise e
raise
return user
@staticmethod

View File

@ -1,12 +1,11 @@
from flask import Blueprint
from flask_login import login_required
import os
from pathlib import Path
import yaml
services_file = \
os.path.join(os.path.dirname(os.path.abspath(__file__)), 'services.yml')
with open(services_file, 'r') as f:
services_file = Path(__file__).parent / 'services.yml'
with services_file.open('r') as f:
SERVICES = yaml.safe_load(f)
bp = Blueprint('services', __name__)

View File

@ -1,6 +1,4 @@
from app.models import User
import os
import shutil
from app import db
from . import bp

View File

@ -7,7 +7,6 @@ from flask import (
)
from flask_breadcrumbs import register_breadcrumb
from flask_login import current_user
import os
from app.models import User
from . import bp
from .utils import user_dynamic_list_constructor as user_dlc
@ -40,8 +39,8 @@ def user_avatar(user_id):
if user.avatar is None:
return redirect(url_for('static', filename='images/user_avatar.png'))
return send_from_directory(
os.path.dirname(user.avatar.path),
os.path.basename(user.avatar.path),
user.avatar.path.parent,
user.avatar.path.name,
as_attachment=True,
attachment_filename=user.avatar.filename,
mimetype=user.avatar.mimetype

View File

@ -1,6 +1,7 @@
from dotenv import load_dotenv
from flask import Flask
from logging.handlers import RotatingFileHandler
from pathlib import Path
from werkzeug.middleware.proxy_fix import ProxyFix
import logging
import os
@ -57,8 +58,7 @@ class Config:
''' # nopaque # '''
NOPAQUE_ADMIN = os.environ.get('NOPAQUE_ADMIN')
NOPAQUE_DATA_DIR = \
os.path.abspath(os.environ.get('NOPAQUE_DATA_PATH', '/mnt/nopaque'))
NOPAQUE_DATA_DIR = Path(os.environ.get('NOPAQUE_DATA_PATH', '/mnt/nopaque'))
NOPAQUE_IS_PRIMARY_INSTANCE = \
os.environ.get('NOPAQUE_IS_PRIMARY_INSTANCE', 'true').lower() == 'true'
NOPAQUE_MAIL_SUBJECT_PREFIX = '[nopaque]'