Compare commits


2 Commits

Author           SHA1        Message                                     Date
Patrick Jentsch  82d6f6003f  Restructure Dockerfile for better caching   2024-03-13 12:58:39 +01:00
Patrick Jentsch  9da74c1c6f  Use pathlib where possible                  2024-03-07 15:49:04 +01:00
22 changed files with 181 additions and 177 deletions

View File

@@ -4,11 +4,13 @@ FROM python:3.10.13-slim-bookworm
 LABEL authors="Patrick Jentsch <p.jentsch@uni-bielefeld.de>"
 
+# Set environment variables
 ENV LANG="C.UTF-8"
 ENV PYTHONDONTWRITEBYTECODE="1"
 ENV PYTHONUNBUFFERED="1"
 
+# Install system dependencies
 RUN apt-get update \
  && apt-get install --no-install-recommends --yes \
      build-essential \
@@ -17,37 +19,42 @@ RUN apt-get update \
  && rm --recursive /var/lib/apt/lists/*
 
+# Create a non-root user
 RUN useradd --create-home --no-log-init nopaque \
  && groupadd docker \
  && usermod --append --groups docker nopaque
 
 USER nopaque
 WORKDIR /home/nopaque
 
+# Create a Python virtual environment
 ENV NOPAQUE_PYTHON3_VENV_PATH="/home/nopaque/.venv"
 RUN python3 -m venv "${NOPAQUE_PYTHON3_VENV_PATH}"
 ENV PATH="${NOPAQUE_PYTHON3_VENV_PATH}/bin:${PATH}"
 
+# Install Python dependencies
+COPY --chown=nopaque:nopaque requirements.txt requirements.txt
+RUN python3 -m pip install --requirement requirements.txt \
+ && rm requirements.txt
+
+# Install the application
+COPY docker-nopaque-entrypoint.sh /usr/local/bin/
 COPY --chown=nopaque:nopaque app app
 COPY --chown=nopaque:nopaque migrations migrations
 COPY --chown=nopaque:nopaque tests tests
 COPY --chown=nopaque:nopaque .flaskenv boot.sh config.py nopaque.py requirements.txt ./
-RUN python3 -m pip install --requirement requirements.txt \
- && mkdir logs
-
-USER root
-COPY docker-nopaque-entrypoint.sh /usr/local/bin/
+RUN mkdir logs
 
 EXPOSE 5000
 
+USER root
+
 ENTRYPOINT ["docker-nopaque-entrypoint.sh"]

View File

@@ -2,80 +2,69 @@ from flask import current_app
 from app import db
 from app.models import User, Corpus, CorpusFile
 from datetime import datetime
+from pathlib import Path
+from typing import Dict, List
 import json
-import os
 import shutil
 
 
 class SandpaperConverter:
-    def __init__(self, json_db_file, data_dir):
+    def __init__(self, json_db_file: Path, data_dir: Path):
         self.json_db_file = json_db_file
         self.data_dir = data_dir
 
     def run(self):
-        with open(self.json_db_file, 'r') as f:
-            json_db = json.loads(f.read())
+        with self.json_db_file.open('r') as f:
+            json_db: List[Dict] = json.load(f)
 
         for json_user in json_db:
             if not json_user['confirmed']:
                 current_app.logger.info(f'Skip unconfirmed user {json_user["username"]}')
                 continue
-            user_dir = os.path.join(self.data_dir, str(json_user['id']))
+            user_dir = self.data_dir / f'{json_user["id"]}'
             self.convert_user(json_user, user_dir)
         db.session.commit()
 
-    def convert_user(self, json_user, user_dir):
+    def convert_user(self, json_user: Dict, user_dir: Path):
         current_app.logger.info(f'Create User {json_user["username"]}...')
-        user = User(
-            confirmed=json_user['confirmed'],
-            email=json_user['email'],
-            last_seen=datetime.fromtimestamp(json_user['last_seen']),
-            member_since=datetime.fromtimestamp(json_user['member_since']),
-            password_hash=json_user['password_hash'], # TODO: Needs to be added manually
-            username=json_user['username']
-        )
-        db.session.add(user)
-        db.session.flush(objects=[user])
-        db.session.refresh(user)
         try:
-            user.makedirs()
-        except OSError as e:
-            current_app.logger.error(e)
-            db.session.rollback()
+            user = User.create(
+                confirmed=json_user['confirmed'],
+                email=json_user['email'],
+                last_seen=datetime.fromtimestamp(json_user['last_seen']),
+                member_since=datetime.fromtimestamp(json_user['member_since']),
+                password_hash=json_user['password_hash'], # TODO: Needs to be added manually
+                username=json_user['username']
+            )
+        except OSError:
             raise Exception('Internal Server Error')
 
         for json_corpus in json_user['corpora'].values():
             if not json_corpus['files'].values():
                 current_app.logger.info(f'Skip empty corpus {json_corpus["title"]}')
                 continue
-            corpus_dir = os.path.join(user_dir, 'corpora', str(json_corpus['id']))
+            corpus_dir = user_dir / 'corpora' / f'{json_corpus["id"]}'
             self.convert_corpus(json_corpus, user, corpus_dir)
         current_app.logger.info('Done')
 
-    def convert_corpus(self, json_corpus, user, corpus_dir):
+    def convert_corpus(self, json_corpus: Dict, user: User, corpus_dir: Path):
         current_app.logger.info(f'Create Corpus {json_corpus["title"]}...')
-        corpus = Corpus(
-            user=user,
-            creation_date=datetime.fromtimestamp(json_corpus['creation_date']),
-            description=json_corpus['description'],
-            title=json_corpus['title']
-        )
-        db.session.add(corpus)
-        db.session.flush(objects=[corpus])
-        db.session.refresh(corpus)
         try:
-            corpus.makedirs()
-        except OSError as e:
-            current_app.logger.error(e)
-            db.session.rollback()
+            corpus = Corpus.create(
+                user=user,
+                creation_date=datetime.fromtimestamp(json_corpus['creation_date']),
+                description=json_corpus['description'],
+                title=json_corpus['title']
+            )
+        except OSError:
             raise Exception('Internal Server Error')
 
         for json_corpus_file in json_corpus['files'].values():
             self.convert_corpus_file(json_corpus_file, corpus, corpus_dir)
         current_app.logger.info('Done')
 
-    def convert_corpus_file(self, json_corpus_file, corpus, corpus_dir):
+    def convert_corpus_file(self, json_corpus_file: Dict, corpus: Corpus, corpus_dir: Path):
         current_app.logger.info(f'Create CorpusFile {json_corpus_file["title"]}...')
         corpus_file = CorpusFile(
             corpus=corpus,
@@ -99,13 +88,13 @@
         db.session.refresh(corpus_file)
         try:
             shutil.copy2(
-                os.path.join(corpus_dir, json_corpus_file['filename']),
+                corpus_dir / json_corpus_file['filename'],
                 corpus_file.path
             )
         except:
             current_app.logger.warning(
                 'Can not convert corpus file: '
-                f'{os.path.join(corpus_dir, json_corpus_file["filename"])}'
+                f'{corpus_dir / json_corpus_file["filename"]}'
                 ' -> '
                 f'{corpus_file.path}'
             )
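Note: the conversion in this file is representative of the whole commit. As a standalone illustration (not code from the repository, and with made-up paths), the os.path idioms that were removed map onto pathlib like this:

from pathlib import Path
import json

# Hypothetical paths and ids, for illustration only.
data_dir = Path('/tmp/nopaque-example')
user_id = 42

# os.path.join(data_dir, str(user_id))  ->  data_dir / f'{user_id}'
user_dir = data_dir / f'{user_id}'

# os.path.exists(p) -> p.exists(); open(p) -> p.open(); json.loads(f.read()) -> json.load(f)
json_db_file = data_dir / 'db.json'
if json_db_file.exists():
    with json_db_file.open('r') as f:
        json_db = json.load(f)

# os.path.dirname(p) -> p.parent; os.path.basename(p) -> p.name
print(user_dir.parent, user_dir.name)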

View File

@@ -1,7 +1,7 @@
-from app.models import Corpus, CorpusStatus
-import os
+from flask import current_app
 import shutil
 from app import db
+from app.models import Corpus, CorpusStatus
 from . import bp
@@ -18,10 +18,17 @@ def reset():
     ]
     for corpus in [x for x in Corpus.query.all() if x.status in status]:
         print(f'Resetting corpus {corpus}')
-        shutil.rmtree(os.path.join(corpus.path, 'cwb'), ignore_errors=True)
-        os.mkdir(os.path.join(corpus.path, 'cwb'))
-        os.mkdir(os.path.join(corpus.path, 'cwb', 'data'))
-        os.mkdir(os.path.join(corpus.path, 'cwb', 'registry'))
+        corpus_cwb_dir = corpus.path / 'cwb'
+        corpus_cwb_data_dir = corpus_cwb_dir / 'data'
+        corpus_cwb_registry_dir = corpus_cwb_dir / 'registry'
+        try:
+            shutil.rmtree(corpus.path / 'cwb', ignore_errors=True)
+            corpus_cwb_dir.mkdir()
+            corpus_cwb_data_dir.mkdir()
+            corpus_cwb_registry_dir.mkdir()
+        except OSError as e:
+            current_app.logger.error(e)
+            raise
         corpus.status = CorpusStatus.UNPREPARED
         corpus.num_analysis_sessions = 0
     db.session.commit()

View File

@@ -12,7 +12,6 @@ from typing import Dict, List
 import gzip
 import json
 import math
-import os
 from app import db
 from app.models import Corpus
 from .utils import lookups_by_cpos, partial_export_subcorpus, export_subcorpus
@@ -42,9 +41,9 @@ def ext_corpus_static_data(corpus: str) -> Dict:
     db_corpus_id: int = session['cqi_over_sio']['db_corpus_id']
     db_corpus: Corpus = Corpus.query.get(db_corpus_id)
-    static_data_file_path: str = os.path.join(db_corpus.path, 'cwb', 'static.json.gz')
-    if os.path.exists(static_data_file_path):
-        with open(static_data_file_path, 'rb') as f:
+    static_data_file_path = db_corpus.path / 'cwb' / 'static.json.gz'
+    if static_data_file_path.exists():
+        with static_data_file_path.open('rb') as f:
             return f.read()
     cqi_client: CQiClient = session['cqi_over_sio']['cqi_client']

View File

@@ -7,7 +7,6 @@ from flask import (
     url_for
 )
 from flask_breadcrumbs import register_breadcrumb
-import os
 from app import db
 from app.models import Corpus, CorpusFile, CorpusStatus
 from ..decorators import corpus_follower_permission_required
@@ -92,8 +91,8 @@ def corpus_file(corpus_id, corpus_file_id):
 def download_corpus_file(corpus_id, corpus_file_id):
     corpus_file = CorpusFile.query.filter_by(corpus_id=corpus_id, id=corpus_file_id).first_or_404()
     return send_from_directory(
-        os.path.dirname(corpus_file.path),
-        os.path.basename(corpus_file.path),
+        corpus_file.path.parent,
+        corpus_file.path.name,
         as_attachment=True,
         attachment_filename=corpus_file.filename,
         mimetype=corpus_file.mimetype

View File

@@ -1,7 +1,6 @@
 from flask import abort, current_app
 from flask_login import current_user
 from threading import Thread
-import os
 from app import db
 from app.decorators import admin_required, content_negotiation
 from app.models import Job, JobStatus
@@ -39,7 +38,7 @@ def job_log(job_id):
     if job.status not in [JobStatus.COMPLETED, JobStatus.FAILED]:
         response = {'errors': {'message': 'Job status is not completed or failed'}}
         return response, 409
-    with open(os.path.join(job.path, 'pipeline_data', 'logs', 'pyflow_log.txt')) as log_file:
+    with open(job.path / 'pipeline_data' / 'logs' / 'pyflow_log.txt') as log_file:
         log = log_file.read()
     response_data = {
         'jobLog': log

View File

@@ -7,7 +7,6 @@ from flask import (
 )
 from flask_breadcrumbs import register_breadcrumb
 from flask_login import current_user
-import os
 from app.models import Job, JobInput, JobResult
 from . import bp
 from .utils import job_dynamic_list_constructor as job_dlc
@@ -38,8 +37,8 @@ def download_job_input(job_id, job_input_id):
     if not (job_input.job.user == current_user or current_user.is_administrator()):
         abort(403)
     return send_from_directory(
-        os.path.dirname(job_input.path),
-        os.path.basename(job_input.path),
+        job_input.path.parent,
+        job_input.path.name,
         as_attachment=True,
         attachment_filename=job_input.filename,
         mimetype=job_input.mimetype
@@ -52,8 +51,8 @@ def download_job_result(job_id, job_result_id):
     if not (job_result.job.user == current_user or current_user.is_administrator()):
         abort(403)
     return send_from_directory(
-        os.path.dirname(job_result.path),
-        os.path.basename(job_result.path),
+        job_result.path.parent,
+        job_result.path.name,
         as_attachment=True,
         attachment_filename=job_result.filename,
         mimetype=job_result.mimetype

View File

@@ -1,6 +1,7 @@
 from flask import current_app
 from flask_migrate import upgrade
-import os
+from pathlib import Path
+from typing import List
 from app.models import (
     CorpusFollowerRole,
     Role,
@@ -17,16 +18,15 @@ def deploy():
     # Make default directories
     print('Make default directories')
     base_dir = current_app.config['NOPAQUE_DATA_DIR']
-    default_dirs = [
-        os.path.join(base_dir, 'tmp'),
-        os.path.join(base_dir, 'users')
+    default_dirs: List[Path] = [
+        base_dir / 'tmp',
+        base_dir / 'users'
     ]
-    for dir in default_dirs:
-        if os.path.exists(dir):
-            if not os.path.isdir(dir):
-                raise NotADirectoryError(f'{dir} is not a directory')
-        else:
-            os.mkdir(dir)
+    for default_dir in default_dirs:
+        if not default_dir.exists():
+            default_dir.mkdir()
+        if not default_dir.is_dir():
+            raise NotADirectoryError(f'{default_dir} is not a directory')
 
     # migrate database to latest revision
     print('Migrate database to latest revision')
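Aside (not part of the change): Path.mkdir() can also absorb the exists() check that deploy() performs above, with a slightly different failure mode. A minimal sketch with a stand-in base directory:

from pathlib import Path

base_dir = Path('/tmp/nopaque-example')  # stand-in for NOPAQUE_DATA_DIR

for default_dir in (base_dir / 'tmp', base_dir / 'users'):
    # parents=True creates missing parent directories as well; exist_ok=True
    # is a no-op if the directory already exists and still raises
    # FileExistsError when the path exists but is a regular file.
    default_dir.mkdir(parents=True, exist_ok=True)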

View File

@@ -11,6 +11,7 @@ from .spacy_nlp_pipeline_model import *
 from .tesseract_ocr_pipeline_model import *
 from .token import *
 from .user import *
+from app import login
 
 
 @login.user_loader

View File

@@ -1,6 +1,6 @@
 from flask import current_app
 from flask_hashids import HashidMixin
-import os
+from pathlib import Path
 from app import db
 from .file_mixin import FileMixin
@@ -15,14 +15,16 @@ class Avatar(HashidMixin, FileMixin, db.Model):
     user = db.relationship('User', back_populates='avatar')
 
     @property
-    def path(self):
-        return os.path.join(self.user.path, 'avatar')
+    def path(self) -> Path:
+        return self.user.path / 'avatar'
+        # return os.path.join(self.user.path, 'avatar')
 
     def delete(self):
         try:
-            os.remove(self.path)
+            self.path.unlink(missing_ok=True)
         except OSError as e:
             current_app.logger.error(e)
+            raise
         db.session.delete(self)
 
     def to_json_serializeable(self, backrefs=False, relationships=False):

View File

@@ -4,7 +4,7 @@ from flask import current_app, url_for
 from flask_hashids import HashidMixin
 from sqlalchemy.ext.associationproxy import association_proxy
 from typing import Union
-import os
+from pathlib import Path
 import shutil
 import xml.etree.ElementTree as ET
 from app import db
@@ -88,8 +88,8 @@ class Corpus(HashidMixin, db.Model):
         return f'{self.user.jsonpatch_path}/corpora/{self.hashid}'
 
     @property
-    def path(self):
-        return os.path.join(self.user.path, 'corpora', str(self.id))
+    def path(self) -> Path:
+        return self.user.path / 'corpora' / f'{self.id}'
 
     @property
     def url(self):
@@ -105,27 +105,39 @@
         db.session.add(corpus)
         db.session.flush(objects=[corpus])
         db.session.refresh(corpus)
+        corpus_files_dir = corpus.path / 'files'
+        corpus_cwb_dir = corpus.path / 'cwb'
+        corpus_cwb_data_dir = corpus_cwb_dir / 'data'
+        corpus_cwb_registry_dir = corpus_cwb_dir / 'registry'
         try:
-            os.mkdir(corpus.path)
-            os.mkdir(os.path.join(corpus.path, 'files'))
-            os.mkdir(os.path.join(corpus.path, 'cwb'))
-            os.mkdir(os.path.join(corpus.path, 'cwb', 'data'))
-            os.mkdir(os.path.join(corpus.path, 'cwb', 'registry'))
+            corpus.path.mkdir()
+            corpus_files_dir.mkdir()
+            corpus_cwb_dir.mkdir()
+            corpus_cwb_data_dir.mkdir()
+            corpus_cwb_registry_dir.mkdir()
         except OSError as e:
+            # TODO: Potential leftover cleanup
             current_app.logger.error(e)
             db.session.rollback()
-            raise e
+            raise
         return corpus
 
     def build(self):
-        build_dir = os.path.join(self.path, 'cwb')
-        shutil.rmtree(build_dir, ignore_errors=True)
-        os.mkdir(build_dir)
-        os.mkdir(os.path.join(build_dir, 'data'))
-        os.mkdir(os.path.join(build_dir, 'registry'))
+        corpus_cwb_dir = self.path / 'cwb'
+        corpus_cwb_data_dir = corpus_cwb_dir / 'data'
+        corpus_cwb_registry_dir = corpus_cwb_dir / 'registry'
+        try:
+            shutil.rmtree(corpus_cwb_dir, ignore_errors=True)
+            corpus_cwb_dir.mkdir()
+            corpus_cwb_data_dir.mkdir()
+            corpus_cwb_registry_dir.mkdir()
+        except OSError as e:
+            current_app.logger.error(e)
+            self.status = CorpusStatus.FAILED
+            raise
         corpus_element = ET.fromstring('<corpus>\n</corpus>')
         for corpus_file in self.files:
-            normalized_vrt_path = os.path.join(build_dir, f'{corpus_file.id}.norm.vrt')
+            normalized_vrt_path = corpus_cwb_dir / f'{corpus_file.id}.norm.vrt'
             try:
                 normalize_vrt_file(corpus_file.path, normalized_vrt_path)
             except:
@@ -152,7 +164,7 @@ class Corpus(HashidMixin, db.Model):
             # corpus_element.insert(1, text_element)
             corpus_element.append(text_element)
         ET.ElementTree(corpus_element).write(
-            os.path.join(build_dir, 'corpus.vrt'),
+            corpus_cwb_dir / 'corpus.vrt',
            encoding='utf-8'
         )
         self.status = CorpusStatus.SUBMITTED
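The Corpus.create()/build() changes follow one pattern: compute the Path handles up front, create the directories inside a single try block, and log or flag failure in the except branch. A minimal standalone sketch of that pattern, outside SQLAlchemy and with hypothetical names:

from pathlib import Path
import shutil

def create_corpus_layout(corpus_path: Path) -> None:
    corpus_files_dir = corpus_path / 'files'
    corpus_cwb_dir = corpus_path / 'cwb'
    corpus_cwb_data_dir = corpus_cwb_dir / 'data'
    corpus_cwb_registry_dir = corpus_cwb_dir / 'registry'
    try:
        corpus_path.mkdir()
        corpus_files_dir.mkdir()
        corpus_cwb_dir.mkdir()
        corpus_cwb_data_dir.mkdir()
        corpus_cwb_registry_dir.mkdir()
    except OSError:
        # One way to address the "potential leftover cleanup" TODO noted in
        # the diff: remove whatever was created before re-raising.
        shutil.rmtree(corpus_path, ignore_errors=True)
        raise

create_corpus_layout(Path('/tmp/nopaque-example-corpus'))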

View File

@@ -1,6 +1,6 @@
 from flask import current_app, url_for
 from flask_hashids import HashidMixin
-import os
+from pathlib import Path
 from app import db
 from .corpus import CorpusStatus
 from .file_mixin import FileMixin
@@ -45,8 +45,8 @@ class CorpusFile(FileMixin, HashidMixin, db.Model):
         return f'{self.corpus.jsonpatch_path}/files/{self.hashid}'
 
     @property
-    def path(self):
-        return os.path.join(self.corpus.path, 'files', str(self.id))
+    def path(self) -> Path:
+        return self.corpus.path / 'files' / f'{self.id}'
 
     @property
     def url(self):
@@ -66,9 +66,10 @@ class CorpusFile(FileMixin, HashidMixin, db.Model):
     def delete(self):
         try:
-            os.remove(self.path)
+            self.path.unlink(missing_ok=True)
         except OSError as e:
             current_app.logger.error(e)
+            raise
         db.session.delete(self)
         self.corpus.status = CorpusStatus.UNPREPARED

View File

@@ -4,7 +4,7 @@ from flask import current_app, url_for
 from flask_hashids import HashidMixin
 from time import sleep
 from typing import Union
-import os
+from pathlib import Path
 import shutil
 from app import db
 from app.ext.flask_sqlalchemy import ContainerColumn, IntEnumColumn
@@ -79,8 +79,8 @@ class Job(HashidMixin, db.Model):
         return f'{self.user.jsonpatch_path}/jobs/{self.hashid}'
 
     @property
-    def path(self):
-        return os.path.join(self.user.path, 'jobs', str(self.id))
+    def path(self) -> Path:
+        return self.user.path / 'jobs' / f'{self.id}'
 
     @property
     def url(self):
@@ -96,15 +96,19 @@
         db.session.add(job)
         db.session.flush(objects=[job])
         db.session.refresh(job)
+        job_inputs_dir = job.path / 'inputs'
+        job_pipeline_data_dir = job.path / 'pipeline_data'
+        job_results_dir = job.path / 'results'
         try:
-            os.mkdir(job.path)
-            os.mkdir(os.path.join(job.path, 'inputs'))
-            os.mkdir(os.path.join(job.path, 'pipeline_data'))
-            os.mkdir(os.path.join(job.path, 'results'))
+            job.path.mkdir()
+            job_inputs_dir.mkdir()
+            job_pipeline_data_dir.mkdir()
+            job_results_dir.mkdir()
         except OSError as e:
+            # TODO: Potential leftover cleanup
             current_app.logger.error(e)
             db.session.rollback()
-            raise e
+            raise
         return job
 
     def delete(self):
@@ -131,8 +135,8 @@
         ''' Restart a job - only if the status is failed '''
         if self.status != JobStatus.FAILED:
             raise Exception('Job status is not "failed"')
-        shutil.rmtree(os.path.join(self.path, 'results'), ignore_errors=True)
-        shutil.rmtree(os.path.join(self.path, 'pyflow.data'), ignore_errors=True)
+        shutil.rmtree(self.path / 'results', ignore_errors=True)
+        shutil.rmtree(self.path / 'pyflow.data', ignore_errors=True)
         for result in self.results:
             db.session.delete(result)
         self.end_date = None
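Note that Job.restart() now hands Path objects directly to shutil.rmtree(). That works because the shutil and os functions accept any os.PathLike object since Python 3.6, so no str() conversion is needed. A quick standalone check with a throwaway directory:

from pathlib import Path
import shutil

job_path = Path('/tmp/nopaque-example-job')  # throwaway example path
(job_path / 'results').mkdir(parents=True, exist_ok=True)

# Same call shape as Job.restart(); ignore_errors covers missing directories.
shutil.rmtree(job_path / 'results', ignore_errors=True)
shutil.rmtree(job_path / 'pyflow.data', ignore_errors=True)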

View File

@@ -1,6 +1,6 @@
 from flask import url_for
 from flask_hashids import HashidMixin
-import os
+from pathlib import Path
 from app import db
 from .file_mixin import FileMixin
@@ -33,8 +33,8 @@ class JobInput(FileMixin, HashidMixin, db.Model):
         return f'{self.job.jsonpatch_path}/inputs/{self.hashid}'
 
     @property
-    def path(self):
-        return os.path.join(self.job.path, 'inputs', str(self.id))
+    def path(self) -> Path:
+        return self.job.path / 'inputs' / f'{self.id}'
 
     @property
     def url(self):

View File

@@ -1,6 +1,6 @@
 from flask import url_for
 from flask_hashids import HashidMixin
-import os
+from pathlib import Path
 from app import db
 from .file_mixin import FileMixin
@@ -35,8 +35,8 @@ class JobResult(FileMixin, HashidMixin, db.Model):
         return f'{self.job.jsonpatch_path}/results/{self.hashid}'
 
     @property
-    def path(self):
-        return os.path.join(self.job.path, 'results', str(self.id))
+    def path(self) -> Path:
+        return self.job.path / 'results' / f'{self.id}'
 
     @property
     def url(self):

View File

@@ -1,8 +1,7 @@
-from flask import abort, current_app, url_for
+from flask import current_app, url_for
 from flask_hashids import HashidMixin
-from time import sleep
 from tqdm import tqdm
-import os
+from pathlib import Path
 import requests
 import yaml
 from app import db
@@ -32,12 +31,8 @@ class SpaCyNLPPipelineModel(FileMixin, HashidMixin, db.Model):
     user = db.relationship('User', back_populates='spacy_nlp_pipeline_models')
 
     @property
-    def path(self):
-        return os.path.join(
-            self.user.path,
-            'spacy_nlp_pipeline_models',
-            str(self.id)
-        )
+    def path(self) -> Path:
+        return self.user.path / 'spacy_nlp_pipeline_models' / f'{self.id}'
 
     @property
     def jsonpatch_path(self):
@@ -57,14 +52,10 @@ class SpaCyNLPPipelineModel(FileMixin, HashidMixin, db.Model):
     @staticmethod
     def insert_defaults(force_download=False):
         nopaque_user = User.query.filter_by(username='nopaque').first()
-        defaults_file = os.path.join(
-            os.path.dirname(os.path.abspath(__file__)),
-            'default_records',
-            'spacy_nlp_pipeline_model.yml'
-        )
-        with open(defaults_file, 'r') as f:
-            defaults = yaml.safe_load(f)
-        for m in defaults:
+        default_records_file = Path(__file__).parent / 'default_records' / 'spacy_nlp_pipeline_model.yml'
+        with default_records_file.open('r') as f:
+            default_records = yaml.safe_load(f)
+        for m in default_records:
             model = SpaCyNLPPipelineModel.query.filter_by(title=m['title'], version=m['version']).first() # noqa
             if model is not None:
                 model.compatible_service_versions = m['compatible_service_versions']
@@ -96,7 +87,7 @@ class SpaCyNLPPipelineModel(FileMixin, HashidMixin, db.Model):
             db.session.add(model)
             db.session.flush(objects=[model])
             db.session.refresh(model)
-            if not os.path.exists(model.path) or force_download:
+            if not model.path.exists() or force_download:
                 r = requests.get(m['url'], stream=True)
                 pbar = tqdm(
                     desc=f'{model.title} ({model.filename})',
@@ -116,9 +107,10 @@ class SpaCyNLPPipelineModel(FileMixin, HashidMixin, db.Model):
     def delete(self):
         try:
-            os.remove(self.path)
+            self.path.unlink(missing_ok=True)
         except OSError as e:
             current_app.logger.error(e)
+            raise
         db.session.delete(self)
 
     def to_json_serializeable(self, backrefs=False, relationships=False):

View File

@@ -1,7 +1,7 @@
 from flask import current_app, url_for
 from flask_hashids import HashidMixin
 from tqdm import tqdm
-import os
+from pathlib import Path
 import requests
 import yaml
 from app import db
@@ -30,12 +30,8 @@ class TesseractOCRPipelineModel(FileMixin, HashidMixin, db.Model):
     user = db.relationship('User', back_populates='tesseract_ocr_pipeline_models')
 
     @property
-    def path(self):
-        return os.path.join(
-            self.user.path,
-            'tesseract_ocr_pipeline_models',
-            str(self.id)
-        )
+    def path(self) -> Path:
+        return self.user.path / 'tesseract_ocr_pipeline_models' / f'{self.id}'
 
     @property
     def jsonpatch_path(self):
@@ -55,14 +51,10 @@ class TesseractOCRPipelineModel(FileMixin, HashidMixin, db.Model):
     @staticmethod
     def insert_defaults(force_download=False):
         nopaque_user = User.query.filter_by(username='nopaque').first()
-        defaults_file = os.path.join(
-            os.path.dirname(os.path.abspath(__file__)),
-            'default_records',
-            'tesseract_ocr_pipeline_model.yml'
-        )
-        with open(defaults_file, 'r') as f:
-            defaults = yaml.safe_load(f)
-        for m in defaults:
+        default_records_file = Path(__file__).parent / 'default_records' / 'tesseract_ocr_pipeline_model.yml'
+        with default_records_file.open('r') as f:
+            default_records = yaml.safe_load(f)
+        for m in default_records:
             model = TesseractOCRPipelineModel.query.filter_by(title=m['title'], version=m['version']).first() # noqa
             if model is not None:
                 model.compatible_service_versions = m['compatible_service_versions']
@@ -92,7 +84,7 @@ class TesseractOCRPipelineModel(FileMixin, HashidMixin, db.Model):
             db.session.flush(objects=[model])
             db.session.refresh(model)
             model.filename = f'{model.id}.traineddata'
-            if not os.path.exists(model.path) or force_download:
+            if not model.path.exists() or force_download:
                 r = requests.get(m['url'], stream=True)
                 pbar = tqdm(
                     desc=f'{model.title} ({model.filename})',
@@ -112,9 +104,10 @@ class TesseractOCRPipelineModel(FileMixin, HashidMixin, db.Model):
     def delete(self):
         try:
-            os.remove(self.path)
+            self.path.unlink(missing_ok=True)
         except OSError as e:
             current_app.logger.error(e)
+            raise
         db.session.delete(self)
 
     def to_json_serializeable(self, backrefs=False, relationships=False):

View File

@@ -4,14 +4,14 @@ from flask import current_app, url_for
 from flask_hashids import HashidMixin
 from flask_login import UserMixin
 from sqlalchemy.ext.associationproxy import association_proxy
+from pathlib import Path
 from typing import Union
 from werkzeug.security import generate_password_hash, check_password_hash
 import jwt
-import os
 import re
 import secrets
 import shutil
-from app import db, hashids, login
+from app import db, hashids
 from app.ext.flask_sqlalchemy import IntEnumColumn
 from .corpus import Corpus
 from .corpus_follower_association import CorpusFollowerAssociation
@@ -145,9 +145,8 @@ class User(HashidMixin, UserMixin, db.Model):
         self.password_hash = generate_password_hash(password)
 
     @property
-    def path(self):
-        return os.path.join(
-            current_app.config.get('NOPAQUE_DATA_DIR'), 'users', str(self.id))
+    def path(self) -> Path:
+        return current_app.config.get('NOPAQUE_DATA_DIR') / 'users' / f'{self.id}'
 
     @staticmethod
     def create(**kwargs):
@@ -155,16 +154,21 @@
         db.session.add(user)
         db.session.flush(objects=[user])
         db.session.refresh(user)
+        user_spacy_nlp_pipeline_models_dir = user.path / 'spacy_nlp_pipeline_models'
+        user_tesseract_ocr_pipeline_models_dir = user.path / 'tesseract_ocr_pipeline_models'
+        user_corpora_dir = user.path / 'corpora'
+        user_jobs_dir = user.path / 'jobs'
         try:
-            os.mkdir(user.path)
-            os.mkdir(os.path.join(user.path, 'spacy_nlp_pipeline_models'))
-            os.mkdir(os.path.join(user.path, 'tesseract_ocr_pipeline_models'))
-            os.mkdir(os.path.join(user.path, 'corpora'))
-            os.mkdir(os.path.join(user.path, 'jobs'))
+            user.path.mkdir()
+            user_spacy_nlp_pipeline_models_dir.mkdir()
+            user_tesseract_ocr_pipeline_models_dir.mkdir()
+            user_corpora_dir.mkdir()
+            user_jobs_dir.mkdir()
         except OSError as e:
+            # TODO: Potential leftover cleanup
             current_app.logger.error(e)
             db.session.rollback()
-            raise e
+            raise
         return user
 
     @staticmethod

View File

@@ -1,12 +1,11 @@
 from flask import Blueprint
 from flask_login import login_required
-import os
+from pathlib import Path
 import yaml
 
-services_file = \
-    os.path.join(os.path.dirname(os.path.abspath(__file__)), 'services.yml')
-with open(services_file, 'r') as f:
+services_file = Path(__file__).parent / 'services.yml'
+with services_file.open('r') as f:
     SERVICES = yaml.safe_load(f)
 
 bp = Blueprint('services', __name__)
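Both this blueprint and the model insert_defaults() methods above use the same module-relative lookup: Path(__file__).parent points at the package directory, so bundled data files resolve independently of the current working directory. A small sketch with a hypothetical file name:

from pathlib import Path
import yaml

config_file = Path(__file__).parent / 'example.yml'  # hypothetical file name
if config_file.exists():
    with config_file.open('r') as f:
        data = yaml.safe_load(f)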

View File

@@ -1,6 +1,4 @@
 from app.models import User
-import os
-import shutil
 from app import db
 from . import bp

View File

@@ -7,7 +7,6 @@ from flask import (
 )
 from flask_breadcrumbs import register_breadcrumb
 from flask_login import current_user
-import os
 from app.models import User
 from . import bp
 from .utils import user_dynamic_list_constructor as user_dlc
@@ -40,8 +39,8 @@ def user_avatar(user_id):
     if user.avatar is None:
         return redirect(url_for('static', filename='images/user_avatar.png'))
     return send_from_directory(
-        os.path.dirname(user.avatar.path),
-        os.path.basename(user.avatar.path),
+        user.avatar.path.parent,
+        user.avatar.path.name,
         as_attachment=True,
         attachment_filename=user.avatar.filename,
         mimetype=user.avatar.mimetype

View File

@@ -1,6 +1,7 @@
 from dotenv import load_dotenv
 from flask import Flask
 from logging.handlers import RotatingFileHandler
+from pathlib import Path
 from werkzeug.middleware.proxy_fix import ProxyFix
 import logging
 import os
@@ -57,8 +58,7 @@ class Config:
     ''' # nopaque # '''
     NOPAQUE_ADMIN = os.environ.get('NOPAQUE_ADMIN')
-    NOPAQUE_DATA_DIR = \
-        os.path.abspath(os.environ.get('NOPAQUE_DATA_PATH', '/mnt/nopaque'))
+    NOPAQUE_DATA_DIR = Path(os.environ.get('NOPAQUE_DATA_PATH', '/mnt/nopaque'))
     NOPAQUE_IS_PRIMARY_INSTANCE = \
         os.environ.get('NOPAQUE_IS_PRIMARY_INSTANCE', 'true').lower() == 'true'
     NOPAQUE_MAIL_SUBJECT_PREFIX = '[nopaque]'
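With NOPAQUE_DATA_DIR stored as a Path, every *.path property in the models above can be built by chaining the / operator, which accepts plain strings on its right-hand side; the resulting object is then handed unchanged to open(), shutil, and send_from_directory(), as the views in this commit do. An illustration with made-up ids:

from pathlib import Path
import os

NOPAQUE_DATA_DIR = Path(os.environ.get('NOPAQUE_DATA_PATH', '/mnt/nopaque'))

user_id, job_id = 1, 7                                   # made-up example ids
user_path = NOPAQUE_DATA_DIR / 'users' / f'{user_id}'    # mirrors User.path
job_path = user_path / 'jobs' / f'{job_id}'              # mirrors Job.path
print(job_path)  # /mnt/nopaque/users/1/jobs/7 with the default environment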